├── tests
    ├── sample.rap
    ├── Makefile
    ├── sample.c
    └── ck_sample.c
├── sim
    ├── Kerr.rap
    ├── Kerr.opts
    ├── infcam.opts
    ├── gray.opts
    ├── Makefile
    ├── initcond.h
    ├── gray.h
    ├── gray
    │   ├── infcam.c
    │   └── Kerr
    │   │   └── ocl.c
    └── gray.c
├── .gitignore
├── sim-org
    ├── Makefile
    ├── icond.opts
    ├── setup.opts
    ├── AoS.cl
    ├── SoA.cl
    ├── param.opts
    ├── flow.cl
    ├── RK4.cl
    ├── phys.cl
    ├── evolve.c
    ├── icond.c
    ├── gray.h
    ├── driver.cl
    ├── build.c
    ├── rt.cl
    ├── preamble.cl
    ├── dyst.cl
    ├── interp.cl
    ├── gray.c
    ├── KS.cl
    └── io.c
├── doc
    ├── Makefile
    └── Doxyfile
├── Makefile
├── tools
    ├── gray.py
    ├── generate_data.py
    └── boosted_ks.py
├── README.md
└── COPYING


/tests/sample.rap:
--------------------------------------------------------------------------------
1 | XSPEC	size_t  n;
2 | ISPEC	size_t  n1, n2;
3 | PARAMS	double  alpha;
4 | STATES	double *x, *y, *z;
5 | 


--------------------------------------------------------------------------------
/sim/Kerr.rap:
--------------------------------------------------------------------------------
1 | OPTS	size_t nque; cl_command_queue *que;
2 | XSPEC	size_t n;
3 | ISPEC	size_t bsz, gsz;
4 | PARAMS	double aspin, target;
5 | STATES	cl_mem rays;
6 | 


--------------------------------------------------------------------------------
/sim/Kerr.opts:
--------------------------------------------------------------------------------
1 | real	m_bh:M	= 4e6	= strtod(val, &rem);	/**< Mass of the central black hole in unit of solar mass; note that it is needed in radiative transfer but not in geodesic integration */
2 | real	a_spin:a	= 0	= strtod(val, &rem);	/**< Dimensionless black hole spin */
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Global settings
 2 | *
 3 | !*/
 4 | 
 5 | # Source code
 6 | !*.[hc]
 7 | !*.cl
 8 | !*.py
 9 | 
10 | # Optgen-ed files
11 | !*.rap
12 | !*.opts
13 | *_rap.h
14 | *_opts.h
15 | 
16 | # Makefile etc
17 | !*file
18 | 
19 | # Documentation
20 | html/
21 | latex/
22 | !*.md
23 | 
24 | # Data files
25 | !*.?sv
26 | 


--------------------------------------------------------------------------------
/sim-org/Makefile:
--------------------------------------------------------------------------------
 1 | # Headers produced from the matching *.opts files by the %.h rule below
 2 | OPTS_H := icond.h param.h setup.h
 3 | 
 4 | # None of these names a file; keep a stray file called `clean` etc. from
 5 | # silently disabling the target.
 6 | .PHONY: all install clean
 7 | 
 8 | all: gray.la
 9 | 
10 | install: gray.la
11 | 	lux-install *.cl gray.la +i sim
12 | 
13 | %.h: %.opts
14 | 	lux-optgen $<
15 | 
16 | gray.la: gray.h *.c $(OPTS_H)
17 | 	lux-build gray.h *.c -f opencl hdf5 -o $@
18 | 
19 | clean:
20 | 	rm -fr .libs _libs
21 | 	rm -f  *.lo gray.*so gray.la
22 | 	rm -f  $(OPTS_H)
23 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: default all doc check install clean
 2 | 
 3 | default: doc
 4 | 
 5 | all:
 6 | 
 7 | doc: html/index.html latex/refman.pdf
 8 | 
 9 | check:
10 | 
11 | install:
12 | 
13 | html/index.html:
14 | 	doxygen
15 | 
16 | # Chain with && so a failed cd or first pdflatex pass stops the recipe
17 | latex/refman.pdf:
18 | 	if [ ! -f latex/refman.tex ]; then doxygen; fi
19 | 	cd latex && pdflatex refman && pdflatex refman
20 | 
21 | clean:
22 | 	rm -rf html/ latex/


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SUBDIRS = doc tests sim
 2 | 
 3 | # Forward the goals named on the command line to every sub-directory;
 4 | # with no explicit goal, build `all`.
 5 | ifeq ($(MAKECMDGOALS),)
 6 |   GOALS = all
 7 | else
 8 |   GOALS = $(MAKECMDGOALS)
 9 | endif
10 | 
11 | # `doc` is also the name of a directory, so the goals must be phony.
12 | .PHONY: default all doc check install clean recursive
13 | 
14 | default: all
15 | 
16 | all doc check install clean: recursive
17 | 
18 | # Run $(GOALS) in each sub-directory and stop at the first failure.
19 | # $(MAKE) rather than plain `make` passes -n, -j and the jobserver down.
20 | recursive:
21 | 	@list='$(SUBDIRS)'; goals='$(GOALS)'; for subdir in $$list; do \
22 | 	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $$goals) || exit 1; \
23 | 	done
24 | 


--------------------------------------------------------------------------------
/sim-org/icond.opts:
--------------------------------------------------------------------------------
1 | # Initial conditions
2 | real	w_img:w	= 64	= strtod(val, &rem);	/**< Width  of the image in \f$GM/c^2\f$ */
3 | real	h_img:h	= 64	= strtod(val, &rem);	/**< Height of the image in \f$GM/c^2\f$ */
4 | real	r_obs:r	= 1024	= strtod(val, &rem);	/**< Distance of the image from the black hole */
5 | real	i_obs:i	= 60	= strtod(val, &rem);	/**< Inclination angle of the image in degrees */
6 | real	j_obs:j	= 0	= strtod(val, &rem);	/**< Azimuthal   angle of the image in degrees */
7 | 


--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: default all doc check install clean
 2 | 
 3 | default: check
 4 | 
 5 | all: sample.la ck_sample
 6 | 
 7 | doc:
 8 | 
 9 | # Run every ck_* file whose name has no dot; stop at the first failing
10 | # one so that its exit status makes `make check` fail.
11 | check: all
12 | 	for ck in $$(ls -1 ck_* | grep -v '\.'); do ./$$ck --debug || exit 1; done
13 | 
14 | install:
15 | 
16 | # The module globs are spelled out because recipes run under /bin/sh by
17 | # default, which need not support brace expansion.
18 | clean:
19 | 	for f in $$(ls *.rap  2> /dev/null); do rm -f $${f%.rap}_rap.h;   done
20 | 	for g in $$(ls *.opts 2> /dev/null); do rm -f $${g%.opts}_opts.h; done
21 | 	rm -fr .libs _libs
22 | 	rm -f  *.lo *.so *.la                # remove modules
23 | 	rm -f  $$(ls -1 ck_* | grep -v '\.') # remove binaries
24 | 
25 | ck_%: %_rap.h ck_%.c
26 | 	lux-build $^ -o $@
27 | 
28 | %.la: %_rap.h %.c
29 | 	lux-build $^ -o $@
30 | 
31 | %_rap.h: %.rap
32 | 	lux-rapgen $<
33 | 
34 | %_opts.h: %.opts
35 | 	lux-optgen $<
36 | 


--------------------------------------------------------------------------------
/sim/infcam.opts:
--------------------------------------------------------------------------------
 1 | # Camera Setup
 2 | real	w_img:w	= 64	= strtod(val, &rem);	/**< Width  of the image in \f$GM/c^2\f$ */
 3 | real	h_img:h	= 64	= strtod(val, &rem);	/**< Height of the image in \f$GM/c^2\f$ */
 4 | real	r_obs:r	= 1024	= strtod(val, &rem);	/**< Distance of the image from the black hole; TODO: analytically integrate from infinity */
 5 | real	i_obs:i	= 60	= strtod(val, &rem);	/**< Inclination angle of the image in degrees */
 6 | real	j_obs:j	= 0	= strtod(val, &rem);	/**< Azimuthal   angle of the image in degrees */
 7 | 
 8 | # Numerical Setup
 9 | const char *	coordinates:coor	= "pxcenter"	= val;	/**< Coordinate system */
10 | size_t	n_width:W	= 512	= strtoul(val, &rem, 0);	/**< Number of rays along the width  of the image */
11 | size_t	n_height:H	= 512	= strtoul(val, &rem, 0);	/**< Number of rays along the height of the image */
12 | 


--------------------------------------------------------------------------------
/sim/gray.opts:
--------------------------------------------------------------------------------
 1 | unsigned	i_platform:ip	= 0	= strtoul(val, &rem, 0);	/**< Index of platform to use */
 2 | unsigned	i_device:id	= 0	= strtoul(val, &rem, 0);	/**< Index of device   to use */
 3 | cl_device_type	device_type:type	= CL_DEVICE_TYPE_ALL	= strtotype(val);	/**< Type  of device   to use */
 4 | 
 5 | const char *	spacetime:st	= "Kerr"	= val;	/**< Spacetime geometry */
 6 | 
 7 | const char *	initcond:ic	= "infcam"	= val;	/**< Initial conditions for the rays */
 8 | 
 9 | real	t_init:t0	= 0	= strtod(val, &rem);	/**< Initial time */
10 | real	dt_dump:dt	= -32	= strtod(val, &rem);	/**< Time interval between dumps */
11 | size_t	i_init:i0	= 0	= strtoul(val, &rem, 0);	/**< Initial dump id */
12 | size_t	n_dump:N	= 64	= strtoul(val, &rem, 0);	/**< Number of dumps (in addition to the initial condition) */
13 | 
14 | const char *	rayfile:ray	= "ray%04d.h5"	= val;	/**< File name format for ray dump */
15 | 


--------------------------------------------------------------------------------
/sim/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: default all doc check install clean
 2 | 
 3 | default: all
 4 | 
 5 | all: gray/Kerr/ocl.la gray/infcam.la gray.la
 6 | 
 7 | doc:
 8 | 
 9 | check:
10 | 
11 | install: gray/Kerr/ocl.la gray/infcam.la gray.la
12 | 	lux-install $^ +i sim
13 | 
14 | # The globs cover this directory and two levels below it.  They are
15 | # spelled out because /bin/sh need not support brace expansion.
16 | clean:
17 | 	for f in $$(ls *.rap  2> /dev/null); do rm -f $${f%.rap}_rap.h;   done
18 | 	for g in $$(ls *.opts 2> /dev/null); do rm -f $${g%.opts}_opts.h; done
19 | 	rm -fr .libs _libs */.libs */_libs */*/.libs */*/_libs
20 | 	rm -f  *.lo *.so *.la */*.lo */*.so */*.la */*/*.lo */*/*.so */*/*.la
21 | 
22 | # Only gray.la owns the hdf5 recipe; the other two use the %.la rule below.
23 | # mod:            interfaces                   sources         options etc
24 | # v               v                            v               v
25 | gray/Kerr/ocl.la: Kerr_rap.h                   gray/Kerr/ocl.c Kerr_opts.h
26 | gray/infcam.la:   initcond.h                   gray/infcam.c   infcam_opts.h
27 | gray.la:          gray.h initcond.h Kerr_rap.h gray.c          gray_opts.h Kerr_opts.h infcam_opts.h
28 | 	lux-build $^ -f hdf5 -o $@
29 | 
30 | %.la:
31 | 	lux-build $^ -o $@
32 | 
33 | %_rap.h: %.rap
34 | 	lux-rapgen $<
35 | 
36 | %_opts.h: %.opts
37 | 	lux-optgen $<
38 | 


--------------------------------------------------------------------------------
/sim-org/setup.opts:
--------------------------------------------------------------------------------
 1 | # Job setup/configuration
 2 | unsigned	i_platform:ip	= 0	= strtoul(val, &rem, 0);	/**< Index of platform to use */
 3 | unsigned	i_device:id	= 0	= strtoul(val, &rem, 0);	/**< Index of device   to use */
 4 | cl_device_type	device_type:type	= CL_DEVICE_TYPE_ALL	= strtotype(val);	/**< Type  of device   to use */
 5 | 
 6 | size_t	precision:p	= sizeof(float)	= strtoprec(val);	/**< Size of real type */
 7 | const char *	scheme:scm	= "RK4"	= val;	/**< Integration scheme */
 8 | const char *	morder:mo:ml	= "AoS"	= val;	/**< Memory order/layout */
 9 | const char *	kflags:kf	= "-w"	= val;	/**< Kernel compilation flags */
10 | 
11 | const char *	outfile:out	= "%04d.raw"	= val;	/**< Dump file name format */
12 | real	t_init:t0	= 0	= strtod(val, &rem);	/**< Initial time */
13 | real	dt_dump:dt	= -32	= strtod(val, &rem);	/**< Time interval between dumps */
14 | size_t	i_init:i0	= 0	= strtoul(val, &rem, 0);	/**< Initial dump id */
15 | size_t	n_sub:n	= 1024	= strtoul(val, &rem, 0);	/**< Number of substeps per dump */
16 | size_t	n_dump:N	= 64	= strtoul(val, &rem, 0);	/**< Number of dumps (in addition to the initial condition) */
17 | 


--------------------------------------------------------------------------------
/sim-org/AoS.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | /** \file
22 |  ** Array-of-Structures global index
23 |  **
24 |  ** GRay2 uses OpenCL's just-in-time compilation feature to implement
25 |  ** run-time configurable algorithms.  In this file we provide global
26 |  ** index for Array-of-Structures memory order.
27 |  **/
28 | /* Array-of-Structures indexing; arguments are parenthesized so that expressions such as DATA(i + 1, s) index correctly */
29 | #define DATA(g, s) data[(g) * n_data + (s)]
30 | #define INFO(g, s) info[(g) * n_info + (s)]
31 | 


--------------------------------------------------------------------------------
/sim-org/SoA.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | /** \file
22 |  ** Structure-of-Arrays global index
23 |  **
24 |  ** GRay2 uses OpenCL's just-in-time compilation feature to implement
25 |  ** run-time configurable algorithms.  In this file we provide global
26 |  ** index for Structure-of-Arrays memory order.
27 |  **/
28 | /* Structure-of-Arrays indexing; arguments are parenthesized so that expressions such as DATA(g, s + 1) index correctly */
29 | #define DATA(g, s) data[(s) * n_rays + (g)]
30 | #define INFO(g, s) info[(s) * n_rays + (g)]
31 | 


--------------------------------------------------------------------------------
/sim/initcond.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
 3 |  * Copyright (C) 2021 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | #ifndef _GRAY_INITCOND_H_
21 | #define _GRAY_INITCOND_H_
22 | 
23 | #include <lux.h>
24 | #include <lux/opencl.h>
25 | 
26 | typedef struct LuxSgray_initcond Lux_gray_initcond;
27 | typedef struct LuxOgray_initcond Lux_gray_initcond_opts;
28 | 
29 | struct LuxSgray_initcond {
30 | 	int (*init)(Lux_gray_initcond *, cl_mem); /* callback; gray/infcam.c installs its static init() here */
31 | 	size_t n_width, n_height; /* gray/infcam.c copies these from its n_width/n_height options */
32 | };
33 | 
34 | struct LuxOgray_initcond {
35 | 	size_t            nque; /* presumably the number of entries in que -- TODO confirm */
36 | 	cl_command_queue *que;
37 | 	void             *opts; /* module-specific options; gray/infcam.c reads it as struct infcam_opts * */
38 | };
39 | 
40 | #endif /* _GRAY_INITCOND_H_ */
41 | 


--------------------------------------------------------------------------------
/sim-org/param.opts:
--------------------------------------------------------------------------------
 1 | # Runtime parameters needed in kernels
 2 | 
 3 | # Coordinate system
 4 | real	M_ADM:m	= 4.3e6	= strtod(val, &rem);	/**< Spacetime mass in units of solar mass; it is needed in radiative transfer and in the spacetime boundary conditions */
 5 | const char *	coordinates:coor	    = "dyst"	    = val;	/**< Coordinate system */
 6 | const char *	dyst_file:data   	    = "data.h5"	    = val;	/**< File with all the information for dynamical spacetime */
 7 | size_t	enable_fast_light:fast_light	= 0          	= strtoul(val, &rem, 0);	/**< If non-zero, assume fast light */
 8 | 
 9 | # Numerical setup
10 | size_t	w_rays:W	= 512	= strtoul(val, &rem, 0);	/**< Number of rays along the width  of the image */
11 | size_t	h_rays:H	= 512	= strtoul(val, &rem, 0);	/**< Number of rays along the height of the image */
12 | 
13 | # Plasma parameter
14 | real	n_electron:ne	= 1e6	= strtod(val, &rem);	/**< Electron number density normalization */
15 | real	beta_crit:bc	= 1	= strtod(val, &rem);	/**< Critical plasma \f$\beta_\mathrm{crit} = P_\mathrm{gas}/P_\mathrm{mag}\f$ */
16 | real	R_high:Rh	= 1	= strtod(val, &rem);	/**< The ratio \f$R_\mathrm{high} = T_p/T_e\f$ at high-beta,  weakly magnetized (disk) regions */
17 | real	R_low:Rl	= 1	= strtod(val, &rem);	/**< The ratio \f$R_\mathrm{low}  = T_p/T_e\f$ at low-beta, strongly magnetized (jet)  regions */
18 | real *	nu	= NULL	= strtoda(val, &rem);	/**< Frequency channels; NULL means turning off radiative transfer */
19 | 


--------------------------------------------------------------------------------
/tools/gray.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 Chi-kwan Chan
 2 | # Copyright (C) 2016 Steward Observatory
 3 | #
 4 | # This file is part of GRay2.
 5 | #
 6 | # GRay2 is free software: you can redistribute it and/or modify it
 7 | # under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # GRay2 is distributed in the hope that it will be useful, but WITHOUT
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14 | # License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
18 | 
19 | from os import path
20 | import numpy as np
21 | 
22 | def load_raw(name):
23 |     """ Load a GRay2 raw file """
24 |     ext = path.splitext(name)[1][1:]
25 |     if ext != "raw":
26 |         raise NameError("Fail to load file \"{}\", "
27 |                         "which is in an unsupported format".format(name))
28 | 
29 |     with open(name, "rb") as f:
30 |         print("Loading GRay2 raw file \"{}\"... ".format(name), end="")
31 | 
32 |         d = np.fromfile(f, dtype=np.uint64, count=4)
33 |         t = np.double if d[0] == 8 else np.single
34 |         n = d[1]
35 |         w = d[2]
36 |         h = d[3]
37 | 
38 |         states = np.fromfile(f, dtype=t, count=n*w*h).reshape((h,w,n))
39 |         diagno = np.fromfile(f, dtype=t, count=  w*h).reshape((h,w  ))
40 | 
41 |         print("DONE")
42 | 
43 |         return states, diagno
44 | 


--------------------------------------------------------------------------------
/sim/gray.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
 3 |  * Copyright (C) 2021 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | #ifndef _GRAY_H_
21 | #define _GRAY_H_
22 | 
23 | #include <lux.h>
24 | #include <lux/io.h>
25 | #include <lux/job.h>
26 | #include <lux/task.h>
27 | 
28 | #include <lux/opencl.h>
29 | #include <lux/darray.h>
30 | 
31 | #include "initcond.h"
32 | 
33 | #include "gray_opts.h"
34 | #include "Kerr_opts.h"
35 | #include "infcam_opts.h"
36 | 
37 | struct gray {
38 | 	Lux_job super;
39 | 
40 | 	struct gray_opts gray;
41 | 	union {
42 | 		struct Kerr_opts Kerr;
43 | 	} spacetime;
44 | 	union {
45 | 		struct infcam_opts infcam;
46 | 	} initcond;
47 | 
48 | 	Lux_opencl *ocl;
49 | 	Lux_io     *io;
50 | 
51 | 	struct darray rays;
52 | 	real         *rays_host;
53 | 
54 | 	struct basealgo gi;
55 | 	struct basealgo flow;
56 | 	struct basealgo rt;
57 | 
58 | 	double t, dt;
59 | 	size_t i, n;
60 | };
61 | 
62 | #endif /* _GRAY_H_ */
63 | 


--------------------------------------------------------------------------------
/sim-org/flow.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | /** \file
22 |  ** A template for flow models
23 |  **
24 |  ** A template for an accretion flow model, which may be an
25 |  ** interpolation of GRMHD simulations or an analytical model.
26 |  **/
27 | 
28 | struct flow {
29 | 	real ne;
30 | 	real te;
31 | 	real b;
32 | 	real bkcos;
33 | 	real shift;
34 | };
35 | /** Fill a struct flow at position q for momentum k; te and b are hard-coded constants here */
36 | struct flow
37 | getflow(real4 q, /* "up"   position 4-vector q^mu */
38 |         real4 k, /* "down" momentum 4-vector k_mu */
39 | 	SPACETIME_PROTOTYPE_ARGS)
40 | {
41 | 	struct flow f;
42 | 
43 | 	real4 u = {1, 0, 0, 0};
44 | 	real4 b = {0, 0, 0, 0}; /* magnetic field four-vector defined as
45 | 	                           b^mu = (1/2)
46 |                                            epsilon^{mu,nu,kappa,lambda}
47 |                                            u_nu F_{lambda,kappa},
48 |                                    see Gammie et al. (2003) */
49 | 
50 | 	f.ne = interpolate(q, bounding_box, num_points, rho_t1, rho_t2);
51 | 	f.te = 1e12;
52 | 	f.b  = 0;
53 | 	/* With the constant u and b above (and assuming the built-in dot()), shift is -k.s0 and the numerator of bkcos is 0 */
54 | 	f.shift = -dot(k, u);
55 | 	f.bkcos =  dot(k, b) / (f.shift * f.b + (real)EPSILON);
56 | 
57 | 	return f;
58 | }
59 | 


--------------------------------------------------------------------------------
/sim-org/RK4.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | /** \file
22 |  ** Classical 4th-order Runge-Kutta integrator
23 |  **
24 |  ** GRay2 uses OpenCL's just-in-time compilation feature to implement
25 |  ** run-time configurable algorithms.  In this file we implement the
26 |  ** classical 4th-order Runge-Kutta integrator in integrate().
27 |  **/
28 | 
29 | /**
30 |  ** OpenCL implementation of the classical 4th-order Runge-Kutta integrator
31 |  **
32 |  ** Assuming rhs() is provided, this function advances the state by one
33 |  ** classical 4th-order Runge-Kutta step of size dt: four rhs() evaluations
34 |  ** k1..k4 are combined as s + dt * (k1 + 2 * (k2 + k3) + k4) / 6.
35 |  ** \return The new state
36 |  **/
37 | struct state
38 | integrate(struct state s,  /**< state of the ray */
39 |           real         dt, /**< step size        */
40 |           SPACETIME_PROTOTYPE_ARGS)
41 | {
42 | 	struct state k1 = rhs(X(E(s)                      ), SPACETIME_ARGS);
43 | 	struct state k2 = rhs(X(E(s) + K(0.5) * dt * E(k1)), SPACETIME_ARGS);
44 | 	struct state k3 = rhs(X(E(s) + K(0.5) * dt * E(k2)), SPACETIME_ARGS);
45 | 	struct state k4 = rhs(X(E(s) +          dt * E(k3)), SPACETIME_ARGS);
46 | 	return X(E(s) + dt * (E(k1) + K(2.0) * (E(k2) + E(k3)) + E(k4)) / K(6.0));
47 | }
48 | 


--------------------------------------------------------------------------------
/sim/gray/infcam.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
 3 |  * Copyright (C) 2021 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include "initcond.h"
22 | 
23 | #include <lux/mangle.h>
24 | #include <lux/switch.h>
25 | #include <lux/zalloc.h>
26 | 
27 | #include "../infcam_opts.h"
28 | 
29 | struct infcam {
30 | 	Lux_gray_initcond  super;
31 | 	size_t             nque;
32 | 	cl_command_queue  *que;
33 | };
34 | 
35 | #define EGO ((struct infcam *)ego)
36 | 
37 | static int
38 | init(Lux_gray_initcond *ego, cl_mem rays)
39 | {
40 | 	/* TODO: initialize cl_mem */
41 | 	return 0;
42 | }
43 | /** Module constructor: zalloc() a struct infcam and fill it from a Lux_gray_initcond_opts; returns the zalloc() result, NULL included */
44 | void *
45 | LUX_MKMOD(const void *opts)
46 | {
47 | 	void *ego;
48 | 	/* NOTE(review): opts and opts->opts are dereferenced below without a NULL check -- confirm callers always supply both */
49 | 	lux_debug("GRay2:infcam: constructing an instance with options %p\n", opts);
50 | 
51 | 	ego = zalloc(sizeof(struct infcam));
52 | 	if(ego) {
53 | 		struct infcam_opts *o = ((Lux_gray_initcond_opts*)opts)->opts;
54 | 		/* Public part: the init callback and the image size taken from the infcam options */
55 | 		EGO->super.init     = init;
56 | 		EGO->super.n_width  = o->n_width;
57 | 		EGO->super.n_height = o->n_height;
58 | 		/* Only the queue pointer is stored; the queue array itself is not copied */
59 | 		EGO->nque = ((Lux_gray_initcond_opts*)opts)->nque;
60 | 		EGO->que  = ((Lux_gray_initcond_opts*)opts)->que;
61 | 	}
62 | 	return ego;
63 | }
64 | 
65 | void
66 | LUX_RMMOD(void *ego)
67 | {
68 | 	lux_debug("GRay2:infcam: destructing instance %p\n", ego);
69 | 
70 | 	free(ego);
71 | }
72 | 


--------------------------------------------------------------------------------
/tests/sample.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of lux.
 6 |  *
 7 |  * lux is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * lux is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with lux.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | #include <lux.h>
21 | #include <lux/dynamic.h>
22 | #include <lux/mangle.h>
23 | #include <lux/pvector.h>
24 | #include <lux/solver.h>
25 | 
26 | #define LUX_RAP_CASTING 1
27 | #include "sample_rap.h"
28 | 
29 | static int
30 | driver(Lux_spec *s, Lux_args *a)
31 | {
32 | 	size_t i, j;
33 | 	for(i = 0; i < s->n1; ++i) {
34 | 		for(j = 0; j < s->n2; ++j) {
35 | 			size_t h = i * s->n2 + j;
36 | 			if(h < s->n)
37 | 				a->z[h] = a->x[h] + a->alpha * a->y[h];
38 | 		}
39 | 	}
40 | 	return 0;
41 | }
42 | 
43 | Lux_solution *
44 | LUX_MOD(Lux_problem *prob, unsigned flags)
45 | {
46 | 	/* One spec per size 1, 2, ..., 64; the first count is ceil(prob->n / size) */
47 | 	Lux_spec *spec1 = mkspec(prob, (prob->n+ 1-1)/ 1,  1);
48 | 	Lux_spec *spec2 = mkspec(prob, (prob->n+ 2-1)/ 2,  2);
49 | 	Lux_spec *spec3 = mkspec(prob, (prob->n+ 4-1)/ 4,  4);
50 | 	Lux_spec *spec4 = mkspec(prob, (prob->n+ 8-1)/ 8,  8);
51 | 	Lux_spec *spec5 = mkspec(prob, (prob->n+16-1)/16, 16);
52 | 	Lux_spec *spec6 = mkspec(prob, (prob->n+32-1)/32, 32);
53 | 	Lux_spec *spec7 = mkspec(prob, (prob->n+64-1)/64, 64);
54 | 	Lux_args *args  = mkargs(prob); /* shared by all seven solutions */
55 | 
56 | 	(void)flags; /* silence unused variable warning; must precede the return to be reachable */
57 | 
58 | 	return pvector(
59 | 		Lux_solution,
60 | 		{{{driver, spec1}, args}, {0, 0, prob->n, 0}},
61 | 		{{{driver, spec2}, args}, {0, 0, prob->n, 0}},
62 | 		{{{driver, spec3}, args}, {0, 0, prob->n, 0}},
63 | 		{{{driver, spec4}, args}, {0, 0, prob->n, 0}},
64 | 		{{{driver, spec5}, args}, {0, 0, prob->n, 0}},
65 | 		{{{driver, spec6}, args}, {0, 0, prob->n, 0}},
66 | 		{{{driver, spec7}, args}, {0, 0, prob->n, 0}}
67 | 	);
68 | }
69 | 


--------------------------------------------------------------------------------
/sim-org/phys.cl:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | /** \file
22 |  ** Composing different physics modules
23 |  **
24 |  ** This file contains functions that compose different physics, e.g.,
25 |  ** geodesic integration, radiative transfer, together to form the
26 |  ** initial conditions and the right hand side of the full
27 |  ** differential equation.
28 |  **/
29 | 
30 | struct state {
31 | 	struct gr g;
32 | 	struct rt r;
33 | };
34 | 
35 | struct state
36 | icond(real r_obs, /**< distance of the image from the black hole */
37 |       real i_obs, /**< inclination angle of the image in degrees */
38 |       real j_obs, /**< azimuthal   angle of the image in degrees */
39 |       real alpha, /**< one of the local Cartesian coordinates */
40 |       real beta)  /**< the other  local Cartesian coordinate  */
41 | {
42 | 	return (struct state){
43 | 		gr_icond(r_obs, i_obs, j_obs, alpha, beta),
44 | 		rt_icond()
45 | 	};
46 | }
47 | /** Limit the step size: returns 0 to stop the ray, otherwise dt with |dt| capped at 0.01 * eps^2 */
48 | real
49 | getdt(struct gr g, real dt)
50 | {
51 | 	real r   = sqrt(getrr(g.q));
52 | 	real eps = geteps(g.q);
53 | 
54 | 	if(eps < 0.01) /* stop near horizon */
55 | 		return 0;
56 | 	/* NOTE(review): dot(q.s123, q.s123) is a sum of squares, so "< 0" can never hold and this branch looks dead; was g.u meant? -- confirm */
57 | 	if(dot(g.q.s123, g.q.s123) < 0 && r > K(1e3)) /* stop outside domain */
58 | 		return 0;
59 | 	/* Cap the magnitude of dt at 0.01 * eps^2 while keeping its sign */
60 | 	if(fabs(dt) > 0.01 * eps * eps)
61 | 		return sign(dt) * 0.01 * eps * eps;
62 | 	else
63 | 		return dt;
64 | }
65 | 
66 | struct state
67 | rhs(struct state s, SPACETIME_PROTOTYPE_ARGS) /**< state of the ray */
68 | {
69 | 	real4 q = s.g.q;
70 | 	real4 k = down(q, s.g.u, SPACETIME_ARGS);
71 | 	return (struct state){
72 | 		gr_rhs(s.g, SPACETIME_ARGS),
73 | 		rt_rhs(s.r, getflow(q, k, SPACETIME_ARGS))};
74 | }
75 | 


--------------------------------------------------------------------------------
/sim-org/evolve.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | #include "gray.h"
21 | 
22 | static inline size_t /* the larger of the two sizes */
23 | max(size_t a, size_t b)
24 | {
25 | 	return b < a ? a : b;
26 | }
27 | 
28 | /** Bind the arguments of the evolve kernel and launch it once */
29 | real
30 | evolve(Lux_job *ego, real t, real target, size_t n_sub)
31 | {
32 | 	/* Images per loaded time level: 40 Gammas + 10 metric components
33 | 	 * + 1 fluid property */
34 | 	const size_t n_img = 40 + 10 + 1;
35 | 
36 | 	Lux_opencl        *dev  = EGO->ocl;
37 | 	Lux_opencl_kernel *kern = EGO->evolve;
38 | 
39 | 	/* The scratch argument is sized for the larger of the two record
40 | 	 * types (data or info), multiplied by the configured precision */
41 | 	const size_t n_data  = EGO->n_coor + EGO->n_freq * 2;
42 | 	const size_t scratch = EGO->setup.precision * max(n_data, EGO->n_info);
43 | 
44 | 	/* Two-dimensional launch shape: number of rays in h, then in w */
45 | 	const size_t shape[] = {EGO->param.h_rays, EGO->param.w_rays};
46 | 
47 | 	size_t a = 0; /* index of the next kernel argument */
48 | 	size_t j;
49 | 
50 | 	kern->setM(kern, a++, EGO->data);
51 | 	kern->setM(kern, a++, EGO->info);
52 | 	kern->setR(kern, a++, target - t); /* time left until the target */
53 | 	kern->setW(kern, a++, n_sub);
54 | 	kern->setS(kern, a++, scratch);
55 | 	kern->set (kern, a++, sizeof(cl_float8), &(EGO->bounding_box));
56 | 	kern->set (kern, a++, sizeof(cl_int4),   &(EGO->num_points));
57 | 
58 | 	/* Spacetime and fluid images at time level t1 ... */
59 | 	for(j = 0; j < n_img; ++j)
60 | 		kern->setM(kern, a++, EGO->spacetime_t1[j]);
61 | 	/* ... followed by the same quantities at time level t2 */
62 | 	for(j = 0; j < n_img; ++j)
63 | 		kern->setM(kern, a++, EGO->spacetime_t2[j]);
64 | 
65 | 	/* Launch over the 2-D shape and pass on whatever exec() reports */
66 | 	return dev->exec(dev, kern, 2, shape);
67 | }
68 | 


--------------------------------------------------------------------------------
/sim/gray/Kerr/ocl.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
 3 |  * Copyright (C) 2021 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <lux.h>
22 | #include <lux/mangle.h>
23 | #include <lux/opencl.h>
24 | #include <lux/pvector.h>
25 | #include <lux/solver.h>
26 | 
27 | #define  LUX_RAP_CASTING 1
28 | #include "../../Kerr_rap.h"
29 | 
30 | static int /* always 0 */
31 | driver(Lux_spec *s, Lux_args *a)
32 | {
33 | 	/* Placeholder driver: it only reports which spec/args it was given.
34 | 	 * "%p" is defined for `void *` arguments only, hence the casts; they
35 | 	 * also count as uses, so no unused-parameter silencing is needed. */
36 | 	lux_print("Kerr driver %p %p\n", (void *)s, (void *)a);
37 | 
38 | 	return 0;
39 | }
40 | 
41 | Lux_solution *
42 | LUX_MOD(Lux_problem *prob, unsigned flags)
43 | {
44 | 	const size_t      count = prob->n;
45 | 	struct LuxOopencl opts  = OPENCL_NULL;
46 | 	Lux_opencl       *ocl;
47 | 
48 | 	/* One spec per chunk size k = 1, 2, 4, ..., 64: mkspec() receives
49 | 	 * count / k rounded up, followed by k itself */
50 | 	Lux_spec *k01 = mkspec(prob, (count +  1 - 1) /  1,  1);
51 | 	Lux_spec *k02 = mkspec(prob, (count +  2 - 1) /  2,  2);
52 | 	Lux_spec *k04 = mkspec(prob, (count +  4 - 1) /  4,  4);
53 | 	Lux_spec *k08 = mkspec(prob, (count +  8 - 1) /  8,  8);
54 | 	Lux_spec *k16 = mkspec(prob, (count + 16 - 1) / 16, 16);
55 | 	Lux_spec *k32 = mkspec(prob, (count + 32 - 1) / 32, 32);
56 | 	Lux_spec *k64 = mkspec(prob, (count + 64 - 1) / 64, 64);
57 | 
58 | 	/* A single argument block is shared by all seven solutions */
59 | 	Lux_args *shared = mkargs(prob);
60 | 
61 | 	(void)flags; /* silence unused variable warning */
62 | 
63 | 	/* The opencl module is loaded on the problem's queues and released
64 | 	 * again right away; nothing in this function uses it in between */
65 | 	opts.nque = prob->nque;
66 | 	opts.que  = prob->que;
67 | 	ocl = lux_load("opencl", &opts);
68 | 	lux_unload(ocl);
69 | 
70 | 	return pvector(
71 | 		Lux_solution,
72 | 		{{{driver, k01}, shared}, {0, 0, 0, 0}},
73 | 		{{{driver, k02}, shared}, {0, 0, 0, 0}},
74 | 		{{{driver, k04}, shared}, {0, 0, 0, 0}},
75 | 		{{{driver, k08}, shared}, {0, 0, 0, 0}},
76 | 		{{{driver, k16}, shared}, {0, 0, 0, 0}},
77 | 		{{{driver, k32}, shared}, {0, 0, 0, 0}},
78 | 		{{{driver, k64}, shared}, {0, 0, 0, 0}}
79 | 	);
80 | }
81 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GRay2 Documentation {#mainpage}
 2 | 
 3 | GRay2 is a hardware-accelerated geodesic integrator for performing
 4 | general relativistic ray tracing for accreting black holes.
 5 | It is based on the [lux framework](https://luxsrc.org) and runs on a
 6 | wide range of modern hardware/accelerators such as GPUs and Intel&reg;
 7 | Xeon Phi.
 8 | 
 9 | ## For users
10 | 
11 | For people who are interested in using GRay2 as-is, please download a
12 | tarball from GitHub's release page:
13 | 
14 | 	https://github.com/luxsrc/gray/releases
15 | 
16 | Assuming `lux` is installed (see https://github.com/luxsrc/lux), users
17 | can simply run
18 | 
19 | 	make
20 | 
21 | to build GRay2 as a `lux` job and run GRay2 by `lux gray`.
22 | 
23 | ## For developers
24 | 
25 | For people who are interested in contributing to GRay2, please fork
26 | GRay2's git repository on GitHub:
27 | 
28 | 	https://github.com/luxsrc/gray
29 | 
30 | work on your fork, and then create a pull request to merge your changes
31 | back to the main repository.
32 | 
33 | GRay2 is flexible and easily extendable.
34 | To turn hard-wired constants into run-time options, follow the
35 | instructions in \ref newopts "this page".
36 | To add new computation kernels to GRay2, see \ref newkern "this page".
37 | We also keep track of a list of TODOs found in the code \ref todo
38 | "here".
39 | 
40 | ## Structure of HDF5 files
41 | 
42 | GRay2 can read spacetime and fluid data from HDF5 files. These files must
43 | be structured in a specific way:
44 | * At the top level, they must contain a group called "grid". This group has to
45 |   contain three datasets named "x", "y", "z", which contain the coordinates
46 |   along the three directions.
47 | * Always at the top level, all the groups that are not named "grid" will be
48 |   considered time levels. The name of each such group has to be its time. For
49 |   example, possible group names would be "1.0", "1.1", "1.2", ... The group
50 |   "1.0" contains variables at that time. The groups have to be in alphanumerical
51 |   order.
52 | * In each group, the following datasets have to be defined: Gamma_ttt,
53 |   Gamma_ttx, Gamma_tty, Gamma_ttz, Gamma_txx, Gamma_txy, Gamma_txz, Gamma_tyy,
54 |   Gamma_tyz, Gamma_tzz, Gamma_xtt, Gamma_xtx, Gamma_xty, Gamma_xtz, Gamma_xxx,
55 |   Gamma_xxy, Gamma_xxz, Gamma_xyy, Gamma_xyz, Gamma_xzz, Gamma_ytt, Gamma_ytx,
56 |   Gamma_yty, Gamma_ytz, Gamma_yxx, Gamma_yxy, Gamma_yxz, Gamma_yyy, Gamma_yyz,
57 |   Gamma_yzz, Gamma_ztt, Gamma_ztx, Gamma_zty, Gamma_ztz, Gamma_zxx, Gamma_zxy,
58 |   Gamma_zxz, Gamma_zyy, Gamma_zyz, Gamma_zzz, g_tt, g_tx, g_ty, g_tz, g_xx,
59 |   g_xy, g_xz, g_yy, g_yz, g_zz.
60 | * All the variables must have the same precision (e.g., single or double).
61 | 


--------------------------------------------------------------------------------
/sim-org/icond.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2016 Chi-kwan Chan
 3 |  * Copyright (C) 2016 Steward Observatory
 4 |  *
 5 |  * This file is part of GRay2.
 6 |  *
 7 |  * GRay2 is free software: you can redistribute it and/or modify it
 8 |  * under the terms of the GNU General Public License as published by
 9 |  * the Free Software Foundation, either version 3 of the License, or
10 |  * (at your option) any later version.
11 |  *
12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 |  * License for more details.
16 |  *
17 |  * You should have received a copy of the GNU General Public License
18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | #include "gray.h"
21 | #include <stdio.h>
22 | 
23 | static inline size_t /* the larger of the two sizes */
24 | max(size_t a, size_t b)
25 | {
26 | 	return b < a ? a : b;
27 | }
28 | 
29 | /**
30 |  ** Set the initial conditions of all rays on the device
31 |  **
32 |  ** Creates the "icond_drv" kernel, binds its arguments, runs it once
33 |  ** over the h_rays x w_rays grid and then releases the kernel.
34 |  **
35 |  ** \param ego     the job; accessed as `struct gray` through EGO
36 |  ** \param t_init  not used by this implementation
37 |  **/
38 | void
39 | icond(Lux_job *ego, real t_init)
40 | {
41 | 	Lux_opencl        *ocl = EGO->ocl;
42 | 	Lux_opencl_kernel *kern;
43 | 
44 | 	struct icond *i = &EGO->icond;
45 | 	struct param *p = &EGO->param;
46 | 	struct setup *s = &EGO->setup;
47 | 
48 | 	const size_t sz     = s->precision;
49 | 	const size_t n_data = EGO->n_coor + EGO->n_freq * 2;
50 | 	const size_t n_info = EGO->n_info;
51 | 	const size_t n_img  = 40 + 10 + 1; /* Gammas + metric + fluid, per time level */
52 | 
53 | 	const size_t shape[] = {p->h_rays, p->w_rays};
54 | 
55 | 	size_t arg_num = 0; /* index of the next kernel argument */
56 | 	size_t j;
57 | 
58 | 	(void)t_init; /* silence unused parameter warning */
59 | 
60 | 	/* "%p" is only defined for `void *` arguments, hence the cast */
61 | 	lux_debug("GRay2: executing job %p\n", (void *)ego);
62 | 
63 | 	kern = ocl->mkkern(ocl, "icond_drv");
64 | 
65 | 	/* Output buffers */
66 | 	kern->setM(kern, arg_num++, EGO->data);
67 | 	kern->setM(kern, arg_num++, EGO->info);
68 | 
69 | 	/* Image size and position of the observer */
70 | 	kern->setR(kern, arg_num++, i->w_img);
71 | 	kern->setR(kern, arg_num++, i->h_img);
72 | 	kern->setR(kern, arg_num++, i->r_obs);
73 | 	kern->setR(kern, arg_num++, i->i_obs);
74 | 	kern->setR(kern, arg_num++, i->j_obs);
75 | 
76 | 	/* Local scratch sized for the larger of a data or an info record */
77 | 	kern->setS(kern, arg_num++, sz * max(n_data, n_info));
78 | 
79 | 	/* Grid description */
80 | 	kern->set(kern, arg_num++, sizeof(cl_float8), &(EGO->bounding_box));
81 | 	kern->set(kern, arg_num++, sizeof(cl_int4),   &(EGO->num_points));
82 | 
83 | 	/* Spacetime and fluid images at time level t1, then at t2 */
84 | 	for(j = 0; j < n_img; ++j)
85 | 		kern->setM(kern, arg_num++, EGO->spacetime_t1[j]);
86 | 	for(j = 0; j < n_img; ++j)
87 | 		kern->setM(kern, arg_num++, EGO->spacetime_t2[j]);
88 | 
89 | 	(void)ocl->exec(ocl, kern, 2, shape);
90 | 
91 | 	ocl->rmkern(ocl, kern);
92 | }
93 | 


--------------------------------------------------------------------------------
/tests/ck_sample.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
  3 |  * Copyright (C) 2021 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | #include <lux.h>
 21 | #include <lux/estimate.h>
 22 | #include <lux/measure.h>
 23 | #include <lux/planner.h>
 24 | #include <lux/pvector.h>
 25 | #include <lux/solver.h>
 26 | #include <stdlib.h>
 27 | #include <stdio.h>
 28 | 
 29 | #define LUX_RAP_CASTING 1
 30 | #include "sample_rap.h"
 31 | 
 32 | static double test_alpha = 3.0;
 33 | 
 34 | static void /* sets x[k] = k and y[k] = 2 k for every k below p->n */
 35 | test_init(Lux_problem *p)
 36 | {
 37 | 	int k = 0;
 38 | 	for(; k < (int)p->n; k++) {
 39 | 		p->x[k] = (double)k;
 40 | 		p->y[k] = 2.0 * k;
 41 | 	}
 42 | }
 43 | 
 44 | static int /* 0 when every z[k] equals 7 k, otherwise 1 */
 45 | test_check(Lux_problem *p)
 46 | {
 47 | 	int k;
 48 | 	for(k = 0; k < (int)p->n; k++)
 49 | 		if(p->z[k] != 7.0 * k)
 50 | 			return 1; /* one mismatch is enough to fail */
 51 | 	return 0;
 52 | }
 53 | 
 54 | /**
 55 |  ** Load the "sample" solver, time every solution it offers, run the
 56 |  ** fastest one and verify its output with test_check()
 57 |  **
 58 |  ** \return 0 on success, non-zero on failure
 59 |  **/
 60 | int
 61 | main(int argc, char *argv[])
 62 | {
 63 | 	int failed = 0;
 64 | 
 65 | 	Lux_problem   prob;
 66 | 	Lux_solver   *solve;
 67 | 	Lux_solution *sols;
 68 | 	size_t        i, n;
 69 | 
 70 | 	double m_best = HUGE_VAL;
 71 | 	size_t i_best = 0;
 72 | 
 73 | 	prob.n     = 1024 * 1024;
 74 | 	prob.alpha = test_alpha;
 75 | 	prob.x     = malloc(sizeof(double) * prob.n);
 76 | 	prob.y     = malloc(sizeof(double) * prob.n);
 77 | 	prob.z     = malloc(sizeof(double) * prob.n);
 78 | 	if(!prob.x || !prob.y || !prob.z) {
 79 | 		/* test_init() and test_check() would dereference NULL */
 80 | 		fprintf(stderr, "ck_sample: cannot allocate test vectors\n");
 81 | 		free(prob.x);
 82 | 		free(prob.y);
 83 | 		free(prob.z);
 84 | 		return 1;
 85 | 	}
 86 | 
 87 | 	lux_setup(&argc, &argv);
 88 | 
 89 | 	lux_print("1. Load solvers from the current directory into planner ... ");
 90 | 	solve = lux_load("sample", NULL);
 91 | 	lux_print("%p DONE\n", solve);
 92 | 
 93 | 	lux_print("2. Solve the problem... ");
 94 | 	sols = solve(&prob, LUX_PLAN_EXHAUSTIVE);
 95 | 	n    = pgetn(sols, 0);
 96 | 	lux_print("%p; %zu solutions DONE\n", sols, n);
 97 | 
 98 | 	lux_print("3. Estimate performance for the solutions ...\n");
 99 | 	for(i = 0; i < n; ++i) {
100 | 		double e = estimate(&sols[i].opcnt);
101 | 		lux_print("   * Solution %zu, estimated cost = %g\n", i, e);
102 | 	}
103 | 	lux_print("   DONE\n");
104 | 
105 | 	lux_print("4. Measure performance for the solutions ...\n");
106 | 	for(i = 0; i < n; ++i) {
107 | 		Lux_task *t = mkluxbasetask(sols[i].task);
108 | 		double    m = measure(t);
109 | 		free(t);
110 | 		lux_print("   * Solution %zu, measured cost = %g\n", i, m);
111 | 		if(m_best > m) {
112 | 			m_best = m;
113 | 			i_best = i;
114 | 		}
115 | 	}
116 | 	lux_print("   DONE\n");
117 | 
118 | 	lux_print("5. Run the optimal solution %zu ... ", i_best);
119 | 	if(n) {
120 | 		Lux_task *t;
121 | 
122 | 		test_init(&prob);
123 | 
124 | 		t = mkluxbasetask(sols[i_best].task);
125 | 		t->exec(t);
126 | 		free(t);
127 | 
128 | 		failed = test_check(&prob);
129 | 	} else
130 | 		failed = 1; /* no solution at all: sols[i_best] does not exist */
131 | 	if(failed) {
132 | 		lux_print("FAILED\n");
133 | 		lux_abort();
134 | 	} else
135 | 		lux_print("DONE\n");
136 | 
137 | 	lux_print("6. Free the solutions ... ");
138 | 	for(i = 0; i < n; ++i)
139 | 		free(sols[i].task.algo.spec);
140 | 	if(n) /* only the first args block is freed; presumably all solutions share it */
141 | 		free(sols[0].task.args);
142 | 	pfree(sols);
143 | 	lux_print("DONE\n");
144 | 
145 | 	lux_print("7. Unload the solver ... ");
146 | 	lux_unload(solve);
147 | 	lux_print("DONE\n");
148 | 
149 | 	free(prob.x);
150 | 	free(prob.y);
151 | 	free(prob.z);
152 | 
153 | 	return failed;
154 | }
155 | 


--------------------------------------------------------------------------------
/sim-org/gray.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | /** \file
 22 |  **
 23 |  ** Data structure definitions and function declarations for GRay2
 24 |  **
 25 |  ** GRay2 is implemented as a lux module.  Its run-time data is stored
 26 |  ** in a subclass of Lux_job, which is defined in this header file.
 27 |  ** Additional structure that holds run-time adjustable parameters,
 28 |  ** constructor, destructor, internal functions, and standard methods
 29 |  ** in Lux_job, are all declared here as well.
 30 |  **/
 31 | #ifndef _GRAY_H_
 32 | #define _GRAY_H_
 33 | 
 34 | #include <lux.h>
 35 | #include <lux/check.h>
 36 | #include <lux/job.h>
 37 | #include <lux/numeric.h>
 38 | #include <lux/opencl.h>
 39 | #include <lux/strutils.h>
 40 | 
 41 | #include "icond.h"
 42 | #include "param.h"
 43 | #include "setup.h"
 44 | 
 45 | /* Max number of times in the HDF5 file and max length of the group
 46 |  * name in the files */
 47 | #define MAX_AVAILABLE_TIMES 1024
 48 | #define MAX_TIME_NAME_LENGTH 64
 49 | 
 50 | /**
 51 |  ** Run-time data structure for GRay2
 52 |  **
 53 |  ** To take advantage of all the low level features provided by lux,
 54 |  ** GRay2 is implemented as a lux module.  Its runtime data is stored
 55 |  ** in a subclass of Lux_job so that it can be loaded by the lux
 56 |  ** runtime.
 57 |  **/
 58 | struct gray {
 59 | 	Lux_job super; /* base object; kept first because EGO casts a Lux_job pointer to struct gray */
 60 | 
 61 | 	struct icond icond; /* run-time options, see icond.h */
 62 | 	struct param param; /* run-time options, see param.h */
 63 | 	struct setup setup; /* run-time options, see setup.h */
 64 | 
 65 | 	size_t n_coor; /* a data record holds n_coor + 2 * n_freq reals per ray */
 66 | 	size_t n_freq; /* number of frequencies, i.e. entries used in param.nu */
 67 | 	size_t n_info; /* number of diagnostic values per ray */
 68 | 
 69 | 	Lux_opencl *ocl;           /* lux "opencl" module instance used to run kernels */
 70 | 	cl_mem data;               /* first kernel argument: states of the rays */
 71 | 	cl_mem info;               /* second kernel argument: diagnostic information */
 72 | 	Lux_opencl_kernel *evolve; /* kernel launched by evolve() */
 73 | 
 74 | 	/* Grid details */
 75 | 	/* Bounding_box is a vector with 8 numbers:
 76 | 	 * {tmin, xmin, ymin, zmin, tmax, xmax, ymax, zmax} */
 77 | 	/* tmin and tmax are between the two loaded timesteps */
 78 | 
 79 | 	/* We need these quantities to convert from unnormalized OpenCL coordinates
 80 | 	   to physical coordinates and vice versa. */
 81 | 	cl_float8 bounding_box;
 82 | 	/* Points along the various coordinates */
 83 | 	cl_int4 num_points;			/* The .s0 coordinate is not used */
 84 | 	/* num_points.s1 contains the number of points along the x direction */
 85 | 	/* num_points.s2 contains the number of points along the y direction */
 86 | 	/* num_points.s3 contains the number of points along the z direction */
 87 | 
 88 | 	/* We need 40+10+1 == 51 images to contain all the 40 Christoffel
 89 | 	   symbols, 10 metric components, and 1 fluid property */
 90 | 
 91 | 	/* We always have two timesteps loaded */
 92 | 	cl_mem spacetime_t1[40+10+1];
 93 | 	cl_mem spacetime_t2[40+10+1];
 94 | 
 95 | 	char available_times[MAX_AVAILABLE_TIMES][MAX_TIME_NAME_LENGTH]; /* presumably the time-level group names of the HDF5 file -- confirm */
 96 | 
 97 | 	cl_float max_available_time; /* presumably the latest time in available_times -- confirm */
 98 | 
 99 | };
100 | 
101 | #define EGO ((struct gray *)ego) /* views the pointer named `ego` in the current scope as struct gray */
102 | #define CKR lux_check_func_success /* short alias */
103 | 
104 | /** Build the OpenCL module for GRay2 */
105 | extern Lux_opencl *build(Lux_job *);
106 | 
107 | /** Set the initial conditions; arguments are the job and the initial time */
108 | extern void icond(Lux_job *, real);
109 | 
110 | /** Evolve the states of photons to the next (super) step; arguments are
111 |  ** the job, the current time, the target time and the number of sub-steps */
112 | extern real evolve(Lux_job *, real, real, size_t);
113 | /** Output data to a file */
114 | extern void dump(Lux_job *, size_t);
115 | 
116 | /** I/O helper functions */
117 | extern size_t populate_ego_available_times(Lux_job *);
118 | extern size_t load_coordinates(Lux_job *);
119 | extern size_t load_snapshot(Lux_job *, size_t, size_t);
120 | extern void   copy_snapshot(Lux_job *, size_t);
121 | 
122 | #endif /* _GRAY_H_ */
123 | 


--------------------------------------------------------------------------------
/sim-org/driver.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | /** \file
 22 |  ** Generic driver kernels
 23 |  **
 24 |  ** GRay2 uses OpenCL's just-in-time compilation feature to implement
 25 |  ** run-time configurable algorithms.  In this file we implement
 26 |  ** generic driver kernels icond_drv() and evolve_drv() that use
 27 |  ** IDX(h, k) to access global memory.
 28 |  **
 29 |  ** We use the index convention `h`, `i`, `j`, `k` for time and the
 30 |  ** three spatial coordinates, respectively.  We use `s` to index the
 31 |  ** record/field.  These indices may be prefixed by `g` for global
 32 |  ** indices, `l` for local indices, etc.
 33 |  **/
 34 | 
 35 | /** OpenCL driver kernel for initializing states */
 36 | __kernel void
 37 | icond_drv(__global real *data,  /**< states of the rays     */
 38 |           __global real *info,  /**< diagnostic information */
 39 |           const    real  w_img, /**< Width  of the image in \f$GM/c^2\f$ */
 40 |           const    real  h_img, /**< Height of the image in \f$GM/c^2\f$ */
 41 |           const    real  r_obs, /**< Distance of the image from the black hole */
 42 |           const    real  i_obs, /**< Inclination angle of the image in degrees */
 43 |           const    real  j_obs, /**< Azimuthal   angle of the image in degrees */
 44 |           __local  real *scratch,
 45 |           SPACETIME_PROTOTYPE_ARGS)
 46 | {
 47 | 	const size_t row = get_global_id(0); /* along h, slowest changing index */
 48 | 	const size_t col = get_global_id(1); /* along w, fastest changing index */
 49 | 	const size_t ray = col + row * w_rays;
 50 | 	struct state st;
 51 | 	int f;
 52 | 
 53 | 	/* Work-items outside the w_rays x h_rays image have nothing to do */
 54 | 	if(col >= w_rays || row >= h_rays)
 55 | 		return;
 56 | 
 57 | 	/* Pixel centre on the image: ((index + 0.5) / count - 0.5) * extent */
 58 | 	st = icond(r_obs, i_obs, j_obs,
 59 | 	           ((col + 0.5) / w_rays - 0.5) * w_img,
 60 | 	           ((row + 0.5) / h_rays - 0.5) * h_img);
 61 | 
 62 | 	/* Write the state, field by field, and the diagnostics of this ray */
 63 | 	for(f = 0; f < n_data; ++f)
 64 | 		DATA(ray, f) = ((real *)&st)[f];
 65 | 	for(f = 0; f < n_info; ++f)
 66 | 		INFO(ray, f) = getuu(st.g, SPACETIME_ARGS);
 67 | }
 68 | 
 69 | /** OpenCL driver kernel for integrating the geodesic equations */
 70 | __kernel void
 71 | evolve_drv(__global real *data,  /**< states of the rays     */
 72 |            __global real *info,  /**< diagnostic information */
 73 |            const    real  dt,    /**< step size              */
 74 |            const    whole n_sub, /**< number of sub-steps    */
 75 |            __local  real *scratch, /**< local scratch; not named in this body (DATA/INFO may use it -- confirm) */
 76 |            SPACETIME_PROTOTYPE_ARGS)
 77 | {
 78 | 	const size_t gj = get_global_id(0); /* for h, slowest changing index */
 79 | 	const size_t gi = get_global_id(1); /* for w, fastest changing index */
 80 | 	const size_t g  = gi + gj * w_rays; /* flat index of this ray */
 81 | 	const int    n  = (INT_MAX / n_sub) * n_sub; /* largest multiple of n_sub that fits in an int; dt is counted in n integer ticks */
 82 | 	/* NOTE(review): n_sub == 0 would divide by zero above -- confirm callers never pass 0 */
 83 | 	if(gi < w_rays && gj < h_rays) { /* work-items outside the image do nothing */
 84 | 		struct state d;
 85 | 		int s, h, dh; /* s: field index; h: ticks done so far; dh: ticks of the current sub-step */
 86 | 
 87 | 		/* Input from global array */
 88 | 		for(s = 0; s < n_data; ++s)
 89 | 			((real *)&d)[s] = DATA(g, s);
 90 | 
 91 | 		/* Substepping: advance until all n ticks are used or the ray stops */
 92 | 		for(h = 0; h < n; h += dh) {
 93 | 			dh = getdt(d.g, dt/n_sub) / dt * n; /* allowed step as a fraction of dt, converted to whole ticks */
 94 | 			if(!dh) /* zero ticks: getdt() gave 0 or the fraction truncated to 0 */
 95 | 				break;
 96 | 			if(dh > n - h) /* never step past the end of dt */
 97 | 				dh = n - h;
 98 | 			d = integrate(d, dh * dt / n, SPACETIME_ARGS); /* ticks converted back to a step size */
 99 | 		}
100 | 
101 | 		/* Output to global array */
102 | 		for(s = 0; s < n_data; ++s)
103 | 			DATA(g, s) = ((real *)&d)[s];
104 | 
105 | 		for(s = 0; s < n_info; ++s)
106 | 			INFO(g, s) = getuu(d.g, SPACETIME_ARGS);
107 | 	}
108 | }
109 | 


--------------------------------------------------------------------------------
/sim-org/build.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | #include "gray.h"
 21 | 
 22 | #include <stdio.h>
 23 | 
 24 | static inline size_t /* the smaller of the two sizes */
 25 | min(size_t a, size_t b)
 26 | {
 27 | 	return b < a ? b : a;
 28 | }
 29 | 
 30 | /**
 31 |  ** Assemble the OpenCL source list and load the lux "opencl" module
 32 |  **
 33 |  ** Generates a preamble of `#define`s and constants from the run-time
 34 |  ** options, places it in front of the ".cl" files selected by the
 35 |  ** options, and hands everything to lux_load().
 36 |  **
 37 |  ** \return whatever lux_load("opencl", ...) returns, or NULL when the
 38 |  **         generated preamble does not fit its buffer
 39 |  **/
 40 | Lux_opencl *
 41 | build(Lux_job *ego)
 42 | {
 43 | 	/** \page newkern New OpenCL Kernels
 44 | 	 **
 45 | 	 ** Extend GRay2 by adding new OpenCL kernels
 46 | 	 **
 47 | 	 ** GRay2 uses the just-in-time compilation feature of OpenCL
 48 | 	 ** to build computation kernels at run-time.  Most of the low
 49 | 	 ** level OpenCL codes are actually in a lux module called
 50 | 	 ** "opencl".  GRay2 developers simply need to load this
 51 | 	 ** module with a list of OpenCL source codes, e.g.,
 52 | 	 ** \code{.c}
 53 | 	 **   const char *buf   = "static constant real a_spin = 0.999;\n";
 54 | 	 **   const char *src[] = {buf, "KS", "RK4", "AoS", NULL};
 55 | 	 **   const char *flags = "-cl-mad-enable";
 56 | 	 **   struct LuxOopencl opts = {..., flags, src};
 57 | 	 **   Lux_opencl *ocl = lux_load("opencl", &opts);
 58 | 	 ** \endcode
 59 | 	 ** and then an OpenCL kernel can be obtained and run by
 60 | 	 ** \code{.c}
 61 | 	 **   Lux_opencl_kernel *icond = ocl->mkkern(ocl, "icond_drv");
 62 | 	 **   ...
 63 | 	 **   ocl->exec(ocl, icond, ...);
 64 | 	 ** \endcode
 65 | 	 ** Therefore, with this powerful lux module, it is
 66 | 	 ** straightforward to add a new OpenCL kernels to GRay2:
 67 | 	 **
 68 | 	 ** -# Name the OpenCL source code with an extension ".cl" and
 69 | 	 **    add it to the "sim/" source code folder.
 70 | 	 ** -# In "sim/Makefile.am", append the new file name to
 71 | 	 **    dist_krn_DATA.
 72 | 	 ** -# Add new code to the C files in "sim" to use the new
 73 | 	 **    kernel, or make the new source code default in
 74 | 	 **    "sim/gray.c" if necessary.
 75 | 	 **
 76 | 	 ** Note that, however, the developer is responsible to make
 77 | 	 ** sure that the new OpenCL source code is compatible with
 78 | 	 ** other OpenCL codes.  This is because GRay2 place all the
 79 | 	 ** OpenCL codes together and build them as a single program.
 80 | 	 **/
 81 | 	struct LuxOopencl opts = OPENCL_NULL;
 82 | 
 83 | 	struct param *p = &EGO->param;
 84 | 	struct setup *s = &EGO->setup;
 85 | 
 86 | 	const size_t n_data  = EGO->n_coor + EGO->n_freq * 2;
 87 | 	const size_t n_info  = 1;
 88 | 	/* e_chunk is the lowest set bit of n_data, i.e. the largest power
 89 | 	 * of two dividing it, capped at 16.  NOTE(review): n_data == 0
 90 | 	 * would give e_chunk == 0 and a division by zero; presumably
 91 | 	 * n_coor is never 0 -- confirm. */
 92 | 	const size_t e_chunk = min(16, n_data & ~(n_data-1)); /* number of real elements in chunk */
 93 | 	const size_t n_chunk = n_data / e_chunk;              /* number of chunks */
 94 | 	size_t j;
 95 | 	int    w; /* return value of snprintf() */
 96 | 
 97 | 	char lst[10240], *tail;
 98 | 	char buf[10240];
 99 | 
100 | 	/* The generated preamble comes first so that every ".cl" file
101 | 	 * listed after it can use the definitions it contains */
102 | 	const char *src[] = {buf,
103 | 	                     "preamble.cl",
104 | 	                     "interp.cl",
105 | 	                     p->coordinates,
106 | 	                     "flow.cl",
107 | 	                     "rt.cl",
108 | 	                     s->morder,
109 | 	                     "phys.cl",
110 | 	                     s->scheme,
111 | 	                     "driver.cl",
112 | 	                     NULL};
113 | 
114 | 	/* Comma-separated list of the frequencies.  Start from an empty
115 | 	 * string so that "%s" below is well defined when n_freq == 0, and
116 | 	 * bound every write so that a long list cannot overrun lst[]. */
117 | 	lst[0] = '\0';
118 | 	for(j = 0, tail = lst; j < EGO->n_freq; ++j) {
119 | 		const size_t room = sizeof(lst) - (size_t)(tail - lst);
120 | 
121 | 		w = snprintf(tail, room, "%s%.18e", j ? "," : "", p->nu[j]);
122 | 		if(w < 0 || (size_t)w >= room) {
123 | 			lux_print("GRay2: too many frequencies for the kernel preamble\n");
124 | 			return NULL;
125 | 		}
126 | 		tail += w;
127 | 	}
128 | 
129 | 	w = snprintf(buf, sizeof(buf),
130 | 	             "#define n_freq %zu\n"
131 | 	             "#define n_data %zu\n"
132 | 	             "#define n_info %zu\n"
133 | 	             "#define n_rays %zu\n"
134 | 	             "#define w_rays %zu\n"
135 | 	             "#define h_rays %zu\n"
136 | 	             "#define n_chunk %zu\n"
137 | 	             "typedef real%zu realE;\n"
138 | 	             "static __constant real nus[n_freq] = {%s};\n"
139 | 	             "static __constant real M_ADM = %.18e;\n",
140 | 	             EGO->n_freq,
141 | 	             n_data,
142 | 	             n_info,
143 | 	             p->h_rays * p->w_rays,
144 | 	             p->w_rays,
145 | 	             p->h_rays,
146 | 	             n_chunk,
147 | 	             e_chunk,
148 | 	             lst,
149 | 	             p->M_ADM);
150 | 	if(w < 0 || (size_t)w >= sizeof(buf)) {
151 | 		/* A truncated preamble would be compiled as broken source */
152 | 		lux_print("GRay2: kernel preamble does not fit its buffer\n");
153 | 		return NULL;
154 | 	}
155 | 
156 | 	lux_print("n_data  = %zu\n"
157 | 	          "n_info  = %zu\n"
158 | 	          "e_chunk = %zu\n",
159 | 	          n_data,
160 | 	          n_info,
161 | 	          e_chunk);
162 | 
163 | 	opts.base    = build; /* this function */
164 | 	opts.iplf    = s->i_platform;
165 | 	opts.idev    = s->i_device;
166 | 	opts.devtype = s->device_type;
167 | 	opts.realsz  = s->precision;
168 | 	opts.flags   = s->kflags;
169 | 	opts.src     = src;
170 | 
171 | 	return lux_load("opencl", &opts);
172 | }
173 | 


--------------------------------------------------------------------------------
/sim/gray.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan
  3 |  * Copyright (C) 2021 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | #include "gray.h"
 22 | 
 23 | #include <lux/hdf5.h>
 24 | #include <lux/mangle.h>
 25 | #include <lux/planner.h>
 26 | #include <lux/switch.h>
 27 | #include <lux/zalloc.h>
 28 | 
 29 | #include <stdio.h>
 30 | 
 31 | #include "Kerr_rap.h"
 32 | 
 33 | #define EGO ((struct gray *)ego) /* views the pointer named `ego` in the current scope as struct gray */
 34 | /* One branch of a SWITCH block, selected when the option string EGO->opt equals `str` */
 35 | #define MATCH(opt, str) CASE(!strcmp(EGO->opt, str))
 36 | /** Apply one option string `arg` to the job; returns the status of the last *_config() call that ran */
 37 | static int
 38 | conf(Lux_job *ego, const char *restrict arg)
 39 | {
 40 | 	const char *spacetime_org = EGO->gray.spacetime; /* option strings as they were before */
 41 | 	const char *initcond_org  = EGO->gray.initcond;  /* configuring; compared by address below */
 42 | 	int status;
 43 | 
 44 | 	lux_debug("GRay2: configuring instance %p with \"%s\"\n", ego, arg);
 45 | 	/* The argument is offered to the top-level gray options first */
 46 | 	status = gray_config(&EGO->gray, arg);
 47 | 
 48 | 	/* TODO: take full advantage of dynamic module and avoid switch */
 49 | 	SWITCH {
 50 | 	MATCH(gray.spacetime, "Kerr")
 51 | 		if(EGO->gray.spacetime != spacetime_org) /* pointer changed: `arg` replaced the spacetime name */
 52 | 			Kerr_init(&EGO->spacetime.Kerr);
 53 | 		else if(status) /* non-zero status, presumably "not handled yet" -- confirm */
 54 | 			status = Kerr_config(&EGO->spacetime.Kerr, arg);
 55 | 	DEFAULT
 56 | 		lux_fatal("Unknown spacetime configuration \"%s\"\n",
 57 | 		          EGO->gray.spacetime);
 58 | 	}
 59 | 	/* `status` may now be the result of Kerr_config() rather than gray_config() */
 60 | 	/* TODO: take full advantage of dynamic module and avoid switch */
 61 | 	SWITCH {
 62 | 	MATCH(gray.initcond, "infcam")
 63 | 		if(EGO->gray.initcond != initcond_org) /* pointer changed: `arg` replaced the initcond name */
 64 | 			infcam_init(&EGO->initcond.infcam);
 65 | 		else if(status) /* still non-zero: offer `arg` to the infcam options */
 66 | 			status = infcam_config(&EGO->initcond.infcam, arg);
 67 | 	DEFAULT
 68 | 		lux_fatal("Unknown initial conditions for rays \"%s\"\n",
 69 | 		          EGO->gray.initcond);
 70 | 	}
 71 | 
 72 | 	return status;
 73 | }
 74 | 
 75 | /* Load the OpenCL, HDF5, initial-condition and planner modules, allocate the
 76 |  * ray buffers and plan the integrator; returns 0 on success, -1 on failure. */
 77 | static int
 78 | init(Lux_job *ego)
 79 | {
 80 | 	Lux_planner       *gi = NULL;
 81 | 	Lux_gray_initcond *ic = NULL;
 82 | 	char buf[256]; /* module path, reused for both lux_load() calls below */
 83 | 
 84 | 	lux_debug("GRay2: initializing instance %p\n", ego);
 85 | 
 86 | 	EGO->t  = EGO->gray.t_init;
 87 | 	EGO->dt = EGO->gray.dt_dump;
 88 | 	EGO->i  = EGO->gray.i_init;
 89 | 	EGO->n  = EGO->gray.n_dump;
 90 | 
 91 | 	lux_print("GRay2:init: setup opencl module\n");
 92 | 	{
 93 | 		struct LuxOopencl opts = OPENCL_NULL;
 94 | 		opts.iplf    = EGO->gray.i_platform;
 95 | 		opts.idev    = EGO->gray.i_device;
 96 | 		opts.devtype = EGO->gray.device_type;
 97 | 		EGO->ocl = lux_load("opencl", &opts);
 98 | 	}
 99 | 	EGO->io = lux_load("hdf5", NULL);
100 | 	if(!EGO->ocl || !EGO->io) /* ocl is dereferenced below, io is called in exec() */
101 | 		return -1;
102 | 
103 | 	lux_print("GRay2:init: initcond:ic: %s\n", EGO->gray.initcond);
104 | 	{
105 | 		Lux_gray_initcond_opts opts = {
106 | 			EGO->ocl->nque, EGO->ocl->que, &EGO->initcond
107 | 		};
108 | 		/* snprintf() truncates an over-long name instead of overflowing buf[] */
109 | 		snprintf(buf, sizeof(buf), "sim/gray/%s", EGO->gray.initcond);
110 | 		ic = lux_load(buf, &opts);
111 | 		if(!ic)
112 | 			return -1;
113 | 	}
114 | 
115 | 	lux_print("GRay2:init: allocate memory\n");
116 | 	EGO->rays      = dmk(EGO->ocl, real[8], ic->n_width * ic->n_height);
117 | 	EGO->rays_host = palloc(real, ic->n_width, ic->n_height, 8);
118 | 
119 | 	lux_print("GRay2:init: initialize rays\n");
120 | 	(void)ic->init(ic, EGO->rays.data);
121 | 
122 | 	lux_print("GRay2:init: spacetime:st: %s\n", EGO->gray.spacetime);
123 | 	/* TODO: take full advantage of dynamic module and avoid switch */
124 | 	SWITCH {
125 | 	MATCH(gray.spacetime, "Kerr")
126 | 		Lux_Kerr_problem prob = {
127 | 			EGO->ocl->nque,
128 | 			EGO->ocl->que,
129 | 			dgetn(EGO->rays, 0),
130 | 			EGO->spacetime.Kerr.a_spin,
131 | 			-1.0,
132 | 			EGO->rays.data
133 | 		};
134 | 
135 | 		snprintf(buf, sizeof(buf), "sim/gray/%s", EGO->gray.spacetime);
136 | 		gi = lux_load("planner", buf);
137 | 		if(!gi) { /* no planner: do not call through a NULL pointer */
138 | 			lux_unload(ic);
139 | 			return -1;
140 | 		}
141 | 		EGO->gi = gi->plan(gi, (Lux_problem *)&prob, LUX_PLAN_DEFAULT);
142 | 	DEFAULT
143 | 		lux_fatal("Unknown spacetime configuration \"%s\"\n",
144 | 		          EGO->gray.spacetime);
145 | 	}
146 | 
147 | 	if(gi)
148 | 		lux_unload(gi);
149 | 	lux_unload(ic); /* non-NULL here: a failed load returned early above */
150 | 	return 0;
151 | }
152 | 
153 | /* Dump the rays once per step until EGO->n; 0 on success, -1 on I/O failure */
154 | static int
155 | exec(Lux_job *ego)
156 | {
157 | 	lux_debug("GRay2: executing instance %p\n", ego);
158 | 
159 | 	while(EGO->i < EGO->n) {
160 | 		size_t next   = EGO->i + 1;
161 | 		double t      = EGO->t;
162 | 		double target = EGO->dt * next;
163 | 		Lux_file *file;
164 | 		char buf[256];
165 | 
166 | 		lux_print("%zu: %4.1f -> %4.1f", next, t, target);
167 | 		/* TODO: EGO->gi->exec(EGO->gi); */
168 | 		EGO->ocl->d2h(EGO->ocl,
169 | 			EGO->rays_host,
170 | 			EGO->rays.data, dope_getsz(EGO->rays.dope));
171 | 
172 | 		/* rayfile is a user-supplied printf template: never write past buf[] */
173 | 		snprintf(buf, sizeof(buf), EGO->gray.rayfile, next);
174 | 		file = EGO->io(buf, H5F_ACC_EXCL);
175 | 		if(!file) /* presumably the file exists already (H5F_ACC_EXCL) -- confirm */
176 | 			return -1;
177 | 		file->write_pa(file, "/rays", typecodeof(real), EGO->rays_host);
178 | 		/* NOTE(review): `file` is never closed/released here -- confirm ownership */
179 | 		lux_print(": DONE\n");
180 | 		EGO->i = next;
181 | 		EGO->t = target;
182 | 	}
183 | 
184 | 	return 0;
185 | }
186 | 
187 | void *
188 | LUX_MKMOD(const void *opts) /* constructor; returns NULL if zalloc() fails */
189 | {
190 | 	void *ego;
191 | 
192 | 	lux_debug("GRay2: constructing an instance with options %p\n", opts);
193 | 	ego = zalloc(sizeof(struct gray));
194 | 	if(ego) {
195 | 		EGO->super.conf = conf;
196 | 		EGO->super.init = init;
197 | 		EGO->super.exec = exec;
198 | 
199 | 		/* conf() re-runs *_init() only when a selection changes, so the
200 | 		   defaults of BOTH sub-modules must be set here (Kerr was missing) */
201 | 		gray_init(&EGO->gray);
202 | 		Kerr_init(&EGO->spacetime.Kerr);
203 | 		infcam_init(&EGO->initcond.infcam);
204 | 	}
205 | 	return ego;
206 | }
205 | 
206 | void
207 | LUX_RMMOD(void *ego)
208 | {
209 | 	struct gray *const self = ego;
210 | 	lux_debug("GRay2: destructing instance %p\n", ego);
211 | 	/* Host copy, device buffer, then the OpenCL module the buffer was made
212 | 	   with.  NOTE(review): self->io and self->gi are not released here. */
213 | 	pfree(self->rays_host);
214 | 	drm(self->ocl, self->rays);
215 | 	lux_unload(self->ocl);
216 | 	free(self);
217 | }
217 | 


--------------------------------------------------------------------------------
/tools/generate_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright (C) 2020-2021 Gabriele Bozzola
  4 | #
  5 | # This program is free software; you can redistribute it and/or modify it under the terms
  6 | # of the GNU General Public License as published by the Free Software Foundation; either
  7 | # version 3 of the License, or (at your option) any later version.
  8 | #
  9 | # This program is distributed in the hope that it will be useful, but WITHOUT ANY
 10 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 11 | # PARTICULAR PURPOSE. See the GNU General Public License for more details.
 12 | #
 13 | # You should have received a copy of the GNU General Public License along with this
 14 | # program; if not, see <https://www.gnu.org/licenses/>.
 15 | 
 16 | import concurrent.futures
 17 | 
 18 | import h5py
 19 | import numpy as np
 20 | 
 21 | from boosted_ks import Christoffel, metric
 22 | 
 23 | """Prepare a HDF5 file in the format required by GRay2 containing spacetime and fluid
 24 | information. The output file will have multiple HDF5 groups, one called 'grid' contains
 25 | the coordinates, the other ones have as name the timestep at which the variables are
 26 | defined. Each of these groups contain numerous datasets, one for each variable."""
 27 | 
 28 | # User controllable parameters
 29 | 
 30 | # KS parameters
 31 | a_spin = 0.6
 32 | boostv = 0.5
 33 | 
 34 | # File parameters
 35 | output_file = "data.h5"
 36 | times = ["0"]
 37 | precision = np.single
 38 | num_points_x, num_points_y, num_points_z = 10, 12, 14
 39 | xmin, xmax = -300, 300
 40 | ymin, ymax = -400, 400
 41 | zmin, zmax = -600, 600
 42 | # If num_worker is not None, use this many CPUs for the following computations. If it is
 43 | # None, use as many as possible.
 44 | num_workers = None
 45 | 
 46 | 
 47 | _dims = ("t", "x", "y", "z")
 48 | 
 49 | def fisheye(coord, min_, max_, num_points):
 50 |     """Fisheye transformation.
 51 | 
 52 |     Takes in the logically-Cartesian coordinates, and returns the corresponding fisheye
 53 |     coordinates as defined from min_ to max_.
 54 | 
 55 |     """
 56 |     B = np.cbrt(np.arcsinh(min_))
 57 |     A = (np.cbrt(np.arcsinh(max_)) - B) / (num_points - 1)
 58 |     return np.sinh((A * coord + B) ** 3)
 59 | 
 60 | 
 61 | # Values 0, 1, 2, 3, .... num_points - 1
 62 | cart_x = np.linspace(0, num_points_x - 1, num_points_x, dtype=precision)
 63 | cart_y = np.linspace(0, num_points_y - 1, num_points_y, dtype=precision)
 64 | cart_z = np.linspace(0, num_points_z - 1, num_points_z, dtype=precision)
 65 | 
 66 | # Physical coordinates
 67 | xx = fisheye(cart_x, xmin, xmax, num_points_x)
 68 | yy = fisheye(cart_y, ymin, ymax, num_points_y)
 69 | zz = fisheye(cart_z, zmin, zmax, num_points_z)
 70 | 
 71 | # Now we have to prepare all the variables.
 72 | 
 73 | # This can be computationally expensive, so we are going to distribute the computation on
 74 | # as many workers as we can.
 75 | 
 76 | # Gamma dict is a dictionary with keys the times and values another dictionary that has
 77 | # as keys the indices and as values the Christoffel symbols. Similarly, metric dict. The
 78 | # other dicts have only one level.
 79 | Gamma_dict = {}
 80 | metric_dict = {}
 81 | 
 82 | fluid_vars = ['rho']
 83 | # The fluid variables have to follow this naming convention in this file. They have to
 84 | # be called name_dict, where name is one of those that enter fluid_vars.
 85 | rho_dict = {}
 86 | 
 87 | # Not very Pythonic
 88 | indices = []
 89 | indices_metric = []
 90 | for i in range(4):
 91 |     for j in range(4):
 92 |         for k in range(j, 4):
 93 |             indices += [(i, j, k)]
 94 |             if i == 0:
 95 |                 indices_metric += [(j, k)]
 96 | 
 97 | for time in times:
 98 |     print(f"Working on time {time}")
 99 |     # The helpers are (re)defined per timestep because they read the loop variable `time`
100 |     def compute_Gamma(ind):
101 |         """Return the Christoffel symbol with indices `ind` on the grid, nested as [z][y][x]."""
102 |         return [[[Christoffel((1, a_spin, boostv), (float(time), x, y, z), *ind)
103 |                   for x in xx] for y in yy] for z in zz]
104 | 
105 |     def compute_metric(ind):
106 |         """Return the metric component with indices `ind` on the grid, nested as [z][y][x]."""
107 |         return [[[metric((1, a_spin, boostv), (float(time), x, y, z), *ind)
108 |                   for x in xx] for y in yy] for z in zz]
109 | 
110 |     Gamma_dict[time] = {}
111 |     metric_dict[time] = {}
112 |     # One task per index tuple. NOTE(review): there is no `if __name__ == "__main__":` guard,
113 |     # so with the "spawn" start method each worker would re-run this script -- confirm platforms.
114 |     with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as exe:
115 |         for index, Gamma in zip(indices, exe.map(compute_Gamma, indices)):
116 |             Gamma_dict[time][index] = Gamma
117 |     print("Computed Gammas")
118 | 
119 |     with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as exe:
120 |         for index, met in zip(indices_metric, exe.map(compute_metric, indices_metric)):
121 |             metric_dict[time][index] = met
122 |     print("Computed metric")
123 | 
124 |     def compute_rho(x, y, z):
125 |         """Compute the density as 1/r (infinite at the origin, should a grid point land there)."""
126 |         return 1/np.sqrt(x*x + y*y + z*z)
127 |     # Computed serially in this process; same [z][y][x] nesting as the lists above
128 |     rho_dict[time] = [[[compute_rho(x, y, z) for x in xx] for y in yy] for z in zz]
129 |     print("Computed rho")
130 | 
131 | 
132 | with h5py.File(output_file, "w") as f:
133 |     grid_group = f.create_group("grid")
134 |     grid_group.create_dataset("x", data=xx)
135 |     grid_group.create_dataset("y", data=yy)
136 |     grid_group.create_dataset("z", data=zz)
137 |     for time in times:
138 |         it_group = f.create_group(time)
139 |         for ind in indices:
140 |             i, j, k = ind
141 |             name = f"Gamma_{_dims[i]}{_dims[j]}{_dims[k]}"
142 |             data = Gamma_dict[time][(i, j, k)]
143 |             data = np.nan_to_num(data)
144 |             data = data.astype(precision)
145 |             it_group.create_dataset(name, data=data)
146 |         for ind in indices_metric:
147 |             i, j = ind
148 |             name = f"g_{_dims[i]}{_dims[j]}"
149 |             data = metric_dict[time][(i, j)]
150 |             data = np.nan_to_num(data)
151 |             data = data.astype(precision)
152 |             it_group.create_dataset(name, data=data)
153 |         # Fluid variables
154 |         for var in fluid_vars:
155 |             name = var
156 |             # We read the _dict variables from the global namespace
157 |             data = globals()[f"{var}_dict"][time]
158 |             data = np.nan_to_num(data)
159 |             data = data.astype(precision)
160 |             it_group.create_dataset(name, data=data)
161 | 


--------------------------------------------------------------------------------
/sim-org/rt.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | /** \file
 22 |  ** Radiative transfer
 23 |  **
 24 |  ** Radiative transfer related functions such as the emission and
 25 |  ** extinction (absorption) coefficients.
 26 |  **/
 27 | 
 28 | #define CONST_c     K(2.99792458e+10) /* speed of light; values here match CGS units */
 29 | #define CONST_h     K(6.62606957e-27) /* Planck constant */
 30 | #define CONST_G     K(6.67384800e-08) /* gravitational constant */
 31 | #define CONST_kB    K(1.38064881e-16) /* Boltzmann constant */
 32 | #define CONST_Ry    K(2.17987197e-11) /* Rydberg energy */
 33 | #define CONST_e     K(4.80320425e-10) /* elementary charge */
 34 | #define CONST_me    K(9.10938291e-28) /* electron mass */
 35 | #define CONST_mp_me K(1836.152672450) /* proton-to-electron mass ratio */
 36 | #define CONST_mSun  K(1.98910000e+33) /* solar mass */
 37 | /* NOTE(review): OpenCL C may predefine M_PI; this redefinition could warn -- confirm */
 38 | #define M_PI    K(3.14159265358979323846)
 39 | #define M_SQRT2 K(1.41421356237309504880)
 40 | /* Temperature grid of log_K2it_tab[]: T_GRID intervals, log-spaced from T_MIN to T_MAX */
 41 | #define T_MIN K(1e-1)
 42 | #define T_MAX K(1e+2)
 43 | #define T_GRID (60)
 44 | /* Math wrappers: single place to pick precision-specific variants later */
 45 | #define LOG(x)    log(x)    /* \todo Select the right precision for log()  */
 46 | #define SQRT(x)   sqrt(x)   /* \todo Select the right precision for sqrt() */
 47 | #define CBRT(x)   cbrt(x)   /* \todo Select the right precision for cbrt() */
 48 | #define POW(x, y) pow(x, y) /* \todo Select the right precision for pow()  */
 49 | #define EXP(x)    exp(x)    /* \todo Select the right precision for exp()  */
 50 | 
 51 | static __constant real log_K2it_tab[] = {
 52 | 	-10.747001122, -9.5813378172, -8.5317093904, -7.5850496322,
 53 | 	-6.7296803564, -5.9551606678, -5.2521532618, -4.6123059955,
 54 | 	-4.0281471473, -3.4929929282, -3.0008659288, -2.5464232845,
 55 | 	-2.1248934192, -1.7320202979, -1.3640141782, -1.0175079137,
 56 | 	-0.6895179334, -0.3774091024, -0.0788627660, +0.2081526098,
 57 | 	+0.4854086716, +0.7544426322, +1.0165811787, +1.2729629642,
 58 | 	+1.5245597366, +1.7721960959, +2.0165678441, +2.2582588804,
 59 | 	+2.4977566043, +2.7354658112, +2.9717210921, +3.2067977811,
 60 | 	+3.4409215189, +3.6742765257, +3.9070126886, +4.1392515843,
 61 | 	+4.3710915520, +4.6026119396, +4.8338766306, +5.0649369599,
 62 | 	+5.2958341090, +5.5266010659, +5.7572642218, +5.9878446670,
 63 | 	+6.2183592400, +6.4488213736, +6.6792417767, +6.9096289812,
 64 | 	+7.1399897815, +7.3703295860, +7.6006526984, +7.8309625420,
 65 | 	+8.0612618396, +8.2915527560, +8.5218370124, +8.7521159766,
 66 | 	+8.9823907360, +9.2126621546, +9.4429309191, +9.6731975749,
 67 | 	+9.9034625556
 68 | };
 69 | 
 70 | static inline real
 71 | log_K2it(real te) /* interpolates log_K2it_tab[] linearly in log(te) */
 72 | {
 73 | 	const real h = LOG(te/(real)T_MIN) * (real)(T_GRID / LOG(T_MAX/T_MIN));
 74 | 	/* Cell index clamped to [0, T_GRID-1] so tab[i+1] stays in the table at te == T_MAX */
 75 | 	const int  i = h < T_GRID - 1 ? (h > 0 ? (int)h : 0) : T_GRID - 1;
 76 | 	const real d = h - i;
 77 | 	return (1 - d) * log_K2it_tab[i] + d * log_K2it_tab[i+1];
 78 | } /* 7 FLOP */
 79 | 
 80 | static inline real
 81 | B_Planck(real nu, real te) /* 2 h nu^3 / c^2 / (exp(x) - 1), x = h nu / (me c^2 te) */
 82 | {
 83 | 	real f1 = 2 * CONST_h * CONST_c;          /* ~ 4e-16 */
 84 | 	real f2 = CONST_h / (CONST_me * CONST_c); /* ~ 2e-10 */
 85 | 	/* From here on `nu` holds nu / c, so f1 becomes 2 h nu^2 / c and f2 becomes x */
 86 | 	nu /= (real)CONST_c;             /* 1e-02 -- 1e+12 */
 87 | 	f1 *= nu * nu;                   /* 4e-20 -- 4e+08 */
 88 | 	f2 *= nu / (te + (real)EPSILON); /* 1e-12 -- 1e+02 */
 89 | 	/* Small x: use exp(x) - 1 = x (1 + x/2 + x^2/6 + ...) instead of subtracting 1 */
 90 | 	return nu * (f2 > (real)1e-5 ?
 91 | 	             f1 / (EXP(f2) - 1) :
 92 | 	             (f1 / f2) / (1 + f2 / 2 + f2 * f2 / 6));
 93 | } /* 10+ FLOP */
 94 | 
 95 | static inline real
 96 | Gaunt(real x, real y) /* L_j_ff() passes x = te me c^2 / Ry, y = h nu / (me c^2 te) */
 97 | {
 98 | 	const real sqrt_x = SQRT(x);
 99 | 	const real sqrt_y = SQRT(y);
100 | 	/* Piecewise approximation selected by x and y; 1.78107241799 is exp(Euler's constant) */
101 | 	if(x > 1)
102 | 		return y > 1 ?
103 | 			(real)SQRT(K(3.0) / M_PI) / sqrt_y :
104 | 			(real)(SQRT(K(3.0)) / M_PI) *
105 | 			((real)LOG(K(4.0) / K(1.78107241799)) - LOG(y + (real)EPSILON));
106 | 	else if(x * y > 1)
107 | 		return (real)SQRT(K(12.0)) / (sqrt_x * sqrt_y);
108 | 	else if(y > sqrt_x)
109 | 		return 1;
110 | 	else {
111 | 		/* The "small-angle classical region" formulae in
112 | 		   Rybicki & Lightman (1979) and Novikov & Thorne
113 | 		   (1973) are inconsistent; it seems that both
114 | 		   versions contain typos.  TODO: double-check the
115 | 		   following formula */
116 | 		const real g = (real)(SQRT(K(3.0)) / M_PI) *
117 | 			((real)LOG(K(4.0) / POW(K(1.78107241799), K(2.5))) + LOG(sqrt_x / (y + (real)EPSILON)));
118 | 		return g > (real)EPSILON ? g : (real)EPSILON; /* floor at EPSILON so it stays positive */
119 | 	}
120 | } /* 3+ FLOP */
121 | 
122 | static inline real
123 | L_j_ff(real nu, real te, real ne)
124 | {
125 | 	/* "Standard" formula for thermal bremsstrahlung, Rybicki &
126 | 	   Lightman equation (5.14b) divided by 4 pi.
127 | 	   Because the physical length scale L has to be part of the
128 | 	   radiative transfer, we multiply it with the emissivity
129 | 	   j_ff. */
130 | 
131 | 	/* Assume Z == 1 and ni == ne */
132 | 
133 | 	real x = CONST_me * CONST_c * CONST_c / CONST_Ry;  /* ~ 4e4 */
134 | 	real y = CONST_h / (CONST_me * CONST_c * CONST_c); /* ~ 3e-21 */
135 | 	real f = SQRT(CONST_G * CONST_mSun / (CONST_c * CONST_c) * K(6.8e-38) /
136 | 	              (4 * M_PI * SQRT(CONST_me * CONST_c * CONST_c / CONST_kB)));
137 | 	/* NOTE(review): `nu / te` below has no EPSILON guard, unlike B_Planck() -- confirm te > 0 */
138 | 	x *= te;      /* ~ 1e+04 */
139 | 	y *= nu / te; /* ~ 1e-10 */
140 | 	f *= ne;      /* ~ 1e-15 */
141 | 	/* f (the square root of the prefactor, times ne) enters once in each factor below */
142 | 	return (M_ADM * f * Gaunt(x, y)) * (f / (SQRT(te) * EXP(y) + (real)EPSILON));
143 | } /* 12 FLOP + FLOP(Gaunt) == 15+ FLOP */
144 | 
145 | static inline real
146 | L_j_syn(real nu, real te, real ne, real B,  real cos_theta)
147 | {
148 | 	/* An approximate expression for thermal magnetobremsstrahlung
149 | 	   emission, see Leung, Gammie, & Noble (2011) equation (72).
150 | 	   Because the physical length scale L has to be part of the
151 | 	   radiative transfer, we multiply it with the emissivity j_syn. */
152 | 	/* Returns 0 for te <= T_MIN and for |cos_theta| >= 1 */
153 | 	if(te        <= (real)T_MIN ||
154 | 	   cos_theta <=          -1 ||
155 | 	   cos_theta >=           1) return 0;
156 | 	/* NOTE: this local `nus` shadows the frequency array nus[] that rt_rhs() indexes */
157 | 	const real nus = te * te * B * SQRT(1 - cos_theta * cos_theta) *
158 | 		         (real)(CONST_e / (9 * M_PI * CONST_me * CONST_c)); /* ~ 1e5 */
159 | 	const real x   = nu / (nus + (real)EPSILON); /* 1e6 -- 1e18 */
160 | 
161 | 	const real f      = (CONST_G * CONST_mSun / (CONST_c * CONST_c)) *
162 | 		            (M_SQRT2 * M_PI * CONST_e * CONST_e / (3 * CONST_c));
163 | 	const real cbrtx  = CBRT(x);                                     /* 1e2 -- 1e6 */
164 | 	const real xx     = SQRT(x) + (real)1.88774862536 * SQRT(cbrtx); /* 1e3 -- 1e9 */
165 | 	const real log_K2 = (te > (real)T_MAX) ?
166 | 		            LOG(2 * te * te - (real)0.5) :
167 | 		            log_K2it(te);
168 | 	/* Above T_MAX the closed form LOG(2 te^2 - 0.5) replaces the table lookup */
169 | 	return (M_ADM * xx * EXP(-cbrtx)) * (xx * EXP(-log_K2)) * (f * ne * nus);
170 | } /* 25 FLOP + min(4 FLOP, FLOP(log_K2it)) == 29+ FLOP */
171 | 
172 | 
173 | 
174 | struct rt {
175 | 	real I  [n_freq]; /* one entry per frequency; presumably the intensity -- confirm */
176 | 	real tau[n_freq]; /* one entry per frequency; enters rt_rhs() as EXP(-tau) */
177 | };
178 | 
179 | struct rt
180 | rt_icond(void) /* initial state: every I[] and tau[] entry is zero */
181 | {
182 | 	return (struct rt){{0}};
183 | }
184 | 
185 | struct rt
186 | rt_rhs(struct rt r, struct flow f) /* overwrites r.I[] and r.tau[] per frequency, returns r */
187 | {
188 | 	for(whole i = 0; i < n_freq; ++i) {
189 | 		const real nu     = nus[i] * f.shift;
190 | 		const real B_nu   = B_Planck(nu, f.te);
191 | 		const real L_j_nu = L_j_syn(nu, f.te, f.ne, f.b, f.bkcos) + L_j_ff(nu, f.te, f.ne);
192 | 		/* r.I[i] reads the incoming r.tau[i], so it must be assigned before r.tau[i] */
193 | 		r.I  [i] = -L_j_nu * EXP(-r.tau[i]) / (f.shift * f.shift + (real)EPSILON);
194 | 		r.tau[i] = -L_j_nu * f.shift        / (B_nu              + (real)EPSILON);
195 | 	}
196 | 
197 | 	return r;
198 | }
199 | 


--------------------------------------------------------------------------------
/sim-org/preamble.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | /** \file
 22 |  ** Preamble: useful OpenCL macros and functions
 23 |  **
 24 |  ** GRay2 uses OpenCL's just-in-time compilation feature to implement
 25 |  ** run-time configurable algorithms.  In this preamble we provide
 26 |  ** OpenCL macros and functions that help implementing the other parts
 27 |  ** of GRay2.
 28 |  **/
 29 | 
 30 | #define EPSILON 1e-28 /* tiny offset added to denominators and log() arguments in rt.cl */
 31 | /** Helper macros to write one equation for every piece of a variable: EACH(s) = expr;
 32 |  ** loops over n_chunk realE-sized pieces of s and E(v) selects the current piece of v **/
 33 | #define EACH(s) for(whole _e_ = 0; _e_ < n_chunk; ++_e_) E(s)
 34 | #define E(s) ((realE *)&(s))[_e_]
 35 | /** Turn an expression into a local `struct state` that can be passed to a function;
 36 |  ** relies on the GNU statement-expression extension ({ ... }) **/
 37 | #define X(x) ({ struct state _; EACH(_) = (x); _; })
 38 | 
 39 | /** Spacetime arguments for functions **/
 40 | #define SPACETIME_PROTOTYPE_ARGS \
 41 | const    real8 bounding_box, /**< Max coordinates of the grid    */ \
 42 | const    int4 num_points, /**< Number of points on the grid    */ \
 43 | __read_only image3d_t Gamma_ttt_t1, \
 44 | __read_only image3d_t Gamma_ttx_t1, \
 45 | __read_only image3d_t Gamma_tty_t1, \
 46 | __read_only image3d_t Gamma_ttz_t1, \
 47 | __read_only image3d_t Gamma_txx_t1, \
 48 | __read_only image3d_t Gamma_txy_t1, \
 49 | __read_only image3d_t Gamma_txz_t1, \
 50 | __read_only image3d_t Gamma_tyy_t1, \
 51 | __read_only image3d_t Gamma_tyz_t1, \
 52 | __read_only image3d_t Gamma_tzz_t1, \
 53 | __read_only image3d_t Gamma_xtt_t1, \
 54 | __read_only image3d_t Gamma_xtx_t1, \
 55 | __read_only image3d_t Gamma_xty_t1, \
 56 | __read_only image3d_t Gamma_xtz_t1, \
 57 | __read_only image3d_t Gamma_xxx_t1, \
 58 | __read_only image3d_t Gamma_xxy_t1, \
 59 | __read_only image3d_t Gamma_xxz_t1, \
 60 | __read_only image3d_t Gamma_xyy_t1, \
 61 | __read_only image3d_t Gamma_xyz_t1, \
 62 | __read_only image3d_t Gamma_xzz_t1, \
 63 | __read_only image3d_t Gamma_ytt_t1, \
 64 | __read_only image3d_t Gamma_ytx_t1, \
 65 | __read_only image3d_t Gamma_yty_t1, \
 66 | __read_only image3d_t Gamma_ytz_t1, \
 67 | __read_only image3d_t Gamma_yxx_t1, \
 68 | __read_only image3d_t Gamma_yxy_t1, \
 69 | __read_only image3d_t Gamma_yxz_t1, \
 70 | __read_only image3d_t Gamma_yyy_t1, \
 71 | __read_only image3d_t Gamma_yyz_t1, \
 72 | __read_only image3d_t Gamma_yzz_t1, \
 73 | __read_only image3d_t Gamma_ztt_t1, \
 74 | __read_only image3d_t Gamma_ztx_t1, \
 75 | __read_only image3d_t Gamma_zty_t1, \
 76 | __read_only image3d_t Gamma_ztz_t1, \
 77 | __read_only image3d_t Gamma_zxx_t1, \
 78 | __read_only image3d_t Gamma_zxy_t1, \
 79 | __read_only image3d_t Gamma_zxz_t1, \
 80 | __read_only image3d_t Gamma_zyy_t1, \
 81 | __read_only image3d_t Gamma_zyz_t1, \
 82 | __read_only image3d_t Gamma_zzz_t1, \
 83 | __read_only image3d_t g_tt_t1, \
 84 | __read_only image3d_t g_tx_t1, \
 85 | __read_only image3d_t g_ty_t1, \
 86 | __read_only image3d_t g_tz_t1, \
 87 | __read_only image3d_t g_xx_t1, \
 88 | __read_only image3d_t g_xy_t1, \
 89 | __read_only image3d_t g_xz_t1, \
 90 | __read_only image3d_t g_yy_t1, \
 91 | __read_only image3d_t g_yz_t1, \
 92 | __read_only image3d_t g_zz_t1, \
 93 | __read_only image3d_t rho_t1, \
 94 | __read_only image3d_t Gamma_ttt_t2, \
 95 | __read_only image3d_t Gamma_ttx_t2, \
 96 | __read_only image3d_t Gamma_tty_t2, \
 97 | __read_only image3d_t Gamma_ttz_t2, \
 98 | __read_only image3d_t Gamma_txx_t2, \
 99 | __read_only image3d_t Gamma_txy_t2, \
100 | __read_only image3d_t Gamma_txz_t2, \
101 | __read_only image3d_t Gamma_tyy_t2, \
102 | __read_only image3d_t Gamma_tyz_t2, \
103 | __read_only image3d_t Gamma_tzz_t2, \
104 | __read_only image3d_t Gamma_xtt_t2, \
105 | __read_only image3d_t Gamma_xtx_t2, \
106 | __read_only image3d_t Gamma_xty_t2, \
107 | __read_only image3d_t Gamma_xtz_t2, \
108 | __read_only image3d_t Gamma_xxx_t2, \
109 | __read_only image3d_t Gamma_xxy_t2, \
110 | __read_only image3d_t Gamma_xxz_t2, \
111 | __read_only image3d_t Gamma_xyy_t2, \
112 | __read_only image3d_t Gamma_xyz_t2, \
113 | __read_only image3d_t Gamma_xzz_t2, \
114 | __read_only image3d_t Gamma_ytt_t2, \
115 | __read_only image3d_t Gamma_ytx_t2, \
116 | __read_only image3d_t Gamma_yty_t2, \
117 | __read_only image3d_t Gamma_ytz_t2, \
118 | __read_only image3d_t Gamma_yxx_t2, \
119 | __read_only image3d_t Gamma_yxy_t2, \
120 | __read_only image3d_t Gamma_yxz_t2, \
121 | __read_only image3d_t Gamma_yyy_t2, \
122 | __read_only image3d_t Gamma_yyz_t2, \
123 | __read_only image3d_t Gamma_yzz_t2, \
124 | __read_only image3d_t Gamma_ztt_t2, \
125 | __read_only image3d_t Gamma_ztx_t2, \
126 | __read_only image3d_t Gamma_zty_t2, \
127 | __read_only image3d_t Gamma_ztz_t2, \
128 | __read_only image3d_t Gamma_zxx_t2, \
129 | __read_only image3d_t Gamma_zxy_t2, \
130 | __read_only image3d_t Gamma_zxz_t2, \
131 | __read_only image3d_t Gamma_zyy_t2, \
132 | __read_only image3d_t Gamma_zyz_t2, \
133 | __read_only image3d_t Gamma_zzz_t2, \
134 | __read_only image3d_t g_tt_t2, \
135 | __read_only image3d_t g_tx_t2, \
136 | __read_only image3d_t g_ty_t2, \
137 | __read_only image3d_t g_tz_t2, \
138 | __read_only image3d_t g_xx_t2, \
139 | __read_only image3d_t g_xy_t2, \
140 | __read_only image3d_t g_xz_t2, \
141 | __read_only image3d_t g_yy_t2, \
142 | __read_only image3d_t g_yz_t2, \
143 | __read_only image3d_t g_zz_t2, \
144 | __read_only image3d_t rho_t2
145 | 
146 | 
147 | #define SPACETIME_ARGS \
148 | bounding_box, \
149 | num_points,   \
150 | Gamma_ttt_t1, \
151 | Gamma_ttx_t1, \
152 | Gamma_tty_t1, \
153 | Gamma_ttz_t1, \
154 | Gamma_txx_t1, \
155 | Gamma_txy_t1, \
156 | Gamma_txz_t1, \
157 | Gamma_tyy_t1, \
158 | Gamma_tyz_t1, \
159 | Gamma_tzz_t1, \
160 | Gamma_xtt_t1, \
161 | Gamma_xtx_t1, \
162 | Gamma_xty_t1, \
163 | Gamma_xtz_t1, \
164 | Gamma_xxx_t1, \
165 | Gamma_xxy_t1, \
166 | Gamma_xxz_t1, \
167 | Gamma_xyy_t1, \
168 | Gamma_xyz_t1, \
169 | Gamma_xzz_t1, \
170 | Gamma_ytt_t1, \
171 | Gamma_ytx_t1, \
172 | Gamma_yty_t1, \
173 | Gamma_ytz_t1, \
174 | Gamma_yxx_t1, \
175 | Gamma_yxy_t1, \
176 | Gamma_yxz_t1, \
177 | Gamma_yyy_t1, \
178 | Gamma_yyz_t1, \
179 | Gamma_yzz_t1, \
180 | Gamma_ztt_t1, \
181 | Gamma_ztx_t1, \
182 | Gamma_zty_t1, \
183 | Gamma_ztz_t1, \
184 | Gamma_zxx_t1, \
185 | Gamma_zxy_t1, \
186 | Gamma_zxz_t1, \
187 | Gamma_zyy_t1, \
188 | Gamma_zyz_t1, \
189 | Gamma_zzz_t1, \
190 | g_tt_t1, \
191 | g_tx_t1, \
192 | g_ty_t1, \
193 | g_tz_t1, \
194 | g_xx_t1, \
195 | g_xy_t1, \
196 | g_xz_t1, \
197 | g_yy_t1, \
198 | g_yz_t1, \
199 | g_zz_t1, \
200 | rho_t1, \
201 | Gamma_ttt_t2, \
202 | Gamma_ttx_t2, \
203 | Gamma_tty_t2, \
204 | Gamma_ttz_t2, \
205 | Gamma_txx_t2, \
206 | Gamma_txy_t2, \
207 | Gamma_txz_t2, \
208 | Gamma_tyy_t2, \
209 | Gamma_tyz_t2, \
210 | Gamma_tzz_t2, \
211 | Gamma_xtt_t2, \
212 | Gamma_xtx_t2, \
213 | Gamma_xty_t2, \
214 | Gamma_xtz_t2, \
215 | Gamma_xxx_t2, \
216 | Gamma_xxy_t2, \
217 | Gamma_xxz_t2, \
218 | Gamma_xyy_t2, \
219 | Gamma_xyz_t2, \
220 | Gamma_xzz_t2, \
221 | Gamma_ytt_t2, \
222 | Gamma_ytx_t2, \
223 | Gamma_yty_t2, \
224 | Gamma_ytz_t2, \
225 | Gamma_yxx_t2, \
226 | Gamma_yxy_t2, \
227 | Gamma_yxz_t2, \
228 | Gamma_yyy_t2, \
229 | Gamma_yyz_t2, \
230 | Gamma_yzz_t2, \
231 | Gamma_ztt_t2, \
232 | Gamma_ztx_t2, \
233 | Gamma_zty_t2, \
234 | Gamma_ztz_t2, \
235 | Gamma_zxx_t2, \
236 | Gamma_zxy_t2, \
237 | Gamma_zxz_t2, \
238 | Gamma_zyy_t2, \
239 | Gamma_zyz_t2, \
240 | Gamma_zzz_t2, \
241 | g_tt_t2, \
242 | g_tx_t2, \
243 | g_ty_t2, \
244 | g_tz_t2, \
245 | g_xx_t2, \
246 | g_xy_t2, \
247 | g_xz_t2, \
248 | g_yy_t2, \
249 | g_yz_t2, \
250 | g_zz_t2, \
251 | rho_t2
252 | 


--------------------------------------------------------------------------------
/sim-org/dyst.cl:
--------------------------------------------------------------------------------
  1 | /* Automatically generated, do not edit */
  2 | 
  3 | struct gr {
  4 |     real4 q; /* gr_icond() fills this as (0, x, y, z) */
  5 |     real4 u; /* gr_icond() fills this as (1, si*cj, si*sj, ci); getuu() contracts it via down() */
  6 | };
  7 | 
  8 | inline real GRAY_SQUARE(real x)    { return x*x; }         /* x^2 */
  9 | inline real GRAY_CUBE(real x)      { return x*x*x; }       /* x^3 */
 10 | inline real GRAY_FOUR(real x)      { return x*x*x*x; }     /* x^4 */
 11 | inline real GRAY_SQRT(real x)      { return sqrt(x); }     /* x^(1/2) */
 12 | inline real GRAY_SQRT_CUBE(real x) { return sqrt(x*x*x); } /* sqrt(x^3) */
 13 | 
 14 | /* 4x4 matrix product a*b; both matrices are stored row by row in a real16.
 15 |  * Element (i, j) of the result is dot(row i of a, column j of b). */
 16 | real16
 17 | matrix_product(real16 a, real16 b)
 18 | {
 19 |   const real4 r0 = a.s0123, r1 = a.s4567; /* rows of a */
 20 |   const real4 r2 = a.s89ab, r3 = a.scdef;
 21 |   const real4 c0 = b.s048c, c1 = b.s159d; /* columns of b */
 22 |   const real4 c2 = b.s26ae, c3 = b.s37bf;
 23 | 
 24 |   real16 ab;
 25 | 
 26 |   /* One row of the product per statement */
 27 |   ab.s0123 = (real4){dot(r0, c0), dot(r0, c1), dot(r0, c2), dot(r0, c3)};
 28 |   ab.s4567 = (real4){dot(r1, c0), dot(r1, c1), dot(r1, c2), dot(r1, c3)};
 29 |   ab.s89ab = (real4){dot(r2, c0), dot(r2, c1), dot(r2, c2), dot(r2, c3)};
 30 |   ab.scdef = (real4){dot(r3, c0), dot(r3, c1), dot(r3, c2), dot(r3, c3)};
 31 | 
 32 |   return ab;
 33 | }
 34 | 
 35 | /* a*b for a 4x4 matrix stored row by row: component i is dot(row i of a, b) */
 36 | real4 matrix_vector_product(real16 a, real4 b){
 37 |   const real ab0 = dot(a.s0123, b), ab1 = dot(a.s4567, b);
 38 |   const real ab2 = dot(a.s89ab, b), ab3 = dot(a.scdef, b);
 39 | 
 40 |   return (real4){ab0, ab1, ab2, ab3};
 41 | }
 42 | 
 43 | real
 44 | getrr(real4 q) /* placeholder: ignores q and always returns 1 */
 45 | {
 46 |   return 1; /* \todo define the black hole location and implement getrr() */
 47 | }
 48 | 
 49 | real
 50 | geteps(real4 q) /* placeholder: ignores q and always returns 1 */
 51 | {
 52 |   return 1; /* \todo */
 53 | }
 54 | 
 55 | real4
 56 | down(real4 q, real4 u, SPACETIME_PROTOTYPE_ARGS) /* returns g(q) * u */
 57 | {
 58 |   real16 g;
 59 |   /* 4x4 metric at q, stored row by row; only the upper triangle is interpolated */
 60 |   g.s0 = interpolate(q, bounding_box, num_points, g_tt_t1, g_tt_t2);
 61 |   g.s1 = interpolate(q, bounding_box, num_points, g_tx_t1, g_tx_t2);
 62 |   g.s2 = interpolate(q, bounding_box, num_points, g_ty_t1, g_ty_t2);
 63 |   g.s3 = interpolate(q, bounding_box, num_points, g_tz_t1, g_tz_t2);
 64 |   /* x row: g_xt is copied from g_tx */
 65 |   g.s4 = g.s1;
 66 |   g.s5 = interpolate(q, bounding_box, num_points, g_xx_t1, g_xx_t2);
 67 |   g.s6 = interpolate(q, bounding_box, num_points, g_xy_t1, g_xy_t2);
 68 |   g.s7 = interpolate(q, bounding_box, num_points, g_xz_t1, g_xz_t2);
 69 |   /* y row: g_yt and g_yx are copied from the rows above */
 70 |   g.s8 = g.s2;
 71 |   g.s9 = g.s6;
 72 |   g.sa = interpolate(q, bounding_box, num_points, g_yy_t1, g_yy_t2);
 73 |   g.sb = interpolate(q, bounding_box, num_points, g_yz_t1, g_yz_t2);
 74 |   /* z row: the diagonal entry must use g_zz (it was read from g_yz); fix the generator too */
 75 |   g.sc = g.s3;
 76 |   g.sd = g.s7;
 77 |   g.se = g.sb;
 78 |   g.sf = interpolate(q, bounding_box, num_points, g_zz_t1, g_zz_t2);
 79 | 
 80 |   return matrix_vector_product(g, u);
 81 | }
 82 | 
 83 | real
 84 | getuu(struct gr s, SPACETIME_PROTOTYPE_ARGS)  /**< s: state of the ray */
 85 | {
 86 |   return dot(s.u, down(s.q, s.u, SPACETIME_ARGS)); /* u . (g(q) u) */
 87 | }
 88 | 
 89 | struct gr
 90 | gr_icond(real r_obs, /**< Distance of the observer from the black hole */
 91 |          real i_obs, /**< Inclination angle of the observer in degrees */
 92 |          real j_obs, /**< Azimuthal   angle of the observer in degrees */
 93 |          real alpha, /**< One of the local Cartesian coordinates       */
 94 |          real beta)  /**< The other  local Cartesian coordinate        */
 95 | {
 96 |   const real deg2rad = K(3.14159265358979323846264338327950288) / K(180.0);
 97 |   real cos_i, cos_j; /* written by sincos(), which returns the sine */
 98 |   const real sin_i = sincos(deg2rad * i_obs, &cos_i);
 99 |   const real sin_j = sincos(deg2rad * j_obs, &cos_j);
100 |   struct gr g;
101 |   /* Image-plane offsets (alpha, beta) at distance r_obs, rotated by the two angles */
102 |   const real R0 = r_obs * sin_i - beta  * cos_i;
103 |   const real z  = r_obs * cos_i + beta  * sin_i;
104 |   const real y  = R0    * sin_j + alpha * cos_j;
105 |   const real x  = R0    * cos_j - alpha * sin_j;
106 | 
107 |   g.q = (real4){K(0.0), x, y, z};
108 |   g.u = (real4){K(1.0), sin_i * cos_j, sin_i * sin_j, cos_i};
109 |   return g;
110 | }
111 | 
/**
 * Right-hand side of the ray equations for state \p g.
 *
 * For each upper index (t, x, y, z) the ten independent Gamma components
 * are interpolated at the ray's position and the six remaining slots of
 * the symmetric 4x4 matrix are filled in by swizzling.  Each matrix is
 * then contracted twice with \c u.  The returned structure carries \c u
 * in its first slot and, in its second slot,
 *
 *   -Gamma^mu_{ab} u^a u^b + (Gamma^t_{ab} u^a u^b) u^mu .
 *
 * The t contraction is evaluated once and reused for both terms.
 */
struct gr
gr_rhs(struct gr g, SPACETIME_PROTOTYPE_ARGS)
{
  real4 q = g.q;
  real4 u = g.u;

  real16 GammaUPt, GammaUPx, GammaUPy, GammaUPz;

  /* Only the upper triangle (slots 0-3, 5-7, a, b, f) is interpolated; the
   * lower triangle (slots 4, 8, 9, c, d, e) is copied in one shot below. */

  GammaUPt.s0 = interpolate(q, bounding_box, num_points, Gamma_ttt_t1, Gamma_ttt_t2);
  GammaUPt.s1 = interpolate(q, bounding_box, num_points, Gamma_ttx_t1, Gamma_ttx_t2);
  GammaUPt.s2 = interpolate(q, bounding_box, num_points, Gamma_tty_t1, Gamma_tty_t2);
  GammaUPt.s3 = interpolate(q, bounding_box, num_points, Gamma_ttz_t1, Gamma_ttz_t2);
  GammaUPt.s5 = interpolate(q, bounding_box, num_points, Gamma_txx_t1, Gamma_txx_t2);
  GammaUPt.s6 = interpolate(q, bounding_box, num_points, Gamma_txy_t1, Gamma_txy_t2);
  GammaUPt.s7 = interpolate(q, bounding_box, num_points, Gamma_txz_t1, Gamma_txz_t2);
  GammaUPt.sa = interpolate(q, bounding_box, num_points, Gamma_tyy_t1, Gamma_tyy_t2);
  GammaUPt.sb = interpolate(q, bounding_box, num_points, Gamma_tyz_t1, Gamma_tyz_t2);
  GammaUPt.sf = interpolate(q, bounding_box, num_points, Gamma_tzz_t1, Gamma_tzz_t2);

  GammaUPt.s489 = GammaUPt.s126;
  GammaUPt.scde = GammaUPt.s37b;


  GammaUPx.s0 = interpolate(q, bounding_box, num_points, Gamma_xtt_t1, Gamma_xtt_t2);
  GammaUPx.s1 = interpolate(q, bounding_box, num_points, Gamma_xtx_t1, Gamma_xtx_t2);
  GammaUPx.s2 = interpolate(q, bounding_box, num_points, Gamma_xty_t1, Gamma_xty_t2);
  GammaUPx.s3 = interpolate(q, bounding_box, num_points, Gamma_xtz_t1, Gamma_xtz_t2);
  GammaUPx.s5 = interpolate(q, bounding_box, num_points, Gamma_xxx_t1, Gamma_xxx_t2);
  GammaUPx.s6 = interpolate(q, bounding_box, num_points, Gamma_xxy_t1, Gamma_xxy_t2);
  GammaUPx.s7 = interpolate(q, bounding_box, num_points, Gamma_xxz_t1, Gamma_xxz_t2);
  GammaUPx.sa = interpolate(q, bounding_box, num_points, Gamma_xyy_t1, Gamma_xyy_t2);
  GammaUPx.sb = interpolate(q, bounding_box, num_points, Gamma_xyz_t1, Gamma_xyz_t2);
  GammaUPx.sf = interpolate(q, bounding_box, num_points, Gamma_xzz_t1, Gamma_xzz_t2);

  GammaUPx.s489 = GammaUPx.s126;
  GammaUPx.scde = GammaUPx.s37b;


  GammaUPy.s0 = interpolate(q, bounding_box, num_points, Gamma_ytt_t1, Gamma_ytt_t2);
  GammaUPy.s1 = interpolate(q, bounding_box, num_points, Gamma_ytx_t1, Gamma_ytx_t2);
  GammaUPy.s2 = interpolate(q, bounding_box, num_points, Gamma_yty_t1, Gamma_yty_t2);
  GammaUPy.s3 = interpolate(q, bounding_box, num_points, Gamma_ytz_t1, Gamma_ytz_t2);
  GammaUPy.s5 = interpolate(q, bounding_box, num_points, Gamma_yxx_t1, Gamma_yxx_t2);
  GammaUPy.s6 = interpolate(q, bounding_box, num_points, Gamma_yxy_t1, Gamma_yxy_t2);
  GammaUPy.s7 = interpolate(q, bounding_box, num_points, Gamma_yxz_t1, Gamma_yxz_t2);
  GammaUPy.sa = interpolate(q, bounding_box, num_points, Gamma_yyy_t1, Gamma_yyy_t2);
  GammaUPy.sb = interpolate(q, bounding_box, num_points, Gamma_yyz_t1, Gamma_yyz_t2);
  GammaUPy.sf = interpolate(q, bounding_box, num_points, Gamma_yzz_t1, Gamma_yzz_t2);

  GammaUPy.s489 = GammaUPy.s126;
  GammaUPy.scde = GammaUPy.s37b;


  GammaUPz.s0 = interpolate(q, bounding_box, num_points, Gamma_ztt_t1, Gamma_ztt_t2);
  GammaUPz.s1 = interpolate(q, bounding_box, num_points, Gamma_ztx_t1, Gamma_ztx_t2);
  GammaUPz.s2 = interpolate(q, bounding_box, num_points, Gamma_zty_t1, Gamma_zty_t2);
  GammaUPz.s3 = interpolate(q, bounding_box, num_points, Gamma_ztz_t1, Gamma_ztz_t2);
  GammaUPz.s5 = interpolate(q, bounding_box, num_points, Gamma_zxx_t1, Gamma_zxx_t2);
  GammaUPz.s6 = interpolate(q, bounding_box, num_points, Gamma_zxy_t1, Gamma_zxy_t2);
  GammaUPz.s7 = interpolate(q, bounding_box, num_points, Gamma_zxz_t1, Gamma_zxz_t2);
  GammaUPz.sa = interpolate(q, bounding_box, num_points, Gamma_zyy_t1, Gamma_zyy_t2);
  GammaUPz.sb = interpolate(q, bounding_box, num_points, Gamma_zyz_t1, Gamma_zyz_t2);
  GammaUPz.sf = interpolate(q, bounding_box, num_points, Gamma_zzz_t1, Gamma_zzz_t2);

  GammaUPz.s489 = GammaUPz.s126;
  GammaUPz.scde = GammaUPz.s37b;

  /* Contraction with the t matrix; it enters every component of rhs, so
   * evaluate it a single time. */
  real GammaUU = dot(u, matrix_vector_product(GammaUPt, u));

  real4 rhs = {-GammaUU                                     + GammaUU * u.s0,
               -dot(u, matrix_vector_product(GammaUPx, u)) + GammaUU * u.s1,
               -dot(u, matrix_vector_product(GammaUPy, u)) + GammaUU * u.s2,
               -dot(u, matrix_vector_product(GammaUPz, u)) + GammaUU * u.s3};

  return (struct gr){u, rhs};
}
195 | 


--------------------------------------------------------------------------------
/sim-org/interp.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2021 Gabriele Bozzola
  3 |  *
  4 |  * This file is part of GRay2.
  5 |  *
  6 |  * GRay2 is free software: you can redistribute it and/or modify it
  7 |  * under the terms of the GNU General Public License as published by
  8 |  * the Free Software Foundation, either version 3 of the License, or
  9 |  * (at your option) any later version.
 10 |  *
 11 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 12 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 13 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 14 |  * License for more details.
 15 |  *
 16 |  * You should have received a copy of the GNU General Public License
 17 |  * along with GRay2.  If not, see <https://www.gnu.org/licenses/>.
 18 |  */
 19 | 
 20 | /** \file
 21 |  ** Interpolate fisheye coordinates.
 22 |  **
 23 |  ** To study black holes in dynamical spacetimes, one needs enough resolution
 24 |  ** near the horizons.  This is typically reported as a fraction of the mass of
 25 |  ** the black hole, for example a value of M/80 is the resolution that roughly
 26 |  ** covers the radius of a non-spinning black hole in the puncture gauge with 80
 27 |  ** points.  It is impossible to cover the entire numerical grid with such
 28 |  ** resolution, and possible solutions are mesh refinement or fisheye
 29 |  ** coordinates.  This latter solution consists in deforming the coordinate
 30 |  ** system in such a way that the points are more concentrated near the horizons
 31 |  ** and less outside.  This can be achieved sampling the relevant functions in a
 32 |  ** non uniform way.  The fisheye transformation that we use here is:
 33 |  ** \f[
 34 |  **  x = \sinh \left[ {\left( A \xi + B \right)}^{n} \right]\,,
 35 |  ** \f]
 36 |  ** with \f$x\f$ physical coordinates and \f$\xi\f$ logically-Cartesian
 37 |  ** coordinates.  The parameter \f$n\f$ determines how much concentrated are
 38 |  ** points near the center, and the other two parameters are fixed by fixing the
 39 |  ** extent that \f$x\f$ has to cover.
 40 |  **
 41 |  ** When fisheye coordinates are used, it is critical to correct how OpenCL
 42 |  ** performs the multilinear interpolation to account for the fact that points
 43 |  ** are distributed unevenly.  This module provides the infrastructure to do
 44 |  ** that.  Additionally, it also handles time interpolation (with a linear
 45 |  ** transformation).
 46 |  **
 47 |  **/
 48 | 
 49 | /* In this module we call xyz the physical coordinates and uvw the
 50 |  * unnormalized, OpenCL ones.  The values are always stored in the .s123 slots
 51 |  * of the 4-vectors.  The slot .s0 is not used. */
 52 | 
 53 | inline int4 address_mode(int4 x, int4 size){
 54 |   /* We implement CLK_ADDRESS_CLAMP_TO_EDGE */
 55 | 
 56 |   /* We need this function because we need to convert from physical variables to
 57 |    * unnormalized ones. */
 58 |   return clamp(x, (int4){0,0,0,0}, size - 1);
 59 | }
 60 | 
 61 | inline real4 address_mode_real(real4 x, real4 size){
 62 |   /* We implement CLK_ADDRESS_CLAMP_TO_EDGE */
 63 | 
 64 |   /* We need this function because we need to convert from physical variables to
 65 |    * unnormalized ones. */
 66 |   return clamp(x, (real4){K(0.0), K(0.0), K(0.0), K(0.0)}, size - 1);
 67 | }
 68 | 
 69 | int4 xyz_to_uvw(real4 xyz, real8 bounding_box, int4 num_points){
 70 |   /* Returns the unnormalized point uvw corresponding to xyz (rounding down) */
 71 | 
 72 |   /* FISHEYE IS HERE */
 73 |   /* In this function, we compute the coordinate transformation from physical
 74 |    * coordinates to unnormalized ones.  This does not take into account the
 75 |    * fact that coordinates are unevenly spaced, and it simply consists in the
 76 |    * application of the fisheye transformation. */
 77 | 
 78 |   /* Xmin = {0, xmin, ymin, zmin} */
 79 |   /* Xmax = {0, xmax, ymax, zmax} */
 80 |   real4 Xmin = {K(0.0), bounding_box.s1, bounding_box.s2, bounding_box.s3};
 81 |   real4 Xmax = {K(0.0), bounding_box.s5, bounding_box.s6, bounding_box.s7};
 82 | 
 83 |   /* num_points_real is defined like this because there is no easy way to cast a
 84 |    * vector to a real4, so we instead define a new variables where we cast the
 85 |    * individual variables. */
 86 |   real4 num_points_real = {num_points.s0, num_points.s1, num_points.s2,
 87 |                            num_points.s3};
 88 | 
 89 |   /* The fisheye transformation is hard-coded here */
 90 |   /* We work with n = 3 (n is the exponent in the sinh) */
 91 |   real4 B = cbrt(asinh(Xmin));
 92 |   real4 A = (cbrt(asinh(Xmax)) - B)/(num_points_real - 1);
 93 | 
 94 |   real4 xi = floor((cbrt(asinh(xyz)) - B)/A);
 95 | 
 96 |   return address_mode(convert_int4(xi), num_points);
 97 | }
 98 | 
 99 | real4 uvw_to_xyz(int4 uvw, real8 bounding_box, real4 num_points_real){
100 | 
101 |   /* Returns the physical point xyz corresponding to unnormalized uvw */
102 | 
103 |   /* FISHEYE IS HERE */
104 | 
105 |   /* Xmin = {0, xmin, ymin, zmin} */
106 |   /* Xmax = {0, xmax, ymax, zmax} */
107 |   real4 Xmin = {K(0.0), bounding_box.s1, bounding_box.s2, bounding_box.s3};
108 |   real4 Xmax = {K(0.0), bounding_box.s5, bounding_box.s6, bounding_box.s7};
109 | 
110 |   /* uvw_real is defined like this because there is no easy way to cast a
111 |    * vector to a real4, so we instead define a new variables where we cast the
112 |    * individual variables. */
113 |   real4 uvw_real = {uvw.s0, uvw.s1, uvw.s2, uvw.s3};
114 | 
115 |   /* Hardcode the coordinate transformation with n = 3 */
116 |   real4 B = cbrt(asinh(Xmin));
117 |   real4 A = (cbrt(asinh(Xmax)) - B)/(num_points_real - 1);
118 | 
119 |   real4 fact = (A * uvw_real + B);
120 |   return sinh(fact * fact * fact);
121 | }
122 | 
real4 find_correct_uvw(real4 xyz,
                       real8 bounding_box,
                       int4 num_points){

  /* Compute the unnormalized OpenCL coordinates that make the hardware
   * multilinear interpolation return the value belonging to the physical
   * point xyz.
   *
   * Steps: locate the grid index just below xyz, convert that index and its
   * successor back to physical coordinates, and interpolate linearly
   * between the two to obtain a fractional index. */

  /* Component-wise conversion of the point counts to reals */
  const real4 n_real = {num_points.s0, num_points.s1, num_points.s2,
                        num_points.s3};

  /* Lower edge of the cell containing xyz, and the edge right after it */
  const int4 cell_lo = xyz_to_uvw(xyz, bounding_box, num_points);
  const int4 cell_hi = cell_lo + 1;

  const real4 xyz_lo = uvw_to_xyz(cell_lo, bounding_box, n_real);
  const real4 xyz_hi = uvw_to_xyz(cell_hi, bounding_box, n_real);

  /* Component-wise conversion of the lower index to reals */
  const real4 cell_lo_real = {cell_lo.s0, cell_lo.s1, cell_lo.s2, cell_lo.s3};

  /* Fractional index from linear interpolation of the coordinates */
  const real4 fractional = cell_lo_real + (xyz - xyz_lo)/(xyz_hi - xyz_lo);

  /* Clamp to edge so that nothing falls outside the range where the data
   * are defined */
  const real4 clamped = address_mode_real(fractional, n_real);

  /* The final shift by 0.5 is essential: OpenCL uses a pixel offset of 0.5 */
  const real4 half_pixel = (real4){K(0.5), K(0.5), K(0.5), K(0.5)};

  return clamped + half_pixel;
}
165 | 
real space_interpolate(real4 xyz,
                       real8 bounding_box,
                       int4 num_points,
                       __read_only image3d_t var){

  /* Sample the image var at the physical point xyz and return the first
   * channel of the result */
  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
                          | CLK_ADDRESS_CLAMP_TO_EDGE
                          | CLK_FILTER_LINEAR;

  const real4 uvw = find_correct_uvw(xyz, bounding_box, num_points);

  /* read_imagef wants the coordinates in slots 0, 1 and 2 (slot 3 is
   * ignored), whereas this module keeps them in slots 1, 2 and 3 */
  const real4 coords = uvw.s1233;

  return read_imagef(var, sampler, coords).x;
}
182 | 
real interpolate(real4 q,
                 real8 bounding_box,
                 int4 num_points,
                 __read_only image3d_t var_t1,
                 __read_only image3d_t var_t2){

  /* Evaluate var at q.  The time extrema are read from slots .s0 and .s4 of
   * bounding_box.  When they coincide only var_t1 is sampled; otherwise the
   * spatial samples of var_t1 and var_t2 are blended linearly in time. */

  const real t_lo = bounding_box.s0;
  const real t_hi = bounding_box.s4;

  /* The first time level is needed in either case */
  const real y_lo = space_interpolate(q, bounding_box, num_points, var_t1);

  if (t_lo == t_hi)
    return y_lo;

  const real y_hi = space_interpolate(q, bounding_box, num_points, var_t2);

  /* y(t) = y_1 + (t - t_1) / (t2 - t1) * (y_2 - y_1) */
  return y_lo + (q.s0 - t_lo) / (t_hi - t_lo) * (y_hi - y_lo);
}
206 | 


--------------------------------------------------------------------------------
/tools/boosted_ks.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright (C) 2021 Pierre Christian, Gabriele Bozzola
  4 | # Copyright (C) 2020 Pierre Christian
  5 | #
  6 | # This program is free software: you can redistribute it and/or modify
  7 | # it under the terms of the GNU General Public License as published by
  8 | # the Free Software Foundation, either version 3 of the License, or
  9 | # (at your option) any later version.
 10 | #
 11 | # This program is distributed in the hope that it will be useful, but
 12 | # WITHOUT ANY WARRANTY; without even the implied warranty of
 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14 | # General Public License for more details.
 15 | #
 16 | # You should have received a copy of the GNU General Public License
 17 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 18 | 
 19 | ############## How to use ##################################
 20 | """
 21 | Step 1) Check that Pierre wrote down metric/Christoffel symbols correctly
 22 | 
 23 | Step 2) Compute the Christoffel symbol Gamma^i_kl using Christoffel(Param,Coord,i,k,l). Param=parameters of the metric; for the boosted KS metric, Param=(mass,spin,boost velocity). Coord=coordinates; for the KS metric, Coord=(time,x,y,z). Pass both as tuples, because the cached helper functions need hashable arguments. i, k, and l are spacetime indices; they range from 0 to 3 (yes, I use i for something that ranges from 0 to 3).
 24 | 
 25 | Other functions:
 26 | -) metric(Param,Coord,i,j,up) gives the metric. The boolean switch up selects the contravariant metric when True and the covariant metric when False (the default). Again, despite how they appear, i and j are spacetime indices (ranging from 0 to 3).
 27 | 
 28 | -) dm(Param,Coord,i,j,wrt,up) gives the metric derivatives. The variable wrt specifies the coordinate with respect to which the derivative is taken. For example, dm(Param,Coord,i,j,0,up) is the metric derivative w.r.t. time.
 29 | 
 30 | """
 31 | 
 32 | ############## import all sorts of things, most of them not used ##################################
 33 | 
 34 | import numpy as np
 35 | from functools import lru_cache
 36 | 
 37 | ############## Dual number stuff ###########################
 38 | 
 39 | 
 40 | class dual:
 41 |     def __init__(self, first, second):
 42 |         self.f = first
 43 |         self.s = second
 44 | 
 45 |     @lru_cache(1024)
 46 |     def __mul__(self, other):
 47 |         if isinstance(other, dual):
 48 |             return dual(self.f * other.f, self.s * other.f + self.f * other.s)
 49 |         else:
 50 |             return dual(self.f * other, self.s * other)
 51 | 
 52 |     @lru_cache(1024)
 53 |     def __rmul__(self, other):
 54 |         if isinstance(other, dual):
 55 |             return dual(self.f * other.f, self.s * other.f + self.f * other.s)
 56 |         else:
 57 |             return dual(self.f * other, self.s * other)
 58 | 
 59 |     @lru_cache(1024)
 60 |     def __add__(self, other):
 61 |         if isinstance(other, dual):
 62 |             return dual(self.f + other.f, self.s + other.s)
 63 |         else:
 64 |             return dual(self.f + other, self.s)
 65 | 
 66 |     @lru_cache(1024)
 67 |     def __radd__(self, other):
 68 |         if isinstance(other, dual):
 69 |             return dual(self.f + other.f, self.s + other.s)
 70 |         else:
 71 |             return dual(self.f + other, self.s)
 72 | 
 73 |     @lru_cache(1024)
 74 |     def __sub__(self, other):
 75 |         if isinstance(other, dual):
 76 |             return dual(self.f - other.f, self.s - other.s)
 77 |         else:
 78 |             return dual(self.f - other, self.s)
 79 | 
 80 |     @lru_cache(1024)
 81 |     def __rsub__(self, other):
 82 |         return dual(other, 0) - self
 83 | 
 84 |     @lru_cache(1024)
 85 |     def __truediv__(self, other):
 86 |         """ when the first component of the divisor is not 0 """
 87 |         if isinstance(other, dual):
 88 |             return dual(
 89 |                 self.f / other.f,
 90 |                 (self.s * other.f - self.f * other.s) / (other.f ** 2.0),
 91 |             )
 92 |         else:
 93 |             return dual(self.f / other, self.s / other)
 94 | 
 95 |     @lru_cache(1024)
 96 |     def __rtruediv__(self, other):
 97 |         return dual(other, 0).__truediv__(self)
 98 | 
 99 |     @lru_cache(1024)
100 |     def __neg__(self):
101 |         return dual(-self.f, -self.s)
102 | 
103 |     @lru_cache(1024)
104 |     def __pow__(self, power):
105 |         return dual(self.f ** power, self.s * power * self.f ** (power - 1))
106 | 
@lru_cache(1024)
def dif(func, x):
    """Derivative of ``func`` at ``x`` through dual numbers.

    ``func`` is called once with ``dual(x, 1.0)``; the derivative is the
    ``s`` part of what it returns.  If ``func`` does not return a ``dual``
    (a constant such as gtt := 0), the derivative is 0.
    """
    funcdual = func(dual(x, 1.0))

    if isinstance(funcdual, dual):
        # Reuse the value computed above instead of calling func again
        return funcdual.s

    # This is for when the function is a constant, e.g. gtt:=0
    return 0
116 | 
117 | 
118 | ################### Metric #####################################
119 | 
def metric(Param, Coord, i, j, up=False,
           Kerr_KerrSchild__t=0, Kerr_KerrSchild__x=0,
           Kerr_KerrSchild__y=0, Kerr_KerrSchild__z=0):
    """Component (i, j) of the boosted Kerr-Schild metric.

    Param unpacks to (m, a, boostv) and Coord to (t, x, y, z).  The
    Kerr_KerrSchild__* keywords are subtracted from the coordinates before
    the boost along z is applied.  With ``up`` true the component is taken
    from the ``- 2 H l l`` table (contravariant), otherwise from the
    ``+ 2 H l l`` table (covariant).
    """
    mass, spin, vel = Param
    t, x, y, z = Coord

    lorentz = 1 / (1 - vel**2)**0.5

    # Shifted and boosted coordinates.  t_rest is evaluated like in the
    # boost of z, but nothing below reads it.
    t_rest = lorentz * ((t - Kerr_KerrSchild__t) - vel * (z - Kerr_KerrSchild__z))
    z_rest = lorentz * ((z - Kerr_KerrSchild__z) - vel * (t - Kerr_KerrSchild__t))
    x_rest = x - Kerr_KerrSchild__x
    y_rest = y - Kerr_KerrSchild__y

    rho_sq = x_rest**2 + y_rest**2 + z_rest**2

    r_sq = (0.5 * (rho_sq - spin**2)
            + (0.25 * (rho_sq - spin**2)**2 + spin**2 * z_rest**2)**0.5)
    r = r_sq**0.5
    cos_theta = z_rest / r

    h = mass * r / (r**2 + spin**2 * cos_theta**2)

    # Vector l before the boost ...
    l_t_rest = 1
    l_x_rest = (r * x_rest + spin * y_rest) / (r**2 + spin**2)
    l_y_rest = (r * y_rest - spin * x_rest) / (r**2 + spin**2)
    l_z_rest = z_rest / r

    # ... and after it (only the t and z components mix)
    l_t = lorentz * (l_t_rest - vel * l_z_rest)
    l_z = lorentz * (l_z_rest - vel * l_t_rest)
    l_x = l_x_rest
    l_y = l_y_rest

    # Covariant components
    dn_tt = - 1 + 2 * h * l_t * l_t
    dn_tx = 2 * h * l_t * l_x
    dn_ty = 2 * h * l_t * l_y
    dn_tz = 2 * h * l_t * l_z
    dn_xx = 1 + 2 * h * l_x * l_x
    dn_yy = 1 + 2 * h * l_y * l_y
    dn_zz = 1 + 2 * h * l_z * l_z
    dn_xy = 2 * h * l_x * l_y
    dn_yz = 2 * h * l_y * l_z
    dn_zx = 2 * h * l_z * l_x

    # Contravariant components
    up_tt = - 1 - 2 * h * l_t * l_t
    up_tx = 2 * h * l_t * l_x
    up_ty = 2 * h * l_t * l_y
    up_tz = 2 * h * l_t * l_z
    up_xx = 1 - 2 * h * l_x * l_x
    up_yy = 1 - 2 * h * l_y * l_y
    up_zz = 1 - 2 * h * l_z * l_z
    up_xy = - 2 * h * l_x * l_y
    up_yz = - 2 * h * l_y * l_z
    up_zx = - 2 * h * l_z * l_x

    covariant = [[dn_tt, dn_tx, dn_ty, dn_tz],
                 [dn_tx, dn_xx, dn_xy, dn_zx],
                 [dn_ty, dn_xy, dn_yy, dn_yz],
                 [dn_tz, dn_zx, dn_yz, dn_zz]]

    contravariant = [[up_tt, up_tx, up_ty, up_tz],
                     [up_tx, up_xx, up_xy, up_zx],
                     [up_ty, up_xy, up_yy, up_yz],
                     [up_tz, up_zx, up_yz, up_zz]]

    table = contravariant if up else covariant
    return table[i][j]
187 | 
188 | 
189 | ##################### Metric derivatives #############################
190 | 
@lru_cache(1024)
def dm(Param, Coord, i, j, wrt, up=False):
    """Derivative of metric component (i, j) along coordinate ``wrt``.

    ``wrt`` is 0, 1, 2 or 3.  The chosen coordinate is replaced by the
    differentiation variable while the other three are wrapped as constant
    duals; ``dif`` then extracts the derivative.  ``up`` is forwarded to
    ``metric``.  Any other value of ``wrt`` that still indexes ``Coord``
    yields ``None``.
    """
    seed = Coord[wrt]

    # All four coordinates as constants (zero derivative part)
    frozen = [dual(Coord[axis], 0) for axis in range(4)]

    if wrt not in (0, 1, 2, 3):
        return None

    def along_axis(p):
        # Swap the differentiated coordinate in, leave the rest frozen
        point = list(frozen)
        point[wrt] = p
        return metric(Param, tuple(point), i, j, up)

    return dif(along_axis, seed)
229 | 
230 | 
231 | ##################### Christoffel Symbols #############################
232 | 
@lru_cache(1024)
def Chris_anc_A(Param, Coord, i, m, k, l):
    """One term g^{im} * d_l g_{mk} of the sum over m."""
    inverse_im = metric(Param, Coord, i, m, up=True)
    d_l_g_mk = dm(Param, Coord, m, k, l)
    return inverse_im * d_l_g_mk
239 | 
@lru_cache(1024)
def Chris_anc_B(Param, Coord, i, m, k, l):
    """One term g^{im} * d_k g_{ml} of the sum over m."""
    inverse_im = metric(Param, Coord, i, m, up=True)
    d_k_g_ml = dm(Param, Coord, m, l, k)
    return inverse_im * d_k_g_ml
246 | 
@lru_cache(1024)
def Chris_anc_C(Param, Coord, i, m, k, l):
    """One term g^{im} * d_m g_{kl} of the sum over m."""
    inverse_im = metric(Param, Coord, i, m, up=True)
    d_m_g_kl = dm(Param, Coord, k, l, m)
    return inverse_im * d_m_g_kl
253 | 
def Christoffel(Param, Coord, i, k, l):
    """ Gamma^i_kl

    Computed as 0.5 * (A + B - C), where A, B and C are the sums over
    m = 0..3 of Chris_anc_A, Chris_anc_B and Chris_anc_C respectively.
    """

    def summed_over_m(term):
        # Accumulate left to right, starting from the m = 0 term
        total = term(Param, Coord, i, 0, k, l)
        for m in (1, 2, 3):
            total = total + term(Param, Coord, i, m, k, l)
        return total

    sum_a = summed_over_m(Chris_anc_A)
    sum_b = summed_over_m(Chris_anc_B)
    sum_c = summed_over_m(Chris_anc_C)

    return 0.5 * (sum_a + sum_b - sum_c)
276 | 


--------------------------------------------------------------------------------
/sim-org/gray.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | #include "gray.h"
 21 | 
 22 | #include <lux/mangle.h>
 23 | #include <lux/zalloc.h>
 24 | 
 25 | #include <math.h>
 26 | #include <stdio.h>
 27 | #include <unistd.h>				/* For access and F_OK */
 28 | #include <hdf5.h>
 29 | 
 30 | inline static real time_at_snapshot(Lux_job *ego, int snapshot_number){
 31 | 	/* Return the time corresponding to the given snapshot */
 32 | 
 33 | 	/* Times are saved as chars, so we need to do operations with this
 34 | 	 * data type.  This is used to read the times in the HDF5 files. */
 35 | 	char *rem;
 36 | 
 37 | 	return strtod(EGO->available_times[snapshot_number], &rem);
 38 | }
 39 | 
 40 | 
 41 | static size_t find_snapshot(Lux_job *ego, real t){
 42 | 	/* Find snapshot number so that t1 <= t <= t2, where t1 is the time
 43 | 	 * corresponding to the snapshot number */
 44 | 
 45 | 	real t1, t2;
 46 | 
 47 | 	/* We assume that snapshots they are ordered from the min to the max. */
 48 | 	size_t snap = -1;
 49 | 	/* We have already performed all the necessary checks, so this loop should
 50 | 	 * be well defined. */
 51 | 	do{
 52 | 		snap++;
 53 | 		t1 = time_at_snapshot(ego, snap);
 54 | 		t2 = time_at_snapshot(ego, snap + 1);
 55 | 		/* It has to be that slow_light_t2 > slow_light_t1 */
 56 | 	}while(!(t >= t1 && t <= t2));
 57 | 
 58 | 	return snap;
 59 | }
 60 | 
 61 | 
 62 | static int
 63 | _conf(Lux_job *ego, const char *restrict arg)
 64 | {
 65 | 	/** \page newopts New Run-Time Options
 66 | 	 **
 67 | 	 ** Turn hard-wired constants into run-time options
 68 | 	 **
 69 | 	 ** GRay2 uses the lux framework and hence follows lux's
 70 | 	 ** approach to support many run-time options.  To turn
 71 | 	 ** hard-wired constants into run-time options, one needs to
 72 | 	 **
 73 | 	 ** -# Add an option table ".opts" file in the "sim/"
 74 | 	 **    directory.
 75 | 	 ** -# Embed the automatically generated structure to `struct
 76 | 	 **    gray` in "sim/gray.h"
 77 | 	 ** -# Logically `&&` the automatically generated configure
 78 | 	 **    function to the return values of `_conf()` in
 79 | 	 **    "sim/gray.c".
 80 | 	 **/
 81 | 	int invalid;
 82 | 	real *nu;
 83 | 
 84 | 	lux_debug("GRay2: configuring instance %p with \"%s\"\n", ego, arg);
 85 | 
 86 | 	nu = EGO->param.nu; /* save the previous nu */
 87 | 
 88 | 	invalid = (icond_config(&EGO->icond, arg) &&
 89 | 	           param_config(&EGO->param, arg) &&
 90 | 	           setup_config(&EGO->setup, arg));
 91 | 
 92 | 	if(EGO->param.nu != nu) { /* nu was configured */
 93 | 		if(nu)
 94 | 			free(nu); /* avoid memory leackage by freeing the old nu */
 95 | 
 96 | 		nu = EGO->param.nu;
 97 | 		if(isnan(nu[0])) {
 98 | 			lux_print("nu: []\n");
 99 | 			EGO->n_freq = 0;
100 | 		} else {
101 | 			size_t n;
102 | 			lux_print("nu: [%f", nu[0]);
103 | 			for(n = 1; !isnan(nu[n]); ++n)
104 | 				lux_print(", %f", nu[n]);
105 | 			lux_print("]\n");
106 | 			EGO->n_freq = n;
107 | 		}
108 | 	}
109 | 
110 | 	return invalid;
111 | }
112 | 
113 | static int
114 | _init(Lux_job *ego)
115 | {
116 | 	Lux_opencl *ocl; /* to be loaded */
117 | 
118 | 	struct param *p = &EGO->param;
119 | 	struct setup *s = &EGO->setup;
120 | 
121 | 	const size_t sz     = s->precision;
122 | 	const size_t n_rays = p->h_rays * p->w_rays;
123 | 	const size_t n_data = EGO->n_coor + EGO->n_freq * 2;
124 | 	const size_t n_info = EGO->n_info;
125 | 
126 | 	cl_mem_flags flags  = CL_MEM_READ_WRITE;
127 | 
128 | 	lux_debug("GRay2: initializing instance %p\n", ego);
129 | 
130 | 	CKR(EGO->ocl    = ocl    = build(ego),                       cleanup1);
131 | 	CKR(EGO->data   = ocl->mk(ocl, sz * n_rays * n_data, flags), cleanup2);
132 | 	CKR(EGO->info   = ocl->mk(ocl, sz * n_rays * n_info, flags), cleanup3);
133 | 	CKR(EGO->evolve = ocl->mkkern(ocl, "evolve_drv"),            cleanup4);
134 | 
135 | 	return EXIT_SUCCESS;
136 | 
137 |  cleanup4:
138 | 	ocl->rm(ocl, EGO->info);
139 |  cleanup3:
140 | 	ocl->rm(ocl, EGO->data);
141 |  cleanup2:
142 | 	lux_unload(EGO->ocl);
143 |  cleanup1:
144 | 	return EXIT_FAILURE;
145 | }
146 | /** Execute the job: check and load the spacetime data, then evolve and dump the rays; returns EXIT_SUCCESS or EXIT_FAILURE */
147 | static int
148 | _exec(Lux_job *ego)
149 | {
150 | 	struct param *p = &EGO->param;
151 | 	struct setup *s = &EGO->setup;
152 | 
153 | 	const  size_t n_rays  = p->h_rays * p->w_rays;
154 | 
155 | 	       size_t i       = s->i_init;
156 | 	const  size_t n_sub   = s->n_sub;
157 | 	const  size_t n_dump  = s->n_dump;
158 | 
159 | 	const  real t_init  = s->t_init;
160 | 	const  real dt_dump = s->dt_dump;
161 | 	/* With slow light, these bracket the two snapshots currently loaded. */
162 | 	real slow_light_t1, slow_light_t2;
163 | 
164 | 	size_t frozen_spacetime = p->enable_fast_light;
165 | 	size_t only_one_snapshot = 0;
166 | 
167 | 	lux_debug("GRay2: executing instance %p\n", ego);
168 | 
169 | 	lux_print("GRay2: Reading spacetime from file %s\n", p->dyst_file);
170 | 
171 | 	/* Basic checks: the file must exist and must open as an HDF5 file */
172 | 	lux_check_failure_code(access(p->dyst_file, F_OK), cleanup1);
173 | 	hid_t file_id = H5Fopen(p->dyst_file, H5F_ACC_RDONLY, H5P_DEFAULT);
174 | 	if (file_id < 0) goto cleanup2; /* H5Fopen reports failure with a negative id */
175 | 	H5Fclose(file_id); /* the handle only served the check above; do not leak it */
176 | 	/* We list all the available times in the file */
177 | 	lux_check_failure_code(populate_ego_available_times(ego), cleanup3);
178 | 
179 | 	/* We load the coordinates */
180 | 	lux_check_failure_code(load_coordinates(ego), cleanup3);
181 | 
182 | 	/* If max_available_time is equal to the first time available, it
183 | 	 * means that it is the only one. */
184 | 
185 | 	real min_available_time = time_at_snapshot(ego, 0);
186 | 	if (EGO->max_available_time == min_available_time){
187 | 		lux_print("Found only one time in data, freezing spacetime\n");
188 | 		only_one_snapshot = 1;
189 | 		frozen_spacetime = 1;
190 | 	}else{
191 | 		/* It does not make sense to perform the integration if we don't have
192 | 		 * the desired initial time and final in range, unless we only have one
193 | 		 * time snapshot. */
194 | 		if ((t_init < min_available_time || t_init > EGO->max_available_time)){
195 | 			lux_print("ERROR: t_init (%4.1f) is outside domain of the data (%5.1f, %5.1f)\n",
196 | 					  t_init, min_available_time, EGO->max_available_time);
197 | 			return EXIT_FAILURE;
198 | 		}
199 | 		real t_final = t_init + (i+1) * dt_dump * n_dump; /* NOTE(review): the loop below ends at t_init + n_dump * dt_dump, so this looks over-estimated when i_init > 0 -- confirm */
200 | 		if ((t_final < min_available_time || t_final > EGO->max_available_time)){
201 | 			lux_print("ERROR: t_final (%4.1f) is outside domain of the data (%5.1f, %5.1f)\n",
202 | 					  t_final, min_available_time, EGO->max_available_time);
203 | 			return EXIT_FAILURE;
204 | 		}
205 | 	}
206 | 
207 | 	/* Snapshot of interest */
208 | 	size_t snap_number;
209 | 
210 | 	if (frozen_spacetime){
211 | 		lux_print("Assuming fast light\n");
212 | 
213 | 		/* If we have only one snapshot, then we must read it (it has index 0). */
214 | 		snap_number = only_one_snapshot ? 0 : find_snapshot(ego, t_init);
215 | 
216 | 		/* 1 here means "load in t1" */
217 | 		lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3);
218 | 
219 | 		/* We have to fill t2 with something, otherwise it will produce errors.
220 | 		 * We fill with the same data as t1.  We do not use copy_snapshot, because
221 | 		 * the function assumes that the EGO already contains valid images.
222 | 		 * Here 0 means "load in t2" */
223 | 		lux_check_failure_code(load_snapshot(ego, snap_number, 0), cleanup3);
224 | 
225 | 		/* Next, we disable time interpolation by setting the two time extrema
226 | 		 * of the bounding box to be the same */
227 | 		EGO->bounding_box.s0 = 0;
228 | 		EGO->bounding_box.s4 = 0;
229 | 	}else{
230 | 		lux_print("Working with slow light\n");
231 | 		/* Here we read the snapshot at t1 and t2 so that they contain t_init. */
232 | 		snap_number = find_snapshot(ego, t_init);
233 | 		slow_light_t1 = time_at_snapshot(ego, snap_number);
234 | 		slow_light_t2 = time_at_snapshot(ego, snap_number + 1);
235 | 		/* 1 here means "load in t1" */
236 | 		lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3);
237 | 		/* 0 here means "load in t2" */
238 | 		lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3);
239 | 		EGO->bounding_box.s0 = slow_light_t1;
240 | 		EGO->bounding_box.s4 = slow_light_t2;
241 | 	}
242 | 
243 | 	lux_print("%zu:  initialize at %4.1f", i, t_init);
244 | 	icond(ego, t_init);
245 | 	dump (ego, i);
246 | 	lux_print(": DONE\n");
247 | 
248 | 	while(i < n_dump) {
249 | 		real ns, t, target;
250 | 
251 | 		t      = t_init +    i  * dt_dump;
252 | 		target = t_init + (++i) * dt_dump; /* i now labels the dump produced by this step */
253 | 
254 | 		lux_print("%zu: %4.1f -> %4.1f", i, t, target);
255 | 		ns = evolve(ego, t, target, n_sub);
256 | 		dump(ego, i);
257 | 		lux_print(": DONE (%.3gns/step/ray)\n", ns/n_sub/n_rays);
258 | 
259 | 		/* If we are not freezing the spacetime, we need to change the snapshots */
260 | 		if (!frozen_spacetime && (target < slow_light_t1 || target > slow_light_t2)){
261 | 
262 | 			/* If snap_number is off by 1 compared to old_snap_number, this
263 | 			 * means that we can read only one of the two snapshots and copy
264 | 			 * over the other one.  If it is off by more than 1, then we have to
265 | 			 * read them both. */
266 | 			size_t old_snap_number = snap_number;
267 | 			snap_number = find_snapshot(ego, target);
268 | 			slow_light_t1 = time_at_snapshot(ego, snap_number);
269 | 			slow_light_t2 = time_at_snapshot(ego, snap_number + 1);
270 | 
271 | 			if (snap_number == old_snap_number + 1){
272 | 				/* In this case, the old t2 has to become the new t1.  Here 1
273 | 				 * means "copy to t1" */
274 | 				copy_snapshot(ego, 1);
275 | 				/* 0 here means "load in t2" */
276 | 				lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3);
277 | 			}else if (snap_number == old_snap_number - 1){
278 | 				/* In this case, the old t1 has to become the new t2.  Here 0
279 | 				 * means "copy to t2" */
280 | 				copy_snapshot(ego, 0);
281 | 				/* 1 here means "load in t1" */
282 | 				lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3);
283 | 			}else{
284 | 				/* We have to read them both */
285 | 				/* 1 here means "load in t1" */
286 | 				lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3);
287 | 				/* 0 here means "load in t2" */
288 | 				lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3);
289 | 			}
290 | 			/* Update bounding box */
291 | 			EGO->bounding_box.s0 = slow_light_t1;
292 | 			EGO->bounding_box.s4 = slow_light_t2;
293 | 		}
294 | 	}
295 | 
296 | 	return EXIT_SUCCESS;
297 | 
298 | cleanup1: /* the spacetime file does not exist or is not accessible */
299 | 	lux_print("ERROR: File %s could not be read\n", p->dyst_file);
300 | 	return EXIT_FAILURE;
301 | cleanup2: /* the spacetime file exists but H5Fopen() rejected it */
302 | 	lux_print("ERROR: File %s is not a valid HDF5 file\n", p->dyst_file);
303 | 	return EXIT_FAILURE;
304 | cleanup3: /* a loader failed; no message is printed here */
305 | 	return EXIT_FAILURE;
306 | }
307 | /** Module constructor: obtain a struct gray from zalloc(), install the job callbacks and defaults; a NULL allocation is returned as is */
308 | void *
309 | LUX_MKMOD(const void *opts)
310 | {
311 | 	void *ego;
312 | 
313 | 	lux_debug("GRay2: constructing with options %p\n", opts);
314 | 
315 | 	ego = zalloc(sizeof(struct gray));
316 | 	if(!ego)
317 | 		return ego; /* allocation failed: nothing to set up */
318 | 
319 | 	EGO->super.conf = _conf;
320 | 	EGO->super.init = _init;
321 | 	EGO->super.exec = _exec;
322 | 	icond_init(&EGO->icond);
323 | 	param_init(&EGO->param);
324 | 	setup_init(&EGO->setup);
325 | 	EGO->n_coor = 8; /** \todo Adjust n_coor using setup.coordinates. */
326 | 	EGO->n_freq = 0;
327 | 	EGO->n_info = 1; /** \todo Adjust n_info using setup.coordinates. */
328 | 	return ego;
329 | /** Module destructor: release the kernel, the buffers and the OpenCL module held by ego, then free ego; a NULL ego is a no-op */
330 | void
331 | LUX_RMMOD(void *ego)
332 | {
333 | 	Lux_opencl *ocl;
334 | 
335 | 	lux_debug("GRay2: destructing instance %p\n", ego);
336 | 
337 | 	if(!ego)
338 | 		return; /* LUX_MKMOD() returns NULL when allocation fails; behave like free(NULL) */
339 | 	ocl = EGO->ocl;
340 | 
341 | 	if(EGO->evolve) ocl->rmkern(ocl, EGO->evolve);
342 | 	if(EGO->info)   ocl->rm(ocl, EGO->info);
343 | 	if(EGO->data)   ocl->rm(ocl, EGO->data);
344 | 	if(EGO->ocl)    lux_unload(EGO->ocl); /* unload last: the calls above go through ocl */
345 | 	free(ego);
346 | }
347 | 


--------------------------------------------------------------------------------
/doc/Doxyfile:
--------------------------------------------------------------------------------
  1 | # Doxyfile 1.8.11
  2 | 
  3 | #---------------------------------------------------------------------------
  4 | # Project related configuration options
  5 | #---------------------------------------------------------------------------
  6 | DOXYFILE_ENCODING      = UTF-8
  7 | PROJECT_NAME           = GRay
  8 | PROJECT_NUMBER         = 2.0-alpha
  9 | PROJECT_BRIEF          =
 10 | PROJECT_LOGO           =
 11 | OUTPUT_DIRECTORY       =
 12 | CREATE_SUBDIRS         = NO
 13 | ALLOW_UNICODE_NAMES    = NO
 14 | OUTPUT_LANGUAGE        = English
 15 | BRIEF_MEMBER_DESC      = YES
 16 | REPEAT_BRIEF           = YES
 17 | ABBREVIATE_BRIEF       =
 18 | ALWAYS_DETAILED_SEC    = NO
 19 | INLINE_INHERITED_MEMB  = NO
 20 | FULL_PATH_NAMES        = NO
 21 | STRIP_FROM_PATH        =
 22 | STRIP_FROM_INC_PATH    =
 23 | SHORT_NAMES            = NO
 24 | JAVADOC_AUTOBRIEF      = YES
 25 | QT_AUTOBRIEF           = NO
 26 | MULTILINE_CPP_IS_BRIEF = NO
 27 | INHERIT_DOCS           = YES
 28 | SEPARATE_MEMBER_PAGES  = NO
 29 | TAB_SIZE               = 8
 30 | ALIASES                =
 31 | TCL_SUBST              =
 32 | OPTIMIZE_OUTPUT_FOR_C  = YES
 33 | OPTIMIZE_OUTPUT_JAVA   = NO
 34 | OPTIMIZE_FOR_FORTRAN   = NO
 35 | OPTIMIZE_OUTPUT_VHDL   = NO
 36 | EXTENSION_MAPPING      = cl=C++
 37 | MARKDOWN_SUPPORT       = YES
 38 | AUTOLINK_SUPPORT       = YES
 39 | BUILTIN_STL_SUPPORT    = NO
 40 | CPP_CLI_SUPPORT        = NO
 41 | SIP_SUPPORT            = NO
 42 | IDL_PROPERTY_SUPPORT   = YES
 43 | DISTRIBUTE_GROUP_DOC   = NO
 44 | GROUP_NESTED_COMPOUNDS = NO
 45 | SUBGROUPING            = YES
 46 | INLINE_GROUPED_CLASSES = NO
 47 | INLINE_SIMPLE_STRUCTS  = NO
 48 | TYPEDEF_HIDES_STRUCT   = NO
 49 | LOOKUP_CACHE_SIZE      = 0
 50 | #---------------------------------------------------------------------------
 51 | # Build related configuration options
 52 | #---------------------------------------------------------------------------
 53 | EXTRACT_ALL            = YES
 54 | EXTRACT_PRIVATE        = NO
 55 | EXTRACT_PACKAGE        = NO
 56 | EXTRACT_STATIC         = YES
 57 | EXTRACT_LOCAL_CLASSES  = YES
 58 | EXTRACT_LOCAL_METHODS  = NO
 59 | EXTRACT_ANON_NSPACES   = NO
 60 | HIDE_UNDOC_MEMBERS     = NO
 61 | HIDE_UNDOC_CLASSES     = NO
 62 | HIDE_FRIEND_COMPOUNDS  = NO
 63 | HIDE_IN_BODY_DOCS      = NO
 64 | INTERNAL_DOCS          = NO
 65 | CASE_SENSE_NAMES       = YES
 66 | HIDE_SCOPE_NAMES       = NO
 67 | HIDE_COMPOUND_REFERENCE= NO
 68 | SHOW_INCLUDE_FILES     = YES
 69 | SHOW_GROUPED_MEMB_INC  = NO
 70 | FORCE_LOCAL_INCLUDES   = NO
 71 | INLINE_INFO            = YES
 72 | SORT_MEMBER_DOCS       = YES
 73 | SORT_BRIEF_DOCS        = NO
 74 | SORT_MEMBERS_CTORS_1ST = NO
 75 | SORT_GROUP_NAMES       = NO
 76 | SORT_BY_SCOPE_NAME     = NO
 77 | STRICT_PROTO_MATCHING  = NO
 78 | GENERATE_TODOLIST      = YES
 79 | GENERATE_TESTLIST      = YES
 80 | GENERATE_BUGLIST       = YES
 81 | GENERATE_DEPRECATEDLIST= YES
 82 | ENABLED_SECTIONS       =
 83 | MAX_INITIALIZER_LINES  = 30
 84 | SHOW_USED_FILES        = YES
 85 | SHOW_FILES             = YES
 86 | SHOW_NAMESPACES        = YES
 87 | FILE_VERSION_FILTER    =
 88 | LAYOUT_FILE            =
 89 | CITE_BIB_FILES         =
 90 | #---------------------------------------------------------------------------
 91 | # Configuration options related to warning and progress messages
 92 | #---------------------------------------------------------------------------
 93 | QUIET                  = NO
 94 | WARNINGS               = YES
 95 | WARN_IF_UNDOCUMENTED   = YES
 96 | WARN_IF_DOC_ERROR      = YES
 97 | WARN_NO_PARAMDOC       = NO
 98 | WARN_AS_ERROR          = NO
 99 | WARN_FORMAT            = "$file:$line: $text"
100 | WARN_LOGFILE           =
101 | #---------------------------------------------------------------------------
102 | # Configuration options related to the input files
103 | #---------------------------------------------------------------------------
104 | INPUT                  = ../README.md ../sim
105 | INPUT_ENCODING         = UTF-8
106 | FILE_PATTERNS          = *.h *.c *.cl
107 | RECURSIVE              = YES
108 | EXCLUDE                =
109 | EXCLUDE_SYMLINKS       = NO
110 | EXCLUDE_PATTERNS       =
111 | EXCLUDE_SYMBOLS        =
112 | EXAMPLE_PATH           =
113 | EXAMPLE_PATTERNS       =
114 | EXAMPLE_RECURSIVE      = NO
115 | IMAGE_PATH             =
116 | INPUT_FILTER           =
117 | FILTER_PATTERNS        =
118 | FILTER_SOURCE_FILES    = NO
119 | FILTER_SOURCE_PATTERNS =
120 | USE_MDFILE_AS_MAINPAGE =
121 | #---------------------------------------------------------------------------
122 | # Configuration options related to source browsing
123 | #---------------------------------------------------------------------------
124 | SOURCE_BROWSER         = YES
125 | INLINE_SOURCES         = NO
126 | STRIP_CODE_COMMENTS    = NO
127 | REFERENCED_BY_RELATION = NO
128 | REFERENCES_RELATION    = NO
129 | REFERENCES_LINK_SOURCE = YES
130 | SOURCE_TOOLTIPS        = YES
131 | USE_HTAGS              = NO
132 | VERBATIM_HEADERS       = YES
133 | #---------------------------------------------------------------------------
134 | # Configuration options related to the alphabetical class index
135 | #---------------------------------------------------------------------------
136 | ALPHABETICAL_INDEX     = YES
137 | COLS_IN_ALPHA_INDEX    = 5
138 | IGNORE_PREFIX          =
139 | #---------------------------------------------------------------------------
140 | # Configuration options related to the HTML output
141 | #---------------------------------------------------------------------------
142 | GENERATE_HTML          = YES
143 | HTML_OUTPUT            = html
144 | HTML_FILE_EXTENSION    = .html
145 | HTML_HEADER            =
146 | HTML_FOOTER            =
147 | HTML_STYLESHEET        =
148 | HTML_EXTRA_STYLESHEET  =
149 | HTML_EXTRA_FILES       =
150 | HTML_COLORSTYLE_HUE    = 220
151 | HTML_COLORSTYLE_SAT    = 100
152 | HTML_COLORSTYLE_GAMMA  = 80
153 | HTML_TIMESTAMP         = NO
154 | HTML_DYNAMIC_SECTIONS  = NO
155 | HTML_INDEX_NUM_ENTRIES = 100
156 | GENERATE_DOCSET        = NO
157 | DOCSET_FEEDNAME        = "Doxygen generated docs"
158 | DOCSET_BUNDLE_ID       = org.doxygen.Project
159 | DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
160 | DOCSET_PUBLISHER_NAME  = Publisher
161 | GENERATE_HTMLHELP      = NO
162 | CHM_FILE               =
163 | HHC_LOCATION           =
164 | GENERATE_CHI           = NO
165 | CHM_INDEX_ENCODING     =
166 | BINARY_TOC             = NO
167 | TOC_EXPAND             = NO
168 | GENERATE_QHP           = NO
169 | QCH_FILE               =
170 | QHP_NAMESPACE          = org.doxygen.Project
171 | QHP_VIRTUAL_FOLDER     = doc
172 | QHP_CUST_FILTER_NAME   =
173 | QHP_CUST_FILTER_ATTRS  =
174 | QHP_SECT_FILTER_ATTRS  =
175 | QHG_LOCATION           =
176 | GENERATE_ECLIPSEHELP   = NO
177 | ECLIPSE_DOC_ID         = org.doxygen.Project
178 | DISABLE_INDEX          = NO
179 | GENERATE_TREEVIEW      = NO
180 | ENUM_VALUES_PER_LINE   = 4
181 | TREEVIEW_WIDTH         = 250
182 | EXT_LINKS_IN_WINDOW    = NO
183 | FORMULA_FONTSIZE       = 10
184 | FORMULA_TRANSPARENT    = YES
185 | USE_MATHJAX            = YES
186 | MATHJAX_FORMAT         = HTML-CSS
187 | MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
188 | MATHJAX_EXTENSIONS     =
189 | MATHJAX_CODEFILE       =
190 | SEARCHENGINE           = YES
191 | SERVER_BASED_SEARCH    = NO
192 | EXTERNAL_SEARCH        = NO
193 | SEARCHENGINE_URL       =
194 | SEARCHDATA_FILE        = searchdata.xml
195 | EXTERNAL_SEARCH_ID     =
196 | EXTRA_SEARCH_MAPPINGS  =
197 | #---------------------------------------------------------------------------
198 | # Configuration options related to the LaTeX output
199 | #---------------------------------------------------------------------------
200 | GENERATE_LATEX         = YES
201 | LATEX_OUTPUT           = latex
202 | LATEX_CMD_NAME         = latex
203 | MAKEINDEX_CMD_NAME     = makeindex
204 | COMPACT_LATEX          = NO
205 | PAPER_TYPE             = a4
206 | EXTRA_PACKAGES         =
207 | LATEX_HEADER           =
208 | LATEX_FOOTER           =
209 | LATEX_EXTRA_STYLESHEET =
210 | LATEX_EXTRA_FILES      =
211 | PDF_HYPERLINKS         = YES
212 | USE_PDFLATEX           = YES
213 | LATEX_BATCHMODE        = NO
214 | LATEX_HIDE_INDICES     = NO
215 | LATEX_SOURCE_CODE      = NO
216 | LATEX_BIB_STYLE        = plain
217 | LATEX_TIMESTAMP        = NO
218 | #---------------------------------------------------------------------------
219 | # Configuration options related to the RTF output
220 | #---------------------------------------------------------------------------
221 | GENERATE_RTF           = NO
222 | RTF_OUTPUT             = rtf
223 | COMPACT_RTF            = NO
224 | RTF_HYPERLINKS         = NO
225 | RTF_STYLESHEET_FILE    =
226 | RTF_EXTENSIONS_FILE    =
227 | RTF_SOURCE_CODE        = NO
228 | #---------------------------------------------------------------------------
229 | # Configuration options related to the man page output
230 | #---------------------------------------------------------------------------
231 | GENERATE_MAN           = NO
232 | MAN_OUTPUT             = man
233 | MAN_EXTENSION          = .3
234 | MAN_SUBDIR             =
235 | MAN_LINKS              = NO
236 | #---------------------------------------------------------------------------
237 | # Configuration options related to the XML output
238 | #---------------------------------------------------------------------------
239 | GENERATE_XML           = NO
240 | XML_OUTPUT             = xml
241 | XML_PROGRAMLISTING     = YES
242 | #---------------------------------------------------------------------------
243 | # Configuration options related to the DOCBOOK output
244 | #---------------------------------------------------------------------------
245 | GENERATE_DOCBOOK       = NO
246 | DOCBOOK_OUTPUT         = docbook
247 | DOCBOOK_PROGRAMLISTING = NO
248 | #---------------------------------------------------------------------------
249 | # Configuration options for the AutoGen Definitions output
250 | #---------------------------------------------------------------------------
251 | GENERATE_AUTOGEN_DEF   = NO
252 | #---------------------------------------------------------------------------
253 | # Configuration options related to the Perl module output
254 | #---------------------------------------------------------------------------
255 | GENERATE_PERLMOD       = NO
256 | PERLMOD_LATEX          = NO
257 | PERLMOD_PRETTY         = YES
258 | PERLMOD_MAKEVAR_PREFIX =
259 | #---------------------------------------------------------------------------
260 | # Configuration options related to the preprocessor
261 | #---------------------------------------------------------------------------
262 | ENABLE_PREPROCESSING   = YES
263 | MACRO_EXPANSION        = NO
264 | EXPAND_ONLY_PREDEF     = NO
265 | SEARCH_INCLUDES        = YES
266 | INCLUDE_PATH           =
267 | INCLUDE_FILE_PATTERNS  =
268 | PREDEFINED             =
269 | EXPAND_AS_DEFINED      =
270 | SKIP_FUNCTION_MACROS   = YES
271 | #---------------------------------------------------------------------------
272 | # Configuration options related to external references
273 | #---------------------------------------------------------------------------
274 | TAGFILES               =
275 | GENERATE_TAGFILE       =
276 | ALLEXTERNALS           = NO
277 | EXTERNAL_GROUPS        = YES
278 | EXTERNAL_PAGES         = YES
279 | PERL_PATH              = /usr/bin/perl
280 | #---------------------------------------------------------------------------
281 | # Configuration options related to the dot tool
282 | #---------------------------------------------------------------------------
283 | CLASS_DIAGRAMS         = YES
284 | MSCGEN_PATH            =
285 | DIA_PATH               =
286 | HIDE_UNDOC_RELATIONS   = YES
287 | HAVE_DOT               = YES
288 | DOT_NUM_THREADS        = 0
289 | DOT_FONTNAME           = Helvetica
290 | DOT_FONTSIZE           = 10
291 | DOT_FONTPATH           =
292 | CLASS_GRAPH            = YES
293 | COLLABORATION_GRAPH    = YES
294 | GROUP_GRAPHS           = YES
295 | UML_LOOK               = NO
296 | UML_LIMIT_NUM_FIELDS   = 10
297 | TEMPLATE_RELATIONS     = NO
298 | INCLUDE_GRAPH          = YES
299 | INCLUDED_BY_GRAPH      = YES
300 | CALL_GRAPH             = YES
301 | CALLER_GRAPH           = YES
302 | GRAPHICAL_HIERARCHY    = YES
303 | DIRECTORY_GRAPH        = YES
304 | DOT_IMAGE_FORMAT       = png
305 | INTERACTIVE_SVG        = NO
306 | DOT_PATH               =
307 | DOTFILE_DIRS           =
308 | MSCFILE_DIRS           =
309 | DIAFILE_DIRS           =
310 | PLANTUML_JAR_PATH      =
311 | PLANTUML_INCLUDE_PATH  =
312 | DOT_GRAPH_MAX_NODES    = 50
313 | MAX_DOT_GRAPH_DEPTH    = 0
314 | DOT_TRANSPARENT        = NO
315 | DOT_MULTI_TARGETS      = NO
316 | GENERATE_LEGEND        = YES
317 | DOT_CLEANUP            = YES
318 | 


--------------------------------------------------------------------------------
/sim-org/KS.cl:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2016 Chi-kwan Chan
  3 |  * Copyright (C) 2016 Steward Observatory
  4 |  *
  5 |  * This file is part of GRay2.
  6 |  *
  7 |  * GRay2 is free software: you can redistribute it and/or modify it
  8 |  * under the terms of the GNU General Public License as published by
  9 |  * the Free Software Foundation, either version 3 of the License, or
 10 |  * (at your option) any later version.
 11 |  *
 12 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 13 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 14 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 15 |  * License for more details.
 16 |  *
 17 |  * You should have received a copy of the GNU General Public License
 18 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 19 |  */
 20 | 
 21 | /** \file
 22 |  ** Cartesian Kerr-Schild coordinate specific schemes
 23 |  **
 24 |  ** Implement the coordinate specific functions getuu(), icond(), and
 25 |  ** rhs() in the Cartesian form of the Kerr-Schild coordinates.  Let
 26 |  ** \f$t\f$, \f$x\f$, \f$y\f$, \f$z\f$ be the coordinates, the
 27 |  ** Cartesian Kerr-Schild metric is given by
 28 |  ** \f[
 29 |  **   g_{\mu\nu} = \gamma_{\mu\nu} + f l_\mu l_\nu
 30 |  ** \f]
 31 |  ** where \f$\gamma_{\mu\nu}\f$ is the Minkowski metric, \f$f\f$ and
 32 |  ** \f$l_\mu\f$ are defined by
 33 |  ** \f[
 34 |  **   f = \frac{2r^3}{r^4 + a^2 z^2} \mbox{ and }
 35 |  **   l_\mu = \left(1, \frac{rx + ay}{r^2 + a^2},
 36 |  **                    \frac{ry - ax}{r^2 + a^2},
 37 |  **                    \frac{z}{r}\right),
 38 |  ** \f]
 39 |  ** respectively, and \f$r\f$ is defined implicitly by \f$x^2 + y^2 +
 40 |  ** z^2 = r^2 + a^2 (1 - z^2 / r^2)\f$.
 41 |  **/
 42 | /** State of a ray; gr_rhs() also returns this type to carry the corresponding derivatives */
 43 | struct gr {
 44 | 	real4 q; /**< spacetime position (t, x, y, z) */
 45 | 	real4 u; /**< wavevector, i.e., the derivative of q along the ray */
 46 | };
 47 | /** Solve the implicit definition of the Kerr-Schild radius for \f$r^2\f$ at the position q */
 48 | real
 49 | getrr(real4 q) /**< spacetime position (t, x, y, z) */
 50 | {
 51 | 	const real a2 = a_spin * a_spin;
 52 | 	const real z2 = q.s3 * q.s3;
 53 | 	const real hb = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + z2 - a2); /* (x^2 + y^2 + z^2 - a^2) / 2 */
 54 | 	return hb + sqrt(hb * hb + a2 * z2); /* positive root of r^4 - 2 hb r^2 - a^2 z^2 = 0 */
 55 | }
 56 | /** Difference between the Kerr-Schild radius at q and \f$1 + \sqrt{1 - a^2}\f$ (named rp in getBL()) */
 57 | real
 58 | geteps(real4 q) /**< spacetime position (t, x, y, z) */
 59 | {
 60 | 	return sqrt(getrr(q)) - (K(1.0) + sqrt(K(1.0) - a_spin * a_spin)); /* literals wrapped in K() like the rest of this file; presumably K() selects the precision of `real` */
 61 | }
 62 | /** Convert the Cartesian Kerr-Schild position q to spherical Kerr-Schild (t, r, theta, phi) */
 63 | real4
 64 | getsphKS(real4 q) /**< spacetime position (t, x, y, z) */
 65 | {
 66 | 	const real r   = sqrt(getrr(q));
 67 | 	const real num = q.s2 * r + q.s1 * a_spin; /* first  argument of atan2() */
 68 | 	const real den = q.s1 * r - q.s2 * a_spin; /* second argument of atan2() */
 69 | 	real4 sph = {q.s0, r, acos(q.s3 / r), atan2(num, den)};
 70 | 	return sph;
 71 | }
 72 | /** Convert the Cartesian Kerr-Schild position q to Boyer-Lindquist (t, r, theta, phi); r and theta are those of getsphKS() */
 73 | real4
 74 | getBL(real4 q) /**< spacetime position (t, x, y, z) */
 75 | {
 76 | 	q = getsphKS(q);
 77 | 
 78 | 	real h  = sqrt(K(1.0) - a_spin * a_spin); /* NOTE(review): h == 0 when |a_spin| == 1, and h is a divisor below */
 79 | 	real rp = K(1.0) + h; /* larger  root of r^2 - 2r + a^2; K() as for every other literal here */
 80 | 	real rm = K(1.0) - h; /* smaller root of r^2 - 2r + a^2 */
 81 | 	real r  = q.s1;
 82 | 
 83 | 	return (real4){
 84 | 		q.s0 + (rm * log((r-rm)/(r_match-rm)) - rp * log((r-rp)/(r_match-rp))) / h,
 85 | 		q.s1,
 86 | 		q.s2,
 87 | 		q.s3 + K(0.5) * a_spin * log((r-rm)/(r-rp)) / h /* use r = inf for matching the phi coordinate */
 88 | 	};
 89 | }
 90 | /** Lower the index of u at q with the Cartesian Kerr-Schild metric, i.e., return \f$g_{\mu\nu} u^\nu\f$ */
 91 | real4
 92 | down(real4 q, real4 u)
 93 | {
 94 | 	const real  a2 = a_spin * a_spin;
 95 | 	const real  z2 = q.s3 * q.s3;
 96 | 	const real  hb = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + z2 - a2);
 97 | 	const real  r2 = sqrt(hb * hb + a2 * z2) + hb; /* same r^2 as getrr() */
 98 | 	const real  r  = sqrt(r2);
 99 | 
100 | 	const real  f  = K(2.0) * r2 * r / (r2 * r2 + a2 * z2);
101 | 	const real  lx = (r * q.s1 + a_spin * q.s2) / (r2 + a2);
102 | 	const real  ly = (r * q.s2 - a_spin * q.s1) / (r2 + a2);
103 | 	const real  lz = q.s3 / r;
104 | 
105 | 	const real4 row_t = {-1 + f   ,     f*   lx,     f*   ly,     f*   lz}; /* rows of eta + f l l, with l_t = 1 */
106 | 	const real4 row_x = {     f*lx, 1 + f*lx*lx,     f*lx*ly,     f*lx*lz};
107 | 	const real4 row_y = {     f*ly,     f*ly*lx, 1 + f*ly*ly,     f*ly*lz};
108 | 	const real4 row_z = {     f*lz,     f*lz*lx,     f*lz*ly, 1 + f*lz*lz};
109 | 
110 | 	return (real4){dot(row_t, u),
111 | 	               dot(row_x, u),
112 | 	               dot(row_y, u),
113 | 	               dot(row_z, u)};
114 | }
115 | /** Convert the Cartesian Kerr-Schild vector u at q to spherical Kerr-Schild components (t, r, theta, phi) */
116 | real4
117 | getsphKSu(real4 q, real4 u)
118 | {
119 | 	real  aa = a_spin * a_spin;
120 | 	real  RR = q.s1 * q.s1 + q.s2 * q.s2; /* x^2 + y^2; NOTE(review): zero on the z-axis, and J3 divides by it */
121 | 	real  zz = q.s3 * q.s3;
122 | 	real  kk = K(0.5) * (RR + zz - aa);
123 | 	real  dd = sqrt(kk * kk + aa * zz);
124 | 	real  rr = dd + kk; /* r^2, as in getrr() */
125 | 	real  r  = sqrt(rr);
126 | 
127 | 	real4 J1 = ((real4){0, rr, rr, rr + aa} * q) / (K(2.0) * r * dd); /* gradient of r     with respect to (t, x, y, z) */
128 | 	real4 J2 = ((q.s3 / r) * J1 - (real4){0, 0, 0, 1}) / sqrt(rr - zz); /* gradient of theta, from cos(theta) = z / r */
129 | 	real4 J3 = (-a_spin / (rr + aa)) * J1 + (real4){0, -q.s2, q.s1, 0} / RR; /* gradient of phi */
130 | 
131 | 	return (real4){u.s0,
132 | 	               dot(J1, u),
133 | 	               dot(J2, u),
134 | 	               dot(J3, u)};
135 | }
136 | /** Convert the Cartesian Kerr-Schild vector u at q to Boyer-Lindquist components, consistently with getBL() */
137 | real4
138 | getBLu(real4 q, real4 u)
139 | {
140 | 	u = getsphKSu(q, u); /* now (u^t, u^r, u^theta, u^phi) in spherical Kerr-Schild */
141 | 
142 | 	real  aa = a_spin * a_spin;
143 | 	real  zz = q.s3 * q.s3;
144 | 	real  kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa);
145 | 	real  rr = sqrt(kk * kk + aa * zz) + kk;
146 | 	real  r  = sqrt(rr);
147 | 	real  D  = rr - K(2.0) * r + aa; /* (r - rp)(r - rm); NOTE(review): vanishes at r = rp and r = rm */
148 | 
149 | 	return (real4){u.s0 - u.s1 * K(2.0) * r / D, /* d/dr of the t   shift in getBL() is -2r / D, so it multiplies u^r */
150 | 	               u.s1,
151 | 	               u.s2,
152 | 	               u.s3 - u.s1 * a_spin     / D}; /* d/dr of the phi shift in getBL() is -a / D, so it multiplies u^r */
153 | }
154 | 
155 | /**
156 |  ** Square of vector u at the spacetime event q in Kerr-Schild coordinates
157 |  **
158 |  ** Lower the index of \p g.u with down() and contract the result with
159 |  ** \p g.u itself, i.e., evaluate \f$g_{\alpha\beta} u^\alpha u^\beta\f$
160 |  ** with the Cartesian form of the Kerr-Schild metric.
161 |  ** \return The square of u at q
162 |  **/
163 | real
164 | getuu(struct gr g) /**< state of the ray */
165 | {
166 | 	const real4 u_lower = down(g.q, g.u);
167 | 	return dot(u_lower, g.u);
168 | }
169 | 
170 | /**
171 |  ** Initial conditions of a ray in an image plane
172 |  **
173 |  ** To perform ray tracing calculations of an image in Kerr spacetime,
174 |  ** we follow Johannsen & Psaltis (2010) and consider an observer
175 |  ** viewing the central black hole from a large distance \p r_obs and
176 |  ** at an inclination angle \p i_obs from its rotation axis (see
177 |  ** Figure 1 of Psaltis & Johannsen 2012).  We set up a virtual image
178 |  ** plane that is perpendicular to the line of sight and centered at
179 |  ** \f$\phi\f$ = \p j_obs of the spacetime.  We define the set of
180 |  ** local Cartesian coordinates (\p alpha, \p beta) on the image plane
181 |  ** such that the \p beta axis is along the same fiducial plane and
182 |  ** the \p alpha axis is perpendicular to it.  These input parameters
183 |  ** define a unique ray, whose initial spacetime position and
184 |  ** wavevector are then computed by icond().
185 |  **
186 |  ** \return The initial conditions of a ray
187 |  **/
188 | struct gr
189 | gr_icond(real r_obs, /**< distance of the image from the black hole */
190 |          real i_obs, /**< inclination angle of the image in degrees */
191 |          real j_obs, /**< azimuthal   angle of the image in degrees */
192 |          real alpha, /**< one of the local Cartesian coordinates */
193 |          real beta)  /**< the other  local Cartesian coordinate  */
194 | {
195 | 	real  deg2rad = K(3.14159265358979323846264338327950288) / K(180.0); /* pi / 180 */
196 | 	real  ci, si  = sincos(deg2rad * i_obs, &ci); /* sincos() returns the sine and stores the cosine */
197 | 	real  cj, sj  = sincos(deg2rad * j_obs, &cj);
198 | 
199 | 	real  R0 = r_obs * si - beta  * ci; /* cylindrical radius */
200 | 	real  z  = r_obs * ci + beta  * si;
201 | 	real  y  = R0    * sj + alpha * cj; /* rotate (R0, alpha) about the z-axis by j_obs */
202 | 	real  x  = R0    * cj - alpha * sj;
203 | 
204 | 	real4 q  = (real4){0, x, y, z}; /* the ray starts at t = 0 */
205 | 	real4 u  = (real4){1, si * cj, si * sj, ci}; /* spatial part: unit vector at angles (i_obs, j_obs) */
206 | 
207 | 	real  aa = a_spin * a_spin;
208 | 	real  zz = q.s3 * q.s3;
209 | 	real  kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa);
210 | 	real  rr = sqrt(kk * kk + aa * zz) + kk; /* r^2, as in getrr() */
211 | 	real  r  = sqrt(rr);
212 | 
213 | 	real  f  = K(2.0) * rr * r / (rr * rr + aa * zz);
214 | 	real  lx = (r * q.s1 + a_spin * q.s2) / (rr + aa);
215 | 	real  ly = (r * q.s2 - a_spin * q.s1) / (rr + aa);
216 | 	real  lz = q.s3 / r;
217 | 
218 | 	real4 gt = {-1 + f   ,     f*   lx,     f*   ly,     f*   lz}; /* rows of the metric, as in down() */
219 | 	real4 gx = {     f*lx, 1 + f*lx*lx,     f*lx*ly,     f*lx*lz};
220 | 	real4 gy = {     f*ly,     f*ly*lx, 1 + f*ly*ly,     f*ly*lz};
221 | 	real4 gz = {     f*lz,     f*lz*lx,     f*lz*ly, 1 + f*lz*lz};
222 | 
223 | 	real  A  =  gt.s0; /* A, B, C: time-time, time-space and space-space parts of g(u, u) */
224 | 	real  B  =  dot(gt.s123, u.s123) * K(2.0);
225 | 	real  C  = (dot(gx.s123, u.s123) * u.s1 +
226 | 	            dot(gy.s123, u.s123) * u.s2 +
227 | 	            dot(gz.s123, u.s123) * u.s3);
228 | 
229 | 	u.s123 /= -(B + sqrt(B * B - K(4.0) * A * C)) / (K(2.0) * A); /* divide by a root of A s^2 + B s + C = 0 so that g(u, u) = 0 with u.s0 = 1 */
230 | 
231 | 	return (struct gr){q, u};
232 | }
233 | 
234 | /**
235 |  ** Right hand sides of the geodesic equations in Kerr-Schild coordinates
236 |  **
237 |  ** One of the breakthroughs we achieve in GRay2 is that, by a series
238 |  ** of mathematical manipulations and regrouping, we significantly
239 |  ** reduce the operation count of the geodesic equations in the
240 |  ** Cartesian Kerr-Schild coordinates.  Let \f$\lambda\f$ be the
241 |  ** affine parameter and \f$\dot{x}^\mu \equiv dx^\mu/d\lambda\f$.  We
242 |  ** show in Chan et al. (2017) that the geodesic equations in the
243 |  ** Cartesian KS coordinates can be optimized to the following form:
244 |  ** \f[
245 |  **  \ddot{x}^\mu = - \left(\eta^{\mu\beta} \dot{x}^\alpha -
246 |  **                         \frac{1}{2}\eta^{\mu\alpha} \dot{x}^\beta\right)
247 |  **                 \dot{x}_{\beta,\alpha} + F l^\mu
248 |  ** \f]
249 |  ** where
250 |  ** \f[
251 |  **   F = f \left(l^\beta \dot{x}^\alpha -
252 |  **               \frac{1}{2}l^\alpha \dot{x}^\beta\right)
253 |  **       \dot{x}_{\beta,\alpha}.
254 |  ** \f]
255 |  ** In this new form, the right hand sides (RHS) of the geodesic
256 |  ** equations have only ~65% more floating-point operations than in
257 |  ** the Boyer-Lindquist coordinates.  Furthermore, the evaluation of
258 |  ** the RHS uses many matrix-vector products, which are optimized in
259 |  ** modern hardware.
260 |  **
261 |  ** \return The right hand sides of the geodesic equations
262 |  **/
263 | struct gr
264 | gr_rhs(struct gr g) /**< state of the ray */
265 | {
266 | 	real4 q = g.q;
267 | 	real4 u = g.u;
268 | 
269 | 	real  f,  dx_f,  dy_f,  dz_f;
270 | 	real  lx, dx_lx, dy_lx, dz_lx;
271 | 	real  ly, dx_ly, dy_ly, dz_ly;
272 | 	real  lz, dx_lz, dy_lz, dz_lz;
273 | 
274 | 	real  hDxu, hDyu, hDzu;
275 | 	real4 uD;
276 | 	real  tmp; /* scratch value, reassigned several times below; statement order matters */
277 | 
278 | 	{
279 | 		real dx_r, dy_r, dz_r;
280 | 		real r, ir, iss;
281 | 		{
282 | 			real aa = a_spin * a_spin;
283 | 			real rr, tmp2;
284 | 			{
285 | 				real zz = q.s3 * q.s3;
286 | 				real dd;
287 | 				{
288 | 					real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa);
289 | 					dd = sqrt(kk * kk + aa * zz);
290 | 					rr = dd + kk; /* r^2, as in getrr() */
291 | 				}
292 | 				r  = sqrt(rr);
293 | 				ir = K(1.0) / r;
294 | 				{
295 | 					real ss = rr + aa;
296 | 					iss  = K(1.0) / ss;
297 | 					tmp  = K(0.5) / (r * dd);
298 | 					dz_r = tmp * ss * q.s3;
299 | 					tmp *= rr;
300 | 				}
301 | 				dy_r = tmp * q.s2;
302 | 				dx_r = tmp * q.s1;
303 | 				tmp  = K(2.0) / (rr + aa * zz / rr); /* equals f / r */
304 | 			}
305 | 			tmp2 = K(3.0) - K(2.0) * rr * tmp;
306 | 			f    = tmp *  r;
307 | 			dx_f = tmp *  dx_r * tmp2;
308 | 			dy_f = tmp *  dy_r * tmp2;
309 | 			dz_f = tmp * (dz_r * tmp2 - tmp * aa * q.s3 * ir);
310 | 		} /* 48 (-8) FLOPs; estimated FLoating-point OPerations, the number
311 | 		     in the parentheses is (the negative of) the number of FMA */
312 | 		{
313 | 			real m2r  = K(-2.0) * r;
314 | 			real issr = iss     * r;
315 | 			real issa = iss     * a_spin;
316 | 
317 | 			lx    = iss * (q.s1 * r + q.s2 * a_spin);
318 | 			tmp   = iss * (q.s1 + m2r * lx);
319 | 			dx_lx = tmp * dx_r + issr;
320 | 			dy_lx = tmp * dy_r + issa;
321 | 			dz_lx = tmp * dz_r;
322 | 
323 | 			ly    = iss * (q.s2 * r - q.s1 * a_spin);
324 | 			tmp   = iss * (q.s2 + m2r * ly);
325 | 			dx_ly = tmp * dx_r - issa;
326 | 			dy_ly = tmp * dy_r + issr;
327 | 			dz_ly = tmp * dz_r;
328 | 
329 | 			lz    = q.s3 * ir;
330 | 			tmp   = -lz * ir;
331 | 			dx_lz = tmp * dx_r;
332 | 			dy_lz = tmp * dy_r;
333 | 			dz_lz = tmp * dz_r + ir;
334 | 		} /* 35 (-9) FLOPs */
335 | 	}
336 | 
337 | 	{
338 | 		real  flu;
339 | 		real4 Dx, Dy, Dz;
340 | 		{
341 | 			real lu = u.s0 + lx * u.s1 + ly * u.s2 + lz * u.s3; /* contraction of l with u; the time component of l is 1 */
342 | 			flu   = f * lu;
343 | 			Dx.s0 = dx_f * lu + f * (dx_lx * u.s1 + dx_ly * u.s2 + dx_lz * u.s3);
344 | 			Dy.s0 = dy_f * lu + f * (dy_lx * u.s1 + dy_ly * u.s2 + dy_lz * u.s3);
345 | 			Dz.s0 = dz_f * lu + f * (dz_lx * u.s1 + dz_ly * u.s2 + dz_lz * u.s3); /* 31 (-12) FLOPs */
346 | 		}
347 | 		Dx.s1 = Dx.s0 * lx + flu * dx_lx;
348 | 		Dx.s2 = Dx.s0 * ly + flu * dx_ly;
349 | 		Dx.s3 = Dx.s0 * lz + flu * dx_lz; /* 9 (-3) FLOPs */
350 | 
351 | 		Dy.s1 = Dy.s0 * lx + flu * dy_lx;
352 | 		Dy.s2 = Dy.s0 * ly + flu * dy_ly;
353 | 		Dy.s3 = Dy.s0 * lz + flu * dy_lz; /* 9 (-3) FLOPs */
354 | 
355 | 		Dz.s1 = Dz.s0 * lx + flu * dz_lx;
356 | 		Dz.s2 = Dz.s0 * ly + flu * dz_ly;
357 | 		Dz.s3 = Dz.s0 * lz + flu * dz_lz; /* 9 (-3) FLOPs */
358 | 
359 | 		hDxu = K(0.5) * dot(Dx, u);
360 | 		hDyu = K(0.5) * dot(Dy, u);
361 | 		hDzu = K(0.5) * dot(Dz, u); /* 24 (-9) FLOPs */
362 | 
363 | 		uD  = u.s1 * Dx + u.s2 * Dy + u.s3 * Dz; /* 20 (-8) FLOPs */
364 | 
365 | 		tmp = f * (-uD.s0 + lx * (uD.s1 - hDxu) + ly * (uD.s2 - hDyu) + lz * (uD.s3 - hDzu)); /* 10 (-3) FLOPs */
366 | 	}
367 | 
368 | 	{
369 | 		real4 a = {
370 | 			       uD.s0 -      tmp,
371 | 			hDxu - uD.s1 + lx * tmp,
372 | 			hDyu - uD.s2 + ly * tmp,
373 | 			hDzu - uD.s3 + lz * tmp
374 | 		}; /* 10 (-3) FLOPs */
375 | 
376 | 		return (struct gr){u, a}; /* derivative of q is u; derivative of u is a */
377 | 	}
378 | }
379 | 


--------------------------------------------------------------------------------
/sim-org/io.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2020-2021 Gabriele Bozzola
  3 |  * Copyright (C) 2016 Chi-kwan Chan
  4 |  * Copyright (C) 2016 Steward Observatory
  5 |  *
  6 |  * This file is part of GRay2.
  7 |  *
  8 |  * GRay2 is free software: you can redistribute it and/or modify it
  9 |  * under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation, either version 3 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 14 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 15 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 16 |  * License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 20 |  */
 21 | #include "gray.h"
 22 | #include <stdio.h>
 23 | #include <stdlib.h>
 24 | #include <hdf5.h>
 25 | #include <time.h>
 26 | 
 27 | /* Compare function, needed for qsort (needed to sort the times in the HDF5 file) */
 28 | int compare (const void *a, const void *b)
 29 | {
 30 | 	/* With the help of https://stackoverflow.com/a/3886497 */
 31 | 	char *rem;
 32 | 	real a_num = strtod((char*)a, &rem);
 33 | 	real b_num = strtod((char*)b, &rem);
 34 | 	return (a_num > b_num) - (a_num < b_num);
 35 | }
 36 | 
 37 | /** \todo Implement load() */
 38 | 
 39 | void
 40 | dump(Lux_job *ego, size_t i)
 41 | {
 42 | 	Lux_opencl *ocl = EGO->ocl;
 43 | 
 44 | 	struct param *p = &EGO->param;
 45 | 	struct setup *s = &EGO->setup;
 46 | 
 47 | 	const  size_t sz     = s->precision;
 48 | 	const  size_t n_data = EGO->n_coor + EGO->n_freq * 2;
 49 | 	const  size_t n_info = EGO->n_info;
 50 | 	const  size_t n_rays = p->h_rays * p->w_rays;
 51 | 
 52 | 	void *data = ocl->mmap(ocl, EGO->data, sz * n_rays * n_data);
 53 | 	void *info = ocl->mmap(ocl, EGO->info, sz * n_rays * n_info);
 54 | 
 55 | 	char  buf[64];
 56 | 	FILE *f;
 57 | 
 58 | 	snprintf(buf, sizeof(buf), s->outfile, i);
 59 | 	f = fopen(buf, "wb");
 60 | 
 61 | 	fwrite(&sz,        sizeof(size_t), 1,      f);
 62 | 	fwrite(&n_data,    sizeof(size_t), 1,      f);
 63 | 	fwrite(&p->w_rays, sizeof(size_t), 1,      f);
 64 | 	fwrite(&p->h_rays, sizeof(size_t), 1,      f);
 65 | 	fwrite( data,      sz * n_data,    n_rays, f);
 66 | 	fwrite( info,      sz * n_info,    n_rays, f);
 67 | 
 68 | 	fclose(f);
 69 | 
 70 | 	ocl->munmap(ocl, EGO->info, info);
 71 | 	ocl->munmap(ocl, EGO->data, data);
 72 | }
 73 | 
 74 | size_t
 75 | read_variable_from_h5_file_and_return_num_points(const hid_t group_id,
 76 |                                                  const char *var_name,
 77 |                                                  void **var_array)
 78 | {
 79 | 
 80 | 	/* Here we read var_name from group_id and put in var_array*/
 81 | 	/* var_array is a pointer to a pointer to the area of memory
 82 | 	 * where the data will be written */
 83 | 	/* We want a pointer to a pointer because we want to modify var_array
 84 | 	 * with malloc */
 85 | 
 86 | 	/* We allocate memory, so it has to be freed! */
 87 | 	/* WARNING: The memory has to be freed! */
 88 | 
 89 | 	/* The return value is the number of elements */
 90 | 
 91 | 	herr_t status;
 92 | 	hid_t datasetH5type;
 93 | 	hid_t dataset_id, dataspace_id; /* identifiers for dsets*/
 94 | 
 95 | 	lux_debug("Reading variable %s\n", var_name);
 96 | 
 97 | 	dataset_id = H5Dopen(group_id, var_name, H5P_DEFAULT);
 98 | 	if (dataset_id == -1) {
 99 | 		lux_print("Error in opening dataset: %s", var_name);
100 | 		return dataset_id;
101 | 	}
102 | 
103 | 	/* The dataspace will tell us about the size (in bytes) of the data */
104 | 	dataspace_id = H5Dget_space(dataset_id);
105 | 	if (dataspace_id == -1) {
106 | 		lux_print("Error in getting dataspace: %s", var_name);
107 | 		return dataset_id;
108 | 	}
109 | 
110 | 	const size_t total_num_bytes = H5Dget_storage_size(dataset_id);
111 | 
112 | 	/* Here we allocate the memory */
113 | 	/* IT MUST BE FREED! */
114 | 	*var_array = malloc(total_num_bytes);
115 | 
116 | 	/* To read the data, we must know what type is it */
117 | 	datasetH5type = H5Tget_native_type(H5Dget_type(dataset_id), H5T_DIR_DEFAULT);
118 | 	if (datasetH5type == -1) {
119 | 		lux_print("Error in determining type in dataset: %s", var_name);
120 | 		return datasetH5type;
121 | 	}
122 | 
123 | 	/* Size in bytes of each signle element */
124 | 	const size_t sz = H5Tget_size(datasetH5type);
125 | 
126 | 	status = H5Dread(dataset_id, datasetH5type, H5S_ALL, H5S_ALL, H5P_DEFAULT,
127 | 	                 *var_array);
128 | 
129 | 	if (status != 0) {
130 | 		lux_print("Error in reading dataset: %s", var_name);
131 | 		return status;
132 | 	}
133 | 
134 | 	status = H5Dclose(dataset_id);
135 | 	if (status != 0) {
136 | 		printf("Error in closing dataset: %s", var_name);
137 | 		return status;
138 | 	}
139 | 
140 | 	status = H5Sclose(dataspace_id);
141 | 	if (status != 0) {
142 | 		printf("Error in closing dataspace: %s", var_name);
143 | 		return status;
144 | 	}
145 | 
146 | 	return total_num_bytes / sz;
147 | }
148 | 
149 | size_t
150 | populate_ego_available_times(Lux_job *ego) {
151 | 
152 | 	/* HDF5 identifiers */
153 | 	hid_t file_id;
154 | 	hid_t group_id;
155 | 	herr_t status;
156 | 
157 | 	hsize_t nobj;
158 | 
159 | 	struct param *p = &EGO->param;
160 | 	const char *file_name = p->dyst_file;
161 | 
162 | 	file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
163 | 	if (file_id == -1) {
164 | 		lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name);
165 | 		return file_id;
166 | 	}
167 | 
168 | 	/* Open group corresponding to root */
169 | 	group_id = H5Gopen(file_id, "/", H5P_DEFAULT);
170 | 	if (group_id == -1) {
171 | 		lux_print("ERROR: Could not open root of HDF5 file\n");
172 | 		return group_id;
173 | 	}
174 | 
175 | 	/* Get all the members of the groups, one at a time */
176 | 	status = H5Gget_num_objs(group_id, &nobj);
177 | 	if (status != 0) {
178 | 		lux_print("ERROR: Could not obtain number of groups in HDF5 file\n");
179 | 		return status;
180 | 	}
181 | 
182 | 	if (nobj < 2) {
183 | 		lux_print("ERROR: Not enough groups in the HDF5 file\n");
184 | 		return -1;
185 | 	}
186 | 
187 | 	lux_debug("Available times: \n");
188 | 
189 | 	char time_name[MAX_TIME_NAME_LENGTH];
190 | 
191 | 	for (size_t i = 0; i < nobj; i++) {
192 | 		H5Gget_objname_by_idx(group_id, i, time_name, MAX_TIME_NAME_LENGTH);
193 | 		/* We esclude the "grid" group, which contains the coordinates */
194 | 		if (time_name[0] != 'g'){
195 | 			char *time_name_in_ego = EGO->available_times[i];
196 | 			snprintf(time_name_in_ego, sizeof(time_name), "%s", time_name);
197 | 			lux_debug("%s\n", time_name_in_ego);
198 | 		}
199 | 	}
200 | 
201 | 	/* Now we sort the available_times array in ascending order */
202 | 	qsort(EGO->available_times, nobj - 1, sizeof(EGO->available_times[0]), compare);
203 | 
204 | 	lux_debug("Sorted available times: \n");
205 | 
206 | 	for (size_t i = 0; i < nobj - 1; i++){
207 | 		lux_debug("%s\n", EGO->available_times[i]);
208 | 	}
209 | 
210 | 	char *rem;
211 | 	/* Here it is -2 because we have a 'grid' group around, and it has to be after
212 | 	 * the numbers (nobj - 2 is the last element of the array) */
213 | 	EGO->max_available_time = strtod(EGO->available_times[nobj - 2], &rem);
214 | 
215 | 	return 0;
216 | }
217 | 
218 | size_t
219 | load_coordinates(Lux_job *ego){
220 | 	/* Here we load the coordinates from the 'grid' dataset in the HDF5 file */
221 | 
222 | 	/* OpenCL Image properties */
223 | 	cl_image_format imgfmt;
224 | 	cl_image_desc imgdesc;
225 | 	cl_int err;
226 | 
227 | 	struct param *p = &EGO->param;
228 | 
229 | 	const char *file_name = p->dyst_file;
230 | 
231 | 	/* Dimension names, useful for loops */
232 | 	const char dimension_names[4][2] = {"t", "x", "y", "z"};
233 | 
234 | 	/* HDF5 identifiers */
235 | 	hid_t file_id;
236 | 	hid_t group_id;
237 | 	herr_t status;
238 | 
239 | 	/* Array of pointers for the coordinates */
240 | 	/* There are only 3: x, y, z */
241 | 	void *coordinates[3];
242 | 
243 | 	file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
244 | 	if (file_id == -1) {
245 | 		lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name);
246 | 		return file_id;
247 | 	}
248 | 
249 | 	group_id = H5Gopen(file_id, "grid", H5P_DEFAULT);
250 | 	if (group_id == -1) {
251 | 		lux_print("ERROR: grid group not found!\n");
252 | 		return group_id;
253 | 	}
254 | 
255 | 	/* First, we read the coordinates */
256 | 	/* dimension_names[i + 1] because we ignore the time, which is the zeroth */
257 | 	for (size_t i = 0; i < 3; i++){
258 | 		EGO->num_points.s[i + 1] = read_variable_from_h5_file_and_return_num_points(
259 | 			      group_id, dimension_names[i + 1], &coordinates[i]);
260 | 		/* This is an error, something didn't work as expected */
261 | 		/* We have already printed what */
262 | 		if (EGO->num_points.s[i + 1] <= 0)
263 | 			return EGO->num_points.s[i + 1];
264 | 	}
265 | 
266 | 	lux_debug("Read coordiantes\n");
267 | 
268 | 	/* Fill spatial bounding box */
269 | 	for (int i = 1; i < 4; i++){
270 | 		/* xmin */
271 | 		EGO->bounding_box.s[i] = ((cl_float *)coordinates[i - 1])[0];
272 | 		/* xmax */
273 | 		EGO->bounding_box.s[i + 4] = ((cl_float *)coordinates[i - 1])[EGO->num_points.s[i] - 1];
274 | 	}
275 | 
276 | 	return 0;
277 | }
278 | 
279 | void
280 | copy_snapshot(Lux_job *ego, size_t to_t1){
281 | 	/* This function copies over the snapshot in _t2 to _t1 if to_t1 is true,
282 | 	 * otherwise from _t1 to _t2.  Before doing this, the memory is released.
283 | 	 * We assume that data is already defined before copying. */
284 | 	size_t index = 0;
285 | 	for (size_t i = 0; i < 4; i++)
286 | 		for (size_t j = 0; j < 4; j++)
287 | 			for (size_t k = j; k < 4; k++) {
288 | 				if (to_t1){
289 | 					/* TODO: Error checking on clReleaseMemObject */
290 | 					clReleaseMemObject(EGO->spacetime_t1[index]);
291 | 					EGO->spacetime_t1[index] = EGO->spacetime_t2[index];
292 | 				}else{
293 | 					/* TODO: Error checking on clReleaseMemObject */
294 | 					clReleaseMemObject(EGO->spacetime_t2[index]);
295 | 					EGO->spacetime_t2[index] = EGO->spacetime_t1[index];
296 | 				}
297 | 				index++;
298 | 			}
299 | 
300 | }
301 | 
302 | size_t
303 | load_snapshot(Lux_job *ego, size_t time_snapshot_index, size_t load_in_t1){
304 | 
305 | 	/* If load_in_t1 is true, then fill the t1 slot, otherwise, fill the t2 slot. */
306 | 
307 | 	/* TODO: Add support to compressed HDF5 files */
308 | 	/* https://support.hdfgroup.org/ftp/HDF5/examples/examples-by-api/hdf5-examples/1_10/C/H5D/h5ex_d_shuffle.cgo */
309 | 
310 | 	struct param *p = &EGO->param;
311 | 
312 | 	const char *file_name = p->dyst_file;
313 | 
314 | 	/* Dimension names, useful for loops */
315 | 	const char dimension_names[4][2] = {"t", "x", "y", "z"};
316 | 
317 | 	/* OpenCL Image properties */
318 | 	cl_image_format imgfmt;
319 | 	cl_image_desc imgdesc;
320 | 	cl_int err;
321 | 
322 | 	/* HDF5 identifiers */
323 | 	hid_t file_id;
324 | 	hid_t group_id;
325 | 	herr_t status;
326 | 
327 | 	clock_t start, end;
328 | 	double cpu_time_used;
329 | 
330 | 	char *time = EGO->available_times[time_snapshot_index];
331 | 
332 | 	start = clock();
333 | 
334 | 	file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
335 | 	if (file_id == -1) {
336 | 		lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name);
337 | 		return file_id;
338 | 	}
339 | 	/* Open group corresponding to time */
340 | 	group_id = H5Gopen(file_id, time, H5P_DEFAULT);
341 | 	if (group_id == -1) {
342 | 		lux_print("ERROR: Time %s not found\n", time);
343 | 		return group_id;
344 | 	}
345 | 
346 | 	lux_debug("Reading time %s\n", time);
347 | 
348 | 	/* Now, we read the Gammas */
349 | 	void *Gamma[40];
350 | 	size_t num_points;
351 | 	size_t index = 0;
352 | 	const size_t expected_num_points = EGO->num_points.s[1] *
353 | 		EGO->num_points.s[2] *
354 | 		EGO->num_points.s[3];
355 | 
356 | 	for (size_t i = 0; i < 4; i++)
357 | 		for (size_t j = 0; j < 4; j++)
358 | 			for (size_t k = j; k < 4; k++) {
359 | 				char var_name[256];
360 | 				snprintf(var_name, sizeof(var_name), "Gamma_%s%s%s", dimension_names[i],
361 | 						 dimension_names[j], dimension_names[k]);
362 | 
363 | 				/* We treat our 3D data as 1D */
364 | 				num_points = read_variable_from_h5_file_and_return_num_points(
365 | 					group_id, var_name, &Gamma[index]);
366 | 
367 | 				/* 	for (size_t kk=0; kk < num_points; kk++){ */
368 | 				/* 		float val = ((float*)Gamma[index])[kk]; */
369 | 				/* 		if (val > 1) printf("Gamma kk %d, %.16g\n", kk, val); */
370 | 				/* } */
371 | 
372 | 				/* This is an error, something didn't work as expected */
373 | 				/* We have already printed what */
374 | 				if (num_points <= 0)
375 | 					return num_points;
376 | 
377 | 				if (num_points != expected_num_points) {
378 | 					lux_print("Number of points in Gammas inconsistent with coordinates\n");
379 | 					return -1;
380 | 				}
381 | 				index++;
382 | 			}
383 | 
384 | 	lux_debug("Read Gammas\n");
385 | 
386 | 	/* Read metric */
387 | 	void *g[10];
388 | 
389 | 	index = 0;
390 | 	for(size_t i = 0; i < 4; i++)
391 | 		for(size_t j = i; j < 4; j++) {
392 | 			char var_name[256];
393 | 			snprintf(var_name, sizeof(var_name), "g_%s%s", dimension_names[i], dimension_names[j]);
394 | 
395 | 			/* We treat our 3D data as 1D */
396 | 			num_points = read_variable_from_h5_file_and_return_num_points(
397 | 				group_id, var_name, &g[index]);
398 | 
399 | 			/* This is an error, something didn't work as expected */
400 | 			/* We have already printed what */
401 | 			if (num_points <= 0)
402 | 				return num_points;
403 | 
404 | 			if (num_points != expected_num_points) {
405 | 				lux_print("Number of points in Gammas inconsistent with coordinates\n");
406 | 				return -1;
407 | 			}
408 | 			index++;
409 | 		}
410 | 
411 | 	lux_debug("Read metric\n");
412 | 
413 | 	void *rho;
414 | 	{
415 | 		/* We treat our 3D data as 1D */
416 | 		num_points = read_variable_from_h5_file_and_return_num_points(
417 | 			group_id, "rho", &rho);
418 | 
419 | 		/* This is an error, something didn't work as expected */
420 | 		/* We have already printed what */
421 | 		if (num_points <= 0)
422 | 			return num_points;
423 | 
424 | 		if (num_points != expected_num_points) {
425 | 			lux_print("Number of points in Gammas inconsistent with coordinates\n");
426 | 			return -1;
427 | 		}
428 | 	}
429 | 
430 | 	lux_debug("Read fluid\n");
431 | 
432 | 	/* Finally, we create the images */
433 | 
434 | 	imgfmt.image_channel_order = CL_R;         /* use one channel */
435 | 	imgfmt.image_channel_data_type = CL_FLOAT; /* each channel is a float */
436 | 	imgdesc.image_type = CL_MEM_OBJECT_IMAGE3D;
437 | 	imgdesc.image_width = EGO->num_points.s[1];  /* x */
438 | 	imgdesc.image_height = EGO->num_points.s[2]; /* y */
439 | 	imgdesc.image_depth = EGO->num_points.s[3];  /* z */
440 | 	imgdesc.image_row_pitch = 0;
441 | 	imgdesc.image_slice_pitch = 0;
442 | 	imgdesc.num_mip_levels = 0;
443 | 	imgdesc.num_samples = 0;
444 | 	imgdesc.buffer = NULL;
445 | 
446 | 	index = 0;
447 | 	for (size_t i = 0; i < 4; i++)
448 | 		for (size_t j = 0; j < 4; j++)
449 | 			for (size_t k = j; k < 4; k++) {
450 | 				/* We fill _t1 only the first time, when snapshot_index = 0,
451 | 				 * in all the other cases we fill _t2, and then we shift the pointers.*/
452 | 				if (load_in_t1){
453 | 					EGO->spacetime_t1[index] = clCreateImage(
454 | 						EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
455 | 						&imgdesc, Gamma[index], &err);
456 | 				}else{
457 | 					EGO->spacetime_t2[index] = clCreateImage(
458 | 						EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
459 | 						&imgdesc, Gamma[index], &err);
460 | 				}
461 | 				/* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */
462 | 				if (err != CL_SUCCESS) {
463 | 					lux_print("Error in creating images\n");
464 | 					return err;
465 | 				}
466 | 				index++;
467 | 			}
468 | 
469 | 	for (size_t i = 0; i < 4; i++)
470 | 		for (size_t j = i; j < 4; j++) {
471 | 			/* We fill _t1 only the first time, when snapshot_index = 0,
472 | 			 * in all the other cases we fill _t2, and then we shift the pointers.*/
473 | 			if (load_in_t1){
474 | 				EGO->spacetime_t1[index] = clCreateImage(
475 | 					EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
476 | 					&imgdesc, g[index-40], &err);
477 | 			}else{
478 | 				EGO->spacetime_t2[index] = clCreateImage(
479 | 					EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
480 | 					&imgdesc, g[index-40], &err);
481 | 			}
482 | 			/* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */
483 | 			if (err != CL_SUCCESS) {
484 | 				lux_print("Error in creating images\n");
485 | 				return err;
486 | 			}
487 | 			index++;
488 | 		}
489 | 
490 | 	{
491 | 		/* We fill _t1 only the first time, when snapshot_index = 0,
492 | 		 * in all the other cases we fill _t2, and then we shift the pointers.*/
493 | 		if (load_in_t1){
494 | 			EGO->spacetime_t1[index] = clCreateImage(
495 | 				EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
496 | 				&imgdesc, rho, &err);
497 | 		}else{
498 | 			EGO->spacetime_t2[index] = clCreateImage(
499 | 				EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt,
500 | 				&imgdesc, rho, &err);
501 | 		}
502 | 		/* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */
503 | 		if (err != CL_SUCCESS) {
504 | 			lux_print("Error in creating images\n");
505 | 			return err;
506 | 		}
507 | 	}
508 | 
509 | 	lux_debug("Images created\n");
510 | 
511 | 	for (size_t i = 0; i < 40; i++)
512 | 		free(Gamma[i]);
513 | 
514 | 	for (size_t i = 0; i < 10; i++)
515 | 		free(g[i]);
516 | 
517 | 	free(rho);
518 | 
519 | 	status = H5Fclose(file_id);
520 | 	if (status != 0) {
521 | 		printf("Error in closing HDF5 file");
522 | 		return status;
523 | 	}
524 | 
525 | 	end = clock();
526 | 	cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
527 | 
528 | 	lux_print("Reading file and creating images for time %s took %.5f s\n",
529 | 	          time,
530 | 	          cpu_time_used);
531 | 
532 | 	return 0;
533 | }
534 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------