├── tests ├── sample.rap ├── Makefile ├── sample.c └── ck_sample.c ├── sim ├── Kerr.rap ├── Kerr.opts ├── infcam.opts ├── gray.opts ├── Makefile ├── initcond.h ├── gray.h ├── gray │ ├── infcam.c │ └── Kerr │ │ └── ocl.c └── gray.c ├── .gitignore ├── sim-org ├── Makefile ├── icond.opts ├── setup.opts ├── AoS.cl ├── SoA.cl ├── param.opts ├── flow.cl ├── RK4.cl ├── phys.cl ├── evolve.c ├── icond.c ├── gray.h ├── driver.cl ├── build.c ├── rt.cl ├── preamble.cl ├── dyst.cl ├── interp.cl ├── gray.c ├── KS.cl └── io.c ├── doc ├── Makefile └── Doxyfile ├── Makefile ├── tools ├── gray.py ├── generate_data.py └── boosted_ks.py ├── README.md └── COPYING /tests/sample.rap: -------------------------------------------------------------------------------- 1 | XSPEC size_t n; 2 | ISPEC size_t n1, n2; 3 | PARAMS double alpha; 4 | STATES double *x, *y, *z; 5 | -------------------------------------------------------------------------------- /sim/Kerr.rap: -------------------------------------------------------------------------------- 1 | OPTS size_t nque; cl_command_queue *que; 2 | XSPEC size_t n; 3 | ISPEC size_t bsz, gsz; 4 | PARAMS double aspin, target; 5 | STATES cl_mem rays; 6 | -------------------------------------------------------------------------------- /sim/Kerr.opts: -------------------------------------------------------------------------------- 1 | real m_bh:M = 4e6 = strtod(val, &rem); /**< Mass of the central black hole in unit of solar mass; note that it is needed in radiative transfer but not in geodesic integration */ 2 | real a_spin:a = 0 = strtod(val, &rem); /**< Dimensionless black hole spin */ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Global settings 2 | * 3 | !*/ 4 | 5 | # Source codes 6 | !*.[hc] 7 | !*.cl 8 | !*.py 9 | 10 | # Optgen-ed files 11 | !*.rap 12 | !*.opts 13 | *_rap.h 14 | *_opts.h 15 | 16 | # Makefile etc 
17 | !*file 18 | 19 | # Documentations 20 | html/ 21 | latex/ 22 | !*.md 23 | 24 | # Data files 25 | !*.?sv 26 | -------------------------------------------------------------------------------- /sim-org/Makefile: -------------------------------------------------------------------------------- 1 | all: gray.la 2 | 3 | install: gray.la 4 | lux-install *.cl gray.la +i sim 5 | 6 | %.h: %.opts 7 | lux-optgen $< 8 | 9 | gray.la: gray.h *.c icond.h param.h setup.h 10 | lux-build gray.h *.c -f opencl hdf5 -o gray.la 11 | 12 | clean: 13 | -rm -fr .libs _libs 14 | -rm -f *.lo gray.*so gray.la 15 | -rm -f icond.h param.h setup.h 16 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | default: doc 2 | 3 | all: 4 | 5 | doc: html/index.html latex/refman.pdf 6 | 7 | check: 8 | 9 | install: 10 | 11 | clean: 12 | 13 | html/index.html: 14 | doxygen 15 | # Chain with && so a failed cd or pdflatex run fails the target instead of being masked 16 | latex/refman.pdf: 17 | if [ ! -f latex/refman.tex ]; then doxygen; fi 18 | cd latex && pdflatex refman && pdflatex refman 19 | 20 | clean: 21 | rm -rf html/ latex/ 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SUBDIRS = doc tests sim 2 | 3 | ifeq ($(MAKECMDGOALS),) 4 | GOALS = all 5 | else 6 | GOALS = $(MAKECMDGOALS) 7 | endif 8 | .PHONY: default all doc check install clean recursive # doc/ is a real directory; these are commands, not files 9 | default: all 10 | 11 | all: recursive 12 | 13 | doc: recursive 14 | 15 | check: recursive 16 | 17 | install: recursive 18 | 19 | clean: recursive 20 | 21 | recursive: 22 | @list='$(SUBDIRS)'; goals='$(GOALS)'; for subdir in $$list; do \ 23 | test "$$subdir" = .
|| (cd $$subdir && $(MAKE) $$goals) || exit 1; \ 24 | done 25 | -------------------------------------------------------------------------------- /sim-org/icond.opts: -------------------------------------------------------------------------------- 1 | # Initial conditions 2 | real w_img:w = 64 = strtod(val, &rem); /**< Width of the image in \f$GM/c^2\f$ */ 3 | real h_img:h = 64 = strtod(val, &rem); /**< Height of the image in \f$GM/c^2\f$ */ 4 | real r_obs:r = 1024 = strtod(val, &rem); /**< Distance of the image from the black hole */ 5 | real i_obs:i = 60 = strtod(val, &rem); /**< Inclination angle of the image in degrees */ 6 | real j_obs:j = 0 = strtod(val, &rem); /**< Azimuthal angle of the image in degrees */ 7 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | default: check 2 | 3 | all: sample.la ck_sample 4 | 5 | doc: 6 | # Run every ck_* binary; stop at the first failing one so that 'make check' reports the failure 7 | check: all 8 | for ck in $$(ls -1 ck_* | grep -v '\.'); do ./$$ck --debug || exit 1; done 9 | 10 | install: 11 | 12 | clean: 13 | for f in $$(ls *.rap 2> /dev/null); do rm -f $${f%.rap}_rap.h; done 14 | for g in $$(ls *.opts 2> /dev/null); do rm -f $${g%.opts}_opts.h; done 15 | rm -fr .libs _libs 16 | rm -f *.lo *.so *.la # remove modules (globs spelled out: recipes run under /bin/sh, which may lack brace expansion) 17 | rm -f $$(ls -1 ck_* | grep -v '\.') # remove binaries 18 | 19 | ck_%: %_rap.h ck_%.c 20 | lux-build $^ -o $@ 21 | 22 | %.la: %_rap.h %.c 23 | lux-build $^ -o $@ 24 | 25 | %_rap.h: %.rap 26 | lux-rapgen $< 27 | 28 | %_opts.h: %.opts 29 | lux-optgen $< 30 | -------------------------------------------------------------------------------- /sim/infcam.opts: -------------------------------------------------------------------------------- 1 | # Camera Setup 2 | real w_img:w = 64 = strtod(val, &rem); /**< Width of the image in \f$GM/c^2\f$ */ 3 | real h_img:h = 64 = strtod(val, &rem); /**< Height of the image in \f$GM/c^2\f$ */ 4 | real r_obs:r = 1024 = strtod(val, &rem); /**< Distance of the image
from the black hole; TODO: analytically integrate from infinity */ 5 | real i_obs:i = 60 = strtod(val, &rem); /**< Inclination angle of the image in degrees */ 6 | real j_obs:j = 0 = strtod(val, &rem); /**< Azimuthal angle of the image in degrees */ 7 | 8 | # Numerical Setup 9 | const char * coordinates:coor = "pxcenter" = val; /**< Coordinate system */ 10 | size_t n_width:W = 512 = strtoul(val, &rem, 0); /**< Number of rays along the width of the image */ 11 | size_t n_height:H = 512 = strtoul(val, &rem, 0); /**< Number of rays along the height of the image */ 12 | -------------------------------------------------------------------------------- /sim/gray.opts: -------------------------------------------------------------------------------- 1 | unsigned i_platform:ip = 0 = strtoul(val, &rem, 0); /**< Index of platform to use */ 2 | unsigned i_device:id = 0 = strtoul(val, &rem, 0); /**< Index of device to use */ 3 | cl_device_type device_type:type = CL_DEVICE_TYPE_ALL = strtotype(val); /**< Type of device to use */ 4 | 5 | const char * spacetime:st = "Kerr" = val; /**< Spacetime geometry */ 6 | 7 | const char * initcond:ic = "infcam" = val; /**< Initial conditions for the rays */ 8 | 9 | real t_init:t0 = 0 = strtod(val, &rem); /**< Initial time */ 10 | real dt_dump:dt = -32 = strtod(val, &rem); /**< Time interval between dumps */ 11 | size_t i_init:i0 = 0 = strtoul(val, &rem, 0); /**< Initial dump id */ 12 | size_t n_dump:N = 64 = strtoul(val, &rem, 0); /**< Number of dumps (in addition to the initial condition) */ 13 | 14 | const char * rayfile:ray = "ray%04d.h5" = val; /**< File name format for ray dump */ 15 | -------------------------------------------------------------------------------- /sim/Makefile: -------------------------------------------------------------------------------- 1 | default: all 2 | 3 | all: gray/Kerr/ocl.la gray/infcam.la gray.la 4 | 5 | doc: 6 | 7 | check: 8 | 9 | install: gray/Kerr/ocl.la gray/infcam.la gray.la 10 | lux-install $^ +i sim 
11 | # clean: glob lists are spelled out because recipes run under /bin/sh, which may lack brace expansion 12 | clean: 13 | for f in $$(ls *.rap 2> /dev/null); do rm -f $${f%.rap}_rap.h; done 14 | for g in $$(ls *.opts 2> /dev/null); do rm -f $${g%.opts}_opts.h; done 15 | rm -fr .libs _libs */.libs */_libs */*/.libs */*/_libs 16 | rm -f *.lo *.so *.la */*.lo */*.so */*.la */*/*.lo */*/*.so */*/*.la 17 | 18 | # mod: interfaces sources options etc 19 | # v v v v 20 | gray/Kerr/ocl.la: Kerr_rap.h gray/Kerr/ocl.c Kerr_opts.h 21 | gray/infcam.la: initcond.h gray/infcam.c infcam_opts.h 22 | gray.la: gray.h initcond.h Kerr_rap.h gray.c gray_opts.h Kerr_opts.h infcam_opts.h 23 | lux-build $^ -f hdf5 -o $@ 24 | 25 | %.la: 26 | lux-build $^ -o $@ 27 | 28 | %_rap.h: %.rap 29 | lux-rapgen $< 30 | 31 | %_opts.h: %.opts 32 | lux-optgen $< 33 | -------------------------------------------------------------------------------- /sim-org/setup.opts: -------------------------------------------------------------------------------- 1 | # Job setup/configuration 2 | unsigned i_platform:ip = 0 = strtoul(val, &rem, 0); /**< Index of platform to use */ 3 | unsigned i_device:id = 0 = strtoul(val, &rem, 0); /**< Index of device to use */ 4 | cl_device_type device_type:type = CL_DEVICE_TYPE_ALL = strtotype(val); /**< Type of device to use */ 5 | 6 | size_t precision:p = sizeof(float) = strtoprec(val); /**< Size of real type */ 7 | const char * scheme:scm = "RK4" = val; /**< Integration scheme */ 8 | const char * morder:mo:ml = "AoS" = val; /**< Memory order/layout */ 9 | const char * kflags:kf = "-w" = val; /**< Kernel compilation flags */ 10 | 11 | const char * outfile:out = "%04d.raw" = val; /**< Dump file name format */ 12 | real t_init:t0 = 0 = strtod(val, &rem); /**< Initial time */ 13 | real dt_dump:dt = -32 = strtod(val, &rem); /**< Time interval between dumps */ 14 | size_t i_init:i0 = 0 = strtoul(val, &rem, 0); /**< Initial dump id */ 15 | size_t n_sub:n = 1024 = strtoul(val, &rem, 0); /**< Number of substeps per dump */ 16 | size_t n_dump:N = 64 = strtoul(val, &rem, 0); /**< Number of dumps (in addition to the initial condition) */ 17 |
-------------------------------------------------------------------------------- /sim-org/AoS.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** Array-of-Structures global index 23 | ** 24 | ** GRay2 uses OpenCL's just-in-time compilation feature to implement 25 | ** run-time configurable algorithms. In this file we provide global 26 | ** index for Array-of-Structures memory order. 27 | **/ 28 | 29 | #define DATA(g, s) data[g * n_data + s] 30 | #define INFO(g, s) info[g * n_info + s] 31 | -------------------------------------------------------------------------------- /sim-org/SoA.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** Structure-of-Arrays global index 23 | ** 24 | ** GRay2 uses OpenCL's just-in-time compilation feature to implement 25 | ** run-time configurable algorithms. In this file we provide global 26 | ** index for Structure-of-Arrays memory order. 27 | **/ 28 | 29 | #define DATA(g, s) data[s * n_rays + g] 30 | #define INFO(g, s) info[s * n_rays + g] 31 | -------------------------------------------------------------------------------- /sim/initcond.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 
19 | */ 20 | #ifndef _GRAY_INITCOND_H_ 21 | #define _GRAY_INITCOND_H_ 22 | 23 | #include 24 | #include 25 | 26 | typedef struct LuxSgray_initcond Lux_gray_initcond; 27 | typedef struct LuxOgray_initcond Lux_gray_initcond_opts; 28 | 29 | struct LuxSgray_initcond { 30 | int (*init)(Lux_gray_initcond *, cl_mem); 31 | size_t n_width, n_height; 32 | }; 33 | 34 | struct LuxOgray_initcond { 35 | size_t nque; 36 | cl_command_queue *que; 37 | void *opts; 38 | }; 39 | 40 | #endif /* _GRAY_INITCOND_H */ 41 | -------------------------------------------------------------------------------- /sim-org/param.opts: -------------------------------------------------------------------------------- 1 | # Runtime parameters needed in kernels 2 | 3 | # Coordinate system 4 | real M_ADM:m = 4.3e6 = strtod(val, &rem); /**< Spacetime mass in unit of solar mass; It is needed in radiative transfer and in the spacetime boundary conditions*/ 5 | const char * coordinates:coor = "dyst" = val; /**< Coordinate system */ 6 | const char * dyst_file:data = "data.h5" = val; /**< File with all the information for dynamical spacetime */ 7 | size_t enable_fast_light:fast_light = 0 = strtoul(val, &rem, 0); /**< If non-zero, assume fast light */ 8 | 9 | # Numerical setup 10 | size_t w_rays:W = 512 = strtoul(val, &rem, 0); /**< Number of rays along the width of the image */ 11 | size_t h_rays:H = 512 = strtoul(val, &rem, 0); /**< Number of rays along the height of the image */ 12 | 13 | # Plasma parameter 14 | real n_electron:ne = 1e6 = strtod(val, &rem); /**< Electron number density normalization */ 15 | real beta_crit:bc = 1 = strtod(val, &rem); /**< Critical plasma \f$\beta_\mathrm{crit} = P_\mathrm{gas}/P_\mathrm{mag}\f$ */ 16 | real R_high:Rh = 1 = strtod(val, &rem); /**< The ratio \f$R_\mathrm{high} = T_p/T_e\f$ at high-beta, weakly magnetized (disk) regions */ 17 | real R_low:Rl = 1 = strtod(val, &rem); /**< The ratio \f$R_\mathrm{low} = T_p/T_e\f$ at low-beta, strongly magnetized (jet) regions */ 18 | 
real * nu = NULL = strtoda(val, &rem); /**< Frequency channels; NULL means turning off radiative transfer */ 19 | -------------------------------------------------------------------------------- /tools/gray.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Chi-kwan Chan 2 | # Copyright (C) 2016 Steward Observatory 3 | # 4 | # This file is part of GRay2. 5 | # 6 | # GRay2 is free software: you can redistribute it and/or modify it 7 | # under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # GRay2 is distributed in the hope that it will be useful, but WITHOUT 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 14 | # License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with GRay2. If not, see . 18 | 19 | from os import path 20 | import numpy as np 21 | 22 | def load_raw(name): 23 | """ Load a GRay2 raw file """ 24 | ext = path.splitext(name)[1][1:] 25 | if ext != "raw": 26 | raise NameError("Fail to load file \"{}\", " 27 | "which is in an unsupported format".format(name)) 28 | 29 | with open(name, "rb") as f: 30 | print("Loading GRay2 raw file \"{}\"... 
".format(name), end="") 31 | 32 | d = np.fromfile(f, dtype=np.uint64, count=4) 33 | t = np.double if d[0] == 8 else np.single 34 | n = d[1] 35 | w = d[2] 36 | h = d[3] 37 | 38 | states = np.fromfile(f, dtype=t, count=n*w*h).reshape((h,w,n)) 39 | diagno = np.fromfile(f, dtype=t, count= w*h).reshape((h,w )) 40 | 41 | print("DONE") 42 | 43 | return states, diagno 44 | -------------------------------------------------------------------------------- /sim/gray.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 
19 | */ 20 | #ifndef _GRAY_H_ 21 | #define _GRAY_H_ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | 31 | #include "initcond.h" 32 | 33 | #include "gray_opts.h" 34 | #include "Kerr_opts.h" 35 | #include "infcam_opts.h" 36 | 37 | struct gray { 38 | Lux_job super; 39 | 40 | struct gray_opts gray; 41 | union { 42 | struct Kerr_opts Kerr; 43 | } spacetime; 44 | union { 45 | struct infcam_opts infcam; 46 | } initcond; 47 | 48 | Lux_opencl *ocl; 49 | Lux_io *io; 50 | 51 | struct darray rays; 52 | real *rays_host; 53 | 54 | struct basealgo gi; 55 | struct basealgo flow; 56 | struct basealgo rt; 57 | 58 | double t, dt; 59 | size_t i, n; 60 | }; 61 | 62 | #endif /* _GRAY_H */ 63 | -------------------------------------------------------------------------------- /sim-org/flow.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** A template for flow models 23 | ** 24 | ** A template for an accretion flows model, which may be 25 | ** interpolation of GRMHD simulations or analytical models. 
26 | **/ 27 | 28 | struct flow { 29 | real ne; 30 | real te; 31 | real b; 32 | real bkcos; 33 | real shift; 34 | }; 35 | 36 | struct flow 37 | getflow(real4 q, /* "up" position 4-vector q^mu */ 38 | real4 k, /* "down" momentum 4-vector k_mu */ 39 | SPACETIME_PROTOTYPE_ARGS) 40 | { 41 | struct flow f; 42 | 43 | real4 u = {1, 0, 0, 0}; 44 | real4 b = {0, 0, 0, 0}; /* magnetic field four-vector defined as 45 | b^mu = (1/2) 46 | epsilon^{mu,nu,kappa,lambda} 47 | u_nu F_{lambda,kappa}, 48 | see Gammie et al. (2003) */ 49 | 50 | f.ne = interpolate(q, bounding_box, num_points, rho_t1, rho_t2); 51 | f.te = 1e12; 52 | f.b = 0; 53 | 54 | f.shift = -dot(k, u); 55 | f.bkcos = dot(k, b) / (f.shift * f.b + (real)EPSILON); 56 | 57 | return f; 58 | } 59 | -------------------------------------------------------------------------------- /sim-org/RK4.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** Classical 4th-order Runge-Kutta integrator 23 | ** 24 | ** GRay2 uses OpenCL's just-in-time compilation feature to implement 25 | ** a run-time configurable algorithms. 
In this file we implement the 26 | ** classical 4th-order Runge-Kutta integrator in integrate(). 27 | **/ 28 | 29 | /** 30 | ** OpenCL implementation of the classical 4th-order Runge-Kutta integrator 31 | ** 32 | ** Assuming rhs() is provided, this function performs the classical 33 | ** 4th-order Runge-Kutta integrator with a single step size dt. 34 | ** 35 | ** \return The new state 36 | **/ 37 | struct state 38 | integrate(struct state s, /**< state of the ray */ 39 | real dt, /**< step size */ 40 | SPACETIME_PROTOTYPE_ARGS) 41 | { 42 | struct state k1 = rhs(X(E(s) ), SPACETIME_ARGS); 43 | struct state k2 = rhs(X(E(s) + K(0.5) * dt * E(k1)), SPACETIME_ARGS); 44 | struct state k3 = rhs(X(E(s) + K(0.5) * dt * E(k2)), SPACETIME_ARGS); 45 | struct state k4 = rhs(X(E(s) + dt * E(k3)), SPACETIME_ARGS); 46 | return X(E(s) + dt * (E(k1) + K(2.0) * (E(k2) + E(k3)) + E(k4)) / K(6.0)); 47 | } 48 | -------------------------------------------------------------------------------- /sim/gray/infcam.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 
19 | */ 20 | 21 | #include "initcond.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "../infcam_opts.h" 28 | 29 | struct infcam { 30 | Lux_gray_initcond super; 31 | size_t nque; 32 | cl_command_queue *que; 33 | }; 34 | 35 | #define EGO ((struct infcam *)ego) 36 | 37 | static int 38 | init(Lux_gray_initcond *ego, cl_mem rays) 39 | { 40 | /* TODO: initialize cl_mem */ 41 | return 0; 42 | } 43 | 44 | void * 45 | LUX_MKMOD(const void *opts) 46 | { 47 | void *ego; 48 | 49 | lux_debug("GRay2:infcam: constructing an instance with options %p\n", opts); 50 | 51 | ego = zalloc(sizeof(struct infcam)); 52 | if(ego) { 53 | struct infcam_opts *o = ((Lux_gray_initcond_opts*)opts)->opts; 54 | 55 | EGO->super.init = init; 56 | EGO->super.n_width = o->n_width; 57 | EGO->super.n_height = o->n_height; 58 | 59 | EGO->nque = ((Lux_gray_initcond_opts*)opts)->nque; 60 | EGO->que = ((Lux_gray_initcond_opts*)opts)->que; 61 | } 62 | return ego; 63 | } 64 | 65 | void 66 | LUX_RMMOD(void *ego) 67 | { 68 | lux_debug("GRay2:infcam: destructing instance %p\n", ego); 69 | 70 | free(ego); 71 | } 72 | -------------------------------------------------------------------------------- /tests/sample.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of lux. 6 | * 7 | * lux is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * lux is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with lux. If not, see . 19 | */ 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define LUX_RAP_CASTING 1 27 | #include "sample_rap.h" 28 | 29 | static int 30 | driver(Lux_spec *s, Lux_args *a) 31 | { 32 | size_t i, j; 33 | for(i = 0; i < s->n1; ++i) { 34 | for(j = 0; j < s->n2; ++j) { 35 | size_t h = i * s->n2 + j; 36 | if(h < s->n) 37 | a->z[h] = a->x[h] + a->alpha * a->y[h]; 38 | } 39 | } 40 | return 0; 41 | } 42 | 43 | Lux_solution * 44 | LUX_MOD(Lux_problem *prob, unsigned flags) 45 | { 46 | Lux_spec *spec1 = mkspec(prob, (prob->n+ 1-1)/ 1, 1); 47 | Lux_spec *spec2 = mkspec(prob, (prob->n+ 2-1)/ 2, 2); 48 | Lux_spec *spec3 = mkspec(prob, (prob->n+ 4-1)/ 4, 4); 49 | Lux_spec *spec4 = mkspec(prob, (prob->n+ 8-1)/ 8, 8); 50 | Lux_spec *spec5 = mkspec(prob, (prob->n+16-1)/16, 16); 51 | Lux_spec *spec6 = mkspec(prob, (prob->n+32-1)/32, 32); 52 | Lux_spec *spec7 = mkspec(prob, (prob->n+64-1)/64, 64); 53 | 54 | Lux_args *args = mkargs(prob); 55 | 56 | return pvector( 57 | Lux_solution, 58 | {{{driver, spec1}, args}, {0, 0, prob->n, 0}}, 59 | {{{driver, spec2}, args}, {0, 0, prob->n, 0}}, 60 | {{{driver, spec3}, args}, {0, 0, prob->n, 0}}, 61 | {{{driver, spec4}, args}, {0, 0, prob->n, 0}}, 62 | {{{driver, spec5}, args}, {0, 0, prob->n, 0}}, 63 | {{{driver, spec6}, args}, {0, 0, prob->n, 0}}, 64 | {{{driver, spec7}, args}, {0, 0, prob->n, 0}} 65 | ); 66 | 67 | (void)flags; /* silence unused variable warning */ 68 | } 69 | -------------------------------------------------------------------------------- /sim-org/phys.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 
6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | /** \file 22 | ** Composing different physics modules 23 | ** 24 | ** This file contains functions that compose different physics, e.g., 25 | ** geodesic integration, radiative transfer, together to form the 26 | ** initial conditions and the right hand side of the full 27 | ** differential equation. 28 | **/ 29 | 30 | struct state { 31 | struct gr g; 32 | struct rt r; 33 | }; 34 | /* Build the full initial state by pairing the result of gr_icond() with the result of rt_icond(). */ 35 | struct state 36 | icond(real r_obs, /**< distance of the image from the black hole */ 37 | real i_obs, /**< inclination angle of the image in degrees */ 38 | real j_obs, /**< azimuthal angle of the image in degrees */ 39 | real alpha, /**< one of the local Cartesian coordinates */ 40 | real beta) /**< the other local Cartesian coordinate */ 41 | { 42 | return (struct state){ 43 | gr_icond(r_obs, i_obs, j_obs, alpha, beta), 44 | rt_icond() 45 | }; 46 | } 47 | /* Choose the step to take: returns 0 when eps < 0.01 or when the outer-domain test passes; otherwise limits |dt| to 0.01 * eps * eps while keeping the sign of dt. */ 48 | real 49 | getdt(struct gr g, real dt) 50 | { 51 | real r = sqrt(getrr(g.q)); 52 | real eps = geteps(g.q); 53 | 54 | if(eps < 0.01) /* stop near horizon */ 55 | return 0; 56 | 57 | if(dot(g.q.s123, g.q.s123) < 0 && r > K(1e3)) /* stop outside domain; NOTE(review): both dot() operands are g.q.s123, and with the built-in Euclidean dot() a self dot product is never negative, so this branch looks unreachable as written -- confirm the intended operands */ 58 | return 0; 59 | 60 | if(fabs(dt) > 0.01 * eps * eps) 61 | return sign(dt) * 0.01 * eps * eps; 62 | else 63 | return dt; 64 | } 65 | 66 | struct state 67 | rhs(struct state s, SPACETIME_PROTOTYPE_ARGS) /**< state of the ray
*/ 68 | { 69 | real4 q = s.g.q; 70 | real4 k = down(q, s.g.u, SPACETIME_ARGS); 71 | return (struct state){ 72 | gr_rhs(s.g, SPACETIME_ARGS), 73 | rt_rhs(s.r, getflow(q, k, SPACETIME_ARGS))}; 74 | } 75 | -------------------------------------------------------------------------------- /sim-org/evolve.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | #include "gray.h" 21 | 22 | static inline size_t 23 | max(size_t a, size_t b) 24 | { 25 | return a > b ? 
a : b; 26 | } 27 | 28 | real 29 | evolve(Lux_job *ego, real t, real target, size_t n_sub) 30 | { 31 | Lux_opencl *ocl = EGO->ocl; 32 | Lux_opencl_kernel *evolve = EGO->evolve; 33 | 34 | struct param *p = &EGO->param; 35 | struct setup *s = &EGO->setup; 36 | 37 | const size_t sz = s->precision; 38 | const size_t n_data = EGO->n_coor + EGO->n_freq * 2; 39 | const size_t n_info = EGO->n_info; 40 | 41 | const size_t shape[] = {p->h_rays, p->w_rays}; 42 | 43 | size_t arg_num = 0; 44 | 45 | evolve->setM(evolve, arg_num, EGO->data); 46 | arg_num++; 47 | evolve->setM(evolve, arg_num, EGO->info); 48 | arg_num++; 49 | evolve->setR(evolve, arg_num, target - t); 50 | arg_num++; 51 | evolve->setW(evolve, arg_num, n_sub); 52 | arg_num++; 53 | evolve->setS(evolve, arg_num, sz * max(n_data, n_info)); 54 | arg_num++; 55 | evolve->set(evolve, arg_num, sizeof(cl_float8), &(EGO->bounding_box)); 56 | arg_num++; 57 | evolve->set(evolve, arg_num, sizeof(cl_int4), &(EGO->num_points)); 58 | arg_num++; 59 | /* We have 40 Gammas + 10 metric components + 1 fluid property at t1 */ 60 | for (size_t old_arg_num = arg_num; arg_num < old_arg_num + 51; arg_num++) 61 | evolve->setM(evolve, arg_num, EGO->spacetime_t1[arg_num-old_arg_num]); 62 | /* And here the 40 Gammas + 10 metric components + 1 fluid property at t2 */ 63 | for (size_t old_arg_num = arg_num; arg_num < old_arg_num + 51; arg_num++) 64 | evolve->setM(evolve, arg_num, EGO->spacetime_t2[arg_num-old_arg_num]); 65 | 66 | return ocl->exec(ocl, evolve, 2, shape); 67 | } 68 | -------------------------------------------------------------------------------- /sim/gray/Kerr/ocl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 
6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #define LUX_RAP_CASTING 1 28 | #include "../../Kerr_rap.h" 29 | 30 | static int 31 | driver(Lux_spec *s, Lux_args *a) 32 | { 33 | lux_print("Kerr driver %p %p\n", s, a); 34 | 35 | return 0; 36 | 37 | (void)s; /* silence unused variable warning */ 38 | (void)a; /* silence unused variable warning */ 39 | } 40 | 41 | Lux_solution * 42 | LUX_MOD(Lux_problem *prob, unsigned flags) 43 | { 44 | Lux_opencl *ocl = NULL; 45 | struct LuxOopencl opts = OPENCL_NULL; 46 | 47 | Lux_spec *spec1 = mkspec(prob, (prob->n+ 1-1)/ 1, 1); 48 | Lux_spec *spec2 = mkspec(prob, (prob->n+ 2-1)/ 2, 2); 49 | Lux_spec *spec3 = mkspec(prob, (prob->n+ 4-1)/ 4, 4); 50 | Lux_spec *spec4 = mkspec(prob, (prob->n+ 8-1)/ 8, 8); 51 | Lux_spec *spec5 = mkspec(prob, (prob->n+16-1)/16, 16); 52 | Lux_spec *spec6 = mkspec(prob, (prob->n+32-1)/32, 32); 53 | Lux_spec *spec7 = mkspec(prob, (prob->n+64-1)/64, 64); 54 | 55 | Lux_args *args = mkargs(prob); 56 | 57 | opts.nque = prob->nque; 58 | opts.que = prob->que; 59 | ocl = lux_load("opencl", &opts); 60 | 61 | lux_unload(ocl); 62 | 63 | return pvector( 64 | Lux_solution, 65 | {{{driver, spec1}, args}, {0, 0, 0, 0}}, 66 | {{{driver, spec2}, args}, {0, 0, 0, 0}}, 67 | {{{driver, spec3}, args}, {0, 0, 0, 0}}, 68 | {{{driver, spec4}, args}, 
{0, 0, 0, 0}}, 69 | {{{driver, spec5}, args}, {0, 0, 0, 0}}, 70 | {{{driver, spec6}, args}, {0, 0, 0, 0}}, 71 | {{{driver, spec7}, args}, {0, 0, 0, 0}} 72 | ); 73 | 74 | (void)flags; /* silence unused variable warning */ 75 | } 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GRay2 Documentation {#mainpage} 2 | 3 | GRay2 is a hardware-accelerated geodesic integrator for performing 4 | general relativistic ray tracing for accreting black holes. 5 | It is based on the [lux framework](https://luxsrc.org) and runs on a 6 | wide range of modern hardware/accelerators such as GPUs and Intel® 7 | Xeon Phi. 8 | 9 | ## For users 10 | 11 | For people who are interested in using GRay2 as-is, please download a 12 | tarball from GitHub's release page: 13 | 14 | https://github.com/luxsrc/gray/releases 15 | 16 | Assuming `lux` is installed (see https://github.com/luxsrc/lux), users 17 | can simply run 18 | 19 | make 20 | 21 | to build GRay2 as a `lux` job and run GRay2 by `lux gray`. 22 | 23 | ## For developers 24 | 25 | For people who are interested in contributing to GRay2, please fork 26 | GRay2's git repository on GitHub: 27 | 28 | https://github.com/luxsrc/gray 29 | 30 | work on your fork, and then create a pull request to merge your changes 31 | back to the main repository. 32 | 33 | GRay2 is flexible and easily extendable. 34 | To turn hard-wired constants into run-time options, follow the 35 | instructions in \ref newopts "this page". 36 | To add new computation kernels to GRay2, see \ref newkern "this page". 37 | We also keep track of a list of TODOs found in the code \ref todo 38 | "here". 39 | 40 | ## Structure of HDF5 files 41 | 42 | GRay2 can read spacetime and fluid data from HDF5 files. These files must 43 | be structured in a specific way: 44 | * At the top level, they must contain a group called "grid". 
This group has to 45 | contain three datasets named "x", "y", "z", which contain the coordinates 46 | along the three directions. 47 | * Always at the top level, all the groups that are not named "grid" will be 48 | considered time levels. The name of each such group has to be its time. For 49 | example, possible group names would be "1.0", "1.1", "1.2", ... The group 50 | "1.0" contains variables at that time. The groups have to be in alphanumerical 51 | order. 52 | * In each group, the following datasets have to be defined: Gamma_ttt, 53 | Gamma_ttx, Gamma_tty, Gamma_ttz, Gamma_txx, Gamma_txy, Gamma_txz, Gamma_tyy, 54 | Gamma_tyz, Gamma_tzz, Gamma_xtt, Gamma_xtx, Gamma_xty, Gamma_xtz, Gamma_xxx, 55 | Gamma_xxy, Gamma_xxz, Gamma_xyy, Gamma_xyz, Gamma_xzz, Gamma_ytt, Gamma_ytx, 56 | Gamma_yty, Gamma_ytz, Gamma_yxx, Gamma_yxy, Gamma_yxz, Gamma_yyy, Gamma_yyz, 57 | Gamma_yzz, Gamma_ztt, Gamma_ztx, Gamma_zty, Gamma_ztz, Gamma_zxx, Gamma_zxy, 58 | Gamma_zxz, Gamma_zyy, Gamma_zyz, Gamma_zzz, g_tt, g_tx, g_ty, g_tz, g_xx, 59 | g_xy, g_xz, g_yy, g_yz, g_zz, 60 | * All the variables must have the same precision (e.g., single or double). 61 | -------------------------------------------------------------------------------- /sim-org/icond.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | #include "gray.h" 21 | #include 22 | 23 | static inline size_t 24 | max(size_t a, size_t b) 25 | { 26 | return a > b ? a : b; 27 | } 28 | 29 | void 30 | icond(Lux_job *ego, real t_init) 31 | { 32 | Lux_opencl *ocl = EGO->ocl; 33 | 34 | struct icond *i = &EGO->icond; 35 | struct param *p = &EGO->param; 36 | struct setup *s = &EGO->setup; 37 | 38 | const size_t sz = s->precision; 39 | const size_t n_data = EGO->n_coor + EGO->n_freq * 2; 40 | const size_t n_info = EGO->n_info; 41 | 42 | const size_t shape[] = {p->h_rays, p->w_rays}; 43 | 44 | Lux_opencl_kernel *icond; 45 | 46 | lux_debug("GRay2: executing job %p\n", ego); 47 | 48 | icond = ocl->mkkern(ocl, "icond_drv"); 49 | 50 | 51 | size_t arg_num = 0; 52 | 53 | icond->setM(icond, arg_num, EGO->data); 54 | arg_num++; 55 | icond->setM(icond, arg_num, EGO->info); 56 | arg_num++; 57 | 58 | icond->setR(icond, arg_num, i->w_img); 59 | arg_num++; 60 | icond->setR(icond, arg_num, i->h_img); 61 | arg_num++; 62 | icond->setR(icond, arg_num, i->r_obs); 63 | arg_num++; 64 | icond->setR(icond, arg_num, i->i_obs); 65 | arg_num++; 66 | icond->setR(icond, arg_num, i->j_obs); 67 | arg_num++; 68 | 69 | icond->setS(icond, arg_num, sz * max(n_data, n_info)); 70 | arg_num++; 71 | 72 | icond->set(icond, arg_num, sizeof(cl_float8), &(EGO->bounding_box)); 73 | arg_num++; 74 | icond->set(icond, arg_num, sizeof(cl_int4), &(EGO->num_points)); 75 | arg_num++; 76 | /* We have 40 Gammas + 10 metric components + 1 fluid property at t1 */ 77 | for (size_t old_arg_num = arg_num; arg_num < old_arg_num + 51; arg_num++) 78 | icond->setM(icond, arg_num, EGO->spacetime_t1[arg_num-old_arg_num]); 79 | /* And here the 40 Gammas + 10 metric components + 1 fluid property at t2 */ 80 | for (size_t old_arg_num = arg_num; arg_num < old_arg_num + 51; arg_num++) 81 | icond->setM(icond, arg_num, 
EGO->spacetime_t2[arg_num-old_arg_num]); 82 | 83 | (void)ocl->exec(ocl, icond, 2, shape); 84 | 85 | ocl->rmkern(ocl, icond); 86 | } 87 | -------------------------------------------------------------------------------- /tests/ck_sample.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 
19 | */ 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #define LUX_RAP_CASTING 1 30 | #include "sample_rap.h" 31 | 32 | static double test_alpha = 3.0; 33 | 34 | static void 35 | test_init(Lux_problem *p) 36 | { 37 | int i; 38 | for(i = 0; i < (int)p->n; ++i) { 39 | p->x[i] = i; 40 | p->y[i] = i * 2.0; 41 | } 42 | } 43 | 44 | static int 45 | test_check(Lux_problem *p) 46 | { 47 | int failed = 0, i; 48 | for(i = 0; i < (int)p->n; ++i) 49 | if(p->z[i] != 7.0 * i) 50 | failed = 1; 51 | return failed; 52 | } 53 | 54 | int 55 | main(int argc, char *argv[]) 56 | { 57 | int failed = 0; 58 | 59 | Lux_problem prob; 60 | Lux_solver *solve; 61 | Lux_solution *sols; 62 | size_t i, n; 63 | 64 | double m_best = HUGE_VAL; 65 | size_t i_best = 0; 66 | 67 | prob.n = 1024 * 1024; 68 | prob.alpha = test_alpha; 69 | prob.x = malloc(sizeof(double) * prob.n); 70 | prob.y = malloc(sizeof(double) * prob.n); 71 | prob.z = malloc(sizeof(double) * prob.n); 72 | 73 | lux_setup(&argc, &argv); 74 | 75 | lux_print("1. Load solvers from the current directory into planner ... "); 76 | solve = lux_load("sample", NULL); 77 | lux_print("%p DONE\n", solve); 78 | 79 | lux_print("2. Solve the problem... "); 80 | sols = solve(&prob, LUX_PLAN_EXHAUSTIVE); 81 | n = pgetn(sols, 0); 82 | lux_print("%p; %zu solutions DONE\n", sols, n); 83 | 84 | lux_print("3. Estimate performance for the solutions ...\n"); 85 | for(i = 0; i < n; ++i) { 86 | double e = estimate(&sols[i].opcnt); 87 | lux_print(" * Solution %zu, estimated cost = %g\n", i, e); 88 | } 89 | lux_print(" DONE\n"); 90 | 91 | lux_print("4. 
Measure performance for the solutions ...\n"); 92 | for(i = 0; i < n; ++i) { 93 | Lux_task *t = mkluxbasetask(sols[i].task); 94 | double m = measure(t); 95 | free(t); 96 | lux_print(" * Solution %zu, measured cost = %g\n", i, m); 97 | if(m_best > m) { 98 | m_best = m; 99 | i_best = i; 100 | } 101 | } 102 | lux_print(" DONE\n"); 103 | 104 | lux_print("5. Run the optimal solutoin %zu ... ", i_best); 105 | { 106 | Lux_task *t; 107 | 108 | test_init(&prob); 109 | 110 | t = mkluxbasetask(sols[i_best].task); 111 | t->exec(t); 112 | free(t); 113 | 114 | failed = test_check(&prob); 115 | } 116 | if(failed) { 117 | lux_print("FAILED\n"); 118 | lux_abort(); 119 | } else 120 | lux_print("DONE\n"); 121 | 122 | lux_print("6. Free the solutions ... "); 123 | for(i = 0; i < n; ++i) 124 | free(sols[i].task.algo.spec); 125 | free(sols[0].task.args); 126 | pfree(sols); 127 | lux_print("DONE\n"); 128 | 129 | lux_print("7. Unload the solver ... "); 130 | lux_unload(solve); 131 | lux_print("DONE\n"); 132 | 133 | free(prob.x); 134 | free(prob.y); 135 | free(prob.z); 136 | 137 | return failed; 138 | } 139 | -------------------------------------------------------------------------------- /sim-org/gray.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** 23 | ** Data structure definitions and function declarations for GRay2 24 | ** 25 | ** GRay2 is implemented as a lux module. Its run-time data is stored 26 | ** in a subclass of Lux_job, which is defined in this header file. 27 | ** Additional structures that hold run-time adjustable parameters, 28 | ** constructor, destructor, internal functions, and standard methods 29 | ** in Lux_job, are all declared here as well. 30 | **/ 31 | #ifndef _GRAY_H_ 32 | #define _GRAY_H_ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include "icond.h" 42 | #include "param.h" 43 | #include "setup.h" 44 | 45 | /* Max number of times in the HDF5 file and max length of the group 46 | * name in the files */ 47 | #define MAX_AVAILABLE_TIMES 1024 48 | #define MAX_TIME_NAME_LENGTH 64 49 | 50 | /** 51 | ** Run-time data structure for GRay2 52 | ** 53 | ** To take advantage of all the low level features provided by lux, 54 | ** GRay2 is implemented as a lux module. Its runtime data is stored 55 | ** in a subclass of Lux_job so that it can be loaded by the lux 56 | ** runtime. 57 | **/ 58 | struct gray { 59 | Lux_job super; 60 | 61 | struct icond icond; 62 | struct param param; 63 | struct setup setup; 64 | 65 | size_t n_coor; 66 | size_t n_freq; 67 | size_t n_info; 68 | 69 | Lux_opencl *ocl; 70 | cl_mem data; /* device buffer passed as the first kernel argument (ray states) */ 71 | cl_mem info; /* device buffer passed as the second kernel argument (diagnostics) */ 72 | Lux_opencl_kernel *evolve; /* kernel launched by evolve() */ 73 | 74 | /* Grid details */ 75 | /* Bounding_box is a vector with 8 numbers: 76 | * {tmin, xmin, ymin, zmin, tmax, xmax, ymax, zmax} */ 77 | /* tmin and tmax are between the two loaded timesteps */ 78 | 79 | /* We need these quantities to convert from unnormalized OpenCL coordinates 80 | to physical coordinates and vice versa. 
*/ 81 | cl_float8 bounding_box; 82 | /* Number of points along the various coordinates */ 83 | cl_int4 num_points; /* The .s0 coordinate is not used */ 84 | /* num_points.s1 contains the number of points along the x direction */ 85 | /* num_points.s2 contains the number of points along the y direction */ 86 | /* num_points.s3 contains the number of points along the z direction */ 87 | 88 | /* We need 40+10+1 == 51 images to contain all the 40 Christoffel 89 | symbols, 10 metric components, and 1 fluid property */ 90 | 91 | /* We always have two timesteps loaded */ 92 | cl_mem spacetime_t1[40+10+1]; 93 | cl_mem spacetime_t2[40+10+1]; 94 | 95 | char available_times[MAX_AVAILABLE_TIMES][MAX_TIME_NAME_LENGTH]; 96 | 97 | cl_float max_available_time; 98 | 99 | }; 100 | 101 | #define EGO ((struct gray *)ego) 102 | #define CKR lux_check_func_success 103 | 104 | /** Build the OpenCL module for GRay2 */ 105 | extern Lux_opencl *build(Lux_job *); 106 | 107 | /** Set the initial conditions */ 108 | extern void icond(Lux_job *, real); 109 | 110 | /** Evolve the states of photons to the next (super) step */ 111 | extern real evolve(Lux_job *, real, real, size_t); 112 | 113 | /** Output data to a file */ 114 | extern void dump(Lux_job *, size_t); 115 | 116 | /** I/O helper functions */ 117 | extern size_t populate_ego_available_times(Lux_job *); 118 | extern size_t load_coordinates(Lux_job *); 119 | extern size_t load_snapshot(Lux_job *, size_t, size_t); 120 | extern void copy_snapshot(Lux_job *, size_t); 121 | 122 | #endif /* _GRAY_H_ */ 123 | -------------------------------------------------------------------------------- /sim-org/driver.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 
6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | /** \file 22 | ** Generic driver kernels 23 | ** 24 | ** GRay2 uses OpenCL's just-in-time compilation feature to implement 25 | ** run-time configurable algorithms. In this file we implement 26 | ** generic driver kernels icond_drv() and evolve_drv() that use 27 | ** IDX(h, k) to access global memory. 28 | ** 29 | ** We use the index convention `h`, `i`, `j`, `k` for time and the 30 | ** three spatial coordinates, respectively. We use `s` to index the 31 | ** record/field. These indices may be prefixed by `g` for global 32 | ** indices, `l` for local indices, etc. 
33 | **/ 34 | 35 | /** OpenCL driver kernel for initializing states */ 36 | __kernel void 37 | icond_drv(__global real *data, /**< states of the rays */ 38 | __global real *info, /**< diagnostic information */ 39 | const real w_img, /**< Width of the image in \f$GM/c^2\f$ */ 40 | const real h_img, /**< Height of the image in \f$GM/c^2\f$ */ 41 | const real r_obs, /**< Distance of the image from the black hole */ 42 | const real i_obs, /**< Inclination angle of the image in degrees */ 43 | const real j_obs, /**< Azimuthal angle of the image in degrees */ 44 | __local real *scratch, 45 | SPACETIME_PROTOTYPE_ARGS) 46 | { 47 | const size_t gj = get_global_id(0); /* for h, slowest changing index */ 48 | const size_t gi = get_global_id(1); /* for w, fastest changing index */ 49 | const size_t g = gi + gj * w_rays; 50 | 51 | if(gi < w_rays && gj < h_rays) { 52 | struct state d; 53 | int s; 54 | 55 | /* Compute initial conditions from parameters */ 56 | real alpha = ((gi + 0.5) / w_rays - 0.5) * w_img; 57 | real beta = ((gj + 0.5) / h_rays - 0.5) * h_img; 58 | d = icond(r_obs, i_obs, j_obs, alpha, beta); 59 | 60 | /* Output to global array */ 61 | for(s = 0; s < n_data; ++s) 62 | DATA(g, s) = ((real *)&d)[s]; 63 | 64 | for(s = 0; s < n_info; ++s) 65 | INFO(g, s) = getuu(d.g, SPACETIME_ARGS); 66 | } 67 | } 68 | 69 | /** OpenCL driver kernel for integrating the geodesic equations */ 70 | __kernel void 71 | evolve_drv(__global real *data, /**< states of the rays */ 72 | __global real *info, /**< diagnostic information */ 73 | const real dt, /**< step size */ 74 | const whole n_sub, /**< number of sub-steps */ 75 | __local real *scratch, 76 | SPACETIME_PROTOTYPE_ARGS) 77 | { 78 | const size_t gj = get_global_id(0); /* for h, slowest changing index */ 79 | const size_t gi = get_global_id(1); /* for w, fastest changing index */ 80 | const size_t g = gi + gj * w_rays; 81 | const int n = (INT_MAX / n_sub) * n_sub; 82 | 83 | if(gi < w_rays && gj < h_rays) { 84 | struct state d; 85 
| int s, h, dh; 86 | 87 | /* Input from global array */ 88 | for(s = 0; s < n_data; ++s) 89 | ((real *)&d)[s] = DATA(g, s); 90 | 91 | /* Substepping */ 92 | for(h = 0; h < n; h += dh) { 93 | dh = getdt(d.g, dt/n_sub) / dt * n; 94 | if(!dh) 95 | break; 96 | if(dh > n - h) 97 | dh = n - h; 98 | d = integrate(d, dh * dt / n, SPACETIME_ARGS); 99 | } 100 | 101 | /* Output to global array */ 102 | for(s = 0; s < n_data; ++s) 103 | DATA(g, s) = ((real *)&d)[s]; 104 | 105 | for(s = 0; s < n_info; ++s) 106 | INFO(g, s) = getuu(d.g, SPACETIME_ARGS); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /sim-org/build.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | #include "gray.h" 21 | 22 | #include 23 | 24 | static inline size_t 25 | min(size_t a, size_t b) 26 | { 27 | return a < b ? a : b; 28 | } 29 | 30 | Lux_opencl * 31 | build(Lux_job *ego) 32 | { 33 | /** \page newkern New OpenCL Kernels 34 | ** 35 | ** Extend GRay2 by adding new OpenCL kernels 36 | ** 37 | ** GRay2 uses the just-in-time compilation feature of OpenCL 38 | ** to build computation kernels at run-time. 
Most of the low 39 | ** level OpenCL codes are actually in a lux module called 40 | ** "opencl". GRay2 developers simply need to load this 41 | ** module with a list of OpenCL source codes, e.g., 42 | ** \code{.c} 43 | ** const char *buf = "static constant real a_spin = 0.999;\n"; 44 | ** const char *src[] = {buf, "KS", "RK4", "AoS", NULL}; 45 | ** const char *flags = "-cl-mad-enable"; 46 | ** struct LuxOopencl opts = {..., flags, src}; 47 | ** Lux_opencl *ocl = lux_load("opencl", &opts); 48 | ** \endcode 49 | ** and then an OpenCL kernel can be obtained and run by 50 | ** \code{.c} 51 | ** Lux_opencl_kernel *icond = ocl->mkkern(ocl, "icond_drv"); 52 | ** ... 53 | ** ocl->exec(ocl, icond, ...); 54 | ** \endcode 55 | ** Therefore, with this powerful lux module, it is 56 | ** straightforward to add new OpenCL kernels to GRay2: 57 | ** 58 | ** -# Name the OpenCL source code with an extension ".cl" and 59 | ** add it to the "sim/" source code folder. 60 | ** -# In "sim/Makefile.am", append the new file name to 61 | ** dist_krn_DATA. 62 | ** -# Add new code to the C files in "sim" to use the new 63 | ** kernel, or make the new source code default in 64 | ** "sim/gray.c" if necessary. 65 | ** 66 | ** Note that, however, the developer is responsible for making 67 | ** sure that the new OpenCL source code is compatible with 68 | ** other OpenCL codes. This is because GRay2 places all the 69 | ** OpenCL codes together and builds them as a single program. 
70 | **/ 71 | struct LuxOopencl opts = OPENCL_NULL; 72 | 73 | struct icond *i = &EGO->icond; 74 | struct param *p = &EGO->param; 75 | struct setup *s = &EGO->setup; 76 | 77 | const size_t n_data = EGO->n_coor + EGO->n_freq * 2; 78 | const size_t n_info = 1; 79 | const size_t e_chunk = min(16, n_data & ~(n_data-1)); /* number of real elements in chunk */ 80 | const size_t n_chunk = n_data / e_chunk; /* number of chunks */ 81 | size_t j; 82 | 83 | char lst[10240] = "", *tail; /* stays "" when n_freq == 0 */ 84 | char buf[10240]; 85 | 86 | const char *src[] = {buf, 87 | "preamble.cl", 88 | "interp.cl", 89 | p->coordinates, 90 | "flow.cl", 91 | "rt.cl", 92 | s->morder, 93 | "phys.cl", 94 | s->scheme, 95 | "driver.cl", 96 | NULL}; 97 | 98 | /* Format the frequencies as a comma-separated initializer list */ 99 | for(j = 0, tail = lst; j < EGO->n_freq; ++j) { 100 | const size_t room = sizeof(lst) - (size_t)(tail - lst); 101 | const int len = snprintf(tail, room, "%.18e,", EGO->param.nu[j]); 102 | if(len < 0 || (size_t)len >= room) { 103 | lux_print("GRay2: too many frequencies for the kernel preamble\n"); 104 | return NULL; 105 | } 106 | tail += len; 107 | } 108 | if(EGO->n_freq) 109 | tail[-1] = '\0'; /* drop the trailing comma */ 110 | 111 | snprintf(buf, sizeof(buf), 112 | "#define n_freq %zu\n" 113 | "#define n_data %zu\n" 114 | "#define n_info %zu\n" 115 | "#define n_rays %zu\n" 116 | "#define w_rays %zu\n" 117 | "#define h_rays %zu\n" 118 | "#define n_chunk %zu\n" 119 | "typedef real%zu realE;\n" 120 | "static __constant real nus[n_freq] = {%s};\n" 121 | "static __constant real M_ADM = %.18e;\n", 122 | EGO->n_freq, 123 | n_data, 124 | n_info, 125 | p->h_rays * p->w_rays, 126 | p->w_rays, 127 | p->h_rays, 128 | n_chunk, 129 | e_chunk, 130 | lst, 131 | p->M_ADM); 132 | 133 | lux_print("n_data = %zu\n" 134 | "n_info = %zu\n" 135 | "e_chunk = %zu\n", 136 | n_data, 137 | n_info, 138 | e_chunk); 139 | 140 | opts.base = build; /* this function */ 141 | opts.iplf = s->i_platform; 142 | opts.idev = s->i_device; 143 | opts.devtype = s->device_type; 144 | opts.realsz = s->precision; 145 | opts.flags = s->kflags; 146 | opts.src = src; 147 | 148 | return lux_load("opencl", &opts); 149 | } 150 | -------------------------------------------------------------------------------- /sim/gray.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Gabriele Bozzola and Chi-kwan Chan 3 | * Copyright (C) 2021 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | #include "gray.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | 31 | #include "Kerr_rap.h" 32 | 33 | #define EGO ((struct gray *)ego) 34 | 35 | #define MATCH(opt, str) CASE(!strcmp(EGO->opt, str)) 36 | 37 | static int 38 | conf(Lux_job *ego, const char *restrict arg) 39 | { 40 | const char *spacetime_org = EGO->gray.spacetime; 41 | const char *initcond_org = EGO->gray.initcond; 42 | int status; 43 | 44 | lux_debug("GRay2: configuring instance %p with \"%s\"\n", ego, arg); 45 | 46 | status = gray_config(&EGO->gray, arg); 47 | 48 | /* TODO: take full advantage of dynamic module and avoid switch */ 49 | SWITCH { 50 | MATCH(gray.spacetime, "Kerr") 51 | if(EGO->gray.spacetime != spacetime_org) 52 | Kerr_init(&EGO->spacetime.Kerr); 53 | else if(status) 54 | status = Kerr_config(&EGO->spacetime.Kerr, arg); 55 | DEFAULT 56 | lux_fatal("Unknown spacetime configuration \"%s\"\n", 57 | EGO->gray.spacetime); 58 | } 59 | 60 | /* TODO: take full advantage of dynamic module and avoid switch */ 61 | SWITCH { 62 | MATCH(gray.initcond, "infcam") 63 | 
if(EGO->gray.initcond != initcond_org) 64 | infcam_init(&EGO->initcond.infcam); 65 | else if(status) 66 | status = infcam_config(&EGO->initcond.infcam, arg); 67 | DEFAULT 68 | lux_fatal("Unknown initial conditions for rays \"%s\"\n", 69 | EGO->gray.initcond); 70 | } 71 | 72 | return status; 73 | } 74 | /** Load the opencl, hdf5 and initial-condition modules, allocate and initialize the ray buffers, and plan the geodesic integrator; returns 0 on success, -1 if the initial-condition or planner module cannot be loaded */ 75 | static int 76 | init(Lux_job *ego) 77 | { 78 | Lux_planner *gi = NULL; 79 | Lux_gray_initcond *ic = NULL; 80 | 81 | lux_debug("GRay2: initializing instance %p\n", ego); 82 | 83 | EGO->t = EGO->gray.t_init; 84 | EGO->dt = EGO->gray.dt_dump; 85 | EGO->i = EGO->gray.i_init; 86 | EGO->n = EGO->gray.n_dump; 87 | 88 | lux_print("GRay2:init: setup opencl module\n"); 89 | { 90 | struct LuxOopencl opts = OPENCL_NULL; 91 | opts.iplf = EGO->gray.i_platform; 92 | opts.idev = EGO->gray.i_device; 93 | opts.devtype = EGO->gray.device_type; 94 | EGO->ocl = lux_load("opencl", &opts); 95 | } 96 | 97 | EGO->io = lux_load("hdf5", NULL); 98 | 99 | lux_print("GRay2:init: initcond:ic: %s\n", EGO->gray.initcond); 100 | { 101 | Lux_gray_initcond_opts opts = { 102 | EGO->ocl->nque, 103 | EGO->ocl->que, 104 | &EGO->initcond 105 | }; 106 | 107 | char buf[256]; 108 | snprintf(buf, sizeof(buf), "sim/gray/%s", EGO->gray.initcond); 109 | 110 | ic = lux_load(buf, &opts); 111 | if(!ic) 112 | return -1; 113 | } 114 | 115 | lux_print("GRay2:init: allocate memory\n"); 116 | EGO->rays = dmk(EGO->ocl, real[8], ic->n_width * ic->n_height); 117 | EGO->rays_host = palloc(real, ic->n_width, ic->n_height, 8); 118 | 119 | lux_print("GRay2:init: initialize rays\n"); 120 | (void)ic->init(ic, EGO->rays.data); 121 | 122 | lux_print("GRay2:init: spacetime:st: %s\n", EGO->gray.spacetime); 123 | /* TODO: take full advantage of dynamic module and avoid switch */ 124 | SWITCH { 125 | MATCH(gray.spacetime, "Kerr") 126 | Lux_Kerr_problem prob = { 127 | EGO->ocl->nque, 128 | EGO->ocl->que, 129 | dgetn(EGO->rays, 0), 130 | EGO->spacetime.Kerr.a_spin, 131 | -1.0, 132 | EGO->rays.data 133 | }; 134 | 135 | char buf[256]; 136 | 
snprintf(buf, sizeof(buf), "sim/gray/%s", EGO->gray.spacetime); 137 | gi = lux_load("planner", buf); 138 | if(!gi) { lux_unload(ic); return -1; } /* planner module failed to load */ 139 | EGO->gi = gi->plan(gi, (Lux_problem *)&prob, LUX_PLAN_DEFAULT); 140 | DEFAULT 141 | lux_fatal("Unknown spacetime configuration \"%s\"\n", 142 | EGO->gray.spacetime); 143 | } 144 | 145 | if(gi) 146 | lux_unload(gi); 147 | if(ic) 148 | lux_unload(ic); 149 | 150 | return 0; 151 | } 152 | /** For each remaining dump: copy the rays from the device to the host and write them to the HDF5 file named by formatting gray.rayfile with the dump index; always returns 0 */ 153 | static int 154 | exec(Lux_job *ego) 155 | { 156 | lux_debug("GRay2: executing instance %p\n", ego); 157 | 158 | while(EGO->i < EGO->n) { 159 | size_t next = EGO->i + 1; 160 | double t = EGO->t; 161 | double target = EGO->dt * next; 162 | 163 | Lux_file *file; 164 | char buf[256]; 165 | 166 | lux_print("%zu: %4.1f -> %4.1f", next, t, target); 167 | 168 | /* TODO: EGO->gi->exec(EGO->gi); */ 169 | 170 | EGO->ocl->d2h(EGO->ocl, 171 | EGO->rays_host, 172 | EGO->rays.data, dope_getsz(EGO->rays.dope)); 173 | /* NOTE(review): rayfile is used as a printf format for one size_t argument -- confirm it cannot come from untrusted input */ 174 | snprintf(buf, sizeof(buf), EGO->gray.rayfile, next); 175 | file = EGO->io(buf, H5F_ACC_EXCL); 176 | file->write_pa(file, "/rays", typecodeof(real), EGO->rays_host); 177 | /* NOTE(review): file is never closed here -- confirm whether Lux_file needs an explicit close */ 178 | lux_print(": DONE\n"); 179 | 180 | EGO->i = next; 181 | EGO->t = target; 182 | } 183 | 184 | return 0; 185 | } 186 | /** Module constructor: allocate a zeroed struct gray, install the conf/init/exec methods and set default options; returns NULL if allocation fails */ 187 | void * 188 | LUX_MKMOD(const void *opts) 189 | { 190 | void *ego; 191 | 192 | lux_debug("GRay2: constructing an instance with options %p\n", opts); 193 | 194 | ego = zalloc(sizeof(struct gray)); 195 | if(ego) { 196 | EGO->super.conf = conf; 197 | EGO->super.init = init; 198 | EGO->super.exec = exec; 199 | 200 | gray_init(&EGO->gray); 201 | infcam_init(&EGO->initcond.infcam); 202 | } 203 | return ego; 204 | } 205 | /** Module destructor: release the host and device ray buffers, unload the opencl module and free the instance */ 206 | void 207 | LUX_RMMOD(void *ego) 208 | { 209 | lux_debug("GRay2: destructing instance %p\n", ego); 210 | 211 | pfree(EGO->rays_host); 212 | drm(EGO->ocl, EGO->rays); 213 | lux_unload(EGO->ocl); 214 | /* NOTE(review): EGO->io loaded in init() is not unloaded here -- confirm whether it should be */ 215 | free(ego); 216 | } 217 | -------------------------------------------------------------------------------- /tools/generate_data.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (C) 2020-2021 Gabriele Bozzola 4 | # 5 | # This program is free software; you can redistribute it and/or modify it under the terms 6 | # of the GNU General Public License as published by the Free Software Foundation; either 7 | # version 3 of the License, or (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | # PARTICULAR PURPOSE. See the GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License along with this 14 | # program; if not, see . 15 | 16 | import concurrent.futures 17 | 18 | import h5py 19 | import numpy as np 20 | 21 | from boosted_ks import Christoffel, metric 22 | 23 | """Prepare a HDF5 file in the format required by GRay2 containing spacetime and fluid 24 | information. The output file will have multiple HDF5 groups, one called 'grid' contains 25 | the coordinates, the other ones have as name the timestep at which the variables are 26 | defined. Each of these groups contain numerous datasets, one for each variable.""" 27 | 28 | # User controllable parameters 29 | 30 | # KS parameters 31 | a_spin = 0.6 32 | boostv = 0.5 33 | 34 | # File parameters 35 | output_file = "data.h5" 36 | times = ["0"] 37 | precision = np.single 38 | num_points_x, num_points_y, num_points_z = 10, 12, 14 39 | xmin, xmax = -300, 300 40 | ymin, ymax = -400, 400 41 | zmin, zmax = -600, 600 42 | # If num_worker is not None, use this many CPUs for the following computations. If it is 43 | # None, use as many as possible. 44 | num_workers = None 45 | 46 | 47 | _dims = ("t", "x", "y", "z") 48 | 49 | def fisheye(coord, min_, max_, num_points): 50 | """Fisheye transformation. 
51 | 52 | Takes in the logically-Cartesian coordinates, and returns the corresponding fisheye 53 | coordinates as defined from min_ to max_. 54 | 55 | """ 56 | B = np.cbrt(np.arcsinh(min_)) 57 | A = (np.cbrt(np.arcsinh(max_)) - B) / (num_points - 1) 58 | return np.sinh((A * coord + B) ** 3) 59 | 60 | 61 | # Values 0, 1, 2, 3, .... num_points - 1 62 | cart_x = np.linspace(0, num_points_x - 1, num_points_x, dtype=precision) 63 | cart_y = np.linspace(0, num_points_y - 1, num_points_y, dtype=precision) 64 | cart_z = np.linspace(0, num_points_z - 1, num_points_z, dtype=precision) 65 | 66 | # Physical coordinates 67 | xx = fisheye(cart_x, xmin, xmax, num_points_x) 68 | yy = fisheye(cart_y, ymin, ymax, num_points_y) 69 | zz = fisheye(cart_z, zmin, zmax, num_points_z) 70 | 71 | # Now we have to prepare all the variables. 72 | 73 | # This can be computationally expensive, so we are going to distribute the computation on 74 | # as many workers as we can. 75 | 76 | # Gamma dict is a dictionary with keys the times and values another dictionary that has 77 | # as keys the indices and as values the Christoffel symbols. Similarly, metric dict. The 78 | # other dicts have only one level. 79 | Gamma_dict = {} 80 | metric_dict = {} 81 | 82 | fluid_vars = ['rho'] 83 | # The fluid variables have to follow this naming convention in this file. They have to 84 | # be called name_dict, where name is one of those that enter fluid_vars. 
85 | rho_dict = {} 86 | 87 | # Not very Pythonic 88 | indices = [] 89 | indices_metric = [] 90 | for i in range(4): 91 | for j in range(4): 92 | for k in range(j, 4): 93 | indices += [(i, j, k)] 94 | if i == 0: 95 | indices_metric += [(j, k)] 96 | 97 | for time in times: 98 | print(f"Working on time {time}") 99 | 100 | def compute_Gamma(ind): 101 | """Compute the Christoffel symbol with given indices.""" 102 | return [[[Christoffel((1, a_spin, boostv), (float(time), x, y, z), *ind) 103 | for x in xx] for y in yy] for z in zz] 104 | 105 | def compute_metric(ind): 106 | """Compute the metric component with given indices.""" 107 | return [[[metric((1, a_spin, boostv), (float(time), x, y, z), *ind) 108 | for x in xx] for y in yy] for z in zz] 109 | 110 | Gamma_dict[time] = {} 111 | metric_dict[time] = {} 112 | 113 | # Do the actual computation 114 | with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as exe: 115 | for index, Gamma in zip(indices, exe.map(compute_Gamma, indices)): 116 | Gamma_dict[time][index] = Gamma 117 | print("Computed Gammas") 118 | 119 | with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as exe: 120 | for index, met in zip(indices_metric, exe.map(compute_metric, indices_metric)): 121 | metric_dict[time][index] = met 122 | print("Computed metric") 123 | 124 | def compute_rho(x, y, z): 125 | """Compute the density as 1/r.""" 126 | return 1/np.sqrt(x*x + y*y + z*z) 127 | 128 | rho_dict[time] = [[[compute_rho(x, y, z) for x in xx] for y in yy] for z in zz] 129 | print("Computed rho") 130 | 131 | 132 | with h5py.File(output_file, "w") as f: 133 | grid_group = f.create_group("grid") 134 | grid_group.create_dataset("x", data=xx) 135 | grid_group.create_dataset("y", data=yy) 136 | grid_group.create_dataset("z", data=zz) 137 | for time in times: 138 | it_group = f.create_group(time) 139 | for ind in indices: 140 | i, j, k = ind 141 | name = f"Gamma_{_dims[i]}{_dims[j]}{_dims[k]}" 142 | data = Gamma_dict[time][(i, j, k)] 
143 | data = np.nan_to_num(data) 144 | data = data.astype(precision) 145 | it_group.create_dataset(name, data=data) 146 | for ind in indices_metric: 147 | i, j = ind 148 | name = f"g_{_dims[i]}{_dims[j]}" 149 | data = metric_dict[time][(i, j)] 150 | data = np.nan_to_num(data) 151 | data = data.astype(precision) 152 | it_group.create_dataset(name, data=data) 153 | # Fluid variables 154 | for var in fluid_vars: 155 | name = var 156 | # We read the _dict variables from the global namespace 157 | data = globals()[f"{var}_dict"][time] 158 | data = np.nan_to_num(data) 159 | data = data.astype(precision) 160 | it_group.create_dataset(name, data=data) 161 | -------------------------------------------------------------------------------- /sim-org/rt.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | 21 | /** \file 22 | ** Radiative transfer 23 | ** 24 | ** Radiative transfer related functions such as the emission and 25 | ** extinction (absorption) coefficients. 
26 | **/ 27 | 28 | #define CONST_c K(2.99792458e+10) 29 | #define CONST_h K(6.62606957e-27) 30 | #define CONST_G K(6.67384800e-08) 31 | #define CONST_kB K(1.38064881e-16) 32 | #define CONST_Ry K(2.17987197e-11) 33 | #define CONST_e K(4.80320425e-10) 34 | #define CONST_me K(9.10938291e-28) 35 | #define CONST_mp_me K(1836.152672450) 36 | #define CONST_mSun K(1.98910000e+33) 37 | 38 | #define M_PI K(3.14159265358979323846) 39 | #define M_SQRT2 K(1.41421356237309504880) 40 | 41 | #define T_MIN K(1e-1) 42 | #define T_MAX K(1e+2) 43 | #define T_GRID (60) 44 | 45 | #define LOG(x) log(x) /* \todo Select the right precision for log() */ 46 | #define SQRT(x) sqrt(x) /* \todo Select the right precision for sqrt() */ 47 | #define CBRT(x) cbrt(x) /* \todo Select the right precision for cbrt() */ 48 | #define POW(x, y) pow(x, y) /* \todo Select the right precision for pow() */ 49 | #define EXP(x) exp(x) /* \todo Select the right precision for exp() */ 50 | 51 | static __constant real log_K2it_tab[] = { 52 | -10.747001122, -9.5813378172, -8.5317093904, -7.5850496322, 53 | -6.7296803564, -5.9551606678, -5.2521532618, -4.6123059955, 54 | -4.0281471473, -3.4929929282, -3.0008659288, -2.5464232845, 55 | -2.1248934192, -1.7320202979, -1.3640141782, -1.0175079137, 56 | -0.6895179334, -0.3774091024, -0.0788627660, +0.2081526098, 57 | +0.4854086716, +0.7544426322, +1.0165811787, +1.2729629642, 58 | +1.5245597366, +1.7721960959, +2.0165678441, +2.2582588804, 59 | +2.4977566043, +2.7354658112, +2.9717210921, +3.2067977811, 60 | +3.4409215189, +3.6742765257, +3.9070126886, +4.1392515843, 61 | +4.3710915520, +4.6026119396, +4.8338766306, +5.0649369599, 62 | +5.2958341090, +5.5266010659, +5.7572642218, +5.9878446670, 63 | +6.2183592400, +6.4488213736, +6.6792417767, +6.9096289812, 64 | +7.1399897815, +7.3703295860, +7.6006526984, +7.8309625420, 65 | +8.0612618396, +8.2915527560, +8.5218370124, +8.7521159766, 66 | +8.9823907360, +9.2126621546, +9.4429309191, +9.6731975749, 67 | +9.9034625556 
68 | }; 69 | 70 | static inline real 71 | log_K2it(real te) 72 | { 73 | const real h = LOG(te/(real)T_MIN) * (real)(T_GRID / LOG(T_MAX/T_MIN)); 74 | const int i = h; 75 | const real d = h - i; 76 | 77 | return (1 - d) * log_K2it_tab[i] + d * log_K2it_tab[i+1]; 78 | } /* 7 FLOP */ 79 | 80 | static inline real 81 | B_Planck(real nu, real te) 82 | { 83 | real f1 = 2 * CONST_h * CONST_c; /* ~ 4e-16 */ 84 | real f2 = CONST_h / (CONST_me * CONST_c); /* ~ 2e-10 */ 85 | 86 | nu /= (real)CONST_c; /* 1e-02 -- 1e+12 */ 87 | f1 *= nu * nu; /* 4e-20 -- 4e+08 */ 88 | f2 *= nu / (te + (real)EPSILON); /* 1e-12 -- 1e+02 */ 89 | 90 | return nu * (f2 > (real)1e-5 ? 91 | f1 / (EXP(f2) - 1) : 92 | (f1 / f2) / (1 + f2 / 2 + f2 * f2 / 6)); 93 | } /* 10+ FLOP */ 94 | 95 | static inline real 96 | Gaunt(real x, real y) 97 | { 98 | const real sqrt_x = SQRT(x); 99 | const real sqrt_y = SQRT(y); 100 | 101 | if(x > 1) 102 | return y > 1 ? 103 | (real)SQRT(K(3.0) / M_PI) / sqrt_y : 104 | (real)(SQRT(K(3.0)) / M_PI) * 105 | ((real)LOG(K(4.0) / K(1.78107241799)) - LOG(y + (real)EPSILON)); 106 | else if(x * y > 1) 107 | return (real)SQRT(K(12.0)) / (sqrt_x * sqrt_y); 108 | else if(y > sqrt_x) 109 | return 1; 110 | else { 111 | /* The "small-angle classical region" formulae in 112 | Rybicki & Lightman (1979) and Novikov & Thorne 113 | (1973) are inconsistent; it seems that both 114 | versions contain typos. TODO: double-check the 115 | following formula */ 116 | const real g = (real)(SQRT(K(3.0)) / M_PI) * 117 | ((real)LOG(K(4.0) / POW(K(1.78107241799), K(2.5))) + LOG(sqrt_x / (y + (real)EPSILON))); 118 | return g > (real)EPSILON ? g : (real)EPSILON; 119 | } 120 | } /* 3+ FLOP */ 121 | 122 | static inline real 123 | L_j_ff(real nu, real te, real ne) 124 | { 125 | /* "Standard" formula for thermal bremsstrahlung, Rybicki & 126 | Lightman equation (5.14b) divided by 4 pi. 
127 | Because the physical length scale L has to be part of the 128 | radiative transfer, we multiple it with the emissivity 129 | j_ff. */ 130 | 131 | /* Assume Z == 1 and ni == ne */ 132 | 133 | real x = CONST_me * CONST_c * CONST_c / CONST_Ry; /* ~ 4e4 */ 134 | real y = CONST_h / (CONST_me * CONST_c * CONST_c); /* ~ 3e-21 */ 135 | real f = SQRT(CONST_G * CONST_mSun / (CONST_c * CONST_c) * K(6.8e-38) / 136 | (4 * M_PI * SQRT(CONST_me * CONST_c * CONST_c / CONST_kB))); 137 | 138 | x *= te; /* ~ 1e+04 */ 139 | y *= nu / te; /* ~ 1e-10 */ 140 | f *= ne; /* ~ 1e-15 */ 141 | 142 | return (M_ADM * f * Gaunt(x, y)) * (f / (SQRT(te) * EXP(y) + (real)EPSILON)); 143 | } /* 12 FLOP + FLOP(Gaunt) == 15+ FLOP */ 144 | 145 | static inline real 146 | L_j_syn(real nu, real te, real ne, real B, real cos_theta) 147 | { 148 | /* An approximate expression for thermal magnetobremsstrahlung 149 | emission, see Leung, Gammie, & Noble (2011) equation (72). 150 | Because the physical length scale L has to be part of the 151 | radiative transfer, we multiple it with the emissivity j_ff. */ 152 | 153 | if(te <= (real)T_MIN || 154 | cos_theta <= -1 || 155 | cos_theta >= 1) return 0; 156 | 157 | const real nus = te * te * B * SQRT(1 - cos_theta * cos_theta) * 158 | (real)(CONST_e / (9 * M_PI * CONST_me * CONST_c)); /* ~ 1e5 */ 159 | const real x = nu / (nus + (real)EPSILON); /* 1e6 -- 1e18 */ 160 | 161 | const real f = (CONST_G * CONST_mSun / (CONST_c * CONST_c)) * 162 | (M_SQRT2 * M_PI * CONST_e * CONST_e / (3 * CONST_c)); 163 | const real cbrtx = CBRT(x); /* 1e2 -- 1e6 */ 164 | const real xx = SQRT(x) + (real)1.88774862536 * SQRT(cbrtx); /* 1e3 -- 1e9 */ 165 | const real log_K2 = (te > (real)T_MAX) ? 
166 | LOG(2 * te * te - (real)0.5) : 167 | log_K2it(te); 168 | 169 | return (M_ADM * xx * EXP(-cbrtx)) * (xx * EXP(-log_K2)) * (f * ne * nus); 170 | } /* 25 FLOP + min(4 FLOP, FLOP(log_K2it)) == 29+ FLOP */ 171 | 172 | 173 | 174 | struct rt { 175 | real I [n_freq]; 176 | real tau[n_freq]; 177 | }; 178 | 179 | struct rt 180 | rt_icond(void) 181 | { 182 | return (struct rt){{0}}; 183 | } 184 | 185 | struct rt 186 | rt_rhs(struct rt r, struct flow f) 187 | { 188 | for(whole i = 0; i < n_freq; ++i) { 189 | const real nu = nus[i] * f.shift; 190 | const real B_nu = B_Planck(nu, f.te); 191 | const real L_j_nu = L_j_syn(nu, f.te, f.ne, f.b, f.bkcos) + L_j_ff(nu, f.te, f.ne); 192 | 193 | r.I [i] = -L_j_nu * EXP(-r.tau[i]) / (f.shift * f.shift + (real)EPSILON); 194 | r.tau[i] = -L_j_nu * f.shift / (B_nu + (real)EPSILON); 195 | } 196 | 197 | return r; 198 | } 199 | -------------------------------------------------------------------------------- /sim-org/preamble.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 
 */

/** \file
 ** Preamble: useful OpenCL macros and functions
 **
 ** GRay2 uses OpenCL's just-in-time compilation feature to implement
 ** run-time configurable algorithms.  In this preamble we provide
 ** OpenCL macros and functions that help implementing the other parts
 ** of GRay2.
 **/

/** Small positive constant (presumably an additive guard against
 ** division by zero -- confirm against its uses) **/
#define EPSILON 1e-28

/** Helper macros to write equations for vector of length n_vars **/
/* EACH(s) loops _e_ over n_chunk and expands to the _e_-th realE element
   of s; E(s) is that element access on its own, so it is only meaningful
   inside an EACH() loop. */
#define EACH(s) for(whole _e_ = 0; _e_ < n_chunk; ++_e_) E(s)
#define E(s) ((realE *)&(s))[_e_]

/** Turn an expression into a local variable that can be passed to function **/
/* Written as a ({ ... }) statement expression whose value is the
   temporary struct state filled element by element from x. */
#define X(x) ({ struct state _; EACH(_) = (x); _; })

/** Spacetime arguments for functions **/
/* Parameter list: the grid description followed by one 3D image per
   variable (Gamma_*, g_*, rho), first all with suffix _t1 and then all
   with suffix _t2.  The order must match SPACETIME_ARGS below. */
#define SPACETIME_PROTOTYPE_ARGS \
	const    real8 bounding_box, /**< Max coordinates of the grid  */ \
	const    int4  num_points,   /**< Number of points on the grid */ \
	__read_only image3d_t Gamma_ttt_t1, \
	__read_only image3d_t Gamma_ttx_t1, \
	__read_only image3d_t Gamma_tty_t1, \
	__read_only image3d_t Gamma_ttz_t1, \
	__read_only image3d_t Gamma_txx_t1, \
	__read_only image3d_t Gamma_txy_t1, \
	__read_only image3d_t Gamma_txz_t1, \
	__read_only image3d_t Gamma_tyy_t1, \
	__read_only image3d_t Gamma_tyz_t1, \
	__read_only image3d_t Gamma_tzz_t1, \
	__read_only image3d_t Gamma_xtt_t1, \
	__read_only image3d_t Gamma_xtx_t1, \
	__read_only image3d_t Gamma_xty_t1, \
	__read_only image3d_t Gamma_xtz_t1, \
	__read_only image3d_t Gamma_xxx_t1, \
	__read_only image3d_t Gamma_xxy_t1, \
	__read_only image3d_t Gamma_xxz_t1, \
	__read_only image3d_t Gamma_xyy_t1, \
	__read_only image3d_t Gamma_xyz_t1, \
	__read_only image3d_t Gamma_xzz_t1, \
	__read_only image3d_t Gamma_ytt_t1, \
	__read_only image3d_t Gamma_ytx_t1, \
	__read_only image3d_t Gamma_yty_t1, \
	__read_only image3d_t Gamma_ytz_t1, \
	__read_only image3d_t Gamma_yxx_t1, \
	__read_only image3d_t Gamma_yxy_t1, \
	__read_only image3d_t Gamma_yxz_t1, \
	__read_only image3d_t Gamma_yyy_t1, \
	__read_only image3d_t Gamma_yyz_t1, \
	__read_only image3d_t Gamma_yzz_t1, \
	__read_only image3d_t Gamma_ztt_t1, \
	__read_only image3d_t Gamma_ztx_t1, \
	__read_only image3d_t Gamma_zty_t1, \
	__read_only image3d_t Gamma_ztz_t1, \
	__read_only image3d_t Gamma_zxx_t1, \
	__read_only image3d_t Gamma_zxy_t1, \
	__read_only image3d_t Gamma_zxz_t1, \
	__read_only image3d_t Gamma_zyy_t1, \
	__read_only image3d_t Gamma_zyz_t1, \
	__read_only image3d_t Gamma_zzz_t1, \
	__read_only image3d_t g_tt_t1, \
	__read_only image3d_t g_tx_t1, \
	__read_only image3d_t g_ty_t1, \
	__read_only image3d_t g_tz_t1, \
	__read_only image3d_t g_xx_t1, \
	__read_only image3d_t g_xy_t1, \
	__read_only image3d_t g_xz_t1, \
	__read_only image3d_t g_yy_t1, \
	__read_only image3d_t g_yz_t1, \
	__read_only image3d_t g_zz_t1, \
	__read_only image3d_t rho_t1, \
	__read_only image3d_t Gamma_ttt_t2, \
	__read_only image3d_t Gamma_ttx_t2, \
	__read_only image3d_t Gamma_tty_t2, \
	__read_only image3d_t Gamma_ttz_t2, \
	__read_only image3d_t Gamma_txx_t2, \
	__read_only image3d_t Gamma_txy_t2, \
	__read_only image3d_t Gamma_txz_t2, \
	__read_only image3d_t Gamma_tyy_t2, \
	__read_only image3d_t Gamma_tyz_t2, \
	__read_only image3d_t Gamma_tzz_t2, \
	__read_only image3d_t Gamma_xtt_t2, \
	__read_only image3d_t Gamma_xtx_t2, \
	__read_only image3d_t Gamma_xty_t2, \
	__read_only image3d_t Gamma_xtz_t2, \
	__read_only image3d_t Gamma_xxx_t2, \
	__read_only image3d_t Gamma_xxy_t2, \
	__read_only image3d_t Gamma_xxz_t2, \
	__read_only image3d_t Gamma_xyy_t2, \
	__read_only image3d_t Gamma_xyz_t2, \
	__read_only image3d_t Gamma_xzz_t2, \
	__read_only image3d_t Gamma_ytt_t2, \
	__read_only image3d_t Gamma_ytx_t2, \
	__read_only image3d_t Gamma_yty_t2, \
	__read_only image3d_t Gamma_ytz_t2, \
	__read_only image3d_t Gamma_yxx_t2, \
	__read_only image3d_t Gamma_yxy_t2, \
	__read_only image3d_t Gamma_yxz_t2, \
	__read_only image3d_t Gamma_yyy_t2, \
	__read_only image3d_t Gamma_yyz_t2, \
	__read_only image3d_t Gamma_yzz_t2, \
	__read_only image3d_t Gamma_ztt_t2, \
	__read_only image3d_t Gamma_ztx_t2, \
	__read_only image3d_t Gamma_zty_t2, \
	__read_only image3d_t Gamma_ztz_t2, \
	__read_only image3d_t Gamma_zxx_t2, \
	__read_only image3d_t Gamma_zxy_t2, \
	__read_only image3d_t Gamma_zxz_t2, \
	__read_only image3d_t Gamma_zyy_t2, \
	__read_only image3d_t Gamma_zyz_t2, \
	__read_only image3d_t Gamma_zzz_t2, \
	__read_only image3d_t g_tt_t2, \
	__read_only image3d_t g_tx_t2, \
	__read_only image3d_t g_ty_t2, \
	__read_only image3d_t g_tz_t2, \
	__read_only image3d_t g_xx_t2, \
	__read_only image3d_t g_xy_t2, \
	__read_only image3d_t g_xz_t2, \
	__read_only image3d_t g_yy_t2, \
	__read_only image3d_t g_yz_t2, \
	__read_only image3d_t g_zz_t2, \
	__read_only image3d_t rho_t2


/* Matching argument list: forwards, in the same order, the parameters
   declared by SPACETIME_PROTOTYPE_ARGS to another function. */
#define SPACETIME_ARGS \
	bounding_box, \
	num_points, \
	Gamma_ttt_t1, \
	Gamma_ttx_t1, \
	Gamma_tty_t1, \
	Gamma_ttz_t1, \
	Gamma_txx_t1, \
	Gamma_txy_t1, \
	Gamma_txz_t1, \
	Gamma_tyy_t1, \
	Gamma_tyz_t1, \
	Gamma_tzz_t1, \
	Gamma_xtt_t1, \
	Gamma_xtx_t1, \
	Gamma_xty_t1, \
	Gamma_xtz_t1, \
	Gamma_xxx_t1, \
	Gamma_xxy_t1, \
	Gamma_xxz_t1, \
	Gamma_xyy_t1, \
	Gamma_xyz_t1, \
	Gamma_xzz_t1, \
	Gamma_ytt_t1, \
	Gamma_ytx_t1, \
	Gamma_yty_t1, \
	Gamma_ytz_t1, \
	Gamma_yxx_t1, \
	Gamma_yxy_t1, \
	Gamma_yxz_t1, \
	Gamma_yyy_t1, \
	Gamma_yyz_t1, \
	Gamma_yzz_t1, \
	Gamma_ztt_t1, \
	Gamma_ztx_t1, \
	Gamma_zty_t1, \
	Gamma_ztz_t1, \
	Gamma_zxx_t1, \
	Gamma_zxy_t1, \
	Gamma_zxz_t1, \
	Gamma_zyy_t1, \
	Gamma_zyz_t1, \
	Gamma_zzz_t1, \
	g_tt_t1, \
	g_tx_t1, \
	g_ty_t1, \
	g_tz_t1, \
	g_xx_t1, \
	g_xy_t1, \
	g_xz_t1, \
	g_yy_t1, \
	g_yz_t1, \
	g_zz_t1, \
	rho_t1, \
	Gamma_ttt_t2, \
	Gamma_ttx_t2, \
	Gamma_tty_t2, \
	Gamma_ttz_t2, \
	Gamma_txx_t2, \
	Gamma_txy_t2, \
	Gamma_txz_t2, \
	Gamma_tyy_t2, \
	Gamma_tyz_t2, \
	Gamma_tzz_t2, \
	Gamma_xtt_t2, \
	Gamma_xtx_t2, \
	Gamma_xty_t2, \
	Gamma_xtz_t2, \
	Gamma_xxx_t2, \
	Gamma_xxy_t2, \
	Gamma_xxz_t2, \
	Gamma_xyy_t2, \
	Gamma_xyz_t2, \
	Gamma_xzz_t2, \
	Gamma_ytt_t2, \
	Gamma_ytx_t2, \
	Gamma_yty_t2, \
	Gamma_ytz_t2, \
	Gamma_yxx_t2, \
	Gamma_yxy_t2, \
	Gamma_yxz_t2, \
	Gamma_yyy_t2, \
	Gamma_yyz_t2, \
	Gamma_yzz_t2, \
	Gamma_ztt_t2, \
	Gamma_ztx_t2, \
	Gamma_zty_t2, \
	Gamma_ztz_t2, \
	Gamma_zxx_t2, \
	Gamma_zxy_t2, \
	Gamma_zxz_t2, \
	Gamma_zyy_t2, \
	Gamma_zyz_t2, \
	Gamma_zzz_t2, \
	g_tt_t2, \
	g_tx_t2, \
	g_ty_t2, \
	g_tz_t2, \
	g_xx_t2, \
	g_xy_t2, \
	g_xz_t2, \
	g_yy_t2, \
	g_yz_t2, \
	g_zz_t2, \
	rho_t2

--------------------------------------------------------------------------------
/sim-org/dyst.cl:
--------------------------------------------------------------------------------
/* Automatically generated, do not edit */

/* State of a ray: q and u are the two 4-vectors of the geodesic state
   (gr_rhs() returns u as the derivative of q) */
struct gr {
	real4 q;
	real4 u;
};

/* Small arithmetic helpers */
inline real GRAY_SQUARE (real x) { return x*x; };
inline real GRAY_CUBE (real x) { return x*x*x; };
inline real GRAY_FOUR (real x) { return x*x*x*x; };
inline real GRAY_SQRT (real x) { return sqrt(x); };
inline real
GRAY_SQRT_CUBE (real x) { return sqrt(x*x*x); };

/** Product of two 4x4 matrices stored row by row in a real16 */
real16 matrix_product(real16 a, real16 b){

  real4 a_row0 = a.s0123;
  real4 a_row1 = a.s4567;
  real4 a_row2 = a.s89ab;
  real4 a_row3 = a.scdef;
  real4 b_col0 = b.s048c;
  real4 b_col1 = b.s159d;
  real4 b_col2 = b.s26ae;
  real4 b_col3 = b.s37bf;

  return (real16){dot(a_row0, b_col0), dot(a_row0, b_col1),
                  dot(a_row0, b_col2), dot(a_row0, b_col3),
                  dot(a_row1, b_col0), dot(a_row1, b_col1),
                  dot(a_row1, b_col2), dot(a_row1, b_col3),
                  dot(a_row2, b_col0), dot(a_row2, b_col1),
                  dot(a_row2, b_col2), dot(a_row2, b_col3),
                  dot(a_row3, b_col0), dot(a_row3, b_col1),
                  dot(a_row3, b_col2), dot(a_row3, b_col3)};
}

/** Product of a 4x4 matrix (stored row by row in a real16) and a 4-vector */
real4 matrix_vector_product(real16 a, real4 b){

  return (real4){dot(a.s0123, b),
                 dot(a.s4567, b),
                 dot(a.s89ab, b),
                 dot(a.scdef, b)};
}

/** Placeholder: always returns 1 */
real
getrr(real4 q)
{
	return 1; /* \todo define the black hole location and implement getrr() */
}

/** Placeholder: always returns 1 */
real
geteps(real4 q)
{
	return 1; /* \todo */
}

/**
 * Lower the index of u at position q.
 *
 * The ten independent metric components are interpolated at q and
 * assembled into the symmetric 4x4 matrix g (row by row, order t, x, y,
 * z), which is then applied to u.
 */
real4
down(real4 q, real4 u, SPACETIME_PROTOTYPE_ARGS)
{
	real16 g;

	g.s0 = interpolate(q, bounding_box, num_points, g_tt_t1, g_tt_t2);
	g.s1 = interpolate(q, bounding_box, num_points, g_tx_t1, g_tx_t2);
	g.s2 = interpolate(q, bounding_box, num_points, g_ty_t1, g_ty_t2);
	g.s3 = interpolate(q, bounding_box, num_points, g_tz_t1, g_tz_t2);

	g.s4 = g.s1;
	g.s5 = interpolate(q, bounding_box, num_points, g_xx_t1, g_xx_t2);
	g.s6 = interpolate(q, bounding_box, num_points, g_xy_t1, g_xy_t2);
	g.s7 = interpolate(q, bounding_box, num_points, g_xz_t1, g_xz_t2);

	g.s8 = g.s2;
	g.s9 = g.s6;
	g.sa = interpolate(q, bounding_box, num_points, g_yy_t1, g_yy_t2);
	g.sb = interpolate(q, bounding_box, num_points, g_yz_t1, g_yz_t2);

	g.sc = g.s3;
	g.sd = g.s7;
	g.se = g.sb;
	/* The last diagonal entry is the zz component; it used to be read
	   from the yz images by mistake. */
	g.sf = interpolate(q, bounding_box, num_points, g_zz_t1, g_zz_t2);

	return matrix_vector_product(g, u);
}

/** Contraction of s.u with its lowered counterpart at s.q */
real
getuu(struct gr s, SPACETIME_PROTOTYPE_ARGS) /**< state of the ray */
{
	return dot(s.u, down(s.q, s.u, SPACETIME_ARGS));
}

/** Initial state of the ray that corresponds to the point (alpha, beta)
 ** on the image plane of an observer at (r_obs, i_obs, j_obs) **/
struct gr
gr_icond(real r_obs, /**< Distance of the observer from the black hole */
         real i_obs, /**< Inclination angle of the observer in degrees  */
         real j_obs, /**< Azimuthal   angle of the observer in degrees  */
         real alpha, /**< One of the local Cartesian coordinates        */
         real beta)  /**< The other  local Cartesian coordinate         */
{

	real deg2rad = K(3.14159265358979323846264338327950288) / K(180.0);
	/* sincos() returns the sine and stores the cosine */
	real ci, si  = sincos(deg2rad * i_obs, &ci);
	real cj, sj  = sincos(deg2rad * j_obs, &cj);

	real R0 = r_obs * si - beta  * ci;
	real z  = r_obs * ci + beta  * si;
	real y  = R0    * sj + alpha * cj;
	real x  = R0    * cj - alpha * sj;

	real4 q = (real4){K(0.0), x, y, z};
	real4 u = (real4){K(1.0), si * cj, si * sj, ci};

	return (struct gr){q, u};
}

/**
 * Right-hand side of the geodesic equations.
 *
 * For each upper index the 4x4 matrix of Christoffel symbols is built
 * from ten interpolated components (it is symmetric in the lower
 * indices) and contracted twice with u.  The returned structure holds
 * the derivative of q (which is u) and the derivative of u.
 */
struct gr
gr_rhs(struct gr g, SPACETIME_PROTOTYPE_ARGS)
{
	real4 q = g.q;
	real4 u = g.u;

	real16 GammaUPt, GammaUPx, GammaUPy, GammaUPz;

	/* We compute the commented ones in one shot */
	GammaUPt.s0 = interpolate(q, bounding_box, num_points, Gamma_ttt_t1, Gamma_ttt_t2);
	GammaUPt.s1 = interpolate(q, bounding_box, num_points, Gamma_ttx_t1, Gamma_ttx_t2);
	GammaUPt.s2 = interpolate(q, bounding_box, num_points, Gamma_tty_t1, Gamma_tty_t2);
	GammaUPt.s3 = interpolate(q, bounding_box, num_points, Gamma_ttz_t1, Gamma_ttz_t2);
	/* GammaUPt.s4 = GammaUPt.s1; */
	GammaUPt.s5 = interpolate(q, bounding_box, num_points, Gamma_txx_t1, Gamma_txx_t2);
	GammaUPt.s6 = interpolate(q, bounding_box, num_points, Gamma_txy_t1, Gamma_txy_t2);
	GammaUPt.s7 = interpolate(q, bounding_box, num_points, Gamma_txz_t1, Gamma_txz_t2);
	/* GammaUPt.s8 = GammaUPt.s2; */
	/* GammaUPt.s9 = GammaUPt.s6; */
	GammaUPt.sa = interpolate(q, bounding_box, num_points, Gamma_tyy_t1, Gamma_tyy_t2);
	GammaUPt.sb = interpolate(q, bounding_box, num_points, Gamma_tyz_t1, Gamma_tyz_t2);
	/* GammaUPt.sc = GammaUPt.s3; */
	/* GammaUPt.sd = GammaUPt.s7; */
	/* GammaUPt.se = GammaUPt.sb; */
	GammaUPt.sf = interpolate(q, bounding_box, num_points, Gamma_tzz_t1, Gamma_tzz_t2);

	/* Fill the lower triangle from the upper one */
	GammaUPt.s489 = GammaUPt.s126;
	GammaUPt.scde = GammaUPt.s37b;


	GammaUPx.s0 = interpolate(q, bounding_box, num_points, Gamma_xtt_t1, Gamma_xtt_t2);
	GammaUPx.s1 = interpolate(q, bounding_box, num_points, Gamma_xtx_t1, Gamma_xtx_t2);
	GammaUPx.s2 = interpolate(q, bounding_box, num_points, Gamma_xty_t1, Gamma_xty_t2);
	GammaUPx.s3 = interpolate(q, bounding_box, num_points, Gamma_xtz_t1, Gamma_xtz_t2);
	GammaUPx.s5 = interpolate(q, bounding_box, num_points, Gamma_xxx_t1, Gamma_xxx_t2);
	GammaUPx.s6 = interpolate(q, bounding_box, num_points, Gamma_xxy_t1, Gamma_xxy_t2);
	GammaUPx.s7 = interpolate(q, bounding_box, num_points, Gamma_xxz_t1, Gamma_xxz_t2);
	GammaUPx.sa = interpolate(q, bounding_box, num_points, Gamma_xyy_t1, Gamma_xyy_t2);
	GammaUPx.sb = interpolate(q, bounding_box, num_points, Gamma_xyz_t1, Gamma_xyz_t2);
	GammaUPx.sf = interpolate(q, bounding_box, num_points, Gamma_xzz_t1, Gamma_xzz_t2);

	GammaUPx.s489 = GammaUPx.s126;
	GammaUPx.scde = GammaUPx.s37b;


	GammaUPy.s0 = interpolate(q, bounding_box, num_points, Gamma_ytt_t1, Gamma_ytt_t2);
	GammaUPy.s1 = interpolate(q, bounding_box, num_points, Gamma_ytx_t1, Gamma_ytx_t2);
	GammaUPy.s2 = interpolate(q, bounding_box, num_points, Gamma_yty_t1, Gamma_yty_t2);
	GammaUPy.s3 = interpolate(q, bounding_box, num_points, Gamma_ytz_t1, Gamma_ytz_t2);
	GammaUPy.s5 = interpolate(q, bounding_box, num_points, Gamma_yxx_t1, Gamma_yxx_t2);
	GammaUPy.s6 = interpolate(q, bounding_box, num_points, Gamma_yxy_t1, Gamma_yxy_t2);
	GammaUPy.s7 = interpolate(q, bounding_box, num_points, Gamma_yxz_t1, Gamma_yxz_t2);
	GammaUPy.sa = interpolate(q, bounding_box, num_points, Gamma_yyy_t1, Gamma_yyy_t2);
	GammaUPy.sb = interpolate(q, bounding_box, num_points, Gamma_yyz_t1, Gamma_yyz_t2);
	GammaUPy.sf = interpolate(q, bounding_box, num_points, Gamma_yzz_t1, Gamma_yzz_t2);

	GammaUPy.s489 = GammaUPy.s126;
	GammaUPy.scde = GammaUPy.s37b;


	GammaUPz.s0 = interpolate(q, bounding_box, num_points, Gamma_ztt_t1, Gamma_ztt_t2);
	GammaUPz.s1 = interpolate(q, bounding_box, num_points, Gamma_ztx_t1, Gamma_ztx_t2);
	GammaUPz.s2 = interpolate(q, bounding_box, num_points, Gamma_zty_t1, Gamma_zty_t2);
	GammaUPz.s3 = interpolate(q, bounding_box, num_points, Gamma_ztz_t1, Gamma_ztz_t2);
	GammaUPz.s5 = interpolate(q, bounding_box, num_points, Gamma_zxx_t1, Gamma_zxx_t2);
	GammaUPz.s6 = interpolate(q, bounding_box, num_points, Gamma_zxy_t1, Gamma_zxy_t2);
	GammaUPz.s7 = interpolate(q, bounding_box, num_points, Gamma_zxz_t1, Gamma_zxz_t2);
	GammaUPz.sa = interpolate(q, bounding_box, num_points, Gamma_zyy_t1, Gamma_zyy_t2);
	GammaUPz.sb = interpolate(q, bounding_box, num_points, Gamma_zyz_t1, Gamma_zyz_t2);
	GammaUPz.sf = interpolate(q, bounding_box, num_points, Gamma_zzz_t1, Gamma_zzz_t2);

	GammaUPz.s489 = GammaUPz.s126;
	GammaUPz.scde = GammaUPz.s37b;

	/* Contraction with the t-matrix; it enters every component through
	   the GammaUU * u term and is also the first term of the t
	   component, so it is computed only once. */
	real GammaUU = dot(u, matrix_vector_product(GammaUPt, u));

	real4 rhs = {-GammaUU                                    + GammaUU * u.s0,
	             -dot(u, matrix_vector_product(GammaUPx, u)) + GammaUU * u.s1,
	             -dot(u, matrix_vector_product(GammaUPy, u)) + GammaUU * u.s2,
	             -dot(u, matrix_vector_product(GammaUPz, u)) + GammaUU * u.s3};

	return (struct gr){u, rhs};
}

--------------------------------------------------------------------------------
/sim-org/interp.cl:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2021 Gabriele Bozzola
 *
 * This file is part of GRay2.
 *
 * GRay2 is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 */

/** \file
 ** Interpolate fisheye coordinates.
 **
 ** To study black holes in dynamical spacetimes, one needs enough resolution
 ** near the horizons. This is typically reported as a fraction of the mass of
 ** the black hole, for example a value of M/80 is the resolution that roughly
 ** covers the radius of a non-spinning black hole in the puncture gauge with 80
 ** points. It is impossible to cover the entire numerical grid with such
 ** resolution, and possible solutions are mesh refinement or fisheye
 ** coordinates. The latter solution consists in deforming the coordinate
 ** system in such a way that the points are more concentrated near the horizons
 ** and less concentrated outside. This can be achieved by sampling the relevant
 ** functions in a non-uniform way. The fisheye transformation that we use here
 ** is:
 ** \f[
 **   x = \sinh \left[ {\left( A \xi + B \right)}^{n} \right]\,,
 ** \f]
 ** with \f$x\f$ physical coordinates and \f$\xi\f$ logically-Cartesian
 ** coordinates. The parameter \f$n\f$ determines how strongly the points are
 ** concentrated near the center, and the other two parameters are fixed by
 ** fixing the extent that \f$x\f$ has to cover.
 **
 ** When fisheye coordinates are used, it is critical to correct how OpenCL
 ** performs the multilinear interpolation to account for the fact that points
 ** are distributed unevenly. This module provides the infrastructure to do
 ** that. Additionally, it also handles time interpolation (with a linear
 ** transformation).
 **
 **/

/* In this module we call xyz the physical coordinates and uvw the
 * unnormalized, OpenCL ones. The values are always stored in the .s123 slots
 * of the 4-vectors. The slot .s0 is not used. */

/* Clamp every component of the integer index x to [0, size - 1] */
inline int4 address_mode(int4 x, int4 size){
  /* We implement CLK_ADDRESS_CLAMP_TO_EDGE */

  /* We need this function because we need to convert from physical variables to
   * unnormalized ones. */
  return clamp(x, (int4){0,0,0,0}, size - 1);
}

/* Same as address_mode(), for real-valued coordinates */
inline real4 address_mode_real(real4 x, real4 size){
  /* We implement CLK_ADDRESS_CLAMP_TO_EDGE */

  /* We need this function because we need to convert from physical variables to
   * unnormalized ones. */
  return clamp(x, (real4){K(0.0), K(0.0), K(0.0), K(0.0)}, size - 1);
}

int4 xyz_to_uvw(real4 xyz, real8 bounding_box, int4 num_points){
  /* Returns the unnormalized point uvw corresponding to xyz (rounding down) */

  /* FISHEYE IS HERE */
  /* In this function, we compute the coordinate transformation from physical
   * coordinates to unnormalized ones. This does not take into account the
   * fact that coordinates are unevenly spaced, and it simply consists in
   * inverting the fisheye transformation, i.e. in solving
   * x = sinh((A xi + B)^3) for xi. */

  /* Xmin = {0, xmin, ymin, zmin} */
  /* Xmax = {0, xmax, ymax, zmax} */
  real4 Xmin = {K(0.0), bounding_box.s1, bounding_box.s2, bounding_box.s3};
  real4 Xmax = {K(0.0), bounding_box.s5, bounding_box.s6, bounding_box.s7};

  /* num_points_real is defined like this because there is no easy way to cast a
   * vector to a real4, so we instead define a new variable where we cast the
   * individual components. */
  real4 num_points_real = {num_points.s0, num_points.s1, num_points.s2,
                           num_points.s3};

  /* The fisheye transformation is hard-coded here */
  /* We work with n = 3 (n is the exponent in the sinh) */
  real4 B = cbrt(asinh(Xmin));
  real4 A = (cbrt(asinh(Xmax)) - B)/(num_points_real - 1);

  /* NOTE(review): A.s0 is zero because Xmin.s0 == Xmax.s0 == 0, so the .s0
   * component below is a division by zero; it is harmless only as long as
   * the .s0 slot really is unused -- confirm. */
  real4 xi = floor((cbrt(asinh(xyz)) - B)/A);

  /* Indices outside the grid are clamped to its edges */
  return address_mode(convert_int4(xi), num_points);
}

real4 uvw_to_xyz(int4 uvw, real8 bounding_box, real4 num_points_real){

  /* Returns the physical point xyz corresponding to unnormalized uvw */

  /* FISHEYE IS HERE */

  /* Xmin = {0, xmin, ymin, zmin} */
  /* Xmax = {0, xmax, ymax, zmax} */
  real4 Xmin = {K(0.0), bounding_box.s1, bounding_box.s2, bounding_box.s3};
  real4 Xmax = {K(0.0), bounding_box.s5, bounding_box.s6, bounding_box.s7};

  /* uvw_real is defined like this because there is no easy way to cast a
   * vector to a real4, so we instead define a new variable where we cast the
   * individual components. */
  real4 uvw_real = {uvw.s0, uvw.s1, uvw.s2, uvw.s3};

  /* Hardcode the coordinate transformation with n = 3 */
  real4 B = cbrt(asinh(Xmin));
  real4 A = (cbrt(asinh(Xmax)) - B)/(num_points_real - 1);

  /* x = sinh((A xi + B)^3), component by component */
  real4 fact = (A * uvw_real + B);
  return sinh(fact * fact * fact);
}

real4 find_correct_uvw(real4 xyz,
                       real8 bounding_box,
                       int4 num_points){

  /* Return the OpenCL unnormalized coordinates uvw that, if plugged in the
   * multilinear interpolation routines, would return the correct interpolated
   * value for the physical coordinate xyz. */

  /* To do this, we first need to find the unnormalized coordinates that bound
   * the given physical points xyz. We call these uvw_ijk and uvw_ijkp1. The
   * "p1" means "plus_one" as we know that uvw_ijk will be the lower edge.
   * Then, we compute the corresponding physical coordinates and we perform
   * linear interpolation between the two. */

  /* num_points_real is defined like this because there is no easy way to cast a
   * vector to a real4, so we instead define a new variables where we cast the
   * individual variables. */
  real4 num_points_real = {num_points.s0, num_points.s1, num_points.s2,
                           num_points.s3};

  int4 uvw_ijk = xyz_to_uvw(xyz, bounding_box, num_points);
  int4 uvw_ijkp1 = uvw_ijk + 1;

  real4 xyz_ijk = uvw_to_xyz(uvw_ijk, bounding_box, num_points_real);
  real4 xyz_ijkp1 = uvw_to_xyz(uvw_ijkp1, bounding_box, num_points_real);

  /* uvw_ijk_real is defined like this because there is no easy way to cast a
   * vector to a real4, so we instead define a new variables where we cast the
   * individual variables.
*/ 152 | real4 uvw_ijk_real = {uvw_ijk.s0, uvw_ijk.s1, uvw_ijk.s2, uvw_ijk.s3}; 153 | 154 | /* Linear interpolation of coordinates*/ 155 | real4 uvw_interp = uvw_ijk_real + (xyz - xyz_ijk)/(xyz_ijkp1 - xyz_ijk); 156 | 157 | /* We clamp to edge, to make sure we are not producing values that are outside 158 | * the range of definition of the data */ 159 | uvw_interp = address_mode_real(uvw_interp, num_points_real); 160 | 161 | /* Finally, we have to offset by 0.5. This 0.5 is very important because OpenCL 162 | * uses a pixel offset of 0.5 */ 163 | return uvw_interp + (real4){K(0.5), K(0.5), K(0.5), K(0.5)}; 164 | } 165 | 166 | real space_interpolate(real4 xyz, 167 | real8 bounding_box, 168 | int4 num_points, 169 | __read_only image3d_t var){ 170 | 171 | /* Return var evaluated on xyz */ 172 | sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE \ 173 | | CLK_FILTER_LINEAR; 174 | 175 | real4 coords = find_correct_uvw(xyz, bounding_box, num_points); 176 | /* In read_imagef, coords have to be in the slots 0, 1, and 2. The slot 3 is 177 | * ignored */ 178 | coords.s012 = coords.s123; 179 | 180 | return read_imagef(var, sampler, coords).x; 181 | } 182 | 183 | real interpolate(real4 q, 184 | real8 bounding_box, 185 | int4 num_points, 186 | __read_only image3d_t var_t1, 187 | __read_only image3d_t var_t2){ 188 | 189 | /* Return var interpolated on q. If the bounding box is defined on a single 190 | * time level, then use only var_t1, otherwise perform linear interpolation 191 | * in time between var_t1 and var_t2. 
*/ 192 | 193 | real t1 = bounding_box.s0; 194 | real t2 = bounding_box.s4; 195 | 196 | if (t1 == t2) 197 | return space_interpolate(q, bounding_box, num_points, var_t1); 198 | 199 | /* y(t) = y_1 + (t - t_1) / (t2 - t1) * (y_2 - y_1) */ 200 | 201 | real y1 = space_interpolate(q, bounding_box, num_points, var_t1); 202 | real y2 = space_interpolate(q, bounding_box, num_points, var_t2); 203 | 204 | return y1 + (q.s0 - t1) / (t2 - t1) * (y2 - y1); 205 | } 206 | -------------------------------------------------------------------------------- /tools/boosted_ks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (C) 2021 Pierre Christian, Gabriele Bozzola 4 | # Copyright (C) 2020 Pierre Christian 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, but 12 | # WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see <https://www.gnu.org/licenses/>. 18 | 19 | ############## How to use ################################## 20 | """ 21 | Step 1) Check that Pierre wrote down metric/Christoffel symbols correctly 22 | 23 | Step 2) Compute Christoffel symbol Gamma^i_kl using Christoffel(Param,Coord,i,k,l). Param=parameters of the metric; for the boosted KS metric, Param=(Mass,spin,boost velocity), where the boost is along z and the velocity is in units of c. Coord=coordinates; for the KS metric, Coord=(time,x,y,z). Both must be tuples (not lists), because the helper functions are wrapped in lru_cache and therefore need hashable arguments. 
i, k, and l are spacetime indices; they range from 0-3 (Yes, I use i for something that ranges from 0-3) 24 | 25 | Other functions: 26 | -) metric(Param,Coord,i,j,up) gives the metric. The boolean switch up selects the contravariant metric when True and the covariant metric when False (the default). Do not pass the strings "up"/"down": any non-empty string is truthy and would select the contravariant metric. Again, despite how they appear, i and j are spacetime indices (ranges from 0-3). 27 | 28 | -) dm(Param,Coord,i,j,wrt,up) gives the metric derivatives. The variable wrt specifies that the derivative is taken "with respect to" which coordinate. For example, dm(Param,Coord,i,j,0,up) is the metric derivative w.r.t. time. 29 | 30 | """ 31 | 32 | ############## import all sorts of things, most of them not used ################################## 33 | 34 | import numpy as np 35 | from functools import lru_cache 36 | 37 | ############## Dual number stuff ########################### 38 | 39 | 40 | class dual: 41 | def __init__(self, first, second): 42 | self.f = first 43 | self.s = second 44 | 45 | @lru_cache(1024) 46 | def __mul__(self, other): 47 | if isinstance(other, dual): 48 | return dual(self.f * other.f, self.s * other.f + self.f * other.s) 49 | else: 50 | return dual(self.f * other, self.s * other) 51 | 52 | @lru_cache(1024) 53 | def __rmul__(self, other): 54 | if isinstance(other, dual): 55 | return dual(self.f * other.f, self.s * other.f + self.f * other.s) 56 | else: 57 | return dual(self.f * other, self.s * other) 58 | 59 | @lru_cache(1024) 60 | def __add__(self, other): 61 | if isinstance(other, dual): 62 | return dual(self.f + other.f, self.s + other.s) 63 | else: 64 | return dual(self.f + other, self.s) 65 | 66 | @lru_cache(1024) 67 | def __radd__(self, other): 68 | if isinstance(other, dual): 69 | return dual(self.f + other.f, self.s + other.s) 70 | else: 71 | return dual(self.f + other, self.s) 72 | 73 | @lru_cache(1024) 74 | def __sub__(self, other): 75 | if isinstance(other, dual): 76 | return dual(self.f - other.f, self.s - other.s) 77 | else: 78 | return dual(self.f - 
other, self.s) 79 | 80 | @lru_cache(1024) 81 | def __rsub__(self, other): 82 | return dual(other, 0) - self 83 | 84 | @lru_cache(1024) 85 | def __truediv__(self, other): 86 | """ when the first component of the divisor is not 0 """ 87 | if isinstance(other, dual): 88 | return dual( 89 | self.f / other.f, 90 | (self.s * other.f - self.f * other.s) / (other.f ** 2.0), 91 | ) 92 | else: 93 | return dual(self.f / other, self.s / other) 94 | 95 | @lru_cache(1024) 96 | def __rtruediv__(self, other): 97 | return dual(other, 0).__truediv__(self) 98 | 99 | @lru_cache(1024) 100 | def __neg__(self): 101 | return dual(-self.f, -self.s) 102 | 103 | @lru_cache(1024) 104 | def __pow__(self, power): 105 | return dual(self.f ** power, self.s * power * self.f ** (power - 1)) 106 | 107 | @lru_cache(1024) 108 | def dif(func, x): 109 | funcdual = func(dual(x, 1.0)) 110 | 111 | if isinstance(funcdual, dual): 112 | return func(dual(x, 1.0)).s 113 | 114 | # This is for when the function is a constant, e.g. gtt:=0 115 | return 0 116 | 117 | 118 | ################### Metric ##################################### 119 | 120 | def metric(Param, Coord, i, j, up=False, 121 | Kerr_KerrSchild__t=0, Kerr_KerrSchild__x=0, 122 | Kerr_KerrSchild__y=0, Kerr_KerrSchild__z=0): 123 | 124 | m, a, boostv = Param 125 | t, x, y, z = Coord 126 | 127 | gamma = 1 / (1 - boostv**2)**0.5 128 | 129 | t0 = gamma * ((t - Kerr_KerrSchild__t) - boostv * (z - Kerr_KerrSchild__z)) 130 | z0 = gamma * ((z - Kerr_KerrSchild__z) - boostv * (t - Kerr_KerrSchild__t)) 131 | x0 = x - Kerr_KerrSchild__x 132 | y0 = y - Kerr_KerrSchild__y 133 | 134 | rho02 = x0**2 + y0**2 + z0**2 135 | 136 | r02 = 0.5 * (rho02 - a**2) + (0.25 * (rho02 - a**2)**2 + a**2 * z0**2)**0.5 137 | r0 = r02**0.5 138 | costheta0 = z0 / r0 139 | 140 | hh = m * r0 / (r0**2 + a**2 * costheta0**2) 141 | 142 | lt0 = 1 143 | lx0 = (r0 * x0 + a * y0) / (r0**2 + a**2) 144 | ly0 = (r0 * y0 - a * x0) / (r0**2 + a**2) 145 | lz0 = z0 / r0 146 | 147 | lt = gamma * (lt0 - 
boostv * lz0) 148 | lz = gamma * (lz0 - boostv * lt0) 149 | lx = lx0 150 | ly = ly0 151 | 152 | gdtt = - 1 + 2 * hh * lt * lt 153 | gdtx = 2 * hh * lt * lx 154 | gdty = 2 * hh * lt * ly 155 | gdtz = 2 * hh * lt * lz 156 | gdxx = 1 + 2 * hh * lx * lx 157 | gdyy = 1 + 2 * hh * ly * ly 158 | gdzz = 1 + 2 * hh * lz * lz 159 | gdxy = 2 * hh * lx * ly 160 | gdyz = 2 * hh * ly * lz 161 | gdzx = 2 * hh * lz * lx 162 | 163 | gutt = - 1 - 2 * hh * lt * lt 164 | gutx = 2 * hh * lt * lx 165 | guty = 2 * hh * lt * ly 166 | gutz = 2 * hh * lt * lz 167 | guxx = 1 - 2 * hh * lx * lx 168 | guyy = 1 - 2 * hh * ly * ly 169 | guzz = 1 - 2 * hh * lz * lz 170 | guxy = - 2 * hh * lx * ly 171 | guyz = - 2 * hh * ly * lz 172 | guzx = - 2 * hh * lz * lx 173 | 174 | g_down = [[gdtt, gdtx, gdty, gdtz], 175 | [gdtx, gdxx, gdxy, gdzx], 176 | [gdty, gdxy, gdyy, gdyz], 177 | [gdtz, gdzx, gdyz, gdzz]] 178 | 179 | g_up = [[gutt, gutx, guty, gutz], 180 | [gutx, guxx, guxy, guzx], 181 | [guty, guxy, guyy, guyz], 182 | [gutz, guzx, guyz, guzz]] 183 | 184 | if up: 185 | return g_up[i][j] 186 | return g_down[i][j] 187 | 188 | 189 | ##################### Metric derivatives ############################# 190 | 191 | @lru_cache(1024) 192 | def dm(Param, Coord, i, j, wrt, up=False): 193 | """ This computes metric derivatives. wrt = 0,1,2,3 is derivative "with respect to" which coordinate; i,j are spacetime indices. 
(Yes, I use i and j for something that range from 0-3) """ 194 | point_d = Coord[wrt] 195 | 196 | point_0 = dual(Coord[0], 0) 197 | point_1 = dual(Coord[1], 0) 198 | point_2 = dual(Coord[2], 0) 199 | point_3 = dual(Coord[3], 0) 200 | 201 | if wrt == 0: 202 | return dif( 203 | lambda p: metric( 204 | Param, (p, point_1, point_2, point_3), i, j, up 205 | ), 206 | point_d, 207 | ) 208 | elif wrt == 1: 209 | return dif( 210 | lambda p: metric( 211 | Param, (point_0, p, point_2, point_3), i, j, up 212 | ), 213 | point_d, 214 | ) 215 | elif wrt == 2: 216 | return dif( 217 | lambda p: metric( 218 | Param, (point_0, point_1, p, point_3), i, j, up 219 | ), 220 | point_d, 221 | ) 222 | elif wrt == 3: 223 | return dif( 224 | lambda p: metric( 225 | Param, (point_0, point_1, point_2, p), i, j, up 226 | ), 227 | point_d, 228 | ) 229 | 230 | 231 | ##################### Christoffel Symbols ############################# 232 | 233 | @lru_cache(1024) 234 | def Chris_anc_A(Param, Coord, i, m, k, l): 235 | return ( 236 | metric(Param, Coord, i, m, up=True) 237 | * dm(Param, Coord, m, k, l) 238 | ) 239 | 240 | @lru_cache(1024) 241 | def Chris_anc_B(Param, Coord, i, m, k, l): 242 | return ( 243 | metric(Param, Coord, i, m, up=True) 244 | * dm(Param, Coord, m, l, k) 245 | ) 246 | 247 | @lru_cache(1024) 248 | def Chris_anc_C(Param, Coord, i, m, k, l): 249 | return ( 250 | metric(Param, Coord, i, m, up=True) 251 | * dm(Param, Coord, k, l, m) 252 | ) 253 | 254 | def Christoffel(Param, Coord, i, k, l): 255 | """ Gamma^i_kl """ 256 | Term1 = ( 257 | Chris_anc_A(Param, Coord, i, 0, k, l) 258 | + Chris_anc_A(Param, Coord, i, 1, k, l) 259 | + Chris_anc_A(Param, Coord, i, 2, k, l) 260 | + Chris_anc_A(Param, Coord, i, 3, k, l) 261 | ) 262 | Term2 = ( 263 | Chris_anc_B(Param, Coord, i, 0, k, l) 264 | + Chris_anc_B(Param, Coord, i, 1, k, l) 265 | + Chris_anc_B(Param, Coord, i, 2, k, l) 266 | + Chris_anc_B(Param, Coord, i, 3, k, l) 267 | ) 268 | Term3 = ( 269 | Chris_anc_C(Param, Coord, i, 0, k, l) 
270 | + Chris_anc_C(Param, Coord, i, 1, k, l) 271 | + Chris_anc_C(Param, Coord, i, 2, k, l) 272 | + Chris_anc_C(Param, Coord, i, 3, k, l) 273 | ) 274 | 275 | return 0.5 * (Term1 + Term2 - Term3) 276 | -------------------------------------------------------------------------------- /sim-org/gray.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Chi-kwan Chan 3 | * Copyright (C) 2016 Steward Observatory 4 | * 5 | * This file is part of GRay2. 6 | * 7 | * GRay2 is free software: you can redistribute it and/or modify it 8 | * under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 | * License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with GRay2. If not, see . 19 | */ 20 | #include "gray.h" 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include /* For access and F_OK */ 28 | #include 29 | 30 | inline static real time_at_snapshot(Lux_job *ego, int snapshot_number){ 31 | /* Return the time corresponding to the given snapshot */ 32 | 33 | /* Times are saved as chars, so we need to do operations with this 34 | * data type. This is used to read the times in the HDF5 files. */ 35 | char *rem; 36 | 37 | return strtod(EGO->available_times[snapshot_number], &rem); 38 | } 39 | 40 | 41 | static size_t find_snapshot(Lux_job *ego, real t){ 42 | /* Find snapshot number so that t1 <= t <= t2, where t1 is the time 43 | * corresponding to the snapshot number */ 44 | 45 | real t1, t2; 46 | 47 | /* We assume that snapshots they are ordered from the min to the max. 
*/ 48 | size_t snap = -1; /* wraps to SIZE_MAX, so the first snap++ below yields index 0 */ 49 | /* We have already performed all the necessary checks, so this loop should 50 | * be well defined. NOTE: the loop itself has no upper bound on snap; if t is not inside any [t1, t2] interval it keeps reading entries snap and snap + 1 indefinitely, so callers must guarantee that t is in range. */ 51 | do{ 52 | snap++; 53 | t1 = time_at_snapshot(ego, snap); 54 | t2 = time_at_snapshot(ego, snap + 1); 55 | /* It has to be that t2 > t1 (snapshots are assumed to be ordered by increasing time) */ 56 | }while(!(t >= t1 && t <= t2)); 57 | 58 | return snap; 59 | } 60 | 61 | 62 | static int 63 | _conf(Lux_job *ego, const char *restrict arg) 64 | { 65 | /** \page newopts New Run-Time Options 66 | ** 67 | ** Turn hard-wired constants into run-time options 68 | ** 69 | ** GRay2 uses the lux framework and hence follows lux's 70 | ** approach to support many run-time options. To turn 71 | ** hard-wired constants into run-time options, one needs to 72 | ** 73 | ** -# Add an option table ".opts" file in the "sim/" 74 | ** directory. 75 | ** -# Embed the automatically generated structure to `struct 76 | ** gray` in "sim/gray.h" 77 | ** -# Logically `&&` the automatically generated configure 78 | ** function to the return values of `_conf()` in 79 | ** "sim/gray.c". 
80 | **/ 81 | int invalid; 82 | real *nu; 83 | 84 | lux_debug("GRay2: configuring instance %p with \"%s\"\n", ego, arg); 85 | 86 | nu = EGO->param.nu; /* save the previous nu */ 87 | 88 | invalid = (icond_config(&EGO->icond, arg) && 89 | param_config(&EGO->param, arg) && 90 | setup_config(&EGO->setup, arg)); 91 | 92 | if(EGO->param.nu != nu) { /* nu was configured */ 93 | if(nu) 94 | free(nu); /* avoid a memory leak by freeing the old nu */ 95 | 96 | nu = EGO->param.nu; /* the frequency list is scanned up to its NaN terminator to obtain n_freq */ 97 | if(isnan(nu[0])) { 98 | lux_print("nu: []\n"); 99 | EGO->n_freq = 0; 100 | } else { 101 | size_t n; 102 | lux_print("nu: [%f", nu[0]); 103 | for(n = 1; !isnan(nu[n]); ++n) 104 | lux_print(", %f", nu[n]); 105 | lux_print("]\n"); 106 | EGO->n_freq = n; 107 | } 108 | } 109 | 110 | return invalid; 111 | } 112 | 113 | static int 114 | _init(Lux_job *ego) 115 | { 116 | Lux_opencl *ocl; /* to be loaded */ 117 | 118 | struct param *p = &EGO->param; 119 | struct setup *s = &EGO->setup; 120 | 121 | const size_t sz = s->precision; 122 | const size_t n_rays = p->h_rays * p->w_rays; 123 | const size_t n_data = EGO->n_coor + EGO->n_freq * 2; 124 | const size_t n_info = EGO->n_info; 125 | 126 | cl_mem_flags flags = CL_MEM_READ_WRITE; 127 | 128 | lux_debug("GRay2: initializing instance %p\n", ego); 129 | 130 | CKR(EGO->ocl = ocl = build(ego), cleanup1); 131 | CKR(EGO->data = ocl->mk(ocl, sz * n_rays * n_data, flags), cleanup2); 132 | CKR(EGO->info = ocl->mk(ocl, sz * n_rays * n_info, flags), cleanup3); 133 | CKR(EGO->evolve = ocl->mkkern(ocl, "evolve_drv"), cleanup4); 134 | 135 | return EXIT_SUCCESS; 136 | 137 | cleanup4: 138 | ocl->rm(ocl, EGO->info); 139 | cleanup3: 140 | ocl->rm(ocl, EGO->data); 141 | cleanup2: 142 | lux_unload(EGO->ocl); 143 | cleanup1: 144 | return EXIT_FAILURE; 145 | } 146 | 147 | static int 148 | _exec(Lux_job *ego) 149 | { 150 | struct param *p = &EGO->param; 151 | struct setup *s = &EGO->setup; 152 | 153 | const size_t n_rays = p->h_rays * p->w_rays; 154 | 155 | size_t i = 
s->i_init; 156 | const size_t n_sub = s->n_sub; 157 | const size_t n_dump = s->n_dump; 158 | 159 | const real t_init = s->t_init; 160 | const real dt_dump = s->dt_dump; 161 | /* If we are working with slow light, these are the two extrema. */ 162 | real slow_light_t1, slow_light_t2; 163 | 164 | size_t frozen_spacetime = p->enable_fast_light; 165 | size_t only_one_snapshot = 0; 166 | 167 | lux_debug("GRay2: executing instance %p\n", ego); 168 | 169 | lux_print("GRay2: Reading spacetime from file %s\n", p->dyst_file); 170 | 171 | /* We perform basic checks here: the file must exist and must open as HDF5 */ 172 | lux_check_failure_code(access(p->dyst_file, F_OK), cleanup1); 173 | hid_t file_id = H5Fopen(p->dyst_file, H5F_ACC_RDONLY, H5P_DEFAULT); 174 | if (file_id < 0) goto cleanup2; /* H5Fopen reports failure with a negative identifier */ 175 | H5Fclose(file_id); /* the handle was only needed to validate the file; release it so that it is not leaked */ 176 | /* We list all the available times in the file */ 177 | lux_check_failure_code(populate_ego_available_times(ego), cleanup3); 178 | 179 | /* We load the coordinates */ 180 | lux_check_failure_code(load_coordinates(ego), cleanup3); 181 | 182 | /* If max_available_time is equal to the first time available, it 183 | * means that it is the only one. */ 184 | 185 | real min_available_time = time_at_snapshot(ego, 0); 186 | if (EGO->max_available_time == min_available_time){ 187 | lux_print("Found only one time in data, freezing spacetime\n"); 188 | only_one_snapshot = 1; 189 | frozen_spacetime = 1; 190 | }else{ 191 | /* It does not make sense to perform the integration if we don't have 192 | * the desired initial time and final in range, unless we only have one 193 | * time snapshot. 
*/ 194 | if ((t_init < min_available_time || t_init > EGO->max_available_time)){ 195 | lux_print("ERROR: t_init (%4.1f) is outside domain of the data (%5.1f, %5.1f)\n", 196 | t_init, min_available_time, EGO->max_available_time); 197 | return EXIT_FAILURE; 198 | } 199 | real t_final = t_init + (i+1) * dt_dump * n_dump; 200 | if ((t_final < min_available_time || t_final > EGO->max_available_time)){ 201 | lux_print("ERROR: t_final (%4.1f) is outside domain of the data (%5.1f, %5.1f)\n", 202 | t_final, min_available_time, EGO->max_available_time); 203 | return EXIT_FAILURE; 204 | } 205 | } 206 | 207 | /* Snapshot of interest */ 208 | size_t snap_number; 209 | 210 | if (frozen_spacetime){ 211 | lux_print("Assuming fast light\n"); 212 | 213 | /* If we have only one snapshot, then we must read it (it has index 0). */ 214 | snap_number = 0 ? only_one_snapshot: find_snapshot(ego, t_init); 215 | 216 | /* 1 here means "load in t1" */ 217 | lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3); 218 | 219 | /* We have to fill t2 with something, otherwise it will produce errors. 220 | * We fill with the same data as t1. We do not use copy_snapshot, because 221 | * the function assumes that the EGO already contains valid images. 222 | * Here 0 means "to_t2" */ 223 | lux_check_failure_code(load_snapshot(ego, snap_number, 0), cleanup3); 224 | 225 | /* Next, we disable time interpolation by setting the two time extrema 226 | * of the bounding box to be the same */ 227 | EGO->bounding_box.s0 = 0; 228 | EGO->bounding_box.s4 = 0; 229 | }else{ 230 | lux_print("Working with slow light\n"); 231 | /* Here we read the snapshot at t1 and t2 so that they contain t_init. 
*/ 232 | snap_number = find_snapshot(ego, t_init); 233 | slow_light_t1 = time_at_snapshot(ego, snap_number); 234 | slow_light_t2 = time_at_snapshot(ego, snap_number + 1); 235 | /* 1 here means "load in t1" */ 236 | lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3); 237 | /* 0 here means "load in t2" */ 238 | lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3); 239 | EGO->bounding_box.s0 = slow_light_t1; 240 | EGO->bounding_box.s4 = slow_light_t2; 241 | } 242 | 243 | lux_print("%zu: initialize at %4.1f", i, t_init); 244 | icond(ego, t_init); 245 | dump (ego, i); 246 | lux_print(": DONE\n"); 247 | 248 | while(i < n_dump) { 249 | real ns, t, target; 250 | 251 | t = t_init + i * dt_dump; 252 | target = t_init + (++i) * dt_dump; 253 | 254 | lux_print("%zu: %4.1f -> %4.1f", i, t, target); 255 | ns = evolve(ego, t, target, n_sub); 256 | dump(ego, i); 257 | lux_print(": DONE (%.3gns/step/ray)\n", ns/n_sub/n_rays); 258 | 259 | /* If we are not freezing the spacetime, we need to change the snapshots */ 260 | if (!frozen_spacetime && (target < slow_light_t1 || target > slow_light_t2)){ 261 | 262 | /* If snap_number is off by 1 compared to old_snap_number, this 263 | * means that we can read only one of the two snapshots and copy 264 | * over the other one. If it is off by more than 1, then we have to 265 | * read them both. */ 266 | size_t old_snap_number = snap_number; 267 | snap_number = find_snapshot(ego, target); 268 | slow_light_t1 = time_at_snapshot(ego, snap_number); 269 | slow_light_t2 = time_at_snapshot(ego, snap_number + 1); 270 | 271 | if (snap_number == old_snap_number + 1){ 272 | /* In this case, the old t2 has to become the new t1. Here 1 273 | * means "copy to t1" */ 274 | copy_snapshot(ego, 1); 275 | /* 0 here means "load in t2" */ 276 | lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3); 277 | }else if (snap_number == old_snap_number - 1){ 278 | /* In this case, the old t1 has to become the new t2. 
Here 0 279 | * means "copy to t2" */ 280 | copy_snapshot(ego, 0); 281 | /* 1 here means "load in t1" */ 282 | lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3); 283 | }else{ 284 | /* We have to read them both */ 285 | /* 1 here means "load in t1" */ 286 | lux_check_failure_code(load_snapshot(ego, snap_number, 1), cleanup3); 287 | /* 0 here means "load in t2" */ 288 | lux_check_failure_code(load_snapshot(ego, snap_number + 1, 0), cleanup3); 289 | } 290 | /* Update bounding box */ 291 | EGO->bounding_box.s0 = slow_light_t1; 292 | EGO->bounding_box.s4 = slow_light_t2; 293 | } 294 | } 295 | 296 | return EXIT_SUCCESS; 297 | 298 | cleanup1: 299 | lux_print("ERROR: File %s could not be read\n", p->dyst_file); 300 | return EXIT_FAILURE; 301 | cleanup2: 302 | lux_print("ERROR: File %s is not a valid HDF5 file\n", p->dyst_file); 303 | return EXIT_FAILURE; 304 | cleanup3: 305 | return EXIT_FAILURE; 306 | } 307 | 308 | void * 309 | LUX_MKMOD(const void *opts) 310 | { 311 | void *ego; 312 | 313 | lux_debug("GRay2: constructing with options %p\n", opts); 314 | 315 | ego = zalloc(sizeof(struct gray)); 316 | if(ego) { 317 | EGO->super.conf = _conf; 318 | EGO->super.init = _init; 319 | EGO->super.exec = _exec; 320 | icond_init(&EGO->icond); 321 | param_init(&EGO->param); 322 | setup_init(&EGO->setup); 323 | EGO->n_coor = 8; /** \todo Adjust n_coor using setup.coordinates. */ 324 | EGO->n_freq = 0; 325 | EGO->n_info = 1; /** \todo Adjust n_info using setup.coordinates. 
*/ 326 | } 327 | return ego; 328 | } 329 | 330 | void 331 | LUX_RMMOD(void *ego) 332 | { 333 | Lux_opencl *ocl = EGO->ocl; 334 | 335 | lux_debug("GRay2: destructing instance %p\n", ego); 336 | 337 | if(EGO->evolve) 338 | ocl->rmkern(ocl, EGO->evolve); 339 | if(EGO->info) 340 | ocl->rm(ocl, EGO->info); 341 | if(EGO->data) 342 | ocl->rm(ocl, EGO->data); 343 | if(EGO->ocl) 344 | lux_unload(EGO->ocl); 345 | free(ego); 346 | } 347 | -------------------------------------------------------------------------------- /doc/Doxyfile: -------------------------------------------------------------------------------- 1 | # Doxyfile 1.8.11 2 | 3 | #--------------------------------------------------------------------------- 4 | # Project related configuration options 5 | #--------------------------------------------------------------------------- 6 | DOXYFILE_ENCODING = UTF-8 7 | PROJECT_NAME = GRay 8 | PROJECT_NUMBER = 2.0-alpha 9 | PROJECT_BRIEF = 10 | PROJECT_LOGO = 11 | OUTPUT_DIRECTORY = 12 | CREATE_SUBDIRS = NO 13 | ALLOW_UNICODE_NAMES = NO 14 | OUTPUT_LANGUAGE = English 15 | BRIEF_MEMBER_DESC = YES 16 | REPEAT_BRIEF = YES 17 | ABBREVIATE_BRIEF = 18 | ALWAYS_DETAILED_SEC = NO 19 | INLINE_INHERITED_MEMB = NO 20 | FULL_PATH_NAMES = NO 21 | STRIP_FROM_PATH = 22 | STRIP_FROM_INC_PATH = 23 | SHORT_NAMES = NO 24 | JAVADOC_AUTOBRIEF = YES 25 | QT_AUTOBRIEF = NO 26 | MULTILINE_CPP_IS_BRIEF = NO 27 | INHERIT_DOCS = YES 28 | SEPARATE_MEMBER_PAGES = NO 29 | TAB_SIZE = 8 30 | ALIASES = 31 | TCL_SUBST = 32 | OPTIMIZE_OUTPUT_FOR_C = YES 33 | OPTIMIZE_OUTPUT_JAVA = NO 34 | OPTIMIZE_FOR_FORTRAN = NO 35 | OPTIMIZE_OUTPUT_VHDL = NO 36 | EXTENSION_MAPPING = cl=C++ 37 | MARKDOWN_SUPPORT = YES 38 | AUTOLINK_SUPPORT = YES 39 | BUILTIN_STL_SUPPORT = NO 40 | CPP_CLI_SUPPORT = NO 41 | SIP_SUPPORT = NO 42 | IDL_PROPERTY_SUPPORT = YES 43 | DISTRIBUTE_GROUP_DOC = NO 44 | GROUP_NESTED_COMPOUNDS = NO 45 | SUBGROUPING = YES 46 | INLINE_GROUPED_CLASSES = NO 47 | INLINE_SIMPLE_STRUCTS = NO 48 | 
TYPEDEF_HIDES_STRUCT = NO 49 | LOOKUP_CACHE_SIZE = 0 50 | #--------------------------------------------------------------------------- 51 | # Build related configuration options 52 | #--------------------------------------------------------------------------- 53 | EXTRACT_ALL = YES 54 | EXTRACT_PRIVATE = NO 55 | EXTRACT_PACKAGE = NO 56 | EXTRACT_STATIC = YES 57 | EXTRACT_LOCAL_CLASSES = YES 58 | EXTRACT_LOCAL_METHODS = NO 59 | EXTRACT_ANON_NSPACES = NO 60 | HIDE_UNDOC_MEMBERS = NO 61 | HIDE_UNDOC_CLASSES = NO 62 | HIDE_FRIEND_COMPOUNDS = NO 63 | HIDE_IN_BODY_DOCS = NO 64 | INTERNAL_DOCS = NO 65 | CASE_SENSE_NAMES = YES 66 | HIDE_SCOPE_NAMES = NO 67 | HIDE_COMPOUND_REFERENCE= NO 68 | SHOW_INCLUDE_FILES = YES 69 | SHOW_GROUPED_MEMB_INC = NO 70 | FORCE_LOCAL_INCLUDES = NO 71 | INLINE_INFO = YES 72 | SORT_MEMBER_DOCS = YES 73 | SORT_BRIEF_DOCS = NO 74 | SORT_MEMBERS_CTORS_1ST = NO 75 | SORT_GROUP_NAMES = NO 76 | SORT_BY_SCOPE_NAME = NO 77 | STRICT_PROTO_MATCHING = NO 78 | GENERATE_TODOLIST = YES 79 | GENERATE_TESTLIST = YES 80 | GENERATE_BUGLIST = YES 81 | GENERATE_DEPRECATEDLIST= YES 82 | ENABLED_SECTIONS = 83 | MAX_INITIALIZER_LINES = 30 84 | SHOW_USED_FILES = YES 85 | SHOW_FILES = YES 86 | SHOW_NAMESPACES = YES 87 | FILE_VERSION_FILTER = 88 | LAYOUT_FILE = 89 | CITE_BIB_FILES = 90 | #--------------------------------------------------------------------------- 91 | # Configuration options related to warning and progress messages 92 | #--------------------------------------------------------------------------- 93 | QUIET = NO 94 | WARNINGS = YES 95 | WARN_IF_UNDOCUMENTED = YES 96 | WARN_IF_DOC_ERROR = YES 97 | WARN_NO_PARAMDOC = NO 98 | WARN_AS_ERROR = NO 99 | WARN_FORMAT = "$file:$line: $text" 100 | WARN_LOGFILE = 101 | #--------------------------------------------------------------------------- 102 | # Configuration options related to the input files 103 | #--------------------------------------------------------------------------- 104 | INPUT = ../README.md ../sim 
105 | INPUT_ENCODING = UTF-8 106 | FILE_PATTERNS = *.h *.c *.cl 107 | RECURSIVE = YES 108 | EXCLUDE = 109 | EXCLUDE_SYMLINKS = NO 110 | EXCLUDE_PATTERNS = 111 | EXCLUDE_SYMBOLS = 112 | EXAMPLE_PATH = 113 | EXAMPLE_PATTERNS = 114 | EXAMPLE_RECURSIVE = NO 115 | IMAGE_PATH = 116 | INPUT_FILTER = 117 | FILTER_PATTERNS = 118 | FILTER_SOURCE_FILES = NO 119 | FILTER_SOURCE_PATTERNS = 120 | USE_MDFILE_AS_MAINPAGE = 121 | #--------------------------------------------------------------------------- 122 | # Configuration options related to source browsing 123 | #--------------------------------------------------------------------------- 124 | SOURCE_BROWSER = YES 125 | INLINE_SOURCES = NO 126 | STRIP_CODE_COMMENTS = NO 127 | REFERENCED_BY_RELATION = NO 128 | REFERENCES_RELATION = NO 129 | REFERENCES_LINK_SOURCE = YES 130 | SOURCE_TOOLTIPS = YES 131 | USE_HTAGS = NO 132 | VERBATIM_HEADERS = YES 133 | #--------------------------------------------------------------------------- 134 | # Configuration options related to the alphabetical class index 135 | #--------------------------------------------------------------------------- 136 | ALPHABETICAL_INDEX = YES 137 | COLS_IN_ALPHA_INDEX = 5 138 | IGNORE_PREFIX = 139 | #--------------------------------------------------------------------------- 140 | # Configuration options related to the HTML output 141 | #--------------------------------------------------------------------------- 142 | GENERATE_HTML = YES 143 | HTML_OUTPUT = html 144 | HTML_FILE_EXTENSION = .html 145 | HTML_HEADER = 146 | HTML_FOOTER = 147 | HTML_STYLESHEET = 148 | HTML_EXTRA_STYLESHEET = 149 | HTML_EXTRA_FILES = 150 | HTML_COLORSTYLE_HUE = 220 151 | HTML_COLORSTYLE_SAT = 100 152 | HTML_COLORSTYLE_GAMMA = 80 153 | HTML_TIMESTAMP = NO 154 | HTML_DYNAMIC_SECTIONS = NO 155 | HTML_INDEX_NUM_ENTRIES = 100 156 | GENERATE_DOCSET = NO 157 | DOCSET_FEEDNAME = "Doxygen generated docs" 158 | DOCSET_BUNDLE_ID = org.doxygen.Project 159 | DOCSET_PUBLISHER_ID = 
org.doxygen.Publisher 160 | DOCSET_PUBLISHER_NAME = Publisher 161 | GENERATE_HTMLHELP = NO 162 | CHM_FILE = 163 | HHC_LOCATION = 164 | GENERATE_CHI = NO 165 | CHM_INDEX_ENCODING = 166 | BINARY_TOC = NO 167 | TOC_EXPAND = NO 168 | GENERATE_QHP = NO 169 | QCH_FILE = 170 | QHP_NAMESPACE = org.doxygen.Project 171 | QHP_VIRTUAL_FOLDER = doc 172 | QHP_CUST_FILTER_NAME = 173 | QHP_CUST_FILTER_ATTRS = 174 | QHP_SECT_FILTER_ATTRS = 175 | QHG_LOCATION = 176 | GENERATE_ECLIPSEHELP = NO 177 | ECLIPSE_DOC_ID = org.doxygen.Project 178 | DISABLE_INDEX = NO 179 | GENERATE_TREEVIEW = NO 180 | ENUM_VALUES_PER_LINE = 4 181 | TREEVIEW_WIDTH = 250 182 | EXT_LINKS_IN_WINDOW = NO 183 | FORMULA_FONTSIZE = 10 184 | FORMULA_TRANSPARENT = YES 185 | USE_MATHJAX = YES 186 | MATHJAX_FORMAT = HTML-CSS 187 | MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 188 | MATHJAX_EXTENSIONS = 189 | MATHJAX_CODEFILE = 190 | SEARCHENGINE = YES 191 | SERVER_BASED_SEARCH = NO 192 | EXTERNAL_SEARCH = NO 193 | SEARCHENGINE_URL = 194 | SEARCHDATA_FILE = searchdata.xml 195 | EXTERNAL_SEARCH_ID = 196 | EXTRA_SEARCH_MAPPINGS = 197 | #--------------------------------------------------------------------------- 198 | # Configuration options related to the LaTeX output 199 | #--------------------------------------------------------------------------- 200 | GENERATE_LATEX = YES 201 | LATEX_OUTPUT = latex 202 | LATEX_CMD_NAME = latex 203 | MAKEINDEX_CMD_NAME = makeindex 204 | COMPACT_LATEX = NO 205 | PAPER_TYPE = a4 206 | EXTRA_PACKAGES = 207 | LATEX_HEADER = 208 | LATEX_FOOTER = 209 | LATEX_EXTRA_STYLESHEET = 210 | LATEX_EXTRA_FILES = 211 | PDF_HYPERLINKS = YES 212 | USE_PDFLATEX = YES 213 | LATEX_BATCHMODE = NO 214 | LATEX_HIDE_INDICES = NO 215 | LATEX_SOURCE_CODE = NO 216 | LATEX_BIB_STYLE = plain 217 | LATEX_TIMESTAMP = NO 218 | #--------------------------------------------------------------------------- 219 | # Configuration options related to the RTF output 220 | 
#--------------------------------------------------------------------------- 221 | GENERATE_RTF = NO 222 | RTF_OUTPUT = rtf 223 | COMPACT_RTF = NO 224 | RTF_HYPERLINKS = NO 225 | RTF_STYLESHEET_FILE = 226 | RTF_EXTENSIONS_FILE = 227 | RTF_SOURCE_CODE = NO 228 | #--------------------------------------------------------------------------- 229 | # Configuration options related to the man page output 230 | #--------------------------------------------------------------------------- 231 | GENERATE_MAN = NO 232 | MAN_OUTPUT = man 233 | MAN_EXTENSION = .3 234 | MAN_SUBDIR = 235 | MAN_LINKS = NO 236 | #--------------------------------------------------------------------------- 237 | # Configuration options related to the XML output 238 | #--------------------------------------------------------------------------- 239 | GENERATE_XML = NO 240 | XML_OUTPUT = xml 241 | XML_PROGRAMLISTING = YES 242 | #--------------------------------------------------------------------------- 243 | # Configuration options related to the DOCBOOK output 244 | #--------------------------------------------------------------------------- 245 | GENERATE_DOCBOOK = NO 246 | DOCBOOK_OUTPUT = docbook 247 | DOCBOOK_PROGRAMLISTING = NO 248 | #--------------------------------------------------------------------------- 249 | # Configuration options for the AutoGen Definitions output 250 | #--------------------------------------------------------------------------- 251 | GENERATE_AUTOGEN_DEF = NO 252 | #--------------------------------------------------------------------------- 253 | # Configuration options related to the Perl module output 254 | #--------------------------------------------------------------------------- 255 | GENERATE_PERLMOD = NO 256 | PERLMOD_LATEX = NO 257 | PERLMOD_PRETTY = YES 258 | PERLMOD_MAKEVAR_PREFIX = 259 | #--------------------------------------------------------------------------- 260 | # Configuration options related to the preprocessor 261 | 
#--------------------------------------------------------------------------- 262 | ENABLE_PREPROCESSING = YES 263 | MACRO_EXPANSION = NO 264 | EXPAND_ONLY_PREDEF = NO 265 | SEARCH_INCLUDES = YES 266 | INCLUDE_PATH = 267 | INCLUDE_FILE_PATTERNS = 268 | PREDEFINED = 269 | EXPAND_AS_DEFINED = 270 | SKIP_FUNCTION_MACROS = YES 271 | #--------------------------------------------------------------------------- 272 | # Configuration options related to external references 273 | #--------------------------------------------------------------------------- 274 | TAGFILES = 275 | GENERATE_TAGFILE = 276 | ALLEXTERNALS = NO 277 | EXTERNAL_GROUPS = YES 278 | EXTERNAL_PAGES = YES 279 | PERL_PATH = /usr/bin/perl 280 | #--------------------------------------------------------------------------- 281 | # Configuration options related to the dot tool 282 | #--------------------------------------------------------------------------- 283 | CLASS_DIAGRAMS = YES 284 | MSCGEN_PATH = 285 | DIA_PATH = 286 | HIDE_UNDOC_RELATIONS = YES 287 | HAVE_DOT = YES 288 | DOT_NUM_THREADS = 0 289 | DOT_FONTNAME = Helvetica 290 | DOT_FONTSIZE = 10 291 | DOT_FONTPATH = 292 | CLASS_GRAPH = YES 293 | COLLABORATION_GRAPH = YES 294 | GROUP_GRAPHS = YES 295 | UML_LOOK = NO 296 | UML_LIMIT_NUM_FIELDS = 10 297 | TEMPLATE_RELATIONS = NO 298 | INCLUDE_GRAPH = YES 299 | INCLUDED_BY_GRAPH = YES 300 | CALL_GRAPH = YES 301 | CALLER_GRAPH = YES 302 | GRAPHICAL_HIERARCHY = YES 303 | DIRECTORY_GRAPH = YES 304 | DOT_IMAGE_FORMAT = png 305 | INTERACTIVE_SVG = NO 306 | DOT_PATH = 307 | DOTFILE_DIRS = 308 | MSCFILE_DIRS = 309 | DIAFILE_DIRS = 310 | PLANTUML_JAR_PATH = 311 | PLANTUML_INCLUDE_PATH = 312 | DOT_GRAPH_MAX_NODES = 50 313 | MAX_DOT_GRAPH_DEPTH = 0 314 | DOT_TRANSPARENT = NO 315 | DOT_MULTI_TARGETS = NO 316 | GENERATE_LEGEND = YES 317 | DOT_CLEANUP = YES 318 | -------------------------------------------------------------------------------- /sim-org/KS.cl: 
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2016 Chi-kwan Chan
 * Copyright (C) 2016 Steward Observatory
 *
 * This file is part of GRay2.
 *
 * GRay2 is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * GRay2 is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GRay2.  If not, see <http://www.gnu.org/licenses/>.
 */

/** \file
 ** Cartesian Kerr-Schild coordinate specific schemes
 **
 ** Implement the coordinate specific functions getuu(), gr_icond(), and
 ** gr_rhs() in the Cartesian form of the Kerr-Schild coordinates.  Let
 ** \f$t\f$, \f$x\f$, \f$y\f$, \f$z\f$ be the coordinates, the
 ** Cartesian Kerr-Schild metric is given by
 ** \f[
 **   g_{\mu\nu} = \gamma_{\mu\nu} + f l_\mu l_\nu
 ** \f]
 ** where \f$\gamma_{\mu\nu}\f$ is the Minkowski metric, \f$f\f$ and
 ** \f$l_\mu\f$ are defined by
 ** \f[
 **   f = \frac{2r^3}{r^4 + a^2 z^2} \mbox{ and }
 **   l_\mu = \left(1, \frac{rx + ay}{r^2 + a^2},
 **                    \frac{ry - ax}{r^2 + a^2},
 **                    \frac{z}{r}\right),
 ** \f]
 ** respectively, and \f$r\f$ is defined implicitly by \f$x^2 + y^2 +
 ** z^2 = r^2 + a^2 (1 - z^2 / r^2)\f$.
41 | **/ 42 | 43 | struct gr { 44 | real4 q; 45 | real4 u; 46 | }; 47 | 48 | real 49 | getrr(real4 q) 50 | { 51 | real aa = a_spin * a_spin; 52 | real zz = q.s3 * q.s3; 53 | real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa); 54 | return sqrt(kk * kk + aa * zz) + kk; 55 | } 56 | 57 | real 58 | geteps(real4 q) 59 | { 60 | return sqrt(getrr(q)) - (1.0 + sqrt(1.0 - a_spin * a_spin)); 61 | } 62 | 63 | real4 64 | getsphKS(real4 q) 65 | { 66 | real r = sqrt(getrr(q)); 67 | real theta = acos(q.s3 / r); 68 | real phi = atan2(q.s2 * r + q.s1 * a_spin, 69 | q.s1 * r - q.s2 * a_spin); 70 | return (real4){q.s0, r, theta, phi}; 71 | } 72 | 73 | real4 74 | getBL(real4 q) 75 | { 76 | q = getsphKS(q); 77 | 78 | real h = sqrt(K(1.0) - a_spin * a_spin); 79 | real rp = 1.0 + h; 80 | real rm = 1.0 - h; 81 | real r = q.s1; 82 | 83 | return (real4){ 84 | q.s0 + (rm * log((r-rm)/(r_match-rm)) - rp * log((r-rp)/(r_match-rp))) / h, 85 | q.s1, 86 | q.s2, 87 | q.s3 + K(0.5) * a_spin * log((r-rm)/(r-rp)) / h /* use r = inf for matching the phi coordinate */ 88 | }; 89 | } 90 | 91 | real4 92 | down(real4 q, real4 u) 93 | { 94 | real aa = a_spin * a_spin; 95 | real zz = q.s3 * q.s3; 96 | real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa); 97 | real rr = sqrt(kk * kk + aa * zz) + kk; 98 | real r = sqrt(rr); 99 | 100 | real f = K(2.0) * rr * r / (rr * rr + aa * zz); 101 | real lx = (r * q.s1 + a_spin * q.s2) / (rr + aa); 102 | real ly = (r * q.s2 - a_spin * q.s1) / (rr + aa); 103 | real lz = q.s3 / r; 104 | 105 | real4 gt = {-1 + f , f* lx, f* ly, f* lz}; 106 | real4 gx = { f*lx, 1 + f*lx*lx, f*lx*ly, f*lx*lz}; 107 | real4 gy = { f*ly, f*ly*lx, 1 + f*ly*ly, f*ly*lz}; 108 | real4 gz = { f*lz, f*lz*lx, f*lz*ly, 1 + f*lz*lz}; 109 | 110 | return (real4){dot(gt, u), 111 | dot(gx, u), 112 | dot(gy, u), 113 | dot(gz, u)}; 114 | } 115 | 116 | real4 117 | getsphKSu(real4 q, real4 u) 118 | { 119 | real aa = a_spin * a_spin; 120 | real RR = q.s1 * q.s1 + q.s2 * q.s2; 121 | real zz = q.s3 * q.s3; 
122 | real kk = K(0.5) * (RR + zz - aa); 123 | real dd = sqrt(kk * kk + aa * zz); 124 | real rr = dd + kk; 125 | real r = sqrt(rr); 126 | 127 | real4 J1 = ((real4){0, rr, rr, rr + aa} * q) / (K(2.0) * r * dd); 128 | real4 J2 = ((q.s3 / r) * J1 - (real4){0, 0, 0, 1}) / sqrt(rr - zz); 129 | real4 J3 = (-a_spin / (rr + aa)) * J1 + (real4){0, -q.s2, q.s1, 0} / RR; 130 | 131 | return (real4){u.s0, 132 | dot(J1, u), 133 | dot(J2, u), 134 | dot(J3, u)}; 135 | } 136 | 137 | real4 138 | getBLu(real4 q, real4 u) 139 | { 140 | u = getsphKSu(q, u); 141 | 142 | real aa = a_spin * a_spin; 143 | real zz = q.s3 * q.s3; 144 | real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa); 145 | real rr = sqrt(kk * kk + aa * zz) + kk; 146 | real r = sqrt(rr); 147 | real D = rr - K(2.0) * r + aa; 148 | 149 | return (real4){u.s0 - u.s3 * K(2.0) * r / D, 150 | u.s1, 151 | u.s2, 152 | u.s3 - u.s0 * a_spin / D}; 153 | } 154 | 155 | /** 156 | ** Sqaure of vector u at the spacetime event q in Kerr-Schild coordiantes 157 | ** 158 | ** Compute \f$u\cdot u \equiv g_{\alpha\beta} u^\alpha u^\beta\f$, 159 | ** where \f$g_{\alpha\beta}\f$ is the Cartesian form of the 160 | ** Kerr-Schild metric. 161 | ** 162 | ** \return The square of u at q 163 | **/ 164 | real 165 | getuu(struct gr g) /**< state of the ray */ 166 | { 167 | return dot(down(g.q, g.u), g.u); 168 | } 169 | 170 | /** 171 | ** Initial conditions of a ray in an image plane 172 | ** 173 | ** To perform ray tracing calculations of an image in Kerr spacetime, 174 | ** we follow Johannsen & Psaltis (2010) and consider an observer 175 | ** viewing the central black hole from a large distance \p r_obs and 176 | ** at an inclination angle \p i_obs from its rotation axis (see 177 | ** Figure 1 of Psaltis & Johannsen 2012). We set up a virtual image 178 | ** plane that is perpendicular to the line of sight and centered at 179 | ** \f$\phi\f$ = \p j_obs of the spacetime. 
We define the set of 180 | ** local Cartesian coordinates (\p alpha, \p beta) on the image plane 181 | ** such that the \p beta axis is along the same fiducial plane and 182 | ** the \p alpha axis is perpendicular to it. These input parameters 183 | ** define a unique ray, whose initial spacetime position and 184 | ** wavevector are then computed by icond(). 185 | ** 186 | ** \return The initial conditions of a ray 187 | **/ 188 | struct gr 189 | gr_icond(real r_obs, /**< distance of the image from the black hole */ 190 | real i_obs, /**< inclination angle of the image in degrees */ 191 | real j_obs, /**< azimuthal angle of the image in degrees */ 192 | real alpha, /**< one of the local Cartesian coordinates */ 193 | real beta) /**< the other local Cartesian coordinate */ 194 | { 195 | real deg2rad = K(3.14159265358979323846264338327950288) / K(180.0); 196 | real ci, si = sincos(deg2rad * i_obs, &ci); 197 | real cj, sj = sincos(deg2rad * j_obs, &cj); 198 | 199 | real R0 = r_obs * si - beta * ci; /* cylindrical radius */ 200 | real z = r_obs * ci + beta * si; 201 | real y = R0 * sj + alpha * cj; 202 | real x = R0 * cj - alpha * sj; 203 | 204 | real4 q = (real4){0, x, y, z}; 205 | real4 u = (real4){1, si * cj, si * sj, ci}; 206 | 207 | real aa = a_spin * a_spin; 208 | real zz = q.s3 * q.s3; 209 | real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa); 210 | real rr = sqrt(kk * kk + aa * zz) + kk; 211 | real r = sqrt(rr); 212 | 213 | real f = K(2.0) * rr * r / (rr * rr + aa * zz); 214 | real lx = (r * q.s1 + a_spin * q.s2) / (rr + aa); 215 | real ly = (r * q.s2 - a_spin * q.s1) / (rr + aa); 216 | real lz = q.s3 / r; 217 | 218 | real4 gt = {-1 + f , f* lx, f* ly, f* lz}; 219 | real4 gx = { f*lx, 1 + f*lx*lx, f*lx*ly, f*lx*lz}; 220 | real4 gy = { f*ly, f*ly*lx, 1 + f*ly*ly, f*ly*lz}; 221 | real4 gz = { f*lz, f*lz*lx, f*lz*ly, 1 + f*lz*lz}; 222 | 223 | real A = gt.s0; 224 | real B = dot(gt.s123, u.s123) * K(2.0); 225 | real C = (dot(gx.s123, u.s123) * u.s1 + 226 | 
dot(gy.s123, u.s123) * u.s2 + 227 | dot(gz.s123, u.s123) * u.s3); 228 | 229 | u.s123 /= -(B + sqrt(B * B - K(4.0) * A * C)) / (K(2.0) * A); 230 | 231 | return (struct gr){q, u}; 232 | } 233 | 234 | /** 235 | ** Right hand sides of the geodesic equations in Kerr-Schild coordiantes 236 | ** 237 | ** One of the breakthroughs we achieve in GRay2 is that, by a series 238 | ** of mathematical manipulations and regrouping, we significantly 239 | ** reduce the operation count of the geodesic equations in the 240 | ** Cartesian Kerr-Schild coordinates. Let \f$\lambda\f$ be the 241 | ** affine parameter and \f$\dot{x}^\mu \equiv dx^\mu/d\lambda\f$. We 242 | ** show in Chan et al. (2017) that the geodesic equations in the 243 | ** Cartesian KS coordinates can be optimized to the following form: 244 | ** \f[ 245 | ** \ddot{x}^\mu = - \left(\eta^{\mu\beta} \dot{x}^\alpha - 246 | ** \frac{1}{2}\eta^{\mu\alpha} \dot{x}^\beta\right) 247 | ** \dot{x}_{\beta,\alpha} + F l^\mu 248 | ** \f] 249 | ** where 250 | ** \f[ 251 | ** F = f \left(l^\beta \dot{x}^\alpha - 252 | ** \frac{1}{2}l^\alpha \dot{x}^\beta\right) 253 | ** \dot{x}_{\beta,\alpha}. 254 | ** \f] 255 | ** In this new form, the right hand sides (RHS) of the geodesic 256 | ** equations have only ~65% more floating-point operations than in 257 | ** the Boyer-Lindquist coordinates. Furthermore, the evaluation of 258 | ** the RHS uses many matrix-vector products, which are optimized in 259 | ** modern hardwares. 
260 | ** 261 | ** \return The right hand sides of the geodesic equations 262 | **/ 263 | struct gr 264 | gr_rhs(struct gr g) /**< state of the ray */ 265 | { 266 | real4 q = g.q; 267 | real4 u = g.u; 268 | 269 | real f, dx_f, dy_f, dz_f; 270 | real lx, dx_lx, dy_lx, dz_lx; 271 | real ly, dx_ly, dy_ly, dz_ly; 272 | real lz, dx_lz, dy_lz, dz_lz; 273 | 274 | real hDxu, hDyu, hDzu; 275 | real4 uD; 276 | real tmp; 277 | 278 | { 279 | real dx_r, dy_r, dz_r; 280 | real r, ir, iss; 281 | { 282 | real aa = a_spin * a_spin; 283 | real rr, tmp2; 284 | { 285 | real zz = q.s3 * q.s3; 286 | real dd; 287 | { 288 | real kk = K(0.5) * (q.s1 * q.s1 + q.s2 * q.s2 + zz - aa); 289 | dd = sqrt(kk * kk + aa * zz); 290 | rr = dd + kk; 291 | } 292 | r = sqrt(rr); 293 | ir = K(1.0) / r; 294 | { 295 | real ss = rr + aa; 296 | iss = K(1.0) / ss; 297 | tmp = K(0.5) / (r * dd); 298 | dz_r = tmp * ss * q.s3; 299 | tmp *= rr; 300 | } 301 | dy_r = tmp * q.s2; 302 | dx_r = tmp * q.s1; 303 | tmp = K(2.0) / (rr + aa * zz / rr); 304 | } 305 | tmp2 = K(3.0) - K(2.0) * rr * tmp; 306 | f = tmp * r; 307 | dx_f = tmp * dx_r * tmp2; 308 | dy_f = tmp * dy_r * tmp2; 309 | dz_f = tmp * (dz_r * tmp2 - tmp * aa * q.s3 * ir); 310 | } /* 48 (-8) FLOPs; estimated FLoating-point OPerations, the number 311 | in the parentheses is (the negative of) the number of FMA */ 312 | { 313 | real m2r = K(-2.0) * r; 314 | real issr = iss * r; 315 | real issa = iss * a_spin; 316 | 317 | lx = iss * (q.s1 * r + q.s2 * a_spin); 318 | tmp = iss * (q.s1 + m2r * lx); 319 | dx_lx = tmp * dx_r + issr; 320 | dy_lx = tmp * dy_r + issa; 321 | dz_lx = tmp * dz_r; 322 | 323 | ly = iss * (q.s2 * r - q.s1 * a_spin); 324 | tmp = iss * (q.s2 + m2r * ly); 325 | dx_ly = tmp * dx_r - issa; 326 | dy_ly = tmp * dy_r + issr; 327 | dz_ly = tmp * dz_r; 328 | 329 | lz = q.s3 * ir; 330 | tmp = -lz * ir; 331 | dx_lz = tmp * dx_r; 332 | dy_lz = tmp * dy_r; 333 | dz_lz = tmp * dz_r + ir; 334 | } /* 35 (-9) FLOPs */ 335 | } 336 | 337 | { 338 | real flu; 339 | 
real4 Dx, Dy, Dz; 340 | { 341 | real lu = u.s0 + lx * u.s1 + ly * u.s2 + lz * u.s3; 342 | flu = f * lu; 343 | Dx.s0 = dx_f * lu + f * (dx_lx * u.s1 + dx_ly * u.s2 + dx_lz * u.s3); 344 | Dy.s0 = dy_f * lu + f * (dy_lx * u.s1 + dy_ly * u.s2 + dy_lz * u.s3); 345 | Dz.s0 = dz_f * lu + f * (dz_lx * u.s1 + dz_ly * u.s2 + dz_lz * u.s3); /* 31 (-12) FLOPs */ 346 | } 347 | Dx.s1 = Dx.s0 * lx + flu * dx_lx; 348 | Dx.s2 = Dx.s0 * ly + flu * dx_ly; 349 | Dx.s3 = Dx.s0 * lz + flu * dx_lz; /* 9 (-3) FLOPs */ 350 | 351 | Dy.s1 = Dy.s0 * lx + flu * dy_lx; 352 | Dy.s2 = Dy.s0 * ly + flu * dy_ly; 353 | Dy.s3 = Dy.s0 * lz + flu * dy_lz; /* 9 (-3) FLOPs */ 354 | 355 | Dz.s1 = Dz.s0 * lx + flu * dz_lx; 356 | Dz.s2 = Dz.s0 * ly + flu * dz_ly; 357 | Dz.s3 = Dz.s0 * lz + flu * dz_lz; /* 9 (-3) FLOPs */ 358 | 359 | hDxu = K(0.5) * dot(Dx, u); 360 | hDyu = K(0.5) * dot(Dy, u); 361 | hDzu = K(0.5) * dot(Dz, u); /* 24 (-9) FLOPs */ 362 | 363 | uD = u.s1 * Dx + u.s2 * Dy + u.s3 * Dz; /* 20 (-8) FLOPs */ 364 | 365 | tmp = f * (-uD.s0 + lx * (uD.s1 - hDxu) + ly * (uD.s2 - hDyu) + lz * (uD.s3 - hDzu)); /* 10 (-3) FLOPs */ 366 | } 367 | 368 | { 369 | real4 a = { 370 | uD.s0 - tmp, 371 | hDxu - uD.s1 + lx * tmp, 372 | hDyu - uD.s2 + ly * tmp, 373 | hDzu - uD.s3 + lz * tmp 374 | }; /* 10 (-3) FLOPs */ 375 | 376 | return (struct gr){u, a}; 377 | } 378 | } 379 | -------------------------------------------------------------------------------- /sim-org/io.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2020-2021 Gabriele Bozzola 3 | * Copyright (C) 2016 Chi-kwan Chan 4 | * Copyright (C) 2016 Steward Observatory 5 | * 6 | * This file is part of GRay2. 7 | * 8 | * GRay2 is free software: you can redistribute it and/or modify it 9 | * under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * GRay2 is distributed in the hope that it will be useful, but WITHOUT 14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16 | * License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with GRay2. If not, see . 20 | */ 21 | #include "gray.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | /* Compare function, needed for qsort (needed to sort the times in the HDF5 file) */ 28 | int compare (const void *a, const void *b) 29 | { 30 | /* With the help of https://stackoverflow.com/a/3886497 */ 31 | char *rem; 32 | real a_num = strtod((char*)a, &rem); 33 | real b_num = strtod((char*)b, &rem); 34 | return (a_num > b_num) - (a_num < b_num); 35 | } 36 | 37 | /** \todo Implement load() */ 38 | 39 | void 40 | dump(Lux_job *ego, size_t i) 41 | { 42 | Lux_opencl *ocl = EGO->ocl; 43 | 44 | struct param *p = &EGO->param; 45 | struct setup *s = &EGO->setup; 46 | 47 | const size_t sz = s->precision; 48 | const size_t n_data = EGO->n_coor + EGO->n_freq * 2; 49 | const size_t n_info = EGO->n_info; 50 | const size_t n_rays = p->h_rays * p->w_rays; 51 | 52 | void *data = ocl->mmap(ocl, EGO->data, sz * n_rays * n_data); 53 | void *info = ocl->mmap(ocl, EGO->info, sz * n_rays * n_info); 54 | 55 | char buf[64]; 56 | FILE *f; 57 | 58 | snprintf(buf, sizeof(buf), s->outfile, i); 59 | f = fopen(buf, "wb"); 60 | 61 | fwrite(&sz, sizeof(size_t), 1, f); 62 | fwrite(&n_data, sizeof(size_t), 1, f); 63 | fwrite(&p->w_rays, sizeof(size_t), 1, f); 64 | fwrite(&p->h_rays, sizeof(size_t), 1, f); 65 | fwrite( data, sz * n_data, n_rays, f); 66 | fwrite( info, sz * n_info, n_rays, f); 67 | 68 | fclose(f); 69 | 70 | ocl->munmap(ocl, EGO->info, info); 71 | ocl->munmap(ocl, EGO->data, data); 72 | } 73 | 74 | size_t 75 | read_variable_from_h5_file_and_return_num_points(const hid_t group_id, 76 | const char *var_name, 77 | 
void **var_array) 78 | { 79 | 80 | /* Here we read var_name from group_id and put in var_array*/ 81 | /* var_array is a pointer to a pointer to the area of memory 82 | * where the data will be written */ 83 | /* We want a pointer to a pointer because we want to modify var_array 84 | * with malloc */ 85 | 86 | /* We allocate memory, so it has to be freed! */ 87 | /* WARNING: The memory has to be freed! */ 88 | 89 | /* The return value is the number of elements */ 90 | 91 | herr_t status; 92 | hid_t datasetH5type; 93 | hid_t dataset_id, dataspace_id; /* identifiers for dsets*/ 94 | 95 | lux_debug("Reading variable %s\n", var_name); 96 | 97 | dataset_id = H5Dopen(group_id, var_name, H5P_DEFAULT); 98 | if (dataset_id == -1) { 99 | lux_print("Error in opening dataset: %s", var_name); 100 | return dataset_id; 101 | } 102 | 103 | /* The dataspace will tell us about the size (in bytes) of the data */ 104 | dataspace_id = H5Dget_space(dataset_id); 105 | if (dataspace_id == -1) { 106 | lux_print("Error in getting dataspace: %s", var_name); 107 | return dataset_id; 108 | } 109 | 110 | const size_t total_num_bytes = H5Dget_storage_size(dataset_id); 111 | 112 | /* Here we allocate the memory */ 113 | /* IT MUST BE FREED! 
*/ 114 | *var_array = malloc(total_num_bytes); 115 | 116 | /* To read the data, we must know what type is it */ 117 | datasetH5type = H5Tget_native_type(H5Dget_type(dataset_id), H5T_DIR_DEFAULT); 118 | if (datasetH5type == -1) { 119 | lux_print("Error in determining type in dataset: %s", var_name); 120 | return datasetH5type; 121 | } 122 | 123 | /* Size in bytes of each signle element */ 124 | const size_t sz = H5Tget_size(datasetH5type); 125 | 126 | status = H5Dread(dataset_id, datasetH5type, H5S_ALL, H5S_ALL, H5P_DEFAULT, 127 | *var_array); 128 | 129 | if (status != 0) { 130 | lux_print("Error in reading dataset: %s", var_name); 131 | return status; 132 | } 133 | 134 | status = H5Dclose(dataset_id); 135 | if (status != 0) { 136 | printf("Error in closing dataset: %s", var_name); 137 | return status; 138 | } 139 | 140 | status = H5Sclose(dataspace_id); 141 | if (status != 0) { 142 | printf("Error in closing dataspace: %s", var_name); 143 | return status; 144 | } 145 | 146 | return total_num_bytes / sz; 147 | } 148 | 149 | size_t 150 | populate_ego_available_times(Lux_job *ego) { 151 | 152 | /* HDF5 identifiers */ 153 | hid_t file_id; 154 | hid_t group_id; 155 | herr_t status; 156 | 157 | hsize_t nobj; 158 | 159 | struct param *p = &EGO->param; 160 | const char *file_name = p->dyst_file; 161 | 162 | file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); 163 | if (file_id == -1) { 164 | lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name); 165 | return file_id; 166 | } 167 | 168 | /* Open group corresponding to root */ 169 | group_id = H5Gopen(file_id, "/", H5P_DEFAULT); 170 | if (group_id == -1) { 171 | lux_print("ERROR: Could not open root of HDF5 file\n"); 172 | return group_id; 173 | } 174 | 175 | /* Get all the members of the groups, one at a time */ 176 | status = H5Gget_num_objs(group_id, &nobj); 177 | if (status != 0) { 178 | lux_print("ERROR: Could not obtain number of groups in HDF5 file\n"); 179 | return status; 180 | } 181 | 182 | if 
(nobj < 2) { 183 | lux_print("ERROR: Not enough groups in the HDF5 file\n"); 184 | return -1; 185 | } 186 | 187 | lux_debug("Available times: \n"); 188 | 189 | char time_name[MAX_TIME_NAME_LENGTH]; 190 | 191 | for (size_t i = 0; i < nobj; i++) { 192 | H5Gget_objname_by_idx(group_id, i, time_name, MAX_TIME_NAME_LENGTH); 193 | /* We esclude the "grid" group, which contains the coordinates */ 194 | if (time_name[0] != 'g'){ 195 | char *time_name_in_ego = EGO->available_times[i]; 196 | snprintf(time_name_in_ego, sizeof(time_name), "%s", time_name); 197 | lux_debug("%s\n", time_name_in_ego); 198 | } 199 | } 200 | 201 | /* Now we sort the available_times array in ascending order */ 202 | qsort(EGO->available_times, nobj - 1, sizeof(EGO->available_times[0]), compare); 203 | 204 | lux_debug("Sorted available times: \n"); 205 | 206 | for (size_t i = 0; i < nobj - 1; i++){ 207 | lux_debug("%s\n", EGO->available_times[i]); 208 | } 209 | 210 | char *rem; 211 | /* Here it is -2 because we have a 'grid' group around, and it has to be after 212 | * the numbers (nobj - 2 is the last element of the array) */ 213 | EGO->max_available_time = strtod(EGO->available_times[nobj - 2], &rem); 214 | 215 | return 0; 216 | } 217 | 218 | size_t 219 | load_coordinates(Lux_job *ego){ 220 | /* Here we load the coordinates from the 'grid' dataset in the HDF5 file */ 221 | 222 | /* OpenCL Image properties */ 223 | cl_image_format imgfmt; 224 | cl_image_desc imgdesc; 225 | cl_int err; 226 | 227 | struct param *p = &EGO->param; 228 | 229 | const char *file_name = p->dyst_file; 230 | 231 | /* Dimension names, useful for loops */ 232 | const char dimension_names[4][2] = {"t", "x", "y", "z"}; 233 | 234 | /* HDF5 identifiers */ 235 | hid_t file_id; 236 | hid_t group_id; 237 | herr_t status; 238 | 239 | /* Array of pointers for the coordinates */ 240 | /* There are only 3: x, y, z */ 241 | void *coordinates[3]; 242 | 243 | file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); 244 | if (file_id == -1) 
{ 245 | lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name); 246 | return file_id; 247 | } 248 | 249 | group_id = H5Gopen(file_id, "grid", H5P_DEFAULT); 250 | if (group_id == -1) { 251 | lux_print("ERROR: grid group not found!\n"); 252 | return group_id; 253 | } 254 | 255 | /* First, we read the coordinates */ 256 | /* dimension_names[i + 1] because we ignore the time, which is the zeroth */ 257 | for (size_t i = 0; i < 3; i++){ 258 | EGO->num_points.s[i + 1] = read_variable_from_h5_file_and_return_num_points( 259 | group_id, dimension_names[i + 1], &coordinates[i]); 260 | /* This is an error, something didn't work as expected */ 261 | /* We have already printed what */ 262 | if (EGO->num_points.s[i + 1] <= 0) 263 | return EGO->num_points.s[i + 1]; 264 | } 265 | 266 | lux_debug("Read coordiantes\n"); 267 | 268 | /* Fill spatial bounding box */ 269 | for (int i = 1; i < 4; i++){ 270 | /* xmin */ 271 | EGO->bounding_box.s[i] = ((cl_float *)coordinates[i - 1])[0]; 272 | /* xmax */ 273 | EGO->bounding_box.s[i + 4] = ((cl_float *)coordinates[i - 1])[EGO->num_points.s[i] - 1]; 274 | } 275 | 276 | return 0; 277 | } 278 | 279 | void 280 | copy_snapshot(Lux_job *ego, size_t to_t1){ 281 | /* This function copies over the snapshot in _t2 to _t1 if to_t1 is true, 282 | * otherwise from _t1 to _t2. Before doing this, the memory is released. 283 | * We assume that data is already defined before copying. 
*/ 284 | size_t index = 0; 285 | for (size_t i = 0; i < 4; i++) 286 | for (size_t j = 0; j < 4; j++) 287 | for (size_t k = j; k < 4; k++) { 288 | if (to_t1){ 289 | /* TODO: Error checking on clReleaseMemObject */ 290 | clReleaseMemObject(EGO->spacetime_t1[index]); 291 | EGO->spacetime_t1[index] = EGO->spacetime_t2[index]; 292 | }else{ 293 | /* TODO: Error checking on clReleaseMemObject */ 294 | clReleaseMemObject(EGO->spacetime_t2[index]); 295 | EGO->spacetime_t2[index] = EGO->spacetime_t1[index]; 296 | } 297 | index++; 298 | } 299 | 300 | } 301 | 302 | size_t 303 | load_snapshot(Lux_job *ego, size_t time_snapshot_index, size_t load_in_t1){ 304 | 305 | /* If load_in_t1 is true, then fill the t1 slot, otherwise, fill the t2 slot. */ 306 | 307 | /* TODO: Add support to compressed HDF5 files */ 308 | /* https://support.hdfgroup.org/ftp/HDF5/examples/examples-by-api/hdf5-examples/1_10/C/H5D/h5ex_d_shuffle.cgo */ 309 | 310 | struct param *p = &EGO->param; 311 | 312 | const char *file_name = p->dyst_file; 313 | 314 | /* Dimension names, useful for loops */ 315 | const char dimension_names[4][2] = {"t", "x", "y", "z"}; 316 | 317 | /* OpenCL Image properties */ 318 | cl_image_format imgfmt; 319 | cl_image_desc imgdesc; 320 | cl_int err; 321 | 322 | /* HDF5 identifiers */ 323 | hid_t file_id; 324 | hid_t group_id; 325 | herr_t status; 326 | 327 | clock_t start, end; 328 | double cpu_time_used; 329 | 330 | char *time = EGO->available_times[time_snapshot_index]; 331 | 332 | start = clock(); 333 | 334 | file_id = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); 335 | if (file_id == -1) { 336 | lux_print("ERROR: File %s is not a valid HDF5 file\n", file_name); 337 | return file_id; 338 | } 339 | /* Open group corresponding to time */ 340 | group_id = H5Gopen(file_id, time, H5P_DEFAULT); 341 | if (group_id == -1) { 342 | lux_print("ERROR: Time %s not found\n", time); 343 | return group_id; 344 | } 345 | 346 | lux_debug("Reading time %s\n", time); 347 | 348 | /* Now, we read the 
Gammas */ 349 | void *Gamma[40]; 350 | size_t num_points; 351 | size_t index = 0; 352 | const size_t expected_num_points = EGO->num_points.s[1] * 353 | EGO->num_points.s[2] * 354 | EGO->num_points.s[3]; 355 | 356 | for (size_t i = 0; i < 4; i++) 357 | for (size_t j = 0; j < 4; j++) 358 | for (size_t k = j; k < 4; k++) { 359 | char var_name[256]; 360 | snprintf(var_name, sizeof(var_name), "Gamma_%s%s%s", dimension_names[i], 361 | dimension_names[j], dimension_names[k]); 362 | 363 | /* We treat our 3D data as 1D */ 364 | num_points = read_variable_from_h5_file_and_return_num_points( 365 | group_id, var_name, &Gamma[index]); 366 | 367 | /* for (size_t kk=0; kk < num_points; kk++){ */ 368 | /* float val = ((float*)Gamma[index])[kk]; */ 369 | /* if (val > 1) printf("Gamma kk %d, %.16g\n", kk, val); */ 370 | /* } */ 371 | 372 | /* This is an error, something didn't work as expected */ 373 | /* We have already printed what */ 374 | if (num_points <= 0) 375 | return num_points; 376 | 377 | if (num_points != expected_num_points) { 378 | lux_print("Number of points in Gammas inconsistent with coordinates\n"); 379 | return -1; 380 | } 381 | index++; 382 | } 383 | 384 | lux_debug("Read Gammas\n"); 385 | 386 | /* Read metric */ 387 | void *g[10]; 388 | 389 | index = 0; 390 | for(size_t i = 0; i < 4; i++) 391 | for(size_t j = i; j < 4; j++) { 392 | char var_name[256]; 393 | snprintf(var_name, sizeof(var_name), "g_%s%s", dimension_names[i], dimension_names[j]); 394 | 395 | /* We treat our 3D data as 1D */ 396 | num_points = read_variable_from_h5_file_and_return_num_points( 397 | group_id, var_name, &g[index]); 398 | 399 | /* This is an error, something didn't work as expected */ 400 | /* We have already printed what */ 401 | if (num_points <= 0) 402 | return num_points; 403 | 404 | if (num_points != expected_num_points) { 405 | lux_print("Number of points in Gammas inconsistent with coordinates\n"); 406 | return -1; 407 | } 408 | index++; 409 | } 410 | 411 | lux_debug("Read 
metric\n"); 412 | 413 | void *rho; 414 | { 415 | /* We treat our 3D data as 1D */ 416 | num_points = read_variable_from_h5_file_and_return_num_points( 417 | group_id, "rho", &rho); 418 | 419 | /* This is an error, something didn't work as expected */ 420 | /* We have already printed what */ 421 | if (num_points <= 0) 422 | return num_points; 423 | 424 | if (num_points != expected_num_points) { 425 | lux_print("Number of points in Gammas inconsistent with coordinates\n"); 426 | return -1; 427 | } 428 | } 429 | 430 | lux_debug("Read fluid\n"); 431 | 432 | /* Finally, we create the images */ 433 | 434 | imgfmt.image_channel_order = CL_R; /* use one channel */ 435 | imgfmt.image_channel_data_type = CL_FLOAT; /* each channel is a float */ 436 | imgdesc.image_type = CL_MEM_OBJECT_IMAGE3D; 437 | imgdesc.image_width = EGO->num_points.s[1]; /* x */ 438 | imgdesc.image_height = EGO->num_points.s[2]; /* y */ 439 | imgdesc.image_depth = EGO->num_points.s[3]; /* z */ 440 | imgdesc.image_row_pitch = 0; 441 | imgdesc.image_slice_pitch = 0; 442 | imgdesc.num_mip_levels = 0; 443 | imgdesc.num_samples = 0; 444 | imgdesc.buffer = NULL; 445 | 446 | index = 0; 447 | for (size_t i = 0; i < 4; i++) 448 | for (size_t j = 0; j < 4; j++) 449 | for (size_t k = j; k < 4; k++) { 450 | /* We fill _t1 only the first time, when snapshot_index = 0, 451 | * in all the other cases we fill _t2, and then we shift the pointers.*/ 452 | if (load_in_t1){ 453 | EGO->spacetime_t1[index] = clCreateImage( 454 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 455 | &imgdesc, Gamma[index], &err); 456 | }else{ 457 | EGO->spacetime_t2[index] = clCreateImage( 458 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 459 | &imgdesc, Gamma[index], &err); 460 | } 461 | /* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */ 462 | if (err != CL_SUCCESS) { 463 | lux_print("Error in creating images\n"); 464 | return err; 465 | } 466 
| index++; 467 | } 468 | 469 | for (size_t i = 0; i < 4; i++) 470 | for (size_t j = i; j < 4; j++) { 471 | /* We fill _t1 only the first time, when snapshot_index = 0, 472 | * in all the other cases we fill _t2, and then we shift the pointers.*/ 473 | if (load_in_t1){ 474 | EGO->spacetime_t1[index] = clCreateImage( 475 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 476 | &imgdesc, g[index-40], &err); 477 | }else{ 478 | EGO->spacetime_t2[index] = clCreateImage( 479 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 480 | &imgdesc, g[index-40], &err); 481 | } 482 | /* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */ 483 | if (err != CL_SUCCESS) { 484 | lux_print("Error in creating images\n"); 485 | return err; 486 | } 487 | index++; 488 | } 489 | 490 | { 491 | /* We fill _t1 only the first time, when snapshot_index = 0, 492 | * in all the other cases we fill _t2, and then we shift the pointers.*/ 493 | if (load_in_t1){ 494 | EGO->spacetime_t1[index] = clCreateImage( 495 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 496 | &imgdesc, rho, &err); 497 | }else{ 498 | EGO->spacetime_t2[index] = clCreateImage( 499 | EGO->ocl->ctx, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, &imgfmt, 500 | &imgdesc, rho, &err); 501 | } 502 | /* https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ */ 503 | if (err != CL_SUCCESS) { 504 | lux_print("Error in creating images\n"); 505 | return err; 506 | } 507 | } 508 | 509 | lux_debug("Images created\n"); 510 | 511 | for (size_t i = 0; i < 40; i++) 512 | free(Gamma[i]); 513 | 514 | for (size_t i = 0; i < 10; i++) 515 | free(g[i]); 516 | 517 | free(rho); 518 | 519 | status = H5Fclose(file_id); 520 | if (status != 0) { 521 | printf("Error in closing HDF5 file"); 522 | return status; 523 | } 524 | 525 | end = clock(); 526 | cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; 
527 | 528 | lux_print("Reading file and creating images for time %s took %.5f s\n", 529 | time, 530 | cpu_time_used); 531 | 532 | return 0; 533 | } 534 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others.
33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. 
To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------