├── .clang-format
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── scorecard.yml
├── .gitignore
├── .vscode
    ├── launch.json
    └── tasks.json
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── cmake
    ├── FindGLEW.cmake
    ├── FindGLFW.cmake
    ├── FindGLM.cmake
    ├── FindVulkan.cmake
    └── Finddpct.cmake
├── docs
    ├── Eq1.png
    └── downscale_artefact.png
├── libs
    └── imgui
    │   ├── CMakeLists.txt
    │   ├── include
    │       ├── imconfig.h
    │       ├── imgui.h
    │       ├── imgui_impl_glfw.h
    │       ├── imgui_impl_opengl3.h
    │       ├── imgui_impl_opengl3_loader.h
    │       ├── imgui_internal.h
    │       ├── imstb_rectpack.h
    │       ├── imstb_textedit.h
    │       └── imstb_truetype.h
    │   └── src
    │       ├── imgui.cpp
    │       ├── imgui_demo.cpp
    │       ├── imgui_draw.cpp
    │       ├── imgui_impl_glfw.cpp
    │       ├── imgui_impl_opengl3.cpp
    │       ├── imgui_tables.cpp
    │       └── imgui_widgets.cpp
├── scripts
    ├── build_cuda.sh
    ├── build_dpcpp.sh
    ├── docker_build_etc.sh
    ├── perf_test.sh
    ├── perf_test_cuda.sh
    ├── perf_test_dpcpp.sh
    ├── run_dpct.sh
    ├── run_dpct_native.sh
    ├── run_nbody.sh
    └── xvfb.sh
├── shaders
    └── gl
    │   ├── blur.frag
    │   ├── deferred.vert
    │   ├── integration.comp
    │   ├── interaction.comp
    │   ├── luminance.frag
    │   ├── main.frag
    │   ├── main.geom
    │   ├── main.vert
    │   └── tonemap.frag
├── src
    ├── CMakeLists.txt
    ├── camera.cpp
    ├── camera.hpp
    ├── gen.cpp
    ├── gen.hpp
    ├── nbody.cpp
    ├── renderer.hpp
    ├── renderer_gl.cpp
    ├── renderer_gl.hpp
    ├── shader.cpp
    ├── shader.hpp
    ├── sim_param.cpp
    ├── sim_param.hpp
    ├── simulator.cu
    └── simulator.cuh
└── src_sycl
    ├── CMakeLists.txt
    ├── README.md
    ├── camera.cpp
    ├── camera.hpp
    ├── gen.cpp
    ├── gen.hpp
    ├── nbody.cpp
    ├── renderer.hpp
    ├── renderer_gl.cpp
    ├── renderer_gl.hpp
    ├── shader.cpp
    ├── shader.hpp
    ├── sim_param.cpp
    ├── sim_param.hpp
    ├── simulator.dp.cpp
    └── simulator.dp.hpp


/.clang-format:
--------------------------------------------------------------------------------
1 | {BasedOnStyle: Google, IndentWidth: 3, ColumnLimit: 80, NamespaceIndentation: All, AlignTrailingComments: true}


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   # Enable version updates for Github Actions
 4 |   - package-ecosystem: "github-actions"
 5 |     directory: "/"
 6 |     schedule:
 7 |       interval: "monthly"
 8 |     groups:
 9 |       github-actions:
10 |         patterns:
11 |           - "*"
12 |     reviewers:
13 |       - "codeplaysoftware/security-managers"
14 | 


--------------------------------------------------------------------------------
/.github/workflows/scorecard.yml:
--------------------------------------------------------------------------------
 1 | # Scorecards' GitHub action
 2 | 
 3 | name: Scorecard supply-chain security
 4 | on:
 5 |   # For Branch-Protection check. Only the default branch is supported. See
 6 |   # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
 7 |   branch_protection_rule:
 8 |   schedule:
 9 |     - cron: '15 18 * * 5'
10 |   push:
11 |     branches: [ "main" ]
12 | 
13 | # Declare default permissions as read only.
14 | permissions: read-all
15 | 
16 | jobs:
17 |   analysis:
18 |     name: Scorecard analysis
19 |     runs-on: ubuntu-latest
20 |     permissions:
21 |       # Needed to upload the results to code-scanning dashboard.
22 |       security-events: write
23 |       # Needed to publish results and get a badge (see publish_results below).
24 |       id-token: write
25 | 
26 |     steps:
27 |       - name: "Checkout code"
28 |         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
29 |         with:
30 |           persist-credentials: false
31 | 
32 |       - name: "Run analysis"
33 |         uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0
34 |         with:
35 |           results_file: results.sarif
36 |           results_format: sarif
37 |           publish_results: true
38 | 
39 |       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
40 |       # format to the repository Actions tab.
41 |       - name: "Upload artifact"
42 |         uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
43 |         with:
44 |           name: SARIF file
45 |           path: results.sarif
46 |           retention-days: 5
47 | 
48 |       # Upload the results to GitHub's code scanning dashboard (optional).
49 |       # Commenting out will disable upload of results to your repo's Code Scanning dashboard
50 |       - name: "Upload to code-scanning"
51 |         uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
52 |         with:
53 |           sarif_file: results.sarif
54 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | build*
 2 | nbodygl
 3 | nbodyvk
 4 | .cache
 5 | *~
 6 | settings.json
 7 | **/dpct-output/
 8 | \#*
 9 | *mp4
10 | lib/**cpp
11 | lib/**h
12 | *fatbin*
13 | *.ptx
14 | log*
15 | .vscode
16 | nbody_dpcpp
17 | nbody_cuda
18 | nbody_cuda_d
19 | nbody_dpcpp_d
20 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "configurations": [
 3 |         {
 4 |             "name": "DEBUG: (gdb-oneapi) nbody_dpcpp_d Launch",
 5 |             "type": "cppdbg",
 6 |             "request": "launch",
 7 |             "preLaunchTask": "Debug C/C++: DPCPP Makefile",
 8 |             "postDebugTask": "",
 9 |             "program": "${workspaceFolder}/nbody_dpcpp_d",
10 |             "args": ["50", "4", "0.999998", "0.005", "1.0e-7", "2.0", "100000"],
11 |             "stopAtEntry": true,
12 |             "cwd": "${workspaceFolder}",
13 |             "environment": [
14 |                 {
15 |                     "name": "ZET_ENABLE_PROGRAM_DEBUGGING",
16 |                     "value": "1"
17 |                 },
18 |                 {
19 |                     "name": "IGC_EnableGTLocationDebugging",
20 |                     "value": "1"
21 |                 }
22 |             ],
23 |             "externalConsole": false,
24 |             "MIMode": "gdb",
25 |             "miDebuggerPath": "gdb-oneapi",
26 |             "setupCommands": [
27 |                 {
28 |                     "description": "Enable pretty-printing for gdb",
29 |                     "text": "-enable-pretty-printing",
30 |                     "ignoreFailures": true
31 |                 },
32 |                 {
33 |                     "description": "Disable target async",
34 |                     "text": "set target-async off",
35 |                     "ignoreFailures": true
36 |                 },
37 |                 {
38 |                     "description": "Do not display function arguments when printing a stack frame",
39 |                     "text": "set print frame-arguments none",
40 |                     "ignoreFailures": true
41 |                 }
42 |             ]
43 |         },
44 |         {
45 |             "name": "DEBUG: (cuda-gdb) nbody_cuda_d Launch",
46 |             "type": "cuda-gdb",
47 |             "request": "launch",
48 |             "preLaunchTask": "Debug C/C++: CUDA Makefile",
49 |             "postDebugTask": "",
50 |             "program": "${workspaceFolder}/nbody_cuda_d",
51 |             "args": "50 4 0.999998 0.005 1.0e-7 2.0 100000",
52 |             "stopAtEntry": true,
53 |             "cwd": "${workspaceFolder}"
54 |         }
55 |     ]
56 | }


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"version": "2.0.0",
 3 | 	"tasks": [
 4 | 		{
 5 | 			"type": "cppbuild",
 6 | 			"label": "Debug C/C++: CUDA Makefile",
 7 | 			"command": "make",
 8 | 			"args": [
 9 | 				"debug"
10 | 			],
11 | 			"options": {
12 | 				"cwd": "${workspaceFolder}/build_cuda"
13 | 			},
14 | 			"problemMatcher": [
15 | 				"$gcc"
16 | 			],
17 | 			"group": "build",
18 | 			"detail": "compiler: INTEL oneapi icpx"
19 | 		},
20 | 		{
21 | 			"type": "cppbuild",
22 | 			"label": "Release C/C++: CUDA Makefile",
23 | 			"command": "make",
24 | 			"args": [
25 | 				"release"
26 | 			],
27 | 			"options": {
28 | 				"cwd": "${workspaceFolder}/build_cuda"
29 | 			},
30 | 			"problemMatcher": [
31 | 				"$gcc"
32 | 			],
33 | 			"group": "build",
34 | 			"detail": "compiler: INTEL oneapi icpx"
35 | 		},
36 | 		{
37 | 			"type": "cppbuild",
38 | 			"label": "Release C/C++: DPCPP Makefile",
39 | 			"command": "make",
40 | 			"args": [
41 | 				"release"
42 | 			],
43 | 			"options": {
44 | 				"cwd": "${workspaceFolder}/build_dpcpp"
45 | 			},
46 | 			"problemMatcher": [
47 | 				"$gcc"
48 | 			],
49 | 			"group": "build",
50 | 			"detail": "compiler: INTEL oneapi icpx"
51 | 		},
52 | 		{
53 | 			"type": "cppbuild",
54 | 			"label": "Debug C/C++: DPCPP Makefile",
55 | 			"command": "make",
56 | 			"args": [
57 | 				"debug"
58 | 			],
59 | 			"options": {
60 | 				"cwd": "${workspaceFolder}/build_dpcpp"
61 | 			},
62 | 			"problemMatcher": [
63 | 				"$gcc"
64 | 			],
65 | 			"group": "build",
66 | 			"detail": "compiler: INTEL oneapi icpx"
67 | 		}
68 | 	]
69 | }
70 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | # Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | cmake_minimum_required (VERSION 3.16)
 5 | 
 6 | project (nbody LANGUAGES CXX)
 7 | # Add ./cmake (the bundled Find*.cmake modules) to the module search path and emit built executables into the source root.
 8 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 9 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
10 | # User-facing options: BACKEND selects the CUDA or DPCPP source tree; RENDER toggles building the bundled imgui library.
11 | set(BACKEND "CUDA" CACHE STRING "Which backend to build")
12 | option(RENDER "Use openGl or not" ON)
13 | # Each backend sets its default (cached) binary name and adds its own source directory; any other BACKEND value aborts configuration.
14 | if(BACKEND STREQUAL "CUDA")
15 |   set(BINARY_NAME "nbody_cuda" CACHE STRING "Binary name")
16 |   enable_language(CUDA)
17 |   add_subdirectory(src)
18 | elseif(BACKEND STREQUAL "DPCPP")
19 |   set(BINARY_NAME "nbody_dpcpp" CACHE STRING "Binary name")
20 |   add_subdirectory(src_sycl)
21 | else()
22 |   message(FATAL_ERROR "Unrecognized BACKEND")
23 | endif()
24 | # libs/imgui is only added to the build when rendering is enabled.
25 | if(RENDER)
26 |   add_subdirectory(libs/imgui)
27 | endif()
28 | 
29 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at
59 | [sycl@codeplay.com](mailto:sycl@codeplay.com). All complaints will be reviewed
60 | and investigated and will result in a response that is deemed necessary and
61 | appropriate to the circumstances. The project team is obligated to maintain
62 | confidentiality with regard to the reporter of an incident. Further details of
63 | specific enforcement policies may be posted separately.
64 | 
65 | Project maintainers who do not follow or enforce the Code of Conduct in good
66 | faith may face temporary or permanent repercussions as determined by other
67 | members of the project's leadership.
68 | 
69 | ## Attribution
70 | 
71 | This Code of Conduct is adapted from the
72 | [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4,
73 | available at
74 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
75 | 
76 | ---
77 | 
78 | If there are any issues or suggestions relating to the current set of rules, you
79 |  can reach us at [sycl@codeplay.com](mailto:sycl@codeplay.com).
80 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright for portions of project 'nbody' are held by Sarah Le Luron, 2016-2018 as part of project 'dpct-nbody'. All other copyright for project 'dpct-nbody' are held by Codeplay Software Limited, 2022.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # nbody
  2 | 
  3 | [![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/codeplaysoftware/cuda-to-sycl-nbody/badge)](https://scorecard.dev/viewer/?uri=github.com/codeplaysoftware/cuda-to-sycl-nbody)
  4 | 
  5 | Accelerated N-body sim with OpenGL graphics & automatic CUDA->SYCL conversion using [dpct](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html).
  6 | 
  7 | ![](http://i.imgur.com/drzi33P.jpg)
  8 | 
  9 | Forked from https://github.com/salel/nbody
 10 | 
 11 | ## Compilers/Backends
 12 | 
 13 | This nbody simulation can be run with any of:
 14 |  - CUDA
 15 |  - DPC++ CUDA backend
 16 |  - DPC++ OpenCL CPU backend
 17 | 
 18 | Source code for the CUDA version is in `./src/` while `./src_sycl/` contains the semi-automatically converted SYCL code.
 19 | 
 20 | ## Build Dependencies
 21 | 
 22 | ### Graphics Dependencies
 23 | 
 24 | By default the build requires OpenGL. See the **Building** section below to build without rendering.
 25 | 
 26 | The rendering components of this code are independent of the CUDA/SYCL backend, and depend on:
 27 |  - GLM
 28 |  - GLFW
 29 |  - GLEW
 30 | 
 31 | These can be installed with apt:
 32 | ```
 33 | sudo apt update
 34 | sudo apt install libglew-dev libglfw3-dev libglm-dev libxxf86vm-dev libxcursor-dev libxinerama-dev libxi-dev
 35 | ```
 36 | 
 37 | The implementation relies on OpenGL 4.5.
 38 | 
 39 | ### Simulation Dependencies (CUDA & SYCL)
 40 | 
 41 | The CUDA version of this code requires the [CUDA runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain-with-support-for-nvidia-cuda) to be installed on your machine.
 42 | 
 43 | The DPC++ CUDA backend version also requires the CUDA runtime.
 44 | 
 45 | The DPC++ OpenCL backend requires an [OpenCL runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#install-low-level-runtime). To run specifically on the CPU, you must install the OpenCL runtime for your CPU.
 46 | 
 47 | Both DPC++ backends require the [DPC++ compiler](https://intel.github.io/llvm-docs/GetStartedGuide.html) to compile the SYCL code.
 48 | 
 49 | ## Building
 50 | 
 51 | This project uses CMake for build configuration. Build scripts for CUDA and DPC++ are located in `./scripts/`. Note that these scripts include some hardcoded paths from our dev machine, and so will not work out-the-box.
 52 | 
 53 | The CMake option `-DBACKEND` selects which backend ("CUDA" or "DPCPP") to build. CUDA is built by default. The name of the built binary is suffixed with the backend (`nbody_cuda` or `nbody_dpcpp`).
 54 | 
 55 | The DPC++ backend, in turn, supports both an OpenCL & CUDA backend, both of which are built by default. If you are building on a machine without CUDA support, you can switch off the DPC++ CUDA backend with the flag `-DDPCPP_CUDA_SUPPORT=off`.
 56 | 
 57 | The build scripts create a version that includes rendering. To build versions that do not require OpenGL, provide the argument **no_render** to the build scripts.
 58 | 
 59 | By default, a **release** target is built, for example, `nbody_cuda`. To build a debug version, navigate to the build directory and execute **make debug**. Running **make** will build both versions. The debug binary will share the same name as the **release** version with "_d" appended.
 60 | 
 61 | The provided `tasks.json` and `launch.json` configuration files for vscode serve as examples, demonstrating how to initiate a debug session directly from within vscode.
 62 | 
 63 | ## Migrating CUDA to SYCL
 64 | 
 65 | The script `./scripts/run_dpct.sh` calls a containerized version of the Intel® DPC++ Compatibility Tool to automatically convert the CUDA components of this project into SYCL. A docker container was used because the dev machine has an incompatible version of the CUDA driver. This should be adapted based on your environment.
 66 | 
 67 | The Intel® DPC++ compatibility tool offers options for intercepting complex builds, but current dev environment restrictions require me to run the tool inside a docker container. This complicates things, so for now I'm just doing single source conversion on the simulator.cu file.
 68 | 
 69 | ## Running on different platforms
 70 | 
 71 | The script `./scripts/run_nbody.sh` will run the nbody simulation, selecting a different binary based on the `-b` flag, where `-b` can be `cuda` or `dpcpp`. Subsequent positional arguments are passed on to the `nbody` binary. These positional arguments are described in the [Simulation](#Simulation) section. For example, to run on the DPC++ OpenCL host backend with 25600 (100 * 256) particles, executing 10 timesteps per rendered frame:
 72 | 
 73 | ```
 74 | ./scripts/run_nbody.sh -b dpcpp 100 10
 75 | ```
 76 | 
 77 | Note that this script runs `nbody` with the default X window, as opposed to using [xvfb](#Running-headless). This makes it unsuitable for running on a remote machine.
 78 | 
 79 | `run_nbody.sh` is a simple wrapper around the `nbody_*` binaries with some environment variables set; the sections below describe how to launch the binaries directly.
 80 | 
 81 | ### Detecting available SYCL backends
 82 | 
 83 | The `sycl-ls` tool allows you to check for available backends on the system. For example, on a system with Intel OpenCL CPU runtime & CUDA runtime, the output is:
 84 | 
 85 | ```
 86 | > sycl-ls
 87 | [opencl:cpu:0] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000]
 88 | [opencl:cpu:1] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000]
 89 | [cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 3060 0.0 [CUDA 11.6]
 90 | [host:host:0] SYCL host platform, SYCL host device 1.2 [1.2]
 91 | ```
 92 | 
 93 | ### Selecting a backend (DPC++)
 94 | 
 95 | By specifying the environment variable `SYCL_DEVICE_FILTER`, it's possible to switch between running with the CUDA backend and the OpenCL host backend. For example:
 96 | 
 97 | ```
 98 |     SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp
 99 | ```
100 | will run on the CUDA backend, whereas:
101 | ```
102 |     SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp
103 | ```
104 | will run on a CPU through the OpenCL backend. Note the correspondence between options for `SYCL_DEVICE_FILTER` and the output of `sycl-ls`.
105 | 
106 | **Note**: Selection between DPC++ backends at runtime is possible because `CMakeLists.txt` specifies building the SYCL code for both CUDA (`nvptx64-nvidia-cuda`) & OpenCL (`spir64`) targets:
107 | ```
108 |      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -fsycl-targets=spir64,nvptx64-nvidia-cuda -fsycl-unnamed-lambda")
109 | ```
110 | 
111 | ### Adapting the project for DPC++ OpenCL
112 | 
113 | No changes to the code were required, but there were a couple of bugs which are worked around.
114 | 
115 | Firstly, when building for multiple targets (`-fsycl-targets`), there is a [recent bug](https://github.com/intel/llvm/issues/5330) which causes failure to link to static libraries. The workaround for this is to switch from building `imgui` as a static to a shared library.
116 | 
117 | Secondly, I encountered the common CL header bug (see [here](https://github.com/intel/llvm/issues/2617) and [here](https://github.com/oneapi-src/oneDNN/issues/885)). This turned out to be triggered for the `spir64` backend because the CUDA headers were included *only* via `-I` and not via `-internal-isystem`. This caused them to take precedence over SYCL CL headers. The solution was to not include CUDA headers in `src_sycl/CMakeLists.txt`, which turned out to be unnecessary anyway.
118 | 
119 | ## Passing data between OpenGL & CUDA/SYCL
120 | 
121 | OpenGL & CUDA are capable of interoperating to share device memory, but this will not play well with the Intel® DPC++ Compatibility Tool. Instead, computed particle positions are migrated back to the host by CUDA/SYCL, then sent *back* to OpenGL via mapping.
122 | 
123 | 
124 | ## Simulation
125 | 
126 | The `DiskGalaxySimulator` class handles the physics of n-body interaction. The computation of interparticle forces, velocity & updated particle positions are handled by the CUDA kernel `particle_interaction`.
127 | 
128 | The equation solved by this code is equivalent to Eq. 1 [here](http://www.scholarpedia.org/article/N-body_simulations_(gravitational)), with the simplifying assumption that all particles have unit mass and there is no external/background force. This becomes:
129 | 
130 | ![Eq1](/docs/Eq1.png)
131 | 
132 | The force vector on each particle (F) is the sum of gravitational forces from all other particles. For each particle interaction, the attractive force is inversely proportional to the square of the distance between them. This force is equal to the gravitational constant (`G`) multiplied by the unit vector pointing between the particles, divided by the square of this distance. The equation above has this last term slightly rearranged to avoid unnecessary computation.
133 | 
134 | Given the assumption of unit mass, the force (F) is equal to the acceleration, and so at each timestep, the force vector F is multiplied by the timestep size (`dt`) and added to the velocity vector. The position of each particle is then updated by the velocity multiplied by the timestep size (`dt`).
135 | 
136 | A drag factor (`damping`) is used to regulate the velocity. At each timestep, the velocity is multiplied by the drag term, slowing the particles. The maximum force between very close particles is also limited for stability; this is achieved via an epsilon term (`distEps`) which is added to the distance between each particle pairing.
137 | 
138 | The `parameters` described in this section can all be adjusted via command line arguments, as follows:
139 | 
140 | `./nbody_cuda numParticles simIterationsPerFrame damping dt distEps G numFrames gwSize calcMethod`
141 | 
142 | Note that `numParticles` specifies the number of particles simulated, divided by blocksize (i.e. setting `numParticles` to 50 produces 50*256 particles). `simIterationsPerFrame` specifies how many steps of the simulation to take before rendering the next frame and `numFrames` specifies the total number of simulation steps before the program exits. For default values for all of these parameters, refer to `sim_param.cpp`.
143 | 
144 | `gwSize`: This parameter allows changing the work group size from the default 64.
145 | 
146 | `calcMethod`: This string parameter selects the calculation method. With the default value, BRANCH, the code uses a branch instruction; if set to PREDICATED, it uses an arithmetic expression instead. Refer to the [performance](#sycl-vs-cuda-performance) section for details.
147 | 
148 | 
149 | ### Modifying Simulation Behaviour
150 | 
151 | You can get quite a wide range of 'galactic' behaviours by playing with the parameters described above.
152 | 
153 | Initial velocity of stars is a stable orbital velocity, computed with an implicit value for gravity of `G = 1`. The default value *during* the simulation, however, is `G = 2`. So by default the galaxy collapses inwards quite quickly, but by reducing G closer to 1, you can make a more stable, rotating galaxy.
154 | 
155 | The `damping` factor is a drag term. By default `damping = 0.999998` but by reducing this value to e.g. `0.999`, stars will tend to form local clusters before collapsing in towards the galactic centre.
156 | 
157 | `distEps` serves as a stabilising parameter to prevent numerical instability at larger timestep sizes. Setting this value very small (`1.0e-10`) will produce more 'explosive' simulations. This is unrealistic for n-body gravitational interaction, but it looks dramatic.
158 | 
159 | If you want to speed up the evolution of the galaxy, set a larger timestep size (`dt`) or increase the number of steps taken per frame (`simIterationsPerFrame`). Either change will increase the total simulation time per rendered frame. If you reach a sufficiently high timestep size that you get unstable explosive behaviour, increase the value of `distEps` and this should stabilise things. Note that there is a separate discussion [below](#performance-scaling-for-demos) about altering the ratio of compute/render time to, for instance, visually highlight a performance difference between platforms.
160 | 
161 | ## Graphics Pipeline
162 | 
163 | ### Rendering
164 | Render targets for all passes except the last use dimensions a bit larger than the window, to prevent popping. This is used when some effects affect neighboring pixels (bloom, ssao..) and must be taken into account even when off-screen.
165 | #### HDR
166 | Each particle is rendered as a fixed-size flare, generated from a gaussian. Particle color depends on velocity, blue at low speeds and purple at high speeds. Additive blending is set, so dense regions look bright. The render target is RGBA16F, because GL_R11F_G11F_B10F looks yellow on subsequent render passes.
167 | 
168 | #### Bloom
169 | 
170 | Bloom is applied through a separable Gaussian blur, applied once in the horizontal and then the vertical direction. The 1D Gaussian kernel is computed by `RendererGL::gaussKernel` and optimized to minimize texel lookups by `RendererGL::optimGaussKernel` following [this guide](https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/). At present, a gaussian window of 49 pixels with sigma = 10.0 is used. Multiple passes are possible (ping pong between two RGBA16F frame buffers), though at present we execute only one blur in each direction.
171 | 
172 | Note that unlike typical bloom processing, there is no extraction of bright light sources prior to blurring, because the scene (bright stars on a dark background) makes this obsolete.
173 | 
174 | The appearance & performance of the blur is controlled by four variables which are not currently exposed as arguments to `nbody_[backend]` but which could be manually modified as desired. The two arguments to the `gaussKernel` function (`sigma` and `halfwidth`) effectively define the 'spread' of the blur. Higher values for `sigma` result in wider blurring, whereas `halfwindow` defines the actual width of the pixel window which is sampled. Higher values of `halfwindow` will decrease performance, as more texel lookups are required. As a general rule, when increasing `sigma`, it will likely be necessary to increase `halfwindow` to avoid an obvious visual cut-off at the edge of the window. Conversely, a wide `halfwindow` with a small `sigma` reduces performance unnecessarily, because texels with negligible contribution will be sampled.
175 | 
176 | Blur downscaling is a common technique to improve blur performance; the image is downsampled by the factor `blur_dsc` defined in `renderer_gl.cpp`, then the regular blur filter is performed, and finally the image is upscaled again. This is a very cheap way of enhancing the blur effect, but there is an associated artefact:
177 | 
178 | ![DownscaleArtefact](/docs/downscale_artefact.png)
179 | 
180 | If this artefact is unacceptable, set `blur_dsc = 1` to turn off downscaling. Note however that this will significantly reduce the blurriness, and compensating with wider `halfwindow` or more passes (see below) will cost a lot of rendering time.
181 | 
182 | Enhanced blurring can also be achieved by executing multiple passes. This is controlled by `nPasses`, and is set to 1 by default. Due to the dominance of blur in the render pipeline, total rendering time should scale pretty much linearly with `nPasses`, so increasing it is a potentially expensive option.
183 | 
184 | #### Average luminance
185 | The average luminance of the scene is computed from the HDR target into a downscaled R16F target. Then we generate mipmaps to obtain the average luminance on the smallest mipmap (1x1). (Could also be obtained from a 2x2 texture but screen-size targets always seem to resolve down to odd dimensions)
186 | 
187 | #### Tonemapping & gamma correction
188 | The exposure of the final render is obtained from the average luminance, and the HDR and Bloom targets are combined and converted to LDR. Gamma correction is also applied. Tada.
189 | 
190 | ## Running headless
191 | 
192 | If you run `nbody_cuda` on a remote machine with X-forwarding, sending the rendered frames across the net will be a significant bottleneck. This can be worked around by making use of [Xvfb](https://linux.die.net/man/1/xvfb) which provides a *virtual* X display. You can then read from the memory mapped file to write to e.g. MP4 output.
193 | 
194 | The script `./scripts/xvfb.sh` runs `nbody_cuda` in this manner, producing a video file `output.mp4`. Note that this script will run the simulation until manually terminated.
195 | 
196 | ## Performance Scaling for Demos
197 | 
198 | We've previously discussed the desire for a simulation which is *visibly* slower when the physics kernel isn't well optimized. With current default settings, the rendering takes longer (~55ms) than the simulation (10ms). However, altering three of the simulation parameters provides almost complete control of the ratio of render to simulation time.
199 | 
200 | Firstly, the number of particles (`numParticles` [above](#Simulation)) has a large effect on the simulation time, as the computation scales with O(n<sup>2</sup>). By default, 12.8k particles (50 * 256) are rendered; increasing this to 64k particles (250 * 256) raises the simulation time from 10ms to ~170ms.
201 | 
202 | Alternatively, simulation time can be arbitrarily raised or lowered by changing both timestep size (`dt` [above](#Simulation)) and simulation steps per rendered frame (`simIterationsPerFrame`, [above](#Simulation)). By default, a timestep size of 0.005 is used, and 4 simulation steps are taken per rendered frame (Note that `scripts/xvfb.sh` overrides these default values with `dt = 0.001` and `simIterationsPerFrame = 5`).
203 | 
204 | To increase the simulation time by a factor of 5, for example, simply divide `dt` by 5 and multiply `simIterationsPerFrame` by 5. This will produce *almost* identical output. Take care with *increasing* `dt` to get the opposite effect; above a certain value, the simulation will become unstable & you may see this manifest as unphysical behaviour (very fast moving stars exploding out from the centre). Instability at large `dt` can be mitigated, to an extent, by increasing `distEps` or `damping`.
205 | 
206 | A significant portion of the rendering time is the bloom filter. The [bloom](#Bloom) section has some tips about how to control this.
207 | 
208 | ## SYCL vs. CUDA performance
209 | 
210 | This repo previously reported *faster* performance from SYCL than CUDA, but this was due to an erroneous translation in the Intel® DPC++ Compatibility Tool from `__frsqrt_rn` to `sycl::rsqrt`. The former has higher precision and runs slower than the latter. This has now been rectified so that the original CUDA code calls `rsqrt`.
211 | 
212 | With this bug rectified, and without any further modification to the CUDA code or migrated SYCL code, the SYCL code used to be considerably slower because the Intel® DPC++ Compatibility Tool used to insert a cast to double in the rsqrt call:
213 | 
214 | ```
215 |          coords_t inv_dist_cube =
216 |              sycl::rsqrt((double)dist_sqr * dist_sqr * dist_sqr);
217 | 
218 | ```
219 | 
220 | This was presumably because the tool was unaware of the equivalence of `rsqrt` and `sycl::rsqrt`. However, inspecting PTX reveals that the generated instructions are the same, so the cast to double is unnecessary. Removing the cast to double leaves a 40% performance gap between CUDA and SYCL. (Removing the cast by hand is no longer necessary, as newer versions of the Intel® DPC++ Compatibility Tool no longer insert it.)
221 | 
222 | The root cause of this 40% performance gap appears to be different handling of the branch instruction:
223 | 
224 | ```
225 | if (i == id) continue;
226 | ```
227 | in the main loop in simulation.dp.cpp. Whereas NVCC handles this via instruction predication, DPC++ generates branch & sync instructions. By replacing this branch instruction with an arithmetic expression:
228 | 
229 | ```
230 | force += r * inv_dist_cube * (i != id);
231 | ```
232 | in both the CUDA & SYCL code, we get comparable performance between the two using our hardware set up (RTX 3060). For 5 steps of the physical simulation (1 rendered frame) with 12,800 particles, both CUDA and SYCL take ~5.05ms (RTX 3060).
233 | 
234 | ## Update 2024
235 | 
236 | The ability to execute the nbody code without rendering simplified the process of running the code on different platforms. The results of these executions have brought to light some issues related to the runtime and compilers. As stated before, the original code was modified by substituting:
237 | 
238 | ```
239 |     // Original code
240 |     if (i == id) continue;
241 | 
242 |     force += r * inv_dist_cube;
243 | ```
244 | 
245 | with
246 | 
247 | ```
248 |     // Modified code
249 |     force += r * inv_dist_cube * (i != id);
250 | ```
251 | 
252 | in order to address the 40% decrease in SYCL performance compared to the CUDA code. With this change, the performance was almost the same for both compilers in RTX 3060.
253 | 
254 | We have found that while this is roughly the case for the A100 (CUDA 8.60294 ms vs. SYCL 7.99054 ms), it is not the same on the RTX 2060, where CUDA is heavily penalized (CUDA 10.7281 ms vs. SYCL 8.52349 ms). Even on the A100, the change lowered the CUDA performance (7.95778 ms for the original code).
255 | 
256 | The code change also roughly doubled the performance on the Intel® Data Center GPU Max 1100, with the mean kernel time dropping from 21.6555 ms to 10.7633 ms.
257 | Below are the best results from executing the code on the three different platforms.
258 | 
259 | ```
260 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 2060 7.5 [CUDA 12.3]
261 | ==================== WORK GROUP SIZE 512 BRANCH ========================
262 | CUDA - At step 10000 kernel time is 8.48516 and mean is 8.53952 and stddev is: 0.0884324
263 |  DPC - At step 10000 kernel time is 8.23865 and mean is 8.30511 and stddev is: 0.0788344
264 | ==================== WORK GROUP SIZE 512 PREDICATED ====================
265 | CUDA - At step 10000 kernel time is 10.7281 and mean is 10.7601 and stddev is: 0.0630959
266 |  DPC - At step 10000 kernel time is 8.52349 and mean is 8.5992 and stddev is: 0.078034
267 | 
268 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2]
269 | ==================== WORK GROUP SIZE 128 BRANCH ========================
270 | CUDA - At step 10000 kernel time is 7.95778 and mean is 7.95753 and stddev is: 0.000680384
271 |  DPC - At step 10000 kernel time is 10.051 and mean is 10.0506 and stddev is: 0.00181166
272 | ==================== WORK GROUP SIZE 128 PREDICATED ====================
273 | CUDA - At step 10000 kernel time is 8.60294 and mean is 8.60151 and stddev is: 0.00077172
274 |  DPC - At step 10000 kernel time is 7.99054 and mean is 7.99109 and stddev is: 0.0041852
275 | 
276 | [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Data Center GPU Max 1100 1.3 [1.3.26516]
277 | ==================== WORK GROUP SIZE 32 BRANCH ========================
278 | At step 10000 kernel time is 21.5747 and mean is 21.6555 and stddev is: 0.0734683
279 | ==================== WORK GROUP SIZE 32 PREDICATED ====================
280 | At step 10000 kernel time is 10.6649 and mean is 10.7633 and stddev is: 0.0507969
281 | ```
282 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Reporting a Vulnerability
4 | 
5 | To report a vulnerability or a security issue, please fill in the security
6 | advisories form [here](../../security/advisories/new), send an email to
7 | security@codeplay.com or contact us using the [contact form on our web
8 | page](https://codeplay.com/company/contact/?q=Report%20Security%20Issue).
9 | 


--------------------------------------------------------------------------------
/cmake/FindGLEW.cmake:
--------------------------------------------------------------------------------
  1 | #
  2 | 
  3 | # Try to find GLEW library and include path.
  4 | # Once done this will define
  5 | #
  6 | # GLEW_FOUND
  7 | # GLEW_INCLUDE_DIR
  8 | # GLEW_LIBRARY
  9 | # GLEW_SOURCE
 10 | #
 11 | 
 12 | include(FindPackageHandleStandardArgs)
 13 | 
 14 | if (WIN32)  # Windows: locate the GLEW header, the optional glew.c source, and the library
 15 |     find_path( GLEW_INCLUDE_DIR
 16 |         NAMES
 17 |             GL/glew.h
 18 |         PATHS
 19 |             ${GLEW_LOCATION}/include
 20 |             $ENV{GLEW_LOCATION}/include
 21 |             $ENV{PROGRAMFILES}/GLEW/include
 22 |             ${PROJECT_SOURCE_DIR}/extern/glew/include
 23 |             ${GLEW_LOCATION}
 24 |             $ENV{GLEW_LOCATION}
 25 |             DOC "The directory where GL/glew.h resides" )
 26 |     find_file( GLEW_SOURCE  # optional glew.c; when found it is required instead of GLEW_LIBRARY further down
 27 |         NAMES
 28 |             glew.c
 29 |         PATHS
 30 |             ${GLEW_LOCATION}/src
 31 |             $ENV{GLEW_LOCATION}/src
 32 |             $ENV{PROGRAMFILES}/GLEW/src
 33 |             ${PROJECT_SOURCE_DIR}/extern/glew/src
 34 |             ${GLEW_LOCATION}
 35 |             $ENV{GLEW_LOCATION}
 36 |             DOC "The directory where GL/glew.c resides" )
 37 |     if(ARCH STREQUAL "x86")  # 32-bit library directories; ARCH is not set anywhere in this module
 38 |       find_library( GLEW_LIBRARY
 39 |           NAMES
 40 |               glew GLEW glew32s glew32
 41 |           PATHS
 42 |               ${GLEW_LOCATION}/lib
 43 |               ${GLEW_LOCATION}/lib/x86
 44 |               ${GLEW_LOCATION}/lib/win32
 45 |               ${GLEW_LOCATION}/lib/Release/win32
 46 |               "${GLEW_LOCATION}/lib/Release MX/win32"  # quoted: an unquoted space splits this into two paths
 47 |               $ENV{GLEW_LOCATION}/lib
 48 |               $ENV{GLEW_LOCATION}/lib/Release/win32
 49 |               "$ENV{GLEW_LOCATION}/lib/Release MX/win32"  # quoted: path contains a space
 50 |               $ENV{GLEW_LOCATION}/lib/x86
 51 |               $ENV{GLEW_LOCATION}/lib/win32
 52 |               $ENV{PROGRAMFILES}/GLEW/lib
 53 |               $ENV{PROGRAMFILES}/GLEW/lib/x86
 54 |               $ENV{PROGRAMFILES}/GLEW/lib/win32
 55 |               ${PROJECT_SOURCE_DIR}/extern/glew/bin
 56 |               ${PROJECT_SOURCE_DIR}/extern/glew/lib
 57 |               ${PROJECT_SOURCE_DIR}/extern/glew/lib/x86
 58 |               ${PROJECT_SOURCE_DIR}/extern/glew/lib/win32
 59 |               ${GLEW_LOCATION}
 60 |               $ENV{GLEW_LOCATION}
 61 |               DOC "The GLEW library")
 62 |     else()  # any other ARCH value (or ARCH unset): try the x64 directories first
 63 |       find_library( GLEW_LIBRARY
 64 |           NAMES
 65 |               glew GLEW glew32s glew32
 66 |           PATHS
 67 |               ${GLEW_LOCATION}/lib/x64
 68 |               ${GLEW_LOCATION}/lib/Release/x64
 69 |               "${GLEW_LOCATION}/lib/Release MX/x64"  # quoted: path contains a space
 70 |               $ENV{GLEW_LOCATION}/lib/x64
 71 |               $ENV{GLEW_LOCATION}/lib/Release/x64
 72 |               "$ENV{GLEW_LOCATION}/lib/Release MX/x64"  # quoted: path contains a space
 73 |               $ENV{PROGRAMFILES}/GLEW/lib/x64
 74 |               ${PROJECT_SOURCE_DIR}/extern/glew/bin
 75 |               ${PROJECT_SOURCE_DIR}/extern/glew/lib/x64
 76 |               ${GLEW_LOCATION}/lib
 77 |               $ENV{GLEW_LOCATION}/lib
 78 |               $ENV{PROGRAMFILES}/GLEW/lib
 79 |               ${PROJECT_SOURCE_DIR}/extern/glew/lib
 80 |               ${GLEW_LOCATION}
 81 |               $ENV{GLEW_LOCATION}
 82 |               DOC "The GLEW library")
 83 |     endif()
 84 | endif ()
 85 | 
 86 | if (CMAKE_HOST_UNIX)  # pass the variable name; ${...} would expand it before if() evaluates it
 87 |     find_path( GLEW_INCLUDE_DIR
 88 |         NAMES
 89 |             GL/glew.h
 90 |         PATHS
 91 |             ${GLEW_LOCATION}/include
 92 |             $ENV{GLEW_LOCATION}/include
 93 |             /usr/include
 94 |             /usr/local/include
 95 |             /sw/include
 96 |             /opt/local/include
 97 |             NO_DEFAULT_PATH  # search only the directories listed above
 98 |             DOC "The directory where GL/glew.h resides"
 99 |     )
100 |     find_library( GLEW_LIBRARY
101 |         NAMES
102 |             GLEW glew
103 |         PATHS
104 |             ${GLEW_LOCATION}/lib
105 |             $ENV{GLEW_LOCATION}/lib
106 |             /usr/lib64
107 |             /usr/lib
108 |             /usr/local/lib64
109 |             /usr/local/lib
110 |             /sw/lib
111 |             /opt/local/lib
112 |             NO_DEFAULT_PATH  # search only the directories listed above
113 |             DOC "The GLEW library")
114 | endif ()
115 | 
116 | if (GLEW_INCLUDE_DIR AND EXISTS "${GLEW_INCLUDE_DIR}/GL/glew.h")
117 |    # Scan glew.h for the GL_VERSION_4_2 / GL_VERSION_4_3 macros; warn (do not fail) when one is missing.
118 |    file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_2 REGEX "^#define GL_VERSION_4_2.*$")
119 |    if (GLEW_4_2)
120 |        SET(OPENGL_4_2_FOUND TRUE)
121 |    else ()
122 |        message(WARNING
123 |        "glew-1.7.0 or newer needed for supporting OpenGL 4.2 dependent features"
124 |        )
125 |    endif ()
126 | 
127 |    file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_3 REGEX "^#define GL_VERSION_4_3.*$")
128 |    if (GLEW_4_3)
129 |        SET(OPENGL_4_3_FOUND TRUE)
130 |    else ()
131 |        message(WARNING
132 |        "glew-1.9.0 or newer needed for supporting OpenGL 4.3 dependent features"
133 |        )
134 |    endif ()
135 | 
136 | endif ()
137 | 
138 | if(GLEW_SOURCE)  # glew.c was found: require header + source (GLEW is built from source)
139 | find_package_handle_standard_args(GLEW DEFAULT_MSG
140 |     GLEW_INCLUDE_DIR
141 |     GLEW_SOURCE
142 | )
143 | else()  # otherwise require header + prebuilt library
144 | find_package_handle_standard_args(GLEW DEFAULT_MSG
145 |     GLEW_INCLUDE_DIR
146 |     GLEW_LIBRARY
147 | )
148 | endif()
149 | # Hide the cache entries created by the find_* calls above (GLEW_FOUND is not a cache entry).
150 | mark_as_advanced( GLEW_INCLUDE_DIR GLEW_LIBRARY GLEW_SOURCE )


--------------------------------------------------------------------------------
/cmake/FindGLFW.cmake:
--------------------------------------------------------------------------------
  1 | #
  2 | #   Copyright 2013 Pixar
  3 | #
  4 | #   Licensed under the Apache License, Version 2.0 (the "Apache License")
  5 | #   with the following modification; you may not use this file except in
  6 | #   compliance with the Apache License and the following modification to it:
  7 | #   Section 6. Trademarks. is deleted and replaced with:
  8 | #
  9 | #   6. Trademarks. This License does not grant permission to use the trade
 10 | #      names, trademarks, service marks, or product names of the Licensor
 11 | #      and its affiliates, except as required to comply with Section 4(c) of
 12 | #      the License and to reproduce the content of the NOTICE file.
 13 | #
 14 | #   You may obtain a copy of the Apache License at
 15 | #
 16 | #       http://www.apache.org/licenses/LICENSE-2.0
 17 | #
 18 | #   Unless required by applicable law or agreed to in writing, software
 19 | #   distributed under the Apache License with the above modification is
 20 | #   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 21 | #   KIND, either express or implied. See the Apache License for the specific
 22 | #   language governing permissions and limitations under the Apache License.
 23 | #
 24 | 
 25 | # Try to find GLFW library and include path.
 26 | # Once done this will define
 27 | #
 28 | # GLFW_FOUND
 29 | # GLFW_INCLUDE_DIR
 30 | # GLFW_LIBRARIES
 31 | #
 32 | 
 33 | find_path( GLFW_INCLUDE_DIR 
 34 |     NAMES
 35 |         GLFW/glfw3.h
 36 |     HINTS
 37 |         "${GLFW_LOCATION}/include"
 38 |         "$ENV{GLFW_LOCATION}/include"
 39 |     PATHS
 40 |         "$ENV{PROGRAMFILES}/GLFW/include"
 41 |         "${OPENGL_INCLUDE_DIR}"
 42 |         /usr/openwin/share/include
 43 |         /usr/openwin/include
 44 |         /usr/X11R6/include
 45 |         /usr/include/X11
 46 |         /opt/graphics/OpenGL/include
 47 |         /opt/graphics/OpenGL/contrib/libglfw
 48 |         /usr/local/include
 49 |         /usr/include/GL
 50 |         /usr/include
 51 |     DOC 
 52 |         "The directory where GLFW/glfw3.h resides"
 53 | )
 54 | 
 55 | # Fallback search for the legacy GL/glfw.h header layout, writing to the same variable.
 56 | # XXX: Do we still need to search for GL/glfw.h? NOTE(review): per the find_path docs this
 57 | # call should not search again when GLFW_INCLUDE_DIR was already found above - confirm.
 58 | find_path( GLFW_INCLUDE_DIR 
 59 |     NAMES
 60 |         GL/glfw.h
 61 |     HINTS
 62 |         "${GLFW_LOCATION}/include"
 63 |         "$ENV{GLFW_LOCATION}/include"
 64 |     PATHS
 65 |         "$ENV{PROGRAMFILES}/GLFW/include"
 66 |         "${OPENGL_INCLUDE_DIR}"
 67 |         /usr/openwin/share/include
 68 |         /usr/openwin/include
 69 |         /usr/X11R6/include
 70 |         /usr/include/X11
 71 |         /opt/graphics/OpenGL/include
 72 |         /opt/graphics/OpenGL/contrib/libglfw
 73 |         /usr/local/include
 74 |         /usr/include/GL
 75 |         /usr/include
 76 |     DOC 
 77 |         "The directory where GL/glfw.h resides"
 78 | )
 79 | 
 80 | if (WIN32)  # Windows targets; Cygwin gets its own search below
 81 |     if(CYGWIN)
 82 |         find_library( GLFW_glfw_LIBRARY 
 83 |             NAMES
 84 |                 glfw32
 85 |             HINTS
 86 |                 "${GLFW_LOCATION}/lib"
 87 |                 "${GLFW_LOCATION}/lib/x64"
 88 |                 "$ENV{GLFW_LOCATION}/lib"
 89 |             PATHS
 90 |                 "${OPENGL_LIBRARY_DIR}"
 91 |                 /usr/lib
 92 |                 /usr/lib/w32api
 93 |                 /usr/local/lib
 94 |                 /usr/X11R6/lib
 95 |             DOC 
 96 |                 "The GLFW library"
 97 |         )
 98 |     else()  # non-Cygwin Windows
 99 |         find_library( GLFW_glfw_LIBRARY
100 |             NAMES 
101 |                 glfw32 
102 |                 glfw32s 
103 |                 glfw
104 |                 glfw3
105 |             HINTS
106 |                 "${GLFW_LOCATION}/lib"
107 |                 "${GLFW_LOCATION}/lib/x64"
108 |                 "${GLFW_LOCATION}/lib-msvc110"
109 |                 "${GLFW_LOCATION}/lib-vc2012"
110 |                 "$ENV{GLFW_LOCATION}/lib"
111 |                 "$ENV{GLFW_LOCATION}/lib/x64"
112 |                 "$ENV{GLFW_LOCATION}/lib-msvc110"
113 |                 "$ENV{GLFW_LOCATION}/lib-vc2012"
114 |             PATHS
115 |                 "$ENV{PROGRAMFILES}/GLFW/lib"
116 |                 "${OPENGL_LIBRARY_DIR}"
117 |             DOC 
118 |                 "The GLFW library"
119 |         )
120 |     endif()
121 | else ()
122 |     if (APPLE)
123 |         find_library( GLFW_glfw_LIBRARY glfw  # NOTE(review): positional `glfw` duplicates the NAMES list below
124 |             NAMES 
125 |                 glfw
126 |                 glfw3
127 |             HINTS
128 |                 "${GLFW_LOCATION}/lib"
129 |                 "${GLFW_LOCATION}/lib/cocoa"
130 |                 "$ENV{GLFW_LOCATION}/lib"
131 |                 "$ENV{GLFW_LOCATION}/lib/cocoa"
132 |             PATHS
133 |                 /usr/local/lib
134 |         )
135 |         set(GLFW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX")
136 |         set(GLFW_corevideo_LIBRARY "-framework CoreVideo" CACHE STRING "CoreVideo framework for OSX")
137 |         set(GLFW_iokit_LIBRARY "-framework IOKit" CACHE STRING "IOKit framework for OSX")
138 |     else ()
139 |         # (*)NIX
140 |         
141 |         find_package(Threads REQUIRED)
142 | 
143 |         find_package(X11 REQUIRED)
144 |         
145 |         if(NOT X11_Xrandr_FOUND)
146 |             message(FATAL_ERROR "Xrandr library not found - required for GLFW")
147 |         endif()
148 | 
149 |         if(NOT X11_xf86vmode_FOUND)
150 |             message(FATAL_ERROR "xf86vmode library not found - required for GLFW")
151 |         endif()
152 | 
153 |         if(NOT X11_Xcursor_FOUND)
154 |             message(FATAL_ERROR "Xcursor library not found - required for GLFW")
155 |         endif()
156 | 
157 |         if(NOT X11_Xinerama_FOUND)
158 |             message(FATAL_ERROR "Xinerama library not found - required for GLFW")
159 |         endif()
160 |         # Extra link items for X11 builds: the X11 extension libs checked above, the thread library, and raw -lrt/-lXi flags.
161 |         list(APPEND GLFW_x11_LIBRARY "${X11_Xrandr_LIB}" "${X11_Xxf86vm_LIB}" "${X11_Xcursor_LIB}" "${X11_Xinerama_LIB}" "${CMAKE_THREAD_LIBS_INIT}" -lrt -lXi)
162 | 
163 |         find_library( GLFW_glfw_LIBRARY
164 |             NAMES 
165 |                 glfw
166 |                 glfw3
167 |             HINTS
168 |                 "${GLFW_LOCATION}/lib"
169 |                 "$ENV{GLFW_LOCATION}/lib"
170 |                 "${GLFW_LOCATION}/lib/x11"
171 |                 "$ENV{GLFW_LOCATION}/lib/x11"
172 |             PATHS
173 |                 /usr/lib64
174 |                 /usr/lib
175 |                 /usr/lib/${CMAKE_LIBRARY_ARCHITECTURE}
176 |                 /usr/local/lib64
177 |                 /usr/local/lib
178 |                 /usr/local/lib/${CMAKE_LIBRARY_ARCHITECTURE}
179 |                 /usr/openwin/lib
180 |                 /usr/X11R6/lib
181 |             DOC 
182 |                 "The GLFW library"
183 |         )
184 |     endif (APPLE)
185 | endif (WIN32)
186 | 
187 | set( GLFW_FOUND "NO" )
188 | 
189 | if(GLFW_INCLUDE_DIR)
190 |     # Header found: assemble the link list (needs the library too), then read the version macros.
191 |     if(GLFW_glfw_LIBRARY)
192 |         set( GLFW_LIBRARIES "${GLFW_glfw_LIBRARY}"
193 |                             "${GLFW_x11_LIBRARY}"
194 |                             "${GLFW_cocoa_LIBRARY}"
195 |                             "${GLFW_iokit_LIBRARY}"
196 |                             "${GLFW_corevideo_LIBRARY}" )
197 |         set( GLFW_FOUND "YES" )
198 |         set (GLFW_LIBRARY "${GLFW_LIBRARIES}")
199 |         set (GLFW_INCLUDE_PATH "${GLFW_INCLUDE_DIR}")
200 |     endif(GLFW_glfw_LIBRARY)
201 | 
202 | 
203 |     # Tease the GLFW_VERSION numbers from the lib headers
204 |     function(parseVersion FILENAME VARNAME)
205 |         # Sets <VARNAME> in the caller's scope to the digits on the `#define <VARNAME> ...` line of FILENAME.
206 |         set(PATTERN "^#define ${VARNAME}.*$")
207 |         # TMP receives the matching header line(s); it is empty when the macro is absent.
208 |         file(STRINGS "${GLFW_INCLUDE_DIR}/${FILENAME}" TMP REGEX "${PATTERN}")
209 |         # Quote TMP: an unquoted empty expansion drops the input argument and string() errors out.
210 |         string(REGEX MATCHALL "[0-9]+" TMP "${TMP}")
211 |         # With no digits found this leaves <VARNAME> unset in the caller's scope.
212 |         set(${VARNAME} ${TMP} PARENT_SCOPE)
213 |         
214 |     endfunction()
215 | 
216 | 
217 |     if(EXISTS "${GLFW_INCLUDE_DIR}/GL/glfw.h")
218 | 
219 |         parseVersion(GL/glfw.h GLFW_VERSION_MAJOR)
220 |         parseVersion(GL/glfw.h GLFW_VERSION_MINOR)
221 |         parseVersion(GL/glfw.h GLFW_VERSION_REVISION)
222 | 
223 |     elseif(EXISTS "${GLFW_INCLUDE_DIR}/GLFW/glfw3.h")
224 | 
225 |         parseVersion(GLFW/glfw3.h GLFW_VERSION_MAJOR)
226 |         parseVersion(GLFW/glfw3.h GLFW_VERSION_MINOR)
227 |         parseVersion(GLFW/glfw3.h GLFW_VERSION_REVISION)
228 |  
229 |     endif()
230 |     # Publish GLFW_VERSION only when all three parts are numeric; an empty part must not produce a malformed if().
231 |     if("${GLFW_VERSION_MAJOR}.${GLFW_VERSION_MINOR}.${GLFW_VERSION_REVISION}" MATCHES "^[0-9]+\\.[0-9]+\\.[0-9]+$")
232 |         set(GLFW_VERSION "${GLFW_VERSION_MAJOR}.${GLFW_VERSION_MINOR}.${GLFW_VERSION_REVISION}")
233 |         set(GLFW_VERSION_STRING "${GLFW_VERSION}")
234 |         mark_as_advanced(GLFW_VERSION)
235 |     endif()
236 |     
237 | endif(GLFW_INCLUDE_DIR)
238 | 
239 | include(FindPackageHandleStandardArgs)
240 | # Report the result: both the header directory and the assembled library list are required.
241 | find_package_handle_standard_args(GLFW 
242 |     REQUIRED_VARS
243 |         GLFW_INCLUDE_DIR
244 |         GLFW_LIBRARIES
245 |     VERSION_VAR
246 |         GLFW_VERSION
247 | )
248 | # NOTE(review): GLFW_LIBRARIES is set with a plain set() above, so it does not look like a cache entry - confirm.
249 | mark_as_advanced(
250 |   GLFW_INCLUDE_DIR
251 |   GLFW_LIBRARIES
252 |   GLFW_glfw_LIBRARY
253 |   GLFW_cocoa_LIBRARY
254 | )
255 | 
256 | 


--------------------------------------------------------------------------------
/cmake/FindGLM.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # Find GLM
 3 | #
 4 | # Try to find GLM : OpenGL Mathematics.
 5 | # This module defines 
 6 | # - GLM_INCLUDE_DIRS
 7 | # - GLM_FOUND
 8 | #
 9 | # The following variables can be set as arguments for the module.
10 | # - GLM_ROOT_DIR : Root library directory of GLM 
11 | #
12 | # References:
13 | # - https://github.com/Groovounet/glm/blob/master/util/FindGLM.cmake
14 | # - https://bitbucket.org/alfonse/gltut/src/28636298c1c0/glm-0.9.0.7/FindGLM.cmake
15 | #
16 | 
17 | # Additional modules
18 | include(FindPackageHandleStandardArgs)
19 | 
20 | if (WIN32)
21 | 	# Windows: look for glm/glm.hpp under %PROGRAMFILES%/include and GLM_ROOT_DIR/include
22 | 	find_path(
23 | 		GLM_INCLUDE_DIR
24 | 		NAMES glm/glm.hpp
25 | 		PATHS
26 | 		$ENV{PROGRAMFILES}/include
27 | 		${GLM_ROOT_DIR}/include
28 | 		DOC "The directory where glm/glm.hpp resides")
29 | else()
30 | 	# Other platforms: look in common system include directories and GLM_ROOT_DIR/include
31 | 	find_path(
32 | 		GLM_INCLUDE_DIR
33 | 		NAMES glm/glm.hpp
34 | 		PATHS
35 | 		/usr/include
36 | 		/usr/local/include
37 | 		/sw/include
38 | 		/opt/local/include
39 | 		${GLM_ROOT_DIR}/include
40 | 		DOC "The directory where glm/glm.hpp resides")
41 | endif()
42 | 
43 | # Handle the REQUIRED argument, define the *_FOUND variable
44 | find_package_handle_standard_args(GLM DEFAULT_MSG GLM_INCLUDE_DIR)
45 | 
46 | # Define GLM_INCLUDE_DIRS (only when GLM was found)
47 | if (GLM_FOUND)
48 | 	set(GLM_INCLUDE_DIRS ${GLM_INCLUDE_DIR})
49 | endif()
50 | 
51 | # Hide the GLM_INCLUDE_DIR cache entry from the default cache view
52 | mark_as_advanced(GLM_INCLUDE_DIR)
53 | 


--------------------------------------------------------------------------------
/cmake/FindVulkan.cmake:
--------------------------------------------------------------------------------
 1 | # Find Vulkan
 2 | #
 3 | # VULKAN_INCLUDE_DIR
 4 | # VULKAN_LIBRARY
 5 | # VULKAN_FOUND
 6 | 
 7 | if (WIN32)  # Windows: SDK location comes from the VULKAN_SDK / VK_SDK_PATH environment variables
 8 |     find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS
 9 |         "$ENV{VULKAN_SDK}/Include"
10 |         "$ENV{VK_SDK_PATH}/Include")
11 |     if (CMAKE_CL_64)  # 64-bit cl: libraries under Bin; the static library is only searched here
12 |         find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS
13 |             "$ENV{VULKAN_SDK}/Bin"
14 |             "$ENV{VK_SDK_PATH}/Bin")
15 |         find_library(VULKAN_STATIC_LIBRARY NAMES vkstatic.1 HINTS
16 |             "$ENV{VULKAN_SDK}/Bin"
17 |             "$ENV{VK_SDK_PATH}/Bin")
18 |     else()  # otherwise: libraries under Bin32
19 |         find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS
20 |             "$ENV{VULKAN_SDK}/Bin32"
21 |             "$ENV{VK_SDK_PATH}/Bin32")
22 |     endif()
23 | else()  # non-Windows: VULKAN_SDK hints plus CMake's default search locations
24 |     find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS
25 |         "$ENV{VULKAN_SDK}/include")
26 |     find_library(VULKAN_LIBRARY NAMES vulkan HINTS
27 |         "$ENV{VULKAN_SDK}/lib")
28 | endif()
29 | 
30 | include(FindPackageHandleStandardArgs)
31 | find_package_handle_standard_args(Vulkan DEFAULT_MSG VULKAN_LIBRARY VULKAN_INCLUDE_DIR)  # both library and header are required
32 | 
33 | mark_as_advanced(VULKAN_INCLUDE_DIR VULKAN_LIBRARY VULKAN_STATIC_LIBRARY)
34 | 


--------------------------------------------------------------------------------
/cmake/Finddpct.cmake:
--------------------------------------------------------------------------------
 1 | if (WIN32)  # Windows: search only the explicit locations below for dpct/dpct.hpp
 2 |     find_path( dpct_INCLUDE_DIR
 3 |         NAMES
 4 |             dpct/dpct.hpp
 5 |         PATHS
 6 |             ${dpct_LOCATION}/include
 7 |             $ENV{dpct_LOCATION}/include
 8 |             $ENV{DPCT_BUNDLE_ROOT}/include
 9 |             $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include
10 |             $ENV{PROGRAMFILES}/include
11 |             NO_DEFAULT_PATH  # do not fall back to CMake's default search locations
12 |             DOC "The directory where dpct/dpct.hpp resides"
13 |     )
14 | else()  # other platforms: same variables plus /opt/intel/oneapi and common system include directories
15 |     find_path( dpct_INCLUDE_DIR
16 |         NAMES
17 |             dpct/dpct.hpp
18 |         PATHS
19 |             ${dpct_LOCATION}/include
20 |             $ENV{dpct_LOCATION}/include
21 |             $ENV{DPCT_BUNDLE_ROOT}/include
22 |             $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include
23 |             /opt/intel/oneapi/dpcpp-ct/latest/include
24 |             /usr/include
25 |             /usr/local/include
26 |             /sw/include
27 |             /opt/local/include
28 |             NO_DEFAULT_PATH  # do not fall back to CMake's default search locations
29 |             DOC "The directory where dpct/dpct.hpp resides"
30 |     )
31 | endif ()
32 | 
33 | include(FindPackageHandleStandardArgs)
34 | find_package_handle_standard_args( dpct REQUIRED_VARS dpct_INCLUDE_DIR )  # header-only check: only the include dir is required
35 | 
36 | 


--------------------------------------------------------------------------------
/docs/Eq1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/Eq1.png


--------------------------------------------------------------------------------
/docs/downscale_artefact.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/downscale_artefact.png


--------------------------------------------------------------------------------
/libs/imgui/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2022 Codeplay Software Limited
 2 | 
 3 | add_library(imgui SHARED
 4 |     src/imgui.cpp
 5 |     src/imgui_widgets.cpp
 6 |     src/imgui_demo.cpp
 7 |     src/imgui_draw.cpp
 8 |     src/imgui_tables.cpp
 9 |     src/imgui_impl_opengl3.cpp
10 |     src/imgui_impl_glfw.cpp)
11 | # Link the platform's dynamic-loading library via CMAKE_DL_LIBS instead of a hard-coded "dl".
12 | target_link_libraries(imgui PRIVATE ${CMAKE_DL_LIBS})
13 | 
14 | target_include_directories(imgui PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include)
15 | # BINARY_NAME is defined outside this file; both it and its _d variant get the imgui headers.
16 | target_include_directories(${BINARY_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
17 | target_include_directories(${BINARY_NAME}_d PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
18 | 
19 | # Link main project to imgui lib
20 | target_link_libraries(${BINARY_NAME} PRIVATE imgui)
21 | target_link_libraries(${BINARY_NAME}_d PRIVATE imgui)
22 | 


--------------------------------------------------------------------------------
/libs/imgui/include/imconfig.h:
--------------------------------------------------------------------------------
  1 | //-----------------------------------------------------------------------------
  2 | // COMPILE-TIME OPTIONS FOR DEAR IMGUI
  3 | // Runtime options (clipboard callbacks, enabling various features, etc.) can generally be set via the ImGuiIO structure.
  4 | // You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions.
  5 | //-----------------------------------------------------------------------------
  6 | // A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/rebased branch with your modifications to it)
  7 | // B) or '#define IMGUI_USER_CONFIG "my_imgui_config.h"' in your project and then add directives in your own file without touching this template.
  8 | //-----------------------------------------------------------------------------
  9 | // You need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which include the imgui*.cpp
 10 | // files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an affect on data structures.
 11 | // Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts.
 12 | // Call IMGUI_CHECKVERSION() from your .cpp files to verify that the data structures your files are using are matching the ones imgui.cpp is using.
 13 | //-----------------------------------------------------------------------------
 14 | 
 15 | #pragma once
 16 | 
 17 | //---- Define assertion handler. Defaults to calling assert().
 18 | // If your macro uses multiple statements, make sure is enclosed in a 'do { .. } while (0)' block so it can be used as a single statement.
 19 | //#define IM_ASSERT(_EXPR)  MyAssert(_EXPR)
 20 | //#define IM_ASSERT(_EXPR)  ((void)(_EXPR))     // Disable asserts
 21 | 
 22 | //---- Define attributes of all API symbols declarations, e.g. for DLL under Windows
 23 | // Using Dear ImGui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility.
 24 | // DLL users: heaps and globals are not shared across DLL boundaries! You will need to call SetCurrentContext() + SetAllocatorFunctions()
 25 | // for each static/DLL boundary you are calling from. Read "Context and Memory Allocators" section of imgui.cpp for more details.
 26 | //#define IMGUI_API __declspec( dllexport )
 27 | //#define IMGUI_API __declspec( dllimport )
 28 | 
 29 | //---- Don't define obsolete functions/enums/behaviors. Consider enabling from time to time after updating to avoid using soon-to-be obsolete function/names.
 30 | //#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS
 31 | //#define IMGUI_DISABLE_OBSOLETE_KEYIO                      // 1.87: disable legacy io.KeyMap[]+io.KeysDown[] in favor of io.AddKeyEvent(). This will be folded into IMGUI_DISABLE_OBSOLETE_FUNCTIONS in a few versions.
 32 | 
 33 | //---- Disable all of Dear ImGui or don't implement standard windows.
 34 | // It is very strongly recommended to NOT disable the demo windows during development. Please read comments in imgui_demo.cpp.
 35 | //#define IMGUI_DISABLE                                     // Disable everything: all headers and source files will be empty.
 36 | //#define IMGUI_DISABLE_DEMO_WINDOWS                        // Disable demo windows: ShowDemoWindow()/ShowStyleEditor() will be empty. Not recommended.
 37 | //#define IMGUI_DISABLE_METRICS_WINDOW                      // Disable metrics/debugger and other debug tools: ShowMetricsWindow() and ShowStackToolWindow() will be empty.
 38 | 
 39 | //---- Don't implement some functions to reduce linkage requirements.
 40 | //#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS   // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc. (user32.lib/.a, kernel32.lib/.a)
 41 | //#define IMGUI_ENABLE_WIN32_DEFAULT_IME_FUNCTIONS          // [Win32] [Default with Visual Studio] Implement default IME handler (require imm32.lib/.a, auto-link for Visual Studio, -limm32 on command-line for MinGW)
 42 | //#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS         // [Win32] [Default with non-Visual Studio compilers] Don't implement default IME handler (won't require imm32.lib/.a)
 43 | //#define IMGUI_DISABLE_WIN32_FUNCTIONS                     // [Win32] Won't use and link with any Win32 function (clipboard, ime).
 44 | //#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS      // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default).
 45 | //#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS            // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf)
 46 | //#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS              // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself.
 47 | //#define IMGUI_DISABLE_FILE_FUNCTIONS                      // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle at all (replace them with dummies)
 48 | //#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS              // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function.
 49 | //#define IMGUI_DISABLE_DEFAULT_ALLOCATORS                  // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions().
 50 | //#define IMGUI_DISABLE_SSE                                 // Disable use of SSE intrinsics even if available
 51 | 
 52 | //---- Include imgui_user.h at the end of imgui.h as a convenience
 53 | //#define IMGUI_INCLUDE_IMGUI_USER_H
 54 | 
 55 | //---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another)
 56 | //#define IMGUI_USE_BGRA_PACKED_COLOR
 57 | 
 58 | //---- Use 32-bit for ImWchar (default is 16-bit) to support unicode planes 1-16. (e.g. point beyond 0xFFFF like emoticons, dingbats, symbols, shapes, ancient languages, etc...)
 59 | //#define IMGUI_USE_WCHAR32
 60 | 
 61 | //---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version
 62 | // By default the embedded implementations are declared static and not available outside of Dear ImGui sources files.
 63 | //#define IMGUI_STB_TRUETYPE_FILENAME   "my_folder/stb_truetype.h"
 64 | //#define IMGUI_STB_RECT_PACK_FILENAME  "my_folder/stb_rect_pack.h"
 65 | //#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION
 66 | //#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION
 67 | 
 68 | //---- Use stb_printf's faster implementation of vsnprintf instead of the one from libc (unless IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS is defined)
 69 | // Requires 'stb_sprintf.h' to be available in the include path. Compatibility checks of arguments and formats done by clang and GCC will be disabled in order to support the extra formats provided by STB sprintf.
 70 | // #define IMGUI_USE_STB_SPRINTF
 71 | 
 72 | //---- Use FreeType to build and rasterize the font atlas (instead of stb_truetype which is embedded by default in Dear ImGui)
 73 | // Requires FreeType headers to be available in the include path. Requires program to be compiled with 'misc/freetype/imgui_freetype.cpp' (in this repository) + the FreeType library (not provided).
 74 | // On Windows you may use vcpkg with 'vcpkg install freetype --triplet=x64-windows' + 'vcpkg integrate install'.
 75 | //#define IMGUI_ENABLE_FREETYPE
 76 | 
 77 | //---- Use stb_truetype to build and rasterize the font atlas (default)
 78 | // The only purpose of this define is if you want force compilation of the stb_truetype backend ALONG with the FreeType backend.
 79 | //#define IMGUI_ENABLE_STB_TRUETYPE
 80 | 
 81 | //---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4.
 82 | // This will be inlined as part of ImVec2 and ImVec4 class declarations.
 83 | /*
 84 | #define IM_VEC2_CLASS_EXTRA                                                 \
 85 |         ImVec2(const MyVec2& f) { x = f.x; y = f.y; }                       \
 86 |         operator MyVec2() const { return MyVec2(x,y); }
 87 | 
 88 | #define IM_VEC4_CLASS_EXTRA                                                 \
 89 |         ImVec4(const MyVec4& f) { x = f.x; y = f.y; z = f.z; w = f.w; }     \
 90 |         operator MyVec4() const { return MyVec4(x,y,z,w); }
 91 | */
 92 | 
 93 | //---- Using 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices.
 94 | // Your renderer backend will need to support it (most example renderer backends support both 16/32-bit indices).
 95 | // Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer.
 96 | // Read about ImGuiBackendFlags_RendererHasVtxOffset for details.
 97 | //#define ImDrawIdx unsigned int
 98 | 
 99 | //---- Override ImDrawCallback signature (will need to modify renderer backends accordingly)
100 | //struct ImDrawList;
101 | //struct ImDrawCmd;
102 | //typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data);
103 | //#define ImDrawCallback MyImDrawCallback
104 | 
105 | //---- Debug Tools: Macro to break in Debugger
106 | // (use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.)
107 | //#define IM_DEBUG_BREAK  IM_ASSERT(0)
108 | //#define IM_DEBUG_BREAK  __debugbreak()
109 | 
110 | //---- Debug Tools: Have the Item Picker break in the ItemAdd() function instead of ItemHoverable(),
111 | // (which comes earlier in the code, will catch a few extra items, allow picking items other than Hovered one.)
112 | // This adds a small runtime cost which is why it is not enabled by default.
113 | //#define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX
114 | 
115 | //---- Debug Tools: Enable slower asserts
116 | //#define IMGUI_DEBUG_PARANOID
117 | 
118 | //---- Tip: You can add extra functions within the ImGui:: namespace, here or in your own headers files.
119 | /*
120 | namespace ImGui
121 | {
122 |     void MyFunction(const char* name, const MyMatrix44& v);
123 | }
124 | */
125 | 


--------------------------------------------------------------------------------
/libs/imgui/include/imgui_impl_glfw.h:
--------------------------------------------------------------------------------
 1 | // dear imgui: Platform Backend for GLFW
 2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..)
 3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.)
 4 | 
 5 | // Implemented features:
 6 | //  [X] Platform: Clipboard support.
 7 | //  [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set]
 8 | //  [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'.
 9 | //  [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors require GLFW 3.4+).
10 | 
11 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
12 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
13 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
14 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
15 | 
16 | // About GLSL version:
17 | // The 'glsl_version' initialization parameter defaults to "#version 150" if NULL.
18 | // Only override if your GL version doesn't handle this GLSL version. Keep NULL if unsure!
19 | 
20 | #pragma once
21 | #include "imgui.h"      // IMGUI_IMPL_API
22 | 
23 | struct GLFWwindow;
24 | struct GLFWmonitor;
25 | 
26 | IMGUI_IMPL_API bool     ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks);
27 | IMGUI_IMPL_API bool     ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks);
28 | IMGUI_IMPL_API bool     ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks);
29 | IMGUI_IMPL_API void     ImGui_ImplGlfw_Shutdown();
30 | IMGUI_IMPL_API void     ImGui_ImplGlfw_NewFrame();
31 | 
32 | // GLFW callbacks
33 | // - When calling Init with 'install_callbacks=true': GLFW callbacks will be installed for you. They will call user's previously installed callbacks, if any.
34 | // - When calling Init with 'install_callbacks=false': GLFW callbacks won't be installed. You will need to call those functions yourself from your own GLFW callbacks.
35 | IMGUI_IMPL_API void     ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused);        // Since 1.84
36 | IMGUI_IMPL_API void     ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered);        // Since 1.84
37 | IMGUI_IMPL_API void     ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y);   // Since 1.87
38 | IMGUI_IMPL_API void     ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods);
39 | IMGUI_IMPL_API void     ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset);
40 | IMGUI_IMPL_API void     ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int key, int scancode, int action, int mods);
41 | IMGUI_IMPL_API void     ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c);
42 | IMGUI_IMPL_API void     ImGui_ImplGlfw_MonitorCallback(GLFWmonitor* monitor, int event);
43 | 


--------------------------------------------------------------------------------
/libs/imgui/include/imgui_impl_opengl3.h:
--------------------------------------------------------------------------------
 1 | // dear imgui: Renderer Backend for modern OpenGL with shaders / programmatic pipeline
 2 | // - Desktop GL: 2.x 3.x 4.x
 3 | // - Embedded GL: ES 2.0 (WebGL 1.0), ES 3.0 (WebGL 2.0)
 4 | // This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..)
 5 | 
 6 | // Implemented features:
 7 | //  [X] Renderer: User texture binding. Use 'GLuint' OpenGL texture identifier as void*/ImTextureID. Read the FAQ about ImTextureID!
 8 | //  [x] Renderer: Desktop GL only: Support for large meshes (64k+ vertices) with 16-bit indices.
 9 | 
10 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
11 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
12 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
13 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
14 | 
15 | // About GLSL version:
16 | //  The 'glsl_version' initialization parameter should be NULL (default) or a "#version XXX" string.
17 | //  On computer platforms the GLSL version defaults to "#version 130". On OpenGL ES 3 platforms it defaults to "#version 300 es"
18 | //  Only override if your GL version doesn't handle this GLSL version. See GLSL version table at the top of imgui_impl_opengl3.cpp.
19 | 
20 | #pragma once
21 | #include "imgui.h"      // IMGUI_IMPL_API
22 | 
23 | // Backend API
24 | IMGUI_IMPL_API bool     ImGui_ImplOpenGL3_Init(const char* glsl_version = NULL);
25 | IMGUI_IMPL_API void     ImGui_ImplOpenGL3_Shutdown();
26 | IMGUI_IMPL_API void     ImGui_ImplOpenGL3_NewFrame();
27 | IMGUI_IMPL_API void     ImGui_ImplOpenGL3_RenderDrawData(ImDrawData* draw_data);
28 | 
29 | // (Optional) Called by Init/NewFrame/Shutdown
30 | IMGUI_IMPL_API bool     ImGui_ImplOpenGL3_CreateFontsTexture();
31 | IMGUI_IMPL_API void     ImGui_ImplOpenGL3_DestroyFontsTexture();
32 | IMGUI_IMPL_API bool     ImGui_ImplOpenGL3_CreateDeviceObjects();
33 | IMGUI_IMPL_API void     ImGui_ImplOpenGL3_DestroyDeviceObjects();
34 | 
35 | // Specific OpenGL ES versions
36 | //#define IMGUI_IMPL_OPENGL_ES2     // Auto-detected on Emscripten
37 | //#define IMGUI_IMPL_OPENGL_ES3     // Auto-detected on iOS/Android
38 | 
39 | // You can explicitly select GLES2 or GLES3 API by defining IMGUI_IMPL_OPENGL_ES2 or IMGUI_IMPL_OPENGL_ES3 in imconfig.h or on the compiler command-line.
40 | #if !defined(IMGUI_IMPL_OPENGL_ES2) \
41 |  && !defined(IMGUI_IMPL_OPENGL_ES3)
42 | 
43 | // Try to detect GLES on matching platforms
44 | #if defined(__APPLE__)
45 | #include <TargetConditionals.h>
46 | #endif
47 | #if (defined(__APPLE__) && (TARGET_OS_IOS || TARGET_OS_TV)) || (defined(__ANDROID__))
48 | #define IMGUI_IMPL_OPENGL_ES3               // iOS, Android  -> GL ES 3, "#version 300 es"
49 | #elif defined(__EMSCRIPTEN__)
50 | #define IMGUI_IMPL_OPENGL_ES2               // Emscripten    -> GL ES 2, "#version 100"
51 | #else
52 | // Otherwise imgui_impl_opengl3_loader.h will be used.
53 | #endif
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/libs/imgui/include/imstb_rectpack.h:
--------------------------------------------------------------------------------
  1 | // [DEAR IMGUI]
  2 | // This is a slightly modified version of stb_rect_pack.h 1.00.
  3 | // Those changes would need to be pushed into nothings/stb:
  4 | // - Added STBRP__CDECL
  5 | // Grep for [DEAR IMGUI] to find the changes.
  6 | 
  7 | // stb_rect_pack.h - v1.00 - public domain - rectangle packing
  8 | // Sean Barrett 2014
  9 | //
 10 | // Useful for e.g. packing rectangular textures into an atlas.
 11 | // Does not do rotation.
 12 | //
 13 | // Not necessarily the awesomest packing method, but better than
 14 | // the totally naive one in stb_truetype (which is primarily what
 15 | // this is meant to replace).
 16 | //
 17 | // Has only had a few tests run, may have issues.
 18 | //
 19 | // More docs to come.
 20 | //
 21 | // No memory allocations; uses qsort() and assert() from stdlib.
 22 | // Can override those by defining STBRP_SORT and STBRP_ASSERT.
 23 | //
 24 | // This library currently uses the Skyline Bottom-Left algorithm.
 25 | //
 26 | // Please note: better rectangle packers are welcome! Please
 27 | // implement them to the same API, but with a different init
 28 | // function.
 29 | //
 30 | // Credits
 31 | //
 32 | //  Library
 33 | //    Sean Barrett
 34 | //  Minor features
 35 | //    Martins Mozeiko
 36 | //    github:IntellectualKitty
 37 | //
 38 | //  Bugfixes / warning fixes
 39 | //    Jeremy Jaussaud
 40 | //    Fabian Giesen
 41 | //
 42 | // Version history:
 43 | //
 44 | //     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
 45 | //     0.99  (2019-02-07)  warning fixes
 46 | //     0.11  (2017-03-03)  return packing success/fail result
 47 | //     0.10  (2016-10-25)  remove cast-away-const to avoid warnings
 48 | //     0.09  (2016-08-27)  fix compiler warnings
 49 | //     0.08  (2015-09-13)  really fix bug with empty rects (w=0 or h=0)
 50 | //     0.07  (2015-09-13)  fix bug with empty rects (w=0 or h=0)
 51 | //     0.06  (2015-04-15)  added STBRP_SORT to allow replacing qsort
 52 | //     0.05:  added STBRP_ASSERT to allow replacing assert
 53 | //     0.04:  fixed minor bug in STBRP_LARGE_RECTS support
 54 | //     0.01:  initial release
 55 | //
 56 | // LICENSE
 57 | //
 58 | //   See end of file for license information.
 59 | 
 60 | //////////////////////////////////////////////////////////////////////////////
 61 | //
 62 | //       INCLUDE SECTION
 63 | //
 64 | 
 65 | #ifndef STB_INCLUDE_STB_RECT_PACK_H
 66 | #define STB_INCLUDE_STB_RECT_PACK_H
 67 | 
 68 | #define STB_RECT_PACK_VERSION  1
 69 | 
 70 | #ifdef STBRP_STATIC
 71 | #define STBRP_DEF static
 72 | #else
 73 | #define STBRP_DEF extern
 74 | #endif
 75 | 
 76 | #ifdef __cplusplus
 77 | extern "C" {
 78 | #endif
 79 | 
 80 | typedef struct stbrp_context stbrp_context;
 81 | typedef struct stbrp_node    stbrp_node;
 82 | typedef struct stbrp_rect    stbrp_rect;
 83 | 
 84 | #ifdef STBRP_LARGE_RECTS
 85 | typedef int            stbrp_coord;
 86 | #else
 87 | typedef unsigned short stbrp_coord;
 88 | #endif
 89 | 
 90 | STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
 91 | // Assign packed locations to rectangles. The rectangles are of type
 92 | // 'stbrp_rect' defined below, stored in the array 'rects', and there
 93 | // are 'num_rects' many of them.
 94 | //
 95 | // Rectangles which are successfully packed have the 'was_packed' flag
 96 | // set to a non-zero value and 'x' and 'y' store the minimum location
 97 | // on each axis (i.e. bottom-left in cartesian coordinates, top-left
 98 | // if you imagine y increasing downwards). Rectangles which do not fit
 99 | // have the 'was_packed' flag set to 0.
100 | //
101 | // You should not try to access the 'rects' array from another thread
102 | // while this function is running, as the function temporarily reorders
103 | // the array while it executes.
104 | //
105 | // To pack into another rectangle, you need to call stbrp_init_target
106 | // again. To continue packing into the same rectangle, you can call
107 | // this function again. Calling this multiple times with multiple rect
108 | // arrays will probably produce worse packing results than calling it
109 | // a single time with the full rectangle array, but the option is
110 | // available.
111 | //
112 | // The function returns 1 if all of the rectangles were successfully
113 | // packed and 0 otherwise.
114 | 
115 | struct stbrp_rect // one rectangle handed to stbrp_pack_rects()
116 | {
117 |    // reserved for your use:
118 |    int            id;
119 | 
120 |    // input: width and height of the rectangle to pack
121 |    stbrp_coord    w, h;
122 | 
123 |    // output: minimum location on each axis (top-left if y increases downwards)
124 |    stbrp_coord    x, y;
125 |    int            was_packed;  // non-zero if valid packing, 0 if the rectangle did not fit
126 | 
127 | }; // 16 bytes, nominally (with the default unsigned short stbrp_coord)
128 | 
129 | 
130 | STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes);
131 | // Initialize a rectangle packer to:
132 | //    pack a rectangle that is 'width' by 'height' in dimensions
133 | //    using temporary storage provided by the array 'nodes', which is 'num_nodes' long
134 | //
135 | // You must call this function every time you start packing into a new target.
136 | //
137 | // There is no "shutdown" function. The 'nodes' memory must stay valid for
138 | // the following stbrp_pack_rects() call (or calls), but can be freed after
139 | // the call (or calls) finish.
140 | //
141 | // Note: to guarantee best results, either:
142 | //       1. make sure 'num_nodes' >= 'width'
143 | //   or  2. call stbrp_setup_allow_out_of_mem() defined below with 'allow_out_of_mem = 1'
144 | //
145 | // If you don't do either of the above things, widths will be quantized to multiples
146 | // of small integers to guarantee the algorithm doesn't run out of temporary storage.
147 | //
148 | // If you do #2, then the non-quantized algorithm will be used, but the algorithm
149 | // may run out of temporary storage and be unable to pack some rectangles.
150 | 
151 | STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem);
152 | // Optionally call this function after init but before doing any packing to
153 | // change the handling of the out-of-temp-memory scenario, described above.
154 | // If you call init again, this will be reset to the default (false).
155 | 
156 | 
157 | STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic);
158 | // Optionally select which packing heuristic the library should use. Different
159 | // heuristics will produce better/worse results for different data sets.
160 | // If you call init again, this will be reset to the default.
161 | 
162 | enum // values accepted by stbrp_setup_heuristic()
163 | {
164 |    STBRP_HEURISTIC_Skyline_default=0,
165 |    STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default, // bottom-left placement (the default)
166 |    STBRP_HEURISTIC_Skyline_BF_sortHeight // best-fit placement
167 | };
168 | 
169 | 
170 | //////////////////////////////////////////////////////////////////////////////
171 | //
172 | // the details of the following structures don't matter to you, but they must
173 | // be visible so you can handle the memory allocations for them
174 | 
175 | struct stbrp_node // one entry of a singly linked list of skyline nodes
176 | {
177 |    stbrp_coord  x,y;   // x where this node starts and the y (height) associated with it
178 |    stbrp_node  *next;  // next node in the list, NULL at the end
179 | };
180 | 
181 | struct stbrp_context // packer state, filled in by stbrp_init_target()
182 | {
183 |    int width;     // width of the target rectangle
184 |    int height;    // height of the target rectangle
185 |    int align;     // rectangle widths are rounded up to a multiple of this before placement
186 |    int init_mode; // set to STBRP__INIT_skyline by stbrp_init_target()
187 |    int heuristic; // one of the STBRP_HEURISTIC_* values
188 |    int num_nodes; // number of user-provided nodes
189 |    stbrp_node *active_head; // head of the list describing the current skyline
190 |    stbrp_node *free_head;   // head of the list of unused nodes
191 |    stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2'
192 | };
193 | 
194 | #ifdef __cplusplus
195 | }
196 | #endif
197 | 
198 | #endif
199 | 
200 | //////////////////////////////////////////////////////////////////////////////
201 | //
202 | //     IMPLEMENTATION SECTION
203 | //
204 | 
205 | #ifdef STB_RECT_PACK_IMPLEMENTATION
206 | #ifndef STBRP_SORT
207 | #include <stdlib.h>
208 | #define STBRP_SORT qsort
209 | #endif
210 | 
211 | #ifndef STBRP_ASSERT
212 | #include <assert.h>
213 | #define STBRP_ASSERT assert
214 | #endif
215 | 
216 | // [DEAR IMGUI] Added STBRP__CDECL
217 | #ifdef _MSC_VER
218 | #define STBRP__NOTUSED(v)  (void)(v)
219 | #define STBRP__CDECL __cdecl
220 | #else
221 | #define STBRP__NOTUSED(v)  (void)sizeof(v)
222 | #define STBRP__CDECL
223 | #endif
224 | 
225 | enum // values for stbrp_context::init_mode
226 | {
227 |    STBRP__INIT_skyline = 1 // skyline packer; the only mode defined here
228 | };
229 | 
230 | STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic) // store the packing heuristic in an initialized context
231 | {
232 |    switch (context->init_mode) {
233 |       case STBRP__INIT_skyline:
234 |          STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight); // only the two skyline heuristics are accepted
235 |          context->heuristic = heuristic;
236 |          break;
237 |       default:
238 |          STBRP_ASSERT(0); // unknown init_mode (presumably the context was never passed to stbrp_init_target)
239 |    }
240 | }
241 | 
242 | STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem) // sets context->align: 1 for exact widths, otherwise a quantization step
243 | {
244 |    if (allow_out_of_mem)
245 |       // if it's ok to run out of memory, then don't bother aligning them;
246 |       // this gives better packing, but may fail due to OOM (even though
247 |       // the rectangles easily fit). @TODO a smarter approach would be to only
248 |       // quantize once we've hit OOM, then we could get rid of this parameter.
249 |       context->align = 1;
250 |    else {
251 |       // if it's not ok to run out of memory, then quantize the widths
252 |       // so that num_nodes is always enough nodes.
253 |       //
254 |       // I.e. num_nodes * align >= width
255 |       //                  align >= width / num_nodes
256 |       //                  align = ceil(width/num_nodes)
257 | 
258 |       context->align = (context->width + context->num_nodes-1) / context->num_nodes; // integer ceil; NOTE(review): divides by num_nodes, so this assumes num_nodes > 0
259 |    }
260 | }
261 | 
262 | STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes) // reset 'context' for an empty width x height target, using 'nodes' as storage
263 | {
264 |    int i;
265 | #ifndef STBRP_LARGE_RECTS
266 |    STBRP_ASSERT(width <= 0xffff && height <= 0xffff); // stbrp_coord is unsigned short in this configuration
267 | #endif
268 | 
269 |    for (i=0; i < num_nodes-1; ++i) // chain the user-provided nodes into one list
270 |       nodes[i].next = &nodes[i+1];
271 |    nodes[i].next = NULL; // NOTE(review): writes nodes[0] when num_nodes < 1; assumes the caller passes at least one node
272 |    context->init_mode = STBRP__INIT_skyline;
273 |    context->heuristic = STBRP_HEURISTIC_Skyline_default;
274 |    context->free_head = &nodes[0]; // all user nodes start out unused
275 |    context->active_head = &context->extra[0];
276 |    context->width = width;
277 |    context->height = height;
278 |    context->num_nodes = num_nodes;
279 |    stbrp_setup_allow_out_of_mem(context, 0); // default: quantize widths (computes context->align from width and num_nodes)
280 | 
281 |    // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly)
282 |    context->extra[0].x = 0;
283 |    context->extra[0].y = 0;
284 |    context->extra[0].next = &context->extra[1];
285 |    context->extra[1].x = (stbrp_coord) width;
286 | #ifdef STBRP_LARGE_RECTS
287 |    context->extra[1].y = (1<<30); // sentinel y value
288 | #else
289 |    context->extra[1].y = 65535; // sentinel y value
290 | #endif
291 |    context->extra[1].next = NULL;
292 | }
293 | 
294 | // find minimum y position if it starts at x0; the wasted area underneath is returned through *pwaste
295 | static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
296 | {
297 |    stbrp_node *node = first;
298 |    int x1 = x0 + width; // right edge of the span being examined
299 |    int min_y, visited_width, waste_area;
300 | 
301 |    STBRP__NOTUSED(c);
302 | 
303 |    STBRP_ASSERT(first->x <= x0);
304 | 
305 |    #if 0
306 |    // skip in case we're past the node
307 |    while (node->next->x <= x0)
308 |       ++node; // NOTE(review): disabled code; this increments the pointer instead of following node->next
309 |    #else
310 |    STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency
311 |    #endif
312 | 
313 |    STBRP_ASSERT(node->x <= x0);
314 | 
315 |    min_y = 0;
316 |    waste_area = 0;
317 |    visited_width = 0;
318 |    while (node->x < x1) { // walk every node that starts before the right edge
319 |       if (node->y > min_y) {
320 |          // raise min_y higher.
321 |          // we've accounted for all waste up to min_y,
322 |          // but we'll now add more waste for everything we've visited
323 |          waste_area += visited_width * (node->y - min_y);
324 |          min_y = node->y;
325 |          // the first time through, visited_width might be reduced
326 |          if (node->x < x0)
327 |             visited_width += node->next->x - x0;
328 |          else
329 |             visited_width += node->next->x - node->x;
330 |       } else {
331 |          // add waste area
332 |          int under_width = node->next->x - node->x;
333 |          if (under_width + visited_width > width) // clamp to the part that lies under the rectangle
334 |             under_width = width - visited_width;
335 |          waste_area += under_width * (min_y - node->y);
336 |          visited_width += under_width;
337 |       }
338 |       node = node->next;
339 |    }
340 | 
341 |    *pwaste = waste_area;
342 |    return min_y;
343 | }
344 | 
345 | typedef struct // result of searching for a position for one rectangle
346 | {
347 |    int x,y;                // chosen position
348 |    stbrp_node **prev_link; // link to the skyline node at the chosen position; NULL if no position was found
349 | } stbrp__findresult;
350 | 
351 | static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height) // search the skyline for the best position per c->heuristic; prev_link is NULL if none was found
352 | {
353 |    int best_waste = (1<<30), best_x, best_y = (1 << 30);
354 |    stbrp__findresult fr;
355 |    stbrp_node **prev, *node, *tail, **best = NULL;
356 | 
357 |    // align to multiple of c->align
358 |    width = (width + c->align - 1);
359 |    width -= width % c->align;
360 |    STBRP_ASSERT(width % c->align == 0);
361 | 
362 |    // if it can't possibly fit, bail immediately
363 |    if (width > c->width || height > c->height) {
364 |       fr.prev_link = NULL;
365 |       fr.x = fr.y = 0;
366 |       return fr;
367 |    }
368 | 
369 |    node = c->active_head;
370 |    prev = &c->active_head;
371 |    while (node->x + width <= c->width) { // try left-aligning the rectangle at each node where it still fits horizontally
372 |       int y,waste;
373 |       y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste);
374 |       if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
375 |          // bottom left; vertical fit is not checked here (stbrp__skyline_pack_rectangle checks it)
376 |          if (y < best_y) {
377 |             best_y = y;
378 |             best = prev;
379 |          }
380 |       } else {
381 |          // best-fit
382 |          if (y + height <= c->height) {
383 |             // can only use it if it fits vertically
384 |             if (y < best_y || (y == best_y && waste < best_waste)) {
385 |                best_y = y;
386 |                best_waste = waste;
387 |                best = prev;
388 |             }
389 |          }
390 |       }
391 |       prev = &node->next;
392 |       node = node->next;
393 |    }
394 | 
395 |    best_x = (best == NULL) ? 0 : (*best)->x; // x of the best left-aligned candidate, 0 if none was found
396 | 
397 |    // if doing best-fit (BF), we also have to try aligning right edge to each node position
398 |    //
399 |    // e.g, if fitting
400 |    //
401 |    //     ____________________
402 |    //    |____________________|
403 |    //
404 |    //            into
405 |    //
406 |    //   |                         |
407 |    //   |             ____________|
408 |    //   |____________|
409 |    //
410 |    // then right-aligned reduces waste, but bottom-left BL always chooses left-aligned
411 |    //
412 |    // This makes BF take about 2x the time
413 | 
414 |    if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
415 |       tail = c->active_head;
416 |       node = c->active_head;
417 |       prev = &c->active_head;
418 |       // find first node that's admissible
419 |       while (tail->x < width)
420 |          tail = tail->next;
421 |       while (tail) {
422 |          int xpos = tail->x - width; // left edge when the right edge is aligned to 'tail'
423 |          int y,waste;
424 |          STBRP_ASSERT(xpos >= 0);
425 |          // find the left position that matches this
426 |          while (node->next->x <= xpos) {
427 |             prev = &node->next;
428 |             node = node->next;
429 |          }
430 |          STBRP_ASSERT(node->next->x > xpos && node->x <= xpos);
431 |          y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste);
432 |          if (y + height <= c->height) {
433 |             if (y <= best_y) {
434 |                if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) { // prefer lower y, then less waste, then smaller x
435 |                   best_x = xpos;
436 |                   STBRP_ASSERT(y <= best_y);
437 |                   best_y = y;
438 |                   best_waste = waste;
439 |                   best = prev;
440 |                }
441 |             }
442 |          }
443 |          tail = tail->next;
444 |       }
445 |    }
446 | 
447 |    fr.prev_link = best;
448 |    fr.x = best_x;
449 |    fr.y = best_y;
450 |    return fr;
451 | }
452 | // Places a width x height rect at the spot chosen by stbrp__skyline_find_best_pos() and splices a node for its top edge into the active list; on failure the returned prev_link is NULL.
453 | static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
454 | {
455 |    // find best position according to heuristic
456 |    stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height);
457 |    stbrp_node *node, *cur;
458 | 
459 |    // bail if:
460 |    //    1. it failed (no position was found)
461 |    //    2. the best node doesn't fit vertically (we don't always check this)
462 |    //    3. we're out of memory (the free list is empty)
463 |    if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) {
464 |       res.prev_link = NULL;
465 |       return res;
466 |    }
467 | 
468 |    // on success, create new node: it starts at the rect's left edge and sits at the rect's top (res.y + height)
469 |    node = context->free_head;
470 |    node->x = (stbrp_coord) res.x;
471 |    node->y = (stbrp_coord) (res.y + height);
472 |    // pop the node off the free list
473 |    context->free_head = node->next;
474 | 
475 |    // insert the new node into the right starting point, and
476 |    // let 'cur' point to the remaining nodes needing to be
477 |    // stitched back in
478 | 
479 |    cur = *res.prev_link;
480 |    if (cur->x < res.x) {
481 |       // preserve the existing one, so start testing with the next one
482 |       stbrp_node *next = cur->next;
483 |       cur->next = node;
484 |       cur = next;
485 |    } else {
486 |       *res.prev_link = node;
487 |    }
488 | 
489 |    // from here, traverse cur and free the nodes, until we get to one
490 |    // that shouldn't be freed (its successor starts past the rect's right edge)
491 |    while (cur->next && cur->next->x <= res.x + width) {
492 |       stbrp_node *next = cur->next;
493 |       // move the current node to the free list
494 |       cur->next = context->free_head;
495 |       context->free_head = cur;
496 |       cur = next;
497 |    }
498 | 
499 |    // stitch the list back in
500 |    node->next = cur;
501 |    // if the surviving node starts left of the rect's right edge, move its start to that edge
502 |    if (cur->x < res.x + width)
503 |       cur->x = (stbrp_coord) (res.x + width);
504 |    // debug-only checks: active list x values strictly increase and the list ends at the node with x >= context->width
505 | #ifdef _DEBUG
506 |    cur = context->active_head;
507 |    while (cur->x < context->width) {
508 |       STBRP_ASSERT(cur->x < cur->next->x);
509 |       cur = cur->next;
510 |    }
511 |    STBRP_ASSERT(cur->next == NULL);
512 |    // the active and free lists together must hold exactly num_nodes+2 nodes
513 |    {
514 |       int count=0;
515 |       cur = context->active_head;
516 |       while (cur) {
517 |          cur = cur->next;
518 |          ++count;
519 |       }
520 |       cur = context->free_head;
521 |       while (cur) {
522 |          cur = cur->next;
523 |          ++count;
524 |       }
525 |       STBRP_ASSERT(count == context->num_nodes+2);
526 |    }
527 | #endif
528 | 
529 |    return res;
530 | }
531 | 
532 | // [DEAR IMGUI] Added STBRP__CDECL. Sort comparator: taller rects first, ties broken by wider first.
533 | static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
534 | {
535 |    const stbrp_rect *lhs = (const stbrp_rect *) a;
536 |    const stbrp_rect *rhs = (const stbrp_rect *) b;
537 |    // different heights: the taller rect sorts first
538 |    if (lhs->h != rhs->h)
539 |       return (lhs->h > rhs->h) ? -1 : 1;
540 |    // same height: the wider rect sorts first, equal widths compare equal
541 |    return (lhs->w > rhs->w) ? -1 : (lhs->w < rhs->w);
542 | }
543 | 
544 | // [DEAR IMGUI] Added STBRP__CDECL. Sort comparator: ascending 'was_packed', which stbrp_pack_rects() fills with the input index.
545 | static int STBRP__CDECL rect_original_order(const void *a, const void *b)
546 | {
547 |    const stbrp_rect *lhs = (const stbrp_rect *) a;
548 |    const stbrp_rect *rhs = (const stbrp_rect *) b;
549 |    return (lhs->was_packed > rhs->was_packed) - (lhs->was_packed < rhs->was_packed);
550 | }
551 | // Sentinel coordinate: stbrp_pack_rects() writes it to x and y of a rect that could not be placed, then tests for it to compute was_packed.
552 | #ifdef STBRP_LARGE_RECTS
553 | #define STBRP__MAXVAL  0xffffffff
554 | #else
555 | #define STBRP__MAXVAL  0xffff
556 | #endif
557 | // Packs 'rects' into 'context' tallest-first; sets x/y and was_packed on every rect and returns 1 only when all of them were placed.
558 | STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
559 | {
560 |    int idx, packed_everything = 1;
561 | 
562 |    // stash each rect's input position in 'was_packed' so the caller's order can be restored after sorting
563 |    for (idx = 0; idx < num_rects; ++idx)
564 |       rects[idx].was_packed = idx;
565 | 
566 |    // order the rects for packing (see rect_height_compare)
567 |    STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare);
568 | 
569 |    for (idx = 0; idx < num_rects; ++idx) {
570 |       stbrp_rect *r = &rects[idx];
571 |       if (r->w != 0 && r->h != 0) {
572 |          stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, r->w, r->h);
573 |          if (fr.prev_link == NULL) {
574 |             r->x = r->y = STBRP__MAXVAL;  // sentinel: this rect could not be placed
575 |          } else {
576 |             r->x = (stbrp_coord) fr.x;
577 |             r->y = (stbrp_coord) fr.y;
578 |          }
579 |       } else {
580 |          r->x = r->y = 0;  // empty rect needs no space
581 |       }
582 |    }
583 | 
584 |    // put the rects back into the caller's original order
585 |    STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order);
586 | 
587 |    // turn the sentinel coordinates into was_packed flags and fold them into the overall status
588 |    for (idx = 0; idx < num_rects; ++idx) {
589 |       int failed = (rects[idx].x == STBRP__MAXVAL && rects[idx].y == STBRP__MAXVAL);
590 |       rects[idx].was_packed = !failed;
591 |       if (failed) packed_everything = 0;
592 |    }
593 | 
594 |    // 1 when every rect was placed, 0 otherwise
595 |    return packed_everything;
596 | }
597 | #endif
598 | 
599 | /*
600 | ------------------------------------------------------------------------------
601 | This software is available under 2 licenses -- choose whichever you prefer.
602 | ------------------------------------------------------------------------------
603 | ALTERNATIVE A - MIT License
604 | Copyright (c) 2017 Sean Barrett
605 | Permission is hereby granted, free of charge, to any person obtaining a copy of
606 | this software and associated documentation files (the "Software"), to deal in
607 | the Software without restriction, including without limitation the rights to
608 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
609 | of the Software, and to permit persons to whom the Software is furnished to do
610 | so, subject to the following conditions:
611 | The above copyright notice and this permission notice shall be included in all
612 | copies or substantial portions of the Software.
613 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
614 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
615 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
616 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
617 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
618 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
619 | SOFTWARE.
620 | ------------------------------------------------------------------------------
621 | ALTERNATIVE B - Public Domain (www.unlicense.org)
622 | This is free and unencumbered software released into the public domain.
623 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
624 | software, either in source code form or as a compiled binary, for any purpose,
625 | commercial or non-commercial, and by any means.
626 | In jurisdictions that recognize copyright laws, the author or authors of this
627 | software dedicate any and all copyright interest in the software to the public
628 | domain. We make this dedication for the benefit of the public at large and to
629 | the detriment of our heirs and successors. We intend this dedication to be an
630 | overt act of relinquishment in perpetuity of all present and future rights to
631 | this software under copyright law.
632 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
633 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
634 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
635 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
636 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
637 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
638 | ------------------------------------------------------------------------------
639 | */
640 | 


--------------------------------------------------------------------------------
/libs/imgui/src/imgui_impl_glfw.cpp:
--------------------------------------------------------------------------------
  1 | // dear imgui: Platform Backend for GLFW
  2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..)
  3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.)
  4 | // (Requires: GLFW 3.1+)
  5 | 
  6 | // Implemented features:
  7 | //  [X] Platform: Clipboard support.
  8 | //  [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set]
  9 | //  [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'.
 10 | //  [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors requires GLFW 3.4+).
 11 | 
 12 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
 13 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
 14 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
 15 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
 16 | 
 17 | // CHANGELOG
 18 | // (minor and older changes stripped away, please see git history for details)
 19 | //  2022-01-26: Inputs: replaced short-lived io.AddKeyModsEvent() (added two weeks ago) with io.AddKeyEvent() using ImGuiKey_ModXXX flags. Sorry for the confusion.
 20 | //  2022-01-20: Inputs: calling new io.AddKeyAnalogEvent() for gamepad support, instead of writing directly to io.NavInputs[].
 21 | //  2022-01-17: Inputs: calling new io.AddMousePosEvent(), io.AddMouseButtonEvent(), io.AddMouseWheelEvent() API (1.87+).
 22 | //  2022-01-17: Inputs: always update key mods next and before key event (not in NewFrame) to fix input queue with very low framerates.
 23 | //  2022-01-12: *BREAKING CHANGE*: Now using glfwSetCursorPosCallback(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorPosCallback() and forward it to the backend via ImGui_ImplGlfw_CursorPosCallback().
 24 | //  2022-01-10: Inputs: calling new io.AddKeyEvent(), io.AddKeyModsEvent() + io.SetKeyEventNativeData() API (1.87+). Support for full ImGuiKey range.
 25 | //  2022-01-05: Inputs: Converting GLFW untranslated keycodes back to translated keycodes (in the ImGui_ImplGlfw_KeyCallback() function) in order to match the behavior of every other backend, and facilitate the use of GLFW with lettered-shortcuts API.
 26 | //  2021-08-17: *BREAKING CHANGE*: Now using glfwSetWindowFocusCallback() to call io.AddFocusEvent(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetWindowFocusCallback() and forward it to the backend via ImGui_ImplGlfw_WindowFocusCallback().
 27 | //  2021-07-29: *BREAKING CHANGE*: Now using glfwSetCursorEnterCallback(). MousePos is correctly reported when the host platform window is hovered but not focused. If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorEnterCallback() and forward it to the backend via ImGui_ImplGlfw_CursorEnterCallback().
 28 | //  2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX).
 29 | //  2020-01-17: Inputs: Disable error callback while assigning mouse cursors because some X11 setup don't have them and it generates errors.
 30 | //  2019-12-05: Inputs: Added support for new mouse cursors added in GLFW 3.4+ (resizing cursors, not allowed cursor).
 31 | //  2019-10-18: Misc: Previously installed user callbacks are now restored on shutdown.
 32 | //  2019-07-21: Inputs: Added mapping for ImGuiKey_KeyPadEnter.
 33 | //  2019-05-11: Inputs: Don't filter value from character callback before calling AddInputCharacter().
 34 | //  2019-03-12: Misc: Preserve DisplayFramebufferScale when main window is minimized.
 35 | //  2018-11-30: Misc: Setting up io.BackendPlatformName so it can be displayed in the About Window.
 36 | //  2018-11-07: Inputs: When installing our GLFW callbacks, we save user's previously installed ones - if any - and chain call them.
 37 | //  2018-08-01: Inputs: Workaround for Emscripten which doesn't seem to handle focus related calls.
 38 | //  2018-06-29: Inputs: Added support for the ImGuiMouseCursor_Hand cursor.
 39 | //  2018-06-08: Misc: Extracted imgui_impl_glfw.cpp/.h away from the old combined GLFW+OpenGL/Vulkan examples.
 40 | //  2018-03-20: Misc: Setup io.BackendFlags ImGuiBackendFlags_HasMouseCursors flag + honor ImGuiConfigFlags_NoMouseCursorChange flag.
 41 | //  2018-02-20: Inputs: Added support for mouse cursors (ImGui::GetMouseCursor() value, passed to glfwSetCursor()).
 42 | //  2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves.
 43 | //  2018-02-06: Inputs: Added mapping for ImGuiKey_Space.
 44 | //  2018-01-25: Inputs: Added gamepad support if ImGuiConfigFlags_NavEnableGamepad is set.
 45 | //  2018-01-25: Inputs: Honoring the io.WantSetMousePos by repositioning the mouse (when using navigation and ImGuiConfigFlags_NavMoveMouse is set).
 46 | //  2018-01-20: Inputs: Added Horizontal Mouse Wheel support.
 47 | //  2018-01-18: Inputs: Added mapping for ImGuiKey_Insert.
 48 | //  2017-08-25: Inputs: MousePos set to -FLT_MAX,-FLT_MAX when mouse is unavailable/missing (instead of -1,-1).
 49 | //  2016-10-15: Misc: Added a void* user_data parameter to Clipboard function handlers.
 50 | 
 51 | #include "imgui.h"
 52 | #include "imgui_impl_glfw.h"
 53 | 
 54 | // Clang warnings with -Weverything
 55 | #if defined(__clang__)
 56 | #pragma clang diagnostic push
 57 | #pragma clang diagnostic ignored "-Wold-style-cast"     // warning: use of old-style cast
 58 | #pragma clang diagnostic ignored "-Wsign-conversion"    // warning: implicit conversion changes signedness
 59 | #if __has_warning("-Wzero-as-null-pointer-constant")
 60 | #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
 61 | #endif
 62 | #endif
 63 | 
 64 | // GLFW
 65 | #include <GLFW/glfw3.h>
 66 | #ifdef _WIN32
 67 | #undef APIENTRY
 68 | #define GLFW_EXPOSE_NATIVE_WIN32
 69 | #include <GLFW/glfw3native.h>   // for glfwGetWin32Window
 70 | #endif
 71 | #ifdef GLFW_RESIZE_NESW_CURSOR  // Let's be nice to people who pulled GLFW between 2019-04-16 (3.4 define) and 2019-11-29 (cursors defines) // FIXME: Remove when GLFW 3.4 is released?
 72 | #define GLFW_HAS_NEW_CURSORS    (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3400) // 3.4+ GLFW_RESIZE_ALL_CURSOR, GLFW_RESIZE_NESW_CURSOR, GLFW_RESIZE_NWSE_CURSOR, GLFW_NOT_ALLOWED_CURSOR
 73 | #else
 74 | #define GLFW_HAS_NEW_CURSORS    (0)
 75 | #endif
 76 | #define GLFW_HAS_GAMEPAD_API    (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3300) // 3.3+ glfwGetGamepadState() new api
 77 | #define GLFW_HAS_GET_KEY_NAME   (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3200) // 3.2+ glfwGetKeyName()
 78 | 
 79 | // GLFW data: identifies the client graphics API (type of ImGui_ImplGlfw_Data::ClientApi and of the 'client_api' argument of ImGui_ImplGlfw_Init())
 80 | enum GlfwClientApi
 81 | {
 82 |     GlfwClientApi_Unknown,
 83 |     GlfwClientApi_OpenGL,
 84 |     GlfwClientApi_Vulkan
 85 | };
 86 | // Backend state: one instance is allocated by ImGui_ImplGlfw_Init() and stored in io.BackendPlatformUserData.
 87 | struct ImGui_ImplGlfw_Data
 88 | {
 89 |     GLFWwindow*             Window;                                 // Window passed to ImGui_ImplGlfw_Init()
 90 |     GlfwClientApi           ClientApi;
 91 |     double                  Time;                                   // Set to 0.0 by ImGui_ImplGlfw_Init()
 92 |     GLFWwindow*             MouseWindow;                            // Window the cursor last entered; reset to NULL when the cursor leaves it (see ImGui_ImplGlfw_CursorEnterCallback())
 93 |     GLFWcursor*             MouseCursors[ImGuiMouseCursor_COUNT];   // GLFW standard cursors created by ImGui_ImplGlfw_Init(), indexed by ImGuiMouseCursor (missing cursors are NULL)
 94 |     bool                    InstalledCallbacks;                     // Set to true when ImGui_ImplGlfw_Init() is called with install_callbacks == true
 95 | 
 96 |     // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any.
 97 |     GLFWwindowfocusfun      PrevUserCallbackWindowFocus;
 98 |     GLFWcursorposfun        PrevUserCallbackCursorPos;
 99 |     GLFWcursorenterfun      PrevUserCallbackCursorEnter;
100 |     GLFWmousebuttonfun      PrevUserCallbackMousebutton;
101 |     GLFWscrollfun           PrevUserCallbackScroll;
102 |     GLFWkeyfun              PrevUserCallbackKey;
103 |     GLFWcharfun             PrevUserCallbackChar;
104 |     GLFWmonitorfun          PrevUserCallbackMonitor;
105 | 
106 |     ImGui_ImplGlfw_Data()   { memset(this, 0, sizeof(*this)); }     // Every member starts out zeroed
107 | };
108 | 
109 | // Returns the backend state kept in io.BackendPlatformUserData of the current Dear ImGui context, or NULL when no context is current.
110 | // Storing it per-context allows multiple Dear ImGui contexts, but it is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead.
111 | // FIXME: multi-context support is not well tested and probably dysfunctional in this backend.
112 | // - Because glfwPollEvents() process all windows and some events may be called outside of it, you will need to register your own callbacks (passing install_callbacks=false in ImGui_ImplGlfw_InitXXX functions), set the current dear imgui context and then call our callbacks.
113 | // - Otherwise we may need to store a GLFWWindow* -> ImGuiContext* map and handle this in the backend, adding a little bit of extra complexity to it.
114 | // FIXME: some shared resources (mouse cursor shape, gamepad) are mishandled when using multi-context.
115 | static ImGui_ImplGlfw_Data* ImGui_ImplGlfw_GetBackendData()
116 | {
117 |     if (ImGui::GetCurrentContext() == NULL) return NULL;
118 |     return (ImGui_ImplGlfw_Data*)ImGui::GetIO().BackendPlatformUserData;
119 | }
120 | 
121 | // Functions
122 | static const char* ImGui_ImplGlfw_GetClipboardText(void* user_data)
123 | {
124 |     return glfwGetClipboardString((GLFWwindow*)user_data); // user_data is io.ClipboardUserData, which ImGui_ImplGlfw_Init() sets to the backend's GLFWwindow*
125 | }
126 | // Clipboard setter installed as io.SetClipboardTextFn: forwards 'text' to GLFW; user_data is the GLFWwindow* stored in io.ClipboardUserData.
127 | static void ImGui_ImplGlfw_SetClipboardText(void* user_data, const char* text)
128 | {
129 |     glfwSetClipboardString((GLFWwindow*)user_data, text);
130 | }
131 | // Maps a GLFW_KEY_* keycode to the matching ImGuiKey value; keycodes without a case below yield ImGuiKey_None.
132 | static ImGuiKey ImGui_ImplGlfw_KeyToImGuiKey(int key)
133 | {
134 |     switch (key)
135 |     {
136 |         case GLFW_KEY_TAB: return ImGuiKey_Tab;
137 |         case GLFW_KEY_LEFT: return ImGuiKey_LeftArrow;
138 |         case GLFW_KEY_RIGHT: return ImGuiKey_RightArrow;
139 |         case GLFW_KEY_UP: return ImGuiKey_UpArrow;
140 |         case GLFW_KEY_DOWN: return ImGuiKey_DownArrow;
141 |         case GLFW_KEY_PAGE_UP: return ImGuiKey_PageUp;
142 |         case GLFW_KEY_PAGE_DOWN: return ImGuiKey_PageDown;
143 |         case GLFW_KEY_HOME: return ImGuiKey_Home;
144 |         case GLFW_KEY_END: return ImGuiKey_End;
145 |         case GLFW_KEY_INSERT: return ImGuiKey_Insert;
146 |         case GLFW_KEY_DELETE: return ImGuiKey_Delete;
147 |         case GLFW_KEY_BACKSPACE: return ImGuiKey_Backspace;
148 |         case GLFW_KEY_SPACE: return ImGuiKey_Space;
149 |         case GLFW_KEY_ENTER: return ImGuiKey_Enter;
150 |         case GLFW_KEY_ESCAPE: return ImGuiKey_Escape;
151 |         case GLFW_KEY_APOSTROPHE: return ImGuiKey_Apostrophe;
152 |         case GLFW_KEY_COMMA: return ImGuiKey_Comma;
153 |         case GLFW_KEY_MINUS: return ImGuiKey_Minus;
154 |         case GLFW_KEY_PERIOD: return ImGuiKey_Period;
155 |         case GLFW_KEY_SLASH: return ImGuiKey_Slash;
156 |         case GLFW_KEY_SEMICOLON: return ImGuiKey_Semicolon;
157 |         case GLFW_KEY_EQUAL: return ImGuiKey_Equal;
158 |         case GLFW_KEY_LEFT_BRACKET: return ImGuiKey_LeftBracket;
159 |         case GLFW_KEY_BACKSLASH: return ImGuiKey_Backslash;
160 |         case GLFW_KEY_RIGHT_BRACKET: return ImGuiKey_RightBracket;
161 |         case GLFW_KEY_GRAVE_ACCENT: return ImGuiKey_GraveAccent;
162 |         case GLFW_KEY_CAPS_LOCK: return ImGuiKey_CapsLock;
163 |         case GLFW_KEY_SCROLL_LOCK: return ImGuiKey_ScrollLock;
164 |         case GLFW_KEY_NUM_LOCK: return ImGuiKey_NumLock;
165 |         case GLFW_KEY_PRINT_SCREEN: return ImGuiKey_PrintScreen;
166 |         case GLFW_KEY_PAUSE: return ImGuiKey_Pause;
167 |         case GLFW_KEY_KP_0: return ImGuiKey_Keypad0;
168 |         case GLFW_KEY_KP_1: return ImGuiKey_Keypad1;
169 |         case GLFW_KEY_KP_2: return ImGuiKey_Keypad2;
170 |         case GLFW_KEY_KP_3: return ImGuiKey_Keypad3;
171 |         case GLFW_KEY_KP_4: return ImGuiKey_Keypad4;
172 |         case GLFW_KEY_KP_5: return ImGuiKey_Keypad5;
173 |         case GLFW_KEY_KP_6: return ImGuiKey_Keypad6;
174 |         case GLFW_KEY_KP_7: return ImGuiKey_Keypad7;
175 |         case GLFW_KEY_KP_8: return ImGuiKey_Keypad8;
176 |         case GLFW_KEY_KP_9: return ImGuiKey_Keypad9;
177 |         case GLFW_KEY_KP_DECIMAL: return ImGuiKey_KeypadDecimal;
178 |         case GLFW_KEY_KP_DIVIDE: return ImGuiKey_KeypadDivide;
179 |         case GLFW_KEY_KP_MULTIPLY: return ImGuiKey_KeypadMultiply;
180 |         case GLFW_KEY_KP_SUBTRACT: return ImGuiKey_KeypadSubtract;
181 |         case GLFW_KEY_KP_ADD: return ImGuiKey_KeypadAdd;
182 |         case GLFW_KEY_KP_ENTER: return ImGuiKey_KeypadEnter;
183 |         case GLFW_KEY_KP_EQUAL: return ImGuiKey_KeypadEqual;
184 |         case GLFW_KEY_LEFT_SHIFT: return ImGuiKey_LeftShift;
185 |         case GLFW_KEY_LEFT_CONTROL: return ImGuiKey_LeftCtrl;
186 |         case GLFW_KEY_LEFT_ALT: return ImGuiKey_LeftAlt;
187 |         case GLFW_KEY_LEFT_SUPER: return ImGuiKey_LeftSuper;
188 |         case GLFW_KEY_RIGHT_SHIFT: return ImGuiKey_RightShift;
189 |         case GLFW_KEY_RIGHT_CONTROL: return ImGuiKey_RightCtrl;
190 |         case GLFW_KEY_RIGHT_ALT: return ImGuiKey_RightAlt;
191 |         case GLFW_KEY_RIGHT_SUPER: return ImGuiKey_RightSuper;
192 |         case GLFW_KEY_MENU: return ImGuiKey_Menu;
193 |         case GLFW_KEY_0: return ImGuiKey_0;
194 |         case GLFW_KEY_1: return ImGuiKey_1;
195 |         case GLFW_KEY_2: return ImGuiKey_2;
196 |         case GLFW_KEY_3: return ImGuiKey_3;
197 |         case GLFW_KEY_4: return ImGuiKey_4;
198 |         case GLFW_KEY_5: return ImGuiKey_5;
199 |         case GLFW_KEY_6: return ImGuiKey_6;
200 |         case GLFW_KEY_7: return ImGuiKey_7;
201 |         case GLFW_KEY_8: return ImGuiKey_8;
202 |         case GLFW_KEY_9: return ImGuiKey_9;
203 |         case GLFW_KEY_A: return ImGuiKey_A;
204 |         case GLFW_KEY_B: return ImGuiKey_B;
205 |         case GLFW_KEY_C: return ImGuiKey_C;
206 |         case GLFW_KEY_D: return ImGuiKey_D;
207 |         case GLFW_KEY_E: return ImGuiKey_E;
208 |         case GLFW_KEY_F: return ImGuiKey_F;
209 |         case GLFW_KEY_G: return ImGuiKey_G;
210 |         case GLFW_KEY_H: return ImGuiKey_H;
211 |         case GLFW_KEY_I: return ImGuiKey_I;
212 |         case GLFW_KEY_J: return ImGuiKey_J;
213 |         case GLFW_KEY_K: return ImGuiKey_K;
214 |         case GLFW_KEY_L: return ImGuiKey_L;
215 |         case GLFW_KEY_M: return ImGuiKey_M;
216 |         case GLFW_KEY_N: return ImGuiKey_N;
217 |         case GLFW_KEY_O: return ImGuiKey_O;
218 |         case GLFW_KEY_P: return ImGuiKey_P;
219 |         case GLFW_KEY_Q: return ImGuiKey_Q;
220 |         case GLFW_KEY_R: return ImGuiKey_R;
221 |         case GLFW_KEY_S: return ImGuiKey_S;
222 |         case GLFW_KEY_T: return ImGuiKey_T;
223 |         case GLFW_KEY_U: return ImGuiKey_U;
224 |         case GLFW_KEY_V: return ImGuiKey_V;
225 |         case GLFW_KEY_W: return ImGuiKey_W;
226 |         case GLFW_KEY_X: return ImGuiKey_X;
227 |         case GLFW_KEY_Y: return ImGuiKey_Y;
228 |         case GLFW_KEY_Z: return ImGuiKey_Z;
229 |         case GLFW_KEY_F1: return ImGuiKey_F1;
230 |         case GLFW_KEY_F2: return ImGuiKey_F2;
231 |         case GLFW_KEY_F3: return ImGuiKey_F3;
232 |         case GLFW_KEY_F4: return ImGuiKey_F4;
233 |         case GLFW_KEY_F5: return ImGuiKey_F5;
234 |         case GLFW_KEY_F6: return ImGuiKey_F6;
235 |         case GLFW_KEY_F7: return ImGuiKey_F7;
236 |         case GLFW_KEY_F8: return ImGuiKey_F8;
237 |         case GLFW_KEY_F9: return ImGuiKey_F9;
238 |         case GLFW_KEY_F10: return ImGuiKey_F10;
239 |         case GLFW_KEY_F11: return ImGuiKey_F11;
240 |         case GLFW_KEY_F12: return ImGuiKey_F12;
241 |         default: return ImGuiKey_None;
242 |     }
243 | }
244 | 
245 | static void ImGui_ImplGlfw_UpdateKeyModifiers(int mods)
246 | {
247 |     // Submit the state of the four modifiers found in the GLFW 'mods' bitfield as ImGuiKey_ModXXX key events, in Ctrl, Shift, Alt, Super order.
248 |     static const struct { ImGuiKey Key; int GlfwBit; } mod_map[] = { { ImGuiKey_ModCtrl, GLFW_MOD_CONTROL }, { ImGuiKey_ModShift, GLFW_MOD_SHIFT }, { ImGuiKey_ModAlt, GLFW_MOD_ALT }, { ImGuiKey_ModSuper, GLFW_MOD_SUPER } };
249 |     ImGuiIO& io = ImGui::GetIO();
250 |     for (int n = 0; n < IM_ARRAYSIZE(mod_map); n++)
251 |         io.AddKeyEvent(mod_map[n].Key, (mods & mod_map[n].GlfwBit) != 0);
252 | }
253 | 
254 | void ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods)
255 | {
256 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
257 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
258 |     if (window == backend->Window && backend->PrevUserCallbackMousebutton != NULL)
259 |         backend->PrevUserCallbackMousebutton(window, button, action, mods);
260 |     // Modifier state is submitted ahead of the button event; buttons outside [0, ImGuiMouseButton_COUNT) are not forwarded.
261 |     ImGui_ImplGlfw_UpdateKeyModifiers(mods);
262 |     ImGuiIO& io = ImGui::GetIO();
263 |     const bool is_known_button = (0 <= button && button < ImGuiMouseButton_COUNT);
264 |     if (is_known_button) io.AddMouseButtonEvent(button, action == GLFW_PRESS);
265 | }
266 | 
267 | void ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset)
268 | {
269 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
270 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
271 |     if (window == backend->Window && backend->PrevUserCallbackScroll != NULL)
272 |         backend->PrevUserCallbackScroll(window, xoffset, yoffset);
273 |     // Queue the horizontal/vertical wheel offsets as a mouse wheel event.
274 |     ImGui::GetIO().AddMouseWheelEvent((float)xoffset, (float)yoffset);
275 | }
276 | 
277 | static int ImGui_ImplGlfw_TranslateUntranslatedKey(int key, int scancode)
278 | {
279 |     // Returns the GLFW_KEY_* matching the single-character name GLFW reports for (key, scancode), or 'key' unchanged when no such mapping applies.
280 | #if GLFW_HAS_GET_KEY_NAME && !defined(__EMSCRIPTEN__)
281 |     // GLFW 3.1+ attempts to "untranslate" keys, which goes the opposite of what every other framework does, making using lettered shortcuts difficult.
282 |     // (It had reasons to do so: namely GLFW is/was more likely to be used for WASD-type game controls rather than lettered shortcuts, but IMHO the 3.1 change could have been done differently) See https://github.com/glfw/glfw/issues/1502 for details.
283 |     // Adding a workaround to undo this (so our keys are translated->untranslated->translated, likely a lossy process). This won't cover edge cases but this is at least going to cover common cases.
284 |     if (key >= GLFW_KEY_KP_0 && key <= GLFW_KEY_KP_EQUAL) return key; // Keypad keys have printable names too ("0".."9", "+", ...): leave them alone or they would be turned into main-block keys.
285 |     const char* key_name = glfwGetKeyName(key, scancode);
286 |     if (key_name && key_name[0] != 0 && key_name[1] == 0)
287 |     {
288 |         const char char_names[] = "`-=[]\\,;\'./";
289 |         const int char_keys[] = { GLFW_KEY_GRAVE_ACCENT, GLFW_KEY_MINUS, GLFW_KEY_EQUAL, GLFW_KEY_LEFT_BRACKET, GLFW_KEY_RIGHT_BRACKET, GLFW_KEY_BACKSLASH, GLFW_KEY_COMMA, GLFW_KEY_SEMICOLON, GLFW_KEY_APOSTROPHE, GLFW_KEY_PERIOD, GLFW_KEY_SLASH, 0 };
290 |         IM_ASSERT(IM_ARRAYSIZE(char_names) == IM_ARRAYSIZE(char_keys));
291 |         if (key_name[0] >= '0' && key_name[0] <= '9')               { key = GLFW_KEY_0 + (key_name[0] - '0'); }
292 |         else if (key_name[0] >= 'A' && key_name[0] <= 'Z')          { key = GLFW_KEY_A + (key_name[0] - 'A'); }
293 |         else if (key_name[0] >= 'a' && key_name[0] <= 'z')          { key = GLFW_KEY_A + (key_name[0] - 'a'); } // Key names may be reported in lower case
294 |         else if (const char* p = strchr(char_names, key_name[0]))   { key = char_keys[p - char_names]; }
295 |     }
296 | #else
297 |     IM_UNUSED(scancode);
298 | #endif
299 |     return key;
300 | }
301 | 
302 | void ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int keycode, int scancode, int action, int mods)
303 | {
304 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
305 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
306 |     if (window == backend->Window && backend->PrevUserCallbackKey != NULL)
307 |         backend->PrevUserCallbackKey(window, keycode, scancode, action, mods);
308 |     // Only press and release are forwarded to Dear ImGui; any other action (e.g. GLFW_REPEAT) stops here.
309 |     const bool is_press = (action == GLFW_PRESS);
310 |     if (!is_press && action != GLFW_RELEASE)
311 |         return;
312 | 
313 |     ImGui_ImplGlfw_UpdateKeyModifiers(mods);
314 |     keycode = ImGui_ImplGlfw_TranslateUntranslatedKey(keycode, scancode);
315 |     ImGuiIO& io = ImGui::GetIO();
316 |     const ImGuiKey imgui_key = ImGui_ImplGlfw_KeyToImGuiKey(keycode);
317 |     io.AddKeyEvent(imgui_key, is_press);
318 |     io.SetKeyEventNativeData(imgui_key, keycode, scancode); // To support legacy indexing (<1.87 user code)
319 | }
320 | 
321 | void ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused)
322 | {
323 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
324 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
325 |     if (window == backend->Window && backend->PrevUserCallbackWindowFocus != NULL)
326 |         backend->PrevUserCallbackWindowFocus(window, focused);
327 |     // Queue the focus gain/loss as a focus event.
328 |     ImGui::GetIO().AddFocusEvent(focused != 0);
329 | }
330 | 
331 | void ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y)
332 | {
333 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
334 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
335 |     if (window == backend->Window && backend->PrevUserCallbackCursorPos != NULL)
336 |         backend->PrevUserCallbackCursorPos(window, x, y);
337 |     // Queue the new cursor coordinates as a mouse position event.
338 |     ImGui::GetIO().AddMousePosEvent((float)x, (float)y);
339 | }
340 | 
341 | void ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered)
342 | {
343 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
344 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
345 |     if (window == backend->Window && backend->PrevUserCallbackCursorEnter != NULL)
346 |         backend->PrevUserCallbackCursorEnter(window, entered);
347 |     // Track which window the cursor is over; when it leaves the tracked window, report the mouse position as unavailable.
348 |     ImGuiIO& io = ImGui::GetIO();
349 |     if (entered)
350 |         backend->MouseWindow = window;
351 |     else if (backend->MouseWindow == window)
352 |     {
353 |         backend->MouseWindow = NULL;
354 |         io.AddMousePosEvent(-FLT_MAX, -FLT_MAX);
355 |     }
356 | }
356 | 
357 | void ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c)
358 | {
359 |     // Chain to the callback the user had installed before ours (only for the window this backend was initialized with).
360 |     ImGui_ImplGlfw_Data* backend = ImGui_ImplGlfw_GetBackendData();
361 |     if (window == backend->Window && backend->PrevUserCallbackChar != NULL)
362 |         backend->PrevUserCallbackChar(window, c);
363 |     // Hand the character over unfiltered as text input.
364 |     ImGui::GetIO().AddInputCharacter(c);
365 | }
366 | // GLFW monitor callback: intentionally does nothing here (both parameters are ignored).
367 | void ImGui_ImplGlfw_MonitorCallback(GLFWmonitor*, int)
368 | {
369 | 	// Unused in 'master' branch but 'docking' branch will use this, so we declare it ahead of it so if you have to install callbacks you can install this one too.
370 | }
371 | 
372 | static bool ImGui_ImplGlfw_Init(GLFWwindow* window, bool install_callbacks, GlfwClientApi client_api)
373 | {
374 |     // Shared body of ImGui_ImplGlfw_InitForXXX(): allocates the backend data, creates the standard cursors and, when install_callbacks is true, chains our GLFW callbacks in front of the user's. Always returns true.
375 |     ImGuiIO& io = ImGui::GetIO();
376 |     IM_ASSERT(io.BackendPlatformUserData == NULL && "Already initialized a platform backend!");
377 |     // Setup backend capabilities flags
378 |     ImGui_ImplGlfw_Data* bd = IM_NEW(ImGui_ImplGlfw_Data)();
379 |     io.BackendPlatformUserData = (void*)bd;
380 |     io.BackendPlatformName = "imgui_impl_glfw";
381 |     io.BackendFlags |= ImGuiBackendFlags_HasMouseCursors;         // We can honor GetMouseCursor() values (optional)
382 |     io.BackendFlags |= ImGuiBackendFlags_HasSetMousePos;          // We can honor io.WantSetMousePos requests (optional, rarely used)
383 | 
384 |     bd->Window = window;
385 |     bd->Time = 0.0;
386 | 
387 |     io.SetClipboardTextFn = ImGui_ImplGlfw_SetClipboardText;
388 |     io.GetClipboardTextFn = ImGui_ImplGlfw_GetClipboardText;
389 |     io.ClipboardUserData = bd->Window;
390 | 
391 |     // Set platform dependent data in viewport
392 | #if defined(_WIN32)
393 |     ImGui::GetMainViewport()->PlatformHandleRaw = (void*)glfwGetWin32Window(bd->Window);
394 | #endif
395 | 
396 |     // Create mouse cursors
397 |     // (By design, on X11 cursors are user configurable and some cursors may be missing. When a cursor doesn't exist,
398 |     // GLFW will emit an error which will often be printed by the app, so we temporarily disable error reporting.
399 |     // Missing cursors will return NULL and our _UpdateMouseCursor() function will use the Arrow cursor instead.)
400 |     GLFWerrorfun prev_error_callback = glfwSetErrorCallback(NULL);
401 |     bd->MouseCursors[ImGuiMouseCursor_Arrow] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
402 |     bd->MouseCursors[ImGuiMouseCursor_TextInput] = glfwCreateStandardCursor(GLFW_IBEAM_CURSOR);
403 |     bd->MouseCursors[ImGuiMouseCursor_ResizeNS] = glfwCreateStandardCursor(GLFW_VRESIZE_CURSOR);
404 |     bd->MouseCursors[ImGuiMouseCursor_ResizeEW] = glfwCreateStandardCursor(GLFW_HRESIZE_CURSOR);
405 |     bd->MouseCursors[ImGuiMouseCursor_Hand] = glfwCreateStandardCursor(GLFW_HAND_CURSOR);
406 | #if GLFW_HAS_NEW_CURSORS
407 |     bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_RESIZE_ALL_CURSOR);
408 |     bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_RESIZE_NESW_CURSOR);
409 |     bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_RESIZE_NWSE_CURSOR);
410 |     bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_NOT_ALLOWED_CURSOR);
411 | #else
412 |     bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
413 |     bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
414 |     bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
415 |     bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
416 | #endif
417 |     glfwSetErrorCallback(prev_error_callback);
418 | 
419 |     // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any.
420 |     bd->PrevUserCallbackWindowFocus = NULL;
421 |     bd->PrevUserCallbackCursorEnter = NULL;
422 |     bd->PrevUserCallbackCursorPos = NULL;   // reset together with its siblings; it is installed and restored like them
423 |     bd->PrevUserCallbackMousebutton = NULL;
424 |     bd->PrevUserCallbackScroll = NULL;
425 |     bd->PrevUserCallbackKey = NULL;
426 |     bd->PrevUserCallbackChar = NULL;
427 |     bd->PrevUserCallbackMonitor = NULL;
428 |     if (install_callbacks)
429 |     {
430 |         bd->InstalledCallbacks = true;
431 |         bd->PrevUserCallbackWindowFocus = glfwSetWindowFocusCallback(window, ImGui_ImplGlfw_WindowFocusCallback);
432 |         bd->PrevUserCallbackCursorEnter = glfwSetCursorEnterCallback(window, ImGui_ImplGlfw_CursorEnterCallback);
433 |         bd->PrevUserCallbackCursorPos = glfwSetCursorPosCallback(window, ImGui_ImplGlfw_CursorPosCallback);
434 |         bd->PrevUserCallbackMousebutton = glfwSetMouseButtonCallback(window, ImGui_ImplGlfw_MouseButtonCallback);
435 |         bd->PrevUserCallbackScroll = glfwSetScrollCallback(window, ImGui_ImplGlfw_ScrollCallback);
436 |         bd->PrevUserCallbackKey = glfwSetKeyCallback(window, ImGui_ImplGlfw_KeyCallback);
437 |         bd->PrevUserCallbackChar = glfwSetCharCallback(window, ImGui_ImplGlfw_CharCallback);
438 |         bd->PrevUserCallbackMonitor = glfwSetMonitorCallback(ImGui_ImplGlfw_MonitorCallback);
439 |     }
440 |     bd->ClientApi = client_api;
441 |     return true;
442 | }
443 | 
444 | bool ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks)
445 | {
446 |     return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_OpenGL); // Shared init path, recorded as using the OpenGL client API
447 | }
448 | 
449 | bool ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks)
450 | {
451 |     return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Vulkan); // Shared init path, recorded as using the Vulkan client API
452 | }
453 | 
454 | bool ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks)
455 | {
456 |     return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Unknown); // Shared init path, client API recorded as unknown
457 | }
458 | 
459 | void ImGui_ImplGlfw_Shutdown()
460 | {
461 |     // Undo ImGui_ImplGlfw_Init(): restore the user's GLFW callbacks (if we installed ours), destroy the cursors, then detach and free the backend data.
462 |     ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
463 |     IM_ASSERT(bd != NULL && "No platform backend to shutdown, or already shutdown?");
464 |     ImGuiIO& io = ImGui::GetIO();
465 |     if (bd->InstalledCallbacks)
466 |     {
467 |         glfwSetWindowFocusCallback(bd->Window, bd->PrevUserCallbackWindowFocus);
468 |         glfwSetCursorEnterCallback(bd->Window, bd->PrevUserCallbackCursorEnter);
469 |         glfwSetCursorPosCallback(bd->Window, bd->PrevUserCallbackCursorPos);
470 |         glfwSetMouseButtonCallback(bd->Window, bd->PrevUserCallbackMousebutton);
471 |         glfwSetScrollCallback(bd->Window, bd->PrevUserCallbackScroll);
472 |         glfwSetKeyCallback(bd->Window, bd->PrevUserCallbackKey);
473 |         glfwSetCharCallback(bd->Window, bd->PrevUserCallbackChar);
474 |         glfwSetMonitorCallback(bd->PrevUserCallbackMonitor);
475 |     }
476 | 
477 |     for (ImGuiMouseCursor cursor_n = 0; cursor_n < ImGuiMouseCursor_COUNT; cursor_n++)
478 |         glfwDestroyCursor(bd->MouseCursors[cursor_n]);
479 |     io.BackendPlatformName = NULL;
480 |     io.BackendPlatformUserData = NULL;
481 |     io.BackendFlags &= ~(ImGuiBackendFlags_HasMouseCursors | ImGuiBackendFlags_HasSetMousePos | ImGuiBackendFlags_HasGamepad); // Stop advertising the capabilities this backend had set
482 |     IM_DELETE(bd);
483 | }
484 | 
485 | static void ImGui_ImplGlfw_UpdateMouseData()
486 | {
487 |     ImGuiIO& io = ImGui::GetIO();
488 |     ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
489 | 
490 | #ifdef __EMSCRIPTEN__
491 |     const bool has_focus = true;
492 | #else
493 |     const bool has_focus = (glfwGetWindowAttrib(bd->Window, GLFW_FOCUSED) != 0);
494 | #endif
495 |     // Neither of the two steps below applies while the window is not focused.
496 |     if (!has_focus)
497 |         return;
498 | 
499 |     // (Optional) Set OS mouse position from Dear ImGui if requested (rarely used, only when ImGuiConfigFlags_NavEnableSetMousePos is enabled by user)
500 |     if (io.WantSetMousePos)
501 |         glfwSetCursorPos(bd->Window, (double)io.MousePos.x, (double)io.MousePos.y);
502 | 
503 |     // (Optional) Fallback to provide mouse position when focused (ImGui_ImplGlfw_CursorPosCallback already provides this when hovered or captured)
504 |     if (bd->MouseWindow != NULL)
505 |         return;
506 |     double cursor_x, cursor_y;
507 |     glfwGetCursorPos(bd->Window, &cursor_x, &cursor_y);
508 |     io.AddMousePosEvent((float)cursor_x, (float)cursor_y);
509 | }
510 | 
511 | static void ImGui_ImplGlfw_UpdateMouseCursor()
512 | {
513 |     ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
514 |     ImGuiIO& io = ImGui::GetIO();
515 |     // Leave the OS cursor untouched when the application opted out, or when GLFW reports the cursor as disabled.
516 |     if ((io.ConfigFlags & ImGuiConfigFlags_NoMouseCursorChange) != 0 || glfwGetInputMode(bd->Window, GLFW_CURSOR) == GLFW_CURSOR_DISABLED)
517 |         return;
518 | 
519 |     const ImGuiMouseCursor wanted = ImGui::GetMouseCursor();
520 |     if (io.MouseDrawCursor || wanted == ImGuiMouseCursor_None)
521 |     {
522 |         // Hide OS mouse cursor if imgui is drawing it or if it wants no cursor
523 |         glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
524 |         return;
525 |     }
526 | 
527 |     // Show OS mouse cursor, using the Arrow when the requested shape is missing. FIXME-PLATFORM: Unfocused windows seems to fail changing the mouse cursor with GLFW 3.2, but 3.3 works here.
528 |     GLFWcursor* shape = bd->MouseCursors[wanted] ? bd->MouseCursors[wanted] : bd->MouseCursors[ImGuiMouseCursor_Arrow];
529 |     glfwSetCursor(bd->Window, shape);
530 |     glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_NORMAL);
531 | }
532 | 
533 | // Update gamepad inputs: forward the state of GLFW_JOYSTICK_1 to Dear ImGui as ImGuiKey_Gamepad* key events.
534 | static inline float Saturate(float v) { return v < 0.0f ? 0.0f : v  > 1.0f ? 1.0f : v; } // Clamp v to [0, 1]
535 | static void ImGui_ImplGlfw_UpdateGamepads()
536 | {
537 |     ImGuiIO& io = ImGui::GetIO();
538 |     if ((io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) == 0)
539 |         return; // Gamepad navigation is not enabled in io.ConfigFlags
540 |     // Both MAP_* variants take (key, gamepad-API index, raw joystick index) and ignore the index they do not use. MAP_ANALOG rescales the axis from [V0, V1] to [0, 1]; the key counts as down above 0.10.
541 |     io.BackendFlags &= ~ImGuiBackendFlags_HasGamepad; // Cleared first; set again below only once a gamepad state has been read
542 | #if GLFW_HAS_GAMEPAD_API
543 |     GLFWgamepadstate gamepad;
544 |     if (!glfwGetGamepadState(GLFW_JOYSTICK_1, &gamepad))
545 |         return; // No gamepad state could be read for GLFW_JOYSTICK_1
546 |     #define MAP_BUTTON(KEY_NO, BUTTON_NO, _UNUSED)          do { io.AddKeyEvent(KEY_NO, gamepad.buttons[BUTTON_NO] != 0); } while (0)
547 |     #define MAP_ANALOG(KEY_NO, AXIS_NO, _UNUSED, V0, V1)    do { float v = gamepad.axes[AXIS_NO]; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0)
548 | #else
549 |     int axes_count = 0, buttons_count = 0;
550 |     const float* axes = glfwGetJoystickAxes(GLFW_JOYSTICK_1, &axes_count);
551 |     const unsigned char* buttons = glfwGetJoystickButtons(GLFW_JOYSTICK_1, &buttons_count);
552 |     if (axes_count == 0 || buttons_count == 0)
553 |         return; // Joystick reported no axes or no buttons
554 |     #define MAP_BUTTON(KEY_NO, _UNUSED, BUTTON_NO)          do { io.AddKeyEvent(KEY_NO, (buttons_count > BUTTON_NO && buttons[BUTTON_NO] == GLFW_PRESS)); } while (0)
555 |     #define MAP_ANALOG(KEY_NO, _UNUSED, AXIS_NO, V0, V1)    do { float v = (axes_count > AXIS_NO) ? axes[AXIS_NO] : V0; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0)
556 | #endif
557 |     io.BackendFlags |= ImGuiBackendFlags_HasGamepad; // A gamepad state was read: advertise it
558 |     MAP_BUTTON(ImGuiKey_GamepadStart,       GLFW_GAMEPAD_BUTTON_START,          7);
559 |     MAP_BUTTON(ImGuiKey_GamepadBack,        GLFW_GAMEPAD_BUTTON_BACK,           6);
560 |     MAP_BUTTON(ImGuiKey_GamepadFaceDown,    GLFW_GAMEPAD_BUTTON_A,              0);     // Xbox A, PS Cross
561 |     MAP_BUTTON(ImGuiKey_GamepadFaceRight,   GLFW_GAMEPAD_BUTTON_B,              1);     // Xbox B, PS Circle
562 |     MAP_BUTTON(ImGuiKey_GamepadFaceLeft,    GLFW_GAMEPAD_BUTTON_X,              2);     // Xbox X, PS Square
563 |     MAP_BUTTON(ImGuiKey_GamepadFaceUp,      GLFW_GAMEPAD_BUTTON_Y,              3);     // Xbox Y, PS Triangle
564 |     MAP_BUTTON(ImGuiKey_GamepadDpadLeft,    GLFW_GAMEPAD_BUTTON_DPAD_LEFT,      13);
565 |     MAP_BUTTON(ImGuiKey_GamepadDpadRight,   GLFW_GAMEPAD_BUTTON_DPAD_RIGHT,     11);
566 |     MAP_BUTTON(ImGuiKey_GamepadDpadUp,      GLFW_GAMEPAD_BUTTON_DPAD_UP,        10);
567 |     MAP_BUTTON(ImGuiKey_GamepadDpadDown,    GLFW_GAMEPAD_BUTTON_DPAD_DOWN,      12);
568 |     MAP_BUTTON(ImGuiKey_GamepadL1,          GLFW_GAMEPAD_BUTTON_LEFT_BUMPER,    4);
569 |     MAP_BUTTON(ImGuiKey_GamepadR1,          GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER,   5);
570 |     MAP_ANALOG(ImGuiKey_GamepadL2,          GLFW_GAMEPAD_AXIS_LEFT_TRIGGER,     4,      -0.75f,  +1.0f);
571 |     MAP_ANALOG(ImGuiKey_GamepadR2,          GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER,    5,      -0.75f,  +1.0f);
572 |     MAP_BUTTON(ImGuiKey_GamepadL3,          GLFW_GAMEPAD_BUTTON_LEFT_THUMB,     8);
573 |     MAP_BUTTON(ImGuiKey_GamepadR3,          GLFW_GAMEPAD_BUTTON_RIGHT_THUMB,    9);
574 |     MAP_ANALOG(ImGuiKey_GamepadLStickLeft,  GLFW_GAMEPAD_AXIS_LEFT_X,           0,      -0.25f,  -1.0f);
575 |     MAP_ANALOG(ImGuiKey_GamepadLStickRight, GLFW_GAMEPAD_AXIS_LEFT_X,           0,      +0.25f,  +1.0f);
576 |     MAP_ANALOG(ImGuiKey_GamepadLStickUp,    GLFW_GAMEPAD_AXIS_LEFT_Y,           1,      -0.25f,  -1.0f);
577 |     MAP_ANALOG(ImGuiKey_GamepadLStickDown,  GLFW_GAMEPAD_AXIS_LEFT_Y,           1,      +0.25f,  +1.0f);
578 |     MAP_ANALOG(ImGuiKey_GamepadRStickLeft,  GLFW_GAMEPAD_AXIS_RIGHT_X,          2,      -0.25f,  -1.0f);
579 |     MAP_ANALOG(ImGuiKey_GamepadRStickRight, GLFW_GAMEPAD_AXIS_RIGHT_X,          2,      +0.25f,  +1.0f);
580 |     MAP_ANALOG(ImGuiKey_GamepadRStickUp,    GLFW_GAMEPAD_AXIS_RIGHT_Y,          3,      -0.25f,  -1.0f);
581 |     MAP_ANALOG(ImGuiKey_GamepadRStickDown,  GLFW_GAMEPAD_AXIS_RIGHT_Y,          3,      +0.25f,  +1.0f);
582 |     #undef MAP_BUTTON
583 |     #undef MAP_ANALOG
584 | }
585 | 
586 | void ImGui_ImplGlfw_NewFrame()
587 | {
588 |     ImGuiIO& io = ImGui::GetIO();
589 |     ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
590 |     IM_ASSERT(bd != NULL && "Did you call ImGui_ImplGlfw_InitForXXX()?");
591 | 
592 |     // Setup display size (every frame to accommodate for window resizing)
593 |     int w, h, display_w, display_h;
594 |     glfwGetWindowSize(bd->Window, &w, &h);
595 |     glfwGetFramebufferSize(bd->Window, &display_w, &display_h);
596 |     io.DisplaySize = ImVec2((float)w, (float)h);
597 |     if (w > 0 && h > 0)
598 |         io.DisplayFramebufferScale = ImVec2((float)display_w / (float)w, (float)display_h / (float)h);
599 | 
600 |     // Setup time step. Dear ImGui expects a strictly positive DeltaTime, but glfwGetTime() can return a value that is
601 |     // not greater than the previous one (e.g. after glfwSetTime()), so nudge the clock forward in that case.
602 |     double current_time = glfwGetTime();
603 |     if (current_time <= bd->Time)
604 |         current_time = bd->Time + 0.00001f;
605 |     io.DeltaTime = bd->Time > 0.0 ? (float)(current_time - bd->Time) : (float)(1.0f / 60.0f);
606 |     bd->Time = current_time;
607 | 
608 |     ImGui_ImplGlfw_UpdateMouseData();
609 |     ImGui_ImplGlfw_UpdateMouseCursor();
610 |     ImGui_ImplGlfw_UpdateGamepads(); // Update game controllers (if enabled and available)
611 | }
612 | 
613 | #if defined(__clang__)
614 | #pragma clang diagnostic pop
615 | #endif
616 | 


--------------------------------------------------------------------------------
/scripts/build_cuda.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | BUILD_DIR="build_cuda"
 8 | render=on
 9 | # Optional first argument: "no_render" configures the build with -DRENDER=off; anything else is an error.
10 | if [ -n "$1" ]; then
11 | 	if [ "$1" = "no_render" ]; then
12 | 		render=off
13 | 	else
14 | 		echo "Unknown param $1" >&2
15 | 		exit 1
16 | 	fi
17 | fi
18 | # Always configure from an empty build directory.
19 | rm -rf "$BUILD_DIR"
20 | mkdir "$BUILD_DIR"
21 | cd "$BUILD_DIR" || exit 1
22 | 
23 | cmake ../ \
24 | -DRENDER=${render} \
25 | -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \
26 | -DCMAKE_EXPORT_COMPILE_COMMANDS=on || exit
27 | 
28 | make release
29 | 


--------------------------------------------------------------------------------
/scripts/build_dpcpp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | BUILD_DIR="build_dpcpp"
 8 | render=on
 9 | # Optional first argument: "no_render" configures the build with -DRENDER=off; anything else is an error.
10 | if [ -n "$1" ]; then
11 | 	if [ "$1" = "no_render" ]; then
12 | 		render=off
13 | 	else
14 | 		echo "Unknown param $1" >&2
15 | 		exit 1
16 | 	fi
17 | fi
18 | # Always configure from an empty build directory.
19 | rm -rf "$BUILD_DIR"
20 | mkdir "$BUILD_DIR"
21 | cd "$BUILD_DIR" || exit 1
22 | 
23 | #CXX=clang++ \
24 | #CC=clang \
25 | CXX=icpx \
26 | CC=icpx \
27 | cmake ../ \
28 | -DRENDER=${render} \
29 | -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \
30 | -DBACKEND=DPCPP -DDPCPP_CUDA_SUPPORT=on || exit
31 | 
32 | make release
33 | 


--------------------------------------------------------------------------------
/scripts/docker_build_etc.sh:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2022 Codeplay Software Limited
 2 | # This work is licensed under the terms of the MIT license.
 3 | # For a copy, see https://opensource.org/licenses/MIT.
 4 | 
 5 | # non-functional code! This is a sketch of how to do the dpct conversion properly in a container
 6 | #
 7 | # Should be run with something like:
 8 | #
 9 | # docker run --rm \
10 | #     -v /opt/intel/oneapi/:/opt/intel/oneapi/ \
11 | #     -v $PWD:$PWD \
12 | #     -u $UID \
13 | #     -i joeatodd/onednn-cuda \
14 | #     bash < scripts/docker_build_etc.sh
15 | 
16 | 
17 | # Navigate to relevant directory
18 | 
19 | cd "${SRC_DIR:?SRC_DIR must point at the repository root}" || exit 1
20 | 
21 | # Call cmake on it
22 | bash scripts/build_cuda.sh || exit 1
23 | 
24 | # Call "intercept-build make" in the build dir created by build_cuda.sh (BUILD_DIR="build_cuda")
25 | cd build_cuda || exit 1
26 | make clean
27 | intercept-build make
28 | 
29 | # Do conversion w/ -p
30 | 


--------------------------------------------------------------------------------
/scripts/perf_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # Get rid of any previous virtual frame buffer
 8 | pkill -9 Xvfb
 9 | rm -f /var/tmp/Xvfb_screen_0
10 | trap 'kill $(jobs -p) 2>/dev/null' EXIT  # stop the background Xvfb and nbody jobs when this script exits
11 | # Create a virtual screen :99.0 with given dimensions & color depth
12 | # mapping output to /var/tmp/Xvfb_screen_0
13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &
14 | 
15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
16 | DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
17 | #DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &
18 | 
19 | # To take a screenshot instead of a video (doesn't always work):
20 | # sleep 2
21 | # DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png
22 | 
23 | # Use the x11grab device to write to video file
24 | ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4
25 | 


--------------------------------------------------------------------------------
/scripts/perf_test_cuda.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # Get rid of any previous virtual frame buffer
 8 | pkill -9 Xvfb
 9 | rm -f /var/tmp/Xvfb_screen_0
10 | trap 'kill $(jobs -p) 2>/dev/null' EXIT  # stop the background Xvfb once the simulation has finished
11 | # Create a virtual screen :99.0 with given dimensions & color depth
12 | # mapping output to /var/tmp/Xvfb_screen_0
13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &
14 | 
15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
16 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
17 | DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0
18 | 


--------------------------------------------------------------------------------
/scripts/perf_test_dpcpp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # Get rid of any previous virtual frame buffer
 8 | pkill -9 Xvfb
 9 | rm -f /var/tmp/Xvfb_screen_0
10 | trap 'kill $(jobs -p) 2>/dev/null' EXIT  # stop the background Xvfb once the simulation has finished
11 | # Create a virtual screen :99.0 with given dimensions & color depth
12 | # mapping output to /var/tmp/Xvfb_screen_0
13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &
14 | 
15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
16 | DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0
17 | #DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &
18 | 


--------------------------------------------------------------------------------
/scripts/run_dpct.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options
 8 | # for intercepting complex builds, but current dev environment restrictions require me to run dpct inside
 9 | # a docker container. This complicates things, so for now I'm just doing single source conversion on the 
10 | # simulator.cu file.
11 | #
12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges.
13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality.
14 | #
15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo.
16 | # As such, we suppress generation of new helper
17 | # headers when calling dpct with `--use-custom-helper=none`.
18 | 
19 | rm -f src_sycl/*.[ch]pp src_sycl/*.yaml  # paths are relative: run this script from the repository root
20 | (cd src_sycl && ln -s ../src/*[ch]pp .) || exit 1  # subshell: only link if the cd worked, and the caller's cwd is untouched
21 | 
22 | docker run --rm \
23 |     -v /opt/intel/oneapi/dpcpp-ct/2023.1.0/:/dpcpp-ct \
24 |     -v "$PWD":/nbody/ \
25 |     -u "$UID" \
26 |     -it joeatodd/onednn-cuda \
27 |     /dpcpp-ct/bin/dpct --out-root=/nbody/src_sycl \
28 |     --assume-nd-range-dim=1 \
29 |     --use-custom-helper=none \
30 |     --stop-on-parse-err \
31 |     --sycl-named-lambda \
32 |     /nbody/src/simulator.cu
33 | 
34 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp
35 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp
36 | 
37 | # -p=/nbody/build \
38 | # --optimize-migration
39 | 


--------------------------------------------------------------------------------
/scripts/run_dpct_native.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options
 8 | # for intercepting complex builds, but for now this script just does single source conversion on the
 9 | # simulator.cu file. Unlike run_dpct.sh, which runs dpct inside a docker container, this variant calls
10 | # the dpct executable found on PATH directly.
11 | #
12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges.
13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality.
14 | #
15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo.
16 | # As such, we suppress generation of new helper
17 | # headers when calling dpct with `--use-custom-helper=none`.
18 | 
19 | export NBODY_DIR="$PWD"
20 | 
21 | cd "$NBODY_DIR" || exit 1
22 | 
23 | rm -f src_sycl/*.[ch]pp src_sycl/*.yaml  # paths are relative: run this script from the repository root
24 | (cd src_sycl && ln -s ../src/*[ch]pp .) || exit 1  # subshell: only link if the cd worked, and the caller's cwd is untouched
25 | 
26 | dpct --out-root=./src_sycl \
27 |     --assume-nd-range-dim=1 \
28 |     --use-custom-helper=none \
29 |     --stop-on-parse-err \
30 |     --sycl-named-lambda \
31 |     ./src/simulator.cu
32 | 
33 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp
34 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp
35 | 
36 | # -p=/nbody/build \
37 | # --optimize-migration
38 | 


--------------------------------------------------------------------------------
/scripts/run_nbody.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # This script runs a particular version of the nbody simulation
 8 | # depending on the -b flag. All subsequent positional args are
 9 | # passed on to nbody. See ../README.md for a description of these
10 | # positional args.
11 | #
12 | # ./scripts/run_nbody.sh -b dpcpp 50 5 0.999 0.001 1.0e-3 2.0
13 | 
14 | while getopts b: flag
15 | do
16 |     case "${flag}" in
17 |     b) backend=${OPTARG};;
18 |     esac
19 | done
20 | # Drop exactly what getopts consumed, so "-b cuda", "-bcuda" and a trailing "--" all leave only the nbody args.
21 | shift $((OPTIND - 1))
22 | 
23 | case "$backend" in
24 |     cuda) ./nbody_cuda "$@";;
25 |     dpcpp) SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp "$@";;
26 |     *) echo "Bad backend"; exit 1;;
27 | esac
28 | 


--------------------------------------------------------------------------------
/scripts/xvfb.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (C) 2022 Codeplay Software Limited
 4 | # This work is licensed under the terms of the MIT license.
 5 | # For a copy, see https://opensource.org/licenses/MIT.
 6 | 
 7 | # Get rid of any previous virtual frame buffer
 8 | pkill -9 Xvfb
 9 | rm -f /var/tmp/Xvfb_screen_0
10 | trap 'kill $(jobs -p) 2>/dev/null' EXIT  # stop the background Xvfb and nbody jobs when this script exits
11 | # Create a virtual screen :99.0 with given dimensions & color depth
12 | # mapping output to /var/tmp/Xvfb_screen_0
13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &
14 | 
15 | # Run the nbody simulation on this screen
16 | DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &
17 | #DISPLAY=:99.0 ./nbody_cuda 250 5 0.999 0.001 1.0e-3 2.0 &
18 | 
19 | # To take a screenshot instead of a video (doesn't always work):
20 | # sleep 2
21 | # DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png
22 | 
23 | # Use the x11grab device to write to video file
24 | ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4
25 | 


--------------------------------------------------------------------------------
/shaders/gl/blur.frag:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout (binding = 0) uniform sampler2D tex;
 5 | 
 6 | layout (location = 0) uniform vec2 size;
 7 | layout (location = 1) uniform vec2 mult;
 8 | layout (location = 2) uniform int kHalfWidth;
 9 | // Maximum length of gauss kernel sample = 100
10 | layout (location = 3) uniform float[100] offset;
11 | layout (location = 103) uniform float[100] weight;
12 | 
13 | in vec2 pass_tc;
14 | 
15 | out vec4 out_color;
16 | 
17 | vec4 contribute(float offset, float weight)
18 | {
19 |   // Sum of the two taps at pass_tc +/- offset*mult*size, scaled by weight.
20 |   vec2 delta = offset*mult*size;
21 |   return (texture(tex, pass_tc+delta)+texture(tex, pass_tc-delta))*weight;
22 | }
23 | 
24 | void main()
25 | {
26 |   out_color = texture(tex, pass_tc) * weight[0];  // center tap
27 |   for(int i = 1; i < min(kHalfWidth, weight.length()); i++){  // clamp: never index past the 100-entry uniform arrays
28 |     out_color += contribute(offset[i], weight[i]);
29 |   }
30 | }


--------------------------------------------------------------------------------
/shaders/gl/deferred.vert:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout (location = 0) in vec2 in_pos;
 5 | 
 6 | out vec2 pass_tc;
 7 | 
 8 | void main()
 9 | {
10 |   pass_tc = 0.5*in_pos+0.5;  // x*0.5+0.5 maps [-1,1] onto [0,1]
11 |   gl_Position = vec4(in_pos,0.0,1.0);
12 | }


--------------------------------------------------------------------------------
/shaders/gl/integration.comp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout (location = 0) uniform float dt; 
 5 | 
 6 | layout (local_size_x = 256) in;
 7 | 
 8 | layout (binding = 0, std430) buffer particles_in
 9 | {
10 |   vec4 part_in[];
11 | };
12 | 
13 | layout (binding = 1, std430) buffer particles_vel
14 | {
15 |   vec4 part_vel[];
16 | };
17 | 
18 | void main()
19 | {
20 |   uint id = gl_GlobalInvocationID.x;
21 |   if (id >= uint(min(part_in.length(), part_vel.length()))) return;  // invocation past the end of either buffer: nothing to integrate
22 |   part_in[id].xyz += dt*part_vel[id].xyz;  // position += dt * velocity; the w component is left untouched
23 | }


--------------------------------------------------------------------------------
/shaders/gl/interaction.comp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | #define EPS2 0.2
 5 | 
 6 | layout (location = 0) uniform float dt;
 7 | layout (location = 1) uniform float G;
 8 | layout (location = 2) uniform float damping;
 9 | 
10 | layout (local_size_x = 256) in;
11 | 
12 | layout (binding = 0, std430) buffer particles_in
13 | {
14 |   vec4 part_in[];
15 | };
16 | 
17 | layout (binding = 1, std430) buffer particles_vel
18 | {
19 |   vec4 part_vel[];
20 | };
21 | 
22 | shared vec4 cache[gl_WorkGroupSize.x];
23 | 
24 | vec3 interaction(in vec4 p1,in vec4 p2)
25 | {
26 |   // Returns r / (|r|^2 + EPS2)^(3/2) with r = p2.xyz - p1.xyz.
27 |   // With EPS2 in the denominator the result is zero (not NaN) when both positions coincide.
28 |   vec3 delta = p2.xyz - p1.xyz;
29 |   float d2 = dot(delta,delta) + EPS2;
30 |   return delta*inversesqrt(d2*d2*d2);
31 | }
32 | 
33 | void main()
34 | {
35 |   uint id = gl_GlobalInvocationID.x;
36 |   vec4 p1 = part_in[id];
37 |   vec3 vel = vec3(0.0);
38 |   for (uint i=0;i<gl_NumWorkGroups.x*gl_WorkGroupSize.x;i+=gl_WorkGroupSize.x)
39 |   {
40 |     vec4 p = part_in[i+gl_LocalInvocationIndex];
41 |     cache[gl_LocalInvocationIndex] = vec4(p.xyz,1.0);
42 |     groupMemoryBarrier();
43 |     barrier();
44 |     for (int j=0;j<gl_WorkGroupSize.x;++j)
45 |     {
46 |       vel += interaction(p1, cache[j]);
47 |     }
48 |     barrier();  // everyone must be done reading this tile before the next iteration overwrites cache[]
49 |   }
50 |   vec3 final_vel = dt*vel*G+part_vel[id].xyz*damping;
51 |   part_vel[id] = vec4(final_vel,0.0);
52 | }


--------------------------------------------------------------------------------
/shaders/gl/luminance.frag:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout (binding = 0) uniform sampler2D tex;
 5 | 
 6 | out float lum;
 7 | 
 8 | void main(void)
 9 | {
10 |   const vec3 luma_weights = vec3(0.2126,0.7152, 0.0722);  // these values match the Rec. 709 luma coefficients
11 |   vec2 texel = gl_FragCoord.xy*2+vec2(0.5);
12 |   vec2 uv = texel/textureSize(tex, 0);
13 |   vec3 rgb = textureLod(tex, uv, 0).rgb;
14 |   lum = dot(luma_weights, rgb);
15 | }


--------------------------------------------------------------------------------
/shaders/gl/main.frag:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | in vec2 bary;
 5 | in vec4 color;
 6 | 
 7 | layout (binding = 0) uniform sampler2D tex;
 8 | 
 9 | out vec4 out_color;
10 | 
11 | void main()
12 | {
13 |   // Scale the incoming color by the red channel of tex sampled at bary; alpha is fixed to 1.
14 |   out_color = vec4(texture(tex, bary).r*color.rgb, 1.0);
15 | }


--------------------------------------------------------------------------------
/shaders/gl/main.geom:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout(points) in;
 5 | layout(triangle_strip, max_vertices = 4) out;
 6 | 
 7 | layout (location = 8) uniform vec2 flare_size;
 8 | 
 9 | in vec4 pass_pos[];
10 | in vec4 pass_col[];
11 | 
12 | out vec4 color;
13 | out vec2 bary;
14 | 
15 | void main()
16 | {
17 |   vec4 pos = pass_pos[0];
18 |   vec2 f = flare_size*pass_pos[0].w;
19 |   // Output variables are undefined after each EmitVertex(), so color (like bary and
20 |   // gl_Position) has to be written again for every corner of the quad.
21 |   color = pass_col[0];
22 |   gl_Position = pos+vec4(-f.x,-f.y,0,0);
23 |   bary = vec2(0,0);
24 |   EmitVertex();
25 |   color = pass_col[0];
26 |   gl_Position = pos+vec4(+f.x,-f.y,0,0);
27 |   bary = vec2(+1,0);
28 |   EmitVertex();
29 |   color = pass_col[0];
30 |   gl_Position = pos+vec4(-f.x,+f.y,0,0);
31 |   bary = vec2(0,+1);
32 |   EmitVertex();
33 |   color = pass_col[0];
34 |   gl_Position = pos+vec4(+f.x,+f.y,0,0);
35 |   bary = vec2(+1,+1);
36 |   EmitVertex();
37 | 
38 |   EndPrimitive();
39 | }


--------------------------------------------------------------------------------
/shaders/gl/main.vert:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | layout (location = 0) uniform mat4 mv;
 5 | layout (location = 4) uniform mat4 p;
 6 | 
 7 | layout (location = 0) in vec4 pos;
 8 | layout (location = 1) in vec4 vel;
 9 | 
10 | out vec4 pass_pos;
11 | out vec4 pass_col;
12 | 
13 | void main()
14 | {
15 |   // slow->blue, fast->purple
16 |   const vec3 slow_color = vec3(0,0.4,1);
17 |   const vec3 fast_color = vec3(1,0.2,1);
18 |   float speed_factor = clamp(dot(vel,vel)*0.0006,0,1);
19 |   pass_col = vec4(mix(slow_color,fast_color,speed_factor),1.0);
20 |   pass_pos = p*mv*pos;
21 | }


--------------------------------------------------------------------------------
/shaders/gl/tonemap.frag:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | #version 450 core
 3 | 
 4 | const int FBO_MARGIN = 50;
 5 | 
 6 | layout (binding = 0) uniform sampler2D hdr;
 7 | layout (binding = 1) uniform sampler2D bloom;
 8 | layout (binding = 2) uniform sampler2D lum;
 9 | 
10 | layout (location = 0) uniform int lum_lod;
11 | 
12 | in vec2 pass_tc;
13 | 
14 | out vec4 out_color;
15 | 
16 | void main()
17 | {
18 |   // Texel of the hdr texture for this fragment, shifted by FBO_MARGIN on both axes
19 |   ivec2 texel = ivec2(gl_FragCoord.xy)+ivec2(FBO_MARGIN);
20 |   vec3 radiance = texelFetch(hdr,texel,0).rgb;
21 | 
22 |   // Exposure from the lum texture sampled at its center, at mip level lum_lod
23 |   float avg_lum = textureLod(lum, vec2(0.5), lum_lod).r;
24 |   float exposure = 1.0/clamp(avg_lum*10, 0.2,1000.0);
25 | 
26 |   // Add bloom, then apply 1 - exp(-x) tone mapping and a 1/2.2 gamma curve
27 |   radiance += texture(bloom, vec2(texel)/textureSize(hdr, 0)).rgb;
28 |   vec3 mapped = vec3(1.0)- exp(-radiance*exposure);
29 |   out_color = vec4(pow(mapped, vec3(1.0/2.2)), 1.0);
30 | }


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | # Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | find_package(PkgConfig REQUIRED)
 5 | # Rendering dependencies are only looked up when RENDER is set
 6 | if (RENDER) 
 7 |   pkg_check_modules(Glew REQUIRED IMPORTED_TARGET glew)
 8 | 
 9 |   find_package(glm REQUIRED)
10 |   find_package(glfw3 REQUIRED)
11 |   find_package(OpenGL REQUIRED)
12 | endif()
13 | # CUDA is required with and without rendering
14 | find_package(CUDA REQUIRED)
15 | 
16 | set(COMMON_SOURCE 
17 |   nbody.cpp 
18 |   sim_param.cpp 
19 |   simulator.cu)
20 | set(OPENGL_SOURCE 
21 |   camera.cpp 
22 |   gen.cpp 
23 |   renderer_gl.cpp 
24 |   shader.cpp)
25 | # Fallback: define glm::glm from GLM_INCLUDE_DIR if no such target exists yet
26 | if(NOT TARGET glm::glm)
27 |   add_library(glm::glm IMPORTED INTERFACE)
28 |   target_include_directories(glm::glm INTERFACE ${GLM_INCLUDE_DIR})
29 | endif()
30 | 
31 | set(DEBUG_FLAGS -g -O0)
32 | 
33 | # RENDER selects the libraries, the compile definition and the source list
34 | if (RENDER) 
35 |   set(RENDER_LIB  glm::glm glfw PkgConfig::Glew OpenGL::OpenGL cuda)
36 |   set(RENDER_FLAG -DUSE_OPENGL)
37 |   set(SOURCE_FILES ${COMMON_SOURCE} ${OPENGL_SOURCE})
38 | else()
39 |   set(RENDER_LIB  cuda)
40 |   set(RENDER_FLAG DISABLE_GL)
41 |   set(SOURCE_FILES ${COMMON_SOURCE})
42 | endif()
43 | # Main binary, built with -use_fast_math; `release` is an alias target for it
44 | add_custom_target(release DEPENDS ${BINARY_NAME})
45 | add_executable(${BINARY_NAME} ${SOURCE_FILES})
46 | # COMPILER_NAME here is only used to print text overlay on simulation
47 | target_compile_definitions(${BINARY_NAME} PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA")
48 | target_link_libraries(${BINARY_NAME} PRIVATE ${RENDER_LIB})
49 | target_compile_features(${BINARY_NAME} PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
50 | target_include_directories(${BINARY_NAME} PRIVATE ${CUDA_INCLUDE_DIRS})
51 | target_compile_options(${BINARY_NAME} PRIVATE -use_fast_math)
52 | # Debug binary (${BINARY_NAME}_d), built with DEBUG_FLAGS; `debug` aliases it
53 | add_custom_target(debug DEPENDS ${BINARY_NAME}_d)
54 | add_executable(${BINARY_NAME}_d ${SOURCE_FILES})
55 | # COMPILER_NAME here is only used to print text overlay on simulation
56 | target_compile_definitions(${BINARY_NAME}_d PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA")
57 | target_link_libraries(${BINARY_NAME}_d PRIVATE ${RENDER_LIB})
58 | target_compile_features(${BINARY_NAME}_d PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
59 | target_include_directories(${BINARY_NAME}_d PRIVATE ${CUDA_INCLUDE_DIRS})
60 | target_compile_options(${BINARY_NAME}_d PRIVATE ${DEBUG_FLAGS})
61 | 


--------------------------------------------------------------------------------
/src/camera.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | 
 3 | #include "camera.hpp"
 4 | 
 5 | #include <algorithm>
 6 | #include <glm/gtc/matrix_transform.hpp>
 7 | 
 8 | const float PI = 3.14159265358979323846;  // double literal, stored as float
 9 | 
10 | using namespace std;
11 | 
12 | Camera::Camera() {
13 |   // Orbit state (see getCartesianCoordinates): x and y are angles in
14 |   // radians, z is the distance from the look-at point.
15 |   position = glm::vec3(0.0f, PI / 4, 50.0f);
16 |   // The camera starts at rest, looking at the origin.
17 |   velocity    = glm::vec3(0.0f);
18 |   look_at     = glm::vec3(0.0f);
19 |   look_at_vel = glm::vec3(0.0f);
20 | }
21 | 
22 | void Camera::step() {
23 |   // Integrate: the angles move linearly, the distance scales multiplicatively
24 |   position.x -= velocity.x;
25 |   position.y -= velocity.y;
26 |   position.z *= (1.0 - velocity.z);
27 |   look_at += look_at_vel;
28 |   // Damping
29 |   velocity *= 0.72;
30 |   look_at_vel *= 0.90;
31 |   // Wrap the azimuth once, then keep the elevation just inside +-PI/2
32 |   const float two_pi = 2 * PI, max_elev = (float)PI / 2 - 0.001f;
33 |   if (position.x < 0) position.x += two_pi;
34 |   if (position.x >= two_pi) position.x -= two_pi;
35 |   position.y = max(-(float)PI / 2 + 0.001f, min(position.y, max_elev));
36 | }
37 | 
38 | glm::mat4 Camera::getProj(int width, int height) {
39 |   const float aspect = width / (float)height;  // viewport aspect ratio
40 |   return glm::infinitePerspective(glm::radians(30.0f), aspect, 1.f);
41 | }
42 | 
43 | glm::vec3 getCartesianCoordinates(glm::vec3 v) {  // v = (angle, angle, radius)
44 |   return v.z * glm::vec3(cos(v.x) * cos(v.y), sin(v.x) * cos(v.y), sin(v.y));
45 | }
46 | 
47 | glm::mat4 Camera::getView() {
48 |   // Eye = orbit offset (polar -> cartesian) + look-at target; +Z is up
49 |   const glm::vec3 eye = getCartesianCoordinates(position) + look_at;
50 |   const glm::vec3 up(0, 0, 1);
51 |   return glm::lookAt(eye, look_at, up);
52 | }
53 | 
54 | glm::vec3 Camera::getForward() {  // unit vector from the eye towards look_at
55 |   return -glm::normalize(getCartesianCoordinates(position));
56 | }
57 | 
58 | glm::vec3 Camera::getRight() {
59 |   const glm::vec3 offset = getCartesianCoordinates(position), up(0, 0, 1);
60 |   return glm::normalize(glm::cross(offset, up));  // offset x world +Z
61 | }
62 | 
63 | glm::vec3 Camera::getUp() {
64 |   const glm::vec3 offset = getCartesianCoordinates(position);
65 |   return glm::normalize(glm::cross(offset, getRight()));  // offset x right
66 | }
67 | 
68 | void Camera::addVelocity(glm::vec3 vel) { velocity = velocity + vel; }  // consumed by step()
69 | 
70 | void Camera::addLookAtVelocity(glm::vec3 vel) { look_at_vel = look_at_vel + vel; }  // consumed by step()
71 | 
72 | glm::vec3 Camera::getPosition() { return this->position; }  // raw polar state, not cartesian
73 | 


--------------------------------------------------------------------------------
/src/camera.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <glm/glm.hpp>
 6 | 
 7 | class Camera {
 8 |   public:
 9 |     Camera();  ///< Starts at rest, looking at the origin
10 | 
11 |     /**
12 |      * Advances the camera state by one step, in place:
13 |      * applies the velocities, damps them, then wraps the
14 |      * first angle and clamps the second one
15 |      */
16 |     void step();
17 | 
18 |     /**
19 |      * Computes projection matrix from camera parameters
20 |      * (infinite perspective, 30 degree field of view)
21 |      * @param width viewport width
22 |      * @param height viewport height
23 |      * @return projection matrix
24 |      */
25 |     glm::mat4 getProj(int width, int height);
26 | 
27 |     /**
28 |      * Computes view matrix from camera parameters
29 |      * (the eye orbits look_at, +Z is up)
30 |      * @return view matrix
31 |      */
32 |     glm::mat4 getView();
33 |     /// Unit vectors derived from the eye offset relative to look_at
34 |     glm::vec3 getForward();
35 |     glm::vec3 getRight();
36 |     glm::vec3 getUp();
37 |     /// Raw polar state (angle, angle, distance), not a cartesian point
38 |     glm::vec3 getPosition();
39 |     /// Accumulate into the polar / look-at velocities applied by step()
40 |     void addVelocity(glm::vec3 vel);
41 |     void addLookAtVelocity(glm::vec3 vel);
42 | 
43 |   private:
44 |     glm::vec3 position;     ///< Polar state: x, y angles in radians, z distance
45 |     glm::vec3 velocity;     ///< Per-step change of the polar state
46 |     glm::vec3 look_at;      ///< Point the camera is looking at
47 |     glm::vec3 look_at_vel;  ///< Per-step change of the look-at point
48 | };
49 | 


--------------------------------------------------------------------------------
/src/gen.cpp:
--------------------------------------------------------------------------------
 1 | #include "gen.hpp"
 2 | 
 3 | #include <random>
 4 | 
 5 | const float PI = 3.14159265358979323846;  // double literal, stored as float
 6 | 
 7 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 8 | // Copyright (C) 2022 Codeplay Software Limited
 9 | 
10 | using namespace std;
11 | 
12 | mt19937 rng;  // default-constructed: never explicitly seeded in this file
13 | uniform_real_distribution<> dis(0, 1);  // drawn from by randomParticlePos()
14 | 
15 | glm::vec4 randomParticlePos() {
16 |   // Random angle and radius in the XY plane plus a small height: a
17 |   // 'thick disk'. The three draws happen in this order: angle, radius, z.
18 |   const float angle = dis(rng) * 2 * PI;
19 |   const float radius = dis(rng) * 100;
20 |   const float px = cos(angle) * radius;
21 |   const float py = sin(angle) * radius;
22 |   const float pz = dis(rng) * 4;
23 | 
24 |   // w is fixed at 1
25 |   return glm::vec4(px, py, pz, 1.f);
26 | }
27 | 
28 | glm::vec4 randomParticleVel(glm::vec4 pos) {
29 |   // Tangential ('orbital') velocity; zero on the Z axis, where it is undefined
30 |   const glm::vec3 tangent = glm::cross(glm::vec3(pos), glm::vec3(0, 0, 1));
31 |   const float dist = glm::length(tangent);
32 |   if (dist == 0.0f) return glm::vec4(0.0f);  // normalize(0) would yield NaN
33 |   return glm::vec4(glm::normalize(tangent) * float(sqrt(2.0 * dist)), 0.0);
34 | }
35 | 
36 | std::vector<float> genFlareTex(int tex_size) {
37 |   // Row-major square grid: gamma corrected gaussian centred on texel tex_size / 2
38 |   const float amplitude = 1.0, two_sigma2 = 2 * float(tex_size / 2.0);
39 |   const int centre = tex_size / 2;
40 |   std::vector<float> texels(tex_size * tex_size);
41 |   for (int row = 0; row < tex_size; ++row) {
42 |     const float dy = row - centre;
43 |     for (int col = 0; col < tex_size; ++col) {
44 |       const float dx = col - centre;
45 |       const float gauss =
46 |         amplitude * exp(-((dy * dy) / two_sigma2 + (dx * dx) / two_sigma2));
47 |       texels[row * tex_size + col] = pow(gauss, 2.2);
48 |     }
49 |   }
50 |   return texels;
51 | }
52 | 


--------------------------------------------------------------------------------
/src/gen.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | #pragma once
 5 | #include <glm/glm.hpp>
 6 | #include <vector>
 7 | 
 8 | /**
 9 |  * Generates a random particle position
10 |  * @return 3D position + w component at 1.f
11 |  */
12 | glm::vec4 randomParticlePos();
13 | 
14 | /**
15 |  * Derives a particle's initial ('orbital') velocity from its position
16 |  * @param pos the same particle's position
17 |  * @return 3D velocity + w component at 0.f
18 |  */
19 | glm::vec4 randomParticleVel(glm::vec4 pos);
20 | /** Generates a size x size flare texture, one float per texel, row by row */
21 | std::vector<float> genFlareTex(int size);
22 | 


--------------------------------------------------------------------------------
/src/nbody.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
  2 | // Copyright (C) 2022 Codeplay Software Limited
  3 | 
  4 | #include <iostream>
  5 | #include <chrono>
  6 | #include <cstdlib>
  7 | 
  8 | #ifndef DISABLE_GL
  9 | #include <GL/glew.h>
 10 | 
 11 | #include "renderer_gl.hpp"
 12 | #include <GLFW/glfw3.h>
 13 | #include <glm/glm.hpp>
 14 | #include "camera.hpp"
 15 | #include "gen.hpp"
 16 | #else
 17 | #include <cmath>
 18 | #endif
 19 | 
 20 | #include <thread>
 21 | #include <vector>
 22 | #include <numeric>
 23 | #include <algorithm>
 24 | 
 25 | #include "sim_param.hpp"
 26 | #include "simulator.cuh"
 27 | 
 28 | using namespace std;
 29 | using namespace simulation;
 30 | 
 31 | int main(int argc, char **argv) {
 32 |   SimParam params;
 33 |   params.parseArgs(argc, argv);
 34 | 
 35 |   DiskGalaxySimulator nbodySim(params);
 36 | 
 37 | #ifndef DISABLE_GL
 38 |   // Window initialization. Any GLFW error is printed and ends the process.
 39 |   glfwSetErrorCallback([](const int error, const char *msg) {
 40 |       cout << "Error id : " << error << ", " << msg << endl;
 41 |       exit(-1);
 42 |       });
 43 | 
 44 |   if (!glfwInit()) {
 45 |     cout << "GLFW can't initialize" << endl;
 46 |     return -1;
 47 |   }
 48 | 
 49 |   // Without a primary monitor there is no video mode to size the window from
 50 |   GLFWmonitor *monitor = glfwGetPrimaryMonitor();
 51 |   const GLFWvidmode *mode = monitor ? glfwGetVideoMode(monitor) : nullptr;
 52 |   if (!mode) {
 53 |     cout << "GLFW can't find a primary monitor" << endl;
 54 |     glfwTerminate();
 55 |     return -1;
 56 |   }
 57 | 
 58 |   glfwWindowHint(GLFW_RED_BITS, mode->redBits);
 59 |   glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits);
 60 |   glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits);
 61 |   glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate);
 62 |   glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
 63 |   RendererGL renderer;
 64 |   renderer.initWindow();
 65 | 
 66 |   int width = mode->width;
 67 |   int height = mode->height - 30;
 68 |   GLFWwindow *window =
 69 |     glfwCreateWindow(width, height, "N-Body Simulation", NULL, NULL);
 70 |   if (!window) {
 71 |     cout << "GLFW can't create the window" << endl;
 72 |     glfwTerminate();
 73 |     return -1;
 74 |   }
 75 |   glfwMakeContextCurrent(window);
 76 | 
 77 |   renderer.init(window, width, height, nbodySim);
 78 |   renderer.initImgui(window);
 79 |   renderer.updateParticles();  // initial positions generated in simulator ctor
 80 |   Camera camera;
 81 | #endif
 82 | 
 83 |   std::vector<float> stepTimes;
 84 |   int step{0};
 85 | 
 86 |   // Main loop: runs numFrames steps (the GL build also stops on ESC or
 87 |   // when the window is closed)
 88 |   float stepTime = 0.0;
 89 | 
 90 | #ifndef DISABLE_GL
 91 |   while (!glfwWindowShouldClose(window) &&
 92 |       glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_RELEASE &&
 93 |       step < params.numFrames) {
 94 | #else
 95 |     while ( step < params.numFrames) {
 96 | #endif
 97 |       nbodySim.stepSim();
 98 | #ifndef DISABLE_GL
 99 |       renderer.updateParticles();
100 |       renderer.render(camera.getProj(width, height), camera.getView());
101 | #endif
102 |       // The displayed kernel time is refreshed every 20th step only
103 |       if(!(step % 20)) stepTime = nbodySim.getLastStepTime();
104 | #ifndef DISABLE_GL
105 |       renderer.printKernelTime(stepTime);
106 | #endif
107 | 
108 |       // Mean / standard deviation of the kernel time over all steps after
109 |       // the first warmSteps ones, printed once per step
110 |       step++;
111 |       int warmSteps{2};
112 |       if (step > warmSteps) {
113 |         stepTimes.push_back(nbodySim.getLastStepTime());
114 |         float cumStepTime =
115 |           std::accumulate(stepTimes.begin(), stepTimes.end(), 0.0);
116 |         float meanTime = cumStepTime / stepTimes.size();
117 |         float accum{0.0};
118 |         std::for_each(stepTimes.begin(), stepTimes.end(),
119 |             [&](const float time) {
120 |             accum += std::pow((time - meanTime), 2);
121 |             });
122 |         float stdDev = std::pow(accum / stepTimes.size(), 0.5);
123 |         std::cout << "At step " << step << " kernel time is "
124 |           << stepTimes.back() << " and mean is " << meanTime
125 |           << " and stddev is: " << stdDev << "\n";
126 |       }
127 | #ifndef DISABLE_GL
128 |       // Window refresh
129 |       glfwSwapBuffers(window);
130 |       glfwPollEvents();
131 | #endif
132 |     }
133 | 
134 | #ifndef DISABLE_GL
135 |     // Tear down the renderer first, then the window, then GLFW itself
136 |     renderer.destroy();
137 |     glfwDestroyWindow(window);
138 |     glfwTerminate();
139 | #endif
140 |     return 0;
141 |   }
142 | 


--------------------------------------------------------------------------------
/src/renderer.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <GL/glew.h>
 7 | #include <GLFW/glfw3.h>
 8 | 
 9 | #include <glm/glm.hpp>
10 | #include <vector>
11 | 
12 | #include "simulator.cuh"
13 | 
14 | class Renderer {
15 |   public:
16 |     /// Virtual so that deleting through a Renderer pointer is well defined
17 |     virtual ~Renderer() = default;
18 |     /// Hook to run before the window is created (RendererGL sets GLFW hints)
19 |     virtual void initWindow() = 0;
20 |     /**
21 |      * Initializes the gl state
22 |      * @param window the application's GLFW window
23 |      * @param width viewport width
24 |      * @param height viewport height
25 |      * @param sim simulator that supplies the particle data
26 |      */
27 |     virtual void init(GLFWwindow *window, int width, int height,
28 |         simulation::Simulator &sim) = 0;
29 |     /// Counterpart of init(); nbody.cpp calls it before destroying the window
30 |     virtual void destroy() = 0;
31 |     /**
32 |      * Supplies the gl state with updated particle position and velocity,
33 |      * as currently held by the simulator passed to init()
34 |      */
35 |     virtual void updateParticles() = 0;
36 |     /**
37 |      * Renders the particles at the current step
38 |      * @param projMat projection matrix @see Camera::getProj
39 |      * @param viewMat view matrix @see Camera::getView
40 |      */
41 |     virtual void render(glm::mat4 projMat, glm::mat4 viewMat) = 0;
42 | };
43 | 


--------------------------------------------------------------------------------
/src/renderer_gl.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
  2 | // Copyright (C) 2022 Codeplay Software Limited
  3 | 
  4 | #include "renderer_gl.hpp"
  5 | 
  6 | #include <algorithm>
  7 | #include <glm/glm.hpp>
  8 | #include <glm/gtc/type_ptr.hpp>
  9 | #include <stdexcept>
 10 | #include <iostream>
 11 | #include <numeric>
 12 | 
 13 | #include "imgui.h"
 14 | #include "imgui_impl_glfw.h"
 15 | #include "imgui_impl_opengl3.h"
 16 | #include "gen.hpp"
 17 | 
 18 | const int FBO_MARGIN = 50;  // pixels added on each side of the offscreen targets
 19 | // Compile-time switch: non-zero prints 1000/kernelTime as "FPS", 0 the time in ms
 20 | #define PRINT_PSEUDO_FPS 0
 21 | 
 22 | using namespace std;
 23 | 
 24 | void RendererGL::initWindow() {  // request an OpenGL 4.5 core profile context
 25 |   glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
 26 |   glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
 27 |   glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
 28 | }
 29 | 
 30 | void RendererGL::init(GLFWwindow *window, int width, int height,
 31 |     simulation::Simulator &sim_) {
 32 |   // Load the GL entry points first; everything below calls into them
 33 |   if (glewInit() != GLEW_OK) throw std::runtime_error("Can't load GL");
 34 | 
 35 |   // Remember the simulator and the sizes the later steps depend on
 36 |   sim = &sim_;
 37 |   numParticles = sim_.getNumParticles();
 38 |   setWindowDimensions(width, height);
 39 | 
 40 |   // GL objects; setUniforms() goes last as it reads texSize and blurDownscale
 41 |   createFlareTexture();
 42 |   createVaosVbos();
 43 |   initShaders();
 44 |   initFbos();
 45 |   setUniforms();
 46 | }
 47 | 
 48 | void RendererGL::setWindowDimensions(int width, int height) {
 49 |   this->height_ = height;  // viewport size in pixels, read later by the
 50 |   this->width_ = width;    // FBO and uniform setup
 51 | }
 52 | 
 53 | void RendererGL::createFlareTexture() {
 54 |   // Single-level, single-channel float texture of texSize x texSize texels
 55 |   texSize = 16;
 56 |   glCreateTextures(GL_TEXTURE_2D, 1, &flareTex);
 57 |   glTextureStorage2D(flareTex, 1, GL_R32F, texSize, texSize);
 58 |   glTextureParameteri(flareTex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
 59 |   // Upload the generated flare; the host copy is dropped on return
 60 |   const std::vector<float> flare = genFlareTex(texSize);
 61 |   glTextureSubImage2D(flareTex, 0, 0, 0, texSize, texSize, GL_RED, GL_FLOAT,
 62 |       flare.data());
 63 | }
 64 | 
 65 | void RendererGL::createVaosVbos() {
 66 |   // Byte size shared by both particle buffers
 67 |   const size_t particleBytes = numParticles * sizeof(glm::vec4);
 68 | 
 69 |   // Enables float attribute `index` (`comps` components, offset 0) on `vao`
 70 |   // and feeds it from the vertex buffer binding with the same index
 71 |   auto floatAttrib = [](GLuint vao, GLuint index, GLint comps) {
 72 |     glEnableVertexArrayAttrib(vao, index);
 73 |     glVertexArrayAttribFormat(vao, index, comps, GL_FLOAT, GL_FALSE, 0);
 74 |     glVertexArrayAttribBinding(vao, index, index);
 75 |   };
 76 | 
 77 |   // Particle VAO: binding 0 = positions, binding 1 = velocities,
 78 |   // both tightly packed vec4s
 79 |   glCreateVertexArrays(1, &vaoParticles);
 80 |   glCreateBuffers(1, &vboParticlesPos);
 81 |   glCreateBuffers(1, &ssboVelocities);
 82 |   glVertexArrayVertexBuffer(vaoParticles, 0, vboParticlesPos, 0,
 83 |       sizeof(glm::vec4));
 84 |   glVertexArrayVertexBuffer(vaoParticles, 1, ssboVelocities, 0,
 85 |       sizeof(glm::vec4));
 86 |   floatAttrib(vaoParticles, 0, 4);  // position
 87 |   floatAttrib(vaoParticles, 1, 4);  // velocity
 88 | 
 89 |   // Deferred VAO: a single triangle of 2D positions, stored immutably
 90 |   glCreateVertexArrays(1, &vaoDeferred);
 91 |   glCreateBuffers(1, &vboDeferred);
 92 |   glVertexArrayVertexBuffer(vaoDeferred, 0, vboDeferred, 0, sizeof(glm::vec2));
 93 |   floatAttrib(vaoDeferred, 0, 2);
 94 |   const glm::vec2 tri[3] = {glm::vec2(-2, -1), glm::vec2(+2, -1),
 95 |     glm::vec2(0, 4)};
 96 |   glNamedBufferStorage(vboDeferred, 3 * sizeof(glm::vec2), tri, 0);
 97 | 
 98 |   // Particle buffer storage: no initial data, writable only through mapping
 99 |   glNamedBufferStorage(vboParticlesPos, particleBytes, nullptr,
100 |       GL_MAP_WRITE_BIT);
101 |   glNamedBufferStorage(ssboVelocities, particleBytes, nullptr,
102 |       GL_MAP_WRITE_BIT);
103 | 
104 |   // The same two buffers are also bound to shader storage points 0 and 1
105 |   glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, vboParticlesPos, 0,
106 |       particleBytes);
107 |   glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, ssboVelocities, 0,
108 |       particleBytes);
109 | }
110 | 
111 | void RendererGL::updateParticles() {  // simulator state -> GL buffers
112 |   this->setParticleData(this->vboParticlesPos, this->sim->getParticlePos());
113 |   this->setParticleData(this->ssboVelocities, this->sim->getParticleVel());
114 | }
115 | 
116 | void RendererGL::initImgui(GLFWwindow *window) {
117 |   // Create the ImGui context; the IO object is fetched but not configured
118 |   IMGUI_CHECKVERSION();
119 |   ImGui::CreateContext();
120 |   (void)ImGui::GetIO();
121 |   ImGui::StyleColorsDark();
122 | 
123 |   // Platform (GLFW) and renderer (OpenGL 3) backends; `true` asks the
124 |   // GLFW backend to install its input callbacks
125 |   ImGui_ImplGlfw_InitForOpenGL(window, true);
126 |   ImGui_ImplOpenGL3_Init("#version 450");
127 | }
128 | 
129 | void RendererGL::printKernelTime(float kernelTime) {
130 |   // Start the Dear ImGui frame
131 |   ImGui_ImplOpenGL3_NewFrame();
132 |   ImGui_ImplGlfw_NewFrame();
133 |   ImGui::NewFrame();
134 | 
135 |   // Generate a minimal window. ImGui is handed a pointer to isOpen, so
136 |   // the flag gets a defined value rather than being left uninitialized.
137 |   bool isOpen = true;
138 |   ImGui::Begin("N/A", &isOpen,
139 |       ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoMove |
140 |       ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoSavedSettings |
141 |       ImGuiWindowFlags_NoInputs);
142 |   ImGui::SetWindowFontScale(2.5);
143 |   ImGui::Text("%s", (std::string("N-body demo running with " COMPILER_NAME
144 |           " on device: ") + *sim->getDeviceName()).c_str());
145 |   if (PRINT_PSEUDO_FPS) {
146 |     ImGui::Text("FPS: %2.0f", 1000.0/kernelTime);
147 |   } else {
148 |     ImGui::Text("Kernel time: %4.2f ms", kernelTime);
149 |   }
150 |   ImGui::End();
151 |   // Build the draw data and submit it through the OpenGL backend
152 |   ImGui::Render();
153 |   ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
154 | }
155 | 
156 | void RendererGL::setParticleData(const GLuint buffer,
157 |     const ParticleData &data) {
158 |   // Map the whole buffer write-only; it must hold numParticles vec4 slots
159 |   void *mapped = glMapNamedBufferRange(
160 |       buffer, 0, numParticles * sizeof(glm::vec4), GL_MAP_WRITE_BIT);
161 | 
162 |   assert(!glGetError());
163 |   assert(mapped);
164 | 
165 |   // Pack the separate x/y/z arrays of `data` into vec4s, w fixed at 1.
166 |   // Placement new constructs each element directly in the mapped memory.
167 |   glm::vec4 *slots = static_cast<glm::vec4 *>(mapped);
168 |   for (size_t i = 0; i < numParticles; i++) {
169 |     new (slots + i) glm::vec4(data.x[i], data.y[i], data.z[i], 1.0f);
170 |   }
171 |   glUnmapNamedBuffer(buffer);
172 | }
173 | 
174 | void RendererGL::initShaders() {
175 |   // Need to cut these two shaders out: the compute programs for
176 |   // shaders/gl/interaction.comp and shaders/gl/integration.comp
177 |   // (programInteraction / programIntegration) are not built here.
178 | 
179 |   // Every screen-space pass pairs deferred.vert with its own fragment
180 |   // shader, so the three of them share one helper
181 |   auto buildDeferred = [](ShaderProgram &program, const char *fragment) {
182 |     program.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
183 |     program.source(GL_FRAGMENT_SHADER, fragment);
184 |     program.link();
185 |   };
186 | 
187 |   // Particle program: vertex, fragment and geometry stage
188 |   programHdr.source(GL_VERTEX_SHADER, "shaders/gl/main.vert");
189 |   programHdr.source(GL_FRAGMENT_SHADER, "shaders/gl/main.frag");
190 |   programHdr.source(GL_GEOMETRY_SHADER, "shaders/gl/main.geom");
191 |   programHdr.link();
192 | 
193 |   // Screen-space programs, built in the same order as before:
194 |   // tonemap, blur, luminance
195 |   buildDeferred(programTonemap, "shaders/gl/tonemap.frag");
196 |   buildDeferred(programBlur, "shaders/gl/blur.frag");
197 |   buildDeferred(programLum, "shaders/gl/luminance.frag");
198 | }
199 | 
200 | void RendererGL::initFbos() {
201 |   // The blur targets are 1/blurDownscale of the full (margin included) size
202 |   blurDownscale = 2;
203 |   const int full_w = width_ + 2 * FBO_MARGIN;
204 |   const int full_h = height_ + 2 * FBO_MARGIN;
205 |   const int blur_w = full_w / blurDownscale, blur_h = full_h / blurDownscale;
206 | 
207 |   // Highest mip level of the half-size luminance texture
208 |   lumLod = (int)floor(log2(max(full_w, full_h) / 2));
209 | 
210 |   // Per target: mip levels, internal format, size, minification filter
211 |   struct Target { int levels; GLenum format; int w, h; GLenum min_filter; };
212 |   const Target targets[4] = {
213 |     {1, GL_RGBA16F, full_w, full_h, GL_LINEAR},  // 0: hdr
214 |     {1, GL_RGBA16F, blur_w, blur_h, GL_LINEAR},  // 1: blur ping pong
215 |     {1, GL_RGBA16F, blur_w, blur_h, GL_LINEAR},  // 2: blur ping pong
216 |     // 3: luminance
217 |     {lumLod + 1, GL_R16F, full_w / 2, full_h / 2, GL_LINEAR_MIPMAP_LINEAR},
218 |   };
219 | 
220 |   glCreateFramebuffers(4, fbos);
221 |   glCreateTextures(GL_TEXTURE_2D, 4, attachs);
222 |   for (int i = 0; i < 4; ++i) {
223 |     const Target &t = targets[i];
224 |     glTextureStorage2D(attachs[i], t.levels, t.format, t.w, t.h);
225 |     glTextureParameteri(attachs[i], GL_TEXTURE_MIN_FILTER, t.min_filter);
226 |     glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
227 |     glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
228 |     glNamedFramebufferTexture(fbos[i], GL_COLOR_ATTACHMENT0, attachs[i], 0);
229 |   }
230 | }
231 | 
232 | void RendererGL::setUniforms() {
233 |   const auto hdr = programHdr.getId(), blur = programBlur.getId();
234 |   // NDC sprite size
235 |   glProgramUniform2f(hdr, 8, texSize / float(2 * width_),
236 |       texSize / float(2 * height_));
237 |   // Blur sample offset length
238 |   glProgramUniform2f(blur, 0, (float)blurDownscale / width_,
239 |       (float)blurDownscale / height_);
240 |   // Optimized 1D gaussian kernel sent to the blur program: tap count at
241 |   // location 2, offsets from location 3, weights from location 103
242 |   const auto kernel = optimGaussKernel(gaussKernel(10.0, 25));
243 |   const std::vector<float> &offsets = kernel.first;
244 |   const std::vector<float> &weights = kernel.second;
245 |   assert(offsets.size() < 100 && "Maximum Gaussian kernel size exceeded!");
246 |   glProgramUniform1i(blur, 2, offsets.size());
247 |   glProgramUniform1fv(blur, 3, offsets.size(), offsets.data());
248 |   glProgramUniform1fv(blur, 103, offsets.size(), weights.data());
249 | }
250 | 
251 | void RendererGL::render(glm::mat4 proj_mat, glm::mat4 view_mat) {
252 |   // Particle HDR rendering: additive blending into fbos[0], margin included
253 |   glViewport(0, 0, width_ + 2 * FBO_MARGIN, height_ + 2 * FBO_MARGIN);
254 |   glBindVertexArray(vaoParticles);
255 |   glEnable(GL_BLEND);
256 |   glBlendFunc(GL_ONE, GL_ONE);
257 |   glBindFramebuffer(GL_FRAMEBUFFER, fbos[0]);
258 |   glUseProgram(programHdr.getId());
259 |   glClear(GL_COLOR_BUFFER_BIT);
260 |   glProgramUniformMatrix4fv(programHdr.getId(), 0, 1, GL_FALSE,
261 |       glm::value_ptr(view_mat));
262 |   glProgramUniformMatrix4fv(programHdr.getId(), 4, 1, GL_FALSE,
263 |       glm::value_ptr(proj_mat));
264 |   glBindTextureUnit(0, flareTex);
265 |   glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
266 |   glDrawArrays(GL_POINTS, 0, numParticles);
267 |   // From here on every pass draws the deferred triangle without blending
268 |   glBindVertexArray(vaoDeferred);
269 |   glDisable(GL_BLEND);
270 | 
271 |   glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / blurDownscale,
272 |       (height_ + 2 * FBO_MARGIN) / blurDownscale);
273 |   glUseProgram(programBlur.getId());
274 | 
275 |   // Blur pingpong (N horizontal blurs then N vertical blurs)
276 |   // The first pass reads attachs[0]; each later pass reads the previous output
277 |   const int nPasses = 1; // Only one blur pass in each direction
278 |   int loop = 0;
279 |   for (int i = 0; i < 2; ++i) {
280 |     if (i == 0)
281 |       glProgramUniform2f(programBlur.getId(), 1, 1, 0);
282 |     else
283 |       glProgramUniform2f(programBlur.getId(), 1, 0, 1);
284 |     for (int j = 0; j < nPasses; ++j) {
285 |       GLuint fbo = fbos[(loop % 2) + 1];
286 |       GLuint attach = attachs[loop ? ((loop + 1) % 2 + 1) : 0];
287 |       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
288 |       glBindTextureUnit(0, attach);
289 |       glDrawArrays(GL_TRIANGLES, 0, 3);
290 |       loop++;
291 |     }
292 |   }
293 | 
294 |   // Average luminance: half-size pass over attachs[0], then the mip chain
295 |   glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / 2,
296 |       (height_ + 2 * FBO_MARGIN) / 2);
297 |   glBindFramebuffer(GL_FRAMEBUFFER, fbos[3]);
298 |   glUseProgram(programLum.getId());
299 |   glBindTextureUnit(0, attachs[0]);
300 |   glDrawArrays(GL_TRIANGLES, 0, 3);
301 |   glGenerateTextureMipmap(attachs[3]);
302 | 
303 |   // Tonemapping step (direct to screen); units 0/1/2 = hdr, bloom, luminance
304 |   glViewport(0, 0, width_, height_);
305 |   glBindFramebuffer(GL_FRAMEBUFFER, 0);
306 |   glUseProgram(programTonemap.getId());
307 |   glProgramUniform1i(programTonemap.getId(), 0, lumLod);
308 |   glBindTextureUnit(0, attachs[0]);
309 |   glBindTextureUnit(1, attachs[2]);
310 |   glBindTextureUnit(2, attachs[3]);
311 |   glDrawArrays(GL_TRIANGLES, 0, 3);
312 | }
313 | 
314 | std::vector<float> RendererGL::gaussKernel(const float sigma,
315 |     const int halfwidth) {
316 |   // Builds one side of a symmetric 1D gaussian kernel.
317 |   //   sigma     standard deviation of the gaussian
318 |   //   halfwidth number of taps returned, the centre tap included
319 |   // Element n is the gaussian evaluated at distance n from the centre.
320 |   const float prefactor = 1.0 / (sigma * sqrt(2 * glm::pi<float>()));
321 |   std::vector<float> kernel(halfwidth);
322 |   for (int n = 0; n < halfwidth; ++n) {
323 |     kernel[n] = prefactor * std::exp(-std::pow(static_cast<float>(n), 2) /
324 |         (2 * std::pow(sigma, 2)));
325 |   }
326 | 
327 |   // Normalize so that the mirrored, two-sided kernel sums to one: the
328 |   // centre tap counts once, every other tap twice
329 |   double tail = 0.0;
330 |   for (size_t i = 1; i < kernel.size(); ++i) tail += kernel[i];
331 |   const float halfnorm = tail;
332 |   const float norm = 2 * halfnorm + kernel[0];
333 |   for (float &tap : kernel) tap /= norm;
334 | 
335 |   return kernel;
336 | }
337 | 
338 | 
339 | std::pair<std::vector<float>, std::vector<float>> RendererGL::optimGaussKernel(
340 |     const std::vector<float> weightsIn) {
341 |   // Input tap k sits at texel offset k. Output tap 0 is the centre tap;
342 |   // every later output tap merges input taps 2i-1 and 2i (when the
343 |   // latter exists) into one tap placed at their weighted mean offset,
344 |   // so a linearly filtered texture fetch can stand in for both.
345 |   // Returns (offsets, weights), both of the same length.
346 |   const int inCount = weightsIn.size();
347 |   const int outCount = (inCount / 2) + 1;
348 |   std::vector<float> offsetsOut(outCount);
349 |   std::vector<float> weightsOut(outCount);
350 | 
351 |   // Centre point of gaussian doesn't change
352 |   offsetsOut[0] = 0;
353 |   weightsOut[0] = weightsIn[0];
354 |   for (int out = 1; out < outCount; out++) {
355 |     const int first = out * 2 - 1, second = out * 2;
356 |     const float firstOffset = first, secondOffset = second;
357 |     if (second >= inCount) {
358 |       // Lone last tap: carried over unchanged
359 |       weightsOut[out] = weightsIn[first];
360 |       offsetsOut[out] = firstOffset;
361 |       continue;
362 |     }
363 |     weightsOut[out] = weightsIn[first] + weightsIn[second];
364 |     offsetsOut[out] = (firstOffset * weightsIn[first] +
365 |         secondOffset * weightsIn[second]) / weightsOut[out];
366 |   }
367 |   return std::make_pair(offsetsOut, weightsOut);
368 | }
369 | 
370 | void RendererGL::destroy() {
371 |   // init() sets sim only after the GL entry points were loaded; without
372 |   // them there is nothing to release and the calls below must not be made
373 |   if (!sim) return;
374 | 
375 |   // Tear down ImGui if initImgui() created a context
376 |   if (ImGui::GetCurrentContext()) {
377 |     ImGui_ImplOpenGL3_Shutdown();
378 |     ImGui_ImplGlfw_Shutdown();
379 |     ImGui::DestroyContext();
380 |   }
381 | 
382 |   // Release the GL objects created by init(). The shader programs are
383 |   // owned by their ShaderProgram members and are not touched here.
384 |   glDeleteFramebuffers(4, fbos);
385 |   glDeleteTextures(4, attachs);
386 |   glDeleteTextures(1, &flareTex);
387 |   glDeleteVertexArrays(1, &vaoParticles);
388 |   glDeleteVertexArrays(1, &vaoDeferred);
389 |   const GLuint buffers[] = {vboParticlesPos, ssboVelocities, vboDeferred};
390 |   glDeleteBuffers(3, buffers);
391 |   sim = nullptr;  // makes a second destroy() a no-op
392 | }
393 | 


--------------------------------------------------------------------------------
/src/renderer_gl.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <GL/glew.h>
 8 | #include <GLFW/glfw3.h>
 9 | 
10 | #include "renderer.hpp"
11 | #include "shader.hpp"
12 | 
13 | using namespace simulation;
14 | 
15 | class RendererGL : public Renderer {
16 |   public:
17 |     void initWindow() override;
18 |     void init(GLFWwindow *window, int width, int height,
19 |         simulation::Simulator &sim) override;
20 |     void destroy() override;
21 |     /// Initialize Imgui
22 |     void initImgui(GLFWwindow *window);
23 |     void updateParticles() override;
24 |     void render(glm::mat4 proj_mat, glm::mat4 view_mat) override;
25 |     void printKernelTime(float kernelTime);  ///< Draws the ImGui text overlay
26 |     RendererGL() = default;  ///< Members start zeroed; init() does the setup
27 | 
28 |   private:
29 |     /// Provides the gl state with window dimensions for fbo size, etc
30 |     void setWindowDimensions(int width, int height);
31 | 
32 |     /// Generates the star flare texture
33 |     void createFlareTexture();
34 | 
35 |     /// Creates the VAO and VBO objects
36 |     void createVaosVbos();
37 | 
38 |     /// Loads the shaders into the gl state
39 |     void initShaders();
40 | 
41 |     /// Creates the framebuffers and their texture attachments
42 |     void initFbos();
43 | 
44 |     /// Sends the sprite size, blur step and gaussian kernel to the programs
45 |     void setUniforms();
46 | 
47 |     /// Send data obtained from simulation to a buffer
48 |     void setParticleData(const GLuint buffer, const ParticleData &data);
49 | 
50 |     /// Compute the 1D gaussian kernel for given sigma & halfwidth
51 |     static std::vector<float> gaussKernel(const float sigma,
52 |         const int halfwidth);
53 | 
54 |     /// Optimizes the given 1D gaussian kernel via texel linear interp
55 |     static std::pair<std::vector<float>, std::vector<float>> optimGaussKernel(
56 |         const std::vector<float> inKernel);
57 | 
58 |     Simulator *sim{nullptr};  ///< Set by init(); not owned
59 | 
60 |     GLuint flareTex{0};         ///< Texture for the star flare
61 |     GLuint vaoParticles{0};     ///< Vertex definition for points
62 |     GLuint vboParticlesPos{0};  ///< Particle position buffer
63 |     GLuint ssboVelocities{0};   ///< Particle velocity buffer
64 |     GLuint vaoDeferred{0};      ///< Vertex definition for deferred
65 |     GLuint vboDeferred{0};      ///< Vertex buffer of deferred fullscreen tri
66 | 
67 |     /** Shader programs **/
68 |     ShaderProgram programHdr;      ///< HDR rendering step
69 |     ShaderProgram programBlur;     ///< Bloom blurring step
70 |     ShaderProgram programLum;      ///< Average luminance step
71 |     ShaderProgram programTonemap;  ///< Tonemapping step
72 | 
73 |     GLuint fbos[4]{};     ///< FBOs (0 for hdr, 1 & 2 for blur ping pong, 3 for
74 |                           ///< luminance)
75 |     GLuint attachs[4]{};  ///< Respective FBO attachments.
76 | 
77 |     int texSize{0};        ///< Flare texture size in pixels
78 |     int lumLod{0};         ///< Luminance texture level to sample from
79 |     int blurDownscale{0};  ///< Downscale factor for the blurring step
80 |     int width_{0};         ///< Viewport width
81 |     int height_{0};        ///< Viewport height
82 | 
83 |     size_t numParticles{0};       ///< Particle count, read from the simulator
84 |     size_t computeIterations{0};  ///< Not referenced by renderer_gl.cpp
85 | };
86 | 


--------------------------------------------------------------------------------
/src/shader.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | 
 3 | #include "shader.hpp"
 4 | 
 5 | #include <fstream>
 6 | #include <iostream>
 7 | #include <sstream>
 8 | #include <string>
 9 | 
10 | using namespace std;
11 | 
12 | ShaderProgram::ShaderProgram() : id(0) {}
13 | 
14 | void ShaderProgram::source(GLenum shader_type, const string &filename) {
15 |   if (!id) id = glCreateProgram();
16 | 
17 |   string code;
18 | 
19 |   // IO stuff
20 |   try {
21 |     stringstream sstream;
22 |     {
23 |       ifstream stream;
24 |       stream.exceptions(ifstream::failbit | ifstream::badbit);
25 |       stream.open(filename);
26 |       sstream << stream.rdbuf();
27 |     }
28 |     code = sstream.str();
29 |   } catch (ifstream::failure e) {
30 |     throw std::runtime_error(std::string("Can't open ") + filename +
31 |         std::string(e.what()));
32 |   }
33 | 
34 |   GLint success;
35 |   GLchar info_log[2048];
36 | 
37 |   const char *s = code.c_str();
38 | 
39 |   // OpenGL stuff
40 |   GLuint shad_id = glCreateShader(shader_type);
41 |   glShaderSource(shad_id, 1, &s, NULL);
42 |   glCompileShader(shad_id);
43 |   glGetShaderiv(shad_id, GL_COMPILE_STATUS, &success);
44 |   if (!success) {
45 |     // error log
46 |     glGetShaderInfoLog(shad_id, sizeof(info_log), NULL, info_log);
47 |     throw std::runtime_error(std::string("Can't compile ") + filename + " " +
48 |         info_log);
49 |     exit(-1);
50 |   }
51 |   glAttachShader(id, shad_id);
52 | }
53 | 
54 | void ShaderProgram::link() {
55 |   GLint success;
56 |   GLchar info_log[2048];
57 | 
58 |   glLinkProgram(id);
59 |   glGetProgramiv(id, GL_LINK_STATUS, &success);
60 |   if (!success) {
61 |     // error log
62 |     glGetProgramInfoLog(id, sizeof(info_log), NULL, info_log);
63 |     throw std::runtime_error(std::string("Can't link ") +
64 |         std::string(info_log));
65 |   }
66 | }
67 | 
68 | GLuint ShaderProgram::getId() { return id; }
69 | 


--------------------------------------------------------------------------------
/src/shader.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <GL/glew.h>
 6 | 
 7 | #include <string>
 8 | 
 9 | class ShaderProgram {
10 |   public:
11 |     ShaderProgram();
12 | 
13 |     /**
14 |      * Compiles a shader stage from a given source, displays errors in stderr
15 |      * @param program shader program handle
16 |      * @param shader_type one of GL_COMPUTE_SHADER, GL_VERTEX_SHADER,
17 |      * GL_TESS_CONTROL_SHADER, GL_TESS_EVALUATION_SHADER, GL_GEOMETRY_SHADER, or
18 |      * GL_FRAGMENT_SHADER
19 |      * @param filename GLSL source file
20 |      */
21 |     void source(GLenum shaderType, const std::string &filename);
22 | 
23 |     /**
24 |      * Links all shaders inside the program, displays errors in stderr
25 |      */
26 |     void link();
27 | 
28 |     GLuint getId();
29 | 
30 |   private:
31 |     GLuint id;
32 | };
33 | 


--------------------------------------------------------------------------------
/src/sim_param.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | #include "sim_param.hpp"
 5 | 
 6 | #include <iostream>
 7 | #include <string>
 8 | #include <map>
 9 | #include <cstdlib>
10 | #include <cstdint>
11 | 
12 | SimParam::SimParam() {
13 |   G = 2.0;
14 |   dt = 0.005;
15 |   numParticles = 50 * 256;
16 |   numFrames = SIZE_MAX;
17 |   simIterationsPerFrame = 4;
18 |   damping = 0.999998;
19 |   distEps = 1.0e-7;
20 |   gwSize = 64;
21 |   calcMethod = CalculationMethod::BRANCH;
22 | }
23 | 
24 | // Set the calculation method from the given string
25 | CalculationMethod getCalculationMethod(const std::string& method) {
26 | 
27 |   static const std::map<std::string, CalculationMethod> methodMap = {
28 |     {"BRANCH", CalculationMethod::BRANCH},
29 |     {"PREDICATED", CalculationMethod::PREDICATED}
30 |   };
31 | 
32 |   auto it = methodMap.find(method);
33 |   if (it != methodMap.end()) {
34 |     return it->second;
35 |   } else {
36 |     throw std::invalid_argument("Valid calculation methods are BRANCH or PREDICATED");
37 |   }
38 | }
39 | 
40 | void SimParam::parseArgs(int argc, char **argv) {
41 |   // First argument if existing = number of particle batches (256 per batch)
42 |   if (argc >= 2) numParticles = 256 * atoi(argv[1]);
43 | 
44 |   // Second argument if existing = number of iterations per frame
45 |   if (argc >= 3) simIterationsPerFrame = atoi(argv[2]);
46 | 
47 |   // Third argument if existing = damping parameter
48 |   if (argc >= 4) damping = atof(argv[3]);
49 | 
50 |   // Fourth argument if existing = dt (timestep size) parameter
51 |   if (argc >= 5) dt = atof(argv[4]);
52 | 
53 |   // Fifth argument if existing = distEps (minimum inter-particle distance) parameter
54 |   if (argc >= 6) distEps = atof(argv[5]);
55 | 
56 |   // Sixth argument if existing = G (gravity) parameter
57 |   if (argc >= 7) G = atof(argv[6]);
58 | 
59 |   // Seventh argument if existing = number of frames to simulate
60 |   if (argc >= 8) numFrames = atoi(argv[7]);
61 | 
62 |   // Eighth argument if existing = the work group size
63 |   if (argc >= 9) gwSize = atoi(argv[8]);
64 | 
65 |   // Ninth argument if existing = the calculation method
66 |   if (argc >= 10) calcMethod = getCalculationMethod(argv[9]);
67 | }
68 | 


--------------------------------------------------------------------------------
/src/sim_param.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <cstdlib>
 7 | 
 8 | enum class CalculationMethod {
 9 |   BRANCH,
10 |   PREDICATED
11 | };
12 | 
13 | /**
14 |  * Simulation parameters
15 |  */
16 | class SimParam {
17 |   public:
18 |     /**
19 |      * Creates default simulation parameters
20 |      */
21 |     SimParam();
22 | 
23 |     /**
24 |      * Provides user-defined simulation parameters
25 |      * @param argc number of arguments
26 |      * @param argv arguments
27 |      */
28 |     void parseArgs(int argc, char **argv);
29 | 
30 |     float G;                     ///< Gravitational parameter
31 |     float dt;                    ///< Simulation delta t
32 |     size_t numParticles;         ///< Number of particles simulated
33 |     size_t numFrames;            ///< Number of frames simulated
34 |     int simIterationsPerFrame;   ///< Simulation iterations per frame rendered
35 |     float damping;  ///< Damping parameter for simulating 'soupy' galaxy (1.0 =
36 |                     ///< no damping)
37 |     float distEps;  ///< Minimum distance to limit gravity of very close particles
38 |     int gwSize;                  ///< Work group size
39 |     CalculationMethod calcMethod;              /// Use or not branch instruction in kernel
40 | };
41 | 


--------------------------------------------------------------------------------
/src/simulator.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2022 Codeplay Software Limited
  2 | // This work is licensed under the terms of the MIT license.
  3 | // For a copy, see https://opensource.org/licenses/MIT.
  4 | 
  5 | #include "simulator.cuh"
  6 | //#include <cstddef>
  7 | #include <stdio.h>
  8 | 
  9 | #include <algorithm>
 10 | #include <cmath>
 11 | #include <random>
 12 | #include <tuple>
 13 | #include <chrono>
 14 | #include <iostream>
 15 | 
 16 | namespace simulation {
 17 | 
 18 |   // Forward decl
 19 |   template <CalculationMethod ct>
 20 |   __global__ void particle_interaction(ParticleData_d pPos,
 21 |       ParticleData_d pNextPos,
 22 |       ParticleData_d pVel, SimParam params);
 23 | 
 24 |   DiskGalaxySimulator::DiskGalaxySimulator(SimParam params_)
 25 |     : params(params_),
 26 |     pos(params_.numParticles),
 27 |     vel(params_.numParticles),
 28 |     pos_d(params_.numParticles),
 29 |     vel_d(params_.numParticles),
 30 |     pos_next_d(params_.numParticles) {
 31 |       randomParticlePos();
 32 |       initialParticleVel();
 33 |       sendToDevice();
 34 |     };
 35 | 
 36 |   const std::string* DiskGalaxySimulator::getDeviceName() {
 37 |     // Query the device first time only
 38 |     if(devName.empty()){
 39 |       char devNameHolder[256];
 40 |       int error_id = cuDeviceGetName(devNameHolder, 256, 0); // Assume main device
 41 |       if(error_id != CUDA_SUCCESS) devName = "Unknown Device";
 42 |       else devName = devNameHolder;
 43 |     }
 44 |     return &devName;
 45 |   }
 46 | 
 47 |   void DiskGalaxySimulator::stepSim() {
 48 |     // Compute updated positions
 49 |     int wg_size = getGwSize();
 50 |     int nblocks = ((getNumParticles() - 1) / wg_size) + 1;
 51 | 
 52 |     // Profiling info - rather than using the CUDA event recording
 53 |     // approach, we are instead measuring the time from before kernel
 54 |     // submission until host synchronization. This is more portable via
 55 |     // dpct.
 56 |     auto start = std::chrono::steady_clock::now();
 57 |     for (size_t i = 0; i < params.simIterationsPerFrame; i++) {
 58 |       if ( getCM() == CalculationMethod::BRANCH ) {
 59 |         particle_interaction<CalculationMethod::BRANCH><<<nblocks, wg_size>>>(pos_d, pos_next_d, vel_d,
 60 |             params);
 61 |       } else {
 62 |         particle_interaction<CalculationMethod::PREDICATED><<<nblocks, wg_size>>>(pos_d, pos_next_d, vel_d,
 63 |             params);
 64 |       }
 65 |       std::swap(pos_d, pos_next_d);
 66 |     }
 67 |     gpuErrchk(cudaDeviceSynchronize());
 68 |     auto stop = std::chrono::steady_clock::now();
 69 |     lastStepTime =
 70 |       std::chrono::duration<float, std::milli>(stop - start)
 71 |       .count();
 72 | 
 73 |     // Sync data
 74 |     recvFromDevice();
 75 |   }
 76 | 
 77 |   // Only necessary because we can't initialize data on device yet, in a
 78 |   // dpct-friendly way
 79 |   void DiskGalaxySimulator::sendToDevice() {
 80 |     gpuErrchk(cudaDeviceSynchronize());
 81 | 
 82 |     gpuErrchk(cudaMemcpy(pos_d.x, pos.x.data(),
 83 |           params.numParticles * sizeof(coords_t),
 84 |           cudaMemcpyHostToDevice));
 85 |     gpuErrchk(cudaMemcpy(pos_d.y, pos.y.data(),
 86 |           params.numParticles * sizeof(coords_t),
 87 |           cudaMemcpyHostToDevice));
 88 |     gpuErrchk(cudaMemcpy(pos_d.z, pos.z.data(),
 89 |           params.numParticles * sizeof(coords_t),
 90 |           cudaMemcpyHostToDevice));
 91 | 
 92 |     gpuErrchk(cudaMemcpy(vel_d.x, vel.x.data(),
 93 |           params.numParticles * sizeof(coords_t),
 94 |           cudaMemcpyHostToDevice));
 95 |     gpuErrchk(cudaMemcpy(vel_d.y, vel.y.data(),
 96 |           params.numParticles * sizeof(coords_t),
 97 |           cudaMemcpyHostToDevice));
 98 |     gpuErrchk(cudaMemcpy(vel_d.z, vel.z.data(),
 99 |           params.numParticles * sizeof(coords_t),
100 |           cudaMemcpyHostToDevice));
101 | 
102 |     gpuErrchk(cudaDeviceSynchronize());
103 |   }
104 | 
105 |   // Receive particle positions & velocity from device
106 |   void DiskGalaxySimulator::recvFromDevice() {
107 |     gpuErrchk(cudaDeviceSynchronize());
108 | 
109 |     gpuErrchk(cudaMemcpy(pos.x.data(), pos_d.x,
110 |           params.numParticles * sizeof(coords_t),
111 |           cudaMemcpyDeviceToHost));
112 |     gpuErrchk(cudaMemcpy(pos.y.data(), pos_d.y,
113 |           params.numParticles * sizeof(coords_t),
114 |           cudaMemcpyDeviceToHost));
115 |     gpuErrchk(cudaMemcpy(pos.z.data(), pos_d.z,
116 |           params.numParticles * sizeof(coords_t),
117 |           cudaMemcpyDeviceToHost));
118 | 
119 |     gpuErrchk(cudaMemcpy(vel.x.data(), vel_d.x,
120 |           params.numParticles * sizeof(coords_t),
121 |           cudaMemcpyDeviceToHost));
122 |     gpuErrchk(cudaMemcpy(vel.y.data(), vel_d.y,
123 |           params.numParticles * sizeof(coords_t),
124 |           cudaMemcpyDeviceToHost));
125 |     gpuErrchk(cudaMemcpy(vel.z.data(), vel_d.z,
126 |           params.numParticles * sizeof(coords_t),
127 |           cudaMemcpyDeviceToHost));
128 |     gpuErrchk(cudaDeviceSynchronize());
129 |   }
130 | 
131 |   void DiskGalaxySimulator::randomParticlePos() {
132 |     // deterministic - default seed
133 |     std::mt19937 gen;
134 |     std::uniform_real_distribution<> dis(0.0, 1.0);
135 | 
136 |     // Disk shape in x-y plane
137 |     for (int i = 0; i < params.numParticles; i++) {
138 |       float t = dis(gen) * 2 * PI;
139 |       float s = dis(gen) * 100;
140 |       pos.x[i] = cos(t) * s;
141 |       pos.y[i] = sin(t) * s;
142 |     }
143 | 
144 |     // Z component is independent (uniform range 0-4)
145 |     std::generate(begin(pos.z), end(pos.z),
146 |         [&gen, &dis]() { return 4.0 * dis(gen); });
147 |   }
148 | 
149 |   void DiskGalaxySimulator::initialParticleVel() {
150 |     for (int i = 0; i < params.numParticles; i++) {
151 |       vec3 vel = cross({pos.x[i], pos.y[i], pos.z[i]}, {0.0, 0.0, 1.0});
152 |       coords_t orbital_vel = std::sqrt(2.0 * length(vel));
153 |       vel = normalize(vel) * orbital_vel;
154 |       this->vel.x[i] = vel.x;
155 |       this->vel.y[i] = vel.y;
156 |       this->vel.z[i] = vel.z;
157 |     }
158 |   }
159 | 
160 |   const ParticleData& DiskGalaxySimulator::getParticlePos() { return pos; };
161 | 
162 |   const ParticleData& DiskGalaxySimulator::getParticleVel() { return vel; };
163 | 
164 |   // Linear Algebra functions (not yet exposed in header)
165 |   HOSTDEV vec3 cross(const vec3 v0, const vec3 v1) {
166 |     return vec3(v0.y * v1.z - v0.z * v1.y, v0.z * v1.x - v0.x * v1.z,
167 |         v0.x * v1.y - v0.y * v1.x);
168 |   };
169 | 
170 |   HOSTDEV coords_t length(const vec3 v) {
171 |     return std::sqrt(std::pow(v.x, 2) + std::pow(v.y, 2) + std::pow(v.z, 2));
172 |   }
173 | 
174 |   HOSTDEV vec3 normalize(const vec3 v) {
175 |     vec3 result = v;
176 |     coords_t len = length(v);
177 |     result.x /= len;
178 |     result.y /= len;
179 |     result.z /= len;
180 |     return result;
181 |   }
182 | 
183 |   /* O(n^2) implementation (no distance threshold), with no shared
184 |      memory etc.
185 |    */
186 |   template <CalculationMethod ct>
187 |     __global__ void particle_interaction(ParticleData_d pPos,
188 |         ParticleData_d pNextPos,
189 |         ParticleData_d pVel, SimParam params) {
190 |       int id = threadIdx.x + (blockIdx.x * blockDim.x);
191 |       if (id >= params.numParticles) return;
192 | 
193 |       vec3 force(0.0f, 0.0f, 0.0f);
194 |       vec3 pos(pPos.x[id], pPos.y[id], pPos.z[id]);
195 | 
196 | #pragma unroll 4
197 |       for (int i = 0; i < params.numParticles; i++) {
198 |         vec3 other_pos{pPos.x[i], pPos.y[i], pPos.z[i]};
199 |         vec3 r = other_pos - pos;
200 |         // Fast computation of 1/(|r|^3)
201 |         coords_t dist_sqr = dot(r, r) + params.distEps;
202 |         coords_t inv_dist_cube = rsqrt(dist_sqr * dist_sqr * dist_sqr);
203 | 
204 |         // assume uniform unit mass
205 |         if  constexpr(ct == CalculationMethod::BRANCH) {
206 |           if ( i == id ) continue;
207 |           force += r * inv_dist_cube;
208 |         } else  if constexpr (ct == CalculationMethod::PREDICATED) {
209 |           force += r * inv_dist_cube * (i == id);
210 |         }
211 |       }
212 | 
213 |       // Update velocity
214 |       vec3 curr_vel(pVel.x[id], pVel.y[id], pVel.z[id]);
215 |       curr_vel *= params.damping;
216 |       curr_vel += force * params.dt * params.G;
217 | 
218 |       pVel.x[id] = curr_vel.x;
219 |       pVel.y[id] = curr_vel.y;
220 |       pVel.z[id] = curr_vel.z;
221 | 
222 |       // Update position (integration)
223 |       vec3 curr_pos(pPos.x[id], pPos.y[id], pPos.z[id]);
224 | 
225 |       curr_pos += curr_vel * params.dt;
226 |       pNextPos.x[id] = curr_pos.x;
227 |       pNextPos.y[id] = curr_pos.y;
228 |       pNextPos.z[id] = curr_pos.z;
229 |     }
230 | 
231 | }  // namespace simulation
232 | 


--------------------------------------------------------------------------------
/src/simulator.cuh:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2022 Codeplay Software Limited
  2 | // This work is licensed under the terms of the MIT license.
  3 | // For a copy, see https://opensource.org/licenses/MIT.
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <cuda.h>
  8 | #include <cuda_runtime_api.h>
  9 | #include <stdio.h>
 10 | 
 11 | #include <string>
 12 | #include <vector>
 13 | 
 14 | #include "sim_param.hpp"
 15 | 
 16 | #ifdef __CUDACC__
 17 | #define HOSTDEV __host__ __device__
 18 | #else
 19 | #define HOSTDEV
 20 | #endif
 21 | 
 22 | #define gpuErrchk(ans) \
 23 | { gpuAssert((ans), __FILE__, __LINE__); }
 24 | inline void gpuAssert(cudaError_t code, const char *file, int line,
 25 |     bool abort = true) {
 26 |   if (code != cudaSuccess) {
 27 |     fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
 28 |         line);
 29 |     if (abort) exit(code);
 30 |   }
 31 | }
 32 | 
 33 | namespace simulation {
 34 | 
 35 |   const float PI = 3.14159265358979323846;
 36 | 
 37 |   typedef float coords_t;
 38 | 
 39 |   struct vec3 {
 40 |     coords_t x = 0.0;
 41 |     coords_t y = 0.0;
 42 |     coords_t z = 0.0;
 43 | 
 44 |     HOSTDEV vec3() {};
 45 |     HOSTDEV vec3(coords_t x_, coords_t y_, coords_t z_)
 46 |       : x{x_}, y{y_}, z{z_} {}
 47 | 
 48 |     HOSTDEV inline vec3 &operator+=(const vec3 &rhs) {
 49 |       x += rhs.x;
 50 |       y += rhs.y;
 51 |       z += rhs.z;
 52 |       return *this;
 53 |     }
 54 | 
 55 |     HOSTDEV inline vec3 &operator*=(const coords_t &scale) {
 56 |       x *= scale;
 57 |       y *= scale;
 58 |       z *= scale;
 59 |       return *this;
 60 |     }
 61 |   };
 62 | 
 63 |   HOSTDEV inline const vec3 operator*(const vec3 &pos, const coords_t &scale) {
 64 |     return {pos.x * scale, pos.y * scale, pos.z * scale};
 65 |   }
 66 | 
 67 |   HOSTDEV inline const vec3 operator-(const vec3 &vec1, const vec3 &vec2) {
 68 |     return {vec1.x - vec2.x, vec1.y - vec2.y, vec1.z - vec2.z};
 69 |   }
 70 | 
 71 |   HOSTDEV inline coords_t dot(const vec3 &vec1, const vec3 &vec2) {
 72 |     return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z;
 73 |   }
 74 | 
 75 |   struct ParticleData {
 76 |     std::vector<coords_t> x;
 77 |     std::vector<coords_t> y;
 78 |     std::vector<coords_t> z;
 79 | 
 80 |     ParticleData(std::vector<coords_t> x_, std::vector<coords_t> y_,
 81 |         std::vector<coords_t> z_)
 82 |       : x(std::move(x_)), y(std::move(y_)), z(std::move(z_)){};
 83 |     ParticleData(size_t n) : x(n, 0.0), y(n, 0.0), z(n, 0.0){};
 84 |   };
 85 | 
 86 |   // Simply holds 3 coords_t* as a SoA
 87 |   struct ParticleData_d {
 88 |     coords_t *x = nullptr;
 89 |     coords_t *y = nullptr;
 90 |     coords_t *z = nullptr;
 91 | 
 92 |     ParticleData_d(size_t n) {
 93 |       // Allocate device memory for particle coords & velocity...
 94 |       gpuErrchk(cudaMalloc((void **)&x, sizeof(coords_t) * n));
 95 |       gpuErrchk(cudaMalloc((void **)&y, sizeof(coords_t) * n));
 96 |       gpuErrchk(cudaMalloc((void **)&z, sizeof(coords_t) * n));
 97 |     };
 98 |   };
 99 | 
100 |   HOSTDEV coords_t length(const vec3 v);
101 |   HOSTDEV vec3 cross(const vec3 v0, const vec3 v1);
102 |   HOSTDEV vec3 normalize(const vec3 v);
103 | 
104 |   /*
105 |      Interface class for Simulator
106 |    */
107 |   class Simulator {
108 |     public:
109 |       virtual void stepSim() = 0;
110 |       virtual size_t getNumParticles() = 0;
111 |       virtual const ParticleData &getParticlePos() = 0;
112 |       virtual const ParticleData &getParticleVel() = 0;
113 |       virtual float getLastStepTime() = 0;
114 |       virtual const std::string* getDeviceName() = 0;
115 |       virtual int getGwSize() = 0;
116 |   };
117 | 
118 |   /*
119 |      DiskGalaxySimulator class to handle execution of the nbody simulation.
120 | 
121 |      Regular data transfer only occurs in the device->host direction (from
122 |      Simulator to Renderer).
123 | 
124 | Invariants:
125 | - Has params
126 | - Has valid particle positions & velocities, allocated on host & device
127 |    */
128 | 
129 |   class DiskGalaxySimulator : public Simulator {
130 |     public:
131 |       DiskGalaxySimulator(SimParam params_);
132 | 
133 |       void stepSim();
134 |       float getLastStepTime() { return lastStepTime; }
135 |       size_t getNumParticles() { return params.numParticles; }
136 |       const ParticleData &getParticlePos();
137 |       const ParticleData &getParticleVel();
138 |       const std::string* getDeviceName();
139 |       int getGwSize() { return params.gwSize; }
140 |       CalculationMethod getCM() { return params.calcMethod; }
141 | 
142 |     private:
143 |       SimParam params;
144 |       std::string devName;
145 |       float lastStepTime{0.0};
146 | 
147 |       // Data for particle positions & vel on host
148 |       ParticleData pos;
149 |       ParticleData vel;
150 | 
151 |       // and on device
152 |       ParticleData_d pos_d;
153 |       ParticleData_d pos_next_d;  // double buffering
154 |       ParticleData_d vel_d;
155 | 
156 |       void randomParticlePos();
157 |       void initialParticleVel();
158 |       void sendToDevice();
159 |       void recvFromDevice();
160 |   };
161 | 
162 | }  // namespace simulation
163 | 


--------------------------------------------------------------------------------
/src_sycl/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | # Copyright (C) 2022 Codeplay Software Limited
 3 | 
# Build script for the SYCL version of the n-body demo. It defines two
# executables from the same sources: ${BINARY_NAME} (driven by the `release`
# target) and ${BINARY_NAME}_d (driven by the `debug` target, built with
# ${DEBUG_FLAGS}).

find_package(PkgConfig REQUIRED)

# Rendering dependencies are only looked up when RENDER is enabled
if (RENDER) 
  pkg_check_modules(Glew REQUIRED IMPORTED_TARGET glew)

  find_package(glm REQUIRED)
  find_package(glfw3 REQUIRED)
  find_package(OpenGL REQUIRED)
endif()

find_package(dpct REQUIRED)

# Sources needed by every build
set(COMMON_SOURCE 
  nbody.cpp 
  sim_param.cpp 
  simulator.dp.cpp)

# Sources only compiled when rendering is enabled
set(OPENGL_SOURCE 
  gen.cpp 
  camera.cpp 
  renderer_gl.cpp 
  shader.cpp)

set(DEBUG_FLAGS -g -O0)

# Select libraries, preprocessor definition and source list for the chosen
# rendering mode. nbody.cpp tests DISABLE_GL to compile out all rendering code.
if (RENDER) 
  set(RENDER_LIB glm::glm glfw PkgConfig::Glew OpenGL::OpenGL)
  set(RENDER_FLAG -DUSE_OPENGL)
  set(SOURCE_FILES ${COMMON_SOURCE} ${OPENGL_SOURCE})
else()
  set(RENDER_LIB)
  set(RENDER_FLAG DISABLE_GL)
  set(SOURCE_FILES ${COMMON_SOURCE})
endif()

# Release executable
add_custom_target(release DEPENDS ${BINARY_NAME})
add_executable(${BINARY_NAME} ${SOURCE_FILES})
target_compile_definitions(${BINARY_NAME} PRIVATE ${RENDER_FLAG} COMPILER_NAME="SYCL")
target_link_libraries(${BINARY_NAME} PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME} PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME} PRIVATE ${dpct_INCLUDE_DIR})

# Debug executable (same configuration, different flags below)
add_custom_target(debug DEPENDS ${BINARY_NAME}_d)
add_executable(${BINARY_NAME}_d ${SOURCE_FILES})
target_compile_definitions(${BINARY_NAME}_d PRIVATE ${RENDER_FLAG} COMPILER_NAME="SYCL")
target_link_libraries(${BINARY_NAME}_d PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME}_d PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME}_d PRIVATE ${dpct_INCLUDE_DIR})

# Provide a header-only glm::glm target when no such target exists yet
if(NOT TARGET glm::glm)
  add_library(glm::glm IMPORTED INTERFACE)
  target_include_directories(glm::glm INTERFACE ${GLM_INCLUDE_DIR})
endif()


# Only dpcpp, icpx or clang++ are accepted as the C++ compiler
if(NOT ((CMAKE_CXX_COMPILER MATCHES ".*dpcpp(\\.exe)?$") OR
  (CMAKE_CXX_COMPILER MATCHES ".*icpx(\\.exe)?$")  OR
  (CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+(\\.exe)?$")))
  message( FATAL_ERROR "Invalid C++ compiler for SYCL compilation" )
endif()

option(DPCPP_CUDA_SUPPORT "Whether to enable DPC++'s CUDA backend" ON)
if(DPCPP_CUDA_SUPPORT)
  # Ask nvidia-smi for the compute capability of the first GPU listed; fall
  # back to the default when the command produces no output.
  set(DEFAULT_CUDA_COMPUTE_CAPABILITY "50")
  execute_process(
    COMMAND bash -c "which nvidia-smi >/dev/null && nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.'"
    OUTPUT_VARIABLE CUDA_COMPUTE_CAPABILITY
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "")
    set(CUDA_COMPUTE_CAPABILITY ${DEFAULT_CUDA_COMPUTE_CAPABILITY})
  endif()
  set(SYCL_FLAGS -fsycl
    -fsycl-targets=nvptx64-nvidia-cuda,spir64
    -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_${CUDA_COMPUTE_CAPABILITY}
    -fsycl-unnamed-lambda
    -fgpu-inline-threshold=100000)
#  set(OPT_FLAGS -Ofast)
else()
  set(SYCL_FLAGS -fsycl -fsycl-targets=spir64 -fsycl-unnamed-lambda)
  set(OPT_FLAGS)
endif()

# The SYCL flags are passed to both the compile and the link step
target_compile_options(${BINARY_NAME} PRIVATE ${SYCL_FLAGS} ${OPT_FLAGS})
target_link_options(${BINARY_NAME} PRIVATE ${SYCL_FLAGS} ${OPT_FLAGS})

target_compile_options(${BINARY_NAME}_d PRIVATE ${SYCL_FLAGS} ${DEBUG_FLAGS})
target_link_options(${BINARY_NAME}_d PRIVATE ${SYCL_FLAGS} ${DEBUG_FLAGS})
91 | 


--------------------------------------------------------------------------------
/src_sycl/README.md:
--------------------------------------------------------------------------------
1 | Run ../scripts/run_dpct.sh to populate this directory
2 | 


--------------------------------------------------------------------------------
/src_sycl/camera.cpp:
--------------------------------------------------------------------------------
1 | ../src/camera.cpp


--------------------------------------------------------------------------------
/src_sycl/camera.hpp:
--------------------------------------------------------------------------------
1 | ../src/camera.hpp


--------------------------------------------------------------------------------
/src_sycl/gen.cpp:
--------------------------------------------------------------------------------
1 | ../src/gen.cpp


--------------------------------------------------------------------------------
/src_sycl/gen.hpp:
--------------------------------------------------------------------------------
1 | ../src/gen.hpp


--------------------------------------------------------------------------------
/src_sycl/nbody.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
  2 | // Copyright (C) 2022 Codeplay Software Limited
  3 |  
  4 | #include <iostream>
  5 | #include <chrono>
  6 | #include <cstdlib>
  7 | 
  8 | #ifndef DISABLE_GL
  9 | #include <GL/glew.h>
 10 | 
 11 | #include "renderer_gl.hpp"
 12 | #include <GLFW/glfw3.h>
 13 | #include <glm/glm.hpp>
 14 | #include "camera.hpp"
 15 | #include "gen.hpp"
 16 | #else
 17 | #include <cmath>
 18 | #endif
 19 | 
 20 | #include <thread>
 21 | #include <vector>
 22 | #include <numeric>
 23 | #include <algorithm>
 24 | 
 25 | #include "sim_param.hpp"
 26 | #include "simulator.dp.hpp"
 27 | 
 28 | 
 29 | using namespace std;
 30 | using namespace simulation;
 31 | 
 32 | int main(int argc, char **argv) {
 33 | 
 34 |    SimParam params;
 35 |    params.parseArgs(argc, argv);
 36 | 
 37 |    DiskGalaxySimulator nbodySim(params);
 38 | 
 39 | #ifndef DISABLE_GL
 40 |    // Window initialization
 41 |    GLFWwindow *window;
 42 | 
 43 |    glfwSetErrorCallback([](const int error, const char *msg) {
 44 |       cout << "Error id : " << error << ", " << msg << endl;
 45 |       exit(-1);
 46 |    });
 47 | 
 48 |    if (!glfwInit()) {
 49 |       cout << "GLFW can't initialize" << endl;
 50 |       return -1;
 51 |    }
 52 | 
 53 |    GLFWmonitor *monitor = glfwGetPrimaryMonitor();
 54 | 
 55 |    const GLFWvidmode *mode = glfwGetVideoMode(monitor);
 56 | 
 57 |    glfwWindowHint(GLFW_RED_BITS, mode->redBits);
 58 |    glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits);
 59 |    glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits);
 60 |    glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate);
 61 |    glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
 62 | 
 63 |    RendererGL renderer;
 64 | 
 65 |    renderer.initWindow();
 66 | 
 67 |    int width = mode->width;
 68 |    int height = mode->height - 30;
 69 |    window = glfwCreateWindow(width, height, "N-Body Simulation", NULL, NULL);
 70 | 
 71 |    glfwMakeContextCurrent(window);
 72 | 
 73 |    renderer.init(window, width, height, nbodySim);
 74 |    renderer.initImgui(window);
 75 | 
 76 |    // Get initial postitions generated in simulator ctor
 77 |    renderer.updateParticles();
 78 | 
 79 |    Camera camera;
 80 | 
 81 |    float last_fps{0};
 82 | #endif
 83 | 
 84 |    std::vector<float> stepTimes;
 85 |    int step{0};
 86 | 
 87 |    // Main loop
 88 |    float stepTime = 0.0;
 89 | 
 90 | #ifndef DISABLE_GL
 91 |    while (!glfwWindowShouldClose(window) &&
 92 |           glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_RELEASE &&
 93 |           step < params.numFrames) {
 94 |       double frame_start = glfwGetTime();
 95 | #else
 96 |    while ( step < params.numFrames) {
 97 | #endif
 98 |       nbodySim.stepSim();
 99 | #ifndef DISABLE_GL
100 |       renderer.updateParticles();
101 |       renderer.render(camera.getProj(width, height), camera.getView());
102 | #endif
103 |       if(!(step % 20)) stepTime = nbodySim.getLastStepTime();
104 | #ifndef DISABLE_GL
105 |       renderer.printKernelTime(stepTime);
106 | #endif
107 | 
108 |       step++;
109 |       int warmSteps{2};
110 |       if (step > warmSteps) {
111 |          stepTimes.push_back(nbodySim.getLastStepTime());
112 |          float cumStepTime =
113 |              std::accumulate(stepTimes.begin(), stepTimes.end(), 0.0);
114 |          float meanTime = cumStepTime / stepTimes.size();
115 |          float accum{0.0};
116 |          std::for_each(stepTimes.begin(), stepTimes.end(),
117 |                        [&](const float time) {
118 |                           accum += std::pow((time - meanTime), 2);
119 |                        });
120 |          float stdDev = std::pow(accum / stepTimes.size(), 0.5);
121 |          std::cout << "At step " << step << " kernel time is "
122 |                    << stepTimes.back() << " and mean is " << meanTime
123 |                    << " and stddev is: " << stdDev << "\n";
124 |       }
125 | #ifndef DISABLE_GL
126 |       // Window refresh
127 |       glfwSwapBuffers(window);
128 |       glfwPollEvents();
129 | 
130 |       // Thread sleep to match min frame time
131 |       double frame_end = glfwGetTime();
132 |       double elapsed = frame_end - frame_start;
133 |       last_fps = 1.0 / elapsed;
134 | #endif
135 |    }
136 | #ifndef DISABLE_GL
137 |    renderer.destroy();
138 |    glfwDestroyWindow(window);
139 |    glfwTerminate();
140 | #endif
141 |    return 0;
142 | }
143 | 


--------------------------------------------------------------------------------
/src_sycl/renderer.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
 2 | // Copyright (C) 2022 Codeplay Software Limited
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <GL/glew.h>
 7 | #include <GLFW/glfw3.h>
 8 | 
 9 | #include <glm/glm.hpp>
10 | #include <vector>
11 | 
12 | #include "simulator.dp.hpp"
13 | 
14 | class Renderer {
15 |   public:
16 |    virtual ~Renderer() = default;  // allows deletion through a Renderer*
17 |    virtual void initWindow() = 0;
18 | 
19 |    /**
20 |     * Initializes the gl state
21 |     * @param window GLFW window handle
22 |     * @param width viewport width
23 |     * @param height viewport height
24 |     * @param sim simulator instance handed to the renderer
25 |     */
26 |    virtual void init(GLFWwindow *window, int width, int height,
27 |                      simulation::Simulator &sim) = 0;
28 | 
29 |    virtual void destroy() = 0;
30 | 
31 |    /**
32 |     * Supplies the gl state with updated particle position and velocity
33 |     */
34 |    virtual void updateParticles() = 0;
35 | 
36 |    /**
37 |     * Renders the particles at the current step
38 |     * @param projMat projection matrix @see Camera::getProj
39 |     * @param viewMat view matrix @see Camera::getView
40 |     */
41 |    virtual void render(glm::mat4 projMat, glm::mat4 viewMat) = 0;
42 | };


--------------------------------------------------------------------------------
/src_sycl/renderer_gl.cpp:
--------------------------------------------------------------------------------
1 | ../src/renderer_gl.cpp


--------------------------------------------------------------------------------
/src_sycl/renderer_gl.hpp:
--------------------------------------------------------------------------------
1 | ../src/renderer_gl.hpp


--------------------------------------------------------------------------------
/src_sycl/shader.cpp:
--------------------------------------------------------------------------------
1 | ../src/shader.cpp


--------------------------------------------------------------------------------
/src_sycl/shader.hpp:
--------------------------------------------------------------------------------
1 | ../src/shader.hpp


--------------------------------------------------------------------------------
/src_sycl/sim_param.cpp:
--------------------------------------------------------------------------------
1 | ../src/sim_param.cpp


--------------------------------------------------------------------------------
/src_sycl/sim_param.hpp:
--------------------------------------------------------------------------------
1 | ../src/sim_param.hpp


--------------------------------------------------------------------------------
/src_sycl/simulator.dp.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2022 Codeplay Software Limited
  2 | // This work is licensed under the terms of the MIT license.
  3 | // For a copy, see https://opensource.org/licenses/MIT.
  4 | 
  5 | #include <sycl/sycl.hpp>
  6 | #include <dpct/dpct.hpp>
  7 | #include "simulator.dp.hpp"
  8 | //#include <cstddef>
  9 | #include <stdio.h>
 10 | 
 11 | #include <algorithm>
 12 | #include <cmath>
 13 | #include <random>
 14 | #include <tuple>
 15 | #include <chrono>
 16 | 
 17 | namespace simulation {
 18 | 
 19 |   // Forward decl: defined at the end of this file, but stepSim() calls it earlier
 20 |   template <CalculationMethod ct>
 21 |   void particle_interaction(ParticleData_d pPos,
 22 |         ParticleData_d pNextPos,
 23 |         ParticleData_d pVel, SimParam params,
 24 |         const sycl::nd_item<1> &item_ct1);
 25 | 
 26 |   // Builds the initial particle state on the host, then uploads it.
 27 |   // Initializers are listed in the members' declaration order.
 28 |   DiskGalaxySimulator::DiskGalaxySimulator(SimParam params_)
 29 |     : params(params_),
 30 |     pos(params_.numParticles), vel(params_.numParticles),
 31 |     pos_d(params_.numParticles), pos_next_d(params_.numParticles),
 32 |     vel_d(params_.numParticles) {
 33 |       randomParticlePos();
 34 |       initialParticleVel();
 35 |       sendToDevice();
 36 |     }
 37 | 
 38 |   // Returns the name of device 0 (assumed to be the main device).
 39 |   // The name is queried on the first call and cached in devName; the
 40 |   // returned pointer refers to that member.
 41 |   const std::string* DiskGalaxySimulator::getDeviceName() {
 42 |     // Query the device first time only
 43 |     if (devName.empty()) {
 44 |       // get_info already yields a std::string, so assign it directly
 45 |       // instead of copying a fixed 256 bytes through a char buffer: that
 46 |       // reads past the end of shorter names and leaves longer ones without
 47 |       // a terminator.
 48 |       devName = dpct::dev_mgr::instance()
 49 |                     .get_device(0)
 50 |                     .get_info<sycl::info::device::name>();
 51 |       // Keep the fallback for a device that reports no name
 52 |       if (devName.empty()) {
 53 |         devName = "Unknown Device";
 54 |       }
 55 |     }
 56 |     return &devName;
 57 |   }
 58 | 
 59 |   void DiskGalaxySimulator::stepSim() {
 60 |     // Launch geometry: enough work-groups of wg_size items to cover every particle
 61 |     int wg_size = getGwSize();
 62 |     int nblocks = ((getNumParticles() - 1) / wg_size) + 1;
 63 | 
 64 |     // Profiling info - rather than using the CUDA event recording
 65 |     // approach, we are instead measuring the time from before kernel
 66 |     // submission until host synchronization, so one measurement covers all
 67 |     // simIterationsPerFrame submissions. This is more portable via dpct.
 68 |     auto start = std::chrono::steady_clock::now();
 69 |     for (size_t i = 0; i < params.simIterationsPerFrame; i++) {
 70 |       dpct::get_default_queue().submit([&](sycl::handler &cgh) {
 71 |           auto pos_d_ct0 = pos_d;
 72 |           auto pos_next_d_ct1 = pos_next_d;
 73 |           auto vel_d_ct2 = vel_d;
 74 |           auto params_ct3 = params;
 75 | 
 76 |           if ( getCM() == CalculationMethod::BRANCH ) {
 77 |           cgh.parallel_for<
 78 |           dpct_kernel_name<class particle_interaction_da5588>>(
 79 |               sycl::nd_range<1>(
 80 |                 sycl::range<1>(nblocks) * sycl::range<1>(wg_size),
 81 |                 sycl::range<1>(wg_size)),
 82 |               [=](sycl::nd_item<1> item_ct1) {
 83 |               particle_interaction<CalculationMethod::BRANCH>(pos_d_ct0, pos_next_d_ct1, vel_d_ct2,
 84 |                   params_ct3, item_ct1);
 85 |               });
 86 |           } else {
 87 |           cgh.parallel_for<
 88 |           dpct_kernel_name<class particle_interaction_da5589>>(
 89 |               sycl::nd_range<1>(
 90 |                 sycl::range<1>(nblocks) * sycl::range<1>(wg_size),
 91 |                 sycl::range<1>(wg_size)),
 92 |               [=](sycl::nd_item<1> item_ct1) {
 93 |               particle_interaction<CalculationMethod::PREDICATED>(pos_d_ct0, pos_next_d_ct1, vel_d_ct2,
 94 |                   params_ct3, item_ct1);
 95 |               });
 96 |           }
 97 |       });
 98 |       std::swap(pos_d, pos_next_d);  // freshly written buffer becomes the next input
 99 |     }
100 |     /*
101 |      * Wait for all submitted kernels before stopping the timer. DPCT1003: the
102 |      * call returns no error code, so (expr, 0) passes gpuErrchk a constant 0.
103 |      */
104 |     gpuErrchk((dpct::get_current_device().queues_wait_and_throw(), 0));
105 |     auto stop = std::chrono::steady_clock::now();
106 |     lastStepTime =
107 |       std::chrono::duration<float, std::milli>(stop - start)
108 |       .count();
109 | 
110 |     // Sync data: copy positions and velocities back into the host vectors
111 |     recvFromDevice();
112 |   }
113 | 
114 |   // Uploads the host-side positions and velocities to the device buffers.
115 |   // Only necessary because we can't initialize data on device yet, in a
116 |   // dpct-friendly way.
117 |   void DiskGalaxySimulator::sendToDevice() {
118 |     dpct::device_ext &device = dpct::get_current_device();
119 |     sycl::queue &queue = device.default_queue();
120 | 
121 |     // One blocking host -> device copy of a single coordinate array.
122 |     // DPCT1003: memcpy/wait return no error code, hence the (expr, 0) form.
123 |     auto upload = [&](coords_t *dst, const std::vector<coords_t> &src) {
124 |       gpuErrchk((queue
125 |             .memcpy(dst, src.data(), params.numParticles * sizeof(coords_t))
126 |             .wait(),
127 |             0));
128 |     };
129 | 
130 |     gpuErrchk((device.queues_wait_and_throw(), 0));
131 | 
132 |     upload(pos_d.x, pos.x);
133 |     upload(pos_d.y, pos.y);
134 |     upload(pos_d.z, pos.z);
135 | 
136 |     upload(vel_d.x, vel.x);
137 |     upload(vel_d.y, vel.y);
138 |     upload(vel_d.z, vel.z);
139 | 
140 |     gpuErrchk((device.queues_wait_and_throw(), 0));
141 |   }
187 | 
188 |   // Receive particle positions & velocity from device
189 |   void DiskGalaxySimulator::recvFromDevice() {
190 |     dpct::device_ext &device = dpct::get_current_device();
191 |     sycl::queue &queue = device.default_queue();
192 | 
193 |     // One blocking device -> host copy of a single coordinate array.
194 |     // DPCT1003: memcpy/wait return no error code, hence the (expr, 0) form.
195 |     auto download = [&](std::vector<coords_t> &dst, const coords_t *src) {
196 |       gpuErrchk((queue
197 |             .memcpy(dst.data(), src, params.numParticles * sizeof(coords_t))
198 |             .wait(),
199 |             0));
200 |     };
201 | 
202 |     gpuErrchk((device.queues_wait_and_throw(), 0));
203 | 
204 |     download(pos.x, pos_d.x);
205 |     download(pos.y, pos_d.y);
206 |     download(pos.z, pos_d.z);
207 | 
208 |     download(vel.x, vel_d.x);
209 |     download(vel.y, vel_d.y);
210 |     download(vel.z, vel_d.z);
211 | 
212 |     gpuErrchk((device.queues_wait_and_throw(), 0));
213 |   }
259 | 
260 |   void DiskGalaxySimulator::randomParticlePos() {
261 |     // Default-constructed engine, i.e. fixed default seed: same layout every run
262 |     std::mt19937 gen;
263 |     std::uniform_real_distribution<> dis(0.0, 1.0);
264 | 
265 |     // Disk shape in x-y plane: angle t from a draw scaled by 2*PI, radius s by 100
266 |     for (int i = 0; i < params.numParticles; i++) {
267 |       float t = dis(gen) * 2 * PI;
268 |       float s = dis(gen) * 100;
269 |       pos.x[i] = cos(t) * s;
270 |       pos.y[i] = sin(t) * s;
271 |     }
272 | 
273 |     // Z component is independent (uniform range 0-4), drawn after all x/y values
274 |     std::generate(begin(pos.z), end(pos.z),
275 |         [&gen, &dis]() { return 4.0 * dis(gen); });
276 |   }
277 | 
278 |   // Sets each particle's velocity along pos x (0, 0, 1), i.e. within the x-y
279 |   // plane, with magnitude sqrt(2 * |pos x (0, 0, 1)|).
280 |   void DiskGalaxySimulator::initialParticleVel() {
281 |     for (int i = 0; i < params.numParticles; i++) {
282 |       const vec3 tangent = cross({pos.x[i], pos.y[i], pos.z[i]}, {0.0, 0.0, 1.0});
283 |       const coords_t speed = std::sqrt(2.0 * length(tangent));
284 |       const vec3 v = normalize(tangent) * speed;
285 |       vel.x[i] = v.x; vel.y[i] = v.y; vel.z[i] = v.z;
286 |     }
287 |   }
288 | 
289 |   // Host-side copies; stepSim() refreshes them through recvFromDevice().
290 |   const ParticleData& DiskGalaxySimulator::getParticlePos() { return pos; }
291 |   const ParticleData& DiskGalaxySimulator::getParticleVel() { return vel; }
292 | 
293 |   // Linear algebra helpers (declared in simulator.dp.hpp): v0 x v1
294 |   HOSTDEV vec3 cross(const vec3 v0, const vec3 v1) {
295 |     const coords_t cx = v0.y * v1.z - v0.z * v1.y;
296 |     const coords_t cy = v0.z * v1.x - v0.x * v1.z;
297 |     const coords_t cz = v0.x * v1.y - v0.y * v1.x;
298 |     return vec3(cx, cy, cz);
299 |   }
300 | 
301 |   // Euclidean length of v
302 |   HOSTDEV coords_t length(const vec3 v) {
303 |     return sycl::sqrt((v.x * v.x) + (v.y * v.y) + (v.z * v.z));
304 |   }
305 | 
306 |   // v divided by its length; a zero-length v is not special-cased
307 |   HOSTDEV vec3 normalize(const vec3 v) {
308 |     const coords_t len = length(v);
309 |     return vec3(v.x / len, v.y / len, v.z / len);
310 |   }
311 | 
312 |   /* O(n^2) implementation (no distance threshold), with no shared
313 |      memory etc. One work-item per particle: sums the pull of every other
314 |      particle, then writes the damped velocity and the next position. */
315 |   template <CalculationMethod ct>
316 |     void particle_interaction(ParticleData_d pPos,
317 |         ParticleData_d pNextPos,
318 |         ParticleData_d pVel, SimParam params,
319 |         const sycl::nd_item<1> &item_ct1) {
320 |       int id = item_ct1.get_local_id(0) +
321 |         (item_ct1.get_group(0) * item_ct1.get_local_range(0));
322 |       if (id >= params.numParticles) return;  // surplus item of the rounded-up launch
323 | 
324 |       vec3 force(0.0f, 0.0f, 0.0f);
325 |       vec3 pos(pPos.x[id], pPos.y[id], pPos.z[id]);
326 | 
327 | #pragma unroll 4
328 |       for (int i = 0; i < params.numParticles; i++) {
329 |         vec3 other_pos{pPos.x[i], pPos.y[i], pPos.z[i]};
330 |         vec3 r = other_pos - pos;
331 |         // Fast computation of 1/(|r|^3)
332 |         coords_t dist_sqr = dot(r, r) + params.distEps;
333 |         coords_t inv_dist_cube = sycl::rsqrt(dist_sqr * dist_sqr * dist_sqr);
334 | 
335 |         // assume uniform unit mass; both variants must skip the self term i == id
336 |         if  constexpr(ct == CalculationMethod::BRANCH) {
337 |           if (i == id) continue;
338 |           force += r * inv_dist_cube;
339 |         } else  if constexpr (ct == CalculationMethod::PREDICATED) {
340 |           force += r * inv_dist_cube * (i != id);  // mask is 0 only for the self term
341 |         }
342 |       }
343 | 
344 |       // Update velocity
345 |       vec3 curr_vel(pVel.x[id], pVel.y[id], pVel.z[id]);
346 |       curr_vel *= params.damping;
347 |       curr_vel += force * params.dt * params.G;
348 | 
349 |       pVel.x[id] = curr_vel.x;
350 |       pVel.y[id] = curr_vel.y;
351 |       pVel.z[id] = curr_vel.z;
352 | 
353 |       // Update position (integration)
354 |       vec3 curr_pos(pPos.x[id], pPos.y[id], pPos.z[id]);
355 | 
356 |       curr_pos += curr_vel * params.dt;
357 |       pNextPos.x[id] = curr_pos.x;
358 |       pNextPos.y[id] = curr_pos.y;
359 |       pNextPos.z[id] = curr_pos.z;
360 |     }
361 | 
362 | }  // namespace simulation
363 | 


--------------------------------------------------------------------------------
/src_sycl/simulator.dp.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2022 Codeplay Software Limited
  2 | // This work is licensed under the terms of the MIT license.
  3 | // For a copy, see https://opensource.org/licenses/MIT.
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <sycl/sycl.hpp>
  8 | #include <dpct/dpct.hpp>
  9 | #include <stdio.h>
 10 | 
 11 | #include <string>
 12 | #include <vector>
 13 | 
 14 | #include "sim_param.hpp"
 15 | 
 16 | #ifdef SYCL_LANGUAGE_VERSION
 17 | #define HOSTDEV 
 18 | #else
 19 | #define HOSTDEV
 20 | #endif
 21 | 
 22 | #define gpuErrchk(ans) \
 23 | { gpuAssert((ans), __FILE__, __LINE__); }
 24 | inline void gpuAssert(dpct::err0 code, const char *file, int line,  // reached via gpuErrchk
 25 |     bool abort = true) {  // body is empty: code, file, line and abort are all ignored
 26 | }
 27 | 
 28 | namespace simulation {
 29 | 
 30 |   const float PI = 3.14159265358979323846;
 31 | 
 32 |   typedef float coords_t;
 33 | 
 34 |   // Small 3-component vector of coords_t with the arithmetic needed by the
 35 |   // simulator; used by both host code and the device kernel.
 36 |   struct vec3 {
 37 |     coords_t x = 0.0;
 38 |     coords_t y = 0.0;
 39 |     coords_t z = 0.0;
 40 | 
 41 |     HOSTDEV vec3() {}
 42 |     HOSTDEV vec3(coords_t x_, coords_t y_, coords_t z_)
 43 |       : x{x_}, y{y_}, z{z_} {}
 44 | 
 45 |     // Component-wise in-place addition
 46 |     HOSTDEV inline vec3 &operator+=(const vec3 &rhs) {
 47 |       x += rhs.x; y += rhs.y; z += rhs.z;
 48 |       return *this;
 49 |     }
 50 | 
 51 |     // In-place multiplication of every component by scale
 52 |     HOSTDEV inline vec3 &operator*=(const coords_t &scale) {
 53 |       x *= scale; y *= scale; z *= scale;
 54 |       return *this;
 55 |     }
 56 |   };
 57 | 
 58 |   // Free vec3 helpers: scalar scaling, component-wise difference and dot
 59 |   // product. Each returns a new value and leaves its arguments untouched.
 60 |   HOSTDEV inline const vec3 operator*(const vec3 &pos, const coords_t &scale) {
 61 |     return vec3(pos.x * scale, pos.y * scale, pos.z * scale);
 62 |   }
 63 |   HOSTDEV inline const vec3 operator-(const vec3 &vec1, const vec3 &vec2) {
 64 |     return vec3(vec1.x - vec2.x, vec1.y - vec2.y, vec1.z - vec2.z);
 65 |   }
 66 |   HOSTDEV inline coords_t dot(const vec3 &vec1, const vec3 &vec2) {
 67 |     return (vec1.x * vec2.x) + (vec1.y * vec2.y) + (vec1.z * vec2.z);
 68 |   }
 69 | 
 70 |   // Host-side structure-of-arrays: one vector per coordinate.
 71 |   struct ParticleData {
 72 |     std::vector<coords_t> x, y, z;
 73 |     // Takes over three caller-supplied coordinate vectors.
 74 |     ParticleData(std::vector<coords_t> x_, std::vector<coords_t> y_,
 75 |         std::vector<coords_t> z_)
 76 |       : x(std::move(x_)), y(std::move(y_)), z(std::move(z_)) {}
 77 |     // n zero-valued entries per coordinate.
 78 |     ParticleData(size_t n) : x(n, 0.0), y(n, 0.0), z(n, 0.0) {}
 79 |   };
 80 | 
 81 |   // Simply holds 3 coords_t* as a SoA; copies are shallow and nothing here frees the memory
 82 |   struct ParticleData_d {
 83 |     coords_t *x = nullptr;
 84 |     coords_t *y = nullptr;
 85 |     coords_t *z = nullptr;
 86 | 
 87 |     ParticleData_d(size_t n) {
 88 |       dpct::device_ext &dev_ct1 = dpct::get_current_device();
 89 |       sycl::queue &q_ct1 = dev_ct1.default_queue();
 90 |       // Allocate n device elements per coordinate via the current device's default queue
 91 |       /*
 92 |        * DPCT1003: malloc_device yields a pointer, not an error code, so the
 93 |        * (expr, 0) form hands gpuErrchk a constant 0; x itself is not checked.
 94 |        */
 95 |       gpuErrchk((x = sycl::malloc_device<coords_t>(n, q_ct1), 0));
 96 |       /*
 97 |        * Same pattern for the y array (DPCT1003); the resulting pointer is
 98 |        * likewise not checked here.
 99 |        */
100 |       gpuErrchk((y = sycl::malloc_device<coords_t>(n, q_ct1), 0));
101 |       /*
102 |        * Same pattern for the z array (DPCT1003); the resulting pointer is
103 |        * likewise not checked here.
104 |        */
105 |       gpuErrchk((z = sycl::malloc_device<coords_t>(n, q_ct1), 0));
106 |     };
107 |   };
108 | 
109 |   HOSTDEV coords_t length(const vec3 v);
110 |   HOSTDEV vec3 cross(const vec3 v0, const vec3 v1);
111 |   HOSTDEV vec3 normalize(const vec3 v);
112 | 
113 |   // Interface class for Simulator implementations.
114 |   class Simulator {
115 |     public:
116 |       // Virtual so implementations can be destroyed through a Simulator*
117 |       virtual ~Simulator() = default;
118 |       virtual void stepSim() = 0;
119 |       virtual size_t getNumParticles() = 0;
120 |       virtual const ParticleData &getParticlePos() = 0;
121 |       virtual const ParticleData &getParticleVel() = 0;
122 |       virtual float getLastStepTime() = 0;
123 |       virtual const std::string* getDeviceName() = 0;
124 |       virtual CalculationMethod getCM() = 0;
125 |   };
126 | 
127 |   /*
128 |      DiskGalaxySimulator class to handle execution of the nbody simulation.
129 | 
130 |      Regular data transfer only occurs in the device->host direction (from
131 |      Simulator to Renderer).
132 | 
133 |      Invariants:
134 |      - Has params
135 |      - Has valid particle positions & velocities, allocated on host & device
136 |    */
137 | 
138 |   class DiskGalaxySimulator : public Simulator {
139 |     public:
140 |       DiskGalaxySimulator(SimParam params_);
141 | 
142 |       void stepSim() override;
143 |       float getLastStepTime() override { return lastStepTime; }
144 |       size_t getNumParticles() override { return params.numParticles; }
145 |       const ParticleData &getParticlePos() override;
146 |       const ParticleData &getParticleVel() override;
147 |       const std::string* getDeviceName() override;
148 |       int getGwSize() { return params.gwSize; }  // not part of the Simulator interface
149 |       CalculationMethod getCM() override { return params.calcMethod; }
150 | 
151 |     private:
152 |       SimParam params;
153 |       std::string devName;  // filled in by the first getDeviceName() call
154 |       float lastStepTime{0.0};
155 | 
156 |       // Data for particle positions & vel on host
157 |       ParticleData pos;
158 |       ParticleData vel;
159 | 
160 |       // and on device
161 |       ParticleData_d pos_d;
162 |       ParticleData_d pos_next_d;  // double buffering
163 |       ParticleData_d vel_d;
164 | 
165 |       void randomParticlePos();
166 |       void initialParticleVel();
167 |       void sendToDevice();
168 |       void recvFromDevice();
169 |   };
170 | 
171 | }  // namespace simulation
172 | 


--------------------------------------------------------------------------------