├── .clang-format
├── .github
├── dependabot.yml
└── workflows
│ └── scorecard.yml
├── .gitignore
├── .vscode
├── launch.json
└── tasks.json
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── cmake
├── FindGLEW.cmake
├── FindGLFW.cmake
├── FindGLM.cmake
├── FindVulkan.cmake
└── Finddpct.cmake
├── docs
├── Eq1.png
└── downscale_artefact.png
├── libs
└── imgui
│ ├── CMakeLists.txt
│ ├── include
│ ├── imconfig.h
│ ├── imgui.h
│ ├── imgui_impl_glfw.h
│ ├── imgui_impl_opengl3.h
│ ├── imgui_impl_opengl3_loader.h
│ ├── imgui_internal.h
│ ├── imstb_rectpack.h
│ ├── imstb_textedit.h
│ └── imstb_truetype.h
│ └── src
│ ├── imgui.cpp
│ ├── imgui_demo.cpp
│ ├── imgui_draw.cpp
│ ├── imgui_impl_glfw.cpp
│ ├── imgui_impl_opengl3.cpp
│ ├── imgui_tables.cpp
│ └── imgui_widgets.cpp
├── scripts
├── build_cuda.sh
├── build_dpcpp.sh
├── docker_build_etc.sh
├── perf_test.sh
├── perf_test_cuda.sh
├── perf_test_dpcpp.sh
├── run_dpct.sh
├── run_dpct_native.sh
├── run_nbody.sh
└── xvfb.sh
├── shaders
└── gl
│ ├── blur.frag
│ ├── deferred.vert
│ ├── integration.comp
│ ├── interaction.comp
│ ├── luminance.frag
│ ├── main.frag
│ ├── main.geom
│ ├── main.vert
│ └── tonemap.frag
├── src
├── CMakeLists.txt
├── camera.cpp
├── camera.hpp
├── gen.cpp
├── gen.hpp
├── nbody.cpp
├── renderer.hpp
├── renderer_gl.cpp
├── renderer_gl.hpp
├── shader.cpp
├── shader.hpp
├── sim_param.cpp
├── sim_param.hpp
├── simulator.cu
└── simulator.cuh
└── src_sycl
├── CMakeLists.txt
├── README.md
├── camera.cpp
├── camera.hpp
├── gen.cpp
├── gen.hpp
├── nbody.cpp
├── renderer.hpp
├── renderer_gl.cpp
├── renderer_gl.hpp
├── shader.cpp
├── shader.hpp
├── sim_param.cpp
├── sim_param.hpp
├── simulator.dp.cpp
└── simulator.dp.hpp
/.clang-format:
--------------------------------------------------------------------------------
1 | {BasedOnStyle: Google, IndentWidth: 3, ColumnLimit: 80, NamespaceIndentation: All, AlignTrailingComments: true}
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | # Enable version updates for Github Actions
4 | - package-ecosystem: "github-actions"
5 | directory: "/"
6 | schedule:
7 | interval: "monthly"
8 | groups:
9 | github-actions:
10 | patterns:
11 | - "*"
12 | reviewers:
13 | - "codeplaysoftware/security-managers"
14 |
--------------------------------------------------------------------------------
/.github/workflows/scorecard.yml:
--------------------------------------------------------------------------------
1 | # Scorecards' GitHub action
2 |
3 | name: Scorecard supply-chain security
4 | on:
5 | # For Branch-Protection check. Only the default branch is supported. See
6 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
7 | branch_protection_rule:
8 | schedule:
9 | - cron: '15 18 * * 5'
10 | push:
11 | branches: [ "main" ]
12 |
13 | # Declare default permissions as read only.
14 | permissions: read-all
15 |
16 | jobs:
17 | analysis:
18 | name: Scorecard analysis
19 | runs-on: ubuntu-latest
20 | permissions:
21 | # Needed to upload the results to code-scanning dashboard.
22 | security-events: write
23 | # Needed to publish results and get a badge (see publish_results below).
24 | id-token: write
25 |
26 | steps:
27 | - name: "Checkout code"
28 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
29 | with:
30 | persist-credentials: false
31 |
32 | - name: "Run analysis"
33 | uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0
34 | with:
35 | results_file: results.sarif
36 | results_format: sarif
37 | publish_results: true
38 |
39 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
40 | # format to the repository Actions tab.
41 | - name: "Upload artifact"
42 | uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
43 | with:
44 | name: SARIF file
45 | path: results.sarif
46 | retention-days: 5
47 |
48 | # Upload the results to GitHub's code scanning dashboard (optional).
49 | # Commenting out will disable upload of results to your repo's Code Scanning dashboard
50 | - name: "Upload to code-scanning"
51 | uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
52 | with:
53 | sarif_file: results.sarif
54 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build*
2 | nbodygl
3 | nbodyvk
4 | .cache
5 | *~
6 | settings.json
7 | **/dpct-output/
8 | \#*
9 | *mp4
10 | lib/**cpp
11 | lib/**h
12 | *fatbin*
13 | *.ptx
14 | log*
15 | .vscode
16 | nbody_dpcpp
17 | nbody_cuda
18 | nbody_cuda_d
19 | nbody_dpcpp_d
20 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "DEBUG: (gdb-oneapi) nbody_dpcpp_d Launch",
5 | "type": "cppdbg",
6 | "request": "launch",
7 | "preLaunchTask": "Debug C/C++: DPCPP Makefile",
8 | "postDebugTask": "",
9 | "program": "${workspaceFolder}/nbody_dpcpp_d",
10 | "args": ["50", "4", "0.999998", "0.005", "1.0e-7", "2.0", "100000"],
11 | "stopAtEntry": true,
12 | "cwd": "${workspaceFolder}",
13 | "environment": [
14 | {
15 | "name": "ZET_ENABLE_PROGRAM_DEBUGGING",
16 | "value": "1"
17 | },
18 | {
19 | "name": "IGC_EnableGTLocationDebugging",
20 | "value": "1"
21 | }
22 | ],
23 | "externalConsole": false,
24 | "MIMode": "gdb",
25 | "miDebuggerPath": "gdb-oneapi",
26 | "setupCommands": [
27 | {
28 | "description": "Enable pretty-printing for gdb",
29 | "text": "-enable-pretty-printing",
30 | "ignoreFailures": true
31 | },
32 | {
33 | "description": "Disable target async",
34 | "text": "set target-async off",
35 | "ignoreFailures": true
36 | },
37 | {
38 | "description": "Do not display function arguments when printing a stack frame",
39 | "text": "set print frame-arguments none",
40 | "ignoreFailures": true
41 | }
42 | ]
43 | },
44 | {
45 | "name": "DEBUG: (cuda-gdb) nbody_cuda_d Launch",
46 | "type": "cuda-gdb",
47 | "request": "launch",
48 | "preLaunchTask": "Debug C/C++: CUDA Makefile",
49 | "postDebugTask": "",
50 | "program": "${workspaceFolder}/nbody_cuda_d",
51 | "args": "50 4 0.999998 0.005 1.0e-7 2.0 100000",
52 | "stopAtEntry": true,
53 | "cwd": "${workspaceFolder}",
54 | }
55 | ]
56 | }
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "type": "cppbuild",
6 | "label": "Debug C/C++: CUDA Makefile",
7 | "command": "make",
8 | "args": [
9 | "debug",
10 | ],
11 | "options": {
12 | "cwd": "${workspaceFolder}/build_cuda"
13 | },
14 | "problemMatcher": [
15 | "$gcc"
16 | ],
17 | "group": "build",
18 | "detail": "compiler: INTEL oneapi icpx"
19 | },
20 | {
21 | "type": "cppbuild",
22 | "label": "Release C/C++: CUDA Makefile",
23 | "command": "make",
24 | "args": [
25 | "release"
26 | ],
27 | "options": {
28 | "cwd": "${workspaceFolder}/build_cuda"
29 | },
30 | "problemMatcher": [
31 | "$gcc"
32 | ],
33 | "group": "build",
34 | "detail": "compiler: INTEL oneapi icpx"
35 | },
36 | {
37 | "type": "cppbuild",
38 | "label": "Release C/C++: DPCPP Makefile",
39 | "command": "make",
40 | "args": [
41 | "release"
42 | ],
43 | "options": {
44 | "cwd": "${workspaceFolder}/build_dpcpp"
45 | },
46 | "problemMatcher": [
47 | "$gcc"
48 | ],
49 | "group": "build",
50 | "detail": "compiler: INTEL oneapi icpx"
51 | },
52 | {
53 | "type": "cppbuild",
54 | "label": "Debug C/C++: DPCPP Makefile",
55 | "command": "make",
56 | "args": [
57 | "debug"
58 | ],
59 | "options": {
60 | "cwd": "${workspaceFolder}/build_dpcpp"
61 | },
62 | "problemMatcher": [
63 | "$gcc"
64 | ],
65 | "group": "build",
66 | "detail": "compiler: INTEL oneapi icpx"
67 | }
68 | ]
69 | }
70 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2016 - 2018 Sarah Le Luron
2 | # Copyright (C) 2022 Codeplay Software Limited
3 |
4 | cmake_minimum_required (VERSION 3.16)
5 |
6 | project (nbody LANGUAGES CXX)
7 |
8 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
9 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
10 |
11 | set(BACKEND "CUDA" CACHE STRING "Which backend to build")
12 | option(RENDER "Use openGl or not" ON)
13 |
14 | if(BACKEND STREQUAL "CUDA")
15 | set(BINARY_NAME "nbody_cuda" CACHE STRING "Binary name")
16 | enable_language(CUDA)
17 | add_subdirectory(src)
18 | elseif(BACKEND STREQUAL "DPCPP")
19 | set(BINARY_NAME "nbody_dpcpp" CACHE STRING "Binary name")
20 | add_subdirectory(src_sycl)
21 | else()
22 | message(FATAL_ERROR "Unrecognized BACKEND")
23 | endif()
24 |
25 | if(RENDER)
26 | add_subdirectory(libs/imgui)
27 | endif()
28 |
29 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and
9 | expression, level of experience, education, socio-economic status, nationality,
10 | personal appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at
59 | [sycl@codeplay.com](mailto:sycl@codeplay.com). All complaints will be reviewed
60 | and investigated and will result in a response that is deemed necessary and
61 | appropriate to the circumstances. The project team is obligated to maintain
62 | confidentiality with regard to the reporter of an incident. Further details of
63 | specific enforcement policies may be posted separately.
64 |
65 | Project maintainers who do not follow or enforce the Code of Conduct in good
66 | faith may face temporary or permanent repercussions as determined by other
67 | members of the project's leadership.
68 |
69 | ## Attribution
70 |
71 | This Code of Conduct is adapted from the
72 | [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4,
73 | available at
74 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
75 |
76 | ---
77 |
78 | If there are any issues or suggestions relating to the current set of rules, you
79 | can reach us at [sycl@codeplay.com](mailto:sycl@codeplay.com).
80 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright for portions of project 'nbody' are held by Sarah Le Luron, 2016-2018 as part of project 'dpct-nbody'. All other copyright for project 'dpct-nbody' are held by Codeplay Software Limited, 2022.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nbody
2 |
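3 | [![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/codeplaysoftware/cuda-to-sycl-nbody/badge)](https://scorecard.dev/viewer/?uri=github.com/codeplaysoftware/cuda-to-sycl-nbody)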
4 |
5 | Accelerated N-body sim with OpenGL graphics & automatic CUDA->SYCL conversion using [dpct](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html).
6 |
7 | 
8 |
9 | Forked from https://github.com/salel/nbody
10 |
11 | ## Compilers/Backends
12 |
13 | This nbody simulation can be run with any of:
14 | - CUDA
15 | - DPC++ CUDA backend
16 | - DPC++ OpenCL CPU backend
17 |
18 | Source code for the CUDA version is in `./src/` while `./src_sycl/` contains the semi-automatically converted SYCL code.
19 |
20 | ## Build Dependencies
21 |
22 | ### Graphics Dependencies
23 |
24 | By default the build requires OpenGL. See the **Building** section below to build without rendering.
25 |
26 | The rendering components of this code are independent of the CUDA/SYCL backend, and depend on:
27 | - GLM
28 | - GLFW
29 | - GLEW
30 |
31 | These can be installed with apt:
32 | ```
33 | sudo apt update
34 | sudo apt install libglew-dev libglfw3-dev libglm-dev libxxf86vm-dev libxcursor-dev libxinerama-dev libxi-dev
35 | ```
36 |
37 | The implementation relies on OpenGL 4.5.
38 |
39 | ### Simulation Dependencies (CUDA & SYCL)
40 |
41 | The CUDA version of this code requires the [CUDA runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain-with-support-for-nvidia-cuda) to be installed on your machine.
42 |
43 | The DPC++ CUDA backend version also requires the CUDA runtime.
44 |
45 | The DPC++ OpenCL backend requires an [OpenCL runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#install-low-level-runtime). To run specifically on the CPU, you must install the OpenCL runtime for your CPU.
46 |
47 | Both DPC++ backends require the [DPC++ compiler](https://intel.github.io/llvm-docs/GetStartedGuide.html) to compile the SYCL code.
48 |
49 | ## Building
50 |
51 | This project uses CMake for build configuration. Build scripts for CUDA and DPC++ are located in `./scripts/`. Note that these scripts include some hardcoded paths from our dev machine, and so will not work out of the box.
52 |
53 | The CMake option `-DBACKEND` selects which backend ("CUDA" or "DPCPP") to build. CUDA is built by default. The name of the built binary is suffixed with the backend (`nbody_cuda` or `nbody_dpcpp`).
54 |
55 | The DPC++ backend, in turn, supports both an OpenCL & CUDA backend, both of which are built by default. If you are building on a machine without CUDA support, you can switch off the DPC++ CUDA backend with the flag `-DDPCPP_CUDA_SUPPORT=off`.
56 |
57 | The build scripts create a version that includes rendering. To build versions that do not require OpenGL, provide the argument **no_render** to the build scripts.
58 |
59 | By default, a **release** target is built, for example, `nbody_cuda`. To build a debug version, navigate to the build directory and execute **make debug**. Running **make** will build both versions. The debug binary will share the same name as the **release** version with "_d" appended.
60 |
61 | The provided `tasks.json` and `launch.json` configuration files for vscode serve as examples, demonstrating how to initiate a debug session directly from within vscode.
62 |
63 | ## Migrating CUDA to SYCL
64 |
65 | The script `./scripts/run_dpct.sh` calls a containerized version of the Intel® DPC++ Compatibility Tool to automatically convert the CUDA components of this project into SYCL. A docker container was used because the dev machine has an incompatible version of the CUDA driver. This should be adapted based on your environment.
66 |
67 | The Intel® DPC++ compatibility tool offers options for intercepting complex builds, but current dev environment restrictions require me to run the tool inside a docker container. This complicates things, so for now I'm just doing single source conversion on the simulator.cu file.
68 |
69 | ## Running on different platforms
70 |
71 | The script `./scripts/run_nbody.sh` will run the nbody simulation, selecting a different binary based on the `-b` flag, where `-b` can be `cuda` or `dpcpp`. Subsequent positional arguments are passed on to the `nbody` binary. These positional arguments are described in the [Simulation](#Simulation) section. For example, to run on the DPC++ OpenCL host backend with 25600 (100 * 256) particles, executing 10 timesteps per rendered frame:
72 |
73 | ```
74 | ./scripts/run_nbody.sh -b dpcpp 100 10
75 | ```
76 |
77 | Note that this script runs `nbody` with the default X window, as opposed to using [xvfb](#Running-headless). This makes it unsuitable for running on a remote machine.
78 |
79 | `run_nbody.sh` is a simple wrapper around the `nbody_*` binaries with some environment variables set; the sections below describe how to launch the binaries directly.
80 |
81 | ### Detecting available SYCL backends
82 |
83 | The `sycl-ls` tool allows you to check for available backends on the system. For example, on a system with Intel OpenCL CPU runtime & CUDA runtime, the output is:
84 |
85 | ```
86 | > sycl-ls
87 | [opencl:cpu:0] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000]
88 | [opencl:cpu:1] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000]
89 | [cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 3060 0.0 [CUDA 11.6]
90 | [host:host:0] SYCL host platform, SYCL host device 1.2 [1.2]
91 | ```
92 |
93 | ### Selecting a backend (DPC++)
94 |
95 | By specifying the environment variable `SYCL_DEVICE_FILTER`, it's possible to switch between running with the CUDA backend and the OpenCL host backend. For example:
96 |
97 | ```
98 | SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp
99 | ```
100 | will run on the CUDA backend, whereas:
101 | ```
102 | SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp
103 | ```
104 | will run on a CPU through the OpenCL backend. Note the correspondence between options for `SYCL_DEVICE_FILTER` and the output of `sycl-ls`.
105 |
106 | **Note**: Selection between DPC++ backends at runtime is possible because `CMakeLists.txt` specifies building the SYCL code for both CUDA (`nvptx64-nvidia-cuda`) & OpenCL (`spir64`) targets:
107 | ```
108 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -fsycl-targets=spir64,nvptx64-nvidia-cuda -fsycl-unnamed-lambda")
109 | ```
110 |
111 | ### Adapting the project for DPC++ OpenCL
112 |
113 | No changes to the code were required, but there were a couple of bugs which are worked around.
114 |
115 | Firstly, when building for multiple targets (`-fsycl-targets`), there is a [recent bug](https://github.com/intel/llvm/issues/5330) which causes failure to link to static libraries. The workaround for this is to switch from building `imgui` as a static to a shared library.
116 |
117 | Secondly, I encountered the common CL header bug (see [here](https://github.com/intel/llvm/issues/2617) and [here](https://github.com/oneapi-src/oneDNN/issues/885)). This turned out to be triggered for the `spir64` backend because the CUDA headers were included *only* via `-I` and not via `-internal-isystem`. This caused them to take precedence over SYCL CL headers. The solution was to not include CUDA headers in `src_sycl/CMakeLists.txt`, which turned out to be unnecessary anyway.
118 |
119 | ## Passing data between OpenGL & CUDA/SYCL
120 |
121 | OpenGL & CUDA are capable of interoperating to share device memory, but this will not play well with the Intel® DPC++ Compatibility Tool. Instead, computed particle positions are migrated back to the host by CUDA/SYCL, then sent *back* to OpenGL via mapping.
122 |
123 |
124 | ## Simulation
125 |
126 | The `DiskGalaxySimulator` class handles the physics of n-body interaction. The computation of interparticle forces, velocity & updated particle positions are handled by the CUDA kernel `particle_interaction`.
127 |
128 | The equation solved by this code is equivalent to Eq. 1 [here](http://www.scholarpedia.org/article/N-body_simulations_(gravitational)), with the simplifying assumption that all particles have unit mass and there is no external/background force. This becomes:
129 |
130 | ![Simplified N-body force equation (Eq. 1)](docs/Eq1.png)
131 |
132 | The force vector on each particle (F) is the sum of gravitational forces from all other particles. For each particle interaction, the attractive force is inversely proportional to the square of the distance between them. This force is equal to the gravitational constant (`G`) multiplied by the unit vector pointing between the particles, divided by the square of this distance. The equation above has this last term slightly rearranged to avoid unnecessary computation.
133 |
134 | Given the assumption of unit mass, the force (F) is equal to the acceleration, and so at each timestep, the force vector F is multiplied by the timestep size (`dt`) and added to the velocity vector. The position of each particle is then updated by the velocity multiplied by the timestep size (`dt`).
135 |
136 | A drag factor (`damping`) is used to regulate the velocity. At each timestep, the velocity is multiplied by the drag term, slowing the particles. The maximum force between very close particles is also limited for stability; this is achieved via an epsilon term (`distEps`) which is added to the distance between each particle pairing.
137 |
138 | The `parameters` described in this section can all be adjusted via command line arguments, as follows:
139 |
140 | `./nbody_cuda numParticles simIterationsPerFrame damping dt distEps G numFrames gwSize calcMethod`
141 |
142 | Note that `numParticles` specifies the number of particles simulated, divided by blocksize (i.e. setting `numParticles` to 50 produces 50*256 particles). `simIterationsPerFrame` specifies how many steps of the simulation to take before rendering the next frame and `numFrames` specifies the total number of simulation steps before the program exits. For default values for all of these parameters, refer to `sim_param.cpp`.
143 |
144 | `gwSize`: This parameter allows changing the work group size from the default 64.
145 |
146 | `calcMethod`: This string parameter selects how the interaction kernel is coded. The default value, `BRANCH`, uses a branch instruction; `PREDICATED` replaces the branch with an arithmetic expression. Refer to the [performance](#sycl-vs-cuda-performance) section for details.
147 |
148 |
149 | ### Modifying Simulation Behaviour
150 |
151 | You can get quite a wide range of 'galactic' behaviours by playing with the parameters described above.
152 |
153 | Initial velocity of stars is a stable orbital velocity, computed with an implicit value for gravity of `G = 1`. The default value *during* the simulation, however, is `G = 2`. So by default the galaxy collapses inwards quite quickly, but by reducing G closer to 1, you can make a more stable, rotating galaxy.
154 |
155 | The `damping` factor is a drag term. By default `damping = 0.999998` but by reducing this value to e.g. `0.999`, stars will tend to form local clusters before collapsing in towards the galactic centre.
156 |
157 | `distEps` serves as a stabilising parameter to prevent numerical instability at larger timestep sizes. Setting this value very small (`1.0e-10`) will produce more 'explosive' simulations. This is unrealistic for n-body gravitational interaction, but it looks dramatic.
158 |
159 | If you want to speed up the evolution of the galaxy, set a larger timestep size (`dt`) or increase the number of steps taken per frame (`simIterationsPerFrame`). Either change will increase the total simulation time per rendered frame. If you reach a sufficiently high timestep size that you get unstable explosive behaviour, increase the value of `distEps` and this should stabilise things. Note that there is a separate discussion [below](#performance-scaling-for-demos) about altering the ratio of compute/render time to, for instance, visually highlight a performance difference between platforms.
160 |
161 | ## Graphics Pipeline
162 |
163 | ### Rendering
164 | Render targets for all passes except the last use dimensions a bit larger than the window, to prevent popping. This is used when some effects affect neighboring pixels (bloom, ssao..) and must be taken into account even when off-screen.
165 | #### HDR
166 | Each particle is rendered as a fixed-size flare, generated from a gaussian. Particle color depends on velocity, blue at low speeds and purple at high speeds. Additive blending is set, so dense regions look bright. The render target is RGBA16F, because GL_R11F_G11F_B10F looks yellow on subsequent render passes.
167 |
168 | #### Bloom
169 |
170 | Bloom is applied through a separable Gaussian blur, applied once in the horizontal and then the vertical direction. The 1D Gaussian kernel is computed by `RendererGL::gaussKernel` and optimized to minimize texel lookups by `RendererGL::optimGaussKernel` following [this guide](https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/). At present, a gaussian window of 49 pixels with sigma = 10.0 is used. Multiple passes are possible (ping pong between two RGBA16F frame buffers), though at present we execute only one blur in each direction.
171 |
172 | Note that unlike typical bloom processing, there is no extraction of bright light sources prior to blurring, because the scene (bright stars on a dark background) makes this obsolete.
173 |
174 | The appearance & performance of the blur is controlled by four variables which are not currently exposed as arguments to `nbody_[backend]` but which could be manually modified as desired. The two arguments to the `gaussKernel` function (`sigma` and `halfwindow`) effectively define the 'spread' of the blur. Higher values for `sigma` result in wider blurring, whereas `halfwindow` defines the actual width of the pixel window which is sampled. Higher values of `halfwindow` will decrease performance, as more texel lookups are required. As a general rule, when increasing `sigma`, it will likely be necessary to increase `halfwindow` to avoid an obvious visual cut-off at the edge of the window. Conversely, a wide `halfwindow` with a small `sigma` reduces performance unnecessarily, because texels with negligible contribution will be sampled.
175 |
176 | Blur downscaling is a common technique to improve blur performance; the image is downsampled by the factor `blur_dsc` defined in `renderer_gl.cpp`, then the regular blur filter is performed, and finally the image is upscaled again. This is a very cheap way of enhancing the blur effect, but there is an associated artefact:
177 |
178 | ![Blur downscaling artefact](docs/downscale_artefact.png)
179 |
180 | If this artefact is unacceptable, set `blur_dsc = 1` to turn off downscaling. Note however that this will significantly reduce the blurriness, and compensating with wider `halfwindow` or more passes (see below) will cost a lot of rendering time.
181 |
182 | Enhanced blurring can also be achieved by executing multiple passes. This is controlled by `nPasses`, and is set to 1 by default. Due to the dominance of blur in the render pipeline, total rendering time should scale pretty much linearly with `nPasses`, so increasing it is a potentially expensive option.
183 |
184 | #### Average luminance
185 | The average luminance of the scene is computed from the HDR target into a downscaled R16F target. Then we generate mipmaps to obtain the average luminance on the smallest mipmap (1x1). (Could also be obtained from a 2x2 texture but screen-size targets always seem to resolve down to odd dimensions)
186 |
187 | #### Tonemapping & gamma correction
188 | The exposure of the final render is obtained from the average luminance, and the HDR and Bloom targets are combined and converted to LDR. Gamma correction is also applied. Tada.
189 |
190 | ## Running headless
191 |
192 | If you run `nbody_cuda` on a remote machine with X-forwarding, sending the rendered frames across the net will be a significant bottleneck. This can be worked around by making use of [Xvfb](https://linux.die.net/man/1/xvfb) which provides a *virtual* X display. You can then read from the memory mapped file to write to e.g. MP4 output.
193 |
194 | The script `./scripts/xvfb.sh` runs `nbody_cuda` in this manner, producing a video file `output.mp4`. Note that this script will run the simulation until manually terminated.
195 |
196 | ## Performance Scaling for Demos
197 |
198 | We've previously discussed the desire for a simulation which is *visibly* slower when the physics kernel isn't well optimized. With current default settings, the rendering takes longer (~55ms) than the simulation (10ms). However, altering three of the simulation parameters provides almost complete control of the ratio of render to simulation time.
199 |
200 | Firstly, the number of particles (`numParticles` [above](#Simulation)) has a large effect on the simulation time, as the computation scales with O(n²). By default, 12.8k particles (50 * 256) are rendered, but increasing this to 64k particles (250 * 256), the simulation time increases from 10ms to ~170ms.
201 |
202 | Alternatively, simulation time can be arbitrarily raised or lowered by changing both timestep size (`dt` [above](#Simulation)) and simulation steps per rendered frame (`simIterationsPerFrame`, [above](#Simulation)). By default, a timestep size of 0.005 is used, and 4 simulation steps are taken per rendered frame (Note that `scripts/xvfb.sh` overrides these default values with `dt = 0.001` and `simIterationsPerFrame = 5`).
203 |
204 | To increase the simulation time by a factor of 5, for example, simply divide `dt` by 5 and multiply `simIterationsPerFrame` by 5. This will produce *almost* identical output. Take care with *increasing* `dt` to get the opposite effect; above a certain value, the simulation will become unstable & you may see this manifest as unphysical behaviour (very fast moving stars exploding out from the centre). Instability at large `dt` can be mitigated, to an extent, by increasing `distEps` or `damping`.
205 |
206 | A significant portion of the rendering time is the bloom filter. The [bloom](#Bloom) section has some tips about how to control this.
207 |
208 | ## SYCL vs. CUDA performance
209 |
210 | This repo previously reported *faster* performance from SYCL than CUDA, but this was due to an erroneous translation in the Intel® DPC++ Compatibility Tool from `__frsqrt_rn` to `sycl::rsqrt`. The former has higher precision and runs slower than the latter. This has now been rectified so that the original CUDA code calls `rsqrt`.
211 |
212 | With this bug rectified, and without any further modification to the CUDA code or migrated SYCL code, the SYCL code used to be considerably slower because the Intel® DPC++ Compatibility Tool used to insert a cast to double in the rsqrt call:
213 |
214 | ```
215 | coords_t inv_dist_cube =
216 | sycl::rsqrt((double)dist_sqr * dist_sqr * dist_sqr);
217 |
218 | ```
219 |
220 | This was presumably because the tool was unaware of the equivalence of `rsqrt` and `sycl::rsqrt`. However, inspecting PTX reveals that the generated instructions are the same, so the cast to double is unnecessary. Removing the cast to double leaves a 40% performance gap between CUDA and SYCL. (Removing the cast by hand is no longer necessary, as newer versions of the Intel® DPC++ Compatibility Tool no longer insert it.)
221 |
222 | The root cause of this 40% performance gap appears to be different handling of the branch instruction:
223 |
224 | ```
225 | if (i == id) continue;
226 | ```
227 | in the main loop in `simulator.dp.cpp`. Whereas NVCC handles this via instruction predication, DPC++ generates branch & sync instructions. By replacing this branch instruction with an arithmetic expression:
228 |
229 | ```
230 | force += r * inv_dist_cube * (i != id);
231 | ```
232 | in both the CUDA & SYCL code, we get comparable performance between the two using our hardware setup (RTX 3060). For 5 steps of the physical simulation (1 rendered frame) with 12,800 particles, both CUDA and SYCL take ~5.05ms (RTX 3060).
233 |
234 | ## Update 2024
235 |
236 | The ability to execute the nbody code without rendering simplified the process of running the code on different platforms. The results of these executions have brought to light some issues related to the runtime and compilers. As stated before, the original code was modified by substituting:
237 |
238 | ```
239 | // Original code
240 | if (i == id) continue;
241 |
242 | force += r * inv_dist_cube;
243 | ```
244 |
245 | with
246 |
247 | ```
248 | // Modified code
249 | force += r * inv_dist_cube * (i != id);
250 | ```
251 |
252 | in order to address the 40% decrease in SYCL performance compared to the CUDA code. With this change, the performance was almost the same for both compilers on the RTX 3060.
253 |
254 | We have found that while this is the case for the A100 (CUDA 8.60294 ms vs. SYCL 7.99054 ms), it is not the same on the RTX 2060, where CUDA is heavily penalized (CUDA 10.7281 ms vs. SYCL 8.52349 ms). Even on the A100, the change lowered the CUDA performance (7.95778 ms for the original code).
255 |
256 | The code change also greatly improved the performance by 100% on the MAX 1100 GPU, dropping from 21.6555 ms to 10.7633 ms.
257 | Below are the best results from executing the code on the three different platforms.
258 |
259 | ```
260 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 2060 7.5 [CUDA 12.3]
261 | ==================== WORK GROUP SIZE 512 BRANCH ========================
262 | CUDA - At step 10000 kernel time is 8.48516 and mean is 8.53952 and stddev is: 0.0884324
263 | DPC - At step 10000 kernel time is 8.23865 and mean is 8.30511 and stddev is: 0.0788344
264 | ==================== WORK GROUP SIZE 512 PREDICATED ====================
265 | CUDA - At step 10000 kernel time is 10.7281 and mean is 10.7601 and stddev is: 0.0630959
266 | DPC - At step 10000 kernel time is 8.52349 and mean is 8.5992 and stddev is: 0.078034
267 |
268 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2]
269 | ==================== WORK GROUP SIZE 128 BRANCH ========================
270 | CUDA - At step 10000 kernel time is 7.95778 and mean is 7.95753 and stddev is: 0.000680384
271 | DPC - At step 10000 kernel time is 10.051 and mean is 10.0506 and stddev is: 0.00181166
272 | ==================== WORK GROUP SIZE 128 PREDICATED ====================
273 | CUDA - At step 10000 kernel time is 8.60294 and mean is 8.60151 and stddev is: 0.00077172
274 | DPC - At step 10000 kernel time is 7.99054 and mean is 7.99109 and stddev is: 0.0041852
275 |
276 | [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Data Center GPU Max 1100 1.3 [1.3.26516]
277 | ==================== WORK GROUP SIZE 32 BRANCH ========================
278 | At step 10000 kernel time is 21.5747 and mean is 21.6555 and stddev is: 0.0734683
279 | ==================== WORK GROUP SIZE 32 PREDICATED ====================
280 | At step 10000 kernel time is 10.6649 and mean is 10.7633 and stddev is: 0.0507969
281 | ```
282 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Reporting a Vulnerability
4 |
5 | To report a vulnerability or a security issue please fill in the security
6 | advisories form [here](../../security/advisories/new), send an email to
7 | security@codeplay.com or contact us using the [contact form on our web
8 | page](https://codeplay.com/company/contact/?q=Report%20Security%20Issue).
9 |
--------------------------------------------------------------------------------
/cmake/FindGLEW.cmake:
--------------------------------------------------------------------------------
1 | #
2 |
3 | # Try to find GLEW library and include path.
4 | # Once done this will define
5 | #
6 | # GLEW_FOUND
7 | # GLEW_INCLUDE_DIR
8 | # GLEW_LIBRARY
9 | # GLEW_SOURCE
10 | #
11 |
12 | include(FindPackageHandleStandardArgs)
13 |
14 | if (WIN32)
15 | find_path( GLEW_INCLUDE_DIR
16 | NAMES
17 | GL/glew.h
18 | PATHS
19 | ${GLEW_LOCATION}/include
20 | $ENV{GLEW_LOCATION}/include
21 | $ENV{PROGRAMFILES}/GLEW/include
22 | ${PROJECT_SOURCE_DIR}/extern/glew/include
23 | ${GLEW_LOCATION}
24 | $ENV{GLEW_LOCATION}
25 | DOC "The directory where GL/glew.h resides" )
26 | find_file( GLEW_SOURCE
27 | NAMES
28 | glew.c
29 | PATHS
30 | ${GLEW_LOCATION}/src
31 | $ENV{GLEW_LOCATION}/src
32 | $ENV{PROGRAMFILES}/GLEW/src
33 | ${PROJECT_SOURCE_DIR}/extern/glew/src
34 | ${GLEW_LOCATION}
35 | $ENV{GLEW_LOCATION}
36 | DOC "The directory where GL/glew.c resides" )
37 | if(ARCH STREQUAL "x86")  # x86 build: search the x86/win32 library directories
38 | find_library( GLEW_LIBRARY
39 | NAMES
40 | glew GLEW glew32s glew32
41 | PATHS
42 | ${GLEW_LOCATION}/lib
43 | ${GLEW_LOCATION}/lib/x86
44 | ${GLEW_LOCATION}/lib/win32
45 | ${GLEW_LOCATION}/lib/Release/win32
46 | "${GLEW_LOCATION}/lib/Release MX/win32"  # quoted: "Release MX" contains a space and would otherwise be split into two arguments
47 | $ENV{GLEW_LOCATION}/lib
48 | $ENV{GLEW_LOCATION}/lib/Release/win32
49 | "$ENV{GLEW_LOCATION}/lib/Release MX/win32"  # quoted for the same reason
50 | $ENV{GLEW_LOCATION}/lib/x86
51 | $ENV{GLEW_LOCATION}/lib/win32
52 | $ENV{PROGRAMFILES}/GLEW/lib
53 | $ENV{PROGRAMFILES}/GLEW/lib/x86
54 | $ENV{PROGRAMFILES}/GLEW/lib/win32
55 | ${PROJECT_SOURCE_DIR}/extern/glew/bin
56 | ${PROJECT_SOURCE_DIR}/extern/glew/lib
57 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/x86
58 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/win32
59 | ${GLEW_LOCATION}
60 | $ENV{GLEW_LOCATION}
61 | DOC "The GLEW library")
62 | else()  # any other ARCH value: search the x64 directories first, then the generic lib directories
63 | find_library( GLEW_LIBRARY
64 | NAMES
65 | glew GLEW glew32s glew32
66 | PATHS
67 | ${GLEW_LOCATION}/lib/x64
68 | ${GLEW_LOCATION}/lib/Release/x64
69 | "${GLEW_LOCATION}/lib/Release MX/x64"  # quoted: "Release MX" contains a space and would otherwise be split into two arguments
70 | $ENV{GLEW_LOCATION}/lib/x64
71 | $ENV{GLEW_LOCATION}/lib/Release/x64
72 | "$ENV{GLEW_LOCATION}/lib/Release MX/x64"  # quoted for the same reason
73 | $ENV{PROGRAMFILES}/GLEW/lib/x64
74 | ${PROJECT_SOURCE_DIR}/extern/glew/bin
75 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/x64
76 | ${GLEW_LOCATION}/lib
77 | $ENV{GLEW_LOCATION}/lib
78 | $ENV{PROGRAMFILES}/GLEW/lib
79 | ${PROJECT_SOURCE_DIR}/extern/glew/lib
80 | ${GLEW_LOCATION}
81 | $ENV{GLEW_LOCATION}
82 | DOC "The GLEW library")
83 | endif()
84 | endif ()
85 |
86 | if (${CMAKE_HOST_UNIX})
87 | find_path( GLEW_INCLUDE_DIR
88 | NAMES
89 | GL/glew.h
90 | PATHS
91 | ${GLEW_LOCATION}/include
92 | $ENV{GLEW_LOCATION}/include
93 | /usr/include
94 | /usr/local/include
95 | /sw/include
96 | /opt/local/include
97 | NO_DEFAULT_PATH
98 | DOC "The directory where GL/glew.h resides"
99 | )
100 | find_library( GLEW_LIBRARY
101 | NAMES
102 | GLEW glew
103 | PATHS
104 | ${GLEW_LOCATION}/lib
105 | $ENV{GLEW_LOCATION}/lib
106 | /usr/lib64
107 | /usr/lib
108 | /usr/local/lib64
109 | /usr/local/lib
110 | /sw/lib
111 | /opt/local/lib
112 | NO_DEFAULT_PATH
113 | DOC "The GLEW library")
114 | endif ()
115 |
116 | if (GLEW_INCLUDE_DIR AND EXISTS "${GLEW_INCLUDE_DIR}/GL/glew.h")
117 |
118 | file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_2 REGEX "^#define GL_VERSION_4_2.*$")
119 | if (GLEW_4_2)
120 | SET(OPENGL_4_2_FOUND TRUE)
121 | else ()
122 | message(WARNING
123 | "glew-1.7.0 or newer needed for supporting OpenGL 4.2 dependent features"
124 | )
125 | endif ()
126 |
127 | file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_3 REGEX "^#define GL_VERSION_4_3.*$")
128 | if (GLEW_4_3)
129 | SET(OPENGL_4_3_FOUND TRUE)
130 | else ()
131 | message(WARNING
132 | "glew-1.9.0 or newer needed for supporting OpenGL 4.3 dependent features"
133 | )
134 | endif ()
135 |
136 | endif ()
137 |
138 | if(GLEW_SOURCE)
139 | find_package_handle_standard_args(GLEW DEFAULT_MSG
140 | GLEW_INCLUDE_DIR
141 | GLEW_SOURCE
142 | )
143 | else()
144 | find_package_handle_standard_args(GLEW DEFAULT_MSG
145 | GLEW_INCLUDE_DIR
146 | GLEW_LIBRARY
147 | )
148 | endif()
149 |
150 | mark_as_advanced( GLEW_FOUND )
--------------------------------------------------------------------------------
/cmake/FindGLFW.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2013 Pixar
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "Apache License")
5 | # with the following modification; you may not use this file except in
6 | # compliance with the Apache License and the following modification to it:
7 | # Section 6. Trademarks. is deleted and replaced with:
8 | #
9 | # 6. Trademarks. This License does not grant permission to use the trade
10 | # names, trademarks, service marks, or product names of the Licensor
11 | # and its affiliates, except as required to comply with Section 4(c) of
12 | # the License and to reproduce the content of the NOTICE file.
13 | #
14 | # You may obtain a copy of the Apache License at
15 | #
16 | # http://www.apache.org/licenses/LICENSE-2.0
17 | #
18 | # Unless required by applicable law or agreed to in writing, software
19 | # distributed under the Apache License with the above modification is
20 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 | # KIND, either express or implied. See the Apache License for the specific
22 | # language governing permissions and limitations under the Apache License.
23 | #
24 |
25 | # Try to find GLFW library and include path.
26 | # Once done this will define
27 | #
28 | # GLFW_FOUND
29 | # GLFW_INCLUDE_DIR
30 | # GLFW_LIBRARIES
31 | #
32 |
33 | find_path( GLFW_INCLUDE_DIR
34 | NAMES
35 | GLFW/glfw3.h
36 | HINTS
37 | "${GLFW_LOCATION}/include"
38 | "$ENV{GLFW_LOCATION}/include"
39 | PATHS
40 | "$ENV{PROGRAMFILES}/GLFW/include"
41 | "${OPENGL_INCLUDE_DIR}"
42 | /usr/openwin/share/include
43 | /usr/openwin/include
44 | /usr/X11R6/include
45 | /usr/include/X11
46 | /opt/graphics/OpenGL/include
47 | /opt/graphics/OpenGL/contrib/libglfw
48 | /usr/local/include
49 | /usr/include/GL
50 | /usr/include
51 | DOC
52 | "The directory where GLFW/glfw3.h resides"
53 | )
54 |
55 | #
56 | # XXX: Do we still need to search for GL/glfw.h?
57 | #
58 | find_path( GLFW_INCLUDE_DIR
59 | NAMES
60 | GL/glfw.h
61 | HINTS
62 | "${GLFW_LOCATION}/include"
63 | "$ENV{GLFW_LOCATION}/include"
64 | PATHS
65 | "$ENV{PROGRAMFILES}/GLFW/include"
66 | "${OPENGL_INCLUDE_DIR}"
67 | /usr/openwin/share/include
68 | /usr/openwin/include
69 | /usr/X11R6/include
70 | /usr/include/X11
71 | /opt/graphics/OpenGL/include
72 | /opt/graphics/OpenGL/contrib/libglfw
73 | /usr/local/include
74 | /usr/include/GL
75 | /usr/include
76 | DOC
77 | "The directory where GL/glfw.h resides"
78 | )
79 |
80 | if (WIN32)
81 | if(CYGWIN)
82 | find_library( GLFW_glfw_LIBRARY
83 | NAMES
84 | glfw32
85 | HINTS
86 | "${GLFW_LOCATION}/lib"
87 | "${GLFW_LOCATION}/lib/x64"
88 | "$ENV{GLFW_LOCATION}/lib"
89 | PATHS
90 | "${OPENGL_LIBRARY_DIR}"
91 | /usr/lib
92 | /usr/lib/w32api
93 | /usr/local/lib
94 | /usr/X11R6/lib
95 | DOC
96 | "The GLFW library"
97 | )
98 | else()
99 | find_library( GLFW_glfw_LIBRARY
100 | NAMES
101 | glfw32
102 | glfw32s
103 | glfw
104 | glfw3
105 | HINTS
106 | "${GLFW_LOCATION}/lib"
107 | "${GLFW_LOCATION}/lib/x64"
108 | "${GLFW_LOCATION}/lib-msvc110"
109 | "${GLFW_LOCATION}/lib-vc2012"
110 | "$ENV{GLFW_LOCATION}/lib"
111 | "$ENV{GLFW_LOCATION}/lib/x64"
112 | "$ENV{GLFW_LOCATION}/lib-msvc110"
113 | "$ENV{GLFW_LOCATION}/lib-vc2012"
114 | PATHS
115 | "$ENV{PROGRAMFILES}/GLFW/lib"
116 | "${OPENGL_LIBRARY_DIR}"
117 | DOC
118 | "The GLFW library"
119 | )
120 | endif()
121 | else ()
122 | if (APPLE)
123 | find_library( GLFW_glfw_LIBRARY glfw
124 | NAMES
125 | glfw
126 | glfw3
127 | HINTS
128 | "${GLFW_LOCATION}/lib"
129 | "${GLFW_LOCATION}/lib/cocoa"
130 | "$ENV{GLFW_LOCATION}/lib"
131 | "$ENV{GLFW_LOCATION}/lib/cocoa"
132 | PATHS
133 | /usr/local/lib
134 | )
135 | set(GLFW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX")
136 | set(GLFW_corevideo_LIBRARY "-framework CoreVideo" CACHE STRING "CoreVideo framework for OSX")
137 | set(GLFW_iokit_LIBRARY "-framework IOKit" CACHE STRING "IOKit framework for OSX")
138 | else ()
139 | # (*)NIX
140 |
141 | find_package(Threads REQUIRED)
142 |
143 | find_package(X11 REQUIRED)
144 |
145 | if(NOT X11_Xrandr_FOUND)
146 | message(FATAL_ERROR "Xrandr library not found - required for GLFW")
147 | endif()
148 |
149 | if(NOT X11_xf86vmode_FOUND)
150 | message(FATAL_ERROR "xf86vmode library not found - required for GLFW")
151 | endif()
152 |
153 | if(NOT X11_Xcursor_FOUND)
154 | message(FATAL_ERROR "Xcursor library not found - required for GLFW")
155 | endif()
156 |
157 | if(NOT X11_Xinerama_FOUND)
158 | message(FATAL_ERROR "Xinerama library not found - required for GLFW")
159 | endif()
160 |
161 | list(APPEND GLFW_x11_LIBRARY "${X11_Xrandr_LIB}" "${X11_Xxf86vm_LIB}" "${X11_Xcursor_LIB}" "${X11_Xinerama_LIB}" "${CMAKE_THREAD_LIBS_INIT}" -lrt -lXi)
162 |
163 | find_library( GLFW_glfw_LIBRARY
164 | NAMES
165 | glfw
166 | glfw3
167 | HINTS
168 | "${GLFW_LOCATION}/lib"
169 | "$ENV{GLFW_LOCATION}/lib"
170 | "${GLFW_LOCATION}/lib/x11"
171 | "$ENV{GLFW_LOCATION}/lib/x11"
172 | PATHS
173 | /usr/lib64
174 | /usr/lib
175 | /usr/lib/${CMAKE_LIBRARY_ARCHITECTURE}
176 | /usr/local/lib64
177 | /usr/local/lib
178 | /usr/local/lib/${CMAKE_LIBRARY_ARCHITECTURE}
179 | /usr/openwin/lib
180 | /usr/X11R6/lib
181 | DOC
182 | "The GLFW library"
183 | )
184 | endif (APPLE)
185 | endif (WIN32)
186 |
187 | set( GLFW_FOUND "NO" )
188 |
189 | if(GLFW_INCLUDE_DIR)
190 |
191 | if(GLFW_glfw_LIBRARY)
192 | set( GLFW_LIBRARIES "${GLFW_glfw_LIBRARY}"
193 | "${GLFW_x11_LIBRARY}"
194 | "${GLFW_cocoa_LIBRARY}"
195 | "${GLFW_iokit_LIBRARY}"
196 | "${GLFW_corevideo_LIBRARY}" )
197 | set( GLFW_FOUND "YES" )
198 | set (GLFW_LIBRARY "${GLFW_LIBRARIES}")
199 | set (GLFW_INCLUDE_PATH "${GLFW_INCLUDE_DIR}")
200 | endif(GLFW_glfw_LIBRARY)
201 |
202 |
203 | # Tease the GLFW_VERSION numbers from the lib headers
204 | function(parseVersion FILENAME VARNAME)
205 | # Reads the "#define <VARNAME> ..." line from ${GLFW_INCLUDE_DIR}/<FILENAME> and sets <VARNAME> in the caller's scope to the number(s) found on it.
206 | set(PATTERN "^#define ${VARNAME}.*$")
207 |
208 | file(STRINGS "${GLFW_INCLUDE_DIR}/${FILENAME}" TMP REGEX "${PATTERN}")
209 | # ${TMP} is quoted: when no line matched it is empty, and an unquoted empty expansion would leave string() without its input argument.
210 | string(REGEX MATCHALL "[0-9]+" TMP "${TMP}")
211 |
212 | set(${VARNAME} ${TMP} PARENT_SCOPE)
213 |
214 | endfunction()
215 |
216 |
217 | if(EXISTS "${GLFW_INCLUDE_DIR}/GL/glfw.h")
218 |
219 | parseVersion(GL/glfw.h GLFW_VERSION_MAJOR)
220 | parseVersion(GL/glfw.h GLFW_VERSION_MINOR)
221 | parseVersion(GL/glfw.h GLFW_VERSION_REVISION)
222 |
223 | elseif(EXISTS "${GLFW_INCLUDE_DIR}/GLFW/glfw3.h")
224 |
225 | parseVersion(GLFW/glfw3.h GLFW_VERSION_MAJOR)
226 | parseVersion(GLFW/glfw3.h GLFW_VERSION_MINOR)
227 | parseVersion(GLFW/glfw3.h GLFW_VERSION_REVISION)
228 |
229 | endif()
230 |
231 | if(GLFW_VERSION_MAJOR OR GLFW_VERSION_MINOR OR GLFW_VERSION_REVISION)  # test the variables by name: expanding them with ${} produces a malformed condition when they are empty
232 | set(GLFW_VERSION "${GLFW_VERSION_MAJOR}.${GLFW_VERSION_MINOR}.${GLFW_VERSION_REVISION}")
233 | set(GLFW_VERSION_STRING "${GLFW_VERSION}")
234 | mark_as_advanced(GLFW_VERSION)
235 | endif()
236 |
237 | endif(GLFW_INCLUDE_DIR)
238 |
239 | include(FindPackageHandleStandardArgs)
240 |
241 | find_package_handle_standard_args(GLFW
242 | REQUIRED_VARS
243 | GLFW_INCLUDE_DIR
244 | GLFW_LIBRARIES
245 | VERSION_VAR
246 | GLFW_VERSION
247 | )
248 |
249 | mark_as_advanced(
250 | GLFW_INCLUDE_DIR
251 | GLFW_LIBRARIES
252 | GLFW_glfw_LIBRARY
253 | GLFW_cocoa_LIBRARY
254 | )
255 |
256 |
--------------------------------------------------------------------------------
/cmake/FindGLM.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # Find GLM
3 | #
4 | # Try to find GLM : OpenGL Mathematics.
5 | # This module defines
6 | # - GLM_INCLUDE_DIRS
7 | # - GLM_FOUND
8 | #
9 | # The following variables can be set as arguments for the module.
10 | # - GLM_ROOT_DIR : Root library directory of GLM
11 | #
12 | # References:
13 | # - https://github.com/Groovounet/glm/blob/master/util/FindGLM.cmake
14 | # - https://bitbucket.org/alfonse/gltut/src/28636298c1c0/glm-0.9.0.7/FindGLM.cmake
15 | #
16 |
17 | # Additional modules
18 | include(FindPackageHandleStandardArgs)
19 |
20 | if (WIN32)
21 | # Find include files
22 | find_path(
23 | GLM_INCLUDE_DIR
24 | NAMES glm/glm.hpp
25 | PATHS
26 | $ENV{PROGRAMFILES}/include
27 | ${GLM_ROOT_DIR}/include
28 | DOC "The directory where glm/glm.hpp resides")
29 | else()
30 | # Find include files
31 | find_path(
32 | GLM_INCLUDE_DIR
33 | NAMES glm/glm.hpp
34 | PATHS
35 | /usr/include
36 | /usr/local/include
37 | /sw/include
38 | /opt/local/include
39 | ${GLM_ROOT_DIR}/include
40 | DOC "The directory where glm/glm.hpp resides")
41 | endif()
42 |
43 | # Handle REQUIRED argument, define *_FOUND variable
44 | find_package_handle_standard_args(GLM DEFAULT_MSG GLM_INCLUDE_DIR)
45 |
46 | # Define GLM_INCLUDE_DIRS
47 | if (GLM_FOUND)
48 | set(GLM_INCLUDE_DIRS ${GLM_INCLUDE_DIR})
49 | endif()
50 |
51 | # Hide some variables
52 | mark_as_advanced(GLM_INCLUDE_DIR)
53 |
--------------------------------------------------------------------------------
/cmake/FindVulkan.cmake:
--------------------------------------------------------------------------------
1 | # Find Vulkan
2 | #
3 | # VULKAN_INCLUDE_DIR
4 | # VULKAN_LIBRARY
5 | # VULKAN_FOUND
6 |
7 | if (WIN32)
8 | find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS
9 | "$ENV{VULKAN_SDK}/Include"
10 | "$ENV{VK_SDK_PATH}/Include")
11 | if (CMAKE_CL_64)
12 | find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS
13 | "$ENV{VULKAN_SDK}/Bin"
14 | "$ENV{VK_SDK_PATH}/Bin")
15 | find_library(VULKAN_STATIC_LIBRARY NAMES vkstatic.1 HINTS
16 | "$ENV{VULKAN_SDK}/Bin"
17 | "$ENV{VK_SDK_PATH}/Bin")
18 | else()
19 | find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS
20 | "$ENV{VULKAN_SDK}/Bin32"
21 | "$ENV{VK_SDK_PATH}/Bin32")
22 | endif()
23 | else()
24 | find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS
25 | "$ENV{VULKAN_SDK}/include")
26 | find_library(VULKAN_LIBRARY NAMES vulkan HINTS
27 | "$ENV{VULKAN_SDK}/lib")
28 | endif()
29 |
30 | include(FindPackageHandleStandardArgs)
31 | find_package_handle_standard_args(Vulkan DEFAULT_MSG VULKAN_LIBRARY VULKAN_INCLUDE_DIR)
32 |
33 | mark_as_advanced(VULKAN_INCLUDE_DIR VULKAN_LIBRARY VULKAN_STATIC_LIBRARY)
34 |
--------------------------------------------------------------------------------
/cmake/Finddpct.cmake:
--------------------------------------------------------------------------------
1 | if (WIN32)
2 | find_path( dpct_INCLUDE_DIR
3 | NAMES
4 | dpct/dpct.hpp
5 | PATHS
6 | ${dpct_LOCATION}/include
7 | $ENV{dpct_LOCATION}/include
8 | $ENV{DPCT_BUNDLE_ROOT}/include
9 | $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include
10 | $ENV{PROGRAMFILES}/include
11 | NO_DEFAULT_PATH
12 | DOC "The directory where dpct/dpct.hpp resides"
13 | )
14 | else()
15 | find_path( dpct_INCLUDE_DIR
16 | NAMES
17 | dpct/dpct.hpp
18 | PATHS
19 | ${dpct_LOCATION}/include
20 | $ENV{dpct_LOCATION}/include
21 | $ENV{DPCT_BUNDLE_ROOT}/include
22 | $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include
23 | /opt/intel/oneapi/dpcpp-ct/latest/include
24 | /usr/include
25 | /usr/local/include
26 | /sw/include
27 | /opt/local/include
28 | NO_DEFAULT_PATH
29 | DOC "The directory where dpct/dpct.hpp resides"
30 | )
31 | endif ()
32 |
33 | include(FindPackageHandleStandardArgs)
34 | find_package_handle_standard_args( dpct REQUIRED_VARS dpct_INCLUDE_DIR )
35 |
36 |
--------------------------------------------------------------------------------
/docs/Eq1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/Eq1.png
--------------------------------------------------------------------------------
/docs/downscale_artefact.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/downscale_artefact.png
--------------------------------------------------------------------------------
/libs/imgui/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 Codeplay Software Limited
2 |
3 | add_library(imgui SHARED
4 | src/imgui.cpp
5 | src/imgui_widgets.cpp
6 | src/imgui_demo.cpp
7 | src/imgui_draw.cpp
8 | src/imgui_tables.cpp
9 | src/imgui_impl_opengl3.cpp
10 | src/imgui_impl_glfw.cpp)
11 |
12 | target_link_libraries(imgui PRIVATE ${CMAKE_DL_LIBS})  # CMAKE_DL_LIBS is the platform's library providing dlopen/dlclose (replaces the hard-coded "dl")
13 |
14 | target_include_directories(imgui PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include)
15 |
16 | target_include_directories(${BINARY_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
17 | target_include_directories(${BINARY_NAME}_d PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
18 |
19 | # Link main project to imgui lib
20 | target_link_libraries(${BINARY_NAME} PRIVATE imgui)
21 | target_link_libraries(${BINARY_NAME}_d PRIVATE imgui)
22 |
--------------------------------------------------------------------------------
/libs/imgui/include/imconfig.h:
--------------------------------------------------------------------------------
1 | //-----------------------------------------------------------------------------
2 | // COMPILE-TIME OPTIONS FOR DEAR IMGUI
3 | // Runtime options (clipboard callbacks, enabling various features, etc.) can generally be set via the ImGuiIO structure.
4 | // You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions.
5 | //-----------------------------------------------------------------------------
6 | // A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/rebased branch with your modifications to it)
7 | // B) or '#define IMGUI_USER_CONFIG "my_imgui_config.h"' in your project and then add directives in your own file without touching this template.
8 | //-----------------------------------------------------------------------------
9 | // You need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which include the imgui*.cpp
10 | // files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an affect on data structures.
11 | // Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts.
12 | // Call IMGUI_CHECKVERSION() from your .cpp files to verify that the data structures your files are using are matching the ones imgui.cpp is using.
13 | //-----------------------------------------------------------------------------
14 |
15 | #pragma once
16 |
17 | //---- Define assertion handler. Defaults to calling assert().
18 | // If your macro uses multiple statements, make sure is enclosed in a 'do { .. } while (0)' block so it can be used as a single statement.
19 | //#define IM_ASSERT(_EXPR) MyAssert(_EXPR)
20 | //#define IM_ASSERT(_EXPR) ((void)(_EXPR)) // Disable asserts
21 |
22 | //---- Define attributes of all API symbols declarations, e.g. for DLL under Windows
23 | // Using Dear ImGui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility.
24 | // DLL users: heaps and globals are not shared across DLL boundaries! You will need to call SetCurrentContext() + SetAllocatorFunctions()
25 | // for each static/DLL boundary you are calling from. Read "Context and Memory Allocators" section of imgui.cpp for more details.
26 | //#define IMGUI_API __declspec( dllexport )
27 | //#define IMGUI_API __declspec( dllimport )
28 |
29 | //---- Don't define obsolete functions/enums/behaviors. Consider enabling from time to time after updating to avoid using soon-to-be obsolete function/names.
30 | //#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS
31 | //#define IMGUI_DISABLE_OBSOLETE_KEYIO // 1.87: disable legacy io.KeyMap[]+io.KeysDown[] in favor io.AddKeyEvent(). This will be folded into IMGUI_DISABLE_OBSOLETE_FUNCTIONS in a few versions.
32 |
33 | //---- Disable all of Dear ImGui or don't implement standard windows.
34 | // It is very strongly recommended to NOT disable the demo windows during development. Please read comments in imgui_demo.cpp.
35 | //#define IMGUI_DISABLE // Disable everything: all headers and source files will be empty.
36 | //#define IMGUI_DISABLE_DEMO_WINDOWS // Disable demo windows: ShowDemoWindow()/ShowStyleEditor() will be empty. Not recommended.
37 | //#define IMGUI_DISABLE_METRICS_WINDOW // Disable metrics/debugger and other debug tools: ShowMetricsWindow() and ShowStackToolWindow() will be empty.
38 |
39 | //---- Don't implement some functions to reduce linkage requirements.
40 | //#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc. (user32.lib/.a, kernel32.lib/.a)
41 | //#define IMGUI_ENABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with Visual Studio] Implement default IME handler (require imm32.lib/.a, auto-link for Visual Studio, -limm32 on command-line for MinGW)
42 | //#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with non-Visual Studio compilers] Don't implement default IME handler (won't require imm32.lib/.a)
43 | //#define IMGUI_DISABLE_WIN32_FUNCTIONS // [Win32] Won't use and link with any Win32 function (clipboard, ime).
44 | //#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default).
45 | //#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf)
46 | //#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself.
47 | //#define IMGUI_DISABLE_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle at all (replace them with dummies)
48 | //#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function.
49 | //#define IMGUI_DISABLE_DEFAULT_ALLOCATORS // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions().
50 | //#define IMGUI_DISABLE_SSE // Disable use of SSE intrinsics even if available
51 |
52 | //---- Include imgui_user.h at the end of imgui.h as a convenience
53 | //#define IMGUI_INCLUDE_IMGUI_USER_H
54 |
55 | //---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another)
56 | //#define IMGUI_USE_BGRA_PACKED_COLOR
57 |
58 | //---- Use 32-bit for ImWchar (default is 16-bit) to support unicode planes 1-16. (e.g. point beyond 0xFFFF like emoticons, dingbats, symbols, shapes, ancient languages, etc...)
59 | //#define IMGUI_USE_WCHAR32
60 |
61 | //---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version
62 | // By default the embedded implementations are declared static and not available outside of Dear ImGui sources files.
63 | //#define IMGUI_STB_TRUETYPE_FILENAME "my_folder/stb_truetype.h"
64 | //#define IMGUI_STB_RECT_PACK_FILENAME "my_folder/stb_rect_pack.h"
65 | //#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION
66 | //#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION
67 |
68 | //---- Use stb_printf's faster implementation of vsnprintf instead of the one from libc (unless IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS is defined)
69 | // Requires 'stb_sprintf.h' to be available in the include path. Compatibility checks of arguments and formats done by clang and GCC will be disabled in order to support the extra formats provided by STB sprintf.
70 | // #define IMGUI_USE_STB_SPRINTF
71 |
72 | //---- Use FreeType to build and rasterize the font atlas (instead of stb_truetype which is embedded by default in Dear ImGui)
73 | // Requires FreeType headers to be available in the include path. Requires program to be compiled with 'misc/freetype/imgui_freetype.cpp' (in this repository) + the FreeType library (not provided).
74 | // On Windows you may use vcpkg with 'vcpkg install freetype --triplet=x64-windows' + 'vcpkg integrate install'.
75 | //#define IMGUI_ENABLE_FREETYPE
76 |
77 | //---- Use stb_truetype to build and rasterize the font atlas (default)
78 | // The only purpose of this define is if you want force compilation of the stb_truetype backend ALONG with the FreeType backend.
79 | //#define IMGUI_ENABLE_STB_TRUETYPE
80 |
81 | //---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4.
82 | // This will be inlined as part of ImVec2 and ImVec4 class declarations.
83 | /*
84 | #define IM_VEC2_CLASS_EXTRA \
85 | ImVec2(const MyVec2& f) { x = f.x; y = f.y; } \
86 | operator MyVec2() const { return MyVec2(x,y); }
87 |
88 | #define IM_VEC4_CLASS_EXTRA \
89 | ImVec4(const MyVec4& f) { x = f.x; y = f.y; z = f.z; w = f.w; } \
90 | operator MyVec4() const { return MyVec4(x,y,z,w); }
91 | */
92 |
93 | //---- Use 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices.
94 | // Your renderer backend will need to support it (most example renderer backends support both 16/32-bit indices).
95 | // Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer.
96 | // Read about ImGuiBackendFlags_RendererHasVtxOffset for details.
97 | //#define ImDrawIdx unsigned int
98 |
99 | //---- Override ImDrawCallback signature (will need to modify renderer backends accordingly)
100 | //struct ImDrawList;
101 | //struct ImDrawCmd;
102 | //typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data);
103 | //#define ImDrawCallback MyImDrawCallback
104 |
105 | //---- Debug Tools: Macro to break in Debugger
106 | // (use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.)
107 | //#define IM_DEBUG_BREAK IM_ASSERT(0)
108 | //#define IM_DEBUG_BREAK __debugbreak()
109 |
110 | //---- Debug Tools: Have the Item Picker break in the ItemAdd() function instead of ItemHoverable(),
111 | // (which comes earlier in the code, will catch a few extra items, allow picking items other than Hovered one.)
112 | // This adds a small runtime cost which is why it is not enabled by default.
113 | //#define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX
114 |
115 | //---- Debug Tools: Enable slower asserts
116 | //#define IMGUI_DEBUG_PARANOID
117 |
118 | //---- Tip: You can add extra functions within the ImGui:: namespace, here or in your own headers files.
119 | /*
120 | namespace ImGui
121 | {
122 | void MyFunction(const char* name, const MyMatrix44& v);
123 | }
124 | */
125 |
--------------------------------------------------------------------------------
/libs/imgui/include/imgui_impl_glfw.h:
--------------------------------------------------------------------------------
1 | // dear imgui: Platform Backend for GLFW
2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..)
3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.)
4 |
5 | // Implemented features:
6 | // [X] Platform: Clipboard support.
7 | // [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set]
8 | // [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'.
9 | // [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors requires GLFW 3.4+).
10 |
11 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
12 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
13 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
14 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
15 |
16 | // About GLSL version:
17 | // The 'glsl_version' initialization parameter defaults to "#version 150" if NULL.
18 | // Only override if your GL version doesn't handle this GLSL version. Keep NULL if unsure!
19 |
20 | #pragma once
21 | #include "imgui.h" // IMGUI_IMPL_API
22 |
23 | struct GLFWwindow;
24 | struct GLFWmonitor;
25 |
26 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks);
27 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks);
28 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks);
29 | IMGUI_IMPL_API void ImGui_ImplGlfw_Shutdown();
30 | IMGUI_IMPL_API void ImGui_ImplGlfw_NewFrame();
31 |
32 | // GLFW callbacks
33 | // - When calling Init with 'install_callbacks=true': GLFW callbacks will be installed for you. They will call user's previously installed callbacks, if any.
34 | // - When calling Init with 'install_callbacks=false': GLFW callbacks won't be installed. You will need to call those functions yourself from your own GLFW callbacks.
35 | IMGUI_IMPL_API void ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused); // Since 1.84
36 | IMGUI_IMPL_API void ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered); // Since 1.84
37 | IMGUI_IMPL_API void ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y); // Since 1.87
38 | IMGUI_IMPL_API void ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods);
39 | IMGUI_IMPL_API void ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset);
40 | IMGUI_IMPL_API void ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int key, int scancode, int action, int mods);
41 | IMGUI_IMPL_API void ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c);
42 | IMGUI_IMPL_API void ImGui_ImplGlfw_MonitorCallback(GLFWmonitor* monitor, int event);
43 |
--------------------------------------------------------------------------------
/libs/imgui/include/imgui_impl_opengl3.h:
--------------------------------------------------------------------------------
1 | // dear imgui: Renderer Backend for modern OpenGL with shaders / programmatic pipeline
2 | // - Desktop GL: 2.x 3.x 4.x
3 | // - Embedded GL: ES 2.0 (WebGL 1.0), ES 3.0 (WebGL 2.0)
4 | // This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..)
5 |
6 | // Implemented features:
7 | // [X] Renderer: User texture binding. Use 'GLuint' OpenGL texture identifier as void*/ImTextureID. Read the FAQ about ImTextureID!
8 | // [x] Renderer: Desktop GL only: Support for large meshes (64k+ vertices) with 16-bit indices.
9 |
10 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
11 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
12 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
13 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
14 |
15 | // About GLSL version:
16 | // The 'glsl_version' initialization parameter should be NULL (default) or a "#version XXX" string.
17 | // On computer platforms the GLSL version defaults to "#version 130". On OpenGL ES 3 platforms it defaults to "#version 300 es".
18 | // Only override if your GL version doesn't handle this GLSL version. See GLSL version table at the top of imgui_impl_opengl3.cpp.
19 |
20 | #pragma once
21 | #include "imgui.h" // IMGUI_IMPL_API
22 |
23 | // Backend API
24 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_Init(const char* glsl_version = NULL);
25 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_Shutdown();
26 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_NewFrame();
27 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_RenderDrawData(ImDrawData* draw_data);
28 |
29 | // (Optional) Called by Init/NewFrame/Shutdown
30 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_CreateFontsTexture();
31 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_DestroyFontsTexture();
32 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_CreateDeviceObjects();
33 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_DestroyDeviceObjects();
34 |
35 | // Specific OpenGL ES versions
36 | //#define IMGUI_IMPL_OPENGL_ES2 // Auto-detected on Emscripten
37 | //#define IMGUI_IMPL_OPENGL_ES3 // Auto-detected on iOS/Android
38 |
39 | // You can explicitly select the GLES2 or GLES3 API by defining IMGUI_IMPL_OPENGL_ES2 or IMGUI_IMPL_OPENGL_ES3 in imconfig.h or on the compiler command-line.
40 | #if !defined(IMGUI_IMPL_OPENGL_ES2) \
41 | && !defined(IMGUI_IMPL_OPENGL_ES3)
42 |
43 | // Try to detect GLES on matching platforms
44 | #if defined(__APPLE__)
45 | #include <TargetConditionals.h>
46 | #endif
47 | #if (defined(__APPLE__) && (TARGET_OS_IOS || TARGET_OS_TV)) || (defined(__ANDROID__))
48 | #define IMGUI_IMPL_OPENGL_ES3 // iOS, Android -> GL ES 3, "#version 300 es"
49 | #elif defined(__EMSCRIPTEN__)
50 | #define IMGUI_IMPL_OPENGL_ES2 // Emscripten -> GL ES 2, "#version 100"
51 | #else
52 | // Otherwise imgui_impl_opengl3_loader.h will be used.
53 | #endif
54 |
55 | #endif
56 |
--------------------------------------------------------------------------------
/libs/imgui/include/imstb_rectpack.h:
--------------------------------------------------------------------------------
1 | // [DEAR IMGUI]
2 | // This is a slightly modified version of stb_rect_pack.h 1.00.
3 | // Those changes would need to be pushed into nothings/stb:
4 | // - Added STBRP__CDECL
5 | // Grep for [DEAR IMGUI] to find the changes.
6 |
7 | // stb_rect_pack.h - v1.00 - public domain - rectangle packing
8 | // Sean Barrett 2014
9 | //
10 | // Useful for e.g. packing rectangular textures into an atlas.
11 | // Does not do rotation.
12 | //
13 | // Not necessarily the awesomest packing method, but better than
14 | // the totally naive one in stb_truetype (which is primarily what
15 | // this is meant to replace).
16 | //
17 | // Has only had a few tests run, may have issues.
18 | //
19 | // More docs to come.
20 | //
21 | // No memory allocations; uses qsort() and assert() from stdlib.
22 | // Can override those by defining STBRP_SORT and STBRP_ASSERT.
23 | //
24 | // This library currently uses the Skyline Bottom-Left algorithm.
25 | //
26 | // Please note: better rectangle packers are welcome! Please
27 | // implement them to the same API, but with a different init
28 | // function.
29 | //
30 | // Credits
31 | //
32 | // Library
33 | // Sean Barrett
34 | // Minor features
35 | // Martins Mozeiko
36 | // github:IntellectualKitty
37 | //
38 | // Bugfixes / warning fixes
39 | // Jeremy Jaussaud
40 | // Fabian Giesen
41 | //
42 | // Version history:
43 | //
44 | // 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles
45 | // 0.99 (2019-02-07) warning fixes
46 | // 0.11 (2017-03-03) return packing success/fail result
47 | // 0.10 (2016-10-25) remove cast-away-const to avoid warnings
48 | // 0.09 (2016-08-27) fix compiler warnings
49 | // 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0)
50 | // 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0)
51 | // 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort
52 | // 0.05: added STBRP_ASSERT to allow replacing assert
53 | // 0.04: fixed minor bug in STBRP_LARGE_RECTS support
54 | // 0.01: initial release
55 | //
56 | // LICENSE
57 | //
58 | // See end of file for license information.
59 |
60 | //////////////////////////////////////////////////////////////////////////////
61 | //
62 | // INCLUDE SECTION
63 | //
64 |
65 | #ifndef STB_INCLUDE_STB_RECT_PACK_H
66 | #define STB_INCLUDE_STB_RECT_PACK_H
67 |
68 | #define STB_RECT_PACK_VERSION 1
69 |
70 | #ifdef STBRP_STATIC
71 | #define STBRP_DEF static
72 | #else
73 | #define STBRP_DEF extern
74 | #endif
75 |
76 | #ifdef __cplusplus
77 | extern "C" {
78 | #endif
79 |
80 | typedef struct stbrp_context stbrp_context;
81 | typedef struct stbrp_node stbrp_node;
82 | typedef struct stbrp_rect stbrp_rect;
83 |
84 | #ifdef STBRP_LARGE_RECTS
85 | typedef int stbrp_coord;
86 | #else
87 | typedef unsigned short stbrp_coord;
88 | #endif
89 |
90 | STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
91 | // Assign packed locations to rectangles. The rectangles are of type
92 | // 'stbrp_rect' defined below, stored in the array 'rects', and there
93 | // are 'num_rects' many of them.
94 | //
95 | // Rectangles which are successfully packed have the 'was_packed' flag
96 | // set to a non-zero value and 'x' and 'y' store the minimum location
97 | // on each axis (i.e. bottom-left in cartesian coordinates, top-left
98 | // if you imagine y increasing downwards). Rectangles which do not fit
99 | // have the 'was_packed' flag set to 0.
100 | //
101 | // You should not try to access the 'rects' array from another thread
102 | // while this function is running, as the function temporarily reorders
103 | // the array while it executes.
104 | //
105 | // To pack into another rectangle, you need to call stbrp_init_target
106 | // again. To continue packing into the same rectangle, you can call
107 | // this function again. Calling this multiple times with multiple rect
108 | // arrays will probably produce worse packing results than calling it
109 | // a single time with the full rectangle array, but the option is
110 | // available.
111 | //
112 | // The function returns 1 if all of the rectangles were successfully
113 | // packed and 0 otherwise.
114 |
115 | struct stbrp_rect // one rectangle to pack: caller fills in w/h, stbrp_pack_rects() writes x/y/was_packed
116 | {
117 | // reserved for your use (never read or written by the packing code in this file):
118 | int id;
119 |
120 | // input: size of the rectangle; a rect with w == 0 or h == 0 is placed at (0,0) without consuming space
121 | stbrp_coord w, h;
122 |
123 | // output: minimum corner of the packed position; both are set to STBRP__MAXVAL when the rect could not be packed
124 | stbrp_coord x, y;
125 | int was_packed; // non-zero if valid packing (temporarily holds the rect's original index while stbrp_pack_rects() sorts)
126 |
127 | }; // 16 bytes, nominally
128 |
129 |
130 | STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes);
131 | // Initialize a rectangle packer to:
132 | // pack a rectangle that is 'width' by 'height' in dimensions
133 | // using temporary storage provided by the array 'nodes', which is 'num_nodes' long
134 | //
135 | // You must call this function every time you start packing into a new target.
136 | //
137 | // There is no "shutdown" function. The 'nodes' memory must stay valid for
138 | // the following stbrp_pack_rects() call (or calls), but can be freed after
139 | // the call (or calls) finish.
140 | //
141 | // Note: to guarantee best results, either:
142 | // 1. make sure 'num_nodes' >= 'width'
143 | // or 2. call stbrp_setup_allow_out_of_mem() defined below with 'allow_out_of_mem = 1'
144 | //
145 | // If you don't do either of the above things, widths will be quantized to multiples
146 | // of small integers to guarantee the algorithm doesn't run out of temporary storage.
147 | //
148 | // If you do #2, then the non-quantized algorithm will be used, but the algorithm
149 | // may run out of temporary storage and be unable to pack some rectangles.
150 |
151 | STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem);
152 | // Optionally call this function after init but before doing any packing to
153 | // change the handling of the out-of-temp-memory scenario, described above.
154 | // If you call init again, this will be reset to the default (false).
155 |
156 |
157 | STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic);
158 | // Optionally select which packing heuristic the library should use. Different
159 | // heuristics will produce better/worse results for different data sets.
160 | // If you call init again, this will be reset to the default.
161 |
162 | enum // heuristic values accepted by stbrp_setup_heuristic()
163 | {
164 | STBRP_HEURISTIC_Skyline_default=0, // what stbrp_init_target() selects
165 | STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default, // bottom-left: lowest y wins, first (leftmost) candidate on ties
166 | STBRP_HEURISTIC_Skyline_BF_sortHeight // best-fit: lowest y wins, ties broken by least wasted area; also tries right-aligned positions
167 | };
168 |
169 |
170 | //////////////////////////////////////////////////////////////////////////////
171 | //
172 | // the details of the following structures don't matter to you, but they must
173 | // be visible so you can handle the memory allocations for them
174 |
175 | struct stbrp_node // one skyline segment, kept in a singly linked list
176 | {
177 | stbrp_coord x,y; // x: left edge of the segment (it extends to next->x); y: skyline height over that span
178 | stbrp_node *next; // next segment to the right in the active list, or next unused node in the free list; NULL ends the list
179 | };
180 |
181 | struct stbrp_context // packer state; fully (re)initialized by stbrp_init_target()
182 | {
183 | int width; // target width passed to stbrp_init_target()
184 | int height; // target height passed to stbrp_init_target()
185 | int align; // rect widths are rounded up to a multiple of this; 1 means no quantization
186 | int init_mode; // STBRP__INIT_skyline once initialized
187 | int heuristic; // one of the STBRP_HEURISTIC_* values
188 | int num_nodes; // number of caller-provided nodes
189 | stbrp_node *active_head; // head of the skyline list, ordered by increasing x and ending in the sentinel extra[1]
190 | stbrp_node *free_head; // head of the list of unused nodes; NULL when no node is left
191 | stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2'
192 | };
193 |
194 | #ifdef __cplusplus
195 | }
196 | #endif
197 |
198 | #endif
199 |
200 | //////////////////////////////////////////////////////////////////////////////
201 | //
202 | // IMPLEMENTATION SECTION
203 | //
204 |
205 | #ifdef STB_RECT_PACK_IMPLEMENTATION
206 | #ifndef STBRP_SORT
207 | #include <stdlib.h>
208 | #define STBRP_SORT qsort
209 | #endif
210 |
211 | #ifndef STBRP_ASSERT
212 | #include <assert.h>
213 | #define STBRP_ASSERT assert
214 | #endif
215 |
216 | // [DEAR IMGUI] Added STBRP__CDECL
217 | #ifdef _MSC_VER
218 | #define STBRP__NOTUSED(v) (void)(v)
219 | #define STBRP__CDECL __cdecl
220 | #else
221 | #define STBRP__NOTUSED(v) (void)sizeof(v)
222 | #define STBRP__CDECL
223 | #endif
224 |
225 | enum // values of stbrp_context::init_mode
226 | {
227 | STBRP__INIT_skyline = 1 // stored by stbrp_init_target(); the only mode stbrp_setup_heuristic() accepts
228 | };
229 | // Select the packing heuristic of 'context'. Asserts if 'heuristic' is not one of the two skyline values or if init_mode is not skyline.
230 | STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic)
231 | {
232 | switch (context->init_mode) {
233 | case STBRP__INIT_skyline:
234 | STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight);
235 | context->heuristic = heuristic;
236 | break;
237 | default:
238 | STBRP_ASSERT(0); // unknown init_mode; presumably the context never went through stbrp_init_target()
239 | }
240 | }
241 | // Choose how rect widths are quantized: non-zero 'allow_out_of_mem' disables quantization (align = 1), zero derives align from width and num_nodes.
242 | STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem)
243 | {
244 | if (allow_out_of_mem)
245 | // if it's ok to run out of memory, then don't bother aligning them;
246 | // this gives better packing, but may fail due to OOM (even though
247 | // the rectangles easily fit). @TODO a smarter approach would be to only
248 | // quantize once we've hit OOM, then we could get rid of this parameter.
249 | context->align = 1;
250 | else {
251 | // if it's not ok to run out of memory, then quantize the widths
252 | // so that num_nodes is always enough nodes.
253 | //
254 | // I.e. num_nodes * align >= width
255 | // align >= width / num_nodes
256 | // align = ceil(width/num_nodes)
257 | // (integer ceiling division; it divides by num_nodes, so num_nodes must be non-zero here)
258 | context->align = (context->width + context->num_nodes-1) / context->num_nodes;
259 | }
260 | }
261 | // Initialize 'context' to pack into a width x height target, using the caller-owned array 'nodes' ('num_nodes' entries, must be > 0) as skyline storage.
262 | STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
263 | {
264 | int i;
265 | #ifndef STBRP_LARGE_RECTS
266 | STBRP_ASSERT(width <= 0xffff && height <= 0xffff);
267 | #endif
268 | STBRP_ASSERT(num_nodes > 0); // with no nodes, nodes[0] below would be written out of bounds and the align computation would divide by zero
269 | for (i=0; i < num_nodes-1; ++i) // chain every caller node into the free list
270 | nodes[i].next = &nodes[i+1];
271 | nodes[i].next = NULL; // i == num_nodes-1 here: terminate the free list at the last node
272 | context->init_mode = STBRP__INIT_skyline;
273 | context->heuristic = STBRP_HEURISTIC_Skyline_default;
274 | context->free_head = &nodes[0];
275 | context->active_head = &context->extra[0];
276 | context->width = width;
277 | context->height = height;
278 | context->num_nodes = num_nodes;
279 | stbrp_setup_allow_out_of_mem(context, 0); // default: quantize widths so the nodes cannot run out
280 |
281 | // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly)
282 | context->extra[0].x = 0;
283 | context->extra[0].y = 0;
284 | context->extra[0].next = &context->extra[1];
285 | context->extra[1].x = (stbrp_coord) width;
286 | #ifdef STBRP_LARGE_RECTS
287 | context->extra[1].y = (1<<30);
288 | #else
289 | context->extra[1].y = 65535;
290 | #endif
291 | context->extra[1].next = NULL;
292 | }
293 |
294 | // find the minimum y at which a rect of 'width' can sit when its left edge is at x0, i.e. the highest skyline y over [x0, x0+width); '*pwaste' receives the empty area left underneath it
295 | static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
296 | {
297 | stbrp_node *node = first;
298 | int x1 = x0 + width;
299 | int min_y, visited_width, waste_area;
300 |
301 | STBRP__NOTUSED(c);
302 |
303 | STBRP_ASSERT(first->x <= x0);
304 | // NOTE(review): the '#if 0' branch below is compiled out; as written it advances with ++node rather than node->next
305 | #if 0
306 | // skip in case we're past the node
307 | while (node->next->x <= x0)
308 | ++node;
309 | #else
310 | STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency
311 | #endif
312 |
313 | STBRP_ASSERT(node->x <= x0);
314 | // min_y: highest skyline y seen so far; visited_width: width of the span already accounted for; waste_area: empty area below min_y within that span
315 | min_y = 0;
316 | waste_area = 0;
317 | visited_width = 0;
318 | while (node->x < x1) {
319 | if (node->y > min_y) {
320 | // raise min_y higher.
321 | // we've accounted for all waste up to min_y,
322 | // but we'll now add more waste for everything we've visited
323 | waste_area += visited_width * (node->y - min_y);
324 | min_y = node->y;
325 | // the first time through, visited_width might be reduced
326 | if (node->x < x0)
327 | visited_width += node->next->x - x0;
328 | else
329 | visited_width += node->next->x - node->x;
330 | } else {
331 | // add waste area
332 | int under_width = node->next->x - node->x;
333 | if (under_width + visited_width > width)
334 | under_width = width - visited_width;
335 | waste_area += under_width * (min_y - node->y);
336 | visited_width += under_width;
337 | }
338 | node = node->next;
339 | }
340 |
341 | *pwaste = waste_area;
342 | return min_y;
343 | }
344 | // Result of a position search: where the rect goes and where to splice it into the skyline list.
345 | typedef struct
346 | {
347 | int x,y; // chosen position of the rect's minimum corner
348 | stbrp_node **prev_link; // address of the link pointing at the skyline node that contains x; NULL means no position was found
349 | } stbrp__findresult;
350 | // Search the skyline for the best position of a width x height rect according to c->heuristic. prev_link is NULL if nothing was found. The bottom-left path does not check the vertical fit; the caller does.
351 | static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height)
352 | {
353 | int best_waste = (1<<30), best_x, best_y = (1 << 30);
354 | stbrp__findresult fr;
355 | stbrp_node **prev, *node, *tail, **best = NULL;
356 |
357 | // align to multiple of c->align
358 | width = (width + c->align - 1);
359 | width -= width % c->align;
360 | STBRP_ASSERT(width % c->align == 0);
361 |
362 | // if it can't possibly fit, bail immediately
363 | if (width > c->width || height > c->height) {
364 | fr.prev_link = NULL;
365 | fr.x = fr.y = 0;
366 | return fr;
367 | }
368 | // first pass: try the rect left-aligned at every skyline node where it still fits horizontally
369 | node = c->active_head;
370 | prev = &c->active_head;
371 | while (node->x + width <= c->width) {
372 | int y,waste;
373 | y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste);
374 | if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
375 | // bottom left
376 | if (y < best_y) {
377 | best_y = y;
378 | best = prev;
379 | }
380 | } else {
381 | // best-fit
382 | if (y + height <= c->height) {
383 | // can only use it if it fits vertically
384 | if (y < best_y || (y == best_y && waste < best_waste)) {
385 | best_y = y;
386 | best_waste = waste;
387 | best = prev;
388 | }
389 | }
390 | }
391 | prev = &node->next;
392 | node = node->next;
393 | }
394 | // the left-aligned winner starts at its node's x; 0 if no candidate was accepted
395 | best_x = (best == NULL) ? 0 : (*best)->x;
396 |
397 | // if doing best-fit (BF), we also have to try aligning right edge to each node position
398 | //
399 | //       e.g, if fitting
400 | //
401 | //     ____________________
402 | //    |____________________|
403 | //
404 | //            into
405 | //
406 | //   |                         |
407 | //   |             ____________|
408 | //   |____________|
409 | //
410 | // then right-aligned reduces waste, but bottom-left (BL) always chooses left-aligned
411 | //
412 | // This makes BF take about 2x the time
413 |
414 | if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
415 | tail = c->active_head;
416 | node = c->active_head;
417 | prev = &c->active_head;
418 | // find first node that's admissible
419 | while (tail->x < width)
420 | tail = tail->next;
421 | while (tail) {
422 | int xpos = tail->x - width; // left edge when the rect's right edge touches tail->x
423 | int y,waste;
424 | STBRP_ASSERT(xpos >= 0);
425 | // find the left position that matches this
426 | while (node->next->x <= xpos) {
427 | prev = &node->next;
428 | node = node->next;
429 | }
430 | STBRP_ASSERT(node->next->x > xpos && node->x <= xpos);
431 | y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste);
432 | if (y + height <= c->height) {
433 | if (y <= best_y) {
434 | if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) {
435 | best_x = xpos;
436 | STBRP_ASSERT(y <= best_y);
437 | best_y = y;
438 | best_waste = waste;
439 | best = prev;
440 | }
441 | }
442 | }
443 | tail = tail->next;
444 | }
445 | }
446 |
447 | fr.prev_link = best;
448 | fr.x = best_x;
449 | fr.y = best_y;
450 | return fr;
451 | }
452 | // Find a position for a width x height rect and, on success, update the skyline: insert a node at (res.x, res.y+height) and recycle the nodes it covers. Returns prev_link == NULL on failure.
453 | static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
454 | {
455 | // find best position according to heuristic
456 | stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height);
457 | stbrp_node *node, *cur;
458 |
459 | // bail if:
460 | // 1. it failed
461 | // 2. the best node doesn't fit (we don't always check this)
462 | // 3. we're out of memory
463 | if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) {
464 | res.prev_link = NULL;
465 | return res;
466 | }
467 |
468 | // on success, create new node
469 | node = context->free_head;
470 | node->x = (stbrp_coord) res.x;
471 | node->y = (stbrp_coord) (res.y + height);
472 |
473 | context->free_head = node->next;
474 |
475 | // insert the new node into the right starting point, and
476 | // let 'cur' point to the remaining nodes needing to be
477 | // stitched back in
478 |
479 | cur = *res.prev_link;
480 | if (cur->x < res.x) {
481 | // preserve the existing one, so start testing with the next one
482 | stbrp_node *next = cur->next;
483 | cur->next = node;
484 | cur = next;
485 | } else {
486 | *res.prev_link = node;
487 | }
488 |
489 | // from here, traverse cur and free the nodes, until we get to one
490 | // that shouldn't be freed
491 | while (cur->next && cur->next->x <= res.x + width) {
492 | stbrp_node *next = cur->next;
493 | // move the current node to the free list
494 | cur->next = context->free_head;
495 | context->free_head = cur;
496 | cur = next;
497 | }
498 | // NOTE(review): 'width' here is the caller's value; the aligned width computed inside stbrp__skyline_find_best_pos() is local to that function
499 | // stitch the list back in
500 | node->next = cur;
501 |
502 | if (cur->x < res.x + width)
503 | cur->x = (stbrp_coord) (res.x + width);
504 |
505 | #ifdef _DEBUG
506 | cur = context->active_head; // debug check 1: active list is strictly increasing in x and ends at the sentinel
507 | while (cur->x < context->width) {
508 | STBRP_ASSERT(cur->x < cur->next->x);
509 | cur = cur->next;
510 | }
511 | STBRP_ASSERT(cur->next == NULL);
512 |
513 | { // debug check 2: no node was lost; active + free lists hold all num_nodes user nodes plus the two extra ones
514 | int count=0;
515 | cur = context->active_head;
516 | while (cur) {
517 | cur = cur->next;
518 | ++count;
519 | }
520 | cur = context->free_head;
521 | while (cur) {
522 | cur = cur->next;
523 | ++count;
524 | }
525 | STBRP_ASSERT(count == context->num_nodes+2);
526 | }
527 | #endif
528 |
529 | return res;
530 | }
531 |
532 | // [DEAR IMGUI] Added STBRP__CDECL. Sort comparator: taller rects first, ties broken by wider rects first.
533 | static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
534 | {
535 | const stbrp_rect *lhs = (const stbrp_rect *) a;
536 | const stbrp_rect *rhs = (const stbrp_rect *) b;
537 | if (lhs->h != rhs->h)
538 | return (lhs->h > rhs->h) ? -1 : 1;
539 | if (lhs->w != rhs->w)
540 | return (lhs->w > rhs->w) ? -1 : 1;
541 | return 0; // same height and width
542 | }
543 |
544 | // [DEAR IMGUI] Added STBRP__CDECL. Sort comparator: ascending 'was_packed', which stbrp_pack_rects() fills with each rect's original index.
545 | static int STBRP__CDECL rect_original_order(const void *a, const void *b)
546 | {
547 | const int lhs_index = ((const stbrp_rect *) a)->was_packed;
548 | const int rhs_index = ((const stbrp_rect *) b)->was_packed;
549 | return (lhs_index > rhs_index) - (lhs_index < rhs_index); // -1, 0 or 1
550 | }
551 |
552 | #ifdef STBRP_LARGE_RECTS
553 | #define STBRP__MAXVAL 0x7fffffff // x/y sentinel for "not packed"; must fit in the signed int stbrp_coord (0xffffffff is an unsigned literal that does not)
554 | #else
555 | #define STBRP__MAXVAL 0xffff // x/y sentinel for "not packed"; largest value of the unsigned short stbrp_coord
556 | #endif
557 | // Pack 'num_rects' rects into the target of 'context', writing x/y/was_packed of each. The array is sorted while packing and restored to its original order before returning. Returns 1 if every rect was packed, 0 otherwise.
558 | STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
559 | {
560 | int i, all_rects_packed = 1;
561 |
562 | // we use the 'was_packed' field internally to allow sorting/unsorting
563 | for (i=0; i < num_rects; ++i) {
564 | rects[i].was_packed = i;
565 | }
566 |
567 | // sort by decreasing height, then decreasing width (the same order is used for both heuristics)
568 | STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare);
569 |
570 | for (i=0; i < num_rects; ++i) {
571 | if (rects[i].w == 0 || rects[i].h == 0) {
572 | rects[i].x = rects[i].y = 0; // empty rect needs no space
573 | } else {
574 | stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h);
575 | if (fr.prev_link) {
576 | rects[i].x = (stbrp_coord) fr.x;
577 | rects[i].y = (stbrp_coord) fr.y;
578 | } else {
579 | rects[i].x = rects[i].y = STBRP__MAXVAL; // failure marker, translated into was_packed = 0 below
580 | }
581 | }
582 | }
583 |
584 | // unsort
585 | STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order);
586 |
587 | // set was_packed flags and all_rects_packed status
588 | for (i=0; i < num_rects; ++i) {
589 | rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL);
590 | if (!rects[i].was_packed)
591 | all_rects_packed = 0;
592 | }
593 |
594 | // return the all_rects_packed status
595 | return all_rects_packed;
596 | }
597 | #endif
598 |
599 | /*
600 | ------------------------------------------------------------------------------
601 | This software is available under 2 licenses -- choose whichever you prefer.
602 | ------------------------------------------------------------------------------
603 | ALTERNATIVE A - MIT License
604 | Copyright (c) 2017 Sean Barrett
605 | Permission is hereby granted, free of charge, to any person obtaining a copy of
606 | this software and associated documentation files (the "Software"), to deal in
607 | the Software without restriction, including without limitation the rights to
608 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
609 | of the Software, and to permit persons to whom the Software is furnished to do
610 | so, subject to the following conditions:
611 | The above copyright notice and this permission notice shall be included in all
612 | copies or substantial portions of the Software.
613 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
614 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
615 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
616 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
617 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
618 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
619 | SOFTWARE.
620 | ------------------------------------------------------------------------------
621 | ALTERNATIVE B - Public Domain (www.unlicense.org)
622 | This is free and unencumbered software released into the public domain.
623 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
624 | software, either in source code form or as a compiled binary, for any purpose,
625 | commercial or non-commercial, and by any means.
626 | In jurisdictions that recognize copyright laws, the author or authors of this
627 | software dedicate any and all copyright interest in the software to the public
628 | domain. We make this dedication for the benefit of the public at large and to
629 | the detriment of our heirs and successors. We intend this dedication to be an
630 | overt act of relinquishment in perpetuity of all present and future rights to
631 | this software under copyright law.
632 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
633 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
634 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
635 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
636 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
637 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
638 | ------------------------------------------------------------------------------
639 | */
640 |
--------------------------------------------------------------------------------
/libs/imgui/src/imgui_impl_glfw.cpp:
--------------------------------------------------------------------------------
1 | // dear imgui: Platform Backend for GLFW
2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..)
3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.)
4 | // (Requires: GLFW 3.1+)
5 |
6 | // Implemented features:
7 | // [X] Platform: Clipboard support.
8 | // [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set]
9 | // [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'.
10 | // [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors requires GLFW 3.4+).
11 |
12 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this.
13 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need.
14 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp.
15 | // Read online: https://github.com/ocornut/imgui/tree/master/docs
16 |
17 | // CHANGELOG
18 | // (minor and older changes stripped away, please see git history for details)
19 | // 2022-01-26: Inputs: replaced short-lived io.AddKeyModsEvent() (added two weeks ago) with io.AddKeyEvent() using ImGuiKey_ModXXX flags. Sorry for the confusion.
20 | // 2022-01-20: Inputs: calling new io.AddKeyAnalogEvent() for gamepad support, instead of writing directly to io.NavInputs[].
21 | // 2022-01-17: Inputs: calling new io.AddMousePosEvent(), io.AddMouseButtonEvent(), io.AddMouseWheelEvent() API (1.87+).
22 | // 2022-01-17: Inputs: always update key mods next and before key event (not in NewFrame) to fix input queue with very low framerates.
23 | // 2022-01-12: *BREAKING CHANGE*: Now using glfwSetCursorPosCallback(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorPosCallback() and forward it to the backend via ImGui_ImplGlfw_CursorPosCallback().
24 | // 2022-01-10: Inputs: calling new io.AddKeyEvent(), io.AddKeyModsEvent() + io.SetKeyEventNativeData() API (1.87+). Support for full ImGuiKey range.
25 | // 2022-01-05: Inputs: Converting GLFW untranslated keycodes back to translated keycodes (in the ImGui_ImplGlfw_KeyCallback() function) in order to match the behavior of every other backend, and facilitate the use of GLFW with lettered-shortcuts API.
26 | // 2021-08-17: *BREAKING CHANGE*: Now using glfwSetWindowFocusCallback() to call io.AddFocusEvent(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetWindowFocusCallback() and forward it to the backend via ImGui_ImplGlfw_WindowFocusCallback().
27 | // 2021-07-29: *BREAKING CHANGE*: Now using glfwSetCursorEnterCallback(). MousePos is correctly reported when the host platform window is hovered but not focused. If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorEnterCallback() and forward it to the backend via ImGui_ImplGlfw_CursorEnterCallback().
28 | // 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX).
29 | // 2020-01-17: Inputs: Disable error callback while assigning mouse cursors because some X11 setup don't have them and it generates errors.
30 | // 2019-12-05: Inputs: Added support for new mouse cursors added in GLFW 3.4+ (resizing cursors, not allowed cursor).
31 | // 2019-10-18: Misc: Previously installed user callbacks are now restored on shutdown.
32 | // 2019-07-21: Inputs: Added mapping for ImGuiKey_KeyPadEnter.
33 | // 2019-05-11: Inputs: Don't filter value from character callback before calling AddInputCharacter().
34 | // 2019-03-12: Misc: Preserve DisplayFramebufferScale when main window is minimized.
35 | // 2018-11-30: Misc: Setting up io.BackendPlatformName so it can be displayed in the About Window.
36 | // 2018-11-07: Inputs: When installing our GLFW callbacks, we save user's previously installed ones - if any - and chain call them.
37 | // 2018-08-01: Inputs: Workaround for Emscripten which doesn't seem to handle focus related calls.
38 | // 2018-06-29: Inputs: Added support for the ImGuiMouseCursor_Hand cursor.
39 | // 2018-06-08: Misc: Extracted imgui_impl_glfw.cpp/.h away from the old combined GLFW+OpenGL/Vulkan examples.
40 | // 2018-03-20: Misc: Setup io.BackendFlags ImGuiBackendFlags_HasMouseCursors flag + honor ImGuiConfigFlags_NoMouseCursorChange flag.
41 | // 2018-02-20: Inputs: Added support for mouse cursors (ImGui::GetMouseCursor() value, passed to glfwSetCursor()).
42 | // 2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves.
43 | // 2018-02-06: Inputs: Added mapping for ImGuiKey_Space.
44 | // 2018-01-25: Inputs: Added gamepad support if ImGuiConfigFlags_NavEnableGamepad is set.
45 | // 2018-01-25: Inputs: Honoring the io.WantSetMousePos by repositioning the mouse (when using navigation and ImGuiConfigFlags_NavMoveMouse is set).
46 | // 2018-01-20: Inputs: Added Horizontal Mouse Wheel support.
47 | // 2018-01-18: Inputs: Added mapping for ImGuiKey_Insert.
48 | // 2017-08-25: Inputs: MousePos set to -FLT_MAX,-FLT_MAX when mouse is unavailable/missing (instead of -1,-1).
49 | // 2016-10-15: Misc: Added a void* user_data parameter to Clipboard function handlers.
50 |
51 | #include "imgui.h"
52 | #include "imgui_impl_glfw.h"
53 |
54 | // Clang warnings with -Weverything
55 | #if defined(__clang__)
56 | #pragma clang diagnostic push
57 | #pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast
58 | #pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness
59 | #if __has_warning("-Wzero-as-null-pointer-constant")
60 | #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
61 | #endif
62 | #endif
63 |
// GLFW
#include <GLFW/glfw3.h>
#ifdef _WIN32
#undef APIENTRY
#define GLFW_EXPOSE_NATIVE_WIN32
#include <GLFW/glfw3native.h>   // for glfwGetWin32Window
#endif
71 | #ifdef GLFW_RESIZE_NESW_CURSOR // Let's be nice to people who pulled GLFW between 2019-04-16 (3.4 define) and 2019-11-29 (cursors defines) // FIXME: Remove when GLFW 3.4 is released?
72 | #define GLFW_HAS_NEW_CURSORS (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3400) // 3.4+ GLFW_RESIZE_ALL_CURSOR, GLFW_RESIZE_NESW_CURSOR, GLFW_RESIZE_NWSE_CURSOR, GLFW_NOT_ALLOWED_CURSOR
73 | #else
74 | #define GLFW_HAS_NEW_CURSORS (0)
75 | #endif
76 | #define GLFW_HAS_GAMEPAD_API (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3300) // 3.3+ glfwGetGamepadState() new api
77 | #define GLFW_HAS_GET_KEY_NAME (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3200) // 3.2+ glfwGetKeyName()
78 |
79 | // GLFW data
// Identifies which ImGui_ImplGlfw_InitForXXX() entry point initialized the backend.
// The value is stored in ImGui_ImplGlfw_Data::ClientApi by ImGui_ImplGlfw_Init().
enum GlfwClientApi
{
    GlfwClientApi_Unknown = 0,  // Passed by ImGui_ImplGlfw_InitForOther()
    GlfwClientApi_OpenGL,       // Passed by ImGui_ImplGlfw_InitForOpenGL()
    GlfwClientApi_Vulkan        // Passed by ImGui_ImplGlfw_InitForVulkan()
};
86 |
87 | struct ImGui_ImplGlfw_Data
88 | {
89 | GLFWwindow* Window;
90 | GlfwClientApi ClientApi;
91 | double Time;
92 | GLFWwindow* MouseWindow;
93 | GLFWcursor* MouseCursors[ImGuiMouseCursor_COUNT];
94 | bool InstalledCallbacks;
95 |
96 | // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any.
97 | GLFWwindowfocusfun PrevUserCallbackWindowFocus;
98 | GLFWcursorposfun PrevUserCallbackCursorPos;
99 | GLFWcursorenterfun PrevUserCallbackCursorEnter;
100 | GLFWmousebuttonfun PrevUserCallbackMousebutton;
101 | GLFWscrollfun PrevUserCallbackScroll;
102 | GLFWkeyfun PrevUserCallbackKey;
103 | GLFWcharfun PrevUserCallbackChar;
104 | GLFWmonitorfun PrevUserCallbackMonitor;
105 |
106 | ImGui_ImplGlfw_Data() { memset(this, 0, sizeof(*this)); }
107 | };
108 |
109 | // Backend data stored in io.BackendPlatformUserData to allow support for multiple Dear ImGui contexts
110 | // It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts.
111 | // FIXME: multi-context support is not well tested and probably dysfunctional in this backend.
112 | // - Because glfwPollEvents() process all windows and some events may be called outside of it, you will need to register your own callbacks
113 | // (passing install_callbacks=false in ImGui_ImplGlfw_InitXXX functions), set the current dear imgui context and then call our callbacks.
114 | // - Otherwise we may need to store a GLFWWindow* -> ImGuiContext* map and handle this in the backend, adding a little bit of extra complexity to it.
115 | // FIXME: some shared resources (mouse cursor shape, gamepad) are mishandled when using multi-context.
116 | static ImGui_ImplGlfw_Data* ImGui_ImplGlfw_GetBackendData()
117 | {
118 | return ImGui::GetCurrentContext() ? (ImGui_ImplGlfw_Data*)ImGui::GetIO().BackendPlatformUserData : NULL;
119 | }
120 |
121 | // Functions
122 | static const char* ImGui_ImplGlfw_GetClipboardText(void* user_data)
123 | {
124 | return glfwGetClipboardString((GLFWwindow*)user_data);
125 | }
126 |
127 | static void ImGui_ImplGlfw_SetClipboardText(void* user_data, const char* text)
128 | {
129 | glfwSetClipboardString((GLFWwindow*)user_data, text);
130 | }
131 |
132 | static ImGuiKey ImGui_ImplGlfw_KeyToImGuiKey(int key)
133 | {
134 | switch (key)
135 | {
136 | case GLFW_KEY_TAB: return ImGuiKey_Tab;
137 | case GLFW_KEY_LEFT: return ImGuiKey_LeftArrow;
138 | case GLFW_KEY_RIGHT: return ImGuiKey_RightArrow;
139 | case GLFW_KEY_UP: return ImGuiKey_UpArrow;
140 | case GLFW_KEY_DOWN: return ImGuiKey_DownArrow;
141 | case GLFW_KEY_PAGE_UP: return ImGuiKey_PageUp;
142 | case GLFW_KEY_PAGE_DOWN: return ImGuiKey_PageDown;
143 | case GLFW_KEY_HOME: return ImGuiKey_Home;
144 | case GLFW_KEY_END: return ImGuiKey_End;
145 | case GLFW_KEY_INSERT: return ImGuiKey_Insert;
146 | case GLFW_KEY_DELETE: return ImGuiKey_Delete;
147 | case GLFW_KEY_BACKSPACE: return ImGuiKey_Backspace;
148 | case GLFW_KEY_SPACE: return ImGuiKey_Space;
149 | case GLFW_KEY_ENTER: return ImGuiKey_Enter;
150 | case GLFW_KEY_ESCAPE: return ImGuiKey_Escape;
151 | case GLFW_KEY_APOSTROPHE: return ImGuiKey_Apostrophe;
152 | case GLFW_KEY_COMMA: return ImGuiKey_Comma;
153 | case GLFW_KEY_MINUS: return ImGuiKey_Minus;
154 | case GLFW_KEY_PERIOD: return ImGuiKey_Period;
155 | case GLFW_KEY_SLASH: return ImGuiKey_Slash;
156 | case GLFW_KEY_SEMICOLON: return ImGuiKey_Semicolon;
157 | case GLFW_KEY_EQUAL: return ImGuiKey_Equal;
158 | case GLFW_KEY_LEFT_BRACKET: return ImGuiKey_LeftBracket;
159 | case GLFW_KEY_BACKSLASH: return ImGuiKey_Backslash;
160 | case GLFW_KEY_RIGHT_BRACKET: return ImGuiKey_RightBracket;
161 | case GLFW_KEY_GRAVE_ACCENT: return ImGuiKey_GraveAccent;
162 | case GLFW_KEY_CAPS_LOCK: return ImGuiKey_CapsLock;
163 | case GLFW_KEY_SCROLL_LOCK: return ImGuiKey_ScrollLock;
164 | case GLFW_KEY_NUM_LOCK: return ImGuiKey_NumLock;
165 | case GLFW_KEY_PRINT_SCREEN: return ImGuiKey_PrintScreen;
166 | case GLFW_KEY_PAUSE: return ImGuiKey_Pause;
167 | case GLFW_KEY_KP_0: return ImGuiKey_Keypad0;
168 | case GLFW_KEY_KP_1: return ImGuiKey_Keypad1;
169 | case GLFW_KEY_KP_2: return ImGuiKey_Keypad2;
170 | case GLFW_KEY_KP_3: return ImGuiKey_Keypad3;
171 | case GLFW_KEY_KP_4: return ImGuiKey_Keypad4;
172 | case GLFW_KEY_KP_5: return ImGuiKey_Keypad5;
173 | case GLFW_KEY_KP_6: return ImGuiKey_Keypad6;
174 | case GLFW_KEY_KP_7: return ImGuiKey_Keypad7;
175 | case GLFW_KEY_KP_8: return ImGuiKey_Keypad8;
176 | case GLFW_KEY_KP_9: return ImGuiKey_Keypad9;
177 | case GLFW_KEY_KP_DECIMAL: return ImGuiKey_KeypadDecimal;
178 | case GLFW_KEY_KP_DIVIDE: return ImGuiKey_KeypadDivide;
179 | case GLFW_KEY_KP_MULTIPLY: return ImGuiKey_KeypadMultiply;
180 | case GLFW_KEY_KP_SUBTRACT: return ImGuiKey_KeypadSubtract;
181 | case GLFW_KEY_KP_ADD: return ImGuiKey_KeypadAdd;
182 | case GLFW_KEY_KP_ENTER: return ImGuiKey_KeypadEnter;
183 | case GLFW_KEY_KP_EQUAL: return ImGuiKey_KeypadEqual;
184 | case GLFW_KEY_LEFT_SHIFT: return ImGuiKey_LeftShift;
185 | case GLFW_KEY_LEFT_CONTROL: return ImGuiKey_LeftCtrl;
186 | case GLFW_KEY_LEFT_ALT: return ImGuiKey_LeftAlt;
187 | case GLFW_KEY_LEFT_SUPER: return ImGuiKey_LeftSuper;
188 | case GLFW_KEY_RIGHT_SHIFT: return ImGuiKey_RightShift;
189 | case GLFW_KEY_RIGHT_CONTROL: return ImGuiKey_RightCtrl;
190 | case GLFW_KEY_RIGHT_ALT: return ImGuiKey_RightAlt;
191 | case GLFW_KEY_RIGHT_SUPER: return ImGuiKey_RightSuper;
192 | case GLFW_KEY_MENU: return ImGuiKey_Menu;
193 | case GLFW_KEY_0: return ImGuiKey_0;
194 | case GLFW_KEY_1: return ImGuiKey_1;
195 | case GLFW_KEY_2: return ImGuiKey_2;
196 | case GLFW_KEY_3: return ImGuiKey_3;
197 | case GLFW_KEY_4: return ImGuiKey_4;
198 | case GLFW_KEY_5: return ImGuiKey_5;
199 | case GLFW_KEY_6: return ImGuiKey_6;
200 | case GLFW_KEY_7: return ImGuiKey_7;
201 | case GLFW_KEY_8: return ImGuiKey_8;
202 | case GLFW_KEY_9: return ImGuiKey_9;
203 | case GLFW_KEY_A: return ImGuiKey_A;
204 | case GLFW_KEY_B: return ImGuiKey_B;
205 | case GLFW_KEY_C: return ImGuiKey_C;
206 | case GLFW_KEY_D: return ImGuiKey_D;
207 | case GLFW_KEY_E: return ImGuiKey_E;
208 | case GLFW_KEY_F: return ImGuiKey_F;
209 | case GLFW_KEY_G: return ImGuiKey_G;
210 | case GLFW_KEY_H: return ImGuiKey_H;
211 | case GLFW_KEY_I: return ImGuiKey_I;
212 | case GLFW_KEY_J: return ImGuiKey_J;
213 | case GLFW_KEY_K: return ImGuiKey_K;
214 | case GLFW_KEY_L: return ImGuiKey_L;
215 | case GLFW_KEY_M: return ImGuiKey_M;
216 | case GLFW_KEY_N: return ImGuiKey_N;
217 | case GLFW_KEY_O: return ImGuiKey_O;
218 | case GLFW_KEY_P: return ImGuiKey_P;
219 | case GLFW_KEY_Q: return ImGuiKey_Q;
220 | case GLFW_KEY_R: return ImGuiKey_R;
221 | case GLFW_KEY_S: return ImGuiKey_S;
222 | case GLFW_KEY_T: return ImGuiKey_T;
223 | case GLFW_KEY_U: return ImGuiKey_U;
224 | case GLFW_KEY_V: return ImGuiKey_V;
225 | case GLFW_KEY_W: return ImGuiKey_W;
226 | case GLFW_KEY_X: return ImGuiKey_X;
227 | case GLFW_KEY_Y: return ImGuiKey_Y;
228 | case GLFW_KEY_Z: return ImGuiKey_Z;
229 | case GLFW_KEY_F1: return ImGuiKey_F1;
230 | case GLFW_KEY_F2: return ImGuiKey_F2;
231 | case GLFW_KEY_F3: return ImGuiKey_F3;
232 | case GLFW_KEY_F4: return ImGuiKey_F4;
233 | case GLFW_KEY_F5: return ImGuiKey_F5;
234 | case GLFW_KEY_F6: return ImGuiKey_F6;
235 | case GLFW_KEY_F7: return ImGuiKey_F7;
236 | case GLFW_KEY_F8: return ImGuiKey_F8;
237 | case GLFW_KEY_F9: return ImGuiKey_F9;
238 | case GLFW_KEY_F10: return ImGuiKey_F10;
239 | case GLFW_KEY_F11: return ImGuiKey_F11;
240 | case GLFW_KEY_F12: return ImGuiKey_F12;
241 | default: return ImGuiKey_None;
242 | }
243 | }
244 |
245 | static void ImGui_ImplGlfw_UpdateKeyModifiers(int mods)
246 | {
247 | ImGuiIO& io = ImGui::GetIO();
248 | io.AddKeyEvent(ImGuiKey_ModCtrl, (mods & GLFW_MOD_CONTROL) != 0);
249 | io.AddKeyEvent(ImGuiKey_ModShift, (mods & GLFW_MOD_SHIFT) != 0);
250 | io.AddKeyEvent(ImGuiKey_ModAlt, (mods & GLFW_MOD_ALT) != 0);
251 | io.AddKeyEvent(ImGuiKey_ModSuper, (mods & GLFW_MOD_SUPER) != 0);
252 | }
253 |
254 | void ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods)
255 | {
256 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
257 | if (bd->PrevUserCallbackMousebutton != NULL && window == bd->Window)
258 | bd->PrevUserCallbackMousebutton(window, button, action, mods);
259 |
260 | ImGui_ImplGlfw_UpdateKeyModifiers(mods);
261 |
262 | ImGuiIO& io = ImGui::GetIO();
263 | if (button >= 0 && button < ImGuiMouseButton_COUNT)
264 | io.AddMouseButtonEvent(button, action == GLFW_PRESS);
265 | }
266 |
267 | void ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset)
268 | {
269 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
270 | if (bd->PrevUserCallbackScroll != NULL && window == bd->Window)
271 | bd->PrevUserCallbackScroll(window, xoffset, yoffset);
272 |
273 | ImGuiIO& io = ImGui::GetIO();
274 | io.AddMouseWheelEvent((float)xoffset, (float)yoffset);
275 | }
276 |
277 | static int ImGui_ImplGlfw_TranslateUntranslatedKey(int key, int scancode)
278 | {
279 | #if GLFW_HAS_GET_KEY_NAME && !defined(__EMSCRIPTEN__)
280 | // GLFW 3.1+ attempts to "untranslate" keys, which goes the opposite of what every other framework does, making using lettered shortcuts difficult.
281 | // (It had reasons to do so: namely GLFW is/was more likely to be used for WASD-type game controls rather than lettered shortcuts, but IHMO the 3.1 change could have been done differently)
282 | // See https://github.com/glfw/glfw/issues/1502 for details.
283 | // Adding a workaround to undo this (so our keys are translated->untranslated->translated, likely a lossy process).
284 | // This won't cover edge cases but this is at least going to cover common cases.
285 | const char* key_name = glfwGetKeyName(key, scancode);
286 | if (key_name && key_name[0] != 0 && key_name[1] == 0)
287 | {
288 | const char char_names[] = "`-=[]\\,;\'./";
289 | const int char_keys[] = { GLFW_KEY_GRAVE_ACCENT, GLFW_KEY_MINUS, GLFW_KEY_EQUAL, GLFW_KEY_LEFT_BRACKET, GLFW_KEY_RIGHT_BRACKET, GLFW_KEY_BACKSLASH, GLFW_KEY_COMMA, GLFW_KEY_SEMICOLON, GLFW_KEY_APOSTROPHE, GLFW_KEY_PERIOD, GLFW_KEY_SLASH, 0 };
290 | IM_ASSERT(IM_ARRAYSIZE(char_names) == IM_ARRAYSIZE(char_keys));
291 | if (key_name[0] >= '0' && key_name[0] <= '9') { key = GLFW_KEY_0 + (key_name[0] - '0'); }
292 | else if (key_name[0] >= 'A' && key_name[0] <= 'Z') { key = GLFW_KEY_A + (key_name[0] - 'A'); }
293 | else if (const char* p = strchr(char_names, key_name[0])) { key = char_keys[p - char_names]; }
294 | }
295 | // if (action == GLFW_PRESS) printf("key %d scancode %d name '%s'\n", key, scancode, key_name);
296 | #else
297 | IM_UNUSED(scancode);
298 | #endif
299 | return key;
300 | }
301 |
302 | void ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int keycode, int scancode, int action, int mods)
303 | {
304 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
305 | if (bd->PrevUserCallbackKey != NULL && window == bd->Window)
306 | bd->PrevUserCallbackKey(window, keycode, scancode, action, mods);
307 |
308 | if (action != GLFW_PRESS && action != GLFW_RELEASE)
309 | return;
310 |
311 | ImGui_ImplGlfw_UpdateKeyModifiers(mods);
312 |
313 | keycode = ImGui_ImplGlfw_TranslateUntranslatedKey(keycode, scancode);
314 |
315 | ImGuiIO& io = ImGui::GetIO();
316 | ImGuiKey imgui_key = ImGui_ImplGlfw_KeyToImGuiKey(keycode);
317 | io.AddKeyEvent(imgui_key, (action == GLFW_PRESS));
318 | io.SetKeyEventNativeData(imgui_key, keycode, scancode); // To support legacy indexing (<1.87 user code)
319 | }
320 |
321 | void ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused)
322 | {
323 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
324 | if (bd->PrevUserCallbackWindowFocus != NULL && window == bd->Window)
325 | bd->PrevUserCallbackWindowFocus(window, focused);
326 |
327 | ImGuiIO& io = ImGui::GetIO();
328 | io.AddFocusEvent(focused != 0);
329 | }
330 |
331 | void ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y)
332 | {
333 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
334 | if (bd->PrevUserCallbackCursorPos != NULL && window == bd->Window)
335 | bd->PrevUserCallbackCursorPos(window, x, y);
336 |
337 | ImGuiIO& io = ImGui::GetIO();
338 | io.AddMousePosEvent((float)x, (float)y);
339 | }
340 |
341 | void ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered)
342 | {
343 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
344 | if (bd->PrevUserCallbackCursorEnter != NULL && window == bd->Window)
345 | bd->PrevUserCallbackCursorEnter(window, entered);
346 |
347 | ImGuiIO& io = ImGui::GetIO();
348 | if (entered)
349 | bd->MouseWindow = window;
350 | if (!entered && bd->MouseWindow == window)
351 | {
352 | bd->MouseWindow = NULL;
353 | io.AddMousePosEvent(-FLT_MAX, -FLT_MAX);
354 | }
355 | }
356 |
357 | void ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c)
358 | {
359 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
360 | if (bd->PrevUserCallbackChar != NULL && window == bd->Window)
361 | bd->PrevUserCallbackChar(window, c);
362 |
363 | ImGuiIO& io = ImGui::GetIO();
364 | io.AddInputCharacter(c);
365 | }
366 |
367 | void ImGui_ImplGlfw_MonitorCallback(GLFWmonitor*, int)
368 | {
369 | // Unused in 'master' branch but 'docking' branch will use this, so we declare it ahead of it so if you have to install callbacks you can install this one too.
370 | }
371 |
372 | static bool ImGui_ImplGlfw_Init(GLFWwindow* window, bool install_callbacks, GlfwClientApi client_api)
373 | {
374 | ImGuiIO& io = ImGui::GetIO();
375 | IM_ASSERT(io.BackendPlatformUserData == NULL && "Already initialized a platform backend!");
376 |
377 | // Setup backend capabilities flags
378 | ImGui_ImplGlfw_Data* bd = IM_NEW(ImGui_ImplGlfw_Data)();
379 | io.BackendPlatformUserData = (void*)bd;
380 | io.BackendPlatformName = "imgui_impl_glfw";
381 | io.BackendFlags |= ImGuiBackendFlags_HasMouseCursors; // We can honor GetMouseCursor() values (optional)
382 | io.BackendFlags |= ImGuiBackendFlags_HasSetMousePos; // We can honor io.WantSetMousePos requests (optional, rarely used)
383 |
384 | bd->Window = window;
385 | bd->Time = 0.0;
386 |
387 | io.SetClipboardTextFn = ImGui_ImplGlfw_SetClipboardText;
388 | io.GetClipboardTextFn = ImGui_ImplGlfw_GetClipboardText;
389 | io.ClipboardUserData = bd->Window;
390 |
391 | // Set platform dependent data in viewport
392 | #if defined(_WIN32)
393 | ImGui::GetMainViewport()->PlatformHandleRaw = (void*)glfwGetWin32Window(bd->Window);
394 | #endif
395 |
396 | // Create mouse cursors
397 | // (By design, on X11 cursors are user configurable and some cursors may be missing. When a cursor doesn't exist,
398 | // GLFW will emit an error which will often be printed by the app, so we temporarily disable error reporting.
399 | // Missing cursors will return NULL and our _UpdateMouseCursor() function will use the Arrow cursor instead.)
400 | GLFWerrorfun prev_error_callback = glfwSetErrorCallback(NULL);
401 | bd->MouseCursors[ImGuiMouseCursor_Arrow] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
402 | bd->MouseCursors[ImGuiMouseCursor_TextInput] = glfwCreateStandardCursor(GLFW_IBEAM_CURSOR);
403 | bd->MouseCursors[ImGuiMouseCursor_ResizeNS] = glfwCreateStandardCursor(GLFW_VRESIZE_CURSOR);
404 | bd->MouseCursors[ImGuiMouseCursor_ResizeEW] = glfwCreateStandardCursor(GLFW_HRESIZE_CURSOR);
405 | bd->MouseCursors[ImGuiMouseCursor_Hand] = glfwCreateStandardCursor(GLFW_HAND_CURSOR);
406 | #if GLFW_HAS_NEW_CURSORS
407 | bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_RESIZE_ALL_CURSOR);
408 | bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_RESIZE_NESW_CURSOR);
409 | bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_RESIZE_NWSE_CURSOR);
410 | bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_NOT_ALLOWED_CURSOR);
411 | #else
412 | bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
413 | bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
414 | bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
415 | bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR);
416 | #endif
417 | glfwSetErrorCallback(prev_error_callback);
418 |
419 | // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any.
420 | bd->PrevUserCallbackWindowFocus = NULL;
421 | bd->PrevUserCallbackCursorEnter = NULL;
422 | bd->PrevUserCallbackMousebutton = NULL;
423 | bd->PrevUserCallbackScroll = NULL;
424 | bd->PrevUserCallbackKey = NULL;
425 | bd->PrevUserCallbackChar = NULL;
426 | bd->PrevUserCallbackMonitor = NULL;
427 | if (install_callbacks)
428 | {
429 | bd->InstalledCallbacks = true;
430 | bd->PrevUserCallbackWindowFocus = glfwSetWindowFocusCallback(window, ImGui_ImplGlfw_WindowFocusCallback);
431 | bd->PrevUserCallbackCursorEnter = glfwSetCursorEnterCallback(window, ImGui_ImplGlfw_CursorEnterCallback);
432 | bd->PrevUserCallbackCursorPos = glfwSetCursorPosCallback(window, ImGui_ImplGlfw_CursorPosCallback);
433 | bd->PrevUserCallbackMousebutton = glfwSetMouseButtonCallback(window, ImGui_ImplGlfw_MouseButtonCallback);
434 | bd->PrevUserCallbackScroll = glfwSetScrollCallback(window, ImGui_ImplGlfw_ScrollCallback);
435 | bd->PrevUserCallbackKey = glfwSetKeyCallback(window, ImGui_ImplGlfw_KeyCallback);
436 | bd->PrevUserCallbackChar = glfwSetCharCallback(window, ImGui_ImplGlfw_CharCallback);
437 | bd->PrevUserCallbackMonitor = glfwSetMonitorCallback(ImGui_ImplGlfw_MonitorCallback);
438 | }
439 |
440 | bd->ClientApi = client_api;
441 | return true;
442 | }
443 |
444 | bool ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks)
445 | {
446 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_OpenGL);
447 | }
448 |
449 | bool ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks)
450 | {
451 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Vulkan);
452 | }
453 |
454 | bool ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks)
455 | {
456 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Unknown);
457 | }
458 |
459 | void ImGui_ImplGlfw_Shutdown()
460 | {
461 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
462 | IM_ASSERT(bd != NULL && "No platform backend to shutdown, or already shutdown?");
463 | ImGuiIO& io = ImGui::GetIO();
464 |
465 | if (bd->InstalledCallbacks)
466 | {
467 | glfwSetWindowFocusCallback(bd->Window, bd->PrevUserCallbackWindowFocus);
468 | glfwSetCursorEnterCallback(bd->Window, bd->PrevUserCallbackCursorEnter);
469 | glfwSetCursorPosCallback(bd->Window, bd->PrevUserCallbackCursorPos);
470 | glfwSetMouseButtonCallback(bd->Window, bd->PrevUserCallbackMousebutton);
471 | glfwSetScrollCallback(bd->Window, bd->PrevUserCallbackScroll);
472 | glfwSetKeyCallback(bd->Window, bd->PrevUserCallbackKey);
473 | glfwSetCharCallback(bd->Window, bd->PrevUserCallbackChar);
474 | glfwSetMonitorCallback(bd->PrevUserCallbackMonitor);
475 | }
476 |
477 | for (ImGuiMouseCursor cursor_n = 0; cursor_n < ImGuiMouseCursor_COUNT; cursor_n++)
478 | glfwDestroyCursor(bd->MouseCursors[cursor_n]);
479 |
480 | io.BackendPlatformName = NULL;
481 | io.BackendPlatformUserData = NULL;
482 | IM_DELETE(bd);
483 | }
484 |
485 | static void ImGui_ImplGlfw_UpdateMouseData()
486 | {
487 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
488 | ImGuiIO& io = ImGui::GetIO();
489 |
490 | #ifdef __EMSCRIPTEN__
491 | const bool is_app_focused = true;
492 | #else
493 | const bool is_app_focused = glfwGetWindowAttrib(bd->Window, GLFW_FOCUSED) != 0;
494 | #endif
495 | if (is_app_focused)
496 | {
497 | // (Optional) Set OS mouse position from Dear ImGui if requested (rarely used, only when ImGuiConfigFlags_NavEnableSetMousePos is enabled by user)
498 | if (io.WantSetMousePos)
499 | glfwSetCursorPos(bd->Window, (double)io.MousePos.x, (double)io.MousePos.y);
500 |
501 | // (Optional) Fallback to provide mouse position when focused (ImGui_ImplGlfw_CursorPosCallback already provides this when hovered or captured)
502 | if (is_app_focused && bd->MouseWindow == NULL)
503 | {
504 | double mouse_x, mouse_y;
505 | glfwGetCursorPos(bd->Window, &mouse_x, &mouse_y);
506 | io.AddMousePosEvent((float)mouse_x, (float)mouse_y);
507 | }
508 | }
509 | }
510 |
511 | static void ImGui_ImplGlfw_UpdateMouseCursor()
512 | {
513 | ImGuiIO& io = ImGui::GetIO();
514 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
515 | if ((io.ConfigFlags & ImGuiConfigFlags_NoMouseCursorChange) || glfwGetInputMode(bd->Window, GLFW_CURSOR) == GLFW_CURSOR_DISABLED)
516 | return;
517 |
518 | ImGuiMouseCursor imgui_cursor = ImGui::GetMouseCursor();
519 | if (imgui_cursor == ImGuiMouseCursor_None || io.MouseDrawCursor)
520 | {
521 | // Hide OS mouse cursor if imgui is drawing it or if it wants no cursor
522 | glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
523 | }
524 | else
525 | {
526 | // Show OS mouse cursor
527 | // FIXME-PLATFORM: Unfocused windows seems to fail changing the mouse cursor with GLFW 3.2, but 3.3 works here.
528 | glfwSetCursor(bd->Window, bd->MouseCursors[imgui_cursor] ? bd->MouseCursors[imgui_cursor] : bd->MouseCursors[ImGuiMouseCursor_Arrow]);
529 | glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_NORMAL);
530 | }
531 | }
532 |
533 | // Update gamepad inputs
// Clamps v to the [0.0f, 1.0f] range.
static inline float Saturate(float v)
{
    if (v < 0.0f)
        return 0.0f;
    if (v > 1.0f)
        return 1.0f;
    return v;
}
535 | static void ImGui_ImplGlfw_UpdateGamepads()
536 | {
537 | ImGuiIO& io = ImGui::GetIO();
538 | if ((io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) == 0)
539 | return;
540 |
541 | io.BackendFlags &= ~ImGuiBackendFlags_HasGamepad;
542 | #if GLFW_HAS_GAMEPAD_API
543 | GLFWgamepadstate gamepad;
544 | if (!glfwGetGamepadState(GLFW_JOYSTICK_1, &gamepad))
545 | return;
546 | #define MAP_BUTTON(KEY_NO, BUTTON_NO, _UNUSED) do { io.AddKeyEvent(KEY_NO, gamepad.buttons[BUTTON_NO] != 0); } while (0)
547 | #define MAP_ANALOG(KEY_NO, AXIS_NO, _UNUSED, V0, V1) do { float v = gamepad.axes[AXIS_NO]; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0)
548 | #else
549 | int axes_count = 0, buttons_count = 0;
550 | const float* axes = glfwGetJoystickAxes(GLFW_JOYSTICK_1, &axes_count);
551 | const unsigned char* buttons = glfwGetJoystickButtons(GLFW_JOYSTICK_1, &buttons_count);
552 | if (axes_count == 0 || buttons_count == 0)
553 | return;
554 | #define MAP_BUTTON(KEY_NO, _UNUSED, BUTTON_NO) do { io.AddKeyEvent(KEY_NO, (buttons_count > BUTTON_NO && buttons[BUTTON_NO] == GLFW_PRESS)); } while (0)
555 | #define MAP_ANALOG(KEY_NO, _UNUSED, AXIS_NO, V0, V1) do { float v = (axes_count > AXIS_NO) ? axes[AXIS_NO] : V0; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0)
556 | #endif
557 | io.BackendFlags |= ImGuiBackendFlags_HasGamepad;
558 | MAP_BUTTON(ImGuiKey_GamepadStart, GLFW_GAMEPAD_BUTTON_START, 7);
559 | MAP_BUTTON(ImGuiKey_GamepadBack, GLFW_GAMEPAD_BUTTON_BACK, 6);
560 | MAP_BUTTON(ImGuiKey_GamepadFaceDown, GLFW_GAMEPAD_BUTTON_A, 0); // Xbox A, PS Cross
561 | MAP_BUTTON(ImGuiKey_GamepadFaceRight, GLFW_GAMEPAD_BUTTON_B, 1); // Xbox B, PS Circle
562 | MAP_BUTTON(ImGuiKey_GamepadFaceLeft, GLFW_GAMEPAD_BUTTON_X, 2); // Xbox X, PS Square
563 | MAP_BUTTON(ImGuiKey_GamepadFaceUp, GLFW_GAMEPAD_BUTTON_Y, 3); // Xbox Y, PS Triangle
564 | MAP_BUTTON(ImGuiKey_GamepadDpadLeft, GLFW_GAMEPAD_BUTTON_DPAD_LEFT, 13);
565 | MAP_BUTTON(ImGuiKey_GamepadDpadRight, GLFW_GAMEPAD_BUTTON_DPAD_RIGHT, 11);
566 | MAP_BUTTON(ImGuiKey_GamepadDpadUp, GLFW_GAMEPAD_BUTTON_DPAD_UP, 10);
567 | MAP_BUTTON(ImGuiKey_GamepadDpadDown, GLFW_GAMEPAD_BUTTON_DPAD_DOWN, 12);
568 | MAP_BUTTON(ImGuiKey_GamepadL1, GLFW_GAMEPAD_BUTTON_LEFT_BUMPER, 4);
569 | MAP_BUTTON(ImGuiKey_GamepadR1, GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER, 5);
570 | MAP_ANALOG(ImGuiKey_GamepadL2, GLFW_GAMEPAD_AXIS_LEFT_TRIGGER, 4, -0.75f, +1.0f);
571 | MAP_ANALOG(ImGuiKey_GamepadR2, GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER, 5, -0.75f, +1.0f);
572 | MAP_BUTTON(ImGuiKey_GamepadL3, GLFW_GAMEPAD_BUTTON_LEFT_THUMB, 8);
573 | MAP_BUTTON(ImGuiKey_GamepadR3, GLFW_GAMEPAD_BUTTON_RIGHT_THUMB, 9);
574 | MAP_ANALOG(ImGuiKey_GamepadLStickLeft, GLFW_GAMEPAD_AXIS_LEFT_X, 0, -0.25f, -1.0f);
575 | MAP_ANALOG(ImGuiKey_GamepadLStickRight, GLFW_GAMEPAD_AXIS_LEFT_X, 0, +0.25f, +1.0f);
576 | MAP_ANALOG(ImGuiKey_GamepadLStickUp, GLFW_GAMEPAD_AXIS_LEFT_Y, 1, -0.25f, -1.0f);
577 | MAP_ANALOG(ImGuiKey_GamepadLStickDown, GLFW_GAMEPAD_AXIS_LEFT_Y, 1, +0.25f, +1.0f);
578 | MAP_ANALOG(ImGuiKey_GamepadRStickLeft, GLFW_GAMEPAD_AXIS_RIGHT_X, 2, -0.25f, -1.0f);
579 | MAP_ANALOG(ImGuiKey_GamepadRStickRight, GLFW_GAMEPAD_AXIS_RIGHT_X, 2, +0.25f, +1.0f);
580 | MAP_ANALOG(ImGuiKey_GamepadRStickUp, GLFW_GAMEPAD_AXIS_RIGHT_Y, 3, -0.25f, -1.0f);
581 | MAP_ANALOG(ImGuiKey_GamepadRStickDown, GLFW_GAMEPAD_AXIS_RIGHT_Y, 3, +0.25f, +1.0f);
582 | #undef MAP_BUTTON
583 | #undef MAP_ANALOG
584 | }
585 |
586 | void ImGui_ImplGlfw_NewFrame()
587 | {
588 | ImGuiIO& io = ImGui::GetIO();
589 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData();
590 | IM_ASSERT(bd != NULL && "Did you call ImGui_ImplGlfw_InitForXXX()?");
591 |
592 | // Setup display size (every frame to accommodate for window resizing)
593 | int w, h;
594 | int display_w, display_h;
595 | glfwGetWindowSize(bd->Window, &w, &h);
596 | glfwGetFramebufferSize(bd->Window, &display_w, &display_h);
597 | io.DisplaySize = ImVec2((float)w, (float)h);
598 | if (w > 0 && h > 0)
599 | io.DisplayFramebufferScale = ImVec2((float)display_w / (float)w, (float)display_h / (float)h);
600 |
601 | // Setup time step
602 | double current_time = glfwGetTime();
603 | io.DeltaTime = bd->Time > 0.0 ? (float)(current_time - bd->Time) : (float)(1.0f / 60.0f);
604 | bd->Time = current_time;
605 |
606 | ImGui_ImplGlfw_UpdateMouseData();
607 | ImGui_ImplGlfw_UpdateMouseCursor();
608 |
609 | // Update game controllers (if enabled and available)
610 | ImGui_ImplGlfw_UpdateGamepads();
611 | }
612 |
613 | #if defined(__clang__)
614 | #pragma clang diagnostic pop
615 | #endif
616 |
--------------------------------------------------------------------------------
/scripts/build_cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Configure and build the CUDA version of nbody in a fresh ./build_cuda tree.
# Usage: build_cuda.sh [no_render]
BUILD_DIR="build_cuda"
render=on

# The only accepted argument is "no_render", which disables the renderer.
if [ -n "$1" ]; then
    if [ "$1" = "no_render" ]; then
        render=off
    else
        echo "Unknown param $1"
        # Report the bad invocation to the caller with a failing status.
        exit 1
    fi
fi

# Always start from an empty build directory.
rm -rf "$BUILD_DIR"
mkdir "$BUILD_DIR" || exit
cd "$BUILD_DIR" || exit

cmake ../ \
    -DRENDER="${render}" \
    -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=on || exit

make release
29 |
--------------------------------------------------------------------------------
/scripts/build_dpcpp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Configure and build the DPC++ (SYCL) version of nbody in a fresh
# ./build_dpcpp tree.
# Usage: build_dpcpp.sh [no_render]
BUILD_DIR="build_dpcpp"
render=on

# The only accepted argument is "no_render", which disables the renderer.
if [ -n "$1" ]; then
    if [ "$1" = "no_render" ]; then
        render=off
    else
        echo "Unknown param $1"
        # Report the bad invocation to the caller with a failing status.
        exit 1
    fi
fi

# Always start from an empty build directory.
rm -rf "$BUILD_DIR"
mkdir "$BUILD_DIR" || exit
cd "$BUILD_DIR" || exit

#CXX=clang++ \
#CC=clang \
CXX=icpx \
CC=icpx \
cmake ../ \
    -DRENDER="${render}" \
    -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \
    -DBACKEND=DPCPP -DDPCPP_CUDA_SUPPORT=on || exit

make release
33 |
--------------------------------------------------------------------------------
/scripts/docker_build_etc.sh:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 Codeplay Software Limited
2 | # This work is licensed under the terms of the MIT license.
3 | # For a copy, see https://opensource.org/licenses/MIT.
4 |
5 | # non-functional code! This is a sketch of how to do the dpct conversion properly in a container
6 | #
7 | # Should be run with something like:
8 | #
9 | # docker run --rm \
10 | # -v /opt/intel/oneapi/:/opt/intel/oneapi/ \
11 | # -v $PWD:$PWD \
12 | # -u $UID \
13 | # -i joeatodd/onednn-cuda \
14 | # bash < scripts/docker_build_etc.sh
15 |
16 |
# Navigate to relevant directory (SRC_DIR must be provided by the environment)

cd "$SRC_DIR" || exit 1

# Call cmake on it
bash scripts/build_cuda.sh

# Call "intercept-build make" in build dir
# (scripts/build_cuda.sh creates its tree in ./build_cuda)
cd build_cuda || exit 1
make clean
intercept-build make

# Do conversion w/ -p
30 |
--------------------------------------------------------------------------------
/scripts/perf_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Get rid of any previous virtual frame buffer
# (-f: a missing screen file on a clean machine is not an error)
pkill -9 Xvfb
rm -f /var/tmp/Xvfb_screen_0

# Create a virtual screen :99.0 with given dimensions & color depth
# mapping output to /var/tmp/Xvfb_screen_0
Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &

# Run the simulation on the virtual screen, in the background so that ffmpeg
# below can record it.
#DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
#DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &

# To take a screenshot instead of a video (doesn't always work):
# sleep 2
# DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png

# Use the x11grab device to write to video file
ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4
25 |
--------------------------------------------------------------------------------
/scripts/perf_test_cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Get rid of any previous virtual frame buffer
# (-f: a missing screen file on a clean machine is not an error)
pkill -9 Xvfb
rm -f /var/tmp/Xvfb_screen_0

# Create a virtual screen :99.0 with given dimensions & color depth
# mapping output to /var/tmp/Xvfb_screen_0
Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &

# Run the CUDA build in the foreground on the virtual screen.
#DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
#DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0
18 |
--------------------------------------------------------------------------------
/scripts/perf_test_dpcpp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Get rid of any previous virtual frame buffer
# (-f: a missing screen file on a clean machine is not an error)
pkill -9 Xvfb
rm -f /var/tmp/Xvfb_screen_0

# Create a virtual screen :99.0 with given dimensions & color depth
# mapping output to /var/tmp/Xvfb_screen_0
Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &

# Run the DPC++ build (CUDA device filter) in the foreground on the virtual screen.
#DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 &
DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0
#DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &
18 |
--------------------------------------------------------------------------------
/scripts/run_dpct.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options
8 | # for intercepting complex builds, but current dev environment restrictions require me to run dpct inside
9 | # a docker container. This complicates things, so for now I'm just doing single source conversion on the
10 | # simulator.cu file.
11 | #
12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges.
13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality.
14 | #
15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo.
16 | # As such, we suppress generation of new helper
17 | # headers when calling dpct with `--use-custom-helper=none`.
18 |
# Drop the previous conversion output and the old source symlinks.
rm -f src_sycl/*.[ch]pp src_sycl/*.yaml
# Re-link the host-side sources. The subshell keeps the working directory
# unchanged and guarantees ln never runs outside src_sycl if the cd fails.
( cd src_sycl && ln -s ../src/*[ch]pp . ) || exit 1

# Run dpct from the container on the single CUDA translation unit.
docker run --rm \
    -v /opt/intel/oneapi/dpcpp-ct/2023.1.0/:/dpcpp-ct \
    -v "$PWD":/nbody/ \
    -u "$UID" \
    -it joeatodd/onednn-cuda \
    /dpcpp-ct/bin/dpct --out-root=/nbody/src_sycl \
    --assume-nd-range-dim=1 \
    --use-custom-helper=none \
    --stop-on-parse-err \
    --sycl-named-lambda \
    /nbody/src/simulator.cu || exit 1

# Point the linked sources at the generated SYCL header instead of the CUDA one.
sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp
sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp

# -p=/nbody/build \
# --optimize-migration
39 |
--------------------------------------------------------------------------------
/scripts/run_dpct_native.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options
8 | # for intercepting complex builds, but current dev environment restrictions require me to run dpct inside
9 | # a docker container. This complicates things, so for now I'm just doing single source conversion on the
10 | # simulator.cu file.
11 | #
12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges.
13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality.
14 | #
15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo.
16 | # As such, we suppress generation of new helper
17 | # headers when calling dpct with `--use-custom-helper=none`.
18 |
export NBODY_DIR=$PWD

cd "$NBODY_DIR" || exit 1

# Drop the previous conversion output and the old source symlinks.
rm -f src_sycl/*.[ch]pp src_sycl/*.yaml
# Re-link the host-side sources. The subshell keeps the working directory
# unchanged and guarantees ln never runs outside src_sycl if the cd fails.
( cd src_sycl && ln -s ../src/*[ch]pp . ) || exit 1

# Run the locally installed dpct on the single CUDA translation unit.
dpct --out-root=./src_sycl \
    --assume-nd-range-dim=1 \
    --use-custom-helper=none \
    --stop-on-parse-err \
    --sycl-named-lambda \
    ./src/simulator.cu || exit 1

# Point the linked sources at the generated SYCL header instead of the CUDA one.
sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp
sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp

# -p=/nbody/build \
# --optimize-migration
38 |
--------------------------------------------------------------------------------
/scripts/run_nbody.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
7 | # This script runs a particular version of the nbody simulation
8 | # depending on the -b flag. All subsequent positional args are
9 | # passed on to nbody. See ../README.md for a description of these
10 | # positional args.
11 | #
12 | # ./scripts/run_nbody.sh -b dpcpp 50 5 0.999 0.001 1.0e-3 2.0
13 |
# Parse the -b <backend> option.
backend=""
while getopts b: flag
do
    case "${flag}" in
        b) backend=${OPTARG};;
    esac
done

# Drop exactly the arguments getopts consumed, so "-b dpcpp", "-bdpcpp" and a
# missing -b all leave the positional nbody arguments intact.
shift $((OPTIND - 1))

# Everything left on the command line is forwarded to the selected binary.
case "$backend" in
    cuda) ./nbody_cuda "$@";;
    dpcpp) SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp "$@";;
    *) echo "Bad backend"; exit 1;;
esac
28 |
--------------------------------------------------------------------------------
/scripts/xvfb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright (C) 2022 Codeplay Software Limited
4 | # This work is licensed under the terms of the MIT license.
5 | # For a copy, see https://opensource.org/licenses/MIT.
6 |
# Get rid of any previous virtual frame buffer
# (-f: a missing screen file on a clean machine is not an error)
pkill -9 Xvfb
rm -f /var/tmp/Xvfb_screen_0

# Create a virtual screen :99.0 with given dimensions & color depth
# mapping output to /var/tmp/Xvfb_screen_0
Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp &

# Run the nbody simulation on this screen
DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 &
#DISPLAY=:99.0 ./nbody_cuda 250 5 0.999 0.001 1.0e-3 2.0 &

# To take a screenshot instead of a video (doesn't always work):
# sleep 2
# DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png

# Use the x11grab device to write to video file
ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4
25 |
--------------------------------------------------------------------------------
/shaders/gl/blur.frag:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | #version 450 core
3 |
4 | layout (binding = 0) uniform sampler2D tex;
5 |
6 | layout (location = 0) uniform vec2 size;
7 | layout (location = 1) uniform vec2 mult;
8 | layout (location = 2) uniform int kHalfWidth;
9 | // Maximum length of gauss kernel sample = 100
10 | layout (location = 3) uniform float[100] offset;
11 | layout (location = 103) uniform float[100] weight;
12 |
13 | in vec2 pass_tc;
14 |
15 | out vec4 out_color;
16 |
// Sums the two taps located symmetrically at +/- offset around the current
// texel (along the direction given by mult*size) and scales them by weight.
// Note: the parameters shadow the uniform arrays of the same name.
vec4 contribute(float offset, float weight)
{
    vec2 delta = offset*mult*size;
    vec4 taps = texture(tex, pass_tc+delta) + texture(tex, pass_tc-delta);
    return taps*weight;
}
23 |
// One pass of the separable blur: centre tap plus kHalfWidth-1 symmetric
// tap pairs taken from the offset/weight uniform arrays.
void main()
{
    // The uniform arrays hold 100 entries; never index past them even if the
    // host uploads a larger kHalfWidth.
    int taps = min(kHalfWidth, 100);

    out_color = texture(tex, pass_tc) * weight[0];
    for(int i = 1; i < taps; i++){
        out_color += contribute(offset[i], weight[i]);
    }
}
--------------------------------------------------------------------------------
/shaders/gl/deferred.vert:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | #version 450 core
3 |
4 | layout (location = 0) in vec2 in_pos;
5 |
6 | out vec2 pass_tc;
7 |
// Emits the vertex unchanged in clip space and derives the texture
// coordinate by remapping the position from [-1,1] to [0,1].
void main()
{
    vec2 tc = in_pos*0.5+vec2(0.5);
    pass_tc = tc;
    gl_Position = vec4(in_pos,0.0,1.0);
}
--------------------------------------------------------------------------------
/shaders/gl/integration.comp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | #version 450 core
3 |
4 | layout (location = 0) uniform float dt;
5 |
6 | layout (local_size_x = 256) in;
7 |
8 | layout (binding = 0, std430) buffer particles_in
9 | {
10 | vec4 part_in[];
11 | };
12 |
13 | layout (binding = 1, std430) buffer particles_vel
14 | {
15 | vec4 part_vel[];
16 | };
17 |
// Advances one particle position by dt times its velocity; the w component
// of the position is carried over untouched.
void main()
{
    uint id = gl_GlobalInvocationID.x;
    vec4 current = part_in[id];
    vec3 moved = current.xyz+dt*part_vel[id].xyz;
    part_in[id] = vec4(moved,current.w);
}
--------------------------------------------------------------------------------
/shaders/gl/interaction.comp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | #version 450 core
3 |
4 | #define EPS2 0.2
5 |
6 | layout (location = 0) uniform float dt;
7 | layout (location = 1) uniform float G;
8 | layout (location = 2) uniform float damping;
9 |
10 | layout (local_size_x = 256) in;
11 |
12 | layout (binding = 0, std430) buffer particles_in
13 | {
14 | vec4 part_in[];
15 | };
16 |
17 | layout (binding = 1, std430) buffer particles_vel
18 | {
19 | vec4 part_vel[];
20 | };
21 |
22 | shared vec4 cache[gl_WorkGroupSize.x];
23 |
// Returns r / (|r|^2 + EPS2)^(3/2), where r points from p1 to p2.
// EPS2 softens the denominator so coincident particles do not blow up.
vec3 interaction(in vec4 p1,in vec4 p2)
{
    vec3 r = p2.xyz - p1.xyz;
    float d2 = dot(r,r) + EPS2;
    float d6 = d2*d2*d2;
    return r*inversesqrt(d6);
}
32 |
33 | void main()
34 | {
35 | uint id = gl_GlobalInvocationID.x;
36 | vec4 p1 = part_in[id];
37 | vec3 pos = p1.xyz;
38 | vec3 vel = vec3(0.0);
39 | for (uint i=0;iblue, fast->purple
18 | vec3 color = mix(vec3(0,0.4,1),vec3(1,0.2,1),clamp(dot(vel,vel)*0.0006,0,1));
19 |
20 | pass_col = vec4(color,1.0);
21 | }
--------------------------------------------------------------------------------
/shaders/gl/tonemap.frag:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | #version 450 core
3 |
4 | const int FBO_MARGIN = 50;
5 |
6 | layout (binding = 0) uniform sampler2D hdr;
7 | layout (binding = 1) uniform sampler2D bloom;
8 | layout (binding = 2) uniform sampler2D lum;
9 |
10 | layout (location = 0) uniform int lum_lod;
11 |
12 | in vec2 pass_tc;
13 |
14 | out vec4 out_color;
15 |
// Final pass: adds bloom to the HDR colour, applies an exposure derived from
// the lum texture at mip level lum_lod, then an exponential tonemap and
// 1/2.2 gamma.
void main()
{
    // The HDR target is larger than the screen by FBO_MARGIN texels per side.
    ivec2 coord = ivec2(gl_FragCoord.xy)+ivec2(FBO_MARGIN);

    float luminance = textureLod(lum, vec2(0.5), lum_lod).r;
    float exposure = 1.0/clamp(luminance*10, 0.2,1000.0);

    vec3 color = texelFetch(hdr,coord,0).rgb;
    color += texture(bloom, vec2(coord)/textureSize(hdr, 0)).rgb;

    vec3 tonemap = vec3(1.0)- exp(-color*exposure);
    out_color = vec4(pow(tonemap, vec3(1.0/2.2)), 1.0);
}
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2016 - 2018 Sarah Le Luron
2 | # Copyright (C) 2022 Codeplay Software Limited
3 |
find_package(PkgConfig REQUIRED)

# Rendering dependencies are only required when the OpenGL front end is built.
if (RENDER)
  pkg_check_modules(Glew REQUIRED IMPORTED_TARGET glew)

  find_package(glm REQUIRED)
  find_package(glfw3 REQUIRED)
  find_package(OpenGL REQUIRED)
endif()

find_package(CUDA REQUIRED)

set(COMMON_SOURCE
  nbody.cpp
  sim_param.cpp
  simulator.cu)
set(OPENGL_SOURCE
  camera.cpp
  gen.cpp
  renderer_gl.cpp
  shader.cpp)

# Fallback for glm packages that do not export an imported target.
if(NOT TARGET glm::glm)
  add_library(glm::glm IMPORTED INTERFACE)
  target_include_directories(glm::glm INTERFACE ${GLM_INCLUDE_DIR})
endif()

set(DEBUG_FLAGS -g -O0)


if (RENDER)
  set(RENDER_LIB glm::glm glfw PkgConfig::Glew OpenGL::OpenGL cuda)
  set(RENDER_FLAG USE_OPENGL)
  set(SOURCE_FILES ${COMMON_SOURCE} ${OPENGL_SOURCE})
else()
  set(RENDER_LIB cuda)
  set(RENDER_FLAG DISABLE_GL)
  set(SOURCE_FILES ${COMMON_SOURCE})
endif()

# Release binary: `make release`
add_custom_target(release DEPENDS ${BINARY_NAME})
add_executable(${BINARY_NAME} ${SOURCE_FILES})
# COMPILER_NAME here is only used to print text overlay on simulation
target_compile_definitions(${BINARY_NAME} PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA")
target_link_libraries(${BINARY_NAME} PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME} PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME} PRIVATE ${CUDA_INCLUDE_DIRS})
# -use_fast_math is an nvcc option; restrict it to CUDA sources so the host
# C++ compiler does not misparse it as `-u se_fast_math`.
target_compile_options(${BINARY_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-use_fast_math>)

# Debug binary: `make debug`
add_custom_target(debug DEPENDS ${BINARY_NAME}_d)
add_executable(${BINARY_NAME}_d ${SOURCE_FILES})
# COMPILER_NAME here is only used to print text overlay on simulation
target_compile_definitions(${BINARY_NAME}_d PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA")
target_link_libraries(${BINARY_NAME}_d PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME}_d PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME}_d PRIVATE ${CUDA_INCLUDE_DIRS})
target_compile_options(${BINARY_NAME}_d PRIVATE ${DEBUG_FLAGS})
61 |
--------------------------------------------------------------------------------
/src/camera.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 |
3 | #include "camera.hpp"
4 |
5 | #include
6 | #include
7 |
8 | const float PI = 3.14159265358979323846;
9 |
10 | using namespace std;
11 |
12 | Camera::Camera() {
13 | position.x = 0;
14 | position.y = PI / 4;
15 | position.z = 50.0;
16 |
17 | velocity = {0.0, 0.0, 0.0};
18 | look_at = {0.0, 0.0, 0.0};
19 | look_at_vel = {0.0, 0.0, 0.0};
20 | }
21 |
22 | void Camera::step() {
23 | position.x -= velocity.x;
24 | position.y -= velocity.y;
25 | position.z *= (1.0 - velocity.z);
26 | look_at += look_at_vel;
27 |
28 | velocity *= 0.72; // damping
29 | look_at_vel *= 0.90;
30 |
31 | // limits
32 | if (position.x < 0) position.x += 2 * PI;
33 | if (position.x >= 2 * PI) position.x -= 2 * PI;
34 | position.y =
35 | max(-(float)PI / 2 + 0.001f, min(position.y, (float)PI / 2 - 0.001f));
36 | }
37 |
38 | glm::mat4 Camera::getProj(int width, int height) {
39 | return glm::infinitePerspective(glm::radians(30.0f), width / (float)height,
40 | 1.f);
41 | }
42 |
43 | glm::vec3 getCartesianCoordinates(glm::vec3 v) {
44 | return glm::vec3(cos(v.x) * cos(v.y), sin(v.x) * cos(v.y), sin(v.y)) * v.z;
45 | }
46 |
47 | glm::mat4 Camera::getView() {
48 | // polar to cartesian coordinates
49 | glm::vec3 view_pos = getCartesianCoordinates(position);
50 |
51 | return glm::lookAt(view_pos + look_at, look_at, glm::vec3(0, 0, 1));
52 | }
53 |
54 | glm::vec3 Camera::getForward() {
55 | return glm::normalize(-getCartesianCoordinates(position));
56 | }
57 |
58 | glm::vec3 Camera::getRight() {
59 | return glm::normalize(
60 | glm::cross(getCartesianCoordinates(position), glm::vec3(0, 0, 1)));
61 | }
62 |
63 | glm::vec3 Camera::getUp() {
64 | return glm::normalize(
65 | glm::cross(getCartesianCoordinates(position), getRight()));
66 | }
67 |
68 | void Camera::addVelocity(glm::vec3 vel) { velocity += vel; }
69 |
70 | void Camera::addLookAtVelocity(glm::vec3 vel) { look_at_vel += vel; }
71 |
72 | glm::vec3 Camera::getPosition() { return position; }
73 |
--------------------------------------------------------------------------------
/src/camera.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 |
3 | #pragma once
4 |
5 | #include
6 |
/**
 * Orbiting camera described by a polar position around a look-at point,
 * with damped velocities for both.
 */
class Camera {
  public:
   Camera();

   /**
    * Advances the camera state in place by one step: applies the current
    * velocities, then damps them and limits the angular coordinates
    */
   void step();

   /**
    * Computes projection matrix from camera parameters
    * @param width viewport width
    * @param height viewport height
    * @return projection matrix
    */
   glm::mat4 getProj(int width, int height);

   /**
    * Computes view matrix from camera parameters
    * @return view matrix
    */
   glm::mat4 getView();

   /// Unit direction vectors derived from the current position
   glm::vec3 getForward();
   glm::vec3 getRight();
   glm::vec3 getUp();

   /// @return current position in polar coordinates
   glm::vec3 getPosition();

   /// Adds @p vel to the velocity of the polar position
   void addVelocity(glm::vec3 vel);
   /// Adds @p vel to the velocity of the look-at point
   void addLookAtVelocity(glm::vec3 vel);

  private:
   glm::vec3 position;     ///< Polar coordinates in radians
   glm::vec3 velocity;     ///< dp/dt of polar coordinates
   glm::vec3 look_at;      ///< Where is the camera looking at
   glm::vec3 look_at_vel;  ///< dp/dt of lookat position
};
49 |
--------------------------------------------------------------------------------
/src/gen.cpp:
--------------------------------------------------------------------------------
1 | #include "gen.hpp"
2 |
3 | #include
4 |
5 | const float PI = 3.14159265358979323846;
6 |
7 | // Copyright (C) 2016 - 2018 Sarah Le Luron
8 | // Copyright (C) 2022 Codeplay Software Limited
9 |
10 | using namespace std;
11 |
// Shared random engine. It is default-constructed and not seeded in this
// file, so it starts from the standard default seed.
mt19937 rng;
// Uniform real distribution over [0, 1) used for all particle sampling.
uniform_real_distribution<> dis(0, 1);
14 |
15 | glm::vec4 randomParticlePos() {
16 | // Random position on a 'thick disk'
17 | glm::vec4 particle;
18 | float t = dis(rng) * 2 * PI;
19 | float s = dis(rng) * 100;
20 | particle.x = cos(t) * s;
21 | particle.y = sin(t) * s;
22 | particle.z = dis(rng) * 4;
23 |
24 | particle.w = 1.f;
25 | return particle;
26 | }
27 |
28 | glm::vec4 randomParticleVel(glm::vec4 pos) {
29 | // Initial velocity is 'orbital' velocity from position
30 | glm::vec3 vel = glm::cross(glm::vec3(pos), glm::vec3(0, 0, 1));
31 | float orbital_vel = sqrt(2.0 * glm::length(vel));
32 | vel = glm::normalize(vel) * orbital_vel;
33 | return glm::vec4(vel, 0.0);
34 | }
35 |
// Generates a tex_size x tex_size single-channel flare texture: a Gaussian
// centred on texel (tex_size/2, tex_size/2) with variance tex_size/2,
// raised to the power 2.2 (gamma correction).
// @param tex_size edge length of the texture in texels
// @return tex_size * tex_size intensities, indexed as [i * tex_size + j]
std::vector<float> genFlareTex(int tex_size) {
   std::vector<float> pixels(tex_size * tex_size);
   float sigma2 = tex_size / 2.0;
   float A = 1.0;
   for (int i = 0; i < tex_size; ++i) {
      // Integer division: the centre is the texel at index tex_size / 2.
      float i1 = i - tex_size / 2;
      for (int j = 0; j < tex_size; ++j) {
         float j1 = j - tex_size / 2;
         // gamma corrected gauss
         pixels[i * tex_size + j] = std::pow(
             A * std::exp(-((i1 * i1) / (2 * sigma2) + (j1 * j1) / (2 * sigma2))),
             2.2);
      }
   }
   return pixels;
}
52 |
--------------------------------------------------------------------------------
/src/gen.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 | #pragma once
5 | #include
6 | #include
7 |
8 | /**
9 | * Generates a random particle position
10 | * @return 3D position + w component at 1.f
11 | */
12 | glm::vec4 randomParticlePos();
13 |
14 | /**
15 | * Generates a random particle velocity
16 | * @param pos the same particle's position
17 | * @return 3D velocity + w component at 0.f
18 | */
19 | glm::vec4 randomParticleVel(glm::vec4 pos);
20 |
/**
 * Generates a square, gamma-corrected Gaussian flare texture
 * @param size edge length of the texture in texels
 * @return size * size single-channel float intensities
 */
std::vector<float> genFlareTex(int size);
22 |
--------------------------------------------------------------------------------
/src/nbody.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #ifndef DISABLE_GL
9 | #include
10 |
11 | #include "renderer_gl.hpp"
12 | #include
13 | #include
14 | #include "camera.hpp"
15 | #include "gen.hpp"
16 | #else
17 | #include
18 | #endif
19 |
20 | #include
21 | #include
22 | #include
23 | #include
24 |
25 | #include "sim_param.hpp"
26 | #include "simulator.cuh"
27 |
28 | using namespace std;
29 | using namespace simulation;
30 |
31 | int main(int argc, char **argv) {
32 |
33 | SimParam params;
34 | params.parseArgs(argc, argv);
35 |
36 | DiskGalaxySimulator nbodySim(params);
37 |
38 | #ifndef DISABLE_GL
39 | // Window initialization
40 | GLFWwindow *window;
41 |
42 | glfwSetErrorCallback([](const int error, const char *msg) {
43 | cout << "Error id : " << error << ", " << msg << endl;
44 | exit(-1);
45 | });
46 |
47 | if (!glfwInit()) {
48 | cout << "GLFW can't initialize" << endl;
49 | return -1;
50 | }
51 |
52 | GLFWmonitor *monitor = glfwGetPrimaryMonitor();
53 |
54 | const GLFWvidmode *mode = glfwGetVideoMode(monitor);
55 |
56 | glfwWindowHint(GLFW_RED_BITS, mode->redBits);
57 | glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits);
58 | glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits);
59 | glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate);
60 | glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
61 | RendererGL renderer;
62 |
63 | renderer.initWindow();
64 |
65 | int width = mode->width;
66 | int height = mode->height - 30;
67 | window = glfwCreateWindow(width, height, "N-Body Simulation", NULL, NULL);
68 |
69 |
70 | glfwMakeContextCurrent(window);
71 |
72 | renderer.init(window, width, height, nbodySim);
73 | renderer.initImgui(window);
74 |
75 | // Get initial postitions generated in simulator ctor
76 | renderer.updateParticles();
77 |
78 | Camera camera;
79 |
80 | float last_fps{0};
81 | #endif
82 |
83 | std::vector stepTimes;
84 | int step{0};
85 |
86 | // Main loop
87 | float stepTime = 0.0;
88 |
89 | #ifndef DISABLE_GL
90 | while (!glfwWindowShouldClose(window) &&
91 | glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_RELEASE &&
92 | step < params.numFrames) {
93 | double frame_start = glfwGetTime();
94 | #else
95 | while ( step < params.numFrames) {
96 | #endif
97 | nbodySim.stepSim();
98 | #ifndef DISABLE_GL
99 | renderer.updateParticles();
100 | renderer.render(camera.getProj(width, height), camera.getView());
101 | #endif
102 | if(!(step % 20)) stepTime = nbodySim.getLastStepTime();
103 | #ifndef DISABLE_GL
104 | renderer.printKernelTime(stepTime);
105 | #endif
106 |
107 | step++;
108 | int warmSteps{2};
109 | if (step > warmSteps) {
110 | stepTimes.push_back(nbodySim.getLastStepTime());
111 | float cumStepTime =
112 | std::accumulate(stepTimes.begin(), stepTimes.end(), 0.0);
113 | float meanTime = cumStepTime / stepTimes.size();
114 | float accum{0.0};
115 | std::for_each(stepTimes.begin(), stepTimes.end(),
116 | [&](const float time) {
117 | accum += std::pow((time - meanTime), 2);
118 | });
119 | float stdDev = std::pow(accum / stepTimes.size(), 0.5);
120 | std::cout << "At step " << step << " kernel time is "
121 | << stepTimes.back() << " and mean is " << meanTime
122 | << " and stddev is: " << stdDev << "\n";
123 | }
124 | #ifndef DISABLE_GL
125 | // Window refresh
126 | glfwSwapBuffers(window);
127 | glfwPollEvents();
128 |
129 | // Thread sleep to match min frame time
130 | double frame_end = glfwGetTime();
131 | double elapsed = frame_end - frame_start;
132 | last_fps = 1.0 / elapsed;
133 | #endif
134 | }
135 | #ifndef DISABLE_GL
136 | renderer.destroy();
137 | glfwDestroyWindow(window);
138 | glfwTerminate();
139 | #endif
140 | return 0;
141 | }
142 |
--------------------------------------------------------------------------------
/src/renderer.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 | #pragma once
5 |
6 | #include
7 | #include
8 |
9 | #include
10 | #include
11 |
12 | #include "simulator.cuh"
13 |
14 | class Renderer {
15 | public:
16 | virtual void initWindow() = 0;
17 |
18 | /**
19 | * Initializes the gl state
20 | * @param width viewport width
21 | * @param height viewport height
22 | * @param params simulation parameters
23 | */
24 | virtual void init(GLFWwindow *window, int width, int height,
25 | simulation::Simulator &sim) = 0;
26 |
27 | virtual void destroy() = 0;
28 |
29 | /**
30 | * Supplies the gl state with updated particle position and velocity
31 | * @param pos particle positions
32 | * @param vel particle velocities
33 | */
34 | virtual void updateParticles() = 0;
35 |
36 | /**
37 | * Renders the particles at the current step
38 | * @param proj_mat projection matrix @see camera_get_proj
39 | * @param view_mat view matrix @see camera_get_view
40 | */
41 | virtual void render(glm::mat4 projMat, glm::mat4 viewMat) = 0;
42 | };
43 |
--------------------------------------------------------------------------------
/src/renderer_gl.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 | #include "renderer_gl.hpp"
5 |
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include "imgui.h"
14 | #include "imgui_impl_glfw.h"
15 | #include "imgui_impl_opengl3.h"
16 | #include "gen.hpp"
17 |
18 | const int FBO_MARGIN = 50;
19 |
20 | #define PRINT_PSEUDO_FPS 0
21 |
22 | using namespace std;
23 |
24 | void RendererGL::initWindow() {
25 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
26 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
27 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
28 | }
29 |
30 | void RendererGL::init(GLFWwindow *window, int width, int height,
31 | simulation::Simulator &sim_) {
32 | // OpenGL initialization
33 | GLenum error = glewInit();
34 | if (error != GLEW_OK) {
35 | throw std::runtime_error("Can't load GL");
36 | }
37 |
38 | sim = &sim_;
39 | numParticles = sim->getNumParticles();
40 | setWindowDimensions(width, height);
41 | createFlareTexture();
42 | createVaosVbos();
43 | initShaders();
44 | initFbos();
45 | setUniforms();
46 | }
47 |
48 | void RendererGL::setWindowDimensions(int width, int height) {
49 | width_ = width;
50 | height_ = height;
51 | }
52 |
53 | void RendererGL::createFlareTexture() {
54 | texSize = 16;
55 | glCreateTextures(GL_TEXTURE_2D, 1, &flareTex);
56 | glTextureStorage2D(flareTex, 1, GL_R32F, texSize, texSize);
57 | glTextureParameteri(flareTex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
58 | {
59 | std::vector pixels = genFlareTex(texSize);
60 | glTextureSubImage2D(flareTex, 0, 0, 0, texSize, texSize, GL_RED, GL_FLOAT,
61 | pixels.data());
62 | }
63 | }
64 |
65 | void RendererGL::createVaosVbos() {
66 | // Particle VAO
67 | glCreateVertexArrays(1, &vaoParticles);
68 | glCreateBuffers(1, &vboParticlesPos);
69 | glCreateBuffers(1, &ssboVelocities);
70 | glVertexArrayVertexBuffer(vaoParticles, 0, vboParticlesPos, 0,
71 | sizeof(glm::vec4));
72 | glVertexArrayVertexBuffer(vaoParticles, 1, ssboVelocities, 0,
73 | sizeof(glm::vec4));
74 |
75 | // Position
76 | glEnableVertexArrayAttrib(vaoParticles, 0);
77 | glVertexArrayAttribFormat(vaoParticles, 0, 4, GL_FLOAT, GL_FALSE, 0);
78 | glVertexArrayAttribBinding(vaoParticles, 0, 0);
79 |
80 | // Velocity
81 | glEnableVertexArrayAttrib(vaoParticles, 1);
82 | glVertexArrayAttribFormat(vaoParticles, 1, 4, GL_FLOAT, GL_FALSE, 0);
83 | glVertexArrayAttribBinding(vaoParticles, 1, 1);
84 |
85 | // Deferred VAO
86 | glCreateVertexArrays(1, &vaoDeferred);
87 | glCreateBuffers(1, &vboDeferred);
88 | glVertexArrayVertexBuffer(vaoDeferred, 0, vboDeferred, 0, sizeof(glm::vec2));
89 | // Position
90 | glEnableVertexArrayAttrib(vaoDeferred, 0);
91 | glVertexArrayAttribFormat(vaoDeferred, 0, 2, GL_FLOAT, GL_FALSE, 0);
92 | glVertexArrayAttribBinding(vaoDeferred, 0, 0);
93 |
94 | // Deferred tri
95 | glm::vec2 tri[3] = {glm::vec2(-2, -1), glm::vec2(+2, -1), glm::vec2(0, 4)};
96 | glNamedBufferStorage(vboDeferred, 3 * sizeof(glm::vec2), tri, 0);
97 |
98 | // SSBO allocation (particle position & velocities)
99 | glNamedBufferStorage(vboParticlesPos, numParticles * sizeof(glm::vec4),
100 | nullptr, GL_MAP_WRITE_BIT);
101 | glNamedBufferStorage(ssboVelocities, numParticles * sizeof(glm::vec4),
102 | nullptr, GL_MAP_WRITE_BIT);
103 |
104 | // SSBO binding
105 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, vboParticlesPos, 0,
106 | numParticles * sizeof(glm::vec4));
107 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, ssboVelocities, 0,
108 | numParticles * sizeof(glm::vec4));
109 | }
110 |
111 | void RendererGL::updateParticles() {
112 | setParticleData(vboParticlesPos, sim->getParticlePos());
113 | setParticleData(ssboVelocities, sim->getParticleVel());
114 | }
115 |
116 | void RendererGL::initImgui(GLFWwindow *window) {
117 | // Setup ImGui context
118 | IMGUI_CHECKVERSION();
119 | ImGui::CreateContext();
120 | ImGuiIO &io = ImGui::GetIO();
121 | (void)io;
122 | ImGui::StyleColorsDark();
123 |
124 | // Setup Platform/Renderer bindings
125 | ImGui_ImplGlfw_InitForOpenGL(window, true);
126 | ImGui_ImplOpenGL3_Init("#version 450");
127 | }
128 |
129 | void RendererGL::printKernelTime(float kernelTime) {
130 | // Start the Dear ImGui frame
131 | ImGui_ImplOpenGL3_NewFrame();
132 | ImGui_ImplGlfw_NewFrame();
133 | ImGui::NewFrame();
134 |
135 | // Generate a minimal window
136 | bool isOpen;
137 | ImGui::Begin("N/A", &isOpen,
138 | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoMove |
139 | ImGuiWindowFlags_NoScrollbar |
140 | ImGuiWindowFlags_NoSavedSettings |
141 | ImGuiWindowFlags_NoInputs);
142 | ImGui::SetWindowFontScale(2.5);
143 | ImGui::Text("%s", (std::string("N-body demo running with " COMPILER_NAME
144 | " on device: ") + *sim->getDeviceName()).c_str());
145 | if (PRINT_PSEUDO_FPS) {
146 | ImGui::Text("FPS: %2.0f", 1000.0/kernelTime);
147 | } else {
148 | ImGui::Text("Kernel time: %4.2f ms", kernelTime);
149 | }
150 | ImGui::End();
151 |
152 | ImGui::Render();
153 | ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
154 | }
155 |
156 | void RendererGL::setParticleData(const GLuint buffer,
157 | const ParticleData &data) {
158 | void *particle_ptr = glMapNamedBufferRange(
159 | buffer, 0, numParticles * sizeof(glm::vec4), GL_MAP_WRITE_BIT);
160 |
161 | assert(!glGetError());
162 | assert(particle_ptr);
163 |
164 | const ParticleData &particles = sim->getParticlePos();
165 |
166 | // Fill using placement new
167 | for (size_t i = 0; i < numParticles; i++) {
168 | glm::vec4 *my4 = new ((glm::vec4 *)particle_ptr + i)
169 | glm::vec4(data.x[i], data.y[i], data.z[i], 1.0f);
170 | }
171 | glUnmapNamedBuffer(buffer);
172 | }
173 |
174 | void RendererGL::initShaders() {
175 | // Need to cut these two shaders out
176 | // programInteraction.source(GL_COMPUTE_SHADER,
177 | // "shaders/gl/interaction.comp"); programInteraction.link();
178 |
179 | // programIntegration.source(GL_COMPUTE_SHADER,
180 | // "shaders/gl/integration.comp"); programIntegration.link();
181 |
182 | programHdr.source(GL_VERTEX_SHADER, "shaders/gl/main.vert");
183 | programHdr.source(GL_FRAGMENT_SHADER, "shaders/gl/main.frag");
184 | programHdr.source(GL_GEOMETRY_SHADER, "shaders/gl/main.geom");
185 | programHdr.link();
186 |
187 | programTonemap.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
188 | programTonemap.source(GL_FRAGMENT_SHADER, "shaders/gl/tonemap.frag");
189 | programTonemap.link();
190 |
191 | programBlur.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
192 | programBlur.source(GL_FRAGMENT_SHADER, "shaders/gl/blur.frag");
193 | programBlur.link();
194 |
195 | programLum.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
196 | programLum.source(GL_FRAGMENT_SHADER, "shaders/gl/luminance.frag");
197 | programLum.link();
198 | }
199 |
200 | void RendererGL::initFbos() {
201 | int blur_dsc = 2;
202 | blurDownscale = blur_dsc;
203 |
204 | glCreateFramebuffers(4, fbos);
205 | glCreateTextures(GL_TEXTURE_2D, 4, attachs);
206 |
207 | int base_width = width_ + 2 * FBO_MARGIN;
208 | int base_height = height_ + 2 * FBO_MARGIN;
209 |
210 | int widths[] = {base_width, base_width / blur_dsc, base_width / blur_dsc,
211 | base_width / 2};
212 |
213 | int heights[] = {base_height, base_height / blur_dsc, base_height / blur_dsc,
214 | base_height / 2};
215 |
216 | lumLod = (int)floor(log2(max(base_width, base_height) / 2));
217 | int mipmaps[] = {1, 1, 1, lumLod + 1};
218 | GLenum types[] = {GL_RGBA16F, GL_RGBA16F, GL_RGBA16F, GL_R16F};
219 | GLenum min_filters[] = {GL_LINEAR, GL_LINEAR, GL_LINEAR,
220 | GL_LINEAR_MIPMAP_LINEAR};
221 |
222 | for (int i = 0; i < 4; ++i) {
223 | glTextureStorage2D(attachs[i], mipmaps[i], types[i], widths[i],
224 | heights[i]);
225 | glTextureParameteri(attachs[i], GL_TEXTURE_MIN_FILTER, min_filters[i]);
226 | glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
227 | glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
228 | glNamedFramebufferTexture(fbos[i], GL_COLOR_ATTACHMENT0, attachs[i], 0);
229 | }
230 | }
231 |
232 | void RendererGL::setUniforms() {
233 | // // NDC sprite size
234 | glProgramUniform2f(programHdr.getId(), 8, texSize / float(2 * width_),
235 | texSize / float(2 * height_));
236 | // Blur sample offset length
237 | glProgramUniform2f(programBlur.getId(), 0, (float)blurDownscale / width_,
238 | (float)blurDownscale / height_);
239 |
240 | // Compute optimized 1D gaussian kernel & send to device
241 | auto optimGauss = optimGaussKernel(gaussKernel(10.0, 25));
242 | auto offsets = optimGauss.first;
243 | auto weights = optimGauss.second;
244 |
245 | assert(offsets.size() < 100 && "Maximum Gaussian kernel size exceeded!");
246 | glProgramUniform1i(programBlur.getId(), 2, offsets.size());
247 | glProgramUniform1fv(programBlur.getId(), 3, offsets.size(), offsets.data());
248 | glProgramUniform1fv(programBlur.getId(), 103, offsets.size(), weights.data());
249 | }
250 |
251 | void RendererGL::render(glm::mat4 proj_mat, glm::mat4 view_mat) {
252 | // Particle HDR rendering
253 | glViewport(0, 0, width_ + 2 * FBO_MARGIN, height_ + 2 * FBO_MARGIN);
254 | glBindVertexArray(vaoParticles);
255 | glEnable(GL_BLEND);
256 | glBlendFunc(GL_ONE, GL_ONE);
257 | glBindFramebuffer(GL_FRAMEBUFFER, fbos[0]);
258 | glUseProgram(programHdr.getId());
259 | glClear(GL_COLOR_BUFFER_BIT);
260 | glProgramUniformMatrix4fv(programHdr.getId(), 0, 1, GL_FALSE,
261 | glm::value_ptr(view_mat));
262 | glProgramUniformMatrix4fv(programHdr.getId(), 4, 1, GL_FALSE,
263 | glm::value_ptr(proj_mat));
264 | glBindTextureUnit(0, flareTex);
265 | glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
266 | glDrawArrays(GL_POINTS, 0, numParticles);
267 |
268 | glBindVertexArray(vaoDeferred);
269 | glDisable(GL_BLEND);
270 |
271 | glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / blurDownscale,
272 | (height_ + 2 * FBO_MARGIN) / blurDownscale);
273 | glUseProgram(programBlur.getId());
274 |
275 | // Blur pingpong (N horizontal blurs then N vertical blurs)
276 |
277 | const int nPasses = 1; // Only one blur pass in each direction
278 | int loop = 0;
279 | for (int i = 0; i < 2; ++i) {
280 | if (i == 0)
281 | glProgramUniform2f(programBlur.getId(), 1, 1, 0);
282 | else
283 | glProgramUniform2f(programBlur.getId(), 1, 0, 1);
284 | for (int j = 0; j < nPasses; ++j) {
285 | GLuint fbo = fbos[(loop % 2) + 1];
286 | GLuint attach = attachs[loop ? ((loop + 1) % 2 + 1) : 0];
287 | glBindFramebuffer(GL_FRAMEBUFFER, fbo);
288 | glBindTextureUnit(0, attach);
289 | glDrawArrays(GL_TRIANGLES, 0, 3);
290 | loop++;
291 | }
292 | }
293 |
294 | // Average luminance
295 | glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / 2,
296 | (height_ + 2 * FBO_MARGIN) / 2);
297 | glBindFramebuffer(GL_FRAMEBUFFER, fbos[3]);
298 | glUseProgram(programLum.getId());
299 | glBindTextureUnit(0, attachs[0]);
300 | glDrawArrays(GL_TRIANGLES, 0, 3);
301 | glGenerateTextureMipmap(attachs[3]);
302 |
303 | // Tonemapping step (direct to screen)
304 | glViewport(0, 0, width_, height_);
305 | glBindFramebuffer(GL_FRAMEBUFFER, 0);
306 | glUseProgram(programTonemap.getId());
307 | glProgramUniform1i(programTonemap.getId(), 0, lumLod);
308 | glBindTextureUnit(0, attachs[0]);
309 | glBindTextureUnit(1, attachs[2]);
310 | glBindTextureUnit(2, attachs[3]);
311 | glDrawArrays(GL_TRIANGLES, 0, 3);
312 | }
313 |
314 | std::vector RendererGL::gaussKernel(const float sigma,
315 | const int halfwidth) {
316 | float sigma_factor = 1.0 / (sigma * sqrt(2 * glm::pi()));
317 |
318 | auto sigma_fun = [sigma, sigma_factor, n = 0]() mutable {
319 | float sigma_val =
320 | sigma_factor * std::exp(-std::pow(static_cast(n), 2) /
321 | (2 * std::pow(sigma, 2)));
322 | n++;
323 | return sigma_val;
324 | };
325 |
326 | std::vector result(halfwidth);
327 | std::generate(result.begin(), result.end(), sigma_fun);
328 |
329 | // Normalize the Gaussian kernel
330 | float halfnorm = std::accumulate(result.begin() + 1, result.end(), 0.0);
331 | float norm = 2 * halfnorm + result[0];
332 |
333 | std::transform(result.begin(), result.end(), result.begin(),
334 | [norm](auto val) { return val / norm; });
335 |
336 | return result;
337 | }
338 |
339 | std::pair, std::vector> RendererGL::optimGaussKernel(
340 | const std::vector weightsIn) {
341 | const int inSize = weightsIn.size();
342 | const int outSize = (inSize / 2) + 1;
343 |
344 | std::vector offsetsIn(inSize);
345 | std::iota(offsetsIn.begin(), offsetsIn.end(), 0);
346 |
347 | std::vector offsetsOut(outSize);
348 | std::vector weightsOut(outSize);
349 |
350 | // Centre point of gaussian doesn't change
351 | offsetsOut[0] = offsetsIn[0]; // 0.0
352 | weightsOut[0] = weightsIn[0];
353 |
354 | // Convert pairs of neighbouring texel weights into a single
355 | // weight linearly interpolated between texels. Take care of
356 | // possible last lone weight.
357 | for (int i = 1; i < outSize; i++) {
358 | weightsOut[i] = weightsIn[i * 2 - 1];
359 | offsetsOut[i] = offsetsIn[i * 2 - 1];
360 | if (i * 2 < inSize) {
361 | weightsOut[i] += weightsIn[i * 2];
362 | offsetsOut[i] = (offsetsIn[i * 2 - 1] * weightsIn[i * 2 - 1] +
363 | offsetsIn[i * 2] * weightsIn[i * 2]) /
364 | weightsOut[i];
365 | }
366 | }
367 | return std::make_pair(offsetsOut, weightsOut);
368 | }
369 |
370 | void RendererGL::destroy() {}
371 |
--------------------------------------------------------------------------------
/src/renderer_gl.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 |
5 | #pragma once
6 |
7 | #include
8 | #include
9 |
10 | #include "renderer.hpp"
11 | #include "shader.hpp"
12 |
13 | using namespace simulation;
14 |
15 | class RendererGL : public Renderer {
16 | public:
17 | void initWindow();
18 | void init(GLFWwindow *window, int width, int height,
19 | simulation::Simulator &sim);
20 | void destroy();
21 | /// Initialize Imgui
22 | void initImgui(GLFWwindow *window);
23 | void updateParticles();
24 | void render(glm::mat4 proj_mat, glm::mat4 view_mat);
25 | void printKernelTime(float kernelTime);
26 | RendererGL() : sim{} {}
27 |
28 | private:
29 | /// Provides the gl state with window dimensions for fbo size, etc
30 | void setWindowDimensions(int width, int height);
31 |
32 | /// Generates the star flare texture
33 | void createFlareTexture();
34 |
35 | /// Creates the VAO and VBO objects
36 | void createVaosVbos();
37 |
38 | /// Loads the shaders into the gl state
39 | void initShaders();
40 |
41 | // Initializes and supplies the framebuffers with valid data
42 | void initFbos();
43 |
44 | // Supplies the gl state with nbody simulation parameters
45 | void setUniforms();
46 |
47 | // Send data obtained from simulation to a buffer
48 | void setParticleData(const GLuint buffer, const ParticleData &data);
49 |
50 | // Compute the 1D gaussian kernel for given sigma & halfwidth
51 | static std::vector gaussKernel(const float sigma,
52 | const int halfwidth);
53 |
54 | // Optimizes the given 1D gaussian kernel via texel linear interp
55 | static std::pair, std::vector> optimGaussKernel(
56 | const std::vector inKernel);
57 |
58 | Simulator *sim{nullptr};
59 |
60 | GLuint flareTex; ///< Texture for the star flare
61 | GLuint vaoParticles; ///< Vertex definition for points
62 | GLuint vboParticlesPos; ///< Particle position buffer
63 | GLuint ssboVelocities; ///< Particle velocity buffer
64 | GLuint vaoDeferred; ///< Vertex definition for deferred
65 | GLuint vboDeferred; ///< Vertex buffer of deferred fullscreen tri
66 |
67 | /** Shader programs **/
68 | ShaderProgram programHdr; ///< HDR rendering step
69 | ShaderProgram programBlur; ///< Bloom blurring step
70 | ShaderProgram programLum; ///< Average luminance step
71 | ShaderProgram programTonemap; ///< Tonemapping step
72 |
73 | GLuint fbos[4]; ///< FBOs (0 for hdr, 1 & 2 for blur ping pong, 3 for
74 | ///< luminance)
75 | GLuint attachs[4]; ///< Respective FBO attachments.
76 |
77 | int texSize; ///< Flare texture size in pixels
78 | int lumLod; ///< Luminance texture level to sample from
79 | int blurDownscale; ///< Downscale factor for the blurring step
80 | int width_; ///< Viewport width
81 | int height_; ///< Viewport height
82 |
83 | size_t numParticles;
84 | size_t computeIterations;
85 | };
86 |
--------------------------------------------------------------------------------
/src/shader.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 |
3 | #include "shader.hpp"
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | using namespace std;
11 |
12 | ShaderProgram::ShaderProgram() : id(0) {}
13 |
14 | void ShaderProgram::source(GLenum shader_type, const string &filename) {
15 | if (!id) id = glCreateProgram();
16 |
17 | string code;
18 |
19 | // IO stuff
20 | try {
21 | stringstream sstream;
22 | {
23 | ifstream stream;
24 | stream.exceptions(ifstream::failbit | ifstream::badbit);
25 | stream.open(filename);
26 | sstream << stream.rdbuf();
27 | }
28 | code = sstream.str();
29 | } catch (ifstream::failure e) {
30 | throw std::runtime_error(std::string("Can't open ") + filename +
31 | std::string(e.what()));
32 | }
33 |
34 | GLint success;
35 | GLchar info_log[2048];
36 |
37 | const char *s = code.c_str();
38 |
39 | // OpenGL stuff
40 | GLuint shad_id = glCreateShader(shader_type);
41 | glShaderSource(shad_id, 1, &s, NULL);
42 | glCompileShader(shad_id);
43 | glGetShaderiv(shad_id, GL_COMPILE_STATUS, &success);
44 | if (!success) {
45 | // error log
46 | glGetShaderInfoLog(shad_id, sizeof(info_log), NULL, info_log);
47 | throw std::runtime_error(std::string("Can't compile ") + filename + " " +
48 | info_log);
49 | exit(-1);
50 | }
51 | glAttachShader(id, shad_id);
52 | }
53 |
54 | void ShaderProgram::link() {
55 | GLint success;
56 | GLchar info_log[2048];
57 |
58 | glLinkProgram(id);
59 | glGetProgramiv(id, GL_LINK_STATUS, &success);
60 | if (!success) {
61 | // error log
62 | glGetProgramInfoLog(id, sizeof(info_log), NULL, info_log);
63 | throw std::runtime_error(std::string("Can't link ") +
64 | std::string(info_log));
65 | }
66 | }
67 |
68 | GLuint ShaderProgram::getId() { return id; }
69 |
--------------------------------------------------------------------------------
/src/shader.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 |
3 | #pragma once
4 |
5 | #include
6 |
7 | #include
8 |
9 | class ShaderProgram {
10 | public:
11 | ShaderProgram();
12 |
13 | /**
14 | * Compiles a shader stage from a given source, displays errors in stderr
15 | * @param program shader program handle
16 | * @param shader_type one of GL_COMPUTE_SHADER, GL_VERTEX_SHADER,
17 | * GL_TESS_CONTROL_SHADER, GL_TESS_EVALUATION_SHADER, GL_GEOMETRY_SHADER, or
18 | * GL_FRAGMENT_SHADER
19 | * @param filename GLSL source file
20 | */
21 | void source(GLenum shaderType, const std::string &filename);
22 |
23 | /**
24 | * Links all shaders inside the program, displays errors in stderr
25 | */
26 | void link();
27 |
28 | GLuint getId();
29 |
30 | private:
31 | GLuint id;
32 | };
33 |
--------------------------------------------------------------------------------
/src/sim_param.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2016 - 2018 Sarah Le Luron
2 | // Copyright (C) 2022 Codeplay Software Limited
3 |
4 | #include "sim_param.hpp"
5 |
6 | #include
7 | #include
8 | #include