├── .clang-format ├── .github ├── dependabot.yml └── workflows │ └── scorecard.yml ├── .gitignore ├── .vscode ├── launch.json └── tasks.json ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── cmake ├── FindGLEW.cmake ├── FindGLFW.cmake ├── FindGLM.cmake ├── FindVulkan.cmake └── Finddpct.cmake ├── docs ├── Eq1.png └── downscale_artefact.png ├── libs └── imgui │ ├── CMakeLists.txt │ ├── include │ ├── imconfig.h │ ├── imgui.h │ ├── imgui_impl_glfw.h │ ├── imgui_impl_opengl3.h │ ├── imgui_impl_opengl3_loader.h │ ├── imgui_internal.h │ ├── imstb_rectpack.h │ ├── imstb_textedit.h │ └── imstb_truetype.h │ └── src │ ├── imgui.cpp │ ├── imgui_demo.cpp │ ├── imgui_draw.cpp │ ├── imgui_impl_glfw.cpp │ ├── imgui_impl_opengl3.cpp │ ├── imgui_tables.cpp │ └── imgui_widgets.cpp ├── scripts ├── build_cuda.sh ├── build_dpcpp.sh ├── docker_build_etc.sh ├── perf_test.sh ├── perf_test_cuda.sh ├── perf_test_dpcpp.sh ├── run_dpct.sh ├── run_dpct_native.sh ├── run_nbody.sh └── xvfb.sh ├── shaders └── gl │ ├── blur.frag │ ├── deferred.vert │ ├── integration.comp │ ├── interaction.comp │ ├── luminance.frag │ ├── main.frag │ ├── main.geom │ ├── main.vert │ └── tonemap.frag ├── src ├── CMakeLists.txt ├── camera.cpp ├── camera.hpp ├── gen.cpp ├── gen.hpp ├── nbody.cpp ├── renderer.hpp ├── renderer_gl.cpp ├── renderer_gl.hpp ├── shader.cpp ├── shader.hpp ├── sim_param.cpp ├── sim_param.hpp ├── simulator.cu └── simulator.cuh └── src_sycl ├── CMakeLists.txt ├── README.md ├── camera.cpp ├── camera.hpp ├── gen.cpp ├── gen.hpp ├── nbody.cpp ├── renderer.hpp ├── renderer_gl.cpp ├── renderer_gl.hpp ├── shader.cpp ├── shader.hpp ├── sim_param.cpp ├── sim_param.hpp ├── simulator.dp.cpp └── simulator.dp.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | {BasedOnStyle: Google, IndentWidth: 3, ColumnLimit: 80, NamespaceIndentation: All, AlignTrailingComments: true} 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Enable version updates for Github Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "monthly" 8 | groups: 9 | github-actions: 10 | patterns: 11 | - "*" 12 | reviewers: 13 | - "codeplaysoftware/security-managers" 14 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # Scorecards' GitHub action 2 | 3 | name: Scorecard supply-chain security 4 | on: 5 | # For Branch-Protection check. Only the default branch is supported. See 6 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 7 | branch_protection_rule: 8 | schedule: 9 | - cron: '15 18 * * 5' 10 | push: 11 | branches: [ "main" ] 12 | 13 | # Declare default permissions as read only. 14 | permissions: read-all 15 | 16 | jobs: 17 | analysis: 18 | name: Scorecard analysis 19 | runs-on: ubuntu-latest 20 | permissions: 21 | # Needed to upload the results to code-scanning dashboard. 22 | security-events: write 23 | # Needed to publish results and get a badge (see publish_results below). 24 | id-token: write 25 | 26 | steps: 27 | - name: "Checkout code" 28 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 29 | with: 30 | persist-credentials: false 31 | 32 | - name: "Run analysis" 33 | uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 34 | with: 35 | results_file: results.sarif 36 | results_format: sarif 37 | publish_results: true 38 | 39 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 40 | # format to the repository Actions tab. 
41 | - name: "Upload artifact" 42 | uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 43 | with: 44 | name: SARIF file 45 | path: results.sarif 46 | retention-days: 5 47 | 48 | # Upload the results to GitHub's code scanning dashboard (optional). 49 | # Commenting out will disable upload of results to your repo's Code Scanning dashboard 50 | - name: "Upload to code-scanning" 51 | uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0 52 | with: 53 | sarif_file: results.sarif 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build* 2 | nbodygl 3 | nbodyvk 4 | .cache 5 | *~ 6 | settings.json 7 | **/dpct-output/ 8 | \#* 9 | *mp4 10 | lib/**cpp 11 | lib/**h 12 | *fatbin* 13 | *.ptx 14 | log* 15 | .vscode 16 | nbody_dpcpp 17 | nbody_cuda 18 | nbody_cuda_d 19 | nbody_dpcpp_d 20 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "DEBUG: (gdb-oneapi) nbody_dpcpp_d Launch", 5 | "type": "cppdbg", 6 | "request": "launch", 7 | "preLaunchTask": "Debug C/C++: DPCPP Makefile", 8 | "postDebugTask": "", 9 | "program": "${workspaceFolder}/nbody_dpcpp_d", 10 | "args": ["50", "4", "0.999998", "0.005", "1.0e-7", "2.0", "100000"], 11 | "stopAtEntry": true, 12 | "cwd": "${workspaceFolder}", 13 | "environment": [ 14 | { 15 | "name": "ZET_ENABLE_PROGRAM_DEBUGGING", 16 | "value": "1" 17 | }, 18 | { 19 | "name": "IGC_EnableGTLocationDebugging", 20 | "value": "1" 21 | } 22 | ], 23 | "externalConsole": false, 24 | "MIMode": "gdb", 25 | "miDebuggerPath": "gdb-oneapi", 26 | "setupCommands": [ 27 | { 28 | "description": "Enable pretty-printing for gdb", 29 | "text": "-enable-pretty-printing", 30 | "ignoreFailures": true 
31 | }, 32 | { 33 | "description": "Disable target async", 34 | "text": "set target-async off", 35 | "ignoreFailures": true 36 | }, 37 | { 38 | "description": "Do not display function arguments when printing a stack frame", 39 | "text": "set print frame-arguments none", 40 | "ignoreFailures": true 41 | } 42 | ] 43 | }, 44 | { 45 | "name": "DEBUG: (cuda-gdb) nbody_cuda_d Launch", 46 | "type": "cuda-gdb", 47 | "request": "launch", 48 | "preLaunchTask": "Debug C/C++: CUDA Makefile", 49 | "postDebugTask": "", 50 | "program": "${workspaceFolder}/nbody_cuda_d", 51 | "args": "50 4 0.999998 0.005 1.0e-7 2.0 100000", 52 | "stopAtEntry": true, 53 | "cwd": "${workspaceFolder}", 54 | } 55 | ] 56 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "cppbuild", 6 | "label": "Debug C/C++: CUDA Makefile", 7 | "command": "make", 8 | "args": [ 9 | "debug", 10 | ], 11 | "options": { 12 | "cwd": "${workspaceFolder}/build_cuda" 13 | }, 14 | "problemMatcher": [ 15 | "$gcc" 16 | ], 17 | "group": "build", 18 | "detail": "compiler: INTEL oneapi icpx" 19 | }, 20 | { 21 | "type": "cppbuild", 22 | "label": "Release C/C++: CUDA Makefile", 23 | "command": "make", 24 | "args": [ 25 | "release" 26 | ], 27 | "options": { 28 | "cwd": "${workspaceFolder}/build_cuda" 29 | }, 30 | "problemMatcher": [ 31 | "$gcc" 32 | ], 33 | "group": "build", 34 | "detail": "compiler: INTEL oneapi icpx" 35 | }, 36 | { 37 | "type": "cppbuild", 38 | "label": "Release C/C++: DPCPP Makefile", 39 | "command": "make", 40 | "args": [ 41 | "release" 42 | ], 43 | "options": { 44 | "cwd": "${workspaceFolder}/build_dpcpp" 45 | }, 46 | "problemMatcher": [ 47 | "$gcc" 48 | ], 49 | "group": "build", 50 | "detail": "compiler: INTEL oneapi icpx" 51 | }, 52 | { 53 | "type": "cppbuild", 54 | "label": "Debug C/C++: DPCPP Makefile", 55 | 
"command": "make", 56 | "args": [ 57 | "debug" 58 | ], 59 | "options": { 60 | "cwd": "${workspaceFolder}/build_dpcpp" 61 | }, 62 | "problemMatcher": [ 63 | "$gcc" 64 | ], 65 | "group": "build", 66 | "detail": "compiler: INTEL oneapi icpx" 67 | } 68 | ] 69 | } 70 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 - 2018 Sarah Le Luron 2 | # Copyright (C) 2022 Codeplay Software Limited 3 | 4 | cmake_minimum_required (VERSION 3.16) 5 | 6 | project (nbody LANGUAGES CXX) 7 | 8 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 9 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) 10 | 11 | set(BACKEND "CUDA" CACHE STRING "Which backend to build") 12 | option(RENDER "Use openGl or not" ON) 13 | 14 | if(BACKEND STREQUAL "CUDA") 15 | set(BINARY_NAME "nbody_cuda" CACHE STRING "Binary name") 16 | enable_language(CUDA) 17 | add_subdirectory(src) 18 | elseif(BACKEND STREQUAL "DPCPP") 19 | set(BINARY_NAME "nbody_dpcpp" CACHE STRING "Binary name") 20 | add_subdirectory(src_sycl) 21 | else() 22 | message(FATAL_ERROR "Unrecognized BACKEND") 23 | endif() 24 | 25 | if(RENDER) 26 | add_subdirectory(libs/imgui) 27 | endif() 28 | 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression 9 | , level of experience, education, socio-economic status, nationality, personal 10 | appearance, 
race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 
50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at 59 | [sycl@codeplay.com](mailto:sycl@codeplay.com). All complaints will be reviewed 60 | and investigated and will result in a response that is deemed necessary and 61 | appropriate to the circumstances. The project team is obligated to maintain 62 | confidentiality with regard to the reporter of an incident. Further details of 63 | specific enforcement policies may be posted separately. 64 | 65 | Project maintainers who do not follow or enforce the Code of Conduct in good 66 | faith may face temporary or permanent repercussions as determined by other 67 | members of the project's leadership. 68 | 69 | ## Attribution 70 | 71 | This Code of Conduct is adapted from the 72 | [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, 73 | available at 74 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 75 | 76 | --- 77 | 78 | If there are any issues or suggestions relating to the current set of rules, you 79 | can reach us at [sycl@codeplay.com](mailto:sycl@codeplay.com). 80 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright for portions of project 'nbody' are held by Sarah Le Luron, 2016-2018 as part of project 'dpct-nbody'. All other copyright for project 'dpct-nbody' are held by Codeplay Software Limited, 2022. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nbody 2 | 3 | [![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/codeplaysoftware/cuda-to-sycl-nbody/badge)](https://scorecard.dev/viewer/?uri=github.com/codeplaysoftware/cuda-to-sycl-nbody) 4 | 5 | Accelerated N-body sim with OpenGL graphics & automatic CUDA->SYCL conversion using [dpct](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html). 
6 | 7 | ![](http://i.imgur.com/drzi33P.jpg) 8 | 9 | Forked from https://github.com/salel/nbody 10 | 11 | ## Compilers/Backends 12 | 13 | This nbody simulation can be run with any of: 14 | - CUDA 15 | - DPC++ CUDA backend 16 | - DPC++ OpenCL CPU backend 17 | 18 | Source code for the CUDA version is in `./src/` while `./src_sycl/` contains the semi-automatically converted SYCL code. 19 | 20 | ## Build Dependencies 21 | 22 | ### Graphics Dependencies 23 | 24 | By default the build requires OpenGL. See the **Building** section below to build without rendering. 25 | 26 | The rendering components of this code are independent of the CUDA/SYCL backend, and depend on: 27 | - GLM 28 | - GLFW 29 | - GLEW 30 | 31 | These can be installed with apt: 32 | ``` 33 | sudo apt update 34 | sudo apt install libglew-dev libglfw3-dev libglm-dev libxxf86vm-dev libxcursor-dev libxinerama-dev libxi-dev 35 | ``` 36 | 37 | The implementation relies on OpenGL 4.5. 38 | 39 | ### Simulation Dependencies (CUDA & SYCL) 40 | 41 | The CUDA version of this code requires the [CUDA runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain-with-support-for-nvidia-cuda) to be installed on your machine. 42 | 43 | The DPC++ CUDA backend version also requires the CUDA runtime. 44 | 45 | The DPC++ OpenCL backend requires an [OpenCL runtime](https://intel.github.io/llvm-docs/GetStartedGuide.html#install-low-level-runtime). To run specifically on the CPU, you must install the OpenCL runtime for your CPU. 46 | 47 | Both DPC++ backends require the [DPC++ compiler](https://intel.github.io/llvm-docs/GetStartedGuide.html) to compile the SYCL code. 48 | 49 | ## Building 50 | 51 | This project uses CMake for build configuration. Build scripts for CUDA and DPC++ are located in `./scripts/`. Note that these scripts include some hardcoded paths from our dev machine, and so will not work out of the box. 
52 | 53 | The CMake option `-DBACKEND` allows you to select which backend ("CUDA" or "DPCPP") to build. CUDA is built by default. The name of the built binary is suffixed with the backend (`nbody_cuda` or `nbody_dpcpp`). 54 | 55 | The DPC++ backend, in turn, supports both an OpenCL & CUDA backend, both of which are built by default. If you are building on a machine without CUDA support, you can switch off the DPC++ CUDA backend with the flag `-DDPCPP_CUDA_SUPPORT=off`. 56 | 57 | The build scripts create a version that includes rendering. To build versions that do not require OpenGL, provide the argument **no_render** to the build scripts. 58 | 59 | By default, a **release** target is built, for example, `nbody_cuda`. To build a debug version, navigate to the build directory and execute **make debug**. Running **make** will build both versions. The debug binary will share the same name as the **release** version with "_d" appended. 60 | 61 | The provided `tasks.json` and `launch.json` configuration files for vscode serve as examples, demonstrating how to initiate a debug session directly from within vscode. 62 | 63 | ## Migrating CUDA to SYCL 64 | 65 | The script `./scripts/run_dpct.sh` calls a containerized version of the Intel® DPC++ Compatibility Tool to automatically convert the CUDA components of this project into SYCL. A docker container was used because the dev machine has an incompatible version of the CUDA driver. This should be adapted based on your environment. 66 | 67 | The Intel® DPC++ compatibility tool offers options for intercepting complex builds, but current dev environment restrictions require me to run the tool inside a docker container. This complicates things, so for now I'm just doing single source conversion on the simulator.cu file. 68 | 69 | ## Running on different platforms 70 | 71 | The script `./scripts/run_nbody.sh` will run the nbody simulation, selecting a different binary based on the `-b` flag, where `-b` can be `cuda` or `dpcpp`. 
Subsequent positional arguments are passed on to the `nbody` binary. These positional arguments are described in the [Simulation](#Simulation) section. For example, to run on the DPC++ OpenCL host backend with 25600 (100 * 256) particles, executing 10 timesteps per rendered frame: 72 | 73 | ``` 74 | ./scripts/run_nbody.sh -b dpcpp 100 10 75 | ``` 76 | 77 | Note that this script runs `nbody` with the default X window, as opposed to using [xvfb](#Running-headless). This makes it unsuitable for running on a remote machine. 78 | 79 | `run_nbody.sh` is a simple wrapper around the `nbody_*` binaries with some environment variables set; the sections below describe how to launch the binaries directly. 80 | 81 | ### Detecting available SYCL backends 82 | 83 | The `sycl-ls` tool allows you to check for available backends on the system. For example, on a system with Intel OpenCL CPU runtime & CUDA runtime, the output is: 84 | 85 | ``` 86 | > sycl-ls 87 | [opencl:cpu:0] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000] 88 | [opencl:cpu:1] Intel(R) OpenCL, Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz 3.0 [2021.13.11.0.23_160000] 89 | [cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 3060 0.0 [CUDA 11.6] 90 | [host:host:0] SYCL host platform, SYCL host device 1.2 [1.2] 91 | ``` 92 | 93 | ### Selecting a backend (DPC++) 94 | 95 | By specifying the environment variable `SYCL_DEVICE_FILTER`, it's possible to switch between running with the CUDA backend and the OpenCL host backend. For example: 96 | 97 | ``` 98 | SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 99 | ``` 100 | will run on the CUDA backend, whereas: 101 | ``` 102 | SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 103 | ``` 104 | will run on a CPU through the OpenCL backend. Note the correspondence between options for `SYCL_DEVICE_FILTER` and the output of `sycl-ls`. 
105 | 106 | **Note**: Selection between DPC++ backends at runtime is possible because `CMakeLists.txt` specifies building the SYCL code for both CUDA (`nvptx64-nvidia-cuda`) & OpenCL (`spir64`) targets: 107 | ``` 108 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -fsycl-targets=spir64,nvptx64-nvidia-cuda -fsycl-unnamed-lambda") 109 | ``` 110 | 111 | ### Adapting the project for DPC++ OpenCL 112 | 113 | No changes to the code were required, but there were a couple of bugs which are worked around. 114 | 115 | Firstly, when building for multiple targets (`-fsycl-targets`), there is a [recent bug](https://github.com/intel/llvm/issues/5330) which causes failure to link to static libraries. The workaround for this is to switch from building `imgui` as a static to a shared library. 116 | 117 | Secondly, I encountered the common CL header bug (see [here](https://github.com/intel/llvm/issues/2617) and [here](https://github.com/oneapi-src/oneDNN/issues/885)). This turned out to be triggered for the `spir64` backend because the CUDA headers were included *only* via `-I` and not via `-internal-isystem`. This caused them to take precedence over SYCL CL headers. The solution was to not include CUDA headers in `src_sycl/CMakeLists.txt`, which turned out to be unnecessary anyway. 118 | 119 | ## Passing data between OpenGL & CUDA/SYCL 120 | 121 | OpenGL & CUDA are capable of interoperating to share device memory, but this will not play well with the Intel® DPC++ Compatibility Tool. Instead, computed particle positions are migrated back to the host by CUDA/SYCL, then sent *back* to OpenGL via mapping. 122 | 123 | 124 | ## Simulation 125 | 126 | The `DiskGalaxySimulator` class handles the physics of n-body interaction. The computation of interparticle forces, velocity & updated particle positions are handled by the CUDA kernel `particle_interaction`. 127 | 128 | The equation solved by this code is equivalent to Eq. 
1 [here](http://www.scholarpedia.org/article/N-body_simulations_(gravitational)), with the simplifying assumption that all particles have unit mass and there is no external/background force. This becomes: 129 | 130 | ![Eq1](/docs/Eq1.png) 131 | 132 | The force vector on each particle (F) is the sum of gravitational forces from all other particles. For each particle interaction, the attractive force is inversely proportional to the square distance between them. This force is equal to the gravitational constant (`G`) multiplied by the unit vector pointing between the particles, divided by the square of this distance. The equation above has this last term slightly rearranged to avoid unnecessary computation. 133 | 134 | Given the assumption of unit mass, the force (F) is equal to the acceleration, and so at each timestep, the force vector F is multiplied by the timestep size (`dt`) and added to the velocity vector. The position of each particle is then updated by the velocity multiplied by the timestep size (`dt`). 135 | 136 | A drag factor (`damping`) is used to regulate the velocity. At each timestep, the velocity is multiplied by the drag term, slowing the particles. The maximum force between very close particles is also limited for stability; this is achieved via an epsilon term (`distEps`) which is added to the distance between each particle pairing. 137 | 138 | The `parameters` described in this section can all be adjusted via command line arguments, as follows: 139 | 140 | `./nbody_cuda numParticles simIterationsPerFrame damping dt distEps G numFrames gwSize calcMethod` 141 | 142 | Note that `numParticles` specifies the number of particles simulated, divided by blocksize (i.e. setting `numParticles` to 50 produces 50*256 particles). `simIterationsPerFrame` specifies how many steps of the simulation to take before rendering the next frame and `numFrames` specifies the total number of simulation steps before the program exits. 
For default values for all of these parameters, refer to `sim_param.cpp`. 143 | 144 | `gwSize`: This parameter allows changing the work group size from the default 64. 145 | 146 | `calcMethod`: This string parameter, with a default value of BRANCH, selects branch instruction code. If set to PREDICATED, it uses an arithmetic expression. Refer to the [performance](#sycl-vs-cuda-performance) section for details. 147 | 148 | 149 | ### Modifying Simulation Behaviour 150 | 151 | You can get quite a wide range of 'galactic' behaviours by playing with the parameters described above. 152 | 153 | Initial velocity of stars is a stable orbital velocity, computed with an implicit value for gravity of `G = 1`. The default value *during* the simulation, however, is `G = 2`. So by default the galaxy collapses inwards quite quickly, but by reducing G closer to 1, you can make a more stable, rotating galaxy. 154 | 155 | The `damping` factor is a drag term. By default `damping = 0.999998` but by reducing this value to e.g. `0.999`, stars will tend to form local clusters before collapsing in towards the galactic centre. 156 | 157 | `distEps` serves as a stabilising parameter to prevent numerical instability at larger timestep sizes. Setting this value very small (`1.0e-10`) will produce more 'explosive' simulations. This is unrealistic for n-body gravitational interaction, but it looks dramatic. 158 | 159 | If you want to speed up the evolution of the galaxy, set a larger timestep size (`dt`) or increase the number of steps taken per frame (`simIterationsPerFrame`). Either change will increase the total simulation time per rendered frame. If you reach a sufficiently high timestep size that you get unstable explosive behaviour, increase the value of `distEps` and this should stabilise things. 
Note that there is a separate discussion [below](#performance-scaling-for-demos) about altering the ratio of compute/render time to, for instance, visually highlight a performance difference between platforms. 160 | 161 | ## Graphics Pipeline 162 | 163 | ### Rendering 164 | Render targets for all passes except the last use dimensions a bit larger than the window, to prevent popping. This is used when some effects affect neighboring pixels (bloom, ssao..) and must be taken into account even when off-screen. 165 | #### HDR 166 | Each particle is rendered as a fixed-size flare, generated from a gaussian. Particle color depends on velocity, blue at low speeds and purple at high speeds. Additive blending is set, so dense regions look bright. The render target is RGBA16F, because GL_R11F_G11F_B10F looks yellow on subsequent render passes. 167 | 168 | #### Bloom 169 | 170 | Bloom is applied through a separable Gaussian blur, applied once in the horizontal and then the vertical direction. The 1D Gaussian kernel is computed by `RendererGL::gaussKernel` and optimized to minimize texel lookups by `RendererGL::optimGaussKernel` following [this guide](https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/). At present, a gaussian window of 49 pixels with sigma = 10.0 is used. Multiple passes are possible (ping pong between two RGBA16F frame buffers), though at present we execute only one blur in each direction. 171 | 172 | Note that unlike typical bloom processing, there is no extraction of bright light sources prior to blurring, because the scene (bright stars on a dark background) makes this obsolete. 173 | 174 | The appearance & performance of the blur is controlled by four variables which are not currently exposed as arguments to `nbody_[backend]` but which could be manually modified as desired. The two arguments to the `gaussKernel` function (`sigma` and `halfwidth`) effectively define the 'spread' of the blur. 
Higher values for `sigma` result in wider blurring, whereas `halfwindow` defines the actual width of the pixel window which is sampled. Higher values of `halfwindow` will decrease performance, as more texel lookups are required. As a general rule, when increasing `sigma`, it will likely be necessary to increase `halfwindow` to avoid an obvious visual cut-off at the edge of the window. Conversely, a wide `halfwindow` with a small `sigma` reduces performance unnecessarily, because texels with negligible contribution will be sampled. 175 | 176 | Blur downscaling is a common technique to improve blur performance; the image is downsampled by the factor `blur_dsc` defined in `renderer_gl.cpp`, then the regular blur filter is performed, and finally the image is upscaled again. This is a very cheap way of enhancing the blur effect, but there is an associated artefact: 177 | 178 | ![DownscaleArtefact](/docs/downscale_artefact.png) 179 | 180 | If this artefact is unacceptable, set `blur_dsc = 1` to turn off downscaling. Note however that this will significantly reduce the blurriness, and compensating with wider `halfwindow` or more passes (see below) will cost a lot of rendering time. 181 | 182 | Enhanced blurring can also be achieved by executing multiple passes. This is controlled by `nPasses`, and is set to 1 by default. Due to the dominance of blur in the render pipeline, total rendering time should scale pretty much linearly with `nPasses`, so increasing it is a potentially expensive option. 183 | 184 | #### Average luminance 185 | The average luminance of the scene is computed from the HDR target into a downscaled R16F target. Then we generate mipmaps to obtain the average luminance on the smallest mipmap (1x1). 
(Could also be obtained from a 2x2 texture but screen-size targets always seem to resolve down to odd dimensions) 186 | 187 | #### Tonemapping & gamma correction 188 | The exposure of the final render is obtained from the average luminance, and the HDR and Bloom targets are combined and converted to LDR. Gamma correction is also applied. Tada. 189 | 190 | ## Running headless 191 | 192 | If you run `nbody_cuda` on a remote machine with X-forwarding, sending the rendered frames across the net will be a significant bottleneck. This can be worked around by making use of [Xvfb](https://linux.die.net/man/1/xvfb) which provides a *virtual* X display. You can then read from the memory mapped file to write to e.g. MP4 output. 193 | 194 | The script `./scripts/xvfb.sh` runs `nbody_cuda` in this manner, producing a video file `output.mp4`. Note that this script will run the simulation until manually terminated. 195 | 196 | ## Performance Scaling for Demos 197 | 198 | We've previously discussed the desire for a simulation which is *visibly* slower when the physics kernel isn't well optimized. With current default settings, the rendering takes longer (~55ms) than the simulation (10ms). However, altering three of the simulation parameters provides almost complete control of the ratio of render to simulation time. 199 | 200 | Firstly, the number of particles (`numParticles` [above](#Simulation)) has a large effect on the simulation time, as the computation scales with O(n²). By default, 12.8k particles (50 * 256) are rendered, but increasing this to 64k particles (250 * 256), the simulation time increases from 10ms to ~170ms. 201 | 202 | Alternatively, simulation time can be arbitrarily raised or lowered by changing both timestep size (`dt` [above](#Simulation)) and simulation steps per rendered frame (`simIterationsPerFrame`, [above](#Simulation)). 
By default, a timestep size of 0.005 is used, and 4 simulation steps are taken per rendered frame (Note that `scripts/xvfb.sh` overrides these default values with `dt = 0.001` and `simIterationsPerFrame = 5`). 203 | 204 | To increase the simulation time by a factor of 5, for example, simply divide `dt` by 5 and multiply `simIterationsPerFrame` by 5. This will produce *almost* identical output. Take care with *increasing* `dt` to get the opposite effect; above a certain value, the simulation will become unstable & you may see this manifest as unphysical behaviour (very fast moving stars exploding out from the centre). Instability at large `dt` can be mitigated, to an extent, by increasing `distEps` or `damping`. 205 | 206 | A significant portion of the rendering time is the bloom filter. The [bloom](#Bloom) section has some tips about how to control this. 207 | 208 | ## SYCL vs. CUDA performance 209 | 210 | This repo previously reported *faster* performance from SYCL than CUDA, but this was due to an erroneous translation in the Intel® DPC++ Compatibility Tool from `__frsqrt_rn` to `sycl::rsqrt`. The former has higher precision and runs slower than the latter. This has now been rectified so that the original CUDA code calls `rsqrt`. 211 | 212 | With this bug rectified, and without any further modification to the CUDA code or migrated SYCL code, the SYCL code used to be considerably slower because the Intel® DPC++ Compatibility Tool used to insert a cast to double in the rsqrt call: 213 | 214 | ``` 215 | coords_t inv_dist_cube = 216 | sycl::rsqrt((double)dist_sqr * dist_sqr * dist_sqr); 217 | 218 | ``` 219 | 220 | This was presumably because the tool was unaware of the equivalence of `rsqrt` and `sycl::rsqrt`. However, inspecting PTX reveals that the generated instructions are the same, so the cast to double is unnecessary. Removing the cast to double leaves a 40% performance gap between CUDA and SYCL. 
This is no longer necessary as newer versions of the Intel® DPC++ Compatibility Tool no longer insert the cast. 221 | 222 | The root cause of this 40% performance gap appears to be different handling of the branch instruction: 223 | 224 | ``` 225 | if (i == id) continue; 226 | ``` 227 | in the main loop in simulator.dp.cpp. Whereas NVCC handles this via instruction predication, DPC++ generates branch & sync instructions. By replacing this branch instruction with an arithmetic expression: 228 | 229 | ``` 230 | force += r * inv_dist_cube * (i != id); 231 | ``` 232 | in both the CUDA & SYCL code, we get comparable performance between the two using our hardware set up (RTX 3060). For 5 steps of the physical simulation (1 rendered frame) with 12,800 particles, both CUDA and SYCL take ~5.05ms (RTX 3060). 233 | 234 | ## Update 2024 235 | 236 | The ability to execute the nbody code without rendering simplified the process of running the code on different platforms. The results of these executions have brought to light some issues related to the runtime and compilers. As stated before, the original code was modified by substituting: 237 | 238 | ``` 239 | // Original code 240 | if (i == id) continue; 241 | 242 | force += r * inv_dist_cube; 243 | ``` 244 | 245 | with 246 | 247 | ``` 248 | // Modified code 249 | force += r * inv_dist_cube * (i != id); 250 | ``` 251 | 252 | in order to address the 40% decrease in SYCL performance compared to the CUDA code. With this change, the performance was almost the same for both compilers on the RTX 3060. 253 | 254 | We have found that while this is the case for the A100 (CUDA 8.48516 ms vs. SYCL 8.23865 ms), it is not the same on the RTX 2060, where CUDA is heavily penalized (CUDA 10.7281 ms vs. SYCL 8.52349 ms). Even on the A100, the change lowered the CUDA performance (7.95778 ms for the original code). 255 | 256 | The code change also greatly improved the performance by 100% on the MAX 1100 GPU, dropping from 21.6555 ms to 10.7633 ms.
257 | Below are the best results from executing the code on the three different platforms. 258 | 259 | ``` 260 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 2060 7.5 [CUDA 12.3] 261 | ==================== WORK GROUP SIZE 512 BRANCH ======================== 262 | CUDA - At step 10000 kernel time is 8.48516 and mean is 8.53952 and stddev is: 0.0884324 263 | DPC - At step 10000 kernel time is 8.23865 and mean is 8.30511 and stddev is: 0.0788344 264 | ==================== WORK GROUP SIZE 512 PREDICATED ==================== 265 | CUDA - At step 10000 kernel time is 10.7281 and mean is 10.7601 and stddev is: 0.0630959 266 | DPC - At step 10000 kernel time is 8.52349 and mean is 8.5992 and stddev is: 0.078034 267 | 268 | [ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2] 269 | ==================== WORK GROUP SIZE 128 BRANCH ======================== 270 | CUDA - At step 10000 kernel time is 7.95778 and mean is 7.95753 and stddev is: 0.000680384 271 | DPC - At step 10000 kernel time is 10.051 and mean is 10.0506 and stddev is: 0.00181166 272 | ==================== WORK GROUP SIZE 128 PREDICATED ==================== 273 | CUDA - At step 10000 kernel time is 8.60294 and mean is 8.60151 and stddev is: 0.00077172 274 | DPC - At step 10000 kernel time is 7.99054 and mean is 7.99109 and stddev is: 0.0041852 275 | 276 | [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Data Center GPU Max 1100 1.3 [1.3.26516] 277 | ==================== WORK GROUP SIZE 32 BRANCH ======================== 278 | At step 10000 kernel time is 21.5747 and mean is 21.6555 and stddev is: 0.0734683 279 | ==================== WORK GROUP SIZE 32 PREDICATED ==================== 280 | At step 10000 kernel time is 10.6649 and mean is 10.7633 and stddev is: 0.0507969 281 | ``` 282 | -------------------------------------------------------------------------------- /SECURITY.md: 
-------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | To report a vulnerability or a security issue please fill the security 6 | advisories form [here](../../security/advisories/new), send an email to 7 | security@codeplay.com or contact us using the [contact form on our web 8 | page](https://codeplay.com/company/contact/?q=Report%20Security%20Issue). 9 | -------------------------------------------------------------------------------- /cmake/FindGLEW.cmake: -------------------------------------------------------------------------------- 1 | # 2 | 3 | # Try to find GLEW library and include path. 4 | # Once done this will define 5 | # 6 | # GLEW_FOUND 7 | # GLEW_INCLUDE_DIR 8 | # GLEW_LIBRARY 9 | # GLEW_SOURCE 10 | # 11 | 12 | include(FindPackageHandleStandardArgs) 13 | 14 | if (WIN32) 15 | find_path( GLEW_INCLUDE_DIR 16 | NAMES 17 | GL/glew.h 18 | PATHS 19 | ${GLEW_LOCATION}/include 20 | $ENV{GLEW_LOCATION}/include 21 | $ENV{PROGRAMFILES}/GLEW/include 22 | ${PROJECT_SOURCE_DIR}/extern/glew/include 23 | ${GLEW_LOCATION} 24 | $ENV{GLEW_LOCATION} 25 | DOC "The directory where GL/glew.h resides" ) 26 | find_file( GLEW_SOURCE 27 | NAMES 28 | glew.c 29 | PATHS 30 | ${GLEW_LOCATION}/src 31 | $ENV{GLEW_LOCATION}/src 32 | $ENV{PROGRAMFILES}/GLEW/src 33 | ${PROJECT_SOURCE_DIR}/extern/glew/src 34 | ${GLEW_LOCATION} 35 | $ENV{GLEW_LOCATION} 36 | DOC "The directory where GL/glew.c resides" ) 37 | if(ARCH STREQUAL "x86") 38 | find_library( GLEW_LIBRARY 39 | NAMES 40 | glew GLEW glew32s glew32 41 | PATHS 42 | ${GLEW_LOCATION}/lib 43 | ${GLEW_LOCATION}/lib/x86 44 | ${GLEW_LOCATION}/lib/win32 45 | ${GLEW_LOCATION}/lib/Release/win32 46 | ${GLEW_LOCATION}/lib/Release MX/win32 47 | $ENV{GLEW_LOCATION}/lib 48 | $ENV{GLEW_LOCATION}/lib/Release/win32 49 | $ENV{GLEW_LOCATION}/lib/Release MX/win32 50 | $ENV{GLEW_LOCATION}/lib/x86 51 | $ENV{GLEW_LOCATION}/lib/win32 52 | $ENV{PROGRAMFILES}/GLEW/lib 
53 | $ENV{PROGRAMFILES}/GLEW/lib/x86 54 | $ENV{PROGRAMFILES}/GLEW/lib/win32 55 | ${PROJECT_SOURCE_DIR}/extern/glew/bin 56 | ${PROJECT_SOURCE_DIR}/extern/glew/lib 57 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/x86 58 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/win32 59 | ${GLEW_LOCATION} 60 | $ENV{GLEW_LOCATION} 61 | DOC "The GLEW library") 62 | else() 63 | find_library( GLEW_LIBRARY 64 | NAMES 65 | glew GLEW glew32s glew32 66 | PATHS 67 | ${GLEW_LOCATION}/lib/x64 68 | ${GLEW_LOCATION}/lib/Release/x64 69 | ${GLEW_LOCATION}/lib/Release MX/x64 70 | $ENV{GLEW_LOCATION}/lib/x64 71 | $ENV{GLEW_LOCATION}/lib/Release/x64 72 | $ENV{GLEW_LOCATION}/lib/Release MX/x64 73 | $ENV{PROGRAMFILES}/GLEW/lib/x64 74 | ${PROJECT_SOURCE_DIR}/extern/glew/bin 75 | ${PROJECT_SOURCE_DIR}/extern/glew/lib/x64 76 | ${GLEW_LOCATION}/lib 77 | $ENV{GLEW_LOCATION}/lib 78 | $ENV{PROGRAMFILES}/GLEW/lib 79 | ${PROJECT_SOURCE_DIR}/extern/glew/lib 80 | ${GLEW_LOCATION} 81 | $ENV{GLEW_LOCATION} 82 | DOC "The GLEW library") 83 | endif() 84 | endif () 85 | 86 | if (${CMAKE_HOST_UNIX}) 87 | find_path( GLEW_INCLUDE_DIR 88 | NAMES 89 | GL/glew.h 90 | PATHS 91 | ${GLEW_LOCATION}/include 92 | $ENV{GLEW_LOCATION}/include 93 | /usr/include 94 | /usr/local/include 95 | /sw/include 96 | /opt/local/include 97 | NO_DEFAULT_PATH 98 | DOC "The directory where GL/glew.h resides" 99 | ) 100 | find_library( GLEW_LIBRARY 101 | NAMES 102 | GLEW glew 103 | PATHS 104 | ${GLEW_LOCATION}/lib 105 | $ENV{GLEW_LOCATION}/lib 106 | /usr/lib64 107 | /usr/lib 108 | /usr/local/lib64 109 | /usr/local/lib 110 | /sw/lib 111 | /opt/local/lib 112 | NO_DEFAULT_PATH 113 | DOC "The GLEW library") 114 | endif () 115 | 116 | if (GLEW_INCLUDE_DIR AND EXISTS "${GLEW_INCLUDE_DIR}/GL/glew.h") 117 | 118 | file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_2 REGEX "^#define GL_VERSION_4_2.*$") 119 | if (GLEW_4_2) 120 | SET(OPENGL_4_2_FOUND TRUE) 121 | else () 122 | message(WARNING 123 | "glew-1.7.0 or newer needed for supporting OpenGL 4.2 dependent 
features" 124 | ) 125 | endif () 126 | 127 | file(STRINGS "${GLEW_INCLUDE_DIR}/GL/glew.h" GLEW_4_3 REGEX "^#define GL_VERSION_4_3.*$") 128 | if (GLEW_4_3) 129 | SET(OPENGL_4_3_FOUND TRUE) 130 | else () 131 | message(WARNING 132 | "glew-1.9.0 or newer needed for supporting OpenGL 4.3 dependent features" 133 | ) 134 | endif () 135 | 136 | endif () 137 | 138 | if(GLEW_SOURCE) 139 | find_package_handle_standard_args(GLEW DEFAULT_MSG 140 | GLEW_INCLUDE_DIR 141 | GLEW_SOURCE 142 | ) 143 | else() 144 | find_package_handle_standard_args(GLEW DEFAULT_MSG 145 | GLEW_INCLUDE_DIR 146 | GLEW_LIBRARY 147 | ) 148 | endif() 149 | 150 | mark_as_advanced( GLEW_FOUND ) -------------------------------------------------------------------------------- /cmake/FindGLFW.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2013 Pixar 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "Apache License") 5 | # with the following modification; you may not use this file except in 6 | # compliance with the Apache License and the following modification to it: 7 | # Section 6. Trademarks. is deleted and replaced with: 8 | # 9 | # 6. Trademarks. This License does not grant permission to use the trade 10 | # names, trademarks, service marks, or product names of the Licensor 11 | # and its affiliates, except as required to comply with Section 4(c) of 12 | # the License and to reproduce the content of the NOTICE file. 13 | # 14 | # You may obtain a copy of the Apache License at 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | # Unless required by applicable law or agreed to in writing, software 19 | # distributed under the Apache License with the above modification is 20 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 | # KIND, either express or implied. See the Apache License for the specific 22 | # language governing permissions and limitations under the Apache License. 
23 | # 24 | 25 | # Try to find GLFW library and include path. 26 | # Once done this will define 27 | # 28 | # GLFW_FOUND 29 | # GLFW_INCLUDE_DIR 30 | # GLFW_LIBRARIES 31 | # 32 | 33 | find_path( GLFW_INCLUDE_DIR 34 | NAMES 35 | GLFW/glfw3.h 36 | HINTS 37 | "${GLFW_LOCATION}/include" 38 | "$ENV{GLFW_LOCATION}/include" 39 | PATHS 40 | "$ENV{PROGRAMFILES}/GLFW/include" 41 | "${OPENGL_INCLUDE_DIR}" 42 | /usr/openwin/share/include 43 | /usr/openwin/include 44 | /usr/X11R6/include 45 | /usr/include/X11 46 | /opt/graphics/OpenGL/include 47 | /opt/graphics/OpenGL/contrib/libglfw 48 | /usr/local/include 49 | /usr/include/GL 50 | /usr/include 51 | DOC 52 | "The directory where GLFW/glfw3.h resides" 53 | ) 54 | 55 | # 56 | # XXX: Do we still need to search for GL/glfw.h? 57 | # 58 | find_path( GLFW_INCLUDE_DIR 59 | NAMES 60 | GL/glfw.h 61 | HINTS 62 | "${GLFW_LOCATION}/include" 63 | "$ENV{GLFW_LOCATION}/include" 64 | PATHS 65 | "$ENV{PROGRAMFILES}/GLFW/include" 66 | "${OPENGL_INCLUDE_DIR}" 67 | /usr/openwin/share/include 68 | /usr/openwin/include 69 | /usr/X11R6/include 70 | /usr/include/X11 71 | /opt/graphics/OpenGL/include 72 | /opt/graphics/OpenGL/contrib/libglfw 73 | /usr/local/include 74 | /usr/include/GL 75 | /usr/include 76 | DOC 77 | "The directory where GL/glfw.h resides" 78 | ) 79 | 80 | if (WIN32) 81 | if(CYGWIN) 82 | find_library( GLFW_glfw_LIBRARY 83 | NAMES 84 | glfw32 85 | HINTS 86 | "${GLFW_LOCATION}/lib" 87 | "${GLFW_LOCATION}/lib/x64" 88 | "$ENV{GLFW_LOCATION}/lib" 89 | PATHS 90 | "${OPENGL_LIBRARY_DIR}" 91 | /usr/lib 92 | /usr/lib/w32api 93 | /usr/local/lib 94 | /usr/X11R6/lib 95 | DOC 96 | "The GLFW library" 97 | ) 98 | else() 99 | find_library( GLFW_glfw_LIBRARY 100 | NAMES 101 | glfw32 102 | glfw32s 103 | glfw 104 | glfw3 105 | HINTS 106 | "${GLFW_LOCATION}/lib" 107 | "${GLFW_LOCATION}/lib/x64" 108 | "${GLFW_LOCATION}/lib-msvc110" 109 | "${GLFW_LOCATION}/lib-vc2012" 110 | "$ENV{GLFW_LOCATION}/lib" 111 | "$ENV{GLFW_LOCATION}/lib/x64" 112 | 
"$ENV{GLFW_LOCATION}/lib-msvc110" 113 | "$ENV{GLFW_LOCATION}/lib-vc2012" 114 | PATHS 115 | "$ENV{PROGRAMFILES}/GLFW/lib" 116 | "${OPENGL_LIBRARY_DIR}" 117 | DOC 118 | "The GLFW library" 119 | ) 120 | endif() 121 | else () 122 | if (APPLE) 123 | find_library( GLFW_glfw_LIBRARY glfw 124 | NAMES 125 | glfw 126 | glfw3 127 | HINTS 128 | "${GLFW_LOCATION}/lib" 129 | "${GLFW_LOCATION}/lib/cocoa" 130 | "$ENV{GLFW_LOCATION}/lib" 131 | "$ENV{GLFW_LOCATION}/lib/cocoa" 132 | PATHS 133 | /usr/local/lib 134 | ) 135 | set(GLFW_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") 136 | set(GLFW_corevideo_LIBRARY "-framework CoreVideo" CACHE STRING "CoreVideo framework for OSX") 137 | set(GLFW_iokit_LIBRARY "-framework IOKit" CACHE STRING "IOKit framework for OSX") 138 | else () 139 | # (*)NIX 140 | 141 | find_package(Threads REQUIRED) 142 | 143 | find_package(X11 REQUIRED) 144 | 145 | if(NOT X11_Xrandr_FOUND) 146 | message(FATAL_ERROR "Xrandr library not found - required for GLFW") 147 | endif() 148 | 149 | if(NOT X11_xf86vmode_FOUND) 150 | message(FATAL_ERROR "xf86vmode library not found - required for GLFW") 151 | endif() 152 | 153 | if(NOT X11_Xcursor_FOUND) 154 | message(FATAL_ERROR "Xcursor library not found - required for GLFW") 155 | endif() 156 | 157 | if(NOT X11_Xinerama_FOUND) 158 | message(FATAL_ERROR "Xinerama library not found - required for GLFW") 159 | endif() 160 | 161 | list(APPEND GLFW_x11_LIBRARY "${X11_Xrandr_LIB}" "${X11_Xxf86vm_LIB}" "${X11_Xcursor_LIB}" "${X11_Xinerama_LIB}" "${CMAKE_THREAD_LIBS_INIT}" -lrt -lXi) 162 | 163 | find_library( GLFW_glfw_LIBRARY 164 | NAMES 165 | glfw 166 | glfw3 167 | HINTS 168 | "${GLFW_LOCATION}/lib" 169 | "$ENV{GLFW_LOCATION}/lib" 170 | "${GLFW_LOCATION}/lib/x11" 171 | "$ENV{GLFW_LOCATION}/lib/x11" 172 | PATHS 173 | /usr/lib64 174 | /usr/lib 175 | /usr/lib/${CMAKE_LIBRARY_ARCHITECTURE} 176 | /usr/local/lib64 177 | /usr/local/lib 178 | /usr/local/lib/${CMAKE_LIBRARY_ARCHITECTURE} 179 | /usr/openwin/lib 180 
| /usr/X11R6/lib 181 | DOC 182 | "The GLFW library" 183 | ) 184 | endif (APPLE) 185 | endif (WIN32) 186 | 187 | set( GLFW_FOUND "NO" ) 188 | 189 | if(GLFW_INCLUDE_DIR) 190 | 191 | if(GLFW_glfw_LIBRARY) 192 | set( GLFW_LIBRARIES "${GLFW_glfw_LIBRARY}" 193 | "${GLFW_x11_LIBRARY}" 194 | "${GLFW_cocoa_LIBRARY}" 195 | "${GLFW_iokit_LIBRARY}" 196 | "${GLFW_corevideo_LIBRARY}" ) 197 | set( GLFW_FOUND "YES" ) 198 | set (GLFW_LIBRARY "${GLFW_LIBRARIES}") 199 | set (GLFW_INCLUDE_PATH "${GLFW_INCLUDE_DIR}") 200 | endif(GLFW_glfw_LIBRARY) 201 | 202 | 203 | # Tease the GLFW_VERSION numbers from the lib headers 204 | function(parseVersion FILENAME VARNAME) 205 | 206 | set(PATTERN "^#define ${VARNAME}.*$") 207 | 208 | file(STRINGS "${GLFW_INCLUDE_DIR}/${FILENAME}" TMP REGEX ${PATTERN}) 209 | 210 | string(REGEX MATCHALL "[0-9]+" TMP ${TMP}) 211 | 212 | set(${VARNAME} ${TMP} PARENT_SCOPE) 213 | 214 | endfunction() 215 | 216 | 217 | if(EXISTS "${GLFW_INCLUDE_DIR}/GL/glfw.h") 218 | 219 | parseVersion(GL/glfw.h GLFW_VERSION_MAJOR) 220 | parseVersion(GL/glfw.h GLFW_VERSION_MINOR) 221 | parseVersion(GL/glfw.h GLFW_VERSION_REVISION) 222 | 223 | elseif(EXISTS "${GLFW_INCLUDE_DIR}/GLFW/glfw3.h") 224 | 225 | parseVersion(GLFW/glfw3.h GLFW_VERSION_MAJOR) 226 | parseVersion(GLFW/glfw3.h GLFW_VERSION_MINOR) 227 | parseVersion(GLFW/glfw3.h GLFW_VERSION_REVISION) 228 | 229 | endif() 230 | 231 | if(${GLFW_VERSION_MAJOR} OR ${GLFW_VERSION_MINOR} OR ${GLFW_VERSION_REVISION}) 232 | set(GLFW_VERSION "${GLFW_VERSION_MAJOR}.${GLFW_VERSION_MINOR}.${GLFW_VERSION_REVISION}") 233 | set(GLFW_VERSION_STRING "${GLFW_VERSION}") 234 | mark_as_advanced(GLFW_VERSION) 235 | endif() 236 | 237 | endif(GLFW_INCLUDE_DIR) 238 | 239 | include(FindPackageHandleStandardArgs) 240 | 241 | find_package_handle_standard_args(GLFW 242 | REQUIRED_VARS 243 | GLFW_INCLUDE_DIR 244 | GLFW_LIBRARIES 245 | VERSION_VAR 246 | GLFW_VERSION 247 | ) 248 | 249 | mark_as_advanced( 250 | GLFW_INCLUDE_DIR 251 | GLFW_LIBRARIES 252 | 
GLFW_glfw_LIBRARY 253 | GLFW_cocoa_LIBRARY 254 | ) 255 | 256 | -------------------------------------------------------------------------------- /cmake/FindGLM.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Find GLM 3 | # 4 | # Try to find GLM : OpenGL Mathematics. 5 | # This module defines 6 | # - GLM_INCLUDE_DIRS 7 | # - GLM_FOUND 8 | # 9 | # The following variables can be set as arguments for the module. 10 | # - GLM_ROOT_DIR : Root library directory of GLM 11 | # 12 | # References: 13 | # - https://github.com/Groovounet/glm/blob/master/util/FindGLM.cmake 14 | # - https://bitbucket.org/alfonse/gltut/src/28636298c1c0/glm-0.9.0.7/FindGLM.cmake 15 | # 16 | 17 | # Additional modules 18 | include(FindPackageHandleStandardArgs) 19 | 20 | if (WIN32) 21 | # Find include files 22 | find_path( 23 | GLM_INCLUDE_DIR 24 | NAMES glm/glm.hpp 25 | PATHS 26 | $ENV{PROGRAMFILES}/include 27 | ${GLM_ROOT_DIR}/include 28 | DOC "The directory where glm/glm.hpp resides") 29 | else() 30 | # Find include files 31 | find_path( 32 | GLM_INCLUDE_DIR 33 | NAMES glm/glm.hpp 34 | PATHS 35 | /usr/include 36 | /usr/local/include 37 | /sw/include 38 | /opt/local/include 39 | ${GLM_ROOT_DIR}/include 40 | DOC "The directory where glm/glm.hpp resides") 41 | endif() 42 | 43 | # Handle REQUIRED argument, define *_FOUND variable 44 | find_package_handle_standard_args(GLM DEFAULT_MSG GLM_INCLUDE_DIR) 45 | 46 | # Define GLM_INCLUDE_DIRS 47 | if (GLM_FOUND) 48 | set(GLM_INCLUDE_DIRS ${GLM_INCLUDE_DIR}) 49 | endif() 50 | 51 | # Hide some variables 52 | mark_as_advanced(GLM_INCLUDE_DIR) 53 | -------------------------------------------------------------------------------- /cmake/FindVulkan.cmake: -------------------------------------------------------------------------------- 1 | # Find Vulkan 2 | # 3 | # VULKAN_INCLUDE_DIR 4 | # VULKAN_LIBRARY 5 | # VULKAN_FOUND 6 | 7 | if (WIN32) 8 | find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS 9 |
"$ENV{VULKAN_SDK}/Include" 10 | "$ENV{VK_SDK_PATH}/Include") 11 | if (CMAKE_CL_64) 12 | find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS 13 | "$ENV{VULKAN_SDK}/Bin" 14 | "$ENV{VK_SDK_PATH}/Bin") 15 | find_library(VULKAN_STATIC_LIBRARY NAMES vkstatic.1 HINTS 16 | "$ENV{VULKAN_SDK}/Bin" 17 | "$ENV{VK_SDK_PATH}/Bin") 18 | else() 19 | find_library(VULKAN_LIBRARY NAMES vulkan-1 HINTS 20 | "$ENV{VULKAN_SDK}/Bin32" 21 | "$ENV{VK_SDK_PATH}/Bin32") 22 | endif() 23 | else() 24 | find_path(VULKAN_INCLUDE_DIR NAMES vulkan/vulkan.h HINTS 25 | "$ENV{VULKAN_SDK}/include") 26 | find_library(VULKAN_LIBRARY NAMES vulkan HINTS 27 | "$ENV{VULKAN_SDK}/lib") 28 | endif() 29 | 30 | include(FindPackageHandleStandardArgs) 31 | find_package_handle_standard_args(Vulkan DEFAULT_MSG VULKAN_LIBRARY VULKAN_INCLUDE_DIR) 32 | 33 | mark_as_advanced(VULKAN_INCLUDE_DIR VULKAN_LIBRARY VULKAN_STATIC_LIBRARY) 34 | -------------------------------------------------------------------------------- /cmake/Finddpct.cmake: -------------------------------------------------------------------------------- 1 | if (WIN32) 2 | find_path( dpct_INCLUDE_DIR 3 | NAMES 4 | dpct/dpct.hpp 5 | PATHS 6 | ${dpct_LOCATION}/include 7 | $ENV{dpct_LOCATION}/include 8 | $ENV{DPCT_BUNDLE_ROOT}/include 9 | $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include 10 | $ENV{PROGRAMFILES}/include 11 | NO_DEFAULT_PATH 12 | DOC "The directory where dpct/dpct.hpp resides" 13 | ) 14 | else() 15 | find_path( dpct_INCLUDE_DIR 16 | NAMES 17 | dpct/dpct.hpp 18 | PATHS 19 | ${dpct_LOCATION}/include 20 | $ENV{dpct_LOCATION}/include 21 | $ENV{DPCT_BUNDLE_ROOT}/include 22 | $ENV{ONEAPI_ROOT}/dpcpp-ct/latest/include 23 | /opt/intel/oneapi/dpcpp-ct/latest/include 24 | /usr/include 25 | /usr/local/include 26 | /sw/include 27 | /opt/local/include 28 | NO_DEFAULT_PATH 29 | DOC "The directory where dpct/dpct.hpp resides" 30 | ) 31 | endif () 32 | 33 | include(FindPackageHandleStandardArgs) 34 | find_package_handle_standard_args( dpct REQUIRED_VARS dpct_INCLUDE_DIR ) 
35 | 36 | -------------------------------------------------------------------------------- /docs/Eq1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/Eq1.png -------------------------------------------------------------------------------- /docs/downscale_artefact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeplaysoftware/cuda-to-sycl-nbody/c8f013a7b554faeabc6c39107b742916c66b7cd4/docs/downscale_artefact.png -------------------------------------------------------------------------------- /libs/imgui/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Codeplay Software Limited 2 | 3 | add_library(imgui SHARED 4 | src/imgui.cpp 5 | src/imgui_widgets.cpp 6 | src/imgui_demo.cpp 7 | src/imgui_draw.cpp 8 | src/imgui_tables.cpp 9 | src/imgui_impl_opengl3.cpp 10 | src/imgui_impl_glfw.cpp) 11 | 12 | target_link_libraries(imgui PRIVATE dl) 13 | 14 | target_include_directories(imgui PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) 15 | 16 | target_include_directories(${BINARY_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include) 17 | target_include_directories(${BINARY_NAME}_d PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include) 18 | 19 | # Link main project to imgui lib 20 | target_link_libraries(${BINARY_NAME} PRIVATE imgui) 21 | target_link_libraries(${BINARY_NAME}_d PRIVATE imgui) 22 | -------------------------------------------------------------------------------- /libs/imgui/include/imconfig.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // COMPILE-TIME OPTIONS FOR DEAR IMGUI 3 | // Runtime options (clipboard callbacks, enabling various features, etc.) 
can generally be set via the ImGuiIO structure. 4 | // You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions. 5 | //----------------------------------------------------------------------------- 6 | // A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/rebased branch with your modifications to it) 7 | // B) or '#define IMGUI_USER_CONFIG "my_imgui_config.h"' in your project and then add directives in your own file without touching this template. 8 | //----------------------------------------------------------------------------- 9 | // You need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which include the imgui*.cpp 10 | // files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an affect on data structures. 11 | // Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts. 12 | // Call IMGUI_CHECKVERSION() from your .cpp files to verify that the data structures your files are using are matching the ones imgui.cpp is using. 13 | //----------------------------------------------------------------------------- 14 | 15 | #pragma once 16 | 17 | //---- Define assertion handler. Defaults to calling assert(). 18 | // If your macro uses multiple statements, make sure is enclosed in a 'do { .. } while (0)' block so it can be used as a single statement. 19 | //#define IM_ASSERT(_EXPR) MyAssert(_EXPR) 20 | //#define IM_ASSERT(_EXPR) ((void)(_EXPR)) // Disable asserts 21 | 22 | //---- Define attributes of all API symbols declarations, e.g. for DLL under Windows 23 | // Using Dear ImGui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility. 24 | // DLL users: heaps and globals are not shared across DLL boundaries! 
You will need to call SetCurrentContext() + SetAllocatorFunctions() 25 | // for each static/DLL boundary you are calling from. Read "Context and Memory Allocators" section of imgui.cpp for more details. 26 | //#define IMGUI_API __declspec( dllexport ) 27 | //#define IMGUI_API __declspec( dllimport ) 28 | 29 | //---- Don't define obsolete functions/enums/behaviors. Consider enabling from time to time after updating to avoid using soon-to-be obsolete function/names. 30 | //#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS 31 | //#define IMGUI_DISABLE_OBSOLETE_KEYIO // 1.87: disable legacy io.KeyMap[]+io.KeysDown[] in favor io.AddKeyEvent(). This will be folded into IMGUI_DISABLE_OBSOLETE_FUNCTIONS in a few versions. 32 | 33 | //---- Disable all of Dear ImGui or don't implement standard windows. 34 | // It is very strongly recommended to NOT disable the demo windows during development. Please read comments in imgui_demo.cpp. 35 | //#define IMGUI_DISABLE // Disable everything: all headers and source files will be empty. 36 | //#define IMGUI_DISABLE_DEMO_WINDOWS // Disable demo windows: ShowDemoWindow()/ShowStyleEditor() will be empty. Not recommended. 37 | //#define IMGUI_DISABLE_METRICS_WINDOW // Disable metrics/debugger and other debug tools: ShowMetricsWindow() and ShowStackToolWindow() will be empty. 38 | 39 | //---- Don't implement some functions to reduce linkage requirements. 40 | //#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc. 
(user32.lib/.a, kernel32.lib/.a) 41 | //#define IMGUI_ENABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with Visual Studio] Implement default IME handler (require imm32.lib/.a, auto-link for Visual Studio, -limm32 on command-line for MinGW) 42 | //#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with non-Visual Studio compilers] Don't implement default IME handler (won't require imm32.lib/.a) 43 | //#define IMGUI_DISABLE_WIN32_FUNCTIONS // [Win32] Won't use and link with any Win32 function (clipboard, ime). 44 | //#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default). 45 | //#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf) 46 | //#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself. 47 | //#define IMGUI_DISABLE_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle at all (replace them with dummies) 48 | //#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function. 49 | //#define IMGUI_DISABLE_DEFAULT_ALLOCATORS // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions(). 
50 | //#define IMGUI_DISABLE_SSE // Disable use of SSE intrinsics even if available 51 | 52 | //---- Include imgui_user.h at the end of imgui.h as a convenience 53 | //#define IMGUI_INCLUDE_IMGUI_USER_H 54 | 55 | //---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another) 56 | //#define IMGUI_USE_BGRA_PACKED_COLOR 57 | 58 | //---- Use 32-bit for ImWchar (default is 16-bit) to support unicode planes 1-16. (e.g. point beyond 0xFFFF like emoticons, dingbats, symbols, shapes, ancient languages, etc...) 59 | //#define IMGUI_USE_WCHAR32 60 | 61 | //---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version 62 | // By default the embedded implementations are declared static and not available outside of Dear ImGui sources files. 63 | //#define IMGUI_STB_TRUETYPE_FILENAME "my_folder/stb_truetype.h" 64 | //#define IMGUI_STB_RECT_PACK_FILENAME "my_folder/stb_rect_pack.h" 65 | //#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION 66 | //#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION 67 | 68 | //---- Use stb_printf's faster implementation of vsnprintf instead of the one from libc (unless IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS is defined) 69 | // Requires 'stb_sprintf.h' to be available in the include path. Compatibility checks of arguments and formats done by clang and GCC will be disabled in order to support the extra formats provided by STB sprintf. 70 | // #define IMGUI_USE_STB_SPRINTF 71 | 72 | //---- Use FreeType to build and rasterize the font atlas (instead of stb_truetype which is embedded by default in Dear ImGui) 73 | // Requires FreeType headers to be available in the include path. Requires program to be compiled with 'misc/freetype/imgui_freetype.cpp' (in this repository) + the FreeType library (not provided). 74 | // On Windows you may use vcpkg with 'vcpkg install freetype --triplet=x64-windows' + 'vcpkg integrate install'. 
75 | //#define IMGUI_ENABLE_FREETYPE 76 | 77 | //---- Use stb_truetype to build and rasterize the font atlas (default) 78 | // The only purpose of this define is if you want force compilation of the stb_truetype backend ALONG with the FreeType backend. 79 | //#define IMGUI_ENABLE_STB_TRUETYPE 80 | 81 | //---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4. 82 | // This will be inlined as part of ImVec2 and ImVec4 class declarations. 83 | /* 84 | #define IM_VEC2_CLASS_EXTRA \ 85 | ImVec2(const MyVec2& f) { x = f.x; y = f.y; } \ 86 | operator MyVec2() const { return MyVec2(x,y); } 87 | 88 | #define IM_VEC4_CLASS_EXTRA \ 89 | ImVec4(const MyVec4& f) { x = f.x; y = f.y; z = f.z; w = f.w; } \ 90 | operator MyVec4() const { return MyVec4(x,y,z,w); } 91 | */ 92 | 93 | //---- Use 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices. 94 | // Your renderer backend will need to support it (most example renderer backends support both 16/32-bit indices). 95 | // Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer. 96 | // Read about ImGuiBackendFlags_RendererHasVtxOffset for details. 97 | //#define ImDrawIdx unsigned int 98 | 99 | //---- Override ImDrawCallback signature (will need to modify renderer backends accordingly) 100 | //struct ImDrawList; 101 | //struct ImDrawCmd; 102 | //typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data); 103 | //#define ImDrawCallback MyImDrawCallback 104 | 105 | //---- Debug Tools: Macro to break in Debugger 106 | // (use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.) 
107 | //#define IM_DEBUG_BREAK IM_ASSERT(0) 108 | //#define IM_DEBUG_BREAK __debugbreak() 109 | 110 | //---- Debug Tools: Have the Item Picker break in the ItemAdd() function instead of ItemHoverable(), 111 | // (which comes earlier in the code, will catch a few extra items, allow picking items other than Hovered one.) 112 | // This adds a small runtime cost which is why it is not enabled by default. 113 | //#define IMGUI_DEBUG_TOOL_ITEM_PICKER_EX 114 | 115 | //---- Debug Tools: Enable slower asserts 116 | //#define IMGUI_DEBUG_PARANOID 117 | 118 | //---- Tip: You can add extra functions within the ImGui:: namespace, here or in your own headers files. 119 | /* 120 | namespace ImGui 121 | { 122 | void MyFunction(const char* name, const MyMatrix44& v); 123 | } 124 | */ 125 | -------------------------------------------------------------------------------- /libs/imgui/include/imgui_impl_glfw.h: -------------------------------------------------------------------------------- 1 | // dear imgui: Platform Backend for GLFW 2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..) 3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.) 4 | 5 | // Implemented features: 6 | // [X] Platform: Clipboard support. 7 | // [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set] 8 | // [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'. 9 | // [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors requires GLFW 3.4+). 10 | 11 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. 
12 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. 13 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. 14 | // Read online: https://github.com/ocornut/imgui/tree/master/docs 15 | 16 | // About GLSL version: 17 | // The 'glsl_version' initialization parameter defaults to "#version 150" if NULL. 18 | // Only override if your GL version doesn't handle this GLSL version. Keep NULL if unsure! 19 | 20 | #pragma once 21 | #include "imgui.h" // IMGUI_IMPL_API 22 | 23 | struct GLFWwindow; 24 | struct GLFWmonitor; 25 | 26 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks); 27 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks); 28 | IMGUI_IMPL_API bool ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks); 29 | IMGUI_IMPL_API void ImGui_ImplGlfw_Shutdown(); 30 | IMGUI_IMPL_API void ImGui_ImplGlfw_NewFrame(); 31 | 32 | // GLFW callbacks 33 | // - When calling Init with 'install_callbacks=true': GLFW callbacks will be installed for you. They will call user's previously installed callbacks, if any. 34 | // - When calling Init with 'install_callbacks=false': GLFW callbacks won't be installed. You will need to call those function yourself from your own GLFW callbacks. 
35 | IMGUI_IMPL_API void ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused); // Since 1.84 36 | IMGUI_IMPL_API void ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered); // Since 1.84 37 | IMGUI_IMPL_API void ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y); // Since 1.87 38 | IMGUI_IMPL_API void ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods); 39 | IMGUI_IMPL_API void ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset); 40 | IMGUI_IMPL_API void ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int key, int scancode, int action, int mods); 41 | IMGUI_IMPL_API void ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c); 42 | IMGUI_IMPL_API void ImGui_ImplGlfw_MonitorCallback(GLFWmonitor* monitor, int event); 43 | -------------------------------------------------------------------------------- /libs/imgui/include/imgui_impl_opengl3.h: -------------------------------------------------------------------------------- 1 | // dear imgui: Renderer Backend for modern OpenGL with shaders / programmatic pipeline 2 | // - Desktop GL: 2.x 3.x 4.x 3 | // - Embedded GL: ES 2.0 (WebGL 1.0), ES 3.0 (WebGL 2.0) 4 | // This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..) 5 | 6 | // Implemented features: 7 | // [X] Renderer: User texture binding. Use 'GLuint' OpenGL texture identifier as void*/ImTextureID. Read the FAQ about ImTextureID! 8 | // [x] Renderer: Desktop GL only: Support for large meshes (64k+ vertices) with 16-bit indices. 9 | 10 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. 11 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. 12 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. 
13 | // Read online: https://github.com/ocornut/imgui/tree/master/docs 14 | 15 | // About GLSL version: 16 | // The 'glsl_version' initialization parameter should be NULL (default) or a "#version XXX" string. 17 | // On computer platform the GLSL version default to "#version 130". On OpenGL ES 3 platform it defaults to "#version 300 es" 18 | // Only override if your GL version doesn't handle this GLSL version. See GLSL version table at the top of imgui_impl_opengl3.cpp. 19 | 20 | #pragma once 21 | #include "imgui.h" // IMGUI_IMPL_API 22 | 23 | // Backend API 24 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_Init(const char* glsl_version = NULL); 25 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_Shutdown(); 26 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_NewFrame(); 27 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_RenderDrawData(ImDrawData* draw_data); 28 | 29 | // (Optional) Called by Init/NewFrame/Shutdown 30 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_CreateFontsTexture(); 31 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_DestroyFontsTexture(); 32 | IMGUI_IMPL_API bool ImGui_ImplOpenGL3_CreateDeviceObjects(); 33 | IMGUI_IMPL_API void ImGui_ImplOpenGL3_DestroyDeviceObjects(); 34 | 35 | // Specific OpenGL ES versions 36 | //#define IMGUI_IMPL_OPENGL_ES2 // Auto-detected on Emscripten 37 | //#define IMGUI_IMPL_OPENGL_ES3 // Auto-detected on iOS/Android 38 | 39 | // You can explicitly select GLES2 or GLES3 API by using one of the '#define IMGUI_IMPL_OPENGL_LOADER_XXX' in imconfig.h or compiler command-line. 
40 | #if !defined(IMGUI_IMPL_OPENGL_ES2) \ 41 | && !defined(IMGUI_IMPL_OPENGL_ES3) 42 | 43 | // Try to detect GLES on matching platforms 44 | #if defined(__APPLE__) 45 | #include 46 | #endif 47 | #if (defined(__APPLE__) && (TARGET_OS_IOS || TARGET_OS_TV)) || (defined(__ANDROID__)) 48 | #define IMGUI_IMPL_OPENGL_ES3 // iOS, Android -> GL ES 3, "#version 300 es" 49 | #elif defined(__EMSCRIPTEN__) 50 | #define IMGUI_IMPL_OPENGL_ES2 // Emscripten -> GL ES 2, "#version 100" 51 | #else 52 | // Otherwise imgui_impl_opengl3_loader.h will be used. 53 | #endif 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /libs/imgui/include/imstb_rectpack.h: -------------------------------------------------------------------------------- 1 | // [DEAR IMGUI] 2 | // This is a slightly modified version of stb_rect_pack.h 1.00. 3 | // Those changes would need to be pushed into nothings/stb: 4 | // - Added STBRP__CDECL 5 | // Grep for [DEAR IMGUI] to find the changes. 6 | 7 | // stb_rect_pack.h - v1.00 - public domain - rectangle packing 8 | // Sean Barrett 2014 9 | // 10 | // Useful for e.g. packing rectangular textures into an atlas. 11 | // Does not do rotation. 12 | // 13 | // Not necessarily the awesomest packing method, but better than 14 | // the totally naive one in stb_truetype (which is primarily what 15 | // this is meant to replace). 16 | // 17 | // Has only had a few tests run, may have issues. 18 | // 19 | // More docs to come. 20 | // 21 | // No memory allocations; uses qsort() and assert() from stdlib. 22 | // Can override those by defining STBRP_SORT and STBRP_ASSERT. 23 | // 24 | // This library currently uses the Skyline Bottom-Left algorithm. 25 | // 26 | // Please note: better rectangle packers are welcome! Please 27 | // implement them to the same API, but with a different init 28 | // function. 
//
// Credits
//
//  Library
//    Sean Barrett
//  Minor features
//    Martins Mozeiko
//    github:IntellectualKitty
//
//  Bugfixes / warning fixes
//    Jeremy Jaussaud
//    Fabian Giesen
//
// Version history:
//
//     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
//     0.99  (2019-02-07)  warning fixes
//     0.11  (2017-03-03)  return packing success/fail result
//     0.10  (2016-10-25)  remove cast-away-const to avoid warnings
//     0.09  (2016-08-27)  fix compiler warnings
//     0.08  (2015-09-13)  really fix bug with empty rects (w=0 or h=0)
//     0.07  (2015-09-13)  fix bug with empty rects (w=0 or h=0)
//     0.06  (2015-04-15)  added STBRP_SORT to allow replacing qsort
//     0.05:  added STBRP_ASSERT to allow replacing assert
//     0.04:  fixed minor bug in STBRP_LARGE_RECTS support
//     0.01:  initial release
//
// LICENSE
//
//   See end of file for license information.

//////////////////////////////////////////////////////////////////////////////
//
//       INCLUDE SECTION
//

#ifndef STB_INCLUDE_STB_RECT_PACK_H
#define STB_INCLUDE_STB_RECT_PACK_H

#define STB_RECT_PACK_VERSION  1

#ifdef STBRP_STATIC
#define STBRP_DEF static
#else
#define STBRP_DEF extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef struct stbrp_context stbrp_context;
typedef struct stbrp_node    stbrp_node;
typedef struct stbrp_rect    stbrp_rect;

// Coordinate type used for rectangle sizes/positions and skyline nodes.
#ifdef STBRP_LARGE_RECTS
typedef int            stbrp_coord;
#else
typedef unsigned short stbrp_coord;
#endif

STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
// Assign packed locations to rectangles. The rectangles are of type
// 'stbrp_rect' defined below, stored in the array 'rects', and there
// are 'num_rects' many of them.
//
// Rectangles which are successfully packed have the 'was_packed' flag
// set to a non-zero value and 'x' and 'y' store the minimum location
// on each axis (i.e. bottom-left in cartesian coordinates, top-left
// if you imagine y increasing downwards). Rectangles which do not fit
// have the 'was_packed' flag set to 0.
//
// You should not try to access the 'rects' array from another thread
// while this function is running, as the function temporarily reorders
// the array while it executes.
//
// To pack into another rectangle, you need to call stbrp_init_target
// again. To continue packing into the same rectangle, you can call
// this function again. Calling this multiple times with multiple rect
// arrays will probably produce worse packing results than calling it
// a single time with the full rectangle array, but the option is
// available.
//
// The function returns 1 if all of the rectangles were successfully
// packed and 0 otherwise.

struct stbrp_rect
{
   // reserved for your use:
   int            id;

   // input:
   stbrp_coord    w, h;

   // output:
   stbrp_coord    x, y;
   int            was_packed;  // non-zero if valid packing

}; // 16 bytes, nominally


STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes);
// Initialize a rectangle packer to:
//    pack a rectangle that is 'width' by 'height' in dimensions
//    using temporary storage provided by the array 'nodes', which is 'num_nodes' long
//
// You must call this function every time you start packing into a new target.
//
// There is no "shutdown" function. The 'nodes' memory must stay valid for
// the following stbrp_pack_rects() call (or calls), but can be freed after
// the call (or calls) finish.
//
// 'num_nodes' must be at least 1.
//
// Note: to guarantee best results, either:
//       1. make sure 'num_nodes' >= 'width'
//   or  2. call stbrp_setup_allow_out_of_mem() defined below with 'allow_out_of_mem = 1'
//
// If you don't do either of the above things, widths will be quantized to multiples
// of small integers to guarantee the algorithm doesn't run out of temporary storage.
//
// If you do #2, then the non-quantized algorithm will be used, but the algorithm
// may run out of temporary storage and be unable to pack some rectangles.

STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem);
// Optionally call this function after init but before doing any packing to
// change the handling of the out-of-temp-memory scenario, described above.
// If you call init again, this will be reset to the default (false).


STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic);
// Optionally select which packing heuristic the library should use. Different
// heuristics will produce better/worse results for different data sets.
// If you call init again, this will be reset to the default.

enum
{
   STBRP_HEURISTIC_Skyline_default=0,
   STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default,
   STBRP_HEURISTIC_Skyline_BF_sortHeight
};


//////////////////////////////////////////////////////////////////////////////
//
// the details of the following structures don't matter to you, but they must
// be visible so you can handle the memory allocations for them

struct stbrp_node
{
   stbrp_coord  x,y;
   stbrp_node  *next;
};

struct stbrp_context
{
   int width;
   int height;
   int align;
   int init_mode;
   int heuristic;
   int num_nodes;
   stbrp_node *active_head;
   stbrp_node *free_head;
   stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2'
};

#ifdef __cplusplus
}
#endif

#endif

//////////////////////////////////////////////////////////////////////////////
//
//     IMPLEMENTATION SECTION
//

#ifdef STB_RECT_PACK_IMPLEMENTATION
#ifndef STBRP_SORT
#include <stdlib.h>
#define STBRP_SORT qsort
#endif

#ifndef STBRP_ASSERT
#include <assert.h>
#define STBRP_ASSERT assert
#endif

// [DEAR IMGUI] Added STBRP__CDECL
#ifdef _MSC_VER
#define STBRP__NOTUSED(v)  (void)(v)
#define STBRP__CDECL __cdecl
#else
#define STBRP__NOTUSED(v)  (void)sizeof(v)
#define STBRP__CDECL
#endif

enum
{
   STBRP__INIT_skyline = 1
};

// Select the packing heuristic stored in 'context'.
// 'heuristic' must be one of the STBRP_HEURISTIC_Skyline_* values; anything
// else (or a context that was not initialized by stbrp_init_target) asserts.
STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic)
{
   switch (context->init_mode) {
      case STBRP__INIT_skyline:
         STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight);
         context->heuristic = heuristic;
         break;
      default:
         STBRP_ASSERT(0);
   }
}

// Choose the width quantization ('context->align') used when placing rectangles.
// Reads context->width and context->num_nodes, so it must be called after
// stbrp_init_target() has filled them in (num_nodes must be non-zero).
STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem)
{
   if (allow_out_of_mem)
      // if it's ok to run out of memory, then don't bother aligning them;
      // this gives better packing, but may fail due to OOM (even though
      // the rectangles easily fit). @TODO a smarter approach would be to only
      // quantize once we've hit OOM, then we could get rid of this parameter.
      context->align = 1;
   else {
      // if it's not ok to run out of memory, then quantize the widths
      // so that num_nodes is always enough nodes.
      //
      // I.e. num_nodes * align >= width
      //                  align >= width / num_nodes
      //                  align = ceil(width/num_nodes)

      context->align = (context->width + context->num_nodes-1) / context->num_nodes;
   }
}

// Reset 'context' to an empty 'width' x 'height' target.
// The caller-provided 'nodes' array (length 'num_nodes', at least 1) is linked
// into the free list; the initial skyline uses the two nodes embedded in the
// context. Heuristic and out-of-memory handling are reset to their defaults.
STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
{
   int i;
#ifndef STBRP_LARGE_RECTS
   STBRP_ASSERT(width <= 0xffff && height <= 0xffff);
#endif
   // with no nodes the code below would write nodes[0] out of bounds and
   // stbrp_setup_allow_out_of_mem() would divide by zero
   STBRP_ASSERT(num_nodes > 0);

   // chain the user nodes into a singly linked free list
   for (i=0; i < num_nodes-1; ++i)
      nodes[i].next = &nodes[i+1];
   nodes[i].next = NULL;
   context->init_mode = STBRP__INIT_skyline;
   context->heuristic = STBRP_HEURISTIC_Skyline_default;
   context->free_head = &nodes[0];
   context->active_head = &context->extra[0];
   context->width = width;
   context->height = height;
   context->num_nodes = num_nodes;
   stbrp_setup_allow_out_of_mem(context, 0);

   // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly)
   context->extra[0].x = 0;
   context->extra[0].y = 0;
   context->extra[0].next = &context->extra[1];
   context->extra[1].x = (stbrp_coord) width;
#ifdef STBRP_LARGE_RECTS
   context->extra[1].y = (1<<30);
#else
   context->extra[1].y = 65535;
#endif
   context->extra[1].next = NULL;
}

// find the minimum y at which a rectangle spanning [x0, x0+width) can rest on
// the skyline, i.e. the highest skyline segment under that span.
//   first  : skyline node containing x0 (first->x <= x0 < first->next->x);
//            the caller is responsible for having skipped earlier nodes
//   pwaste : receives the area left empty between the skyline and that y
static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
{
   stbrp_node *node = first;
   int x1 = x0 + width;
   int min_y, visited_width, waste_area;

   STBRP__NOTUSED(c);

   STBRP_ASSERT(first->x <= x0);

   STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency

   STBRP_ASSERT(node->x <= x0);

   min_y = 0;
   waste_area = 0;
   visited_width = 0;
   while (node->x < x1) {
      if (node->y > min_y) {
         // raise min_y higher.
         // we've accounted for all waste up to min_y,
         // but we'll now add more waste for everything we've visited
         waste_area += visited_width * (node->y - min_y);
         min_y = node->y;
         // the first time through, visited_width might be reduced
         if (node->x < x0)
            visited_width += node->next->x - x0;
         else
            visited_width += node->next->x - node->x;
      } else {
         // add waste area
         int under_width = node->next->x - node->x;
         if (under_width + visited_width > width)
            under_width = width - visited_width;
         waste_area += under_width * (min_y - node->y);
         visited_width += under_width;
      }
      node = node->next;
   }

   *pwaste = waste_area;
   return min_y;
}

// Result of a placement search: the chosen position plus the link in the
// active list that points at the first node the rectangle overlaps
// (prev_link == NULL means no position was found).
typedef struct
{
   int x,y;
   stbrp_node **prev_link;
} stbrp__findresult;

// Search the skyline for the best position for a width x height rectangle
// according to c->heuristic. The width is first rounded up to a multiple of
// c->align. Does not modify the skyline.
static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height)
{
   int best_waste = (1<<30), best_x, best_y = (1 << 30);
   stbrp__findresult fr;
   stbrp_node **prev, *node, *tail, **best = NULL;

   // align to multiple of c->align
   width = (width + c->align - 1);
   width -= width % c->align;
   STBRP_ASSERT(width % c->align == 0);

   // if it can't possibly fit, bail immediately
   if (width > c->width || height > c->height) {
      fr.prev_link = NULL;
      fr.x = fr.y = 0;
      return fr;
   }

   // first pass: try the rectangle left-aligned at every node that leaves
   // enough room to the right
   node = c->active_head;
   prev = &c->active_head;
   while (node->x + width <= c->width) {
      int y,waste;
      y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste);
      if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
         // bottom left
         if (y < best_y) {
            best_y = y;
            best = prev;
         }
      } else {
         // best-fit
         if (y + height <= c->height) {
            // can only use it if it fits vertically
            if (y < best_y || (y == best_y && waste < best_waste)) {
               best_y = y;
               best_waste = waste;
               best = prev;
            }
         }
      }
      prev = &node->next;
      node = node->next;
   }

   best_x = (best == NULL) ? 0 : (*best)->x;

   // if doing best-fit (BF), we also have to try aligning right edge to each node position
   //
   // e.g, if fitting
   //
   //     ____________________
   //    |____________________|
   //
   //            into
   //
   //   |                         |
   //   |             ____________|
   //   |____________|
   //
   // then right-aligned reduces waste, but bottom-left (BL) always chooses left-aligned
   //
   // This makes BF take about 2x the time

   if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
      tail = c->active_head;
      node = c->active_head;
      prev = &c->active_head;
      // find first node that's admissible
      while (tail->x < width)
         tail = tail->next;
      while (tail) {
         int xpos = tail->x - width;
         int y,waste;
         STBRP_ASSERT(xpos >= 0);
         // find the left position that matches this
         while (node->next->x <= xpos) {
            prev = &node->next;
            node = node->next;
         }
         STBRP_ASSERT(node->next->x > xpos && node->x <= xpos);
         y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste);
         if (y + height <= c->height) {
            if (y <= best_y) {
               if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) {
                  best_x = xpos;
                  STBRP_ASSERT(y <= best_y);
                  best_y = y;
                  best_waste = waste;
                  best = prev;
               }
            }
         }
         tail = tail->next;
      }
   }

   fr.prev_link = best;
   fr.x = best_x;
   fr.y = best_y;
   return fr;
}

// Find a position for a width x height rectangle and, on success, update the
// skyline: one node is taken from the free list for the rectangle's top edge
// and the nodes it fully covers are returned to the free list.
// On failure the returned prev_link is NULL and the skyline is untouched.
static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
{
   // find best position according to heuristic
   stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height);
   stbrp_node *node, *cur;

   // bail if:
   //    1. it failed
   //    2. the best node doesn't fit (we don't always check this)
   //    3. we're out of memory
   if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) {
      res.prev_link = NULL;
      return res;
   }

   // on success, create new node
   node = context->free_head;
   node->x = (stbrp_coord) res.x;
   node->y = (stbrp_coord) (res.y + height);

   context->free_head = node->next;

   // insert the new node into the right starting point, and
   // let 'cur' point to the remaining nodes needing to be
   // stitched back in

   cur = *res.prev_link;
   if (cur->x < res.x) {
      // preserve the existing one, so start testing with the next one
      stbrp_node *next = cur->next;
      cur->next = node;
      cur = next;
   } else {
      *res.prev_link = node;
   }

   // from here, traverse cur and free the nodes, until we get to one
   // that shouldn't be freed
   while (cur->next && cur->next->x <= res.x + width) {
      stbrp_node *next = cur->next;
      // move the current node to the free list
      cur->next = context->free_head;
      context->free_head = cur;
      cur = next;
   }

   // stitch the list back in
   node->next = cur;

   // the surviving node may start under the new rectangle; clip it to the
   // rectangle's right edge
   if (cur->x < res.x + width)
      cur->x = (stbrp_coord) (res.x + width);

#ifdef _DEBUG
   // sanity checks: x strictly increases along the active list, and
   // active + free lists together still hold every node
   cur = context->active_head;
   while (cur->x < context->width) {
      STBRP_ASSERT(cur->x < cur->next->x);
      cur = cur->next;
   }
   STBRP_ASSERT(cur->next == NULL);

   {
      int count=0;
      cur = context->active_head;
      while (cur) {
         cur = cur->next;
         ++count;
      }
      cur = context->free_head;
      while (cur) {
         cur = cur->next;
         ++count;
      }
      STBRP_ASSERT(count == context->num_nodes+2);
   }
#endif

   return res;
}

// [DEAR IMGUI] Added STBRP__CDECL
// Sort comparator: tallest rectangles first, ties broken by widest first.
static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
{
   const stbrp_rect *p = (const stbrp_rect *) a;
   const stbrp_rect *q = (const stbrp_rect *) b;
   if (p->h > q->h)
      return -1;
   if (p->h < q->h)
      return  1;
   return (p->w > q->w) ? -1 : (p->w < q->w);
}

// [DEAR IMGUI] Added STBRP__CDECL
// Sort comparator: ascending 'was_packed', which stbrp_pack_rects() uses to
// hold each rectangle's original array index while packing.
static int STBRP__CDECL rect_original_order(const void *a, const void *b)
{
   const stbrp_rect *p = (const stbrp_rect *) a;
   const stbrp_rect *q = (const stbrp_rect *) b;
   return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
}

// Sentinel written to both x and y of a rectangle that could not be placed.
#ifdef STBRP_LARGE_RECTS
#define STBRP__MAXVAL  0xffffffff
#else
#define STBRP__MAXVAL  0xffff
#endif

// Pack 'num_rects' rectangles into the target described by 'context'.
// The array is temporarily sorted (tallest first) and restored to its original
// order before returning. Zero-sized rectangles are placed at (0,0) without
// consuming space. Returns 1 if every rectangle was packed, 0 otherwise.
STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
{
   int i, all_rects_packed = 1;

   // we use the 'was_packed' field internally to allow sorting/unsorting
   for (i=0; i < num_rects; ++i) {
      rects[i].was_packed = i;
   }

   // sort according to heuristic
   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare);

   for (i=0; i < num_rects; ++i) {
      if (rects[i].w == 0 || rects[i].h == 0) {
         rects[i].x = rects[i].y = 0;  // empty rect needs no space
      } else {
         stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h);
         if (fr.prev_link) {
            rects[i].x = (stbrp_coord) fr.x;
            rects[i].y = (stbrp_coord) fr.y;
         } else {
            rects[i].x = rects[i].y = STBRP__MAXVAL;
         }
      }
   }

   // unsort
   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order);

   // set was_packed flags and all_rects_packed status
   for (i=0; i < num_rects; ++i) {
      rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL);
      if (!rects[i].was_packed)
         all_rects_packed = 0;
   }

   // return the all_rects_packed status
   return all_rects_packed;
}
#endif

/*
------------------------------------------------------------------------------
This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright (c) 2017 Sean Barrett
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.
626 | In jurisdictions that recognize copyright laws, the author or authors of this 627 | software dedicate any and all copyright interest in the software to the public 628 | domain. We make this dedication for the benefit of the public at large and to 629 | the detriment of our heirs and successors. We intend this dedication to be an 630 | overt act of relinquishment in perpetuity of all present and future rights to 631 | this software under copyright law. 632 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 633 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 634 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 635 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 636 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 637 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 638 | ------------------------------------------------------------------------------ 639 | */ 640 | -------------------------------------------------------------------------------- /libs/imgui/src/imgui_impl_glfw.cpp: -------------------------------------------------------------------------------- 1 | // dear imgui: Platform Backend for GLFW 2 | // This needs to be used along with a Renderer (e.g. OpenGL3, Vulkan, WebGPU..) 3 | // (Info: GLFW is a cross-platform general purpose library for handling windows, inputs, OpenGL/Vulkan graphics context creation, etc.) 4 | // (Requires: GLFW 3.1+) 5 | 6 | // Implemented features: 7 | // [X] Platform: Clipboard support. 8 | // [X] Platform: Keyboard support. Since 1.87 we are using the io.AddKeyEvent() function. Pass ImGuiKey values to all key functions e.g. ImGui::IsKeyPressed(ImGuiKey_Space). [Legacy GLFW_KEY_* values will also be supported unless IMGUI_DISABLE_OBSOLETE_KEYIO is set] 9 | // [X] Platform: Gamepad support. Enable with 'io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad'. 
10 | // [X] Platform: Mouse cursor shape and visibility. Disable with 'io.ConfigFlags |= ImGuiConfigFlags_NoMouseCursorChange' (note: the resizing cursors requires GLFW 3.4+). 11 | 12 | // You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. 13 | // Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. 14 | // If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. 15 | // Read online: https://github.com/ocornut/imgui/tree/master/docs 16 | 17 | // CHANGELOG 18 | // (minor and older changes stripped away, please see git history for details) 19 | // 2022-01-26: Inputs: replaced short-lived io.AddKeyModsEvent() (added two weeks ago)with io.AddKeyEvent() using ImGuiKey_ModXXX flags. Sorry for the confusion. 20 | // 2021-01-20: Inputs: calling new io.AddKeyAnalogEvent() for gamepad support, instead of writing directly to io.NavInputs[]. 21 | // 2022-01-17: Inputs: calling new io.AddMousePosEvent(), io.AddMouseButtonEvent(), io.AddMouseWheelEvent() API (1.87+). 22 | // 2022-01-17: Inputs: always update key mods next and before key event (not in NewFrame) to fix input queue with very low framerates. 23 | // 2022-01-12: *BREAKING CHANGE*: Now using glfwSetCursorPosCallback(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorPosCallback() and forward it to the backend via ImGui_ImplGlfw_CursorPosCallback(). 24 | // 2022-01-10: Inputs: calling new io.AddKeyEvent(), io.AddKeyModsEvent() + io.SetKeyEventNativeData() API (1.87+). Support for full ImGuiKey range. 25 | // 2022-01-05: Inputs: Converting GLFW untranslated keycodes back to translated keycodes (in the ImGui_ImplGlfw_KeyCallback() function) in order to match the behavior of every other backend, and facilitate the use of GLFW with lettered-shortcuts API. 
26 | // 2021-08-17: *BREAKING CHANGE*: Now using glfwSetWindowFocusCallback() to call io.AddFocusEvent(). If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetWindowFocusCallback() and forward it to the backend via ImGui_ImplGlfw_WindowFocusCallback(). 27 | // 2021-07-29: *BREAKING CHANGE*: Now using glfwSetCursorEnterCallback(). MousePos is correctly reported when the host platform window is hovered but not focused. If you called ImGui_ImplGlfw_InitXXX() with install_callbacks = false, you MUST install glfwSetCursorEnterCallback() callback and forward it to the backend via ImGui_ImplGlfw_CursorEnterCallback(). 28 | // 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX). 29 | // 2020-01-17: Inputs: Disable error callback while assigning mouse cursors because some X11 setups don't have them and it generates errors. 30 | // 2019-12-05: Inputs: Added support for new mouse cursors added in GLFW 3.4+ (resizing cursors, not allowed cursor). 31 | // 2019-10-18: Misc: Previously installed user callbacks are now restored on shutdown. 32 | // 2019-07-21: Inputs: Added mapping for ImGuiKey_KeyPadEnter. 33 | // 2019-05-11: Inputs: Don't filter value from character callback before calling AddInputCharacter(). 34 | // 2019-03-12: Misc: Preserve DisplayFramebufferScale when main window is minimized. 35 | // 2018-11-30: Misc: Setting up io.BackendPlatformName so it can be displayed in the About Window. 36 | // 2018-11-07: Inputs: When installing our GLFW callbacks, we save user's previously installed ones - if any - and chain call them. 37 | // 2018-08-01: Inputs: Workaround for Emscripten which doesn't seem to handle focus related calls. 38 | // 2018-06-29: Inputs: Added support for the ImGuiMouseCursor_Hand cursor. 39 | // 2018-06-08: Misc: Extracted imgui_impl_glfw.cpp/.h away from the old combined GLFW+OpenGL/Vulkan examples.
40 | // 2018-03-20: Misc: Setup io.BackendFlags ImGuiBackendFlags_HasMouseCursors flag + honor ImGuiConfigFlags_NoMouseCursorChange flag. 41 | // 2018-02-20: Inputs: Added support for mouse cursors (ImGui::GetMouseCursor() value, passed to glfwSetCursor()). 42 | // 2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves. 43 | // 2018-02-06: Inputs: Added mapping for ImGuiKey_Space. 44 | // 2018-01-25: Inputs: Added gamepad support if ImGuiConfigFlags_NavEnableGamepad is set. 45 | // 2018-01-25: Inputs: Honoring the io.WantSetMousePos by repositioning the mouse (when using navigation and ImGuiConfigFlags_NavMoveMouse is set). 46 | // 2018-01-20: Inputs: Added Horizontal Mouse Wheel support. 47 | // 2018-01-18: Inputs: Added mapping for ImGuiKey_Insert. 48 | // 2017-08-25: Inputs: MousePos set to -FLT_MAX,-FLT_MAX when mouse is unavailable/missing (instead of -1,-1). 49 | // 2016-10-15: Misc: Added a void* user_data parameter to Clipboard function handlers. 50 | 51 | #include "imgui.h" 52 | #include "imgui_impl_glfw.h" 53 | 54 | // Clang warnings with -Weverything 55 | #if defined(__clang__) 56 | #pragma clang diagnostic push 57 | #pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast 58 | #pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness 59 | #if __has_warning("-Wzero-as-null-pointer-constant") 60 | #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" 61 | #endif 62 | #endif 63 | 64 | // GLFW 65 | #include <GLFW/glfw3.h> 66 | #ifdef _WIN32 67 | #undef APIENTRY 68 | #define GLFW_EXPOSE_NATIVE_WIN32 69 | #include <GLFW/glfw3native.h> // for glfwGetWin32Window 70 | #endif 71 | #ifdef GLFW_RESIZE_NESW_CURSOR // Let's be nice to people who pulled GLFW between 2019-04-16 (3.4 define) and 2019-11-29 (cursors defines) // FIXME: Remove when GLFW 3.4 is released?
72 | #define GLFW_HAS_NEW_CURSORS (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3400) // 3.4+ GLFW_RESIZE_ALL_CURSOR, GLFW_RESIZE_NESW_CURSOR, GLFW_RESIZE_NWSE_CURSOR, GLFW_NOT_ALLOWED_CURSOR 73 | #else 74 | #define GLFW_HAS_NEW_CURSORS (0) 75 | #endif 76 | #define GLFW_HAS_GAMEPAD_API (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3300) // 3.3+ glfwGetGamepadState() new api 77 | #define GLFW_HAS_GET_KEY_NAME (GLFW_VERSION_MAJOR * 1000 + GLFW_VERSION_MINOR * 100 >= 3200) // 3.2+ glfwGetKeyName() 78 | 79 | // GLFW data 80 | enum GlfwClientApi 81 | { 82 | GlfwClientApi_Unknown, 83 | GlfwClientApi_OpenGL, 84 | GlfwClientApi_Vulkan 85 | }; 86 | 87 | struct ImGui_ImplGlfw_Data 88 | { 89 | GLFWwindow* Window; 90 | GlfwClientApi ClientApi; 91 | double Time; 92 | GLFWwindow* MouseWindow; 93 | GLFWcursor* MouseCursors[ImGuiMouseCursor_COUNT]; 94 | bool InstalledCallbacks; 95 | 96 | // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any. 97 | GLFWwindowfocusfun PrevUserCallbackWindowFocus; 98 | GLFWcursorposfun PrevUserCallbackCursorPos; 99 | GLFWcursorenterfun PrevUserCallbackCursorEnter; 100 | GLFWmousebuttonfun PrevUserCallbackMousebutton; 101 | GLFWscrollfun PrevUserCallbackScroll; 102 | GLFWkeyfun PrevUserCallbackKey; 103 | GLFWcharfun PrevUserCallbackChar; 104 | GLFWmonitorfun PrevUserCallbackMonitor; 105 | 106 | ImGui_ImplGlfw_Data() { memset(this, 0, sizeof(*this)); } 107 | }; 108 | 109 | // Backend data stored in io.BackendPlatformUserData to allow support for multiple Dear ImGui contexts 110 | // It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts. 111 | // FIXME: multi-context support is not well tested and probably dysfunctional in this backend. 
112 | // - Because glfwPollEvents() process all windows and some events may be called outside of it, you will need to register your own callbacks 113 | // (passing install_callbacks=false in ImGui_ImplGlfw_InitXXX functions), set the current dear imgui context and then call our callbacks. 114 | // - Otherwise we may need to store a GLFWWindow* -> ImGuiContext* map and handle this in the backend, adding a little bit of extra complexity to it. 115 | // FIXME: some shared resources (mouse cursor shape, gamepad) are mishandled when using multi-context. 116 | static ImGui_ImplGlfw_Data* ImGui_ImplGlfw_GetBackendData() 117 | { 118 | return ImGui::GetCurrentContext() ? (ImGui_ImplGlfw_Data*)ImGui::GetIO().BackendPlatformUserData : NULL; 119 | } 120 | 121 | // Functions 122 | static const char* ImGui_ImplGlfw_GetClipboardText(void* user_data) 123 | { 124 | return glfwGetClipboardString((GLFWwindow*)user_data); 125 | } 126 | 127 | static void ImGui_ImplGlfw_SetClipboardText(void* user_data, const char* text) 128 | { 129 | glfwSetClipboardString((GLFWwindow*)user_data, text); 130 | } 131 | 132 | static ImGuiKey ImGui_ImplGlfw_KeyToImGuiKey(int key) 133 | { 134 | switch (key) 135 | { 136 | case GLFW_KEY_TAB: return ImGuiKey_Tab; 137 | case GLFW_KEY_LEFT: return ImGuiKey_LeftArrow; 138 | case GLFW_KEY_RIGHT: return ImGuiKey_RightArrow; 139 | case GLFW_KEY_UP: return ImGuiKey_UpArrow; 140 | case GLFW_KEY_DOWN: return ImGuiKey_DownArrow; 141 | case GLFW_KEY_PAGE_UP: return ImGuiKey_PageUp; 142 | case GLFW_KEY_PAGE_DOWN: return ImGuiKey_PageDown; 143 | case GLFW_KEY_HOME: return ImGuiKey_Home; 144 | case GLFW_KEY_END: return ImGuiKey_End; 145 | case GLFW_KEY_INSERT: return ImGuiKey_Insert; 146 | case GLFW_KEY_DELETE: return ImGuiKey_Delete; 147 | case GLFW_KEY_BACKSPACE: return ImGuiKey_Backspace; 148 | case GLFW_KEY_SPACE: return ImGuiKey_Space; 149 | case GLFW_KEY_ENTER: return ImGuiKey_Enter; 150 | case GLFW_KEY_ESCAPE: return ImGuiKey_Escape; 151 | case GLFW_KEY_APOSTROPHE: 
return ImGuiKey_Apostrophe; 152 | case GLFW_KEY_COMMA: return ImGuiKey_Comma; 153 | case GLFW_KEY_MINUS: return ImGuiKey_Minus; 154 | case GLFW_KEY_PERIOD: return ImGuiKey_Period; 155 | case GLFW_KEY_SLASH: return ImGuiKey_Slash; 156 | case GLFW_KEY_SEMICOLON: return ImGuiKey_Semicolon; 157 | case GLFW_KEY_EQUAL: return ImGuiKey_Equal; 158 | case GLFW_KEY_LEFT_BRACKET: return ImGuiKey_LeftBracket; 159 | case GLFW_KEY_BACKSLASH: return ImGuiKey_Backslash; 160 | case GLFW_KEY_RIGHT_BRACKET: return ImGuiKey_RightBracket; 161 | case GLFW_KEY_GRAVE_ACCENT: return ImGuiKey_GraveAccent; 162 | case GLFW_KEY_CAPS_LOCK: return ImGuiKey_CapsLock; 163 | case GLFW_KEY_SCROLL_LOCK: return ImGuiKey_ScrollLock; 164 | case GLFW_KEY_NUM_LOCK: return ImGuiKey_NumLock; 165 | case GLFW_KEY_PRINT_SCREEN: return ImGuiKey_PrintScreen; 166 | case GLFW_KEY_PAUSE: return ImGuiKey_Pause; 167 | case GLFW_KEY_KP_0: return ImGuiKey_Keypad0; 168 | case GLFW_KEY_KP_1: return ImGuiKey_Keypad1; 169 | case GLFW_KEY_KP_2: return ImGuiKey_Keypad2; 170 | case GLFW_KEY_KP_3: return ImGuiKey_Keypad3; 171 | case GLFW_KEY_KP_4: return ImGuiKey_Keypad4; 172 | case GLFW_KEY_KP_5: return ImGuiKey_Keypad5; 173 | case GLFW_KEY_KP_6: return ImGuiKey_Keypad6; 174 | case GLFW_KEY_KP_7: return ImGuiKey_Keypad7; 175 | case GLFW_KEY_KP_8: return ImGuiKey_Keypad8; 176 | case GLFW_KEY_KP_9: return ImGuiKey_Keypad9; 177 | case GLFW_KEY_KP_DECIMAL: return ImGuiKey_KeypadDecimal; 178 | case GLFW_KEY_KP_DIVIDE: return ImGuiKey_KeypadDivide; 179 | case GLFW_KEY_KP_MULTIPLY: return ImGuiKey_KeypadMultiply; 180 | case GLFW_KEY_KP_SUBTRACT: return ImGuiKey_KeypadSubtract; 181 | case GLFW_KEY_KP_ADD: return ImGuiKey_KeypadAdd; 182 | case GLFW_KEY_KP_ENTER: return ImGuiKey_KeypadEnter; 183 | case GLFW_KEY_KP_EQUAL: return ImGuiKey_KeypadEqual; 184 | case GLFW_KEY_LEFT_SHIFT: return ImGuiKey_LeftShift; 185 | case GLFW_KEY_LEFT_CONTROL: return ImGuiKey_LeftCtrl; 186 | case GLFW_KEY_LEFT_ALT: return ImGuiKey_LeftAlt; 187 | case 
GLFW_KEY_LEFT_SUPER: return ImGuiKey_LeftSuper; 188 | case GLFW_KEY_RIGHT_SHIFT: return ImGuiKey_RightShift; 189 | case GLFW_KEY_RIGHT_CONTROL: return ImGuiKey_RightCtrl; 190 | case GLFW_KEY_RIGHT_ALT: return ImGuiKey_RightAlt; 191 | case GLFW_KEY_RIGHT_SUPER: return ImGuiKey_RightSuper; 192 | case GLFW_KEY_MENU: return ImGuiKey_Menu; 193 | case GLFW_KEY_0: return ImGuiKey_0; 194 | case GLFW_KEY_1: return ImGuiKey_1; 195 | case GLFW_KEY_2: return ImGuiKey_2; 196 | case GLFW_KEY_3: return ImGuiKey_3; 197 | case GLFW_KEY_4: return ImGuiKey_4; 198 | case GLFW_KEY_5: return ImGuiKey_5; 199 | case GLFW_KEY_6: return ImGuiKey_6; 200 | case GLFW_KEY_7: return ImGuiKey_7; 201 | case GLFW_KEY_8: return ImGuiKey_8; 202 | case GLFW_KEY_9: return ImGuiKey_9; 203 | case GLFW_KEY_A: return ImGuiKey_A; 204 | case GLFW_KEY_B: return ImGuiKey_B; 205 | case GLFW_KEY_C: return ImGuiKey_C; 206 | case GLFW_KEY_D: return ImGuiKey_D; 207 | case GLFW_KEY_E: return ImGuiKey_E; 208 | case GLFW_KEY_F: return ImGuiKey_F; 209 | case GLFW_KEY_G: return ImGuiKey_G; 210 | case GLFW_KEY_H: return ImGuiKey_H; 211 | case GLFW_KEY_I: return ImGuiKey_I; 212 | case GLFW_KEY_J: return ImGuiKey_J; 213 | case GLFW_KEY_K: return ImGuiKey_K; 214 | case GLFW_KEY_L: return ImGuiKey_L; 215 | case GLFW_KEY_M: return ImGuiKey_M; 216 | case GLFW_KEY_N: return ImGuiKey_N; 217 | case GLFW_KEY_O: return ImGuiKey_O; 218 | case GLFW_KEY_P: return ImGuiKey_P; 219 | case GLFW_KEY_Q: return ImGuiKey_Q; 220 | case GLFW_KEY_R: return ImGuiKey_R; 221 | case GLFW_KEY_S: return ImGuiKey_S; 222 | case GLFW_KEY_T: return ImGuiKey_T; 223 | case GLFW_KEY_U: return ImGuiKey_U; 224 | case GLFW_KEY_V: return ImGuiKey_V; 225 | case GLFW_KEY_W: return ImGuiKey_W; 226 | case GLFW_KEY_X: return ImGuiKey_X; 227 | case GLFW_KEY_Y: return ImGuiKey_Y; 228 | case GLFW_KEY_Z: return ImGuiKey_Z; 229 | case GLFW_KEY_F1: return ImGuiKey_F1; 230 | case GLFW_KEY_F2: return ImGuiKey_F2; 231 | case GLFW_KEY_F3: return ImGuiKey_F3; 232 | case 
GLFW_KEY_F4: return ImGuiKey_F4; 233 | case GLFW_KEY_F5: return ImGuiKey_F5; 234 | case GLFW_KEY_F6: return ImGuiKey_F6; 235 | case GLFW_KEY_F7: return ImGuiKey_F7; 236 | case GLFW_KEY_F8: return ImGuiKey_F8; 237 | case GLFW_KEY_F9: return ImGuiKey_F9; 238 | case GLFW_KEY_F10: return ImGuiKey_F10; 239 | case GLFW_KEY_F11: return ImGuiKey_F11; 240 | case GLFW_KEY_F12: return ImGuiKey_F12; 241 | default: return ImGuiKey_None; 242 | } 243 | } 244 | 245 | static void ImGui_ImplGlfw_UpdateKeyModifiers(int mods) 246 | { 247 | ImGuiIO& io = ImGui::GetIO(); 248 | io.AddKeyEvent(ImGuiKey_ModCtrl, (mods & GLFW_MOD_CONTROL) != 0); 249 | io.AddKeyEvent(ImGuiKey_ModShift, (mods & GLFW_MOD_SHIFT) != 0); 250 | io.AddKeyEvent(ImGuiKey_ModAlt, (mods & GLFW_MOD_ALT) != 0); 251 | io.AddKeyEvent(ImGuiKey_ModSuper, (mods & GLFW_MOD_SUPER) != 0); 252 | } 253 | 254 | void ImGui_ImplGlfw_MouseButtonCallback(GLFWwindow* window, int button, int action, int mods) 255 | { 256 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 257 | if (bd->PrevUserCallbackMousebutton != NULL && window == bd->Window) 258 | bd->PrevUserCallbackMousebutton(window, button, action, mods); 259 | 260 | ImGui_ImplGlfw_UpdateKeyModifiers(mods); 261 | 262 | ImGuiIO& io = ImGui::GetIO(); 263 | if (button >= 0 && button < ImGuiMouseButton_COUNT) 264 | io.AddMouseButtonEvent(button, action == GLFW_PRESS); 265 | } 266 | 267 | void ImGui_ImplGlfw_ScrollCallback(GLFWwindow* window, double xoffset, double yoffset) 268 | { 269 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 270 | if (bd->PrevUserCallbackScroll != NULL && window == bd->Window) 271 | bd->PrevUserCallbackScroll(window, xoffset, yoffset); 272 | 273 | ImGuiIO& io = ImGui::GetIO(); 274 | io.AddMouseWheelEvent((float)xoffset, (float)yoffset); 275 | } 276 | 277 | static int ImGui_ImplGlfw_TranslateUntranslatedKey(int key, int scancode) 278 | { 279 | #if GLFW_HAS_GET_KEY_NAME && !defined(__EMSCRIPTEN__) 280 | // GLFW 3.1+ attempts to 
"untranslate" keys, which goes the opposite of what every other framework does, making using lettered shortcuts difficult. 281 | // (It had reasons to do so: namely GLFW is/was more likely to be used for WASD-type game controls rather than lettered shortcuts, but IHMO the 3.1 change could have been done differently) 282 | // See https://github.com/glfw/glfw/issues/1502 for details. 283 | // Adding a workaround to undo this (so our keys are translated->untranslated->translated, likely a lossy process). 284 | // This won't cover edge cases but this is at least going to cover common cases. 285 | const char* key_name = glfwGetKeyName(key, scancode); 286 | if (key_name && key_name[0] != 0 && key_name[1] == 0) 287 | { 288 | const char char_names[] = "`-=[]\\,;\'./"; 289 | const int char_keys[] = { GLFW_KEY_GRAVE_ACCENT, GLFW_KEY_MINUS, GLFW_KEY_EQUAL, GLFW_KEY_LEFT_BRACKET, GLFW_KEY_RIGHT_BRACKET, GLFW_KEY_BACKSLASH, GLFW_KEY_COMMA, GLFW_KEY_SEMICOLON, GLFW_KEY_APOSTROPHE, GLFW_KEY_PERIOD, GLFW_KEY_SLASH, 0 }; 290 | IM_ASSERT(IM_ARRAYSIZE(char_names) == IM_ARRAYSIZE(char_keys)); 291 | if (key_name[0] >= '0' && key_name[0] <= '9') { key = GLFW_KEY_0 + (key_name[0] - '0'); } 292 | else if (key_name[0] >= 'A' && key_name[0] <= 'Z') { key = GLFW_KEY_A + (key_name[0] - 'A'); } 293 | else if (const char* p = strchr(char_names, key_name[0])) { key = char_keys[p - char_names]; } 294 | } 295 | // if (action == GLFW_PRESS) printf("key %d scancode %d name '%s'\n", key, scancode, key_name); 296 | #else 297 | IM_UNUSED(scancode); 298 | #endif 299 | return key; 300 | } 301 | 302 | void ImGui_ImplGlfw_KeyCallback(GLFWwindow* window, int keycode, int scancode, int action, int mods) 303 | { 304 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 305 | if (bd->PrevUserCallbackKey != NULL && window == bd->Window) 306 | bd->PrevUserCallbackKey(window, keycode, scancode, action, mods); 307 | 308 | if (action != GLFW_PRESS && action != GLFW_RELEASE) 309 | return; 310 | 311 | 
ImGui_ImplGlfw_UpdateKeyModifiers(mods); 312 | 313 | keycode = ImGui_ImplGlfw_TranslateUntranslatedKey(keycode, scancode); 314 | 315 | ImGuiIO& io = ImGui::GetIO(); 316 | ImGuiKey imgui_key = ImGui_ImplGlfw_KeyToImGuiKey(keycode); 317 | io.AddKeyEvent(imgui_key, (action == GLFW_PRESS)); 318 | io.SetKeyEventNativeData(imgui_key, keycode, scancode); // To support legacy indexing (<1.87 user code) 319 | } 320 | 321 | void ImGui_ImplGlfw_WindowFocusCallback(GLFWwindow* window, int focused) 322 | { 323 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 324 | if (bd->PrevUserCallbackWindowFocus != NULL && window == bd->Window) 325 | bd->PrevUserCallbackWindowFocus(window, focused); 326 | 327 | ImGuiIO& io = ImGui::GetIO(); 328 | io.AddFocusEvent(focused != 0); 329 | } 330 | 331 | void ImGui_ImplGlfw_CursorPosCallback(GLFWwindow* window, double x, double y) 332 | { 333 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 334 | if (bd->PrevUserCallbackCursorPos != NULL && window == bd->Window) 335 | bd->PrevUserCallbackCursorPos(window, x, y); 336 | 337 | ImGuiIO& io = ImGui::GetIO(); 338 | io.AddMousePosEvent((float)x, (float)y); 339 | } 340 | 341 | void ImGui_ImplGlfw_CursorEnterCallback(GLFWwindow* window, int entered) 342 | { 343 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 344 | if (bd->PrevUserCallbackCursorEnter != NULL && window == bd->Window) 345 | bd->PrevUserCallbackCursorEnter(window, entered); 346 | 347 | ImGuiIO& io = ImGui::GetIO(); 348 | if (entered) 349 | bd->MouseWindow = window; 350 | if (!entered && bd->MouseWindow == window) 351 | { 352 | bd->MouseWindow = NULL; 353 | io.AddMousePosEvent(-FLT_MAX, -FLT_MAX); 354 | } 355 | } 356 | 357 | void ImGui_ImplGlfw_CharCallback(GLFWwindow* window, unsigned int c) 358 | { 359 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 360 | if (bd->PrevUserCallbackChar != NULL && window == bd->Window) 361 | bd->PrevUserCallbackChar(window, c); 362 | 363 | ImGuiIO& io = 
ImGui::GetIO(); 364 | io.AddInputCharacter(c); 365 | } 366 | 367 | void ImGui_ImplGlfw_MonitorCallback(GLFWmonitor*, int) 368 | { 369 | // Unused in 'master' branch but 'docking' branch will use this, so we declare it ahead of it so if you have to install callbacks you can install this one too. 370 | } 371 | 372 | static bool ImGui_ImplGlfw_Init(GLFWwindow* window, bool install_callbacks, GlfwClientApi client_api) 373 | { 374 | ImGuiIO& io = ImGui::GetIO(); 375 | IM_ASSERT(io.BackendPlatformUserData == NULL && "Already initialized a platform backend!"); 376 | 377 | // Setup backend capabilities flags 378 | ImGui_ImplGlfw_Data* bd = IM_NEW(ImGui_ImplGlfw_Data)(); 379 | io.BackendPlatformUserData = (void*)bd; 380 | io.BackendPlatformName = "imgui_impl_glfw"; 381 | io.BackendFlags |= ImGuiBackendFlags_HasMouseCursors; // We can honor GetMouseCursor() values (optional) 382 | io.BackendFlags |= ImGuiBackendFlags_HasSetMousePos; // We can honor io.WantSetMousePos requests (optional, rarely used) 383 | 384 | bd->Window = window; 385 | bd->Time = 0.0; 386 | 387 | io.SetClipboardTextFn = ImGui_ImplGlfw_SetClipboardText; 388 | io.GetClipboardTextFn = ImGui_ImplGlfw_GetClipboardText; 389 | io.ClipboardUserData = bd->Window; 390 | 391 | // Set platform dependent data in viewport 392 | #if defined(_WIN32) 393 | ImGui::GetMainViewport()->PlatformHandleRaw = (void*)glfwGetWin32Window(bd->Window); 394 | #endif 395 | 396 | // Create mouse cursors 397 | // (By design, on X11 cursors are user configurable and some cursors may be missing. When a cursor doesn't exist, 398 | // GLFW will emit an error which will often be printed by the app, so we temporarily disable error reporting. 399 | // Missing cursors will return NULL and our _UpdateMouseCursor() function will use the Arrow cursor instead.) 
400 | GLFWerrorfun prev_error_callback = glfwSetErrorCallback(NULL); 401 | bd->MouseCursors[ImGuiMouseCursor_Arrow] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR); 402 | bd->MouseCursors[ImGuiMouseCursor_TextInput] = glfwCreateStandardCursor(GLFW_IBEAM_CURSOR); 403 | bd->MouseCursors[ImGuiMouseCursor_ResizeNS] = glfwCreateStandardCursor(GLFW_VRESIZE_CURSOR); 404 | bd->MouseCursors[ImGuiMouseCursor_ResizeEW] = glfwCreateStandardCursor(GLFW_HRESIZE_CURSOR); 405 | bd->MouseCursors[ImGuiMouseCursor_Hand] = glfwCreateStandardCursor(GLFW_HAND_CURSOR); 406 | #if GLFW_HAS_NEW_CURSORS 407 | bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_RESIZE_ALL_CURSOR); 408 | bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_RESIZE_NESW_CURSOR); 409 | bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_RESIZE_NWSE_CURSOR); 410 | bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_NOT_ALLOWED_CURSOR); 411 | #else 412 | bd->MouseCursors[ImGuiMouseCursor_ResizeAll] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR); 413 | bd->MouseCursors[ImGuiMouseCursor_ResizeNESW] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR); 414 | bd->MouseCursors[ImGuiMouseCursor_ResizeNWSE] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR); 415 | bd->MouseCursors[ImGuiMouseCursor_NotAllowed] = glfwCreateStandardCursor(GLFW_ARROW_CURSOR); 416 | #endif 417 | glfwSetErrorCallback(prev_error_callback); 418 | 419 | // Chain GLFW callbacks: our callbacks will call the user's previously installed callbacks, if any. 
420 | bd->PrevUserCallbackWindowFocus = NULL; 421 | bd->PrevUserCallbackCursorEnter = NULL; 422 | bd->PrevUserCallbackMousebutton = NULL; 423 | bd->PrevUserCallbackScroll = NULL; 424 | bd->PrevUserCallbackKey = NULL; 425 | bd->PrevUserCallbackChar = NULL; 426 | bd->PrevUserCallbackMonitor = NULL; 427 | if (install_callbacks) 428 | { 429 | bd->InstalledCallbacks = true; 430 | bd->PrevUserCallbackWindowFocus = glfwSetWindowFocusCallback(window, ImGui_ImplGlfw_WindowFocusCallback); 431 | bd->PrevUserCallbackCursorEnter = glfwSetCursorEnterCallback(window, ImGui_ImplGlfw_CursorEnterCallback); 432 | bd->PrevUserCallbackCursorPos = glfwSetCursorPosCallback(window, ImGui_ImplGlfw_CursorPosCallback); 433 | bd->PrevUserCallbackMousebutton = glfwSetMouseButtonCallback(window, ImGui_ImplGlfw_MouseButtonCallback); 434 | bd->PrevUserCallbackScroll = glfwSetScrollCallback(window, ImGui_ImplGlfw_ScrollCallback); 435 | bd->PrevUserCallbackKey = glfwSetKeyCallback(window, ImGui_ImplGlfw_KeyCallback); 436 | bd->PrevUserCallbackChar = glfwSetCharCallback(window, ImGui_ImplGlfw_CharCallback); 437 | bd->PrevUserCallbackMonitor = glfwSetMonitorCallback(ImGui_ImplGlfw_MonitorCallback); 438 | } 439 | 440 | bd->ClientApi = client_api; 441 | return true; 442 | } 443 | 444 | bool ImGui_ImplGlfw_InitForOpenGL(GLFWwindow* window, bool install_callbacks) 445 | { 446 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_OpenGL); 447 | } 448 | 449 | bool ImGui_ImplGlfw_InitForVulkan(GLFWwindow* window, bool install_callbacks) 450 | { 451 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Vulkan); 452 | } 453 | 454 | bool ImGui_ImplGlfw_InitForOther(GLFWwindow* window, bool install_callbacks) 455 | { 456 | return ImGui_ImplGlfw_Init(window, install_callbacks, GlfwClientApi_Unknown); 457 | } 458 | 459 | void ImGui_ImplGlfw_Shutdown() 460 | { 461 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 462 | IM_ASSERT(bd != NULL && "No platform backend to 
shutdown, or already shutdown?"); 463 | ImGuiIO& io = ImGui::GetIO(); 464 | 465 | if (bd->InstalledCallbacks) 466 | { 467 | glfwSetWindowFocusCallback(bd->Window, bd->PrevUserCallbackWindowFocus); 468 | glfwSetCursorEnterCallback(bd->Window, bd->PrevUserCallbackCursorEnter); 469 | glfwSetCursorPosCallback(bd->Window, bd->PrevUserCallbackCursorPos); 470 | glfwSetMouseButtonCallback(bd->Window, bd->PrevUserCallbackMousebutton); 471 | glfwSetScrollCallback(bd->Window, bd->PrevUserCallbackScroll); 472 | glfwSetKeyCallback(bd->Window, bd->PrevUserCallbackKey); 473 | glfwSetCharCallback(bd->Window, bd->PrevUserCallbackChar); 474 | glfwSetMonitorCallback(bd->PrevUserCallbackMonitor); 475 | } 476 | 477 | for (ImGuiMouseCursor cursor_n = 0; cursor_n < ImGuiMouseCursor_COUNT; cursor_n++) 478 | glfwDestroyCursor(bd->MouseCursors[cursor_n]); 479 | 480 | io.BackendPlatformName = NULL; 481 | io.BackendPlatformUserData = NULL; 482 | IM_DELETE(bd); 483 | } 484 | 485 | static void ImGui_ImplGlfw_UpdateMouseData() 486 | { 487 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 488 | ImGuiIO& io = ImGui::GetIO(); 489 | 490 | #ifdef __EMSCRIPTEN__ 491 | const bool is_app_focused = true; 492 | #else 493 | const bool is_app_focused = glfwGetWindowAttrib(bd->Window, GLFW_FOCUSED) != 0; 494 | #endif 495 | if (is_app_focused) 496 | { 497 | // (Optional) Set OS mouse position from Dear ImGui if requested (rarely used, only when ImGuiConfigFlags_NavEnableSetMousePos is enabled by user) 498 | if (io.WantSetMousePos) 499 | glfwSetCursorPos(bd->Window, (double)io.MousePos.x, (double)io.MousePos.y); 500 | 501 | // (Optional) Fallback to provide mouse position when focused (ImGui_ImplGlfw_CursorPosCallback already provides this when hovered or captured) 502 | if (is_app_focused && bd->MouseWindow == NULL) 503 | { 504 | double mouse_x, mouse_y; 505 | glfwGetCursorPos(bd->Window, &mouse_x, &mouse_y); 506 | io.AddMousePosEvent((float)mouse_x, (float)mouse_y); 507 | } 508 | } 509 | } 510 | 
511 | static void ImGui_ImplGlfw_UpdateMouseCursor() 512 | { 513 | ImGuiIO& io = ImGui::GetIO(); 514 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 515 | if ((io.ConfigFlags & ImGuiConfigFlags_NoMouseCursorChange) || glfwGetInputMode(bd->Window, GLFW_CURSOR) == GLFW_CURSOR_DISABLED) 516 | return; 517 | 518 | ImGuiMouseCursor imgui_cursor = ImGui::GetMouseCursor(); 519 | if (imgui_cursor == ImGuiMouseCursor_None || io.MouseDrawCursor) 520 | { 521 | // Hide OS mouse cursor if imgui is drawing it or if it wants no cursor 522 | glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_HIDDEN); 523 | } 524 | else 525 | { 526 | // Show OS mouse cursor 527 | // FIXME-PLATFORM: Unfocused windows seems to fail changing the mouse cursor with GLFW 3.2, but 3.3 works here. 528 | glfwSetCursor(bd->Window, bd->MouseCursors[imgui_cursor] ? bd->MouseCursors[imgui_cursor] : bd->MouseCursors[ImGuiMouseCursor_Arrow]); 529 | glfwSetInputMode(bd->Window, GLFW_CURSOR, GLFW_CURSOR_NORMAL); 530 | } 531 | } 532 | 533 | // Update gamepad inputs 534 | static inline float Saturate(float v) { return v < 0.0f ? 0.0f : v > 1.0f ? 
1.0f : v; } 535 | static void ImGui_ImplGlfw_UpdateGamepads() 536 | { 537 | ImGuiIO& io = ImGui::GetIO(); 538 | if ((io.ConfigFlags & ImGuiConfigFlags_NavEnableGamepad) == 0) 539 | return; 540 | 541 | io.BackendFlags &= ~ImGuiBackendFlags_HasGamepad; 542 | #if GLFW_HAS_GAMEPAD_API 543 | GLFWgamepadstate gamepad; 544 | if (!glfwGetGamepadState(GLFW_JOYSTICK_1, &gamepad)) 545 | return; 546 | #define MAP_BUTTON(KEY_NO, BUTTON_NO, _UNUSED) do { io.AddKeyEvent(KEY_NO, gamepad.buttons[BUTTON_NO] != 0); } while (0) 547 | #define MAP_ANALOG(KEY_NO, AXIS_NO, _UNUSED, V0, V1) do { float v = gamepad.axes[AXIS_NO]; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0) 548 | #else 549 | int axes_count = 0, buttons_count = 0; 550 | const float* axes = glfwGetJoystickAxes(GLFW_JOYSTICK_1, &axes_count); 551 | const unsigned char* buttons = glfwGetJoystickButtons(GLFW_JOYSTICK_1, &buttons_count); 552 | if (axes_count == 0 || buttons_count == 0) 553 | return; 554 | #define MAP_BUTTON(KEY_NO, _UNUSED, BUTTON_NO) do { io.AddKeyEvent(KEY_NO, (buttons_count > BUTTON_NO && buttons[BUTTON_NO] == GLFW_PRESS)); } while (0) 555 | #define MAP_ANALOG(KEY_NO, _UNUSED, AXIS_NO, V0, V1) do { float v = (axes_count > AXIS_NO) ? 
axes[AXIS_NO] : V0; v = (v - V0) / (V1 - V0); io.AddKeyAnalogEvent(KEY_NO, v > 0.10f, Saturate(v)); } while (0) 556 | #endif 557 | io.BackendFlags |= ImGuiBackendFlags_HasGamepad; 558 | MAP_BUTTON(ImGuiKey_GamepadStart, GLFW_GAMEPAD_BUTTON_START, 7); 559 | MAP_BUTTON(ImGuiKey_GamepadBack, GLFW_GAMEPAD_BUTTON_BACK, 6); 560 | MAP_BUTTON(ImGuiKey_GamepadFaceDown, GLFW_GAMEPAD_BUTTON_A, 0); // Xbox A, PS Cross 561 | MAP_BUTTON(ImGuiKey_GamepadFaceRight, GLFW_GAMEPAD_BUTTON_B, 1); // Xbox B, PS Circle 562 | MAP_BUTTON(ImGuiKey_GamepadFaceLeft, GLFW_GAMEPAD_BUTTON_X, 2); // Xbox X, PS Square 563 | MAP_BUTTON(ImGuiKey_GamepadFaceUp, GLFW_GAMEPAD_BUTTON_Y, 3); // Xbox Y, PS Triangle 564 | MAP_BUTTON(ImGuiKey_GamepadDpadLeft, GLFW_GAMEPAD_BUTTON_DPAD_LEFT, 13); 565 | MAP_BUTTON(ImGuiKey_GamepadDpadRight, GLFW_GAMEPAD_BUTTON_DPAD_RIGHT, 11); 566 | MAP_BUTTON(ImGuiKey_GamepadDpadUp, GLFW_GAMEPAD_BUTTON_DPAD_UP, 10); 567 | MAP_BUTTON(ImGuiKey_GamepadDpadDown, GLFW_GAMEPAD_BUTTON_DPAD_DOWN, 12); 568 | MAP_BUTTON(ImGuiKey_GamepadL1, GLFW_GAMEPAD_BUTTON_LEFT_BUMPER, 4); 569 | MAP_BUTTON(ImGuiKey_GamepadR1, GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER, 5); 570 | MAP_ANALOG(ImGuiKey_GamepadL2, GLFW_GAMEPAD_AXIS_LEFT_TRIGGER, 4, -0.75f, +1.0f); 571 | MAP_ANALOG(ImGuiKey_GamepadR2, GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER, 5, -0.75f, +1.0f); 572 | MAP_BUTTON(ImGuiKey_GamepadL3, GLFW_GAMEPAD_BUTTON_LEFT_THUMB, 8); 573 | MAP_BUTTON(ImGuiKey_GamepadR3, GLFW_GAMEPAD_BUTTON_RIGHT_THUMB, 9); 574 | MAP_ANALOG(ImGuiKey_GamepadLStickLeft, GLFW_GAMEPAD_AXIS_LEFT_X, 0, -0.25f, -1.0f); 575 | MAP_ANALOG(ImGuiKey_GamepadLStickRight, GLFW_GAMEPAD_AXIS_LEFT_X, 0, +0.25f, +1.0f); 576 | MAP_ANALOG(ImGuiKey_GamepadLStickUp, GLFW_GAMEPAD_AXIS_LEFT_Y, 1, -0.25f, -1.0f); 577 | MAP_ANALOG(ImGuiKey_GamepadLStickDown, GLFW_GAMEPAD_AXIS_LEFT_Y, 1, +0.25f, +1.0f); 578 | MAP_ANALOG(ImGuiKey_GamepadRStickLeft, GLFW_GAMEPAD_AXIS_RIGHT_X, 2, -0.25f, -1.0f); 579 | MAP_ANALOG(ImGuiKey_GamepadRStickRight, GLFW_GAMEPAD_AXIS_RIGHT_X, 2, 
+0.25f, +1.0f); 580 | MAP_ANALOG(ImGuiKey_GamepadRStickUp, GLFW_GAMEPAD_AXIS_RIGHT_Y, 3, -0.25f, -1.0f); 581 | MAP_ANALOG(ImGuiKey_GamepadRStickDown, GLFW_GAMEPAD_AXIS_RIGHT_Y, 3, +0.25f, +1.0f); 582 | #undef MAP_BUTTON 583 | #undef MAP_ANALOG 584 | } 585 | 586 | void ImGui_ImplGlfw_NewFrame() 587 | { 588 | ImGuiIO& io = ImGui::GetIO(); 589 | ImGui_ImplGlfw_Data* bd = ImGui_ImplGlfw_GetBackendData(); 590 | IM_ASSERT(bd != NULL && "Did you call ImGui_ImplGlfw_InitForXXX()?"); 591 | 592 | // Setup display size (every frame to accommodate for window resizing) 593 | int w, h; 594 | int display_w, display_h; 595 | glfwGetWindowSize(bd->Window, &w, &h); 596 | glfwGetFramebufferSize(bd->Window, &display_w, &display_h); 597 | io.DisplaySize = ImVec2((float)w, (float)h); 598 | if (w > 0 && h > 0) 599 | io.DisplayFramebufferScale = ImVec2((float)display_w / (float)w, (float)display_h / (float)h); 600 | 601 | // Setup time step 602 | double current_time = glfwGetTime(); 603 | io.DeltaTime = bd->Time > 0.0 ? (float)(current_time - bd->Time) : (float)(1.0f / 60.0f); 604 | bd->Time = current_time; 605 | 606 | ImGui_ImplGlfw_UpdateMouseData(); 607 | ImGui_ImplGlfw_UpdateMouseCursor(); 608 | 609 | // Update game controllers (if enabled and available) 610 | ImGui_ImplGlfw_UpdateGamepads(); 611 | } 612 | 613 | #if defined(__clang__) 614 | #pragma clang diagnostic pop 615 | #endif 616 | -------------------------------------------------------------------------------- /scripts/build_cuda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 
6 | 7 | BUILD_DIR="build_cuda" 8 | render=on 9 | 10 | if [ -n "$1" ]; then 11 | if [ "$1" = "no_render" ]; then 12 | render=off 13 | else 14 | echo "Unknown param $1" 15 | exit 16 | fi 17 | fi 18 | 19 | rm -rf $BUILD_DIR 20 | mkdir $BUILD_DIR 21 | cd $BUILD_DIR || exit 22 | 23 | cmake ../ \ 24 | -DRENDER=${render} \ 25 | -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \ 26 | -DCMAKE_EXPORT_COMPILE_COMMANDS=on || exit 27 | 28 | make release 29 | -------------------------------------------------------------------------------- /scripts/build_dpcpp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | BUILD_DIR="build_dpcpp" 8 | render=on 9 | 10 | if [ -n "$1" ]; then 11 | if [ "$1" = "no_render" ]; then 12 | render=off 13 | else 14 | echo "Unknown param $1" 15 | exit 16 | fi 17 | fi 18 | 19 | rm -rf $BUILD_DIR 20 | mkdir $BUILD_DIR 21 | cd $BUILD_DIR || exit 22 | 23 | #CXX=clang++ \ 24 | #CC=clang \ 25 | CXX=icpx \ 26 | CC=icpx \ 27 | cmake ../ \ 28 | -DRENDER=${render} \ 29 | -DGLEW_LIBRARY=/usr/lib/x86_64-linux-gnu/libGLEW.so \ 30 | -DBACKEND=DPCPP -DDPCPP_CUDA_SUPPORT=on || exit 31 | 32 | make release 33 | -------------------------------------------------------------------------------- /scripts/docker_build_etc.sh: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Codeplay Software Limited 2 | # This work is licensed under the terms of the MIT license. 3 | # For a copy, see https://opensource.org/licenses/MIT. 4 | 5 | # non-functional code! 
This is a sketch of how to do the dpct conversion properly in a container 6 | # 7 | # Should be run with something like: 8 | # 9 | # docker run --rm \ 10 | # -v /opt/intel/oneapi/:/opt/intel/oneapi/ \ 11 | # -v $PWD:$PWD \ 12 | # -u $UID \ 13 | # -i joeatodd/onednn-cuda \ 14 | # bash < scripts/docker_build_etc.sh 15 | 16 | 17 | # Navigate to relevant directory 18 | 19 | cd $SRC_DIR 20 | 21 | # Call cmake on it 22 | bash scripts/build_cuda.sh 23 | 24 | # Call "intercept-build make" in build dir 25 | cd build 26 | make clean 27 | intercept-build make 28 | 29 | # Do conversion w/ -p 30 | -------------------------------------------------------------------------------- /scripts/perf_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | # Get rid of any previous virtual frame buffer 8 | pkill -9 Xvfb 9 | rm /var/tmp/Xvfb_screen_0 10 | 11 | # Create a virtual screen :99.0 with given dimensions & color depth 12 | # mapping output to /var/tmp/Xvfb_screen_0 13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp & 14 | 15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 & 16 | DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 & 17 | #DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 & 18 | 19 | # To take a screenshot instead of a video (doesn't always work): 20 | # sleep 2 21 | # DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png 22 | 23 | # Use the x11grab device to write to video file 24 | ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4 25 | -------------------------------------------------------------------------------- /scripts/perf_test_cuda.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | # Get rid of any previous virtual frame buffer 8 | pkill -9 Xvfb 9 | rm /var/tmp/Xvfb_screen_0 10 | 11 | # Create a virtual screen :99.0 with given dimensions & color depth 12 | # mapping output to /var/tmp/Xvfb_screen_0 13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp & 14 | 15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 & 16 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 & 17 | DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 18 | -------------------------------------------------------------------------------- /scripts/perf_test_dpcpp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 
6 | 7 | # Get rid of any previous virtual frame buffer 8 | pkill -9 Xvfb 9 | rm /var/tmp/Xvfb_screen_0 10 | 11 | # Create a virtual screen :99.0 with given dimensions & color depth 12 | # mapping output to /var/tmp/Xvfb_screen_0 13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp & 14 | 15 | #DISPLAY=:99.0 SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 & 16 | DISPLAY=:99.0 SYCL_DEVICE_FILTER=cuda ./nbody_dpcpp 50 5 0.999 0.001 1.0e-3 2.0 17 | #DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 & 18 | -------------------------------------------------------------------------------- /scripts/run_dpct.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options 8 | # for intercepting complex builds, but current dev environment restrictions require me to run dpct inside 9 | # a docker container. This complicates things, so for now I'm just doing single source conversion on the 10 | # simulator.cu file. 11 | # 12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges. 13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality. 14 | # 15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo. 16 | # As such, we suppress generation of new helper 17 | # headers when calling dpct with `--use-custom-helper=none`. 
18 | 19 | rm src_sycl/*.[ch]pp src_sycl/*.yaml 20 | cd src_sycl; ln -s ../src/*[ch]pp .; cd - 21 | 22 | docker run --rm \ 23 | -v /opt/intel/oneapi/dpcpp-ct/2023.1.0/:/dpcpp-ct \ 24 | -v $PWD:/nbody/ \ 25 | -u $UID \ 26 | -it joeatodd/onednn-cuda \ 27 | /dpcpp-ct/bin/dpct --out-root=/nbody/src_sycl \ 28 | --assume-nd-range-dim=1 \ 29 | --use-custom-helper=none \ 30 | --stop-on-parse-err \ 31 | --sycl-named-lambda \ 32 | /nbody/src/simulator.cu 33 | 34 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp 35 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp 36 | 37 | # -p=/nbody/build \ 38 | # --optimize-migration 39 | -------------------------------------------------------------------------------- /scripts/run_dpct_native.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | # This script converts the project's CUDA code to SYCL code. The DPC++ compatibility tool offers options 8 | # for intercepting complex builds, but current dev environment restrictions require me to run dpct inside 9 | # a docker container. This complicates things, so for now I'm just doing single source conversion on the 10 | # simulator.cu file. 11 | # 12 | # The option --assume-nd-range-dim=1 prevents dpct from converting CUDA 1D ranges into SYCL 3D ranges. 13 | # It's not totally clear why the default behaviour isn't just to keep the CUDA dimensionality. 14 | # 15 | # The custom helper header files referred to by the --use-custom-helper flag are already part of this repo. 16 | # As such, we suppress generation of new helper 17 | # headers when calling dpct with `--use-custom-helper=none`. 
18 | 19 | export NBODY_DIR=$PWD 20 | 21 | cd $NBODY_DIR 22 | 23 | rm src_sycl/*.[ch]pp src_sycl/*.yaml 24 | cd src_sycl; ln -s ../src/*[ch]pp .; cd - 25 | 26 | dpct --out-root=./src_sycl \ 27 | --assume-nd-range-dim=1 \ 28 | --use-custom-helper=none \ 29 | --stop-on-parse-err \ 30 | --sycl-named-lambda \ 31 | ./src/simulator.cu 32 | 33 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/renderer.hpp 34 | sed -i 's/simulator.cuh/simulator.dp.hpp/g' src_sycl/nbody.cpp 35 | 36 | # -p=/nbody/build \ 37 | # --optimize-migration 38 | -------------------------------------------------------------------------------- /scripts/run_nbody.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT. 6 | 7 | # This script runs a particular version of the nbody simulation 8 | # depending on the -b flag. All subsequent positional args are 9 | # passed on to nbody. See ../README.md for a description of these 10 | # positional args. 11 | # 12 | # ./scripts/run_nbody.sh -b dpcpp 50 5 0.999 0.001 1.0e-3 2.0 13 | 14 | while getopts b: flag 15 | do 16 | case "${flag}" in 17 | b) backend=${OPTARG};; 18 | esac 19 | done 20 | # Discard exactly the arguments getopts consumed, so both "-b x" and "-bx" work 21 | shift $((OPTIND - 1)) 22 | 23 | case "$backend" in 24 | cuda) ./nbody_cuda "$@";; 25 | dpcpp) SYCL_DEVICE_FILTER=opencl:cpu ./nbody_dpcpp "$@";; 26 | *) echo "Bad backend"; exit 1;; 27 | esac 28 | -------------------------------------------------------------------------------- /scripts/xvfb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2022 Codeplay Software Limited 4 | # This work is licensed under the terms of the MIT license. 5 | # For a copy, see https://opensource.org/licenses/MIT.
6 | 7 | # Get rid of any previous virtual frame buffer 8 | pkill -9 Xvfb 9 | rm /var/tmp/Xvfb_screen_0 10 | 11 | # Create a virtual screen :99.0 with given dimensions & color depth 12 | # mapping output to /var/tmp/Xvfb_screen_0 13 | Xvfb :99 -screen 0 1920x1080x16 -fbdir /var/tmp & 14 | 15 | # Run the nbody simulation on this screen 16 | DISPLAY=:99.0 ./nbody_cuda 50 5 0.999 0.001 1.0e-3 2.0 & 17 | #DISPLAY=:99.0 ./nbody_cuda 250 5 0.999 0.001 1.0e-3 2.0 & 18 | 19 | # To take a screenshot instead of a video (doesn't always work): 20 | # sleep 2 21 | # DISPLAY=:99 xwd -root -silent | convert xwd:- png:/tmp/screenshot.png 22 | 23 | # Use the x11grab device to write to video file 24 | ffmpeg -video_size 1920x1080 -framerate 25 -f x11grab -i :99.0+0,0 output.mp4 25 | -------------------------------------------------------------------------------- /shaders/gl/blur.frag: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | #version 450 core 3 | 4 | layout (binding = 0) uniform sampler2D tex; 5 | 6 | layout (location = 0) uniform vec2 size; 7 | layout (location = 1) uniform vec2 mult; 8 | layout (location = 2) uniform int kHalfWidth; 9 | // Maximum length of gauss kernel sample = 100 10 | layout (location = 3) uniform float[100] offset; 11 | layout (location = 103) uniform float[100] weight; 12 | 13 | in vec2 pass_tc; 14 | 15 | out vec4 out_color; 16 | 17 | vec4 contribute(float offset, float weight) 18 | { 19 | return (texture(tex, pass_tc+offset*mult*size)+ 20 | texture(tex, pass_tc-offset*mult*size)) 21 | *weight; 22 | } 23 | 24 | void main() 25 | { 26 | out_color = texture(tex, pass_tc) * weight[0]; 27 | for(int i = 1; i < kHalfWidth; i++){ 28 | out_color += contribute(offset[i], weight[i]); 29 | } 30 | } -------------------------------------------------------------------------------- /shaders/gl/deferred.vert: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | #version 450 core 3 | 4 | layout (location = 0) in vec2 in_pos; 5 | 6 | out vec2 pass_tc; 7 | 8 | void main() 9 | { 10 | gl_Position = vec4(in_pos,0.0,1.0); 11 | pass_tc = in_pos*0.5+vec2(0.5); 12 | } -------------------------------------------------------------------------------- /shaders/gl/integration.comp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | #version 450 core 3 | 4 | layout (location = 0) uniform float dt; 5 | 6 | layout (local_size_x = 256) in; 7 | 8 | layout (binding = 0, std430) buffer particles_in 9 | { 10 | vec4 part_in[]; 11 | }; 12 | 13 | layout (binding = 1, std430) buffer particles_vel 14 | { 15 | vec4 part_vel[]; 16 | }; 17 | 18 | void main() 19 | { 20 | uint id = gl_GlobalInvocationID.x; 21 | vec4 pos = part_in[id]; 22 | part_in[id] = vec4(pos.xyz+dt*part_vel[id].xyz,pos.w); 23 | } -------------------------------------------------------------------------------- /shaders/gl/interaction.comp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | #version 450 core 3 | 4 | #define EPS2 0.2 5 | 6 | layout (location = 0) uniform float dt; 7 | layout (location = 1) uniform float G; 8 | layout (location = 2) uniform float damping; 9 | 10 | layout (local_size_x = 256) in; 11 | 12 | layout (binding = 0, std430) buffer particles_in 13 | { 14 | vec4 part_in[]; 15 | }; 16 | 17 | layout (binding = 1, std430) buffer particles_vel 18 | { 19 | vec4 part_vel[]; 20 | }; 21 | 22 | shared vec4 cache[gl_WorkGroupSize.x]; 23 | 24 | vec3 interaction(in vec4 p1,in vec4 p2) 25 | { 26 | vec3 r = p2.xyz - p1.xyz; 27 | float dist_sqr = dot(r,r) + EPS2; 28 | float dist_sixth = dist_sqr*dist_sqr*dist_sqr; 29 | float inv_dist_cube = inversesqrt(dist_sixth); 30 | return 
r*inv_dist_cube; 31 | } 32 | 33 | void main() 34 | { 35 | uint id = gl_GlobalInvocationID.x; 36 | vec4 p1 = part_in[id]; 37 | vec3 pos = p1.xyz; 38 | vec3 vel = vec3(0.0); 39 | for (uint i=0;iblue, fast->purple 18 | vec3 color = mix(vec3(0,0.4,1),vec3(1,0.2,1),clamp(dot(vel,vel)*0.0006,0,1)); 19 | 20 | pass_col = vec4(color,1.0); 21 | } -------------------------------------------------------------------------------- /shaders/gl/tonemap.frag: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | #version 450 core 3 | 4 | const int FBO_MARGIN = 50; 5 | 6 | layout (binding = 0) uniform sampler2D hdr; 7 | layout (binding = 1) uniform sampler2D bloom; 8 | layout (binding = 2) uniform sampler2D lum; 9 | 10 | layout (location = 0) uniform int lum_lod; 11 | 12 | in vec2 pass_tc; 13 | 14 | out vec4 out_color; 15 | 16 | void main() 17 | { 18 | ivec2 coord = ivec2(gl_FragCoord.xy)+ivec2(FBO_MARGIN); 19 | 20 | vec3 color = texelFetch(hdr,coord,0).rgb; 21 | 22 | float luminance = textureLod(lum, vec2(0.5), lum_lod).r; 23 | float exposure = 1.0/clamp(luminance*10, 0.2,1000.0); 24 | 25 | color += texture(bloom, vec2(coord)/textureSize(hdr, 0)).rgb; 26 | vec3 tonemap = vec3(1.0)- exp(-color*exposure); 27 | 28 | vec3 gamma = pow(tonemap, vec3(1.0/2.2)); 29 | out_color = vec4(gamma, 1.0); 30 | } -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 - 2018 Sarah Le Luron 2 | # Copyright (C) 2022 Codeplay Software Limited 3 | 4 | find_package(PkgConfig REQUIRED) 5 | 6 | if (RENDER) 7 | pkg_check_modules(Glew REQUIRED IMPORTED_TARGET glew) 8 | 9 | find_package(glm REQUIRED) 10 | find_package(glfw3 REQUIRED) 11 | find_package(OpenGL REQUIRED) 12 | endif() 13 | 14 | find_package(CUDA REQUIRED) 15 | 16 | set(COMMON_SOURCE 17 | nbody.cpp 18 | sim_param.cpp 19 | 
simulator.cu) 20 | set(OPENGL_SOURCE 21 | camera.cpp 22 | gen.cpp 23 | renderer_gl.cpp 24 | shader.cpp) 25 | 26 | if(NOT TARGET glm::glm) 27 | add_library(glm::glm IMPORTED INTERFACE) 28 | target_include_directories(glm::glm INTERFACE ${GLM_INCLUDE_DIR}) 29 | endif() 30 | 31 | set(DEBUG_FLAGS -g -O0) 32 | 33 | 34 | if (RENDER) 35 | set(RENDER_LIB glm::glm glfw PkgConfig::Glew OpenGL::OpenGL cuda) 36 | set(RENDER_FLAG -DUSE_OPENGL) 37 | set(SOURCE_FILES ${COMMON_SOURCE} ${OPENGL_SOURCE}) 38 | else() 39 | set(RENDER_LIB cuda) 40 | set(RENDER_FLAG DISABLE_GL) 41 | set(SOURCE_FILES ${COMMON_SOURCE}) 42 | endif() 43 | 44 | add_custom_target(release DEPENDS ${BINARY_NAME}) 45 | add_executable(${BINARY_NAME} ${SOURCE_FILES}) 46 | # COMPILER_NAME here is only used to print text overlay on simulation 47 | target_compile_definitions(${BINARY_NAME} PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA") 48 | target_link_libraries(${BINARY_NAME} PRIVATE ${RENDER_LIB}) 49 | target_compile_features(${BINARY_NAME} PRIVATE cxx_auto_type cxx_nullptr cxx_range_for) 50 | target_include_directories(${BINARY_NAME} PRIVATE ${CUDA_INCLUDE_DIRS}) 51 | target_compile_options(${BINARY_NAME} PRIVATE -use_fast_math) 52 | 53 | add_custom_target(debug DEPENDS ${BINARY_NAME}_d) 54 | add_executable(${BINARY_NAME}_d ${SOURCE_FILES}) 55 | # COMPILER_NAME here is only used to print text overlay on simulation 56 | target_compile_definitions(${BINARY_NAME}_d PRIVATE ${RENDER_FLAG} COMPILER_NAME="CUDA") 57 | target_link_libraries(${BINARY_NAME}_d PRIVATE ${RENDER_LIB}) 58 | target_compile_features(${BINARY_NAME}_d PRIVATE cxx_auto_type cxx_nullptr cxx_range_for) 59 | target_include_directories(${BINARY_NAME}_d PRIVATE ${CUDA_INCLUDE_DIRS}) 60 | target_compile_options(${BINARY_NAME}_d PRIVATE ${DEBUG_FLAGS}) 61 | -------------------------------------------------------------------------------- /src/camera.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 
- 2018 Sarah Le Luron 2 | 3 | #include "camera.hpp" 4 | 5 | #include 6 | #include 7 | 8 | const float PI = 3.14159265358979323846; 9 | 10 | using namespace std; 11 | 12 | Camera::Camera() { 13 | position.x = 0; 14 | position.y = PI / 4; 15 | position.z = 50.0; 16 | 17 | velocity = {0.0, 0.0, 0.0}; 18 | look_at = {0.0, 0.0, 0.0}; 19 | look_at_vel = {0.0, 0.0, 0.0}; 20 | } 21 | 22 | void Camera::step() { 23 | position.x -= velocity.x; 24 | position.y -= velocity.y; 25 | position.z *= (1.0 - velocity.z); 26 | look_at += look_at_vel; 27 | 28 | velocity *= 0.72; // damping 29 | look_at_vel *= 0.90; 30 | 31 | // limits 32 | if (position.x < 0) position.x += 2 * PI; 33 | if (position.x >= 2 * PI) position.x -= 2 * PI; 34 | position.y = 35 | max(-(float)PI / 2 + 0.001f, min(position.y, (float)PI / 2 - 0.001f)); 36 | } 37 | 38 | glm::mat4 Camera::getProj(int width, int height) { 39 | return glm::infinitePerspective(glm::radians(30.0f), width / (float)height, 40 | 1.f); 41 | } 42 | 43 | glm::vec3 getCartesianCoordinates(glm::vec3 v) { 44 | return glm::vec3(cos(v.x) * cos(v.y), sin(v.x) * cos(v.y), sin(v.y)) * v.z; 45 | } 46 | 47 | glm::mat4 Camera::getView() { 48 | // polar to cartesian coordinates 49 | glm::vec3 view_pos = getCartesianCoordinates(position); 50 | 51 | return glm::lookAt(view_pos + look_at, look_at, glm::vec3(0, 0, 1)); 52 | } 53 | 54 | glm::vec3 Camera::getForward() { 55 | return glm::normalize(-getCartesianCoordinates(position)); 56 | } 57 | 58 | glm::vec3 Camera::getRight() { 59 | return glm::normalize( 60 | glm::cross(getCartesianCoordinates(position), glm::vec3(0, 0, 1))); 61 | } 62 | 63 | glm::vec3 Camera::getUp() { 64 | return glm::normalize( 65 | glm::cross(getCartesianCoordinates(position), getRight())); 66 | } 67 | 68 | void Camera::addVelocity(glm::vec3 vel) { velocity += vel; } 69 | 70 | void Camera::addLookAtVelocity(glm::vec3 vel) { look_at_vel += vel; } 71 | 72 | glm::vec3 Camera::getPosition() { return position; } 73 | 
-------------------------------------------------------------------------------- /src/camera.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | class Camera { 8 | public: 9 | Camera(); 10 | 11 | /** 12 | * Advances the camera by one step, in place: applies the velocities to 13 | * the position and look-at point, damps the velocities, then wraps the 14 | * first polar angle to [0, 2*PI) and clamps the second to just inside +/- PI/2 15 | */ 16 | void step(); 17 | 18 | /** 19 | * Computes the perspective projection matrix for the given viewport 20 | * (30 degree field of view, near plane at 1, infinite far plane) 21 | * @param width viewport width 22 | * @param height viewport height 23 | * @return projection matrix 24 | */ 25 | glm::mat4 getProj(int width, int height); 26 | 27 | /** 28 | * Computes view matrix from the camera's polar position and look-at point 29 | * (the world z axis is used as the up direction) 30 | * @return view matrix 31 | */ 32 | glm::mat4 getView(); 33 | 34 | glm::vec3 getForward(); 35 | glm::vec3 getRight(); 36 | glm::vec3 getUp(); 37 | 38 | glm::vec3 getPosition(); 39 | 40 | void addVelocity(glm::vec3 vel); 41 | void addLookAtVelocity(glm::vec3 vel); 42 | 43 | private: 44 | glm::vec3 position; ///< Polar coordinates in radians 45 | glm::vec3 velocity; ///< dp/dt of polar coordinates 46 | glm::vec3 look_at; ///< Where is the camera looking at 47 | glm::vec3 look_at_vel; ///< dp/dt of lookat position 48 | }; 49 | -------------------------------------------------------------------------------- /src/gen.cpp: -------------------------------------------------------------------------------- 1 | #include "gen.hpp" 2 | 3 | #include 4 | 5 | const float PI = 3.14159265358979323846; 6 | 7 | // Copyright (C) 2016 - 2018 Sarah Le Luron 8 | // Copyright (C) 2022 Codeplay Software Limited 9 | 10 | using namespace std; 11 | 12 | mt19937 rng; 13 | uniform_real_distribution<> dis(0, 1); 14 | 15 | glm::vec4 randomParticlePos() { 16 | // Random position on a 'thick disk' 17 | glm::vec4 particle; 18 | float t = dis(rng) * 2 * PI; 19 | float s = dis(rng) *
100; 20 | particle.x = cos(t) * s; 21 | particle.y = sin(t) * s; 22 | particle.z = dis(rng) * 4; 23 | 24 | particle.w = 1.f; 25 | return particle; 26 | } 27 | 28 | glm::vec4 randomParticleVel(glm::vec4 pos) { 29 | // Initial velocity is 'orbital' velocity from position 30 | glm::vec3 vel = glm::cross(glm::vec3(pos), glm::vec3(0, 0, 1)); 31 | float orbital_vel = sqrt(2.0 * glm::length(vel)); 32 | vel = glm::normalize(vel) * orbital_vel; 33 | return glm::vec4(vel, 0.0); 34 | } 35 | 36 | std::vector genFlareTex(int tex_size) { 37 | std::vector pixels(tex_size * tex_size); 38 | float sigma2 = tex_size / 2.0; 39 | float A = 1.0; 40 | for (int i = 0; i < tex_size; ++i) { 41 | float i1 = i - tex_size / 2; 42 | for (int j = 0; j < tex_size; ++j) { 43 | float j1 = j - tex_size / 2; 44 | // gamma corrected gauss 45 | pixels[i * tex_size + j] = pow( 46 | A * exp(-((i1 * i1) / (2 * sigma2) + (j1 * j1) / (2 * sigma2))), 47 | 2.2); 48 | } 49 | } 50 | return pixels; 51 | } 52 | -------------------------------------------------------------------------------- /src/gen.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | // Copyright (C) 2022 Codeplay Software Limited 3 | 4 | #pragma once 5 | #include 6 | #include 7 | 8 | /** 9 | * Generates a random particle position 10 | * @return 3D position + w component at 1.f 11 | */ 12 | glm::vec4 randomParticlePos(); 13 | 14 | /** 15 | * Generates a random particle velocity 16 | * @param pos the same particle's position 17 | * @return 3D velocity + w component at 0.f 18 | */ 19 | glm::vec4 randomParticleVel(glm::vec4 pos); 20 | 21 | std::vector genFlareTex(int size); 22 | -------------------------------------------------------------------------------- /src/nbody.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | // Copyright (C) 2022 Codeplay Software Limited 3 | 4 | 
#include 5 | #include 6 | #include 7 | 8 | #ifndef DISABLE_GL 9 | #include 10 | 11 | #include "renderer_gl.hpp" 12 | #include 13 | #include 14 | #include "camera.hpp" 15 | #include "gen.hpp" 16 | #else 17 | #include 18 | #endif 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "sim_param.hpp" 26 | #include "simulator.cuh" 27 | 28 | using namespace std; 29 | using namespace simulation; 30 | // Entry point: steps the simulation up to params.numFrames times, rendering each step unless DISABLE_GL is defined 31 | int main(int argc, char **argv) { 32 | 33 | SimParam params; 34 | params.parseArgs(argc, argv); 35 | 36 | DiskGalaxySimulator nbodySim(params); 37 | 38 | #ifndef DISABLE_GL 39 | // Window initialization 40 | GLFWwindow *window; 41 | 42 | glfwSetErrorCallback([](const int error, const char *msg) { 43 | cout << "Error id : " << error << ", " << msg << endl; 44 | exit(-1); 45 | }); 46 | 47 | if (!glfwInit()) { 48 | cout << "GLFW can't initialize" << endl; 49 | return -1; 50 | } 51 | 52 | GLFWmonitor *monitor = glfwGetPrimaryMonitor(); 53 | 54 | const GLFWvidmode *mode = glfwGetVideoMode(monitor); 55 | 56 | glfwWindowHint(GLFW_RED_BITS, mode->redBits); 57 | glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits); 58 | glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits); 59 | glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate); 60 | glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE); 61 | RendererGL renderer; 62 | 63 | renderer.initWindow(); 64 | 65 | int width = mode->width; 66 | int height = mode->height - 30; 67 | window = glfwCreateWindow(width, height, "N-Body Simulation", NULL, NULL); 68 | 69 | 70 | glfwMakeContextCurrent(window); 71 | 72 | renderer.init(window, width, height, nbodySim); 73 | renderer.initImgui(window); 74 | 75 | // Get initial positions generated in simulator ctor 76 | renderer.updateParticles(); 77 | 78 | Camera camera; 79 | 80 | float last_fps{0}; 81 | #endif 82 | 83 | std::vector stepTimes; 84 | int step{0}; 85 | 86 | // Main loop 87 | float stepTime = 0.0; 88 | 89 | #ifndef DISABLE_GL 90 | while (!glfwWindowShouldClose(window) && 91 |
glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_RELEASE && 92 | step < params.numFrames) { 93 | double frame_start = glfwGetTime(); 94 | #else 95 | while ( step < params.numFrames) { 96 | #endif 97 | nbodySim.stepSim(); 98 | #ifndef DISABLE_GL 99 | renderer.updateParticles(); 100 | renderer.render(camera.getProj(width, height), camera.getView()); 101 | #endif 102 | if(!(step % 20)) stepTime = nbodySim.getLastStepTime(); // refresh the displayed kernel time every 20th step 103 | #ifndef DISABLE_GL 104 | renderer.printKernelTime(stepTime); 105 | #endif 106 | 107 | step++; 108 | int warmSteps{2}; // steps up to this count are left out of the timing statistics 109 | if (step > warmSteps) { 110 | stepTimes.push_back(nbodySim.getLastStepTime()); 111 | float cumStepTime = 112 | std::accumulate(stepTimes.begin(), stepTimes.end(), 0.0); 113 | float meanTime = cumStepTime / stepTimes.size(); 114 | float accum{0.0}; 115 | std::for_each(stepTimes.begin(), stepTimes.end(), 116 | [&](const float time) { 117 | accum += std::pow((time - meanTime), 2); 118 | }); 119 | float stdDev = std::pow(accum / stepTimes.size(), 0.5); 120 | std::cout << "At step " << step << " kernel time is " 121 | << stepTimes.back() << " and mean is " << meanTime 122 | << " and stddev is: " << stdDev << "\n"; 123 | } 124 | #ifndef DISABLE_GL 125 | // Window refresh 126 | glfwSwapBuffers(window); 127 | glfwPollEvents(); 128 | 129 | // Measure the frame duration and derive the instantaneous FPS (no sleep is performed) 130 | double frame_end = glfwGetTime(); 131 | double elapsed = frame_end - frame_start; 132 | last_fps = 1.0 / elapsed; 133 | #endif 134 | } 135 | #ifndef DISABLE_GL 136 | renderer.destroy(); 137 | glfwDestroyWindow(window); 138 | glfwTerminate(); 139 | #endif 140 | return 0; 141 | } 142 | -------------------------------------------------------------------------------- /src/renderer.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | // Copyright (C) 2022 Codeplay Software Limited 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 |
#include "simulator.cuh"

/**
 * Abstract rendering backend for the n-body demo.
 */
class Renderer {
  public:
   /// Virtual so that a backend can be destroyed through a Renderer pointer
   virtual ~Renderer() = default;

   /// Sets the window hints the backend needs before window creation
   virtual void initWindow() = 0;

   /**
    * Initializes the gl state
    * @param window window whose context is used for rendering
    * @param width viewport width
    * @param height viewport height
    * @param sim simulator the particle data is read from
    */
   virtual void init(GLFWwindow *window, int width, int height,
                     simulation::Simulator &sim) = 0;

   virtual void destroy() = 0;

   /**
    * Supplies the gl state with the particle positions and velocities
    * currently held by the simulator passed to init()
    */
   virtual void updateParticles() = 0;

   /**
    * Renders the particles at the current step
    * @param projMat projection matrix
    * @param viewMat view matrix
    */
   virtual void render(glm::mat4 projMat, glm::mat4 viewMat) = 0;
};
--------------------------------------------------------------------------------
/src/renderer_gl.cpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron
// Copyright (C) 2022 Codeplay Software Limited

#include "renderer_gl.hpp"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <numeric>
#include <stdexcept>
#include <string>
#include <glm/gtc/constants.hpp>
#include <glm/gtc/type_ptr.hpp>

#include "imgui.h"
#include "imgui_impl_glfw.h"
#include "imgui_impl_opengl3.h"
#include "gen.hpp"

const int FBO_MARGIN = 50;

#define PRINT_PSEUDO_FPS 0

using namespace std;

void RendererGL::initWindow() {
   // Request an OpenGL 4.5 core profile context
   glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
   glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
   glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
}

void RendererGL::init(GLFWwindow *window, int width, int height,
                      simulation::Simulator &sim_) {
   // OpenGL initialization
   const GLenum error = glewInit();
   if (error != GLEW_OK) {
      throw std::runtime_error("Can't load GL");
   }

   sim = &sim_;
   numParticles = sim->getNumParticles();
   setWindowDimensions(width, height);
   createFlareTexture();
   createVaosVbos();
   initShaders();
   initFbos();
   setUniforms();
}

void RendererGL::setWindowDimensions(int width, int height) {
   width_ = width;
   height_ = height;
}

void RendererGL::createFlareTexture() {
   texSize = 16;
   glCreateTextures(GL_TEXTURE_2D, 1, &flareTex);
   glTextureStorage2D(flareTex, 1, GL_R32F, texSize, texSize);
   glTextureParameteri(flareTex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   {
      std::vector<float> pixels = genFlareTex(texSize);
      glTextureSubImage2D(flareTex, 0, 0, 0, texSize, texSize, GL_RED,
                          GL_FLOAT, pixels.data());
   }
}

void RendererGL::createVaosVbos() {
   // Particle VAO
   glCreateVertexArrays(1, &vaoParticles);
   glCreateBuffers(1, &vboParticlesPos);
   glCreateBuffers(1, &ssboVelocities);
   glVertexArrayVertexBuffer(vaoParticles, 0, vboParticlesPos, 0,
                             sizeof(glm::vec4));
   glVertexArrayVertexBuffer(vaoParticles, 1, ssboVelocities, 0,
                             sizeof(glm::vec4));

   // Position
   glEnableVertexArrayAttrib(vaoParticles, 0);
   glVertexArrayAttribFormat(vaoParticles, 0, 4, GL_FLOAT, GL_FALSE, 0);
   glVertexArrayAttribBinding(vaoParticles, 0, 0);

   // Velocity
   glEnableVertexArrayAttrib(vaoParticles, 1);
   glVertexArrayAttribFormat(vaoParticles, 1, 4, GL_FLOAT, GL_FALSE, 0);
   glVertexArrayAttribBinding(vaoParticles, 1, 1);

   // Deferred VAO
   glCreateVertexArrays(1, &vaoDeferred);
   glCreateBuffers(1, &vboDeferred);
   glVertexArrayVertexBuffer(vaoDeferred, 0, vboDeferred, 0,
                             sizeof(glm::vec2));
   // Position
   glEnableVertexArrayAttrib(vaoDeferred, 0);
   glVertexArrayAttribFormat(vaoDeferred, 0, 2, GL_FLOAT, GL_FALSE, 0);
   glVertexArrayAttribBinding(vaoDeferred, 0, 0);

   // Deferred tri
   glm::vec2 tri[3] = {glm::vec2(-2, -1), glm::vec2(+2, -1), glm::vec2(0, 4)};
   glNamedBufferStorage(vboDeferred, 3 * sizeof(glm::vec2), tri, 0);

   // SSBO allocation (particle position & velocities)
   glNamedBufferStorage(vboParticlesPos, numParticles * sizeof(glm::vec4),
                        nullptr, GL_MAP_WRITE_BIT);
   glNamedBufferStorage(ssboVelocities, numParticles * sizeof(glm::vec4),
                        nullptr, GL_MAP_WRITE_BIT);

   // SSBO binding
   glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, vboParticlesPos, 0,
                     numParticles * sizeof(glm::vec4));
   glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, ssboVelocities, 0,
                     numParticles * sizeof(glm::vec4));
}

void RendererGL::updateParticles() {
   setParticleData(vboParticlesPos, sim->getParticlePos());
   setParticleData(ssboVelocities, sim->getParticleVel());
}

void RendererGL::initImgui(GLFWwindow *window) {
   // Setup ImGui context
   IMGUI_CHECKVERSION();
   ImGui::CreateContext();
   ImGuiIO &io = ImGui::GetIO();
   (void)io;
   ImGui::StyleColorsDark();

   // Setup Platform/Renderer bindings
   ImGui_ImplGlfw_InitForOpenGL(window, true);
   ImGui_ImplOpenGL3_Init("#version 450");
}

void RendererGL::printKernelTime(float kernelTime) {
   // Start the Dear ImGui frame
   ImGui_ImplOpenGL3_NewFrame();
   ImGui_ImplGlfw_NewFrame();
   ImGui::NewFrame();

   // Generate a minimal window. isOpen is initialized because ImGui receives
   // a pointer to it.
   bool isOpen = true;
   ImGui::Begin("N/A", &isOpen,
                ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoMove |
                    ImGuiWindowFlags_NoScrollbar |
                    ImGuiWindowFlags_NoSavedSettings |
                    ImGuiWindowFlags_NoInputs);
   ImGui::SetWindowFontScale(2.5);
   ImGui::Text("%s", (std::string("N-body demo running with " COMPILER_NAME
                                  " on device: ") +
                      *sim->getDeviceName())
                         .c_str());
   if (PRINT_PSEUDO_FPS) {
      ImGui::Text("FPS: %2.0f", 1000.0 / kernelTime);
   } else {
      ImGui::Text("Kernel time: %4.2f ms", kernelTime);
   }
   ImGui::End();

   ImGui::Render();
   ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
}

void RendererGL::setParticleData(const GLuint buffer,
                                 const ParticleData &data) {
   // Nothing to upload; mapping a zero-length range is a GL error.
   if (numParticles == 0) return;

   auto *mapped = static_cast<glm::vec4 *>(glMapNamedBufferRange(
       buffer, 0, numParticles * sizeof(glm::vec4), GL_MAP_WRITE_BIT));

   assert(!glGetError());
   // Checked at runtime too: with NDEBUG the asserts vanish and a failed
   // mapping would otherwise be written through.
   if (!mapped) {
      throw std::runtime_error("Can't map particle buffer");
   }

   // Interleave the SoA simulation data into vec4 (w = 1)
   for (size_t i = 0; i < numParticles; i++) {
      mapped[i] = glm::vec4(data.x[i], data.y[i], data.z[i], 1.0f);
   }
   glUnmapNamedBuffer(buffer);
}

void RendererGL::initShaders() {
   // Need to cut these two shaders out
   // programInteraction.source(GL_COMPUTE_SHADER,
   // "shaders/gl/interaction.comp"); programInteraction.link();

   // programIntegration.source(GL_COMPUTE_SHADER,
   // "shaders/gl/integration.comp"); programIntegration.link();

   programHdr.source(GL_VERTEX_SHADER, "shaders/gl/main.vert");
   programHdr.source(GL_FRAGMENT_SHADER, "shaders/gl/main.frag");
   programHdr.source(GL_GEOMETRY_SHADER, "shaders/gl/main.geom");
   programHdr.link();

   programTonemap.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
   programTonemap.source(GL_FRAGMENT_SHADER, "shaders/gl/tonemap.frag");
   programTonemap.link();

   programBlur.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
   programBlur.source(GL_FRAGMENT_SHADER, "shaders/gl/blur.frag");
   programBlur.link();

   programLum.source(GL_VERTEX_SHADER, "shaders/gl/deferred.vert");
   programLum.source(GL_FRAGMENT_SHADER, "shaders/gl/luminance.frag");
   programLum.link();
}

void RendererGL::initFbos() {
   const int blur_dsc = 2;
   blurDownscale = blur_dsc;

   glCreateFramebuffers(4, fbos);
   glCreateTextures(GL_TEXTURE_2D, 4, attachs);

   const int base_width = width_ + 2 * FBO_MARGIN;
   const int base_height = height_ + 2 * FBO_MARGIN;

   const int widths[] = {base_width, base_width / blur_dsc,
                         base_width / blur_dsc, base_width / 2};

   const int heights[] = {base_height, base_height / blur_dsc,
                          base_height / blur_dsc, base_height / 2};

   lumLod = static_cast<int>(
       std::floor(std::log2(std::max(base_width, base_height) / 2)));
   const int mipmaps[] = {1, 1, 1, lumLod + 1};
   const GLenum types[] = {GL_RGBA16F, GL_RGBA16F, GL_RGBA16F, GL_R16F};
   const GLenum min_filters[] = {GL_LINEAR, GL_LINEAR, GL_LINEAR,
                                 GL_LINEAR_MIPMAP_LINEAR};

   for (int i = 0; i < 4; ++i) {
      glTextureStorage2D(attachs[i], mipmaps[i], types[i], widths[i],
                         heights[i]);
      glTextureParameteri(attachs[i], GL_TEXTURE_MIN_FILTER, min_filters[i]);
      glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
      glTextureParameteri(attachs[i], GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
      glNamedFramebufferTexture(fbos[i], GL_COLOR_ATTACHMENT0, attachs[i], 0);
   }
}

void RendererGL::setUniforms() {
   // NDC sprite size
   glProgramUniform2f(programHdr.getId(), 8, texSize / float(2 * width_),
                      texSize / float(2 * height_));
   // Blur sample offset length
   glProgramUniform2f(programBlur.getId(), 0, (float)blurDownscale / width_,
                      (float)blurDownscale / height_);

   // Compute optimized 1D gaussian kernel & send to device
   const auto optimGauss = optimGaussKernel(gaussKernel(10.0, 25));
   const std::vector<float> &offsets = optimGauss.first;
   const std::vector<float> &weights = optimGauss.second;

   assert(offsets.size() < 100 && "Maximum Gaussian kernel size exceeded!");
   const GLsizei taps = static_cast<GLsizei>(offsets.size());
   glProgramUniform1i(programBlur.getId(), 2, taps);
   glProgramUniform1fv(programBlur.getId(), 3, taps, offsets.data());
   glProgramUniform1fv(programBlur.getId(), 103, taps, weights.data());
}

void RendererGL::render(glm::mat4 proj_mat, glm::mat4 view_mat) {
   // Particle HDR rendering
   glViewport(0, 0, width_ + 2 * FBO_MARGIN, height_ + 2 * FBO_MARGIN);
   glBindVertexArray(vaoParticles);
   glEnable(GL_BLEND);
   glBlendFunc(GL_ONE, GL_ONE);
   glBindFramebuffer(GL_FRAMEBUFFER, fbos[0]);
   glUseProgram(programHdr.getId());
   glClear(GL_COLOR_BUFFER_BIT);
   glProgramUniformMatrix4fv(programHdr.getId(), 0, 1, GL_FALSE,
                             glm::value_ptr(view_mat));
   glProgramUniformMatrix4fv(programHdr.getId(), 4, 1, GL_FALSE,
                             glm::value_ptr(proj_mat));
   glBindTextureUnit(0, flareTex);
   glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
   glDrawArrays(GL_POINTS, 0, numParticles);

   glBindVertexArray(vaoDeferred);
   glDisable(GL_BLEND);

   glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / blurDownscale,
              (height_ + 2 * FBO_MARGIN) / blurDownscale);
   glUseProgram(programBlur.getId());

   // Blur pingpong (N horizontal blurs then N vertical blurs)
   const int nPasses = 1;  // Only one blur pass in each direction
   int loop = 0;
   for (int i = 0; i < 2; ++i) {
      if (i == 0)
         glProgramUniform2f(programBlur.getId(), 1, 1, 0);
      else
         glProgramUniform2f(programBlur.getId(), 1, 0, 1);
      for (int j = 0; j < nPasses; ++j) {
         // First pass reads the HDR attachment; later passes read the
         // attachment written by the previous pass.
         const GLuint fbo = fbos[(loop % 2) + 1];
         const GLuint attach = attachs[loop ? ((loop + 1) % 2 + 1) : 0];
         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
         glBindTextureUnit(0, attach);
         glDrawArrays(GL_TRIANGLES, 0, 3);
         loop++;
      }
   }

   // Average luminance
   glViewport(0, 0, (width_ + 2 * FBO_MARGIN) / 2,
              (height_ + 2 * FBO_MARGIN) / 2);
   glBindFramebuffer(GL_FRAMEBUFFER, fbos[3]);
   glUseProgram(programLum.getId());
   glBindTextureUnit(0, attachs[0]);
   glDrawArrays(GL_TRIANGLES, 0, 3);
   glGenerateTextureMipmap(attachs[3]);

   // Tonemapping step (direct to screen)
   glViewport(0, 0, width_, height_);
   glBindFramebuffer(GL_FRAMEBUFFER, 0);
   glUseProgram(programTonemap.getId());
   glProgramUniform1i(programTonemap.getId(), 0, lumLod);
   glBindTextureUnit(0, attachs[0]);
   glBindTextureUnit(1, attachs[2]);
   glBindTextureUnit(2, attachs[3]);
   glDrawArrays(GL_TRIANGLES, 0, 3);
}

std::vector<float> RendererGL::gaussKernel(const float sigma,
                                           const int halfwidth) {
   // An empty kernel has no centre tap to normalize against.
   if (halfwidth <= 0) return {};

   const float sigma_factor =
       1.0 / (sigma * std::sqrt(2 * glm::pi<float>()));

   // result[n] is the gaussian evaluated at integer offset n (n = 0 is the
   // centre tap); only one half is stored since the kernel is symmetric.
   std::vector<float> result(halfwidth);
   for (int n = 0; n < halfwidth; ++n) {
      const float x = static_cast<float>(n);
      result[n] = sigma_factor * std::exp(-(x * x) / (2 * sigma * sigma));
   }

   // Normalize so that the full (mirrored) kernel sums to one
   const float halfnorm =
       std::accumulate(result.begin() + 1, result.end(), 0.0);
   const float norm = 2 * halfnorm + result[0];

   std::transform(result.begin(), result.end(), result.begin(),
                  [norm](auto val) { return val / norm; });

   return result;
}

std::pair<std::vector<float>, std::vector<float>>
RendererGL::optimGaussKernel(const std::vector<float> &weightsIn) {
   // No centre tap to copy: return an empty kernel instead of reading
   // weightsIn[0] out of bounds.
   if (weightsIn.empty()) return {};

   const int inSize = weightsIn.size();
   const int outSize = (inSize / 2) + 1;

   std::vector<float> offsetsIn(inSize);
   std::iota(offsetsIn.begin(), offsetsIn.end(), 0);

   std::vector<float> offsetsOut(outSize);
   std::vector<float> weightsOut(outSize);

   // Centre point of gaussian doesn't change
   offsetsOut[0] = offsetsIn[0];  // 0.0
   weightsOut[0] = weightsIn[0];

   // Convert pairs of neighbouring texel weights into a single
   // weight linearly interpolated between texels. Take care of
   // possible last lone weight.
   for (int i = 1; i < outSize; i++) {
      const int first = i * 2 - 1;
      const int second = i * 2;
      weightsOut[i] = weightsIn[first];
      offsetsOut[i] = offsetsIn[first];
      if (second < inSize) {
         weightsOut[i] += weightsIn[second];
         offsetsOut[i] = (offsetsIn[first] * weightsIn[first] +
                          offsetsIn[second] * weightsIn[second]) /
                         weightsOut[i];
      }
   }
   return std::make_pair(offsetsOut, weightsOut);
}

// NOTE(review): GL objects and the ImGui context created in init()/initImgui()
// are not released here.
void RendererGL::destroy() {}
--------------------------------------------------------------------------------
/src/renderer_gl.hpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron
// Copyright (C) 2022 Codeplay Software Limited


#pragma once

#include <utility>
#include <vector>

#include "renderer.hpp"
#include "shader.hpp"

using namespace simulation;

/**
 * OpenGL 4.5 renderer: draws the particles into an HDR framebuffer, blurs a
 * downscaled copy, computes average luminance and tonemaps to the screen.
 */
class RendererGL : public Renderer {
  public:
   void initWindow() override;
   void init(GLFWwindow *window, int width, int height,
             simulation::Simulator &sim) override;
   void destroy() override;
   /// Initialize Imgui
   void initImgui(GLFWwindow *window);
   void updateParticles() override;
   void render(glm::mat4 proj_mat, glm::mat4 view_mat) override;
   /// Draws the ImGui overlay showing the device name and kernel time (ms)
   void printKernelTime(float kernelTime);
   RendererGL() = default;

  private:
   /// Provides the gl state with window dimensions for fbo size, etc
   void setWindowDimensions(int width, int height);

   /// Generates the star flare texture
   void createFlareTexture();

   /// Creates the VAO and VBO objects
   void createVaosVbos();

   /// Loads the shaders into the gl state
   void initShaders();

   /// Creates the framebuffers and their texture attachments
   void initFbos();

   /// Uploads sprite size, blur offsets and the gaussian kernel as uniforms
   void setUniforms();

   /// Send data obtained from simulation to a buffer
   void setParticleData(const GLuint buffer, const ParticleData &data);

   /// Compute the 1D gaussian kernel for given sigma & halfwidth
   static std::vector<float> gaussKernel(const float sigma,
                                         const int halfwidth);

   /// Optimizes the given 1D gaussian kernel via texel linear interp;
   /// returns (offsets, weights)
   static std::pair<std::vector<float>, std::vector<float>> optimGaussKernel(
       const std::vector<float> &inKernel);

   Simulator *sim{nullptr};

   GLuint flareTex{0};         ///< Texture for the star flare
   GLuint vaoParticles{0};     ///< Vertex definition for points
   GLuint vboParticlesPos{0};  ///< Particle position buffer
   GLuint ssboVelocities{0};   ///< Particle velocity buffer
   GLuint vaoDeferred{0};      ///< Vertex definition for deferred
   GLuint vboDeferred{0};      ///< Vertex buffer of deferred fullscreen tri

   /** Shader programs **/
   ShaderProgram programHdr;      ///< HDR rendering step
   ShaderProgram programBlur;     ///< Bloom blurring step
   ShaderProgram programLum;      ///< Average luminance step
   ShaderProgram programTonemap;  ///< Tonemapping step

   GLuint fbos[4]{};     ///< FBOs (0 for hdr, 1 & 2 for blur ping pong, 3 for
                         ///< luminance)
   GLuint attachs[4]{};  ///< Respective FBO attachments.

   int texSize{0};        ///< Flare texture size in pixels
   int lumLod{0};         ///< Luminance texture level to sample from
   int blurDownscale{1};  ///< Downscale factor for the blurring step
   int width_{0};         ///< Viewport width
   int height_{0};        ///< Viewport height

   size_t numParticles{0};
   size_t computeIterations{0};
};
--------------------------------------------------------------------------------
/src/shader.cpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron

#include "shader.hpp"

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

using namespace std;

ShaderProgram::ShaderProgram() : id(0) {}

void ShaderProgram::source(GLenum shader_type, const string &filename) {
   if (!id) id = glCreateProgram();

   string code;

   // IO stuff
   try {
      stringstream sstream;
      {
         ifstream stream;
         stream.exceptions(ifstream::failbit | ifstream::badbit);
         stream.open(filename);
         sstream << stream.rdbuf();
      }
      code = sstream.str();
   } catch (const ifstream::failure &e) {
      throw std::runtime_error(std::string("Can't open ") + filename + ": " +
                               std::string(e.what()));
   }

   const char *s = code.c_str();

   // OpenGL stuff
   const GLuint shad_id = glCreateShader(shader_type);
   glShaderSource(shad_id, 1, &s, nullptr);
   glCompileShader(shad_id);

   GLint success = GL_FALSE;
   glGetShaderiv(shad_id, GL_COMPILE_STATUS, &success);
   if (!success) {
      // error log
      GLchar info_log[2048];
      glGetShaderInfoLog(shad_id, sizeof(info_log), nullptr, info_log);
      glDeleteShader(shad_id);
      throw std::runtime_error(std::string("Can't compile ") + filename +
                               " " + info_log);
   }
   glAttachShader(id, shad_id);
   // Only flags the shader for deletion: it stays alive while attached to
   // the program, so linking still works and the object is not leaked.
   glDeleteShader(shad_id);
}

void ShaderProgram::link() {
   glLinkProgram(id);

   GLint success = GL_FALSE;
   glGetProgramiv(id, GL_LINK_STATUS, &success);
   if (!success) {
      // error log
      GLchar info_log[2048];
      glGetProgramInfoLog(id, sizeof(info_log), nullptr, info_log);
      throw std::runtime_error(std::string("Can't link ") +
                               std::string(info_log));
   }
}

GLuint ShaderProgram::getId() { return id; }
--------------------------------------------------------------------------------
/src/shader.hpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron

#pragma once

#include <GL/glew.h>

#include <string>

class ShaderProgram {
  public:
   ShaderProgram();

   /**
    * Compiles a shader stage from a given source file and attaches it to
    * this program (the program object is created on first use)
    * @param shaderType one of GL_COMPUTE_SHADER, GL_VERTEX_SHADER,
    * GL_TESS_CONTROL_SHADER, GL_TESS_EVALUATION_SHADER, GL_GEOMETRY_SHADER, or
    * GL_FRAGMENT_SHADER
    * @param filename GLSL source file
    * @throws std::runtime_error if the file can't be read or fails to compile
    */
   void source(GLenum shaderType, const std::string &filename);

   /**
    * Links all shaders inside the program
    * @throws std::runtime_error with the GL info log if linking fails
    */
   void link();

   GLuint getId();

  private:
   GLuint id;
};
--------------------------------------------------------------------------------
/src/sim_param.cpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron
// Copyright (C) 2022 Codeplay Software Limited

#include "sim_param.hpp"

#include <cerrno>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <stdexcept>
#include <string>

SimParam::SimParam() {
   G = 2.0;
   dt = 0.005;
   numParticles = 50 * 256;
   numFrames = SIZE_MAX;
   simIterationsPerFrame = 4;
   damping = 0.999998;
   distEps = 1.0e-7;
   gwSize = 64;
   calcMethod = CalculationMethod::BRANCH;
}

namespace {

   [[noreturn]] void rejectArg(const char *what, const char *text) {
      throw std::invalid_argument(std::string("Invalid value for ") + what +
                                  ": '" + text + "'");
   }

   // Parses a whole-string base-10 integer within [minValue, maxValue]
   long parseInteger(const char *text, const char *what, long minValue,
                     long maxValue) {
      errno = 0;
      char *end = nullptr;
      const long value = std::strtol(text, &end, 10);
      if (end == text || *end != '\0' || errno == ERANGE ||
          value < minValue || value > maxValue) {
         rejectArg(what, text);
      }
      return value;
   }

   // Parses a whole-string floating point value
   float parseReal(const char *text, const char *what) {
      errno = 0;
      char *end = nullptr;
      const float value = std::strtof(text, &end);
      if (end == text || *end != '\0' || errno == ERANGE) {
         rejectArg(what, text);
      }
      return value;
   }

}  // namespace

// Set the calculation method from the given string
CalculationMethod getCalculationMethod(const std::string &method) {
   static const std::map<std::string, CalculationMethod> methodMap = {
       {"BRANCH", CalculationMethod::BRANCH},
       {"PREDICATED", CalculationMethod::PREDICATED}};

   const auto it = methodMap.find(method);
   if (it == methodMap.end()) {
      throw std::invalid_argument(
          "Valid calculation methods are BRANCH or PREDICATED");
   }
   return it->second;
}

// Positional arguments override the defaults set in the constructor.
// Malformed, negative or out-of-range values throw std::invalid_argument
// (same as an unknown calculation method) instead of being silently turned
// into 0 or wrapped into a huge unsigned count.
void SimParam::parseArgs(int argc, char **argv) {
   // First argument if existing = number of particle batches (256 per batch)
   if (argc >= 2)
      numParticles =
          256 * static_cast<size_t>(parseInteger(
                    argv[1], "number of particle batches", 1, LONG_MAX / 256));

   // Second argument if existing = number of iterations per frame
   if (argc >= 3)
      simIterationsPerFrame = static_cast<int>(
          parseInteger(argv[2], "iterations per frame", 0, INT_MAX));

   // Third argument if existing = damping parameter
   if (argc >= 4) damping = parseReal(argv[3], "damping");

   // Fourth argument if existing = dt (timestep size) parameter
   if (argc >= 5) dt = parseReal(argv[4], "dt");

   // Fifth argument if existing = distEps (minimum inter-particle distance)
   // parameter
   if (argc >= 6) distEps = parseReal(argv[5], "distEps");

   // Sixth argument if existing = G (gravity) parameter
   if (argc >= 7) G = parseReal(argv[6], "G");

   // Seventh argument if existing = number of frames to simulate
   if (argc >= 8)
      numFrames = static_cast<size_t>(
          parseInteger(argv[7], "number of frames", 0, LONG_MAX));

   // Eighth argument if existing = the work group size
   if (argc >= 9)
      gwSize = static_cast<int>(
          parseInteger(argv[8], "work group size", 1, INT_MAX));

   // Ninth argument if existing = the calculation method
   if (argc >= 10) calcMethod = getCalculationMethod(argv[9]);
}
--------------------------------------------------------------------------------
/src/sim_param.hpp:
--------------------------------------------------------------------------------
// Copyright (C) 2016 - 2018 Sarah Le Luron
// Copyright (C) 2022 Codeplay Software Limited

#pragma once

#include <cstddef>

enum class CalculationMethod {
   BRANCH,
   PREDICATED
};

/**
 * Simulation parameters
 */
class SimParam {
  public:
   /**
    * Creates default simulation parameters
    */
   SimParam();

   /**
    * Provides user-defined simulation parameters
    * @param argc number of arguments
    * @param argv arguments
    */
   void parseArgs(int argc, char **argv);

   float G;                    ///< Gravitational parameter
   float dt;                   ///< Simulation delta t
   size_t numParticles;        ///< Number of particles simulated
   size_t numFrames;           ///< Number of frames simulated
   int simIterationsPerFrame;  ///< Simulation iterations per frame rendered
   float damping;  ///< Damping parameter for simulating 'soupy' galaxy (1.0 =
                   ///< no damping)
   float distEps;  ///< Minimum distance to limit gravity of very close
                   ///< particles
   int gwSize;     ///< Work group size
   CalculationMethod calcMethod;  ///< Use or not branch instruction in kernel
};
--------------------------------------------------------------------------------
/src/simulator.cu:
--------------------------------------------------------------------------------
// Copyright (C) 2022 Codeplay Software Limited
// This work is licensed under the terms of the MIT license.
// For a copy, see https://opensource.org/licenses/MIT.
4 | 5 | #include "simulator.cuh" 6 | //#include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace simulation { 17 | 18 | // Forward decl 19 | template 20 | __global__ void particle_interaction(ParticleData_d pPos, 21 | ParticleData_d pNextPos, 22 | ParticleData_d pVel, SimParam params); 23 | 24 | DiskGalaxySimulator::DiskGalaxySimulator(SimParam params_) 25 | : params(params_), 26 | pos(params_.numParticles), 27 | vel(params_.numParticles), 28 | pos_d(params_.numParticles), 29 | vel_d(params_.numParticles), 30 | pos_next_d(params_.numParticles) { 31 | randomParticlePos(); 32 | initialParticleVel(); 33 | sendToDevice(); 34 | }; 35 | 36 | const std::string* DiskGalaxySimulator::getDeviceName() { 37 | // Query the device first time only 38 | if(devName.empty()){ 39 | char devNameHolder[256]; 40 | int error_id = cuDeviceGetName(devNameHolder, 256, 0); // Assume main device 41 | if(error_id != CUDA_SUCCESS) devName = "Unknown Device"; 42 | else devName = devNameHolder; 43 | } 44 | return &devName; 45 | } 46 | 47 | void DiskGalaxySimulator::stepSim() { 48 | // Compute updated positions 49 | int wg_size = getGwSize(); 50 | int nblocks = ((getNumParticles() - 1) / wg_size) + 1; 51 | 52 | // Profiling info - rather than using the CUDA event recording 53 | // approach, we are instead measuring the time from before kernel 54 | // submission until host synchronization. This is more portable via 55 | // dpct. 
56 | auto start = std::chrono::steady_clock::now(); 57 | for (size_t i = 0; i < params.simIterationsPerFrame; i++) { 58 | if ( getCM() == CalculationMethod::BRANCH ) { 59 | particle_interaction<<>>(pos_d, pos_next_d, vel_d, 60 | params); 61 | } else { 62 | particle_interaction<<>>(pos_d, pos_next_d, vel_d, 63 | params); 64 | } 65 | std::swap(pos_d, pos_next_d); 66 | } 67 | gpuErrchk(cudaDeviceSynchronize()); 68 | auto stop = std::chrono::steady_clock::now(); 69 | lastStepTime = 70 | std::chrono::duration(stop - start) 71 | .count(); 72 | 73 | // Sync data 74 | recvFromDevice(); 75 | } 76 | 77 | // Only necessary because we can't initialize data on device yet, in a 78 | // dpct-friendly way 79 | void DiskGalaxySimulator::sendToDevice() { 80 | gpuErrchk(cudaDeviceSynchronize()); 81 | 82 | gpuErrchk(cudaMemcpy(pos_d.x, pos.x.data(), 83 | params.numParticles * sizeof(coords_t), 84 | cudaMemcpyHostToDevice)); 85 | gpuErrchk(cudaMemcpy(pos_d.y, pos.y.data(), 86 | params.numParticles * sizeof(coords_t), 87 | cudaMemcpyHostToDevice)); 88 | gpuErrchk(cudaMemcpy(pos_d.z, pos.z.data(), 89 | params.numParticles * sizeof(coords_t), 90 | cudaMemcpyHostToDevice)); 91 | 92 | gpuErrchk(cudaMemcpy(vel_d.x, vel.x.data(), 93 | params.numParticles * sizeof(coords_t), 94 | cudaMemcpyHostToDevice)); 95 | gpuErrchk(cudaMemcpy(vel_d.y, vel.y.data(), 96 | params.numParticles * sizeof(coords_t), 97 | cudaMemcpyHostToDevice)); 98 | gpuErrchk(cudaMemcpy(vel_d.z, vel.z.data(), 99 | params.numParticles * sizeof(coords_t), 100 | cudaMemcpyHostToDevice)); 101 | 102 | gpuErrchk(cudaDeviceSynchronize()); 103 | } 104 | 105 | // Receive particle positions & velocity from device 106 | void DiskGalaxySimulator::recvFromDevice() { 107 | gpuErrchk(cudaDeviceSynchronize()); 108 | 109 | gpuErrchk(cudaMemcpy(pos.x.data(), pos_d.x, 110 | params.numParticles * sizeof(coords_t), 111 | cudaMemcpyDeviceToHost)); 112 | gpuErrchk(cudaMemcpy(pos.y.data(), pos_d.y, 113 | params.numParticles * sizeof(coords_t), 114 | 
cudaMemcpyDeviceToHost)); 115 | gpuErrchk(cudaMemcpy(pos.z.data(), pos_d.z, 116 | params.numParticles * sizeof(coords_t), 117 | cudaMemcpyDeviceToHost)); 118 | 119 | gpuErrchk(cudaMemcpy(vel.x.data(), vel_d.x, 120 | params.numParticles * sizeof(coords_t), 121 | cudaMemcpyDeviceToHost)); 122 | gpuErrchk(cudaMemcpy(vel.y.data(), vel_d.y, 123 | params.numParticles * sizeof(coords_t), 124 | cudaMemcpyDeviceToHost)); 125 | gpuErrchk(cudaMemcpy(vel.z.data(), vel_d.z, 126 | params.numParticles * sizeof(coords_t), 127 | cudaMemcpyDeviceToHost)); 128 | gpuErrchk(cudaDeviceSynchronize()); 129 | } 130 | 131 | void DiskGalaxySimulator::randomParticlePos() { 132 | // deterministic - default seed 133 | std::mt19937 gen; 134 | std::uniform_real_distribution<> dis(0.0, 1.0); 135 | 136 | // Disk shape in x-y plane 137 | for (int i = 0; i < params.numParticles; i++) { 138 | float t = dis(gen) * 2 * PI; 139 | float s = dis(gen) * 100; 140 | pos.x[i] = cos(t) * s; 141 | pos.y[i] = sin(t) * s; 142 | } 143 | 144 | // Z component is independent (uniform range 0-4) 145 | std::generate(begin(pos.z), end(pos.z), 146 | [&gen, &dis]() { return 4.0 * dis(gen); }); 147 | } 148 | 149 | void DiskGalaxySimulator::initialParticleVel() { 150 | for (int i = 0; i < params.numParticles; i++) { 151 | vec3 vel = cross({pos.x[i], pos.y[i], pos.z[i]}, {0.0, 0.0, 1.0}); 152 | coords_t orbital_vel = std::sqrt(2.0 * length(vel)); 153 | vel = normalize(vel) * orbital_vel; 154 | this->vel.x[i] = vel.x; 155 | this->vel.y[i] = vel.y; 156 | this->vel.z[i] = vel.z; 157 | } 158 | } 159 | 160 | const ParticleData& DiskGalaxySimulator::getParticlePos() { return pos; }; 161 | 162 | const ParticleData& DiskGalaxySimulator::getParticleVel() { return vel; }; 163 | 164 | // Linear Algebra functions (not yet exposed in header) 165 | HOSTDEV vec3 cross(const vec3 v0, const vec3 v1) { 166 | return vec3(v0.y * v1.z - v0.z * v1.y, v0.z * v1.x - v0.x * v1.z, 167 | v0.x * v1.y - v0.y * v1.x); 168 | }; 169 | 170 | HOSTDEV coords_t 
length(const vec3 v) { 171 | return std::sqrt(std::pow(v.x, 2) + std::pow(v.y, 2) + std::pow(v.z, 2)); 172 | } 173 | 174 | HOSTDEV vec3 normalize(const vec3 v) { 175 | vec3 result = v; 176 | coords_t len = length(v); 177 | result.x /= len; 178 | result.y /= len; 179 | result.z /= len; 180 | return result; 181 | } 182 | 183 | /* O(n^2) implementation (no distance threshold), with no shared 184 | memory etc. 185 | */ 186 | template 187 | __global__ void particle_interaction(ParticleData_d pPos, 188 | ParticleData_d pNextPos, 189 | ParticleData_d pVel, SimParam params) { 190 | int id = threadIdx.x + (blockIdx.x * blockDim.x); 191 | if (id >= params.numParticles) return; 192 | 193 | vec3 force(0.0f, 0.0f, 0.0f); 194 | vec3 pos(pPos.x[id], pPos.y[id], pPos.z[id]); 195 | 196 | #pragma unroll 4 197 | for (int i = 0; i < params.numParticles; i++) { 198 | vec3 other_pos{pPos.x[i], pPos.y[i], pPos.z[i]}; 199 | vec3 r = other_pos - pos; 200 | // Fast computation of 1/(|r|^3) 201 | coords_t dist_sqr = dot(r, r) + params.distEps; 202 | coords_t inv_dist_cube = rsqrt(dist_sqr * dist_sqr * dist_sqr); 203 | 204 | // assume uniform unit mass 205 | if constexpr(ct == CalculationMethod::BRANCH) { 206 | if ( i == id ) continue; 207 | force += r * inv_dist_cube; 208 | } else if constexpr (ct == CalculationMethod::PREDICATED) { 209 | force += r * inv_dist_cube * (i == id); 210 | } 211 | } 212 | 213 | // Update velocity 214 | vec3 curr_vel(pVel.x[id], pVel.y[id], pVel.z[id]); 215 | curr_vel *= params.damping; 216 | curr_vel += force * params.dt * params.G; 217 | 218 | pVel.x[id] = curr_vel.x; 219 | pVel.y[id] = curr_vel.y; 220 | pVel.z[id] = curr_vel.z; 221 | 222 | // Update position (integration) 223 | vec3 curr_pos(pPos.x[id], pPos.y[id], pPos.z[id]); 224 | 225 | curr_pos += curr_vel * params.dt; 226 | pNextPos.x[id] = curr_pos.x; 227 | pNextPos.y[id] = curr_pos.y; 228 | pNextPos.z[id] = curr_pos.z; 229 | } 230 | 231 | } // namespace simulation 232 | 
-------------------------------------------------------------------------------- /src/simulator.cuh: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2022 Codeplay Software Limited 2 | // This work is licensed under the terms of the MIT license. 3 | // For a copy, see https://opensource.org/licenses/MIT. 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "sim_param.hpp" 15 | 16 | #ifdef __CUDACC__ 17 | #define HOSTDEV __host__ __device__ 18 | #else 19 | #define HOSTDEV 20 | #endif 21 | 22 | #define gpuErrchk(ans) \ 23 | { gpuAssert((ans), __FILE__, __LINE__); } 24 | inline void gpuAssert(cudaError_t code, const char *file, int line, 25 | bool abort = true) { 26 | if (code != cudaSuccess) { 27 | fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, 28 | line); 29 | if (abort) exit(code); 30 | } 31 | } 32 | 33 | namespace simulation { 34 | 35 | const float PI = 3.14159265358979323846; 36 | 37 | typedef float coords_t; 38 | 39 | struct vec3 { 40 | coords_t x = 0.0; 41 | coords_t y = 0.0; 42 | coords_t z = 0.0; 43 | 44 | HOSTDEV vec3() {}; 45 | HOSTDEV vec3(coords_t x_, coords_t y_, coords_t z_) 46 | : x{x_}, y{y_}, z{z_} {} 47 | 48 | HOSTDEV inline vec3 &operator+=(const vec3 &rhs) { 49 | x += rhs.x; 50 | y += rhs.y; 51 | z += rhs.z; 52 | return *this; 53 | } 54 | 55 | HOSTDEV inline vec3 &operator*=(const coords_t &scale) { 56 | x *= scale; 57 | y *= scale; 58 | z *= scale; 59 | return *this; 60 | } 61 | }; 62 | 63 | HOSTDEV inline const vec3 operator*(const vec3 &pos, const coords_t &scale) { 64 | return {pos.x * scale, pos.y * scale, pos.z * scale}; 65 | } 66 | 67 | HOSTDEV inline const vec3 operator-(const vec3 &vec1, const vec3 &vec2) { 68 | return {vec1.x - vec2.x, vec1.y - vec2.y, vec1.z - vec2.z}; 69 | } 70 | 71 | HOSTDEV inline coords_t dot(const vec3 &vec1, const vec3 &vec2) { 72 | return vec1.x * vec2.x + vec1.y * vec2.y + 
vec1.z * vec2.z; 73 | } 74 | 75 | struct ParticleData { 76 | std::vector x; 77 | std::vector y; 78 | std::vector z; 79 | 80 | ParticleData(std::vector x_, std::vector y_, 81 | std::vector z_) 82 | : x(std::move(x_)), y(std::move(y_)), z(std::move(z_)){}; 83 | ParticleData(size_t n) : x(n, 0.0), y(n, 0.0), z(n, 0.0){}; 84 | }; 85 | 86 | // Simply holds 3 coords_t* as a SoA 87 | struct ParticleData_d { 88 | coords_t *x = nullptr; 89 | coords_t *y = nullptr; 90 | coords_t *z = nullptr; 91 | 92 | ParticleData_d(size_t n) { 93 | // Allocate device memory for particle coords & velocity... 94 | gpuErrchk(cudaMalloc((void **)&x, sizeof(coords_t) * n)); 95 | gpuErrchk(cudaMalloc((void **)&y, sizeof(coords_t) * n)); 96 | gpuErrchk(cudaMalloc((void **)&z, sizeof(coords_t) * n)); 97 | }; 98 | }; 99 | 100 | HOSTDEV coords_t length(const vec3 v); 101 | HOSTDEV vec3 cross(const vec3 v0, const vec3 v1); 102 | HOSTDEV vec3 normalize(const vec3 v); 103 | 104 | /* 105 | Interface class for Simulator 106 | */ 107 | class Simulator { 108 | public: 109 | virtual void stepSim() = 0; 110 | virtual size_t getNumParticles() = 0; 111 | virtual const ParticleData &getParticlePos() = 0; 112 | virtual const ParticleData &getParticleVel() = 0; 113 | virtual float getLastStepTime() = 0; 114 | virtual const std::string* getDeviceName() = 0; 115 | virtual int getGwSize() = 0; 116 | }; 117 | 118 | /* 119 | DiskGalaxySimulator class to handle execution of the nbody simulation. 120 | 121 | Regular data transfer only occurs in the device->host direction (from 122 | Simulator to Renderer). 
# Copyright (C) 2016 - 2018 Sarah Le Luron
# Copyright (C) 2022 Codeplay Software Limited

# Build script for the SYCL (dpct-migrated) n-body binary. It defines two
# executables from the same sources: ${BINARY_NAME} (target "release") and
# ${BINARY_NAME}_d (target "debug", built with ${DEBUG_FLAGS}).
# BINARY_NAME and RENDER are not set here; presumably they come from the
# parent CMakeLists.txt -- confirm.

find_package(PkgConfig REQUIRED)

# Rendering dependencies are only looked up when RENDER is enabled.
if (RENDER)
  pkg_check_modules(Glew REQUIRED IMPORTED_TARGET glew)

  find_package(glm REQUIRED)
  find_package(glfw3 REQUIRED)
  find_package(OpenGL REQUIRED)
endif()

# Supplies dpct_INCLUDE_DIR, used for the include path of both executables.
find_package(dpct REQUIRED)

# Sources compiled in every configuration.
set(COMMON_SOURCE
  nbody.cpp
  sim_param.cpp
  simulator.dp.cpp)

# Sources added only when rendering is enabled.
set(OPENGL_SOURCE
  gen.cpp
  camera.cpp
  renderer_gl.cpp
  shader.cpp)

set(DEBUG_FLAGS -g -O0)

# Pick libraries, the compile definition and the source list by RENDER.
# nbody.cpp guards its rendering code with `#ifndef DISABLE_GL`, so
# DISABLE_GL is the definition that switches rendering off.
if (RENDER)
  set(RENDER_LIB glm::glm glfw PkgConfig::Glew OpenGL::OpenGL)
  set(RENDER_FLAG -DUSE_OPENGL)
  set(SOURCE_FILES ${COMMON_SOURCE} ${OPENGL_SOURCE})
else()
  set(RENDER_LIB)
  set(RENDER_FLAG DISABLE_GL)
  set(SOURCE_FILES ${COMMON_SOURCE})
endif()

# Release executable.
add_custom_target(release DEPENDS ${BINARY_NAME})
add_executable(${BINARY_NAME} ${SOURCE_FILES})
target_compile_definitions(${BINARY_NAME} PRIVATE ${RENDER_FLAG} COMPILER_NAME="SYCL")
target_link_libraries(${BINARY_NAME} PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME} PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME} PRIVATE ${dpct_INCLUDE_DIR})

# Debug executable: same sources and definitions, "_d" suffix.
add_custom_target(debug DEPENDS ${BINARY_NAME}_d)
add_executable(${BINARY_NAME}_d ${SOURCE_FILES})
target_compile_definitions(${BINARY_NAME}_d PRIVATE ${RENDER_FLAG} COMPILER_NAME="SYCL")
target_link_libraries(${BINARY_NAME}_d PRIVATE ${RENDER_LIB})
target_compile_features(${BINARY_NAME}_d PRIVATE cxx_auto_type cxx_nullptr cxx_range_for)
target_include_directories(${BINARY_NAME}_d PRIVATE ${dpct_INCLUDE_DIR})

# Fallback when no glm::glm target exists: create an interface target that
# only carries the include directory.
# NOTE(review): GLM_INCLUDE_DIR is presumably set by cmake/FindGLM.cmake -- confirm.
if(NOT TARGET glm::glm)
  add_library(glm::glm IMPORTED INTERFACE)
  target_include_directories(glm::glm INTERFACE ${GLM_INCLUDE_DIR})
endif()


# Configuration fails unless the C++ compiler path ends in dpcpp, icpx or
# clang++ (optionally with a .exe suffix).
if(NOT ((CMAKE_CXX_COMPILER MATCHES ".*dpcpp(\\.exe)?$") OR
        (CMAKE_CXX_COMPILER MATCHES ".*icpx(\\.exe)?$") OR
        (CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+(\\.exe)?$")))
  message( FATAL_ERROR "Invalid C++ compiler for SYCL compilation" )
endif()

option(DPCPP_CUDA_SUPPORT "Whether to enable DPC++'s CUDA backend" ON)
if(DPCPP_CUDA_SUPPORT)
  # Ask nvidia-smi for the compute capability of the first GPU it lists
  # (dots removed, e.g. "8.6" -> "86"); fall back to the default when the
  # command prints nothing.
  set(DEFAULT_CUDA_COMPUTE_CAPABILITY "50")
  execute_process(
    COMMAND bash -c "which nvidia-smi >/dev/null && nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | tr -d '.'"
    OUTPUT_VARIABLE CUDA_COMPUTE_CAPABILITY
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  if ("${CUDA_COMPUTE_CAPABILITY}" STREQUAL "")
    set(CUDA_COMPUTE_CAPABILITY ${DEFAULT_CUDA_COMPUTE_CAPABILITY})
  endif()
  # Targets both nvptx64-nvidia-cuda and spir64.
  set(SYCL_FLAGS -fsycl
    -fsycl-targets=nvptx64-nvidia-cuda,spir64
    -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_${CUDA_COMPUTE_CAPABILITY}
    -fsycl-unnamed-lambda
    -fgpu-inline-threshold=100000)
  # set(OPT_FLAGS -Ofast)
else()
  set(SYCL_FLAGS -fsycl -fsycl-targets=spir64 -fsycl-unnamed-lambda)
  set(OPT_FLAGS)
endif()

# The SYCL flags are passed to both the compile and the link step.
target_compile_options(${BINARY_NAME} PRIVATE ${SYCL_FLAGS} ${OPT_FLAGS})
target_link_options(${BINARY_NAME} PRIVATE ${SYCL_FLAGS} ${OPT_FLAGS})

target_compile_options(${BINARY_NAME}_d PRIVATE ${SYCL_FLAGS} ${DEBUG_FLAGS})
target_link_options(${BINARY_NAME}_d PRIVATE ${SYCL_FLAGS} ${DEBUG_FLAGS})
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | // Copyright (C) 2022 Codeplay Software Limited 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef DISABLE_GL 9 | #include 10 | 11 | #include "renderer_gl.hpp" 12 | #include 13 | #include 14 | #include "camera.hpp" 15 | #include "gen.hpp" 16 | #else 17 | #include 18 | #endif 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "sim_param.hpp" 26 | #include "simulator.dp.hpp" 27 | 28 | 29 | using namespace std; 30 | using namespace simulation; 31 | 32 | int main(int argc, char **argv) { 33 | 34 | SimParam params; 35 | params.parseArgs(argc, argv); 36 | 37 | DiskGalaxySimulator nbodySim(params); 38 | 39 | #ifndef DISABLE_GL 40 | // Window initialization 41 | GLFWwindow *window; 42 | 43 | glfwSetErrorCallback([](const int error, const char *msg) { 44 | cout << "Error id : " << error << ", " << msg << endl; 45 | exit(-1); 46 | }); 47 | 48 | if (!glfwInit()) { 49 | cout << "GLFW can't initialize" << endl; 50 | return -1; 51 | } 52 | 53 | GLFWmonitor *monitor = glfwGetPrimaryMonitor(); 54 | 55 | const GLFWvidmode *mode = glfwGetVideoMode(monitor); 56 | 57 | glfwWindowHint(GLFW_RED_BITS, mode->redBits); 58 | glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits); 59 | glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits); 60 | glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate); 61 | glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE); 62 | 63 | RendererGL renderer; 64 | 65 | renderer.initWindow(); 66 | 67 | int width = mode->width; 68 | int height = mode->height - 30; 69 | window = glfwCreateWindow(width, height, "N-Body Simulation", NULL, NULL); 70 | 71 | glfwMakeContextCurrent(window); 72 | 73 | renderer.init(window, width, height, nbodySim); 74 | renderer.initImgui(window); 75 | 76 | // Get initial postitions generated in simulator ctor 77 | renderer.updateParticles(); 78 | 79 | Camera camera; 80 | 81 | float last_fps{0}; 
82 | #endif 83 | 84 | std::vector stepTimes; 85 | int step{0}; 86 | 87 | // Main loop 88 | float stepTime = 0.0; 89 | 90 | #ifndef DISABLE_GL 91 | while (!glfwWindowShouldClose(window) && 92 | glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_RELEASE && 93 | step < params.numFrames) { 94 | double frame_start = glfwGetTime(); 95 | #else 96 | while ( step < params.numFrames) { 97 | #endif 98 | nbodySim.stepSim(); 99 | #ifndef DISABLE_GL 100 | renderer.updateParticles(); 101 | renderer.render(camera.getProj(width, height), camera.getView()); 102 | #endif 103 | if(!(step % 20)) stepTime = nbodySim.getLastStepTime(); 104 | #ifndef DISABLE_GL 105 | renderer.printKernelTime(stepTime); 106 | #endif 107 | 108 | step++; 109 | int warmSteps{2}; 110 | if (step > warmSteps) { 111 | stepTimes.push_back(nbodySim.getLastStepTime()); 112 | float cumStepTime = 113 | std::accumulate(stepTimes.begin(), stepTimes.end(), 0.0); 114 | float meanTime = cumStepTime / stepTimes.size(); 115 | float accum{0.0}; 116 | std::for_each(stepTimes.begin(), stepTimes.end(), 117 | [&](const float time) { 118 | accum += std::pow((time - meanTime), 2); 119 | }); 120 | float stdDev = std::pow(accum / stepTimes.size(), 0.5); 121 | std::cout << "At step " << step << " kernel time is " 122 | << stepTimes.back() << " and mean is " << meanTime 123 | << " and stddev is: " << stdDev << "\n"; 124 | } 125 | #ifndef DISABLE_GL 126 | // Window refresh 127 | glfwSwapBuffers(window); 128 | glfwPollEvents(); 129 | 130 | // Thread sleep to match min frame time 131 | double frame_end = glfwGetTime(); 132 | double elapsed = frame_end - frame_start; 133 | last_fps = 1.0 / elapsed; 134 | #endif 135 | } 136 | #ifndef DISABLE_GL 137 | renderer.destroy(); 138 | glfwDestroyWindow(window); 139 | glfwTerminate(); 140 | #endif 141 | return 0; 142 | } 143 | -------------------------------------------------------------------------------- /src_sycl/renderer.hpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 - 2018 Sarah Le Luron 2 | // Copyright (C) 2022 Codeplay Software Limited 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "simulator.dp.hpp" 13 | 14 | class Renderer { 15 | public: 16 | virtual void initWindow() = 0; 17 | 18 | /** 19 | * Initializes the gl state 20 | * @param width viewport width 21 | * @param height viewport height 22 | * @param params simulation parameters 23 | */ 24 | virtual void init(GLFWwindow *window, int width, int height, 25 | simulation::Simulator &sim) = 0; 26 | 27 | virtual void destroy() = 0; 28 | 29 | /** 30 | * Supplies the gl state with updated particle position and velocity 31 | * @param pos particle positions 32 | * @param vel particle velocities 33 | */ 34 | virtual void updateParticles() = 0; 35 | 36 | /** 37 | * Renders the particles at the current step 38 | * @param proj_mat projection matrix @see camera_get_proj 39 | * @param view_mat view matrix @see camera_get_view 40 | */ 41 | virtual void render(glm::mat4 projMat, glm::mat4 viewMat) = 0; 42 | }; -------------------------------------------------------------------------------- /src_sycl/renderer_gl.cpp: -------------------------------------------------------------------------------- 1 | ../src/renderer_gl.cpp -------------------------------------------------------------------------------- /src_sycl/renderer_gl.hpp: -------------------------------------------------------------------------------- 1 | ../src/renderer_gl.hpp -------------------------------------------------------------------------------- /src_sycl/shader.cpp: -------------------------------------------------------------------------------- 1 | ../src/shader.cpp -------------------------------------------------------------------------------- /src_sycl/shader.hpp: -------------------------------------------------------------------------------- 1 | 
../src/shader.hpp -------------------------------------------------------------------------------- /src_sycl/sim_param.cpp: -------------------------------------------------------------------------------- 1 | ../src/sim_param.cpp -------------------------------------------------------------------------------- /src_sycl/sim_param.hpp: -------------------------------------------------------------------------------- 1 | ../src/sim_param.hpp -------------------------------------------------------------------------------- /src_sycl/simulator.dp.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2022 Codeplay Software Limited 2 | // This work is licensed under the terms of the MIT license. 3 | // For a copy, see https://opensource.org/licenses/MIT. 4 | 5 | #include 6 | #include 7 | #include "simulator.dp.hpp" 8 | //#include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace simulation { 18 | 19 | // Forward decl 20 | template 21 | void particle_interaction(ParticleData_d pPos, 22 | ParticleData_d pNextPos, 23 | ParticleData_d pVel, SimParam params, 24 | const sycl::nd_item<1> &item_ct1); 25 | 26 | DiskGalaxySimulator::DiskGalaxySimulator(SimParam params_) 27 | : params(params_), 28 | pos(params_.numParticles), 29 | vel(params_.numParticles), 30 | pos_d(params_.numParticles), 31 | vel_d(params_.numParticles), 32 | pos_next_d(params_.numParticles) { 33 | randomParticlePos(); 34 | initialParticleVel(); 35 | sendToDevice(); 36 | }; 37 | 38 | const std::string* DiskGalaxySimulator::getDeviceName() { 39 | // Query the device first time only 40 | if(devName.empty()){ 41 | char devNameHolder[256]; 42 | /* 43 | DPCT1003:4: Migrated API does not return error code. (*, 0) is inserted. 44 | You may need to rewrite this code. 
45 | */ 46 | int error_id = (memcpy(devNameHolder, 47 | dpct::dev_mgr::instance() 48 | .get_device(0) 49 | .get_info() 50 | .c_str(), 51 | 256), 52 | 0); // Assume main device 53 | if (error_id != 0) devName = "Unknown Device"; 54 | else devName = devNameHolder; 55 | } 56 | return &devName; 57 | } 58 | 59 | void DiskGalaxySimulator::stepSim() { 60 | // Compute updated positions 61 | int wg_size = getGwSize(); 62 | int nblocks = ((getNumParticles() - 1) / wg_size) + 1; 63 | 64 | // Profiling info - rather than using the CUDA event recording 65 | // approach, we are instead measuring the time from before kernel 66 | // submission until host synchronization. This is more portable via 67 | // dpct. 68 | auto start = std::chrono::steady_clock::now(); 69 | for (size_t i = 0; i < params.simIterationsPerFrame; i++) { 70 | dpct::get_default_queue().submit([&](sycl::handler &cgh) { 71 | auto pos_d_ct0 = pos_d; 72 | auto pos_next_d_ct1 = pos_next_d; 73 | auto vel_d_ct2 = vel_d; 74 | auto params_ct3 = params; 75 | 76 | if ( getCM() == CalculationMethod::BRANCH ) { 77 | cgh.parallel_for< 78 | dpct_kernel_name>( 79 | sycl::nd_range<1>( 80 | sycl::range<1>(nblocks) * sycl::range<1>(wg_size), 81 | sycl::range<1>(wg_size)), 82 | [=](sycl::nd_item<1> item_ct1) { 83 | particle_interaction(pos_d_ct0, pos_next_d_ct1, vel_d_ct2, 84 | params_ct3, item_ct1); 85 | }); 86 | } else { 87 | cgh.parallel_for< 88 | dpct_kernel_name>( 89 | sycl::nd_range<1>( 90 | sycl::range<1>(nblocks) * sycl::range<1>(wg_size), 91 | sycl::range<1>(wg_size)), 92 | [=](sycl::nd_item<1> item_ct1) { 93 | particle_interaction(pos_d_ct0, pos_next_d_ct1, vel_d_ct2, 94 | params_ct3, item_ct1); 95 | }); 96 | } 97 | }); 98 | std::swap(pos_d, pos_next_d); 99 | } 100 | /* 101 | DPCT1003:5: Migrated API does not return error code. (*, 0) is inserted. 102 | You may need to rewrite this code. 
103 | */ 104 | gpuErrchk((dpct::get_current_device().queues_wait_and_throw(), 0)); 105 | auto stop = std::chrono::steady_clock::now(); 106 | lastStepTime = 107 | std::chrono::duration(stop - start) 108 | .count(); 109 | 110 | // Sync data 111 | recvFromDevice(); 112 | } 113 | 114 | // Only necessary because we can't initialize data on device yet, in a 115 | // dpct-friendly way 116 | void DiskGalaxySimulator::sendToDevice() { 117 | dpct::device_ext &dev_ct1 = dpct::get_current_device(); 118 | sycl::queue &q_ct1 = dev_ct1.default_queue(); 119 | /* 120 | DPCT1003:6: Migrated API does not return error code. (*, 0) is inserted. 121 | You may need to rewrite this code. 122 | */ 123 | gpuErrchk((dev_ct1.queues_wait_and_throw(), 0)); 124 | 125 | /* 126 | DPCT1003:7: Migrated API does not return error code. (*, 0) is inserted. 127 | You may need to rewrite this code. 128 | */ 129 | gpuErrchk((q_ct1 130 | .memcpy(pos_d.x, pos.x.data(), 131 | params.numParticles * sizeof(coords_t)) 132 | .wait(), 133 | 0)); 134 | /* 135 | DPCT1003:8: Migrated API does not return error code. (*, 0) is inserted. 136 | You may need to rewrite this code. 137 | */ 138 | gpuErrchk((q_ct1 139 | .memcpy(pos_d.y, pos.y.data(), 140 | params.numParticles * sizeof(coords_t)) 141 | .wait(), 142 | 0)); 143 | /* 144 | DPCT1003:9: Migrated API does not return error code. (*, 0) is inserted. 145 | You may need to rewrite this code. 146 | */ 147 | gpuErrchk((q_ct1 148 | .memcpy(pos_d.z, pos.z.data(), 149 | params.numParticles * sizeof(coords_t)) 150 | .wait(), 151 | 0)); 152 | 153 | /* 154 | DPCT1003:10: Migrated API does not return error code. (*, 0) is inserted. 155 | You may need to rewrite this code. 156 | */ 157 | gpuErrchk((q_ct1 158 | .memcpy(vel_d.x, vel.x.data(), 159 | params.numParticles * sizeof(coords_t)) 160 | .wait(), 161 | 0)); 162 | /* 163 | DPCT1003:11: Migrated API does not return error code. (*, 0) is inserted. 164 | You may need to rewrite this code. 
165 | */ 166 | gpuErrchk((q_ct1 167 | .memcpy(vel_d.y, vel.y.data(), 168 | params.numParticles * sizeof(coords_t)) 169 | .wait(), 170 | 0)); 171 | /* 172 | DPCT1003:12: Migrated API does not return error code. (*, 0) is inserted. 173 | You may need to rewrite this code. 174 | */ 175 | gpuErrchk((q_ct1 176 | .memcpy(vel_d.z, vel.z.data(), 177 | params.numParticles * sizeof(coords_t)) 178 | .wait(), 179 | 0)); 180 | 181 | /* 182 | DPCT1003:13: Migrated API does not return error code. (*, 0) is inserted. 183 | You may need to rewrite this code. 184 | */ 185 | gpuErrchk((dev_ct1.queues_wait_and_throw(), 0)); 186 | } 187 | 188 | // Receive particle positions & velocity from device 189 | void DiskGalaxySimulator::recvFromDevice() { 190 | dpct::device_ext &dev_ct1 = dpct::get_current_device(); 191 | sycl::queue &q_ct1 = dev_ct1.default_queue(); 192 | /* 193 | DPCT1003:14: Migrated API does not return error code. (*, 0) is inserted. 194 | You may need to rewrite this code. 195 | */ 196 | gpuErrchk((dev_ct1.queues_wait_and_throw(), 0)); 197 | 198 | /* 199 | DPCT1003:15: Migrated API does not return error code. (*, 0) is inserted. 200 | You may need to rewrite this code. 201 | */ 202 | gpuErrchk((q_ct1 203 | .memcpy(pos.x.data(), pos_d.x, 204 | params.numParticles * sizeof(coords_t)) 205 | .wait(), 206 | 0)); 207 | /* 208 | DPCT1003:16: Migrated API does not return error code. (*, 0) is inserted. 209 | You may need to rewrite this code. 210 | */ 211 | gpuErrchk((q_ct1 212 | .memcpy(pos.y.data(), pos_d.y, 213 | params.numParticles * sizeof(coords_t)) 214 | .wait(), 215 | 0)); 216 | /* 217 | DPCT1003:17: Migrated API does not return error code. (*, 0) is inserted. 218 | You may need to rewrite this code. 219 | */ 220 | gpuErrchk((q_ct1 221 | .memcpy(pos.z.data(), pos_d.z, 222 | params.numParticles * sizeof(coords_t)) 223 | .wait(), 224 | 0)); 225 | 226 | /* 227 | DPCT1003:18: Migrated API does not return error code. (*, 0) is inserted. 228 | You may need to rewrite this code. 
229 | */ 230 | gpuErrchk((q_ct1 231 | .memcpy(vel.x.data(), vel_d.x, 232 | params.numParticles * sizeof(coords_t)) 233 | .wait(), 234 | 0)); 235 | /* 236 | DPCT1003:19: Migrated API does not return error code. (*, 0) is inserted. 237 | You may need to rewrite this code. 238 | */ 239 | gpuErrchk((q_ct1 240 | .memcpy(vel.y.data(), vel_d.y, 241 | params.numParticles * sizeof(coords_t)) 242 | .wait(), 243 | 0)); 244 | /* 245 | DPCT1003:20: Migrated API does not return error code. (*, 0) is inserted. 246 | You may need to rewrite this code. 247 | */ 248 | gpuErrchk((q_ct1 249 | .memcpy(vel.z.data(), vel_d.z, 250 | params.numParticles * sizeof(coords_t)) 251 | .wait(), 252 | 0)); 253 | /* 254 | DPCT1003:21: Migrated API does not return error code. (*, 0) is inserted. 255 | You may need to rewrite this code. 256 | */ 257 | gpuErrchk((dev_ct1.queues_wait_and_throw(), 0)); 258 | } 259 | 260 | void DiskGalaxySimulator::randomParticlePos() { 261 | // deterministic - default seed 262 | std::mt19937 gen; 263 | std::uniform_real_distribution<> dis(0.0, 1.0); 264 | 265 | // Disk shape in x-y plane 266 | for (int i = 0; i < params.numParticles; i++) { 267 | float t = dis(gen) * 2 * PI; 268 | float s = dis(gen) * 100; 269 | pos.x[i] = cos(t) * s; 270 | pos.y[i] = sin(t) * s; 271 | } 272 | 273 | // Z component is independent (uniform range 0-4) 274 | std::generate(begin(pos.z), end(pos.z), 275 | [&gen, &dis]() { return 4.0 * dis(gen); }); 276 | } 277 | 278 | void DiskGalaxySimulator::initialParticleVel() { 279 | for (int i = 0; i < params.numParticles; i++) { 280 | vec3 vel = cross({pos.x[i], pos.y[i], pos.z[i]}, {0.0, 0.0, 1.0}); 281 | coords_t orbital_vel = std::sqrt(2.0 * length(vel)); 282 | vel = normalize(vel) * orbital_vel; 283 | this->vel.x[i] = vel.x; 284 | this->vel.y[i] = vel.y; 285 | this->vel.z[i] = vel.z; 286 | } 287 | } 288 | 289 | const ParticleData& DiskGalaxySimulator::getParticlePos() { return pos; }; 290 | 291 | const ParticleData& 
DiskGalaxySimulator::getParticleVel() { return vel; }; 292 | 293 | // Linear Algebra functions (not yet exposed in header) 294 | HOSTDEV vec3 cross(const vec3 v0, const vec3 v1) { 295 | return vec3(v0.y * v1.z - v0.z * v1.y, v0.z * v1.x - v0.x * v1.z, 296 | v0.x * v1.y - v0.y * v1.x); 297 | }; 298 | 299 | HOSTDEV coords_t length(const vec3 v) { 300 | return sycl::sqrt(v.x * v.x + v.y * v.y + v.z * v.z); 301 | } 302 | 303 | HOSTDEV vec3 normalize(const vec3 v) { 304 | vec3 result = v; 305 | coords_t len = length(v); 306 | result.x /= len; 307 | result.y /= len; 308 | result.z /= len; 309 | return result; 310 | } 311 | 312 | /* O(n^2) implementation (no distance threshold), with no shared 313 | memory etc. 314 | */ 315 | template 316 | void particle_interaction(ParticleData_d pPos, 317 | ParticleData_d pNextPos, 318 | ParticleData_d pVel, SimParam params, 319 | const sycl::nd_item<1> &item_ct1) { 320 | int id = item_ct1.get_local_id(0) + 321 | (item_ct1.get_group(0) * item_ct1.get_local_range(0)); 322 | if (id >= params.numParticles) return; 323 | 324 | vec3 force(0.0f, 0.0f, 0.0f); 325 | vec3 pos(pPos.x[id], pPos.y[id], pPos.z[id]); 326 | 327 | #pragma unroll 4 328 | for (int i = 0; i < params.numParticles; i++) { 329 | vec3 other_pos{pPos.x[i], pPos.y[i], pPos.z[i]}; 330 | vec3 r = other_pos - pos; 331 | // Fast computation of 1/(|r|^3) 332 | coords_t dist_sqr = dot(r, r) + params.distEps; 333 | coords_t inv_dist_cube = sycl::rsqrt(dist_sqr * dist_sqr * dist_sqr); 334 | 335 | // assume uniform unit mass 336 | if constexpr(ct == CalculationMethod::BRANCH) { 337 | if (i == id) continue; 338 | force += r * inv_dist_cube; 339 | } else if constexpr (ct == CalculationMethod::PREDICATED) { 340 | force += r * inv_dist_cube * (i == id); 341 | } 342 | } 343 | 344 | // Update velocity 345 | vec3 curr_vel(pVel.x[id], pVel.y[id], pVel.z[id]); 346 | curr_vel *= params.damping; 347 | curr_vel += force * params.dt * params.G; 348 | 349 | pVel.x[id] = curr_vel.x; 350 | pVel.y[id] = 
curr_vel.y; 351 | pVel.z[id] = curr_vel.z; 352 | 353 | // Update position (integration) 354 | vec3 curr_pos(pPos.x[id], pPos.y[id], pPos.z[id]); 355 | 356 | curr_pos += curr_vel * params.dt; 357 | pNextPos.x[id] = curr_pos.x; 358 | pNextPos.y[id] = curr_pos.y; 359 | pNextPos.z[id] = curr_pos.z; 360 | } 361 | 362 | } // namespace simulation 363 | -------------------------------------------------------------------------------- /src_sycl/simulator.dp.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2022 Codeplay Software Limited 2 | // This work is licensed under the terms of the MIT license. 3 | // For a copy, see https://opensource.org/licenses/MIT. 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "sim_param.hpp" 15 | 16 | #ifdef SYCL_LANGUAGE_VERSION 17 | #define HOSTDEV 18 | #else 19 | #define HOSTDEV 20 | #endif 21 | 22 | #define gpuErrchk(ans) \ 23 | { gpuAssert((ans), __FILE__, __LINE__); } 24 | inline void gpuAssert(dpct::err0 code, const char *file, int line, 25 | bool abort = true) { 26 | } 27 | 28 | namespace simulation { 29 | 30 | const float PI = 3.14159265358979323846; 31 | 32 | typedef float coords_t; 33 | 34 | struct vec3 { 35 | coords_t x = 0.0; 36 | coords_t y = 0.0; 37 | coords_t z = 0.0; 38 | 39 | HOSTDEV vec3() {}; 40 | HOSTDEV vec3(coords_t x_, coords_t y_, coords_t z_) 41 | : x{x_}, y{y_}, z{z_} {} 42 | 43 | HOSTDEV inline vec3 &operator+=(const vec3 &rhs) { 44 | x += rhs.x; 45 | y += rhs.y; 46 | z += rhs.z; 47 | return *this; 48 | } 49 | 50 | HOSTDEV inline vec3 &operator*=(const coords_t &scale) { 51 | x *= scale; 52 | y *= scale; 53 | z *= scale; 54 | return *this; 55 | } 56 | }; 57 | 58 | HOSTDEV inline const vec3 operator*(const vec3 &pos, const coords_t &scale) { 59 | return {pos.x * scale, pos.y * scale, pos.z * scale}; 60 | } 61 | 62 | HOSTDEV inline const vec3 operator-(const vec3 &vec1, const vec3 &vec2) { 63 | 
return {vec1.x - vec2.x, vec1.y - vec2.y, vec1.z - vec2.z}; 64 | } 65 | 66 | HOSTDEV inline coords_t dot(const vec3 &vec1, const vec3 &vec2) { 67 | return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z; 68 | } 69 | 70 | struct ParticleData { 71 | std::vector x; 72 | std::vector y; 73 | std::vector z; 74 | 75 | ParticleData(std::vector x_, std::vector y_, 76 | std::vector z_) 77 | : x(std::move(x_)), y(std::move(y_)), z(std::move(z_)){}; 78 | ParticleData(size_t n) : x(n, 0.0), y(n, 0.0), z(n, 0.0){}; 79 | }; 80 | 81 | // Simply holds 3 coords_t* as a SoA 82 | struct ParticleData_d { 83 | coords_t *x = nullptr; 84 | coords_t *y = nullptr; 85 | coords_t *z = nullptr; 86 | 87 | ParticleData_d(size_t n) { 88 | dpct::device_ext &dev_ct1 = dpct::get_current_device(); 89 | sycl::queue &q_ct1 = dev_ct1.default_queue(); 90 | // Allocate device memory for particle coords & velocity... 91 | /* 92 | DPCT1003:1: Migrated API does not return error code. (*, 0) is 93 | inserted. You may need to rewrite this code. 94 | */ 95 | gpuErrchk((x = sycl::malloc_device(n, q_ct1), 0)); 96 | /* 97 | DPCT1003:2: Migrated API does not return error code. (*, 0) is 98 | inserted. You may need to rewrite this code. 99 | */ 100 | gpuErrchk((y = sycl::malloc_device(n, q_ct1), 0)); 101 | /* 102 | DPCT1003:3: Migrated API does not return error code. (*, 0) is 103 | inserted. You may need to rewrite this code. 
104 | */ 105 | gpuErrchk((z = sycl::malloc_device(n, q_ct1), 0)); 106 | }; 107 | }; 108 | 109 | HOSTDEV coords_t length(const vec3 v); 110 | HOSTDEV vec3 cross(const vec3 v0, const vec3 v1); 111 | HOSTDEV vec3 normalize(const vec3 v); 112 | 113 | /* 114 | Interface class for Simulator 115 | */ 116 | class Simulator { 117 | public: 118 | virtual void stepSim() = 0; 119 | virtual size_t getNumParticles() = 0; 120 | virtual const ParticleData &getParticlePos() = 0; 121 | virtual const ParticleData &getParticleVel() = 0; 122 | virtual float getLastStepTime() = 0; 123 | virtual const std::string* getDeviceName() = 0; 124 | virtual CalculationMethod getCM() = 0; 125 | }; 126 | 127 | /* 128 | DiskGalaxySimulator class to handle execution of the nbody simulation. 129 | 130 | Regular data transfer only occurs in the device->host direction (from 131 | Simulator to Renderer). 132 | 133 | Invariants: 134 | - Has params 135 | - Has valid particle positions & velocities, allocated on host & device 136 | */ 137 | 138 | class DiskGalaxySimulator : public Simulator { 139 | public: 140 | DiskGalaxySimulator(SimParam params_); 141 | 142 | void stepSim(); 143 | float getLastStepTime() { return lastStepTime; } 144 | size_t getNumParticles() { return params.numParticles; } 145 | const ParticleData &getParticlePos(); 146 | const ParticleData &getParticleVel(); 147 | const std::string* getDeviceName(); 148 | int getGwSize() { return params.gwSize; } 149 | CalculationMethod getCM() { return params.calcMethod; } 150 | 151 | private: 152 | SimParam params; 153 | std::string devName; 154 | float lastStepTime{0.0}; 155 | 156 | // Data for particle positions & vel on host 157 | ParticleData pos; 158 | ParticleData vel; 159 | 160 | // and on device 161 | ParticleData_d pos_d; 162 | ParticleData_d pos_next_d; // double buffering 163 | ParticleData_d vel_d; 164 | 165 | void randomParticlePos(); 166 | void initialParticleVel(); 167 | void sendToDevice(); 168 | void recvFromDevice(); 169 | }; 170 | 
171 | } // namespace simulation 172 | --------------------------------------------------------------------------------