├── .gitignore ├── .gitattributes ├── images ├── suzanne_having_a_bad_day.png └── suzanne_having_a_bad_day.png.import ├── shaders ├── unpack_verts_from_compute.tres ├── write_triangles.glsl.import ├── unpack_verts_from_compute.gdshader └── write_triangles.glsl ├── project.godot ├── monkey.obj.import ├── icon.svg ├── HelloComputeGeometry.gd ├── icon.svg.import ├── lib ├── ByteWriter.gd ├── ByteBuf.gd └── FreeLookCamera.gd ├── LICENSE.txt ├── HelloComputeGeometry.tscn ├── README.md └── ComputeGeometryOrchestrator.gd /.gitignore: -------------------------------------------------------------------------------- 1 | # Godot 4+ specific ignores 2 | .godot/ 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Normalize EOL for all files that Git considers text files. 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /images/suzanne_having_a_bad_day.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gamma-delta/HelloComputeGeometry/HEAD/images/suzanne_having_a_bad_day.png -------------------------------------------------------------------------------- /shaders/unpack_verts_from_compute.tres: -------------------------------------------------------------------------------- 1 | [gd_resource type="ShaderMaterial" load_steps=2 format=3 uid="uid://cglggv6kgsvdc"] 2 | 3 | [ext_resource type="Shader" path="res://shaders/unpack_verts_from_compute.gdshader" id="1_p8oxl"] 4 | 5 | [resource] 6 | render_priority = 0 7 | shader = ExtResource("1_p8oxl") 8 | -------------------------------------------------------------------------------- /shaders/write_triangles.glsl.import: -------------------------------------------------------------------------------- 1 | [remap] 2 | 3 | importer="glsl" 4 | type="RDShaderFile" 5 | 
uid="uid://d1kq6fad8v7dm" 6 | path="res://.godot/imported/write_triangles.glsl-774c94df21eb4a64e0dc4e8bec2480f1.res" 7 | 8 | [deps] 9 | 10 | source_file="res://shaders/write_triangles.glsl" 11 | dest_files=["res://.godot/imported/write_triangles.glsl-774c94df21eb4a64e0dc4e8bec2480f1.res"] 12 | 13 | [params] 14 | 15 | -------------------------------------------------------------------------------- /project.godot: -------------------------------------------------------------------------------- 1 | ; Engine configuration file. 2 | ; It's best edited using the editor UI and not directly, 3 | ; since the parameters that go here are not all obvious. 4 | ; 5 | ; Format: 6 | ; [section] ; section goes between [] 7 | ; param=value ; assign values to parameters 8 | 9 | config_version=5 10 | 11 | [application] 12 | 13 | config/name="Hello-compute-geometry" 14 | run/main_scene="res://HelloComputeGeometry.tscn" 15 | config/features=PackedStringArray("4.3", "Forward Plus") 16 | config/icon="res://icon.svg" 17 | 18 | [dotnet] 19 | 20 | project/assembly_name="Hello-compute-geometry" 21 | -------------------------------------------------------------------------------- /monkey.obj.import: -------------------------------------------------------------------------------- 1 | [remap] 2 | 3 | importer="wavefront_obj" 4 | importer_version=1 5 | type="Mesh" 6 | uid="uid://hxyq1veajvti" 7 | path="res://.godot/imported/monkey.obj-4497fd28cacdf5828d7618e33c59dea2.mesh" 8 | 9 | [deps] 10 | 11 | files=["res://.godot/imported/monkey.obj-4497fd28cacdf5828d7618e33c59dea2.mesh"] 12 | 13 | source_file="res://monkey.obj" 14 | dest_files=["res://.godot/imported/monkey.obj-4497fd28cacdf5828d7618e33c59dea2.mesh", "res://.godot/imported/monkey.obj-4497fd28cacdf5828d7618e33c59dea2.mesh"] 15 | 16 | [params] 17 | 18 | generate_tangents=true 19 | scale_mesh=Vector3(1, 1, 1) 20 | offset_mesh=Vector3(0, 0, 0) 21 | optimize_mesh=true 22 | force_disable_mesh_compression=false 23 | 
-------------------------------------------------------------------------------- /icon.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HelloComputeGeometry.gd: -------------------------------------------------------------------------------- 1 | extends Node3D 2 | 3 | # Driver script for the demo 4 | # All the interesting code is in ComputeGeometryOrchestrator 5 | 6 | var cgo : ComputeGeometryOrchestrator 7 | 8 | @export var mesh_displayer : MeshInstance3D 9 | @export var meshes : Array[Mesh] 10 | 11 | # Called when the node enters the scene tree for the first time. 12 | func _ready() -> void: 13 | self.cgo = ComputeGeometryOrchestrator.new(mesh_displayer, 16384, 8) 14 | self.cgo.submit_base_mesh(meshes[0]) 15 | 16 | func _input(event: InputEvent) -> void: 17 | # this is terrible, i know 18 | if event is InputEventKey and event.is_pressed(): 19 | # between '1' and '9' 20 | if 0x31 <= event.unicode and event.unicode <= 0x39: 21 | var number_index = event.unicode - 0x31 22 | if number_index < self.meshes.size(): 23 | self.cgo.submit_base_mesh(self.meshes[number_index]) 24 | 25 | func _process(delta: float) -> void: 26 | self.cgo.draw() 27 | -------------------------------------------------------------------------------- /shaders/unpack_verts_from_compute.gdshader: -------------------------------------------------------------------------------- 1 | shader_type spatial; 2 | 3 | uniform sampler2D data_in : source_color; 4 | 5 | varying vec3 vertex_norm; 6 | 7 | vec4 read_vec4(inout int idx) { 8 | ivec2 sz = textureSize(data_in, 0); 9 | vec4 v = texelFetch(data_in, ivec2(idx % sz.x, idx / sz.x), 0); 10 | idx += 1; 11 | return v; 12 | } 13 | 14 | void vertex() { 15 | int cursor = VERTEX_ID * 3; 16 | // All of these values will be zero if the triangle in question wasn't written 17 | VERTEX = read_vec4(cursor).xyz; 18 | NORMAL = 
read_vec4(cursor).xyz; 19 | vertex_norm = NORMAL; 20 | vec4 uv_and_scratch = read_vec4(cursor); 21 | UV = uv_and_scratch.xy; 22 | } 23 | 24 | void fragment() { 25 | ALBEDO = vertex_norm / 2.0 + 0.5; 26 | // METALLIC = 0.95; 27 | ROUGHNESS = 0.8; 28 | } 29 | 30 | //void light() { 31 | // Called for every pixel for every light affecting the material. 32 | // Uncomment to replace the default light processing function with this one. 33 | //} 34 | -------------------------------------------------------------------------------- /images/suzanne_having_a_bad_day.png.import: -------------------------------------------------------------------------------- 1 | [remap] 2 | 3 | importer="texture" 4 | type="CompressedTexture2D" 5 | uid="uid://ccbdp6rjr8jq0" 6 | path="res://.godot/imported/suzanne_having_a_bad_day.png-db8d7060ea03ca1d9ba74c0b25c83d64.ctex" 7 | metadata={ 8 | "vram_texture": false 9 | } 10 | 11 | [deps] 12 | 13 | source_file="res://images/suzanne_having_a_bad_day.png" 14 | dest_files=["res://.godot/imported/suzanne_having_a_bad_day.png-db8d7060ea03ca1d9ba74c0b25c83d64.ctex"] 15 | 16 | [params] 17 | 18 | compress/mode=0 19 | compress/high_quality=false 20 | compress/lossy_quality=0.7 21 | compress/hdr_compression=1 22 | compress/normal_map=0 23 | compress/channel_pack=0 24 | mipmaps/generate=false 25 | mipmaps/limit=-1 26 | roughness/mode=0 27 | roughness/src_normal="" 28 | process/fix_alpha_border=true 29 | process/premult_alpha=false 30 | process/normal_map_invert_y=false 31 | process/hdr_as_srgb=false 32 | process/hdr_clamp_exposure=false 33 | process/size_limit=0 34 | detect_3d/compress_to=1 35 | -------------------------------------------------------------------------------- /icon.svg.import: -------------------------------------------------------------------------------- 1 | [remap] 2 | 3 | importer="texture" 4 | type="CompressedTexture2D" 5 | uid="uid://bqmqaado7k8pi" 6 | path="res://.godot/imported/icon.svg-218a8f2b3041327d8a5756f3a245f83b.ctex" 7 | 
metadata={ 8 | "vram_texture": false 9 | } 10 | 11 | [deps] 12 | 13 | source_file="res://icon.svg" 14 | dest_files=["res://.godot/imported/icon.svg-218a8f2b3041327d8a5756f3a245f83b.ctex"] 15 | 16 | [params] 17 | 18 | compress/mode=0 19 | compress/high_quality=false 20 | compress/lossy_quality=0.7 21 | compress/hdr_compression=1 22 | compress/normal_map=0 23 | compress/channel_pack=0 24 | mipmaps/generate=false 25 | mipmaps/limit=-1 26 | roughness/mode=0 27 | roughness/src_normal="" 28 | process/fix_alpha_border=true 29 | process/premult_alpha=false 30 | process/normal_map_invert_y=false 31 | process/hdr_as_srgb=false 32 | process/hdr_clamp_exposure=false 33 | process/size_limit=0 34 | detect_3d/compress_to=1 35 | svg/scale=1.0 36 | editor/scale_with_editor_scale=false 37 | editor/convert_colors_with_editor_theme=false 38 | -------------------------------------------------------------------------------- /lib/ByteWriter.gd: -------------------------------------------------------------------------------- 1 | # Helper class I wrote to write values to a PackedByteArray 2 | 3 | class_name ByteWriter extends RefCounted 4 | 5 | var inner : PackedByteArray 6 | 7 | func _init(): 8 | self.inner = PackedByteArray() 9 | 10 | func write_float(f: float): 11 | var sz := self.size() 12 | self.inner.resize(sz + 4) 13 | self.inner.encode_float(sz, f) 14 | 15 | func write_int(i: int): 16 | var sz := self.size() 17 | self.inner.resize(sz + 4) 18 | self.inner.encode_s32(sz, i) 19 | 20 | func write_vec2(v: Vector2): 21 | self.write_float(v.x) 22 | self.write_float(v.y) 23 | 24 | func write_vec3(v: Vector3): 25 | self.write_float(v.x) 26 | self.write_float(v.y) 27 | self.write_float(v.z) 28 | 29 | func write_vec4(v: Vector4): 30 | self.write_float(v.x) 31 | self.write_float(v.y) 32 | self.write_float(v.z) 33 | self.write_float(v.w) 34 | 35 | func write_pba(pba: PackedByteArray): 36 | self.inner.append_array(pba) 37 | 38 | func skip(bytes: int): 39 | self.inner.resize(self.size() + bytes) 
40 | 41 | func size() -> int: return self.inner.size() 42 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | Copyright © 2024 petrak@ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software 6 | and associated documentation files (the “Software”), to deal in the Software without 7 | restriction, including without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or 12 | substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 15 | BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 16 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /lib/ByteBuf.gd: -------------------------------------------------------------------------------- 1 | # Helper class I wrote to read values out of a PackedByteArray 2 | 3 | class_name ByteBuf extends RefCounted 4 | 5 | var inner : PackedByteArray 6 | var cursor := 0 7 | 8 | func _init(inner: PackedByteArray): 9 | self.inner = inner 10 | 11 | func read_float() -> float: 12 | var out := self.inner.decode_float(cursor) 13 | cursor += 4 14 | return out 15 | 16 | func read_int() -> int: 17 | var out := self.inner.decode_s32(cursor) 18 | cursor += 4 19 | return out 20 | 21 | func read_vec2() -> Vector2: 22 | var x := self.read_float() 23 | var y := self.read_float() 24 | return Vector2(x, y) 25 | 26 | func read_vec3() -> Vector3: 27 | var x := self.read_float() 28 | var y := self.read_float() 29 | var z := self.read_float() 30 | return Vector3(x, y, z) 31 | 32 | # Read a vec4 and drop a float 33 | func read_vec3_opengl() -> Vector3: 34 | var out := self.read_vec3() 35 | self.read_float() 36 | return out 37 | 38 | func read_vec4() -> Vector4: 39 | var x := self.read_float() 40 | var y := self.read_float() 41 | var z := self.read_float() 42 | var w := self.read_float() 43 | return Vector4(x, y, z, w) 44 | 45 | func skip(bytes: int): 46 | self.cursor += bytes 47 | 48 | func size() -> int: return self.inner.size() 49 | func bytes_remaining() -> int: return self.size() - self.cursor 50 | func finished() -> bool: return self.bytes_remaining() == 0 51 | -------------------------------------------------------------------------------- /HelloComputeGeometry.tscn: -------------------------------------------------------------------------------- 1 | [gd_scene load_steps=16 format=3 uid="uid://cw3t51s6rf6bf"] 2 | 3 | [ext_resource type="Script" path="res://HelloComputeGeometry.gd" id="1_gffpr"] 4 | [ext_resource type="Shader" path="res://shaders/unpack_verts_from_compute.gdshader" id="2_baoxi"] 5 | 
[ext_resource type="ArrayMesh" uid="uid://hxyq1veajvti" path="res://monkey.obj" id="2_lb38n"] 6 | [ext_resource type="Script" path="res://lib/FreeLookCamera.gd" id="2_vtr0j"] 7 | 8 | [sub_resource type="TorusMesh" id="TorusMesh_hlklk"] 9 | 10 | [sub_resource type="BoxMesh" id="BoxMesh_l3bqw"] 11 | subdivide_width = 4 12 | subdivide_height = 5 13 | subdivide_depth = 6 14 | 15 | [sub_resource type="SphereMesh" id="SphereMesh_wc02f"] 16 | 17 | [sub_resource type="CapsuleMesh" id="CapsuleMesh_p6wqt"] 18 | 19 | [sub_resource type="SystemFont" id="SystemFont_bwi4p"] 20 | font_names = PackedStringArray("Monospace") 21 | 22 | [sub_resource type="TextMesh" id="TextMesh_qrv5y"] 23 | text = "awagga" 24 | font = SubResource("SystemFont_bwi4p") 25 | font_size = 127 26 | depth = 0.5 27 | 28 | [sub_resource type="ShaderMaterial" id="ShaderMaterial_c17xg"] 29 | render_priority = 0 30 | shader = ExtResource("2_baoxi") 31 | 32 | [sub_resource type="PlaneMesh" id="PlaneMesh_e7dlk"] 33 | size = Vector2(32, 32) 34 | subdivide_width = 32 35 | subdivide_depth = 32 36 | 37 | [sub_resource type="ProceduralSkyMaterial" id="ProceduralSkyMaterial_xdxu5"] 38 | 39 | [sub_resource type="Sky" id="Sky_cs3x1"] 40 | sky_material = SubResource("ProceduralSkyMaterial_xdxu5") 41 | 42 | [sub_resource type="Environment" id="Environment_vurpw"] 43 | sky = SubResource("Sky_cs3x1") 44 | 45 | [node name="RootNode" type="Node3D" node_paths=PackedStringArray("mesh_displayer")] 46 | script = ExtResource("1_gffpr") 47 | mesh_displayer = NodePath("World/MeshDisplay") 48 | meshes = Array[Mesh]([SubResource("TorusMesh_hlklk"), SubResource("BoxMesh_l3bqw"), SubResource("SphereMesh_wc02f"), SubResource("CapsuleMesh_p6wqt"), SubResource("TextMesh_qrv5y"), ExtResource("2_lb38n")]) 49 | 50 | [node name="World" type="Node3D" parent="."] 51 | 52 | [node name="MeshDisplay" type="MeshInstance3D" parent="World"] 53 | material_override = SubResource("ShaderMaterial_c17xg") 54 | custom_aabb = AABB(-10, -10, -10, 20, 20, 20) 55 
| ignore_occlusion_culling = true 56 | skeleton = NodePath("../..") 57 | 58 | [node name="Ground" type="MeshInstance3D" parent="World"] 59 | transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, -1.5, 0) 60 | mesh = SubResource("PlaneMesh_e7dlk") 61 | 62 | [node name="WorldEnvironment" type="WorldEnvironment" parent="World"] 63 | environment = SubResource("Environment_vurpw") 64 | 65 | [node name="DirectionalLight3D" type="DirectionalLight3D" parent="."] 66 | transform = Transform3D(0.00898153, 0.774161, -0.632925, -0.0109846, 0.632989, 0.774083, 0.999899, -1.90903e-08, 0.0141891, -6.55135, 2.49434, 0) 67 | shadow_enabled = true 68 | 69 | [node name="Camera3D" type="Camera3D" parent="."] 70 | transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 3.69079) 71 | current = true 72 | script = ExtResource("2_vtr0j") 73 | 74 | [node name="SpotLight3D" type="SpotLight3D" parent="Camera3D"] 75 | shadow_enabled = true 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hello, Compute Geometry! 2 | 3 | ![Suzanne, the blender monkey, with each of her triangles puckering outwards.](images/suzanne_having_a_bad_day.png) 4 | 5 | This is a tech demo for mocking geometry shaders in Godot using compute shaders. 6 | Take input data, and produce a varying number of triangles from it. 7 | 8 | This particular demo takes each triangle, splits it into three, 9 | and has the center vertex move in and out. 10 | If you run the demo, you can use WASDQE to move, and hold right-click to pan (like in the editor). 11 | 12 | ## How does it work? 13 | 14 | With raw access to a graphics library, you can send a buffer of vertices (or whatever) to the GPU, 15 | have a compute shader write your vertices to another buffer, and then render that directly. 
16 | 17 | However, [Godot doesn't currently support rendering directly from a buffer of meshes](https://github.com/godotengine/godot-proposals/issues/5995#issuecomment-1741841412). 18 | So, this is my hacky solution: 19 | 20 | - Send a buffer of data to a compute shader. 21 | - In the compute shader, write the output data to a *texture*, which Godot does support sharing between shaders. 22 | In this demo I'm using an RGBA texture, so each pixel is a vec4; 23 | this means I can put a vertex's position, normal, UV, and 2 floats of scratch in 3 pixels per vertex. 24 | - Generate a dummy mesh with some giant number of arbitrary triangles. 25 | - Render the dummy mesh with a vertex shader that just ignores the input vertex and reads the vertex data out 26 | of the texture the compute shader wrote to. 27 | (Note the number of input vertices in the dummy mesh must be enough to cover all the vertices the compute shader 28 | writes! 29 | Additional unused vertices have all their values set to 0, so they don't render.) 30 | 31 | I've left comments throughout, which hopefully are helpful. 32 | I've also tried to make it clear what is necessary for this to work at all, 33 | and what is just for this particular shader. 34 | 35 | ## Structure 36 | 37 | The Godot script that orchestrates all this is `./ComputeGeometryOrchestrator.gd`. 38 | This is probably the best-commented thing in the project. 39 | 40 | The compute shader is `shaders/write_triangles.glsl`. 41 | 42 | The Godot shader that unpacks the vertices from the texture is `shaders/unpack_verts_from_compute.gdshader`. 43 | 44 | ## Citations 45 | 46 | The source code has a lot of links to stuff I found helpful at the time, but not all of it is the best. 47 | Here's my curated list of citations: 48 | 49 | - https://github.com/erickweil/GodotTests/tree/main/ProceduralGeometry. 50 | This was my main source; thanks Erick! 51 | - https://learnopengl.com/Guest-Articles/2022/Compute-Shaders/Introduction. 
52 | Easy-to-digest rudiments of compute shaders. 53 | - https://ktstephano.github.io/rendering/opengl/prog_vtx_pulling. 54 | - https://stackoverflow.com/q/38172696. 55 | OpenGL expects the data being sent to anything on the GPU to be packed in a certain way, and it's a giant footgun. 56 | This is why there is a `_scratch` variable in the `Vertex` struct, and why I'm using `vec4`s instead of `vec3`s. 57 | - https://www.khronos.org/opengl/wiki/Compute_Shader. The wiki is kind of spotty, but sometimes helpful. 58 | 59 | If you don't understand how something works, open an issue please! 60 | I was tearing my hair out trying to figure out how to do this, and I'd love if this repo was a helpful source if you want to do the same. 61 | 62 | -------------------------------------------------------------------------------- /shaders/write_triangles.glsl: -------------------------------------------------------------------------------- 1 | #[compute] 2 | #version 460 3 | 4 | // https://ktstephano.github.io/rendering/opengl/ssbos 5 | // https://stackoverflow.com/questions/69497498/updating-vertices-from-compute-shader 6 | // https://github.com/erickweil/GodotTests/blob/main/ProceduralGeometry/procedural_geometry.glsl 7 | 8 | struct Vertex { 9 | // https://stackoverflow.com/questions/38172696/should-i-ever-use-a-vec3-inside-of-a-uniform-buffer-or-shader-storage-buffer-o/38172697#38172697 10 | // https://ktstephano.github.io/rendering/opengl/prog_vtx_pulling 11 | vec4 position; 12 | vec4 normal; 13 | vec2 uv; 14 | // SUPER IMPORTANT: YOU NEED THIS. 
15 | // I am wasting a little bit of space like this, but w/e 16 | vec2 _scratch; 17 | }; 18 | 19 | // IN FLOATS 20 | #define SIZEOF_VERTEX ((4 + 4 + 2 + 2)) 21 | 22 | struct Triangle { 23 | Vertex verts[3]; 24 | }; 25 | 26 | Triangle normalsFromPositions(Triangle trongle) { 27 | Vertex verts[3] = trongle.verts; 28 | vec3 normal = normalize(cross(verts[1].position.xyz - verts[0].position.xyz, verts[2].position.xyz - verts[0].position.xyz)); 29 | verts[0].normal = vec4(normal, 0.0); 30 | verts[1].normal = vec4(normal, 0.0); 31 | verts[2].normal = vec4(normal, 0.0); 32 | return Triangle(verts); 33 | } 34 | 35 | #define SIZEOF_TRIANGLE ((3 * SIZEOF_VERTEX)) 36 | 37 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 38 | 39 | // yes, the readonly goes before restrict here, but writeonly goes AFTER restrict later 40 | // beats me 41 | layout(std430, set = 0, binding = 0) readonly restrict buffer DataIn { 42 | Triangle tris[]; 43 | } DATA_IN; 44 | layout(rgba32f, set = 0, binding = 1) uniform restrict writeonly image2D IMAGE_OUT; 45 | 46 | // it turns out, this increases over EVERY work group at once. 47 | // not just one per workgroup 48 | layout(std430, set = 0, binding = 2) restrict buffer CounterBuffer { 49 | uint COUNTER; 50 | }; 51 | 52 | layout(push_constant, std430) uniform Params { 53 | uint out_tex_width; 54 | uint out_tex_height; 55 | uint max_tris_per_tri; 56 | // Everything above this line is mandatory 57 | // I also just have the time here for example purposes 58 | float time; 59 | } PARAMS; 60 | 61 | void writeVec4(uint index, vec4 v) { 62 | // pixel index equals the index, handy! 
63 | imageStore(IMAGE_OUT, 64 | ivec2(index % PARAMS.out_tex_width, index / PARAMS.out_tex_width), 65 | v); 66 | } 67 | 68 | void writeVertex(uint index, Vertex vert) { 69 | writeVec4(index * 3 + 0, vert.position); 70 | writeVec4(index * 3 + 1, vert.normal); 71 | writeVec4(index * 3 + 2, vec4(vert.uv, vert._scratch)); 72 | } 73 | 74 | void writeTriangle(Triangle tri) { 75 | uint wgIdx = atomicAdd(COUNTER, 1); 76 | // if (wgIdx > PARAMS.max_tris_per_tri) return; 77 | uint triIdx = wgIdx; 78 | 79 | writeVertex(triIdx * 3 + 0, tri.verts[0]); 80 | writeVertex(triIdx * 3 + 1, tri.verts[1]); 81 | writeVertex(triIdx * 3 + 2, tri.verts[2]); 82 | } 83 | 84 | void main() { 85 | /* Skip invocations that fall past the end of the input triangle buffer. */ if (gl_GlobalInvocationID.x >= uint(DATA_IN.tris.length())) return; 86 | 87 | Triangle tri = DATA_IN.tris[gl_GlobalInvocationID.x]; 88 | 89 | // Your interesting user code goes here. 90 | 91 | /* 92 | 0 93 | /|\ 94 | / X \ 95 | / / \ \ 96 | 1-------2 97 | */ 98 | 99 | vec4 center_pos = (tri.verts[0].position + tri.verts[1].position + tri.verts[2].position) / 3.0; 100 | vec4 avg_norm = (tri.verts[0].normal + tri.verts[1].normal + tri.verts[2].normal) / 3.0; 101 | vec4 spine_out = center_pos + avg_norm * 0.05 * cos(PARAMS.time); 102 | Vertex center = Vertex(spine_out, avg_norm, vec2(0), vec2(0)); 103 | 104 | Triangle triOut = tri; 105 | triOut.verts[0] = center; 106 | writeTriangle(normalsFromPositions(triOut)); 107 | 108 | triOut = tri; 109 | triOut.verts[1] = center; 110 | writeTriangle(normalsFromPositions(triOut)); 111 | 112 | triOut = tri; 113 | triOut.verts[2] = center; 114 | writeTriangle(normalsFromPositions(triOut)); 115 | } 116 | -------------------------------------------------------------------------------- /lib/FreeLookCamera.gd: -------------------------------------------------------------------------------- 1 | # https://github.com/adamviola/simple-free-look-camera/blob/master/camera.gd 2 | class_name FreeLookCamera extends Camera3D 3 | 4 | # Modifier keys' speed multiplier 5 
| const SHIFT_MULTIPLIER = 2.5 6 | const ALT_MULTIPLIER = 1.0 / SHIFT_MULTIPLIER 7 | 8 | 9 | @export_range(0.0, 1.0) var sensitivity: float = 0.25 10 | 11 | # Mouse state 12 | var _mouse_position = Vector2(0.0, 0.0) 13 | var _total_pitch = 0.0 14 | 15 | # Movement state 16 | var _direction = Vector3(0.0, 0.0, 0.0) 17 | var _velocity = Vector3(0.0, 0.0, 0.0) 18 | var _acceleration = 30 19 | var _deceleration = -10 20 | var _vel_multiplier = 4 21 | 22 | # Keyboard state 23 | var _w = false 24 | var _s = false 25 | var _a = false 26 | var _d = false 27 | var _q = false 28 | var _e = false 29 | var _shift = false 30 | var _alt = false 31 | 32 | func _input(event): 33 | # Receives mouse motion 34 | if event is InputEventMouseMotion: 35 | _mouse_position = event.relative 36 | 37 | # Receives mouse button input 38 | if event is InputEventMouseButton: 39 | match event.button_index: 40 | MOUSE_BUTTON_RIGHT: # Only allows rotation if right click down 41 | Input.set_mouse_mode(Input.MOUSE_MODE_CAPTURED if event.pressed else Input.MOUSE_MODE_VISIBLE) 42 | MOUSE_BUTTON_WHEEL_UP: # Increases max velocity 43 | _vel_multiplier = clamp(_vel_multiplier * 1.1, 0.2, 20) 44 | MOUSE_BUTTON_WHEEL_DOWN: # Decreases max velocity 45 | _vel_multiplier = clamp(_vel_multiplier / 1.1, 0.2, 20) 46 | 47 | # Receives key input 48 | if event is InputEventKey: 49 | match event.keycode: 50 | KEY_W: 51 | _w = event.pressed 52 | KEY_S: 53 | _s = event.pressed 54 | KEY_A: 55 | _a = event.pressed 56 | KEY_D: 57 | _d = event.pressed 58 | KEY_Q: 59 | _q = event.pressed 60 | KEY_E: 61 | _e = event.pressed 62 | KEY_SHIFT: 63 | _shift = event.pressed 64 | KEY_ALT: 65 | _alt = event.pressed 66 | 67 | # Updates mouselook and movement every frame 68 | func _process(delta): 69 | _update_mouselook() 70 | _update_movement(delta) 71 | 72 | # Updates camera movement 73 | func _update_movement(delta): 74 | # Computes desired direction from key states 75 | _direction = Vector3( 76 | (_d as float) - (_a as float), 77 | 
(_e as float) - (_q as float), 78 | (_s as float) - (_w as float) 79 | ) 80 | 81 | # Computes the change in velocity due to desired direction and "drag" 82 | # The "drag" is a constant acceleration on the camera to bring its velocity to 0 83 | var offset = _direction.normalized() * _acceleration * _vel_multiplier * delta \ 84 | + _velocity.normalized() * _deceleration * _vel_multiplier * delta 85 | 86 | # Compute modifiers' speed multiplier 87 | var speed_multi = 1 88 | if _shift: speed_multi *= SHIFT_MULTIPLIER 89 | if _alt: speed_multi *= ALT_MULTIPLIER 90 | 91 | # Checks if we should bother translating the camera 92 | if _direction == Vector3.ZERO and offset.length_squared() > _velocity.length_squared(): 93 | # Sets the velocity to 0 to prevent jittering due to imperfect deceleration 94 | _velocity = Vector3.ZERO 95 | else: 96 | # Clamps speed to stay within maximum value (_vel_multiplier) 97 | _velocity.x = clamp(_velocity.x + offset.x, -_vel_multiplier, _vel_multiplier) 98 | _velocity.y = clamp(_velocity.y + offset.y, -_vel_multiplier, _vel_multiplier) 99 | _velocity.z = clamp(_velocity.z + offset.z, -_vel_multiplier, _vel_multiplier) 100 | 101 | translate(_velocity * delta * speed_multi) 102 | 103 | # Updates mouse look 104 | func _update_mouselook(): 105 | # Only rotates mouse if the mouse is captured 106 | if Input.get_mouse_mode() == Input.MOUSE_MODE_CAPTURED: 107 | _mouse_position *= sensitivity 108 | var yaw = _mouse_position.x 109 | var pitch = _mouse_position.y 110 | _mouse_position = Vector2(0, 0) 111 | 112 | # Prevents looking up/down too far 113 | pitch = clamp(pitch, -90 - _total_pitch, 90 - _total_pitch) 114 | _total_pitch += pitch 115 | 116 | rotate_y(deg_to_rad(-yaw)) 117 | rotate_object_local(Vector3(1,0,0), deg_to_rad(-pitch)) 118 | -------------------------------------------------------------------------------- /ComputeGeometryOrchestrator.gd: -------------------------------------------------------------------------------- 1 | class_name 
ComputeGeometryOrchestrator extends RefCounted 2 | 3 | # The main logic goes here. 4 | # The script on the RootNode changes the mesh input on number keys 5 | 6 | var mesh_displayer : MeshInstance3D 7 | 8 | # Handy pointer to the main rendering device 9 | var rd : RenderingDevice 10 | # To avoid having a billion variables everywhere, all the RIDs are stored in here 11 | var compute_stuff := {} 12 | 13 | var in_buf_scratch := PackedByteArray() 14 | var shader_texture : Texture2DRD 15 | 16 | var mesh_in_dirty := false 17 | var queued_push_data := PackedByteArray() 18 | var texture_size : int 19 | var actual_tri_count : int 20 | 21 | # constants across the life of the object 22 | var max_triangles : int 23 | var max_generated_tris_per_tri : int 24 | 25 | const SIZE_OF_FLOAT := 4 26 | const SIZE_OF_VERTEX := 12 * SIZE_OF_FLOAT 27 | const SIZE_OF_TRI := SIZE_OF_VERTEX * 3 28 | 29 | func _init(mesh_displayer: MeshInstance3D, max_triangles: int, max_generated_tris_per_tri: int) -> void: 30 | self.mesh_displayer = mesh_displayer 31 | self.max_triangles = max_triangles 32 | self.max_generated_tris_per_tri = max_generated_tris_per_tri 33 | 34 | var max_out_tris := max_triangles * max_generated_tris_per_tri 35 | var max_out_v4s := max_out_tris * 9 36 | self.texture_size = ceili(sqrt(max_out_v4s)) 37 | # self.texture_size = 1024 38 | 39 | print("texture size: ", self.texture_size) 40 | var shader_mat := load("res://shaders/unpack_verts_from_compute.tres") 41 | mesh_displayer.material_override = shader_mat 42 | self.shader_texture = Texture2DRD.new() 43 | shader_mat.set_shader_parameter("data_in", self.shader_texture) 44 | 45 | func submit_base_mesh(mesh: Mesh) -> void: 46 | self.actual_tri_count = format_mesh(mesh, self.in_buf_scratch) 47 | print("triangles: ", self.actual_tri_count) 48 | if self.actual_tri_count > self.max_triangles: 49 | push_error("Too many triangles! 
This mesh has ", 50 | self.actual_tri_count, " but only set up to handle ", self.max_triangles) 51 | # This is a totally solvable problem; 52 | # for example every time you got more triangles than you could handle 53 | # you could reallocate the buffer & out image to be as big as you needed, 54 | # or twice as big, or whatever. 55 | # Sending the data to the GPU is pretty fast these days. 56 | self.mesh_in_dirty = true 57 | 58 | func draw(): 59 | if self.rd == null: 60 | # Some of this setup apparently has to be done once it's ready to draw, 61 | # not in init. beats me 62 | RenderingServer.call_on_render_thread(self._init_gpu) 63 | 64 | RenderingServer.call_on_render_thread(self._compute_frame) 65 | self.mesh_in_dirty = false 66 | 67 | func _init_gpu(): 68 | self.rd = RenderingServer.get_rendering_device() 69 | 70 | var shader_file := load("res://shaders/write_triangles.glsl") 71 | var shader_spirv : RDShaderSPIRV = shader_file.get_spirv() 72 | var shader_rid := rd.shader_create_from_spirv(shader_spirv) 73 | 74 | # we are going to update this every frame, but we can't init it with an empty buffer 75 | # also, it has problems reallocating 76 | # so just give it a huge amount idfc 77 | self.in_buf_scratch.resize(self.max_triangles * SIZE_OF_TRI) 78 | var in_buf_rid := self.rd.storage_buffer_create(self.in_buf_scratch.size(), self.in_buf_scratch) 79 | var atomics_at_home_rid := self.rd.storage_buffer_create(4, PackedByteArray([0, 0, 0, 0])) 80 | 81 | var tex_format := RDTextureFormat.new() 82 | tex_format.format = RenderingDevice.DATA_FORMAT_R32G32B32A32_SFLOAT 83 | tex_format.width = self.texture_size 84 | tex_format.height = self.texture_size 85 | tex_format.depth = 1 86 | tex_format.usage_bits = \ 87 | RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT \ 88 | | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT \ 89 | | RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT \ 90 | | RenderingDevice.TEXTURE_USAGE_CAN_COPY_FROM_BIT \ 91 | | RenderingDevice.TEXTURE_USAGE_CAN_COPY_TO_BIT 92 | 
tex_format.mipmaps = 1 93 | tex_format.samples = RenderingDevice.TEXTURE_SAMPLES_1 94 | var out_tex_rid := self.rd.texture_create(tex_format, RDTextureView.new()) 95 | self.rd.texture_clear(out_tex_rid, Color.TRANSPARENT, 0, 1, 0, 1) 96 | 97 | # First uniform: a storage buffer of our vertices 98 | var in_buf_uniform := RDUniform.new() 99 | in_buf_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER 100 | in_buf_uniform.binding = 0 # lines up with the binding=0 in the glsl 101 | in_buf_uniform.add_id(in_buf_rid) 102 | # Second uniform: the texture we write our vertex data to 103 | var out_tex_uniform := RDUniform.new() 104 | out_tex_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE 105 | out_tex_uniform.binding = 1 106 | out_tex_uniform.add_id(out_tex_rid) 107 | # We want to be able to count the number of triangles every invocation has written altogether 108 | # For that, we use an atomic! Or rather we use atomic operations on an int. 109 | # This is the CounterBuffer in the glsl. 
110 | # It has to be a separate buffer so it can be read/write 111 | var atomics_at_home_uniform := RDUniform.new() 112 | atomics_at_home_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER 113 | atomics_at_home_uniform.binding = 2 114 | atomics_at_home_uniform.add_id(atomics_at_home_rid) 115 | 116 | # the 0 lines up with the set=0 in glsl 117 | var uniform_set := rd.uniform_set_create([ 118 | in_buf_uniform, out_tex_uniform, atomics_at_home_uniform, 119 | ], shader_rid, 0) 120 | var pipeline := rd.compute_pipeline_create(shader_rid) 121 | assert(pipeline.is_valid()) 122 | 123 | self.compute_stuff["shader_rid"] = shader_rid 124 | self.compute_stuff["in_buf_rid"] = in_buf_rid 125 | self.compute_stuff["out_tex_rid"] = out_tex_rid 126 | self.compute_stuff["atomics_at_home_rid"] = atomics_at_home_rid 127 | self.compute_stuff["uniform_set"] = uniform_set 128 | self.compute_stuff["pipeline"] = pipeline 129 | 130 | # This is the "Secret sauce" 131 | # This makes the Godot texture read from the .gdshader and the GLSL texture 132 | # written from the .compute.glsl be the same image 133 | self.shader_texture.texture_rd_rid = self.compute_stuff["out_tex_rid"] 134 | 135 | self.mesh_displayer.mesh = self._make_dummy_arraymesh() 136 | 137 | func _compute_frame(): 138 | if self.mesh_in_dirty: 139 | # We have to clear the entire input vertex buffer, otherwise old triangles will be left over, 140 | # because buffer_update only overwrites as many bytes as is given. 
141 | # I'm not sure if this is slow; possibly you could extend in_buf_scratch to be the size 142 | # of the whole GPU-side buffer and do it all in one operation 143 | rd.buffer_clear(self.compute_stuff["in_buf_rid"], 0, self.max_triangles * SIZE_OF_TRI) 144 | rd.buffer_update(self.compute_stuff["in_buf_rid"], 0, self.in_buf_scratch.size(), self.in_buf_scratch) 145 | # reset counter 146 | rd.buffer_update(self.compute_stuff["atomics_at_home_rid"], 0, 4, PackedByteArray([0, 0, 0, 0])) 147 | # reset output texture 148 | # transparent = all zeroes 149 | rd.texture_clear(self.compute_stuff["out_tex_rid"], Color.TRANSPARENT, 0, 1, 0, 1) 150 | 151 | # Write the "push list" data, which is a way to send a small amount of data to the GPU 152 | # without needing a uniform. 153 | # https://vkguide.dev/docs/chapter-3/push_constants/ 154 | # This corresponds with the push_constant block in the glsl file 155 | var push_list := ByteWriter.new() 156 | push_list.write_int(self.texture_size) 157 | push_list.write_int(self.texture_size) 158 | push_list.write_int(self.max_generated_tris_per_tri) 159 | # turns out that Time.get_unix_time_from_system just straight-up doesn't work 160 | push_list.write_float(Time.get_ticks_msec() / 1000.0) 161 | 162 | # set up the computation! 163 | var compute_list := rd.compute_list_begin() 164 | # boring stuff 165 | rd.compute_list_bind_compute_pipeline(compute_list, self.compute_stuff["pipeline"]) 166 | rd.compute_list_bind_uniform_set(compute_list, self.compute_stuff["uniform_set"], 0) 167 | rd.compute_list_set_push_constant(compute_list, push_list.inner, push_list.size()) 168 | # set up the dispatch! 
169 | # In the shader, I've set up a local size of 32 170 | # this means every workgroup processes 32 triangles 171 | var needed_dispatch_count : int = self.max_triangles / 32 172 | rd.compute_list_dispatch(compute_list, needed_dispatch_count, 1, 1) 173 | rd.compute_list_end() 174 | 175 | # https://github.com/erickweil/GodotTests/blob/38237af0bd88dfcc39ec2480fbb84a674ab7c9e2/ProceduralGeometry/ProceduralGeometry.cs#L123 176 | func _make_dummy_arraymesh(): 177 | var tri_count := self.max_triangles * self.max_generated_tris_per_tri 178 | var verts := PackedVector3Array() 179 | # three verts per tri 180 | for i in range(0, tri_count * 3): 181 | verts.append(Vector3.ZERO) 182 | 183 | var surfaces := [] 184 | surfaces.resize(ArrayMesh.ARRAY_MAX) 185 | surfaces[ArrayMesh.ARRAY_VERTEX] = verts 186 | 187 | var mesh := ArrayMesh.new() 188 | mesh.add_surface_from_arrays(Mesh.PRIMITIVE_TRIANGLES, surfaces) 189 | return mesh 190 | 191 | func _display_mat() -> ShaderMaterial: 192 | return self.mesh_displayer.material_override as ShaderMaterial 193 | 194 | # Returns the number of triangles found 195 | static func format_mesh(mesh: Mesh, tris_buf: PackedByteArray) -> int: 196 | var surf1 := mesh.surface_get_arrays(0) 197 | var vertices : PackedVector3Array = surf1[Mesh.ARRAY_VERTEX] 198 | var normals : PackedVector3Array = surf1[Mesh.ARRAY_NORMAL] 199 | var has_uvs := surf1[Mesh.ARRAY_TEX_UV] != null 200 | var uvs : PackedVector2Array = surf1[Mesh.ARRAY_TEX_UV] if has_uvs else PackedVector2Array() 201 | var indices : PackedInt32Array = surf1[Mesh.ARRAY_INDEX] 202 | 203 | tris_buf.clear() 204 | # we have to de-index the array here. 
205 | for i in range(0, indices.size()): 206 | var index := indices[i] 207 | var vert := vertices[index] 208 | var norm := normals[index] 209 | var uv := uvs[index] if has_uvs else Vector2.ZERO 210 | 211 | # Must be laid out the same as in the glsl 212 | var float_arr := PackedFloat32Array([ 213 | vert.x, vert.y, vert.z, 0.0, 214 | norm.x, norm.y, norm.z, 0.0, 215 | uv.x, uv.y, 216 | 0.0, 0.0]) 217 | tris_buf.append_array(float_arr.to_byte_array()) 218 | 219 | return indices.size() / 3 220 | --------------------------------------------------------------------------------