├── BitangentNoise.glsl
├── BitangentNoise.hlsl
├── Develop
    ├── BitangentNoise_ref.hlsl
    ├── BitangentNoise_v0.hlsl
    └── SimplexNoise.hlsl
├── LICENSE
├── README.md
└── image.png


/BitangentNoise.glsl:
--------------------------------------------------------------------------------
  1 | //	--------------------------------------------------------------------
  2 | //	Optimized implementation of 3D/4D bitangent noise.
  3 | //	Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
  4 | //	Contact : atyuwen@gmail.com
  5 | //	Author : Yuwen Wu (https://atyuwen.github.io/)
  6 | //	License : Distributed under the MIT License.
  7 | //	--------------------------------------------------------------------
  8 | 
  9 | // Permuted congruential generator (only top 16 bits are well shuffled).
 10 | // References: 1. Mark Jarzynski and Marc Olano, "Hash Functions for GPU Rendering".
 11 | //             2. UnrealEngine/Random.ush. https://github.com/EpicGames/UnrealEngine
 12 | uvec2 _pcg3d16(uvec3 p) {
 13 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 14 | 	uvec3 s = 1013904223u + 1664525u * p;
 15 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  // full mixing round
 16 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 17 | 	return uvec2(s.x, s.y);
 18 | }
 19 | uvec2 _pcg4d16(uvec4 p) {
 20 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 21 | 	uvec4 s = 1013904223u + 1664525u * p;
 22 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  s.w = s.w + s.y * s.z;  // full mixing round
 23 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 24 | 	return uvec2(s.x, s.y);
 25 | }
 26 | 
 27 | // Get random gradient from hash value.
 28 | vec3 _gradient3d(uint hash) {
 29 | 	// Hash bits 19, 18 and 17 pick the sign of x, y and z; every component is exactly -1.0 or +1.0.
 30 | 	uvec3 signBit = (uvec3(hash) >> uvec3(19u, 18u, 17u)) & uvec3(1u);
 31 | 	return vec3(signBit) * 2.0 - 1.0;
 32 | }
 33 | vec4 _gradient4d(uint hash) {
 34 | 	// Hash bits 19, 18, 17 and 16 pick the sign of x, y, z and w; every component is exactly -1.0 or +1.0.
 35 | 	uvec4 signBit = (uvec4(hash) >> uvec4(19u, 18u, 17u, 16u)) & uvec4(1u);
 36 | 	return vec4(signBit) * 2.0 - 1.0;
 37 | }
 38 | 
 39 | // Optimized 3D bitangent noise: the scaled cross product of two analytic simplex-noise gradients at p. Approximately 113 instruction slots used.
 40 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 41 | vec3 BitangentNoise3D(vec3 p)
 42 | {
 43 | 	const vec2 C = vec2(1.0 / 6.0, 1.0 / 3.0); // C.x = 1/6 is used to unskew, C.y = 1/3 to skew
 44 | 	const vec4 D = vec4(0.0, 0.5, 1.0, 2.0);   // only D.y (0.5) is read below
 45 | 
 46 | 	// First corner: skew p by dot(p, 1/3), floor to get the cell origin i, then unskew i to get the offset x0.
 47 | 	vec3 i = floor(p + dot(p, C.yyy));
 48 | 	vec3 x0 = p - i + dot(i, C.xxx);
 49 | 
 50 | 	// Other corners: the pairwise ordering of x0's components selects the lattice steps i1 and i2.
 51 | 	vec3 g = step(x0.yzx, x0.xyz);
 52 | 	vec3 l = 1.0 - g;
 53 | 	vec3 i1 = min(g.xyz, l.zxy);
 54 | 	vec3 i2 = max(g.xyz, l.zxy);
 55 | 
 56 | 	// x0 = x0 - 0.0 + 0.0 * C.xxx;
 57 | 	// x1 = x0 - i1  + 1.0 * C.xxx;
 58 | 	// x2 = x0 - i2  + 2.0 * C.xxx;
 59 | 	// x3 = x0 - 1.0 + 3.0 * C.xxx;
 60 | 	vec3 x1 = x0 - i1 + C.xxx;
 61 | 	vec3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
 62 | 	vec3 x3 = x0 - D.yyy;      // -1.0+3.0*C.x = -0.5 = -D.y
 63 | 	// Bias the cell index before converting it to uint; the conversion truncates, so the extra 0.5 is dropped.
 64 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, 1/3) and can exceed p's own range -- confirm the stated input range keeps this non-negative
 65 | 	uvec2 hash0 = _pcg3d16(uvec3(i)); // two hash words per corner: .x feeds the first field, .y the second
 66 | 	uvec2 hash1 = _pcg3d16(uvec3(i + i1));
 67 | 	uvec2 hash2 = _pcg3d16(uvec3(i + i2));
 68 | 	uvec2 hash3 = _pcg3d16(uvec3(i + 1.0 ));
 69 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
 70 | 	vec3 p00 = _gradient3d(hash0.x); vec3 p01 = _gradient3d(hash0.y);
 71 | 	vec3 p10 = _gradient3d(hash1.x); vec3 p11 = _gradient3d(hash1.y);
 72 | 	vec3 p20 = _gradient3d(hash2.x); vec3 p21 = _gradient3d(hash2.y);
 73 | 	vec3 p30 = _gradient3d(hash3.x); vec3 p31 = _gradient3d(hash3.y);
 74 | 
 75 | 	// Calculate noise gradients. m is the per-corner falloff clamp(0.5 - |x|^2, 0, 1), shared by both fields.
 76 | 	vec4 m = clamp(0.5 - vec4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0, 1.0);
 77 | 	vec4 mt = m * m;   // m^2
 78 | 	vec4 m4 = mt * mt; // m^4
 79 | 
 80 | 	mt = mt * m;       // m^3
 81 | 	vec4 pdotx = vec4(dot(p00, x0), dot(p10, x1), dot(p20, x2), dot(p30, x3));
 82 | 	vec4 temp = mt * pdotx;
 83 | 	vec3 gradient0 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3); // d/dp of sum(m^4 * dot(g, x)) = sum(-8 m^3 dot(g, x) x + m^4 g)
 84 | 	gradient0 += m4.x * p00 + m4.y * p10 + m4.z * p20 + m4.w * p30;
 85 | 	// Second field: same offsets and falloff, second set of corner gradients.
 86 | 	pdotx = vec4(dot(p01, x0), dot(p11, x1), dot(p21, x2), dot(p31, x3));
 87 | 	temp = mt * pdotx;
 88 | 	vec3 gradient1 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3);
 89 | 	gradient1 += m4.x * p01 + m4.y * p11 + m4.z * p21 + m4.w * p31;
 90 | 
 91 | 	// The cross product of two gradient fields is divergence-free.
 92 | 	return cross(gradient0, gradient1) * 3918.76; // 3918.76 is a constant output scale
 93 | }
 94 | 
 95 | // 4D bitangent noise: the scaled cross product of the xyz parts of two analytic 4D simplex-noise gradients at p. Approximately 163 instruction slots used.
 96 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 97 | vec3 BitangentNoise4D(vec4 p)
 98 | {
 99 | 	const vec4 F4 = vec4( 0.309016994374947451 ); // skew factor, (sqrt(5) - 1) / 4
100 | 	const vec4  C = vec4( 0.138196601125011,  // (5 - sqrt(5))/20  G4
101 | 	                      0.276393202250021,  // 2 * G4
102 | 	                      0.414589803375032,  // 3 * G4
103 | 	                     -0.447213595499958 ); // -1 + 4 * G4
104 | 
105 | 	// First corner: skew p by dot(p, F4), floor to get the cell origin i, then unskew i to get the offset x0.
106 | 	vec4 i  = floor(p + dot(p, F4) );
107 | 	vec4 x0 = p -   i + dot(i, C.xxxx);
108 | 
109 | 	// Other corners
110 | 
111 | 	// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
112 | 	vec4 i0; // rank (0..3) of each component of x0, accumulated from the pairwise comparisons below
113 | 	vec3 isX = step( x0.yzw, x0.xxx ); // 1 where x >= y, x >= z, x >= w
114 | 	vec3 isYZ = step( x0.zww, x0.yyz ); // 1 where y >= z, y >= w, z >= w
115 | 	// i0.x = dot( isX, vec3( 1.0 ) );
116 | 	i0.x = isX.x + isX.y + isX.z;
117 | 	i0.yzw = 1.0 - isX;
118 | 	// i0.y += dot( isYZ.xy, vec2( 1.0 ) );
119 | 	i0.y += isYZ.x + isYZ.y;
120 | 	i0.zw += 1.0 - isYZ.xy;
121 | 	i0.z += isYZ.z;
122 | 	i0.w += 1.0 - isYZ.z;
123 | 
124 | 	// i0 now contains the unique values 0,1,2,3 in each channel
125 | 	vec4 i3 = clamp( i0, 0.0, 1.0 );       // 1 where rank >= 1
126 | 	vec4 i2 = clamp( i0 - 1.0, 0.0, 1.0 ); // 1 where rank >= 2
127 | 	vec4 i1 = clamp( i0 - 2.0, 0.0, 1.0 ); // 1 only where rank == 3
128 | 
129 | 	// x0 = x0 - 0.0 + 0.0 * C.xxxx
130 | 	// x1 = x0 - i1  + 1.0 * C.xxxx
131 | 	// x2 = x0 - i2  + 2.0 * C.xxxx
132 | 	// x3 = x0 - i3  + 3.0 * C.xxxx
133 | 	// x4 = x0 - 1.0 + 4.0 * C.xxxx
134 | 	vec4 x1 = x0 - i1 + C.xxxx;
135 | 	vec4 x2 = x0 - i2 + C.yyyy;
136 | 	vec4 x3 = x0 - i3 + C.zzzz;
137 | 	vec4 x4 = x0 + C.wwww;
138 | 	// Bias the cell index before converting it to uint; the conversion truncates, so the extra 0.5 is dropped.
139 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, F4) and can exceed p's own range -- confirm the stated input range keeps this non-negative
140 | 	uvec2 hash0 = _pcg4d16(uvec4(i)); // two hash words per corner: .x feeds the first field, .y the second
141 | 	uvec2 hash1 = _pcg4d16(uvec4(i + i1));
142 | 	uvec2 hash2 = _pcg4d16(uvec4(i + i2));
143 | 	uvec2 hash3 = _pcg4d16(uvec4(i + i3));
144 | 	uvec2 hash4 = _pcg4d16(uvec4(i + 1.0 ));
145 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
146 | 	vec4 p00 = _gradient4d(hash0.x); vec4 p01 = _gradient4d(hash0.y);
147 | 	vec4 p10 = _gradient4d(hash1.x); vec4 p11 = _gradient4d(hash1.y);
148 | 	vec4 p20 = _gradient4d(hash2.x); vec4 p21 = _gradient4d(hash2.y);
149 | 	vec4 p30 = _gradient4d(hash3.x); vec4 p31 = _gradient4d(hash3.y);
150 | 	vec4 p40 = _gradient4d(hash4.x); vec4 p41 = _gradient4d(hash4.y);
151 | 
152 | 	// Calculate noise gradients. m0/m1 hold the five per-corner falloffs clamp(0.6 - |x|^2, 0, 1), split 3 + 2.
153 | 	vec3 m0 = clamp(0.6 - vec3(dot(x0, x0), dot(x1, x1), dot(x2, x2)), 0.0, 1.0);
154 | 	vec2 m1 = clamp(0.6 - vec2(dot(x3, x3), dot(x4, x4)             ), 0.0, 1.0);
155 | 	vec3 m02 = m0 * m0; vec3 m03 = m02 * m0; // m^2 and m^3
156 | 	vec2 m12 = m1 * m1; vec2 m13 = m12 * m1;
157 | 
158 | 	vec3 temp0 = m02 * vec3(dot(p00, x0), dot(p10, x1), dot(p20, x2));
159 | 	vec2 temp1 = m12 * vec2(dot(p30, x3), dot(p40, x4));
160 | 	vec4 grad0 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4); // d/dp of sum(m^3 * dot(g, x)) = sum(-6 m^2 dot(g, x) x + m^3 g)
161 | 	grad0 += m03.x * p00 + m03.y * p10 + m03.z * p20 + m13.x * p30 + m13.y * p40;
162 | 	// Second field: same offsets and falloff, second set of corner gradients.
163 | 	temp0 = m02 * vec3(dot(p01, x0), dot(p11, x1), dot(p21, x2));
164 | 	temp1 = m12 * vec2(dot(p31, x3), dot(p41, x4));
165 | 	vec4 grad1 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
166 | 	grad1 += m03.x * p01 + m03.y * p11 + m03.z * p21 + m13.x * p31 + m13.y * p41;
167 | 
168 | 	// The cross product of two gradient fields is divergence-free; only the xyz parts of the 4D gradients are used.
169 | 	return cross(grad0.xyz, grad1.xyz) * 81.0; // 81.0 is a constant output scale
170 | }
171 | 


--------------------------------------------------------------------------------
/BitangentNoise.hlsl:
--------------------------------------------------------------------------------
  1 | //	--------------------------------------------------------------------
  2 | //	Optimized implementation of 3D/4D bitangent noise.
  3 | //	Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
  4 | //	Contact : atyuwen@gmail.com
  5 | //	Author : Yuwen Wu (https://atyuwen.github.io/)
  6 | //	License : Distributed under the MIT License.
  7 | //	--------------------------------------------------------------------
  8 | 
  9 | // Permuted congruential generator (only top 16 bits are well shuffled).
 10 | // References: 1. Mark Jarzynski and Marc Olano, "Hash Functions for GPU Rendering".
 11 | //             2. UnrealEngine/Random.ush. https://github.com/EpicGames/UnrealEngine
 12 | uint2 _pcg3d16(uint3 p) {
 13 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 14 | 	uint3 s = 1013904223u + 1664525u * p;
 15 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  // full mixing round
 16 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 17 | 	return uint2(s.x, s.y);
 18 | }
 19 | uint2 _pcg4d16(uint4 p) {
 20 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 21 | 	uint4 s = 1013904223u + 1664525u * p;
 22 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  s.w = s.w + s.y * s.z;  // full mixing round
 23 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 24 | 	return uint2(s.x, s.y);
 25 | }
 26 | 
 27 | // Get random gradient from hash value.
 28 | float3 _gradient3d(uint hash) {
 29 | 	// Hash bits 19, 18 and 17 pick the sign of x, y and z; every component is exactly -1.0 or +1.0.
 30 | 	uint3 signBit = (hash.xxx >> uint3(19, 18, 17)) & 1u;
 31 | 	return float3(signBit) * 2.0 - 1.0;
 32 | }
 33 | float4 _gradient4d(uint hash) {
 34 | 	// Hash bits 19, 18, 17 and 16 pick the sign of x, y, z and w; every component is exactly -1.0 or +1.0.
 35 | 	uint4 signBit = (hash.xxxx >> uint4(19, 18, 17, 16)) & 1u;
 36 | 	return float4(signBit) * 2.0 - 1.0;
 37 | }
 38 | 
 39 | // Optimized 3D bitangent noise: the scaled cross product of two analytic simplex-noise gradients at p. Approximately 113 instruction slots used.
 40 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 41 | float3 BitangentNoise3D(float3 p)
 42 | {
 43 | 	const float2 C = float2(1.0 / 6.0, 1.0 / 3.0); // C.x = 1/6 is used to unskew, C.y = 1/3 to skew
 44 | 	const float4 D = float4(0.0, 0.5, 1.0, 2.0);   // only D.y (0.5) is read below
 45 | 
 46 | 	// First corner: skew p by dot(p, 1/3), floor to get the cell origin i, then unskew i to get the offset x0.
 47 | 	float3 i = floor(p + dot(p, C.yyy));
 48 | 	float3 x0 = p - i + dot(i, C.xxx);
 49 | 
 50 | 	// Other corners: the pairwise ordering of x0's components selects the lattice steps i1 and i2.
 51 | 	float3 g = step(x0.yzx, x0.xyz);
 52 | 	float3 l = 1.0 - g;
 53 | 	float3 i1 = min(g.xyz, l.zxy);
 54 | 	float3 i2 = max(g.xyz, l.zxy);
 55 | 
 56 | 	// x0 = x0 - 0.0 + 0.0 * C.xxx;
 57 | 	// x1 = x0 - i1  + 1.0 * C.xxx;
 58 | 	// x2 = x0 - i2  + 2.0 * C.xxx;
 59 | 	// x3 = x0 - 1.0 + 3.0 * C.xxx;
 60 | 	float3 x1 = x0 - i1 + C.xxx;
 61 | 	float3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
 62 | 	float3 x3 = x0 - D.yyy;      // -1.0+3.0*C.x = -0.5 = -D.y
 63 | 	// Bias the cell index before casting it to uint; the cast truncates, so the extra 0.5 is dropped.
 64 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, 1/3) and can exceed p's own range -- confirm the stated input range keeps this non-negative
 65 | 	uint2 hash0 = _pcg3d16((uint3)i); // two hash words per corner: .x feeds the first field, .y the second
 66 | 	uint2 hash1 = _pcg3d16((uint3)(i + i1));
 67 | 	uint2 hash2 = _pcg3d16((uint3)(i + i2));
 68 | 	uint2 hash3 = _pcg3d16((uint3)(i + 1 ));
 69 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
 70 | 	float3 p00 = _gradient3d(hash0.x); float3 p01 = _gradient3d(hash0.y);
 71 | 	float3 p10 = _gradient3d(hash1.x); float3 p11 = _gradient3d(hash1.y);
 72 | 	float3 p20 = _gradient3d(hash2.x); float3 p21 = _gradient3d(hash2.y);
 73 | 	float3 p30 = _gradient3d(hash3.x); float3 p31 = _gradient3d(hash3.y);
 74 | 
 75 | 	// Calculate noise gradients. m is the per-corner falloff saturate(0.5 - |x|^2), shared by both fields.
 76 | 	float4 m = saturate(0.5 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)));
 77 | 	float4 mt = m * m;   // m^2
 78 | 	float4 m4 = mt * mt; // m^4
 79 | 
 80 | 	mt = mt * m;         // m^3
 81 | 	float4 pdotx = float4(dot(p00, x0), dot(p10, x1), dot(p20, x2), dot(p30, x3));
 82 | 	float4 temp = mt * pdotx;
 83 | 	float3 gradient0 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3); // d/dp of sum(m^4 * dot(g, x)) = sum(-8 m^3 dot(g, x) x + m^4 g)
 84 | 	gradient0 += m4.x * p00 + m4.y * p10 + m4.z * p20 + m4.w * p30;
 85 | 	// Second field: same offsets and falloff, second set of corner gradients.
 86 | 	pdotx = float4(dot(p01, x0), dot(p11, x1), dot(p21, x2), dot(p31, x3));
 87 | 	temp = mt * pdotx;
 88 | 	float3 gradient1 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3);
 89 | 	gradient1 += m4.x * p01 + m4.y * p11 + m4.z * p21 + m4.w * p31;
 90 | 
 91 | 	// The cross product of two gradient fields is divergence-free.
 92 | 	return cross(gradient0, gradient1) * 3918.76; // 3918.76 is a constant output scale
 93 | }
 94 | 
 95 | // 4D bitangent noise: the scaled cross product of the xyz parts of two analytic 4D simplex-noise gradients at p. Approximately 163 instruction slots used.
 96 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 97 | float3 BitangentNoise4D(float4 p)
 98 | {
 99 | 	const float4 F4 = 0.309016994374947451; // skew factor, (sqrt(5) - 1) / 4, replicated to all four lanes
100 | 	const float4  C = float4( 0.138196601125011,  // (5 - sqrt(5))/20  G4
101 | 	                          0.276393202250021,  // 2 * G4
102 | 	                          0.414589803375032,  // 3 * G4
103 | 	                         -0.447213595499958); // -1 + 4 * G4
104 | 
105 | 	// First corner: skew p by dot(p, F4), floor to get the cell origin i, then unskew i to get the offset x0.
106 | 	float4 i  = floor(p + dot(p, F4) );
107 | 	float4 x0 = p -   i + dot(i, C.xxxx);
108 | 
109 | 	// Other corners
110 | 
111 | 	// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
112 | 	float4 i0; // rank (0..3) of each component of x0, accumulated from the pairwise comparisons below
113 | 	float3 isX = step( x0.yzw, x0.xxx ); // 1 where x >= y, x >= z, x >= w
114 | 	float3 isYZ = step( x0.zww, x0.yyz ); // 1 where y >= z, y >= w, z >= w
115 | 	// i0.x = dot( isX, float3( 1.0 ) );
116 | 	i0.x = isX.x + isX.y + isX.z;
117 | 	i0.yzw = 1.0 - isX;
118 | 	// i0.y += dot( isYZ.xy, float2( 1.0 ) );
119 | 	i0.y += isYZ.x + isYZ.y;
120 | 	i0.zw += 1.0 - isYZ.xy;
121 | 	i0.z += isYZ.z;
122 | 	i0.w += 1.0 - isYZ.z;
123 | 
124 | 	// i0 now contains the unique values 0,1,2,3 in each channel
125 | 	float4 i3 = saturate( i0 );       // 1 where rank >= 1
126 | 	float4 i2 = saturate( i0 - 1.0 ); // 1 where rank >= 2
127 | 	float4 i1 = saturate( i0 - 2.0 ); // 1 only where rank == 3
128 | 
129 | 	// x0 = x0 - 0.0 + 0.0 * C.xxxx
130 | 	// x1 = x0 - i1  + 1.0 * C.xxxx
131 | 	// x2 = x0 - i2  + 2.0 * C.xxxx
132 | 	// x3 = x0 - i3  + 3.0 * C.xxxx
133 | 	// x4 = x0 - 1.0 + 4.0 * C.xxxx
134 | 	float4 x1 = x0 - i1 + C.xxxx;
135 | 	float4 x2 = x0 - i2 + C.yyyy;
136 | 	float4 x3 = x0 - i3 + C.zzzz;
137 | 	float4 x4 = x0 + C.wwww;
138 | 	// Bias the cell index before casting it to uint; the cast truncates, so the extra 0.5 is dropped.
139 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, F4) and can exceed p's own range -- confirm the stated input range keeps this non-negative
140 | 	uint2 hash0 = _pcg4d16((uint4)i); // two hash words per corner: .x feeds the first field, .y the second
141 | 	uint2 hash1 = _pcg4d16((uint4)(i + i1));
142 | 	uint2 hash2 = _pcg4d16((uint4)(i + i2));
143 | 	uint2 hash3 = _pcg4d16((uint4)(i + i3));
144 | 	uint2 hash4 = _pcg4d16((uint4)(i + 1 ));
145 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
146 | 	float4 p00 = _gradient4d(hash0.x); float4 p01 = _gradient4d(hash0.y);
147 | 	float4 p10 = _gradient4d(hash1.x); float4 p11 = _gradient4d(hash1.y);
148 | 	float4 p20 = _gradient4d(hash2.x); float4 p21 = _gradient4d(hash2.y);
149 | 	float4 p30 = _gradient4d(hash3.x); float4 p31 = _gradient4d(hash3.y);
150 | 	float4 p40 = _gradient4d(hash4.x); float4 p41 = _gradient4d(hash4.y);
151 | 
152 | 	// Calculate noise gradients. m0/m1 hold the five per-corner falloffs saturate(0.6 - |x|^2), split 3 + 2.
153 | 	float3 m0 = saturate(0.6 - float3(dot(x0, x0), dot(x1, x1), dot(x2, x2)));
154 | 	float2 m1 = saturate(0.6 - float2(dot(x3, x3), dot(x4, x4)             ));
155 | 	float3 m02 = m0 * m0; float3 m03 = m02 * m0; // m^2 and m^3
156 | 	float2 m12 = m1 * m1; float2 m13 = m12 * m1;
157 | 
158 | 	float3 temp0 = m02 * float3(dot(p00, x0), dot(p10, x1), dot(p20, x2));
159 | 	float2 temp1 = m12 * float2(dot(p30, x3), dot(p40, x4));
160 | 	float4 grad0 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4); // d/dp of sum(m^3 * dot(g, x)) = sum(-6 m^2 dot(g, x) x + m^3 g)
161 | 	grad0 += m03.x * p00 + m03.y * p10 + m03.z * p20 + m13.x * p30 + m13.y * p40;
162 | 	// Second field: same offsets and falloff, second set of corner gradients.
163 | 	temp0 = m02 * float3(dot(p01, x0), dot(p11, x1), dot(p21, x2));
164 | 	temp1 = m12 * float2(dot(p31, x3), dot(p41, x4));
165 | 	float4 grad1 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
166 | 	grad1 += m03.x * p01 + m03.y * p11 + m03.z * p21 + m13.x * p31 + m13.y * p41;
167 | 
168 | 	// The cross product of two gradient fields is divergence-free; only the xyz parts of the 4D gradients are used.
169 | 	return cross(grad0.xyz, grad1.xyz) * 81.0; // 81.0 is a constant output scale
170 | }
171 | 


--------------------------------------------------------------------------------
/Develop/BitangentNoise_ref.hlsl:
--------------------------------------------------------------------------------
  1 | //	--------------------------------------------------------------------
  2 | //	Reference implementation of 3D/4D bitangent noise.
  3 | //	Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
  4 | //	Contact : atyuwen@gmail.com
  5 | //	Author : Yuwen Wu (atyuwen)
  6 | //	License : Distributed under the MIT License.
  7 | //	--------------------------------------------------------------------
  8 | 
  9 | float mod289(float x) { // x reduced modulo 289, using a reciprocal multiply instead of a division
 10 | 	float wraps = floor(x * (1.0 / 289.0)); // whole multiples of 289 at or below x
 11 | 	return x - wraps * 289.0;
 12 | }
 13 | float3 mod289(float3 x) { // component-wise overload
 14 | 	float3 wraps = floor(x * (1.0 / 289.0));
 15 | 	return x - wraps * 289.0;
 16 | }
 17 | float4 mod289(float4 x) { // component-wise overload
 18 | 	float4 wraps = floor(x * (1.0 / 289.0));
 19 | 	return x - wraps * 289.0;
 20 | }
 21 | 
 22 | float permute(float x) { // hash step: the polynomial (34x + 1) * x reduced with mod289
 23 | 	float poly = (x * 34.0 + 1.0) * x;
 24 | 	return mod289(poly);
 25 | }
 26 | float4 permute(float4 x) { // component-wise overload
 27 | 	float4 poly = (x * 34.0 + 1.0) * x;
 28 | 	return mod289(poly);
 29 | }
 30 | 
 31 | float taylorInvSqrt(float r) { // linear approximation of 1/sqrt(r): constant term minus a slope term
 32 | 	float slopeTerm = 0.85373472095314 * r;
 33 | 	return 1.79284291400159 - slopeTerm;
 34 | }
 35 | float4 taylorInvSqrt(float4 r) { // component-wise overload
 36 | 	float4 slopeTerm = 0.85373472095314 * r;
 37 | 	return 1.79284291400159 - slopeTerm;
 38 | }
 39 | 
 40 | float3 SimplexNoise3DGrad(float3 v) // Returns the analytic gradient of 3D simplex noise at v; the noise value itself is not returned.
 41 | {
 42 | 	const float2 C = float2(1.0 / 6.0, 1.0 / 3.0); // C.x = 1/6 is used to unskew, C.y = 1/3 to skew
 43 | 	const float4 D = float4(0.0, 0.5, 1.0, 2.0);
 44 | 
 45 | 	// First corner: skew v by dot(v, 1/3), floor to get the cell origin i, then unskew i to get the offset x0.
 46 | 	float3 i = floor(v + dot(v, C.yyy));
 47 | 	float3 x0 = v - i + dot(i, C.xxx);
 48 | 
 49 | 	// Other corners: the pairwise ordering of x0's components selects the lattice steps i1 and i2.
 50 | 	float3 g = step(x0.yzx, x0.xyz);
 51 | 	float3 l = 1.0 - g;
 52 | 	float3 i1 = min(g.xyz, l.zxy);
 53 | 	float3 i2 = max(g.xyz, l.zxy);
 54 | 
 55 | 	//   x0 = x0 - 0.0 + 0.0 * C.xxx;
 56 | 	//   x1 = x0 - i1  + 1.0 * C.xxx;
 57 | 	//   x2 = x0 - i2  + 2.0 * C.xxx;
 58 | 	//   x3 = x0 - 1.0 + 3.0 * C.xxx;
 59 | 	float3 x1 = x0 - i1 + C.xxx;
 60 | 	float3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
 61 | 	float3 x3 = x0 - D.yyy;      // -1.0+3.0*C.x = -0.5 = -D.y
 62 | 
 63 | 	// Permutations: one hash per corner; the four lanes of p use the corner offsets 0, i1, i2 and 1.
 64 | 	i = mod289(i);
 65 | 	float4 p = permute(permute(permute(
 66 | 		i.z + float4(0.0, i1.z, i2.z, 1.0))
 67 | 		+ i.y + float4(0.0, i1.y, i2.y, 1.0))
 68 | 		+ i.x + float4(0.0, i1.x, i2.x, 1.0));
 69 | 
 70 | 	// Gradients: 7x7 points over a square, mapped onto an octahedron.
 71 | 	// The ring size 17*17 = 289 is close to a multiple of 49 (49*6 = 294)
 72 | 	float n_ = 0.142857142857; // 1.0/7.0
 73 | 	float3 ns = n_ * D.wyz - D.xzx; // = (2/7, -13/14, 1/7)
 74 | 
 75 | 	float4 j = p - 49.0 * floor(p * ns.z * ns.z);  //  mod(p,7*7)
 76 | 	// Split j into a row index x_ and a column index y_ on the 7x7 grid.
 77 | 	float4 x_ = floor(j * ns.z);
 78 | 	float4 y_ = floor(j - 7.0 * x_);    // mod(j,N)
 79 | 	// Map the grid indices to coordinates; h is the remaining height so that |x| + |y| + h = 1.
 80 | 	float4 x = x_ *ns.x + ns.yyyy;
 81 | 	float4 y = y_ *ns.x + ns.yyyy;
 82 | 	float4 h = 1.0 - abs(x) - abs(y);
 83 | 
 84 | 	float4 b0 = float4(x.xy, y.xy);
 85 | 	float4 b1 = float4(x.zw, y.zw);
 86 | 
 87 | 	//float4 s0 = float4(lessThan(b0,0.0))*2.0 - 1.0;
 88 | 	//float4 s1 = float4(lessThan(b1,0.0))*2.0 - 1.0;
 89 | 	float4 s0 = floor(b0)*2.0 + 1.0; // -1 where b0 is in [-1, 0), +1 where it is in [0, 1)
 90 | 	float4 s1 = floor(b1)*2.0 + 1.0;
 91 | 	float4 sh = -step(h, float4(0, 0, 0, 0)); // -1 where h <= 0, otherwise 0
 92 | 	// Where h <= 0, subtract the sign term from x and y (sh is -1 there); elsewhere they are unchanged.
 93 | 	float4 a0 = b0.xzyw + s0.xzyw*sh.xxyy;
 94 | 	float4 a1 = b1.xzyw + s1.xzyw*sh.zzww;
 95 | 	// Assemble one gradient vector per corner.
 96 | 	float3 p0 = float3(a0.xy, h.x);
 97 | 	float3 p1 = float3(a0.zw, h.y);
 98 | 	float3 p2 = float3(a1.xy, h.z);
 99 | 	float3 p3 = float3(a1.zw, h.w);
100 | 
101 | 	// Normalise gradients (approximately: taylorInvSqrt is a linear estimate of 1/sqrt)
102 | 	float4 norm = taylorInvSqrt(float4(dot(p0, p0), dot(p1, p1), dot(p2, p2), dot(p3, p3)));
103 | 	p0 *= norm.x;
104 | 	p1 *= norm.y;
105 | 	p2 *= norm.z;
106 | 	p3 *= norm.w;
107 | 
108 | 	// Per-corner falloff m = max(0.5 - |x|^2, 0), its powers, and the gradient projections dot(p, x).
109 | 	float4 m = max(0.5 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0);
110 | 	float4 m2 = m * m;
111 | 	float4 m4 = m2 * m2;
112 | 	float4 pdotx = float4(dot(p0, x0), dot(p1, x1), dot(p2, x2), dot(p3, x3));
113 | 
114 | 	// Determine noise gradient: d/dv of sum(m^4 * dot(p, x)) = sum(-8 m^3 dot(p, x) x + m^4 p)
115 | 	float4 temp = m2 * m * pdotx;
116 | 	float3 gradient = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3);
117 | 	gradient += m4.x * p0 + m4.y * p1 + m4.z * p2 + m4.w * p3;
118 | 	gradient *= 105.0; // constant output scale
119 | 	return gradient;
120 | }
121 | 
122 | float4 grad4(float j, float4 ip) {
123 | 	// Builds a 4D gradient from index j: xyz are 7-level quantizations of frac(j * ip.xyz), w is 1.5 minus their L1 norm.
124 | 	const float3 unit3 = float3(1.0, 1.0, 1.0);
125 | 	float4 g;
126 | 	g.xyz = floor(frac(float3(j, j, j) * ip.xyz) * 7.0) * ip.z - 1.0;
127 | 	g.w = 1.5 - dot(abs(g.xyz), unit3);
128 | 	float4 isNeg = float4(g < float4(0, 0, 0, 0)); // 1.0 in every lane that is negative
129 | 	g.xyz = g.xyz + (isNeg.xyz * 2.0 - 1.0) * isNeg.www; // xyz are adjusted only when w is negative
130 | 	return g;
131 | }
132 | 
133 | float4 SimplexNoise4DGrad(float4 v) // Returns the analytic gradient of 4D simplex noise at v; the noise value itself is not returned.
134 | {
135 | 	const float4  C = float4( 0.138196601125011,  // (5 - sqrt(5))/20  G4
136 | 	                          0.276393202250021,  // 2 * G4
137 | 	                          0.414589803375032,  // 3 * G4
138 | 	                         -0.447213595499958); // -1 + 4 * G4
139 | 
140 | 	// First corner: skew v by dot(v, F4), floor to get the cell origin i, then unskew i to get the offset x0.
141 | 	float4 F4 = 0.309016994374947451; // skew factor, (sqrt(5) - 1) / 4, replicated to all four lanes
142 | 	float4 i  = floor(v + dot(v, F4) );
143 | 	float4 x0 = v -   i + dot(i, C.xxxx);
144 | 
145 | 	// Other corners
146 | 
147 | 	// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
148 | 	float4 i0; // rank (0..3) of each component of x0, accumulated from the pairwise comparisons below
149 | 	float3 isX = step( x0.yzw, x0.xxx ); // 1 where x >= y, x >= z, x >= w
150 | 	float3 isYZ = step( x0.zww, x0.yyz ); // 1 where y >= z, y >= w, z >= w
151 | 	//  i0.x = dot( isX, float3( 1.0 ) );
152 | 	i0.x = isX.x + isX.y + isX.z;
153 | 	i0.yzw = 1.0 - isX;
154 | 	//  i0.y += dot( isYZ.xy, float2( 1.0 ) );
155 | 	i0.y += isYZ.x + isYZ.y;
156 | 	i0.zw += 1.0 - isYZ.xy;
157 | 	i0.z += isYZ.z;
158 | 	i0.w += 1.0 - isYZ.z;
159 | 
160 | 	// i0 now contains the unique values 0,1,2,3 in each channel
161 | 	float4 i3 = clamp( i0, 0.0, 1.0 );     // 1 where rank >= 1
162 | 	float4 i2 = clamp( i0-1.0, 0.0, 1.0 ); // 1 where rank >= 2
163 | 	float4 i1 = clamp( i0-2.0, 0.0, 1.0 ); // 1 only where rank == 3
164 | 
165 | 	//  x0 = x0 - 0.0 + 0.0 * C.xxxx
166 | 	//  x1 = x0 - i1  + 1.0 * C.xxxx
167 | 	//  x2 = x0 - i2  + 2.0 * C.xxxx
168 | 	//  x3 = x0 - i3  + 3.0 * C.xxxx
169 | 	//  x4 = x0 - 1.0 + 4.0 * C.xxxx
170 | 	float4 x1 = x0 - i1 + C.xxxx;
171 | 	float4 x2 = x0 - i2 + C.yyyy;
172 | 	float4 x3 = x0 - i3 + C.zzzz;
173 | 	float4 x4 = x0 + C.wwww;
174 | 
175 | 	// Permutations: j0 hashes the first corner; the four lanes of j1 hash the corners offset by i1, i2, i3 and 1.
176 | 	i = mod289(i); 
177 | 	float j0 = permute( permute( permute( permute(i.w) + i.z) + i.y) + i.x);
178 | 	float4 j1 = permute( permute( permute( permute (
179 | 	           i.w + float4(i1.w, i2.w, i3.w, 1.0 ))
180 | 	         + i.z + float4(i1.z, i2.z, i3.z, 1.0 ))
181 | 	         + i.y + float4(i1.y, i2.y, i3.y, 1.0 ))
182 | 	         + i.x + float4(i1.x, i2.x, i3.x, 1.0 ));
183 | 
184 | 	// Gradients: 7x7x6 points over a cube, mapped onto a 4-cross polytope
185 | 	// 7*7*6 = 294, which is close to the ring size 17*17 = 289.
186 | 	float4 ip = float4(1.0/294.0, 1.0/49.0, 1.0/7.0, 0.0) ;
187 | 	// One gradient vector per corner, decoded from its hash by grad4.
188 | 	float4 p0 = grad4(j0,   ip);
189 | 	float4 p1 = grad4(j1.x, ip);
190 | 	float4 p2 = grad4(j1.y, ip);
191 | 	float4 p3 = grad4(j1.z, ip);
192 | 	float4 p4 = grad4(j1.w, ip);
193 | 
194 | 	// Normalise gradients (approximately: taylorInvSqrt is a linear estimate of 1/sqrt)
195 | 	float4 norm = taylorInvSqrt(float4(dot(p0,p0), dot(p1,p1), dot(p2, p2), dot(p3,p3)));
196 | 	p0 *= norm.x;
197 | 	p1 *= norm.y;
198 | 	p2 *= norm.z;
199 | 	p3 *= norm.w;
200 | 	p4 *= taylorInvSqrt(dot(p4,p4));
201 | 
202 | 	// Mix contributions from the five corners; m0/m1 hold the falloffs max(0.6 - |x|^2, 0), split 3 + 2.
203 | 	float3 m0 = max(0.6 - float3(dot(x0,x0), dot(x1,x1), dot(x2,x2)), 0.0);
204 | 	float2 m1 = max(0.6 - float2(dot(x3,x3), dot(x4,x4)            ), 0.0);
205 | 	float3 m02 = m0 * m0;
206 | 	float2 m12 = m1 * m1;
207 | 	float3 m04 = m02 * m02;
208 | 	float2 m14 = m12 * m12;
209 | 	// Gradient: d/dv of sum(m^4 * dot(p, x)) = sum(-8 m^3 dot(p, x) x + m^4 p)
210 | 	float3 temp0 = (m02 * m0) * float3( dot( p0, x0 ), dot( p1, x1 ), dot( p2, x2 ) );
211 | 	float2 temp1 = (m12 * m1) * float2( dot( p3, x3 ), dot( p4, x4 ) );
212 | 	float4 grad = -8.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
213 | 	grad += m04.x * p0 + m04.y * p1 + m04.z * p2 + m14.x * p3 + m14.y * p4;
214 | 	grad *= 49.0; // constant output scale
215 | 	return grad;
216 | }
217 | 
218 | // 3D bitangent noise: cross product of the simplex-noise gradient at p and at a fixed offset from p. Approximately 223 instruction slots used.
219 | float3 BitangentNoise3D(float3 p) {
220 | 	const float3 kFieldOffset = float3(31.416, -47.853, 12.679);
221 | 	float3 gradA = SimplexNoise3DGrad(p);
222 | 	float3 gradB = SimplexNoise3DGrad(p + kFieldOffset);
223 | 	return cross(gradA, gradB);
224 | }
225 | 
226 | // 4D bitangent noise: cross product of a 4D simplex gradient (xyz part) and an offset 3D simplex gradient. Approximately 318 instruction slots used.
227 | float3 BitangentNoise4D(float4 p) {
228 | 	const float3 kFieldOffset = float3(31.416, -47.853, 12.679);
229 | 	float3 gradA = SimplexNoise4DGrad(p).xyz;
230 | 	// Option 1 (not used): correct but expensive -- take the second gradient from the 4D noise as well.
231 | 	//float3 gradB = SimplexNoise4DGrad(p + float4(31.416, -47.853, 12.679, 113.408)).xyz;
232 | 	// Option 2 (in use): cheaper, less random but still looks nice -- the second gradient does not read p.w.
233 | 	float3 gradB = SimplexNoise3DGrad(p.xyz + kFieldOffset);
234 | 	return cross(gradA, gradB);
235 | }
236 | 
237 | // 4D bitangent noise, fast version with bad quality: both gradients come from the 3D noise.
238 | // Approximately 222 instruction slots used.
239 | float3 BitangentNoise4DFast(float4 p) {
240 | 	// Two 3D gradients sampled from overlapping swizzles of p (xyz and yzw).
241 | 	return cross(
242 | 		SimplexNoise3DGrad(p.xyz),
243 | 		SimplexNoise3DGrad(p.yzw));
244 | }
245 | 


--------------------------------------------------------------------------------
/Develop/BitangentNoise_v0.hlsl:
--------------------------------------------------------------------------------
  1 | //	--------------------------------------------------------------------
  2 | //	Optimized implementation of 3D/4D bitangent noise.
  3 | //	Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
  4 | //	Contact : atyuwen@gmail.com
  5 | //	Author : Yuwen Wu (https://atyuwen.github.io/)
  6 | //	License : Distributed under the MIT License.
  7 | //	--------------------------------------------------------------------
  8 | 
  9 | // Permuted congruential generator (only top 16 bits are well shuffled).
 10 | // References: 1. Mark Jarzynski and Marc Olano, "Hash Functions for GPU Rendering".
 11 | //             2. UnrealEngine/Random.ush. https://github.com/EpicGames/UnrealEngine
 12 | uint2 pcg3d16(uint3 p) {
 13 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 14 | 	uint3 s = 1013904223u + 1664525u * p;
 15 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  // full mixing round
 16 | 	s.x = s.x + s.y * s.z;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 17 | 	return uint2(s.x, s.y);
 18 | }
 19 | uint2 pcg4d16(uint4 p) {
 20 | 	// Seed each lane with one LCG step; unsigned arithmetic wraps modulo 2^32.
 21 | 	uint4 s = 1013904223u + 1664525u * p;
 22 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  s.z = s.z + s.x * s.y;  s.w = s.w + s.y * s.z;  // full mixing round
 23 | 	s.x = s.x + s.y * s.w;  s.y = s.y + s.z * s.x;  // second round, limited to the two lanes returned
 24 | 	return uint2(s.x, s.y);
 25 | }
 26 | 
 27 | // Get random gradient from hash value.
 28 | float3 gradient3d(uint hash) {
 29 | 	// Hash bits 19, 18 and 17 pick the sign of x, y and z; every component is exactly -1.0 or +1.0.
 30 | 	uint3 signBit = (hash.xxx >> uint3(19, 18, 17)) & 1u;
 31 | 	return float3(signBit) * 2.0 - 1.0;
 32 | }
 33 | float4 gradient4d(uint hash) {
 34 | 	// Hash bits 19, 18, 17 and 16 pick the sign of x, y, z and w; every component is exactly -1.0 or +1.0.
 35 | 	uint4 signBit = (hash.xxxx >> uint4(19, 18, 17, 16)) & 1u;
 36 | 	return float4(signBit) * 2.0 - 1.0;
 37 | }
 38 | 
 39 | // Optimized 3D bitangent noise: the scaled cross product of two analytic simplex-noise gradients at p. Approximately 113 instruction slots used.
 40 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 41 | float3 BitangentNoise3D(float3 p)
 42 | {
 43 | 	const float2 C = float2(1.0 / 6.0, 1.0 / 3.0); // C.x = 1/6 is used to unskew, C.y = 1/3 to skew
 44 | 	const float4 D = float4(0.0, 0.5, 1.0, 2.0);   // only D.y (0.5) is read below
 45 | 
 46 | 	// First corner: skew p by dot(p, 1/3), floor to get the cell origin i, then unskew i to get the offset x0.
 47 | 	float3 i = floor(p + dot(p, C.yyy));
 48 | 	float3 x0 = p - i + dot(i, C.xxx);
 49 | 
 50 | 	// Other corners: the pairwise ordering of x0's components selects the lattice steps i1 and i2.
 51 | 	float3 g = step(x0.yzx, x0.xyz);
 52 | 	float3 l = 1.0 - g;
 53 | 	float3 i1 = min(g.xyz, l.zxy);
 54 | 	float3 i2 = max(g.xyz, l.zxy);
 55 | 
 56 | 	// x0 = x0 - 0.0 + 0.0 * C.xxx;
 57 | 	// x1 = x0 - i1  + 1.0 * C.xxx;
 58 | 	// x2 = x0 - i2  + 2.0 * C.xxx;
 59 | 	// x3 = x0 - 1.0 + 3.0 * C.xxx;
 60 | 	float3 x1 = x0 - i1 + C.xxx;
 61 | 	float3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
 62 | 	float3 x3 = x0 - D.yyy;      // -1.0+3.0*C.x = -0.5 = -D.y
 63 | 	// Bias the cell index before casting it to uint; the cast truncates, so the extra 0.5 is dropped.
 64 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, 1/3) and can exceed p's own range -- confirm the stated input range keeps this non-negative
 65 | 	uint2 hash0 = pcg3d16((uint3)i); // two hash words per corner: .x feeds the first field, .y the second
 66 | 	uint2 hash1 = pcg3d16((uint3)(i + i1));
 67 | 	uint2 hash2 = pcg3d16((uint3)(i + i2));
 68 | 	uint2 hash3 = pcg3d16((uint3)(i + 1 ));
 69 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
 70 | 	float3 p00 = gradient3d(hash0.x); float3 p01 = gradient3d(hash0.y);
 71 | 	float3 p10 = gradient3d(hash1.x); float3 p11 = gradient3d(hash1.y);
 72 | 	float3 p20 = gradient3d(hash2.x); float3 p21 = gradient3d(hash2.y);
 73 | 	float3 p30 = gradient3d(hash3.x); float3 p31 = gradient3d(hash3.y);
 74 | 
 75 | 	// Calculate noise gradients. m is the per-corner falloff saturate(0.5 - |x|^2), shared by both fields.
 76 | 	float4 m = saturate(0.5 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)));
 77 | 	float4 mt = m * m;   // m^2
 78 | 	float4 m4 = mt * mt; // m^4
 79 | 
 80 | 	mt = mt * m;         // m^3
 81 | 	float4 pdotx = float4(dot(p00, x0), dot(p10, x1), dot(p20, x2), dot(p30, x3));
 82 | 	float4 temp = mt * pdotx;
 83 | 	float3 gradient0 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3); // d/dp of sum(m^4 * dot(g, x)) = sum(-8 m^3 dot(g, x) x + m^4 g)
 84 | 	gradient0 += m4.x * p00 + m4.y * p10 + m4.z * p20 + m4.w * p30;
 85 | 	// Second field: same offsets and falloff, second set of corner gradients.
 86 | 	pdotx = float4(dot(p01, x0), dot(p11, x1), dot(p21, x2), dot(p31, x3));
 87 | 	temp = mt * pdotx;
 88 | 	float3 gradient1 = -8.0 * (temp.x * x0 + temp.y * x1 + temp.z * x2 + temp.w * x3);
 89 | 	gradient1 += m4.x * p01 + m4.y * p11 + m4.z * p21 + m4.w * p31;
 90 | 
 91 | 	// The cross product of two gradient fields is divergence-free.
 92 | 	return cross(gradient0, gradient1) * 3918.76; // 3918.76 is a constant output scale
 93 | }
 94 | 
 95 | // 4D bitangent noise: the scaled cross product of the xyz parts of two analytic 4D simplex-noise gradients at p. Approximately 165 instruction slots used.
 96 | // Assume p is in the range [-32768, 32767] (the author's stated assumption; see the review note at the index bias below).
 97 | float3 BitangentNoise4D(float4 p)
 98 | {
 99 | 	const float4 F4 = 0.309016994374947451; // skew factor, (sqrt(5) - 1) / 4, replicated to all four lanes
100 | 	const float4  C = float4( 0.138196601125011,  // (5 - sqrt(5))/20  G4
101 | 	                          0.276393202250021,  // 2 * G4
102 | 	                          0.414589803375032,  // 3 * G4
103 | 	                         -0.447213595499958); // -1 + 4 * G4
104 | 
105 | 	// First corner: skew p by dot(p, F4), floor to get the cell origin i, then unskew i to get the offset x0.
106 | 	float4 i  = floor(p + dot(p, F4) );
107 | 	float4 x0 = p -   i + dot(i, C.xxxx);
108 | 
109 | 	// Other corners
110 | 
111 | 	// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
112 | 	float4 i0; // rank (0..3) of each component of x0, accumulated from the pairwise comparisons below
113 | 	float3 isX = step( x0.yzw, x0.xxx ); // 1 where x >= y, x >= z, x >= w
114 | 	float3 isYZ = step( x0.zww, x0.yyz ); // 1 where y >= z, y >= w, z >= w
115 | 	// i0.x = dot( isX, float3( 1.0 ) );
116 | 	i0.x = isX.x + isX.y + isX.z;
117 | 	i0.yzw = 1.0 - isX;
118 | 	// i0.y += dot( isYZ.xy, float2( 1.0 ) );
119 | 	i0.y += isYZ.x + isYZ.y;
120 | 	i0.zw += 1.0 - isYZ.xy;
121 | 	i0.z += isYZ.z;
122 | 	i0.w += 1.0 - isYZ.z;
123 | 
124 | 	// i0 now contains the unique values 0,1,2,3 in each channel
125 | 	float4 i3 = saturate( i0 );       // 1 where rank >= 1
126 | 	float4 i2 = saturate( i0 - 1.0 ); // 1 where rank >= 2
127 | 	float4 i1 = saturate( i0 - 2.0 ); // 1 only where rank == 3
128 | 
129 | 	// x0 = x0 - 0.0 + 0.0 * C.xxxx
130 | 	// x1 = x0 - i1  + 1.0 * C.xxxx
131 | 	// x2 = x0 - i2  + 2.0 * C.xxxx
132 | 	// x3 = x0 - i3  + 3.0 * C.xxxx
133 | 	// x4 = x0 - 1.0 + 4.0 * C.xxxx
134 | 	float4 x1 = x0 - i1 + C.xxxx;
135 | 	float4 x2 = x0 - i2 + C.yyyy;
136 | 	float4 x3 = x0 - i3 + C.zzzz;
137 | 	float4 x4 = x0 + C.wwww;
138 | 	// Bias the cell index before casting it to uint; the cast truncates, so the extra 0.5 is dropped.
139 | 	i = i + 32768.5; // NOTE(review): i derives from p + dot(p, F4) and can exceed p's own range -- confirm the stated input range keeps this non-negative
140 | 	uint2 hash0 = pcg4d16((uint4)i); // two hash words per corner: .x feeds the first field, .y the second
141 | 	uint2 hash1 = pcg4d16((uint4)(i + i1));
142 | 	uint2 hash2 = pcg4d16((uint4)(i + i2));
143 | 	uint2 hash3 = pcg4d16((uint4)(i + i3));
144 | 	uint2 hash4 = pcg4d16((uint4)(i + 1 ));
145 | 	// pN0 / pN1: gradient vector of corner N for the first / second noise field.
146 | 	float4 p00 = gradient4d(hash0.x); float4 p01 = gradient4d(hash0.y);
147 | 	float4 p10 = gradient4d(hash1.x); float4 p11 = gradient4d(hash1.y);
148 | 	float4 p20 = gradient4d(hash2.x); float4 p21 = gradient4d(hash2.y);
149 | 	float4 p30 = gradient4d(hash3.x); float4 p31 = gradient4d(hash3.y);
150 | 	float4 p40 = gradient4d(hash4.x); float4 p41 = gradient4d(hash4.y);
151 | 
152 | 	// Calculate noise gradients. m0/m1 hold the five per-corner falloffs saturate(0.5 - |x|^2), split 3 + 2.
153 | 	float3 m0 = saturate(0.5 - float3(dot(x0,x0), dot(x1,x1), dot(x2,x2)));
154 | 	float2 m1 = saturate(0.5 - float2(dot(x3,x3), dot(x4,x4)            ));
155 | 	float3 m0t = m0 * m0; // m^2
156 | 	float2 m1t = m1 * m1;
157 | 	float3 m04 = m0t * m0t; // m^4
158 | 	float2 m14 = m1t * m1t;
159 | 
160 | 	m0t = m0t * m0; // m^3
161 | 	m1t = m1t * m1;
162 | 	float3 temp0 = m0t * float3( dot( p00, x0 ), dot( p10, x1 ), dot( p20, x2 ) );
163 | 	float2 temp1 = m1t * float2( dot( p30, x3 ), dot( p40, x4 ) );
164 | 	float4 grad0 = -8.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4); // d/dp of sum(m^4 * dot(g, x)) = sum(-8 m^3 dot(g, x) x + m^4 g)
165 | 	grad0 += m04.x * p00 + m04.y * p10 + m04.z * p20 + m14.x * p30 + m14.y * p40;
166 | 	// Second field: same offsets and falloff, second set of corner gradients.
167 | 	temp0 = m0t * float3( dot( p01, x0 ), dot( p11, x1 ), dot( p21, x2 ) );
168 | 	temp1 = m1t * float2( dot( p31, x3 ), dot( p41, x4 ) );
169 | 	float4 grad1 = -8.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
170 | 	grad1 += m04.x * p01 + m04.y * p11 + m04.z * p21 + m14.x * p31 + m14.y * p41;
171 | 
172 | 	// The cross product of two gradient fields is divergence-free; only the xyz parts of the 4D gradients are used.
173 | 	return cross(grad0.xyz, grad1.xyz) * 2992.1; // 2992.1 is a constant output scale
174 | }
175 | 


--------------------------------------------------------------------------------
/Develop/SimplexNoise.hlsl:
--------------------------------------------------------------------------------
  1 | //	--------------------------------------------------------------------
  2 | //	Optimized implementation of simplex noise.
  3 | //	Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
  4 | //	Contact : atyuwen@gmail.com
  5 | //	Author : Yuwen Wu (https://atyuwen.github.io/)
  6 | //	License : Distributed under the MIT License.
  7 | //	--------------------------------------------------------------------
  8 | 
  9 | // Permuted congruential generator hashes (only the top 16 bits of the result are well shuffled).
 10 | // References: 1. Mark Jarzynski and Marc Olano, "Hash Functions for GPU Rendering".
 11 | //             2. UnrealEngine/Random.ush. https://github.com/EpicGames/UnrealEngine
 12 | uint pcg3d16(uint3 p)
 13 | {
 14 | 	uint3 s = p * 1664525u + 1013904223u; // per-component multiply-add seed
 15 | 	s.x = s.x + s.y * s.z; s.y = s.y + s.z * s.x; s.z = s.z + s.x * s.y; // mix every lane with the other two, in order
 16 | 	s.x = s.x + s.y * s.z; // second round for x only, since x is the lane returned
 17 | 	return s.x;
 18 | }
 19 | uint pcg4d16(uint4 p) // hashes a uint4 down to one uint (the mixed x lane)
 20 | {
 21 | 	uint4 s = p * 1664525u + 1013904223u; // per-component multiply-add seed
 22 | 	s.x = s.x + s.y * s.w; s.y = s.y + s.z * s.x; s.z = s.z + s.x * s.y; s.w = s.w + s.y * s.z; // first mixing round, in order
 23 | 	s.x = s.x + s.y * s.w; // second round for x only, since x is the lane returned
 24 | 	return s.x;
 25 | }
 26 | 
 27 | // Get random gradient from hash value: bits 19, 18 and 17 choose -1 or +1 for x, y and z.
 28 | float3 gradient3d(uint hash)
 29 | {
 30 | 	const uint3 mask = uint3(0x80000, 0x40000, 0x20000); // one hash bit per component
 31 | 	return float3(hash & mask) * (2.0 / float3(mask)) - 1.0; // masked value is 0 or mask -> 0 or 2 -> -1 or +1
 32 | }
 33 | float4 gradient4d(uint hash) // hash bits 19, 18, 17 and 16 choose -1 or +1 for x, y, z and w
 34 | {
 35 | 	const uint4 mask = uint4(0x80000, 0x40000, 0x20000, 0x10000); // one hash bit per component
 36 | 	return float4(hash & mask) * (2.0 / float4(mask)) - 1.0; // masked value is 0 or mask -> 0 or 2 -> -1 or +1
 37 | }
 38 | 
 39 | // 3D Simplex Noise. Approximately 71 instruction slots used. Returns a single scalar noise value for p.
 40 | // Assume p is in the range [-32768, 32767]. NOTE(review): with the skew, i + 32768.5 can still go negative inside that range (e.g. all components near -32768); confirm the intended range.
 41 | float SimplexNoise3D(float3 p)
 42 | {
 43 | 	const float2 C = float2(1.0 / 6.0, 1.0 / 3.0); // C.y scales the coordinate sum used to skew p, C.x the sum used to unskew i
 44 | 	const float4 D = float4(0.0, 0.5, 1.0, 2.0); // only D.y (0.5) is read below
 45 | 
 46 | 	// First corner: i is the skewed cell origin, x0 the offset of p from that corner
 47 | 	float3 i = floor(p + dot(p, C.yyy));
 48 | 	float3 x0 = p - i + dot(i, C.xxx);
 49 | 
 50 | 	// Other corners: g holds 0/1 results of x0.x>=x0.y, x0.y>=x0.z, x0.z>=x0.x; i1 and i2 are the lattice offsets of the 2nd and 3rd corners
 51 | 	float3 g = step(x0.yzx, x0.xyz);
 52 | 	float3 l = 1.0 - g;
 53 | 	float3 i1 = min(g.xyz, l.zxy);
 54 | 	float3 i2 = max(g.xyz, l.zxy);
 55 | 
 56 | 	// x0 = x0 - 0.0 + 0.0 * C.xxx;
 57 | 	// x1 = x0 - i1  + 1.0 * C.xxx;
 58 | 	// x2 = x0 - i2  + 2.0 * C.xxx;
 59 | 	// x3 = x0 - 1.0 + 3.0 * C.xxx;
 60 | 	float3 x1 = x0 - i1 + C.xxx;
 61 | 	float3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
 62 | 	float3 x3 = x0 - D.yyy;      // -1.0+3.0*C.x = -0.5 = -D.y
 63 | 
 64 | 	i = i + 32768.5; // bias the cell index before the uint casts below (the extra .5 presumably guards the truncation)
 65 | 	uint hash0 = pcg3d16((uint3)i);
 66 | 	uint hash1 = pcg3d16((uint3)(i + i1));
 67 | 	uint hash2 = pcg3d16((uint3)(i + i2));
 68 | 	uint hash3 = pcg3d16((uint3)(i + 1 ));
 69 | 
 70 | 	float3 p0 = gradient3d(hash0); // one gradient per simplex corner, selected by that corner's hash
 71 | 	float3 p1 = gradient3d(hash1);
 72 | 	float3 p2 = gradient3d(hash2);
 73 | 	float3 p3 = gradient3d(hash3);
 74 | 
 75 | 	// Mix final noise value: per-corner falloff m = saturate(0.5 - |x|^2), raised to the 4th power.
 76 | 	float4 m = saturate(0.5 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)));
 77 | 	float4 mt = m * m;
 78 | 	float4 m4 = mt * mt;
 79 | 	return 62.6 * dot(m4, float4(dot(x0, p0), dot(x1, p1), dot(x2, p2), dot(x3, p3))); // 62.6 is the output scale (presumably a normalization factor)
 80 | }
 81 | 
 82 | // 4D Simplex Noise. Approximately 113 instruction slots used. Returns a single scalar noise value for p.
 83 | // Assume p is in the range [-32768, 32767]. NOTE(review): with the skew, i + 32768.5 can still go negative inside that range (e.g. all components near -32768); confirm the intended range.
 84 | float SimplexNoise4D(float4 p)
 85 | {
 86 | 	const float4 F4 = 0.309016994374947451; // (sqrt(5) - 1) / 4, scalar assigned to all four components
 87 | 	const float4  C = float4( 0.138196601125011,  // (5 - sqrt(5))/20  G4
 88 | 	                          0.276393202250021,  // 2 * G4
 89 | 	                          0.414589803375032,  // 3 * G4
 90 | 	                         -0.447213595499958); // -1 + 4 * G4
 91 | 
 92 | 	// First corner: i is the skewed cell origin, x0 the offset of p from that corner
 93 | 	float4 i  = floor(p + dot(p, F4) );
 94 | 	float4 x0 = p -   i + dot(i, C.xxxx);
 95 | 
 96 | 	// Other corners
 97 | 
 98 | 	// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
 99 | 	float4 i0;
100 | 	float3 isX = step( x0.yzw, x0.xxx ); // 0/1 results of x>=y, x>=z, x>=w
101 | 	float3 isYZ = step( x0.zww, x0.yyz ); // 0/1 results of y>=z, y>=w, z>=w
102 | 	// i0.x = dot( isX, float3( 1.0 ) );
103 | 	i0.x = isX.x + isX.y + isX.z;
104 | 	i0.yzw = 1.0 - isX;
105 | 	// i0.y += dot( isYZ.xy, float2( 1.0 ) );
106 | 	i0.y += isYZ.x + isYZ.y;
107 | 	i0.zw += 1.0 - isYZ.xy;
108 | 	i0.z += isYZ.z;
109 | 	i0.w += 1.0 - isYZ.z;
110 | 
111 | 	// i0 now contains the unique values 0,1,2,3 in each channel (the rank of each component of x0)
112 | 	float4 i3 = saturate( i0 ); // 1 where rank >= 1
113 | 	float4 i2 = saturate( i0 - 1.0 ); // 1 where rank >= 2
114 | 	float4 i1 = saturate( i0 - 2.0 ); // 1 only where rank == 3
115 | 
116 | 	// x0 = x0 - 0.0 + 0.0 * C.xxxx
117 | 	// x1 = x0 - i1  + 1.0 * C.xxxx
118 | 	// x2 = x0 - i2  + 2.0 * C.xxxx
119 | 	// x3 = x0 - i3  + 3.0 * C.xxxx
120 | 	// x4 = x0 - 1.0 + 4.0 * C.xxxx
121 | 	float4 x1 = x0 - i1 + C.xxxx;
122 | 	float4 x2 = x0 - i2 + C.yyyy;
123 | 	float4 x3 = x0 - i3 + C.zzzz;
124 | 	float4 x4 = x0 + C.wwww;
125 | 
126 | 	i = i + 32768.5; // bias the cell index before the uint casts below (the extra .5 presumably guards the truncation)
127 | 	uint hash0 = pcg4d16((uint4)i);
128 | 	uint hash1 = pcg4d16((uint4)(i + i1));
129 | 	uint hash2 = pcg4d16((uint4)(i + i2));
130 | 	uint hash3 = pcg4d16((uint4)(i + i3));
131 | 	uint hash4 = pcg4d16((uint4)(i + 1 ));
132 | 
133 | 	float4 p0 = gradient4d(hash0); // one gradient per simplex corner, selected by that corner's hash
134 | 	float4 p1 = gradient4d(hash1);
135 | 	float4 p2 = gradient4d(hash2);
136 | 	float4 p3 = gradient4d(hash3);
137 | 	float4 p4 = gradient4d(hash4);
138 | 
139 | 	// Mix contributions from the five corners: falloff m = saturate(0.6 - |x|^2), cubed. NOTE(review): SimplexNoise3D uses 0.5 and m^4; confirm 0.6 / m^3 is intended here.
140 | 	float3 m0 = saturate(0.6 - float3(dot(x0,x0), dot(x1,x1), dot(x2,x2)));
141 | 	float2 m1 = saturate(0.6 - float2(dot(x3,x3), dot(x4,x4)            ));
142 | 	float3 m03 = m0 * m0 * m0;
143 | 	float2 m13 = m1 * m1 * m1;
144 | 	return (dot(m03, float3(dot(p0, x0), dot(p1, x1), dot(p2, x2)))
145 | 	      + dot(m13, float2(dot(p3, x3), dot(p4, x4)))) * 9.0; // 9.0 is the output scale (presumably a normalization factor)
146 | }
147 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Yuwen Wu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bitangent Noise
 2 | 
 3 | [Curl Noise](https://www.cs.ubc.ca/~rbridson/docs/bridson-siggraph2007-curlnoise.pdf) by Robert Bridson is a widely known method that can generate divergence-free noise. This divergence-free property makes it extremely suitable for driving particles to move like real fluid motion.
 4 | 
 5 | Here is another divergence-free noise generator that is **computationally cheaper** than curl noise. I thought it was new and named it **Bitangent Noise**, but later I found it had already been invented by [Ivan DeWolf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.93.7627&rep=rep1&type=pdf) in 2005. (I wonder why it is so much less popular than curl noise.)
 6 | 
 7 | The implementation is carefully optimized, and both HLSL and GLSL versions are provided for your convenience. More details can be found [here](https://atyuwen.github.io/posts/bitangent-noise/). The following image shows a particle system that is updated using bitangent noise.
 8 | 
 9 | ![image](image.png?raw=true)
10 | 
11 | Here is a [shadertoy example](https://www.shadertoy.com/view/ftl3zN) that shows how to use bitangent noise to make a smoke ball.
12 | 
13 | ## Performance
14 | 
15 | These performance data were measured on an NVIDIA GTX 1060 card, where each noise function was executed 1280 * 720 * 10 times.
16 | 
17 | | Noise Function           |   Cost      | Desc                                                                                                                                |
18 | |--------------------------|  ---------  |-------------------------------------------------------------------------------------------------------------------------------------|
19 | | snoise3d                 |   1530 μs   | [stegu's 3d simplex noise](https://github.com/stegu/webgl-noise/blob/master/src/noise3D.glsl)                                       |
20 | | **SimplexNoise3D**       | **1153 μs** | [optimized 3d simplex noise](https://github.com/atyuwen/bitangent_noise/blob/main/Develop/SimplexNoise.hlsl#L41)                    |
21 | | snoise4d                 |   2578 μs   | [stegu's 4d simplex noise](https://github.com/stegu/webgl-noise/blob/master/src/noise4D.glsl)                                       |
22 | | **SimplexNoise4D**       | **1798 μs** | [optimized 4d simplex noise](https://github.com/atyuwen/bitangent_noise/blob/main/Develop/SimplexNoise.hlsl#L84)                    |
23 | | BitangentNoise3D_ref     |   2991 μs   | [3d bitangent noise, reference version ](https://github.com/atyuwen/bitangent_noise/blob/main/Develop/BitangentNoise_ref.hlsl#L219) |
24 | | **BitangentNoise3D**     | **1534 μs** | [optimized 3d bitangent noise](https://github.com/atyuwen/bitangent_noise/blob/main/BitangentNoise.hlsl#L41)                        |
25 | | BitangentNoise4D_ref     |   4365 μs   | [4d bitangent noise, reference version](https://github.com/atyuwen/bitangent_noise/blob/main/Develop/BitangentNoise_ref.hlsl#L227)  |
26 | | BitangentNoise4DFast_ref |   3152 μs   | [4d bitangent noise, low quality](https://github.com/atyuwen/bitangent_noise/blob/main/Develop/BitangentNoise_ref.hlsl#L239)        |
27 | | **BitangentNoise4D**     | **2413 μs** | [optimized 4d bitangent noise](https://github.com/atyuwen/bitangent_noise/blob/main/BitangentNoise.hlsl#L97)                        |
28 | 


--------------------------------------------------------------------------------
/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atyuwen/bitangent_noise/01adc0d4cb989edcc0d5cf7dafc77c55a43d49bd/image.png


--------------------------------------------------------------------------------