├── sph_fastest
    ├── Hybrid_Fluid_Simulation
    │   ├── nv_gui.h
    │   ├── ball32.png
    │   ├── gl_main.cpp
    │   ├── sph_kernel.cu
    │   ├── sph_particle.h
    │   ├── pcisph_factor.h
    │   ├── sph_parameter.h
    │   ├── GL_LIB
    │   │   ├── freeglut.dll
    │   │   ├── glew32.dll
    │   │   ├── glew_64
    │   │   │   ├── dll
    │   │   │   │   ├── debug
    │   │   │   │   │   └── glew32d.dll
    │   │   │   │   └── release
    │   │   │   │   │   └── glew32.dll
    │   │   │   └── libs
    │   │   │   │   ├── debug
    │   │   │   │       └── glew32d.lib
    │   │   │   │   └── release
    │   │   │   │       └── glew32.lib
    │   │   └── freeglut_64
    │   │   │   ├── dll
    │   │   │       ├── debug
    │   │   │       │   └── freeglutd.dll
    │   │   │       └── release
    │   │   │       │   └── freeglut.dll
    │   │   │   ├── libs
    │   │   │       ├── debug
    │   │   │       │   └── freeglutd.lib
    │   │   │       └── release
    │   │   │       │   └── freeglut.lib
    │   │   │   └── include
    │   │   │       └── GL
    │   │   │           ├── glut.h
    │   │   │           ├── freeglut.h
    │   │   │           └── freeglut_ext.h
    │   ├── json
    │   │   ├── lib_json.lib
    │   │   └── include
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── json
    │   │   │       ├── json.h
    │   │   │       ├── autolink.h
    │   │   │       ├── forwards.h
    │   │   │       ├── features.h
    │   │   │       ├── assertions.h
    │   │   │       ├── allocator.h
    │   │   │       ├── config.h
    │   │   │       ├── writer.h
    │   │   │       └── reader.h
    │   │   │   └── version.h
    │   ├── pcisph_factor.cpp
    │   ├── sph_arrangement.cu
    │   ├── sph_hybrid_system.cpp
    │   ├── sph_marching_cube.cpp
    │   ├── scene_default1.json
    │   ├── Shader
    │   │   ├── shader.vs
    │   │   └── shader.fs
    │   ├── sph_timer.h
    │   ├── scene_default.json
    │   ├── sph_timer.cpp
    │   ├── sph_header.h
    │   ├── insts_latency.json
    │   ├── sph_data.h
    │   ├── sph_marching_cube.h
    │   ├── Hybrid_Fluid_Simulation.vcxproj.user
    │   ├── gpu_model_reader.h
    │   ├── parameters.h
    │   ├── gpu_model.cuh
    │   ├── cuda_prescan
    │   │   ├── scan.cuh
    │   │   ├── scan_kern.cuh
    │   │   ├── prefix_sum.cu
    │   │   └── scan.cu
    │   ├── parameters.cpp
    │   ├── high_resolution_timer.h
    │   ├── cuda_call_check.h
    │   ├── gl_texture.h
    │   ├── cuda_math.cuh
    │   ├── save_screen.h
    │   ├── sph_hybrid_system.h
    │   ├── sph_utils.cuh
    │   ├── gl_main_header.h
    │   ├── sph_arrangement.cuh
    │   ├── Hybrid_Fluid_Simulation.vcxproj.filters
    │   ├── gpu_model.h
    │   ├── gpu_model_reader.cpp
    │   ├── sph_kernel.cuh
    │   ├── sph_tra_arti_block_statistics.json
    │   ├── gpu_model.cu
    │   ├── Hybrid_Fluid_Simulation.vcxproj
    │   ├── sph_sms_arti_block_statistics.json
    │   └── main.h
    └── Hybrid_Fluid_Simulation.sln
├── .gitignore
└── README.md


/sph_fastest/Hybrid_Fluid_Simulation/nv_gui.h:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/ball32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/ball32.png


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gl_main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/gl_main.cpp


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cu


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_particle.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_particle.h


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.h


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_parameter.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_parameter.h


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew32.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew32.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/lib_json.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/json/lib_json.lib


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.cpp


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_arrangement.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_arrangement.cu


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_hybrid_system.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_hybrid_system.cpp


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.cpp


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/debug/glew32d.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/debug/glew32d.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/release/glew32.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/release/glew32.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/debug/glew32d.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/debug/glew32d.lib


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/release/glew32.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/release/glew32.lib


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/debug/freeglutd.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/debug/freeglutd.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/release/freeglut.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/release/freeglut.dll


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/debug/freeglutd.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/debug/freeglutd.lib


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/release/freeglut.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/release/freeglut.lib


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | file(GLOB INCLUDE_FILES "json/*.h")
2 | install(FILES
3 |     ${INCLUDE_FILES}
4 |     ${PROJECT_BINARY_DIR}/include/json/version.h
5 |     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/json)
6 | 
7 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/scene_default1.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"fluid_block" : [
 3 | 		{
 4 | 			"begin_x" : 0.001, 
 5 | 			"begin_y" : 0.001,
 6 | 			"begin_z" : 0.001,
 7 | 			"end_x" : 0.55,
 8 | 			"end_y" : 0.55, 
 9 | 			"end_z" : 0.55
10 | 		}
11 | 	], 
12 | 	"mass" : 0.000088,
13 | 	"interval" : 0.105,
14 | 	"recomm_nump" : 15500000
15 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/Shader/shader.vs:
--------------------------------------------------------------------------------
1 | void main()
2 | {
3 |     // Pass colour and texture coordinate through, and emit the clip-space position.
4 |     gl_FrontColor = gl_Color;
5 |     gl_TexCoord[0] = gl_MultiTexCoord0;
6 |     gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
7 |     // Point size is 200 divided by the vertex's eye-space distance from the origin.
8 |     gl_PointSize = 200.0 / length(vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0)));
9 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SPHTIMER_H__
 2 | #define __SPHTIMER_H__
 3 | 
 4 | #include <windows.h>
 5 | 
 6 | class Timer	// frame counter that periodically recomputes a frames-per-second value
 7 | {
 8 | private:
 9 | 	int frames;	// update() calls counted since the last FPS refresh
10 | 	int update_time;	// refresh threshold compared against the GetTickCount() delta (constructor sets it to 1000)
11 | 	int last_time;	// GetTickCount() value stored at the last FPS refresh (0 initially)
12 | 	double FPS;	// most recently computed frames-per-second value
13 | 
14 | public:
15 | 	Timer();
16 | 	void update();	// counts one frame per call; refreshes FPS once the threshold is exceeded
17 | 	double get_fps();	// returns the stored FPS value without recomputing it
18 | };
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/scene_default.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"fluid_block" : [
 3 | 		{
 4 | 			"begin_x" : 0.091, 
 5 | 			"begin_y" : 0.091,
 6 | 			"begin_z" : 0.091,
 7 | 			"end_x" : 0.8,
 8 | 			"end_y" : 0.8, 
 9 | 			"end_z" : 0.8
10 | 		}
11 | 	], 
12 | 	"mass" : 0.00027,
13 | 	"interval" : 0.15,
14 | 	"recomm_nump" : 15500000,
15 | 	"xx": 1.6,
16 | 	"yy": 1.6,
17 | 	"zz": 1.6
18 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | sph_fastest/.vs
 2 | sph_fastest/Hybrid_Fluid_Simulation/x64
 3 | sph_fastest/x64
 4 | sph_fastest/saveScreen
 5 | sph_fastest/*.zip
 6 | sph_fastest/Hybrid_Fluid_Simulation/saveScreen*
 7 | sph_fastest/Hybrid_Fluid_Simulation/saveSurface*
 8 | sph_fastest/Hybrid_Fluid_Simulation/*.mp4
 9 | sph_fastest/Hybrid_Fluid_Simulation/mesh
10 | sph_fastest/Hybrid_Fluid_Simulation/*.txt
11 | sph_fastest/Hybrid_Fluid_Simulation/Release
12 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_timer.cpp:
--------------------------------------------------------------------------------
 1 | #include "sph_timer.h"
 2 | 
 3 | // Starts with no frames counted, a refresh threshold of 1000 and an FPS of 0.
 4 | Timer::Timer()
 5 | 	: frames(0),
 6 | 	  update_time(1000),
 7 | 	  last_time(0),
 8 | 	  FPS(0)
 9 | {}
10 | 
11 | void Timer::update()
12 | {
13 | 	frames++;	// one more frame since the last FPS refresh
14 | 	const DWORD now = GetTickCount();	// sample the clock once so the test, the divisor and last_time all use the same instant
15 | 	if(now-last_time > update_time)	// more than update_time ticks since the last refresh
16 | 	{
17 | 		FPS=((double)frames/(double)(now-last_time))*1000.0;	// frames per elapsed tick, scaled by 1000
18 | 		last_time=now;
19 | 		frames=0;	// start counting the next measurement window
20 | 	}
21 | }
22 | 
23 | double Timer::get_fps()
24 | {
25 | 	return FPS;	// value last stored by update(); stays 0 until the first refresh
26 | }
27 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/Shader/shader.fs:
--------------------------------------------------------------------------------
 1 | void main()
 2 | {
 3 |     // Remap the texture coordinate (assumed to lie in [0, 1]) to [-1, 1], flipping the y axis.
 4 |     vec2 disc = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0);
 5 | 
 6 |     // Fragments whose squared distance from the centre exceeds 1 lie outside the circle.
 7 |     float radiusSq = dot(disc, disc);
 8 |     if (radiusSq > 1.0)
 9 |         discard;
10 | 
11 |     // Rebuild the sphere normal and scale the colour by the clamped N.L diffuse term.
12 |     vec3 normal = vec3(disc, sqrt(1.0-radiusSq));
13 |     const vec3 lightDir = vec3(0.577, 0.577, 0.577);
14 |     float lambert = max(0.0, dot(lightDir, normal));
15 |     gl_FragColor = gl_Color * lambert;
16 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_header.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SPHHEADER_H__
 2 | #define __SPHHEADER_H__
 3 | 
 4 | #include <thrust/device_vector.h>
 5 | #include <thrust/copy.h>
 6 | #include <thrust/device_ptr.h>
 7 | #include <thrust/for_each.h>
 8 | #include <thrust/iterator/zip_iterator.h>
 9 | #include <thrust/sort.h>
10 | 
11 | #include <vector>
12 | #include <list>
13 | 
14 | #include <stdio.h>
15 | #include <stdlib.h>
16 | #include <math.h>
17 | #include <time.h>
18 | 
19 | #define PI 3.141592f	// pi truncated to seven significant digits, single precision
20 | #define INF 1E-12f	// NOTE(review): despite the name this is a tiny positive value (1e-12), not infinity; presumably used as an epsilon - verify at call sites
21 | #define BOUNDARY 0.01f
22 | 
23 | #endif


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/json.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef JSON_JSON_H_INCLUDED
 7 | #define JSON_JSON_H_INCLUDED
 8 | 
 9 | #include "autolink.h"
10 | #include "features.h"
11 | #include "reader.h"
12 | #include "value.h"
13 | #include "writer.h"
14 | 
15 | #endif // JSON_JSON_H_INCLUDED
16 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/insts_latency.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"I_ADD_SUB" : 6.047,
 3 | 	"I_MAD_MUL" : 13.164,
 4 | 	"I_DIV_REM" : 255.544,
 5 | 	"I_MIN_MAX" : 12.047,
 6 | 	"I_ABS" : 15.086,
 7 | 	"I_MUL24" : 19.3,
 8 | 	"I_LOGICAL" : 0.145,
 9 | 	"I_SHL_SHR" : 6.051,
10 | 	"I_SAD" : 6.074,
11 | 	"F_ADD_SUB" : 6.047,
12 | 	"F_MAD_MUL" : 6.047,
13 | 	"F_DIV" : 365.641,
14 | 	"F_DIVIDEF" : 34.07,
15 | 	"F_EXP2" : 40.117,
16 | 	"F_LOG2" : 34.07,
17 | 	"F_SIN_COS" : 15.094,
18 | 	"F_SQRT" : 130.840,
19 | 	"F_RSQRT" : 34.07,
20 | 	"F_MIN_MAX" : 12.074,
21 | 	"F_RCP" : 132.703,
22 | 	"DEFAULT" : 10.0
23 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_data.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SPHDATA_H__
 2 | #define __SPHDATA_H__
 3 | 
 4 | #include "sph_header.h"
 5 | 
 6 | float window_width = 1000;	// NOTE(review): this and the globals below are definitions, not declarations; including this header from more than one translation unit would cause multiple-definition link errors
 7 | float window_height = 750;
 8 | 
 9 | float xRot = 0.0f;
10 | float yRot = 0.0f;
11 | float xTrans = 0;
12 | float yTrans = 0;
13 | float zTrans = -175.0;
14 | 
15 | int psize = 12;
16 | int ox;
17 | int oy;
18 | int buttonState;
19 | float xRotLength = 0.0f;
20 | float yRotLength = 0.0f;
21 | 
22 | float3 real_world_origin;
23 | float3 real_world_side;
24 | float3 sim_ratio;
25 | 
26 | float world_width;
27 | float world_height;
28 | float world_length;
29 | 
30 | #endif


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // sph_marching_cube.h
 3 | // Hybrid_Parallel_SPH 
 4 | //
 5 | // created by ruanjm on 22/04/16
 6 | // Copyright (c) 2016 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _SPH_MARCHING_CUBE_H
10 | #define _SPH_MARCHING_CUBE_H
11 | 
12 | #include <vector_functions.h>
13 | #include "sph_parameter.h"
14 | 
15 | namespace sph
16 | {
17 | 
18 | bool generateMesh(float3 *pos, unsigned int nump, SystemParameter *sys_para, unsigned int loop_times);
19 | 
20 | void outputMesh(SystemParameter *sys_para, unsigned int loop_times);
21 | 
22 | }
23 | 
24 | #endif/*_SPH_MARCHING_CUBE_H*/
25 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj.user:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 4 |     <LocalDebuggerDebuggerType>Auto</LocalDebuggerDebuggerType>
 5 |   </PropertyGroup>
 6 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 7 |     <LocalDebuggerEnvironment>PATH=$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB</LocalDebuggerEnvironment>
 8 |     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
 9 |   </PropertyGroup>
10 | </Project>


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model_reader.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // gpu_model_reader.h
 3 | // Hybrid_Parallel_SPH
 4 | //
 5 | // created by kmhuang and ruanjm on 2018/09/01
 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _GPU_MODEL_READER_H
10 | #define _GPU_MODEL_READER_H
11 | 
12 | #include <string>
13 | #include "gpu_model.h"
14 | 
15 | namespace gpu_model
16 | {
17 | 
18 | unsigned int readPTXStatisticsFromFile(PTXBlockStatistic *&output, const std::string &func_name, const std::string &file_name);
19 | 
20 | void readInstructionLatencyFromFile(InstructionInfo &inst_info, const std::string &file_name);
21 | 
22 | }
23 | 
24 | #endif/*_GPU_MODEL_READER_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/parameters.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PARAMETERS_H
 2 | #define _PARAMETERS_H
 3 | 
 4 | #include <cuda_runtime.h>
 5 | 
 6 | // common
 7 | extern const bool DEBUG;
 8 | extern const float TIME_STEP;
 9 | extern const float3 WORLD_SIZE;
10 | extern const float3 GRAVITY;
11 | 
12 | // SPH simulation
13 | extern const float KERNAL_RADIUS;
14 | extern const float MASS;
15 | extern const float VICOSITY_COEFFICIENT;
16 | extern const float REST_DENSITY;
17 | extern const float WALL_DAMPING;
18 | extern const float GAS_CONSTANT;
19 | extern const int pcisph_min_loops;
20 | extern const int pcisph_max_loops;
21 | extern const float pcisph_max_density_error_allowed;
22 | 
23 | // Eulerian simulation
24 | extern const int eulerDim[3];
25 | 
26 | #endif /*_PARAMETERS_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/glut.h:
--------------------------------------------------------------------------------
 1 | #ifndef  __GLUT_H__
 2 | #define  __GLUT_H__
 3 | 
 4 | /*
 5 |  * glut.h
 6 |  *
 7 |  * The freeglut library include file
 8 |  *
 9 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
10 |  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
11 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
12 |  * PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 |  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 |  */
16 | 
17 | #include "freeglut_std.h"
18 | 
19 | /*** END OF FILE ***/
20 | 
21 | #endif /* __GLUT_H__ */
22 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model.cuh:
--------------------------------------------------------------------------------
 1 | //
 2 | // gpu_model.cuh
 3 | // Hybrid_Parallel_SPH
 4 | //
 5 | // created by kmhuang and ruanjm on 2018/09/01
 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _GPU_MODEL_CUH
10 | #define _GPU_MODEL_CUH
11 | 
12 | namespace gpu_model
13 | {
14 | 
15 | struct GPUModel;
16 | 
17 | void allocateGPUModel(GPUModel *&gm);
18 | 
19 | void freeGPUModel(GPUModel *gm);
20 | 
21 | void calculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc);
22 | 
23 | void calculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel *gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size);
24 | 
25 | }
26 | 
27 | #endif/*_GPU_MODEL_CUH*/
28 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/freeglut.h:
--------------------------------------------------------------------------------
 1 | #ifndef  __FREEGLUT_H__
 2 | #define  __FREEGLUT_H__
 3 | 
 4 | /*
 5 |  * freeglut.h
 6 |  *
 7 |  * The freeglut library include file
 8 |  *
 9 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
10 |  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
11 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
12 |  * PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 |  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 |  */
16 | 
17 | #include "freeglut_std.h"
18 | #include "freeglut_ext.h"
19 | 
20 | /*** END OF FILE ***/
21 | 
22 | #endif /* __FREEGLUT_H__ */
23 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/autolink.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef JSON_AUTOLINK_H_INCLUDED
 7 | #define JSON_AUTOLINK_H_INCLUDED
 8 | 
 9 | #include "config.h"
10 | 
11 | #ifdef JSON_IN_CPPTL
12 | #include <cpptl/cpptl_autolink.h>
13 | #endif
14 | 
15 | #if !defined(JSON_NO_AUTOLINK) && !defined(JSON_DLL_BUILD) &&                  \
16 |     !defined(JSON_IN_CPPTL)
17 | #define CPPTL_AUTOLINK_NAME "json"
18 | #undef CPPTL_AUTOLINK_DLL
19 | #ifdef JSON_DLL
20 | #define CPPTL_AUTOLINK_DLL
21 | #endif
22 | #include "autolink.h"
23 | #endif
24 | 
25 | #endif // JSON_AUTOLINK_H_INCLUDED
26 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan.cuh:
--------------------------------------------------------------------------------
 1 | //
 2 | // scan.cuh
 3 | // prefix_sum 
 4 | //
 5 | // created by ruanjm on 16/12/15
 6 | // Copyright (c) 2015 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _SCAN_CUH
10 | #define _SCAN_CUH
11 | 
12 | extern "C"
13 | {
14 | 
15 |     void prefixSumToGPU(char* inArray, int num, int siz);
16 |     void prefixSumFromGPU(char* outArray, int num, int siz);
17 |     void prefixSum(int num);
18 |     void prefixSumInt(int num);
19 |     void preallocBlockSumsInt(unsigned int num);
20 |     void deallocBlockSumsInt();
21 |     void prescanArray(float* outArray, float* inArray, int numElements);
22 |     void prescanArrayInt(int* outArray, int* inArray, int numElements);
23 |     void prescanArrayRecursiveInt(int *outArray, const int *inArray, int numElements, int level);
24 | 
25 | }
26 | 
27 | #endif/*_SCAN_CUH*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/version.h:
--------------------------------------------------------------------------------
 1 | // DO NOT EDIT. This file (and "version") is a template used by the build system
 2 | // (either CMake or Meson) to generate a "version.h" header file.
 3 | #ifndef JSON_VERSION_H_INCLUDED
 4 | #define JSON_VERSION_H_INCLUDED
 5 | 
 6 | #define JSONCPP_VERSION_STRING "1.9.0"
 7 | #define JSONCPP_VERSION_MAJOR 1
 8 | #define JSONCPP_VERSION_MINOR 9
 9 | #define JSONCPP_VERSION_PATCH 0
10 | #define JSONCPP_VERSION_QUALIFIER
11 | #define JSONCPP_VERSION_HEXA ((JSONCPP_VERSION_MAJOR << 24) \
12 |                             | (JSONCPP_VERSION_MINOR << 16) \
13 |                             | (JSONCPP_VERSION_PATCH << 8))
14 | 
15 | #ifdef JSONCPP_USING_SECURE_MEMORY
16 | #undef JSONCPP_USING_SECURE_MEMORY
17 | #endif
18 | #define JSONCPP_USING_SECURE_MEMORY 0
19 | // If non-zero, the library zeroes any memory that it has allocated before
20 | // it frees its memory.
21 | 
22 | #endif // JSON_VERSION_H_INCLUDED
23 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/forwards.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef JSON_FORWARDS_H_INCLUDED
 7 | #define JSON_FORWARDS_H_INCLUDED
 8 | 
 9 | #if !defined(JSON_IS_AMALGAMATION)
10 | #include "config.h"
11 | #endif // if !defined(JSON_IS_AMALGAMATION)
12 | 
13 | namespace Json {
14 | 
15 | // writer.h
16 | class StreamWriter;
17 | class StreamWriterBuilder;
18 | class Writer;
19 | class FastWriter;
20 | class StyledWriter;
21 | class StyledStreamWriter;
22 | 
23 | // reader.h
24 | class Reader;
25 | class CharReader;
26 | class CharReaderBuilder;
27 | 
28 | // features.h
29 | class Features;
30 | 
31 | // value.h
32 | typedef unsigned int ArrayIndex;
33 | class StaticString;
34 | class Path;
35 | class PathArgument;
36 | class Value;
37 | class ValueIteratorBase;
38 | class ValueIterator;
39 | class ValueConstIterator;
40 | 
41 | } // namespace Json
42 | 
43 | #endif // JSON_FORWARDS_H_INCLUDED
44 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/parameters.cpp:
--------------------------------------------------------------------------------
 1 | #include "parameters.h"
 2 | 
 3 | ///////////////////////////////////////////////////////////////////////////////////
 4 | // common
 5 | ///////////////////////////////////////////////////////////////////////////////////
 6 | const bool DEBUG = false;
 7 | const float TIME_STEP = 0.004f;	// 'f' suffix: single-precision literal like the other float constants here, no double-to-float truncation
 8 | const float3 WORLD_SIZE = make_float3(4, 4, 4);
 9 | const float3 GRAVITY = make_float3(0.0f, -9.8f, 0.0f);	// only the y component is non-zero (-9.8)
10 | 
11 | ///////////////////////////////////////////////////////////////////////////////////
12 | // SPH simulation
13 | ///////////////////////////////////////////////////////////////////////////////////
14 | const float KERNAL_RADIUS = 0.03f;
15 | const float MASS = 0.002f;
16 | const float VICOSITY_COEFFICIENT = 10.0f;
17 | const float REST_DENSITY = 1000.0f;
18 | const float WALL_DAMPING = -0.5f;
19 | const float GAS_CONSTANT = 1.0f;
20 | 
21 | const int pcisph_min_loops = 3;
22 | const int pcisph_max_loops = 5;
23 | const float pcisph_max_density_error_allowed = 10.0f;
24 | 
25 | ///////////////////////////////////////////////////////////////////////////////////
26 | // Eulerian simulation
27 | ///////////////////////////////////////////////////////////////////////////////////
28 | const int eulerDim[3] = {16, 16, 16};


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/high_resolution_timer.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // high_resolution_timer.h
 3 | // Heterogeneous_SPH 
 4 | //
 5 | // created by ruanjm on 09/07/15
 6 | // Copyright (c) 2015 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _HIGH_RESOLUTION_TIMER_H
10 | #define _HIGH_RESOLUTION_TIMER_H
11 | 
12 | class HighResolutionTimer                   // abstract interface: mark a start, mark an end, read the elapsed time
13 | {
14 | public:
15 |     virtual ~HighResolutionTimer() {}       // virtual so a derived timer can be deleted safely through a base pointer
16 |     virtual void set_start() = 0;           // implemented by subclasses (HighResolutionTimerForWin stores the counter value)
17 |     virtual void set_end() = 0;             // implemented by subclasses (HighResolutionTimerForWin stores the counter value)
18 |     virtual float get_millisecond() = 0;    // implemented by subclasses; name indicates the result is in milliseconds
19 | };
20 | #ifdef WIN32
21 | 
22 | #include <windows.h>
23 | 
24 | // Windows implementation of HighResolutionTimer built on
25 | // QueryPerformanceFrequency / QueryPerformanceCounter.
26 | class HighResolutionTimerForWin : public HighResolutionTimer
27 | {
28 | public:
29 | 
30 |     // Caches the counter frequency and zeroes both marks.
31 |     HighResolutionTimerForWin(){
32 |         QueryPerformanceFrequency(&freq_);
33 |         start_.QuadPart = 0;
34 |         end_.QuadPart = 0;
35 |     }
36 | 
37 |     // Stores the current counter value as the start mark.
38 |     void set_start() override {
39 |         QueryPerformanceCounter(&start_);
40 |     }
41 | 
42 |     // Stores the current counter value as the end mark.
43 |     void set_end() override {
44 |         QueryPerformanceCounter(&end_);
45 |     }
46 | 
47 |     // Returns (end - start) converted from counter ticks to milliseconds.
48 |     // The conversion is carried out in double and narrowed to float once at
49 |     // the end; the previous code converted the 64-bit tick product to float
50 |     // before dividing, discarding low-order bits for long intervals.
51 |     float get_millisecond() override {
52 |         const double ticks = static_cast<double>(end_.QuadPart - start_.QuadPart);
53 |         const double ticks_per_second = static_cast<double>(freq_.QuadPart);
54 |         return static_cast<float>(ticks * 1000.0 / ticks_per_second);
55 |     }
56 | 
57 | private:
58 |     LARGE_INTEGER freq_;          // counter ticks per second
59 |     LARGE_INTEGER start_, end_;   // counter values captured by set_start / set_end
60 | };
50 | 
51 | #endif // WIN32
52 | 
53 | #endif/*_HIGH_RESOLUTION_TIMER_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_call_check.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // cuda_call_check.h
 3 | // cuda texture tester 
 4 | //
 5 | // created by ruanjm on 12/03/15
 6 | // Copyright (c) 2015 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _CUDA_CALL_CHECK_H
10 | #define _CUDA_CALL_CHECK_H
11 | 
12 | #include <cuda_runtime.h>
13 | #include <cstdlib>
14 | #include <iostream>
14 | 
15 | #define CUDA_SAFE_CALL(err)     cuda_safe_call_(err, __FILE__, __LINE__)
16 | #define CUDA_KERNEL_CHECK(err)  cuda_kernel_check_(err, __FILE__, __LINE__)
17 | 
18 | // Terminates the program with a diagnostic when a CUDA runtime call failed.
19 | //   err       - status returned by the CUDA API call being checked
20 | //   file_name - source file of the call site (CUDA_SAFE_CALL passes __FILE__)
21 | //   num_line  - line of the call site (CUDA_SAFE_CALL passes __LINE__)
22 | // Does nothing when err == cudaSuccess.
23 | inline void cuda_safe_call_(cudaError err, const char *file_name, const int num_line)
24 | {
25 |     if (cudaSuccess != err)
26 |     {
27 |         // Report first, then exit with a failure status. The previous order
28 |         // (exit(0) before the message) made the diagnostic unreachable and
29 |         // reported success to the caller of the process.
30 |         std::cerr << file_name << "[" << num_line << "]: "
31 |             << "CUDA Running API error[" << (int)err << "]: "
32 |             << cudaGetErrorString(err) << std::endl;
33 |         exit(EXIT_FAILURE);
34 |     }
35 | }
28 | 
29 | // Checks the outcome of previously launched kernels and terminates the
30 | // program with a diagnostic on failure.
31 | //   error_msg - optional caller-supplied label; "NONE" is printed for nullptr
32 | //   file_name - source file of the call site (CUDA_KERNEL_CHECK passes __FILE__)
33 | //   num_line  - line of the call site (CUDA_KERNEL_CHECK passes __LINE__)
34 | // Blocks the host until the device is idle (cudaDeviceSynchronize).
35 | inline void cuda_kernel_check_(const char *error_msg, const char *file_name, const int num_line)
36 | {
37 |     // A rejected launch (bad configuration) is reported by cudaGetLastError();
38 |     // faults raised while a kernel runs surface at the synchronize.
39 |     cudaError_t err = cudaGetLastError();
40 |     if (cudaSuccess == err)
41 |         err = cudaDeviceSynchronize();
42 | 
43 |     if (cudaSuccess != err)
44 |     {
45 |         // Report first, then exit with a failure status (the previous
46 |         // exit(0) ran before the message and signalled success).
47 |         std::cerr << file_name << "[" << num_line << "]: "
48 |             << (error_msg == nullptr ? "NONE" : error_msg)
49 |             << "[" << (int)err << "]: "
50 |             << cudaGetErrorString(err) << std::endl;
51 |         exit(EXIT_FAILURE);
52 |     }
53 | }
41 | 
42 | #endif/*_CUDA_CALL_CHECK_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 2013
 4 | VisualStudioVersion = 12.0.30501.0
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Hybrid_Fluid_Simulation", "Hybrid_Fluid_Simulation\Hybrid_Fluid_Simulation.vcxproj", "{BC50E9FA-E95F-4E72-9F2B-D45567958A71}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Win32 = Debug|Win32
11 | 		Debug|x64 = Debug|x64
12 | 		Release|Win32 = Release|Win32
13 | 		Release|x64 = Release|x64
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|Win32.ActiveCfg = Debug|Win32
17 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|Win32.Build.0 = Debug|Win32
18 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|x64.ActiveCfg = Debug|x64
19 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|x64.Build.0 = Debug|x64
20 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|Win32.ActiveCfg = Release|Win32
21 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|Win32.Build.0 = Release|Win32
22 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|x64.ActiveCfg = Release|x64
23 | 		{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|x64.Build.0 = Release|x64
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | EndGlobal
29 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan_kern.cuh:
--------------------------------------------------------------------------------
 1 | //
 2 | // scan_kern.cuh
 3 | // prefix_sum 
 4 | //
 5 | // created by ruanjm on 16/12/15
 6 | // Copyright (c) 2015 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | 
10 | #ifndef _SCAN_KERN_CUH
11 | #define _SCAN_KERN_CUH
12 | 
13 | #define max(a,b)            (((a) > (b)) ? (a) : (b))
14 | 
15 | #define NUM_BANKS   32
16 | #define BLOCK_SIZE  256
17 | 
18 | /* prefix sum */
19 | #include "prefix_sum.cu"
20 | // NOTE: Template functions must be defined in the header
21 | // Per-block prefix-sum kernel for float data.
22 | // The three stages are delegated to helpers pulled in from prefix_sum.cu
23 | // (presumably: stage a chunk of g_idata in shared memory, scan it, then
24 | // write it to g_odata -- confirm against prefix_sum.cu).
25 | // s_data is extern __shared__, so the launch must pass the dynamic
26 | // shared-memory size. When baseIndex is 0 the chunk start is derived as
27 | // blockIdx.x * (2 * blockDim.x); any other value is used as given.
28 | template <bool storeSum, bool isNP2>
29 | __global__ void prescan(float *g_odata, const float *g_idata, float *g_blockSums,
30 |                         int n, int blockIndex, int baseIndex)
31 | {
32 |     extern __shared__ float s_data[];
33 | 
34 |     int chunkBase = baseIndex;
35 |     if (baseIndex == 0)
36 |         chunkBase = __mul24(blockIdx.x, (blockDim.x << 1));
37 | 
38 |     // Index bookkeeping shared between the load and the store helper
39 |     // (presumably written by the load helper -- they are passed uninitialised).
40 |     int idxA, idxB, memIdxA, memIdxB, padA, padB;
41 |     loadSharedChunkFromMem<isNP2>(s_data, g_idata, n, chunkBase,
42 |                                   idxA, idxB, memIdxA, memIdxB, padA, padB);
43 |     prescanBlock<storeSum>(s_data, blockIndex, g_blockSums);
44 |     storeSharedChunkToMem<isNP2>(g_odata, s_data, n,
45 |                                  idxA, idxB, memIdxA, memIdxB, padA, padB);
46 | }
28 | // Integer counterpart of prescan: same three-stage structure, using the
29 | // *Int helpers from prefix_sum.cu and an int shared-memory buffer.
30 | // s_dataInt is extern __shared__, so the launch must pass the dynamic
31 | // shared-memory size. When baseIndex is 0 the chunk start is derived as
32 | // blockIdx.x * (2 * blockDim.x); any other value is used as given.
33 | template <bool storeSum, bool isNP2>
34 | __global__ void prescanInt(int *g_odata, const int *g_idata, int *g_blockSums,
35 |                            int n, int blockIndex, int baseIndex)
36 | {
37 |     extern __shared__ int s_dataInt[];
38 | 
39 |     int chunkBase = baseIndex;
40 |     if (baseIndex == 0)
41 |         chunkBase = __mul24(blockIdx.x, (blockDim.x << 1));
42 | 
43 |     // Index bookkeeping shared between the load and the store helper
44 |     // (presumably written by the load helper -- they are passed uninitialised).
45 |     int idxA, idxB, memIdxA, memIdxB, padA, padB;
46 |     loadSharedChunkFromMemInt <isNP2>(s_dataInt, g_idata, n, chunkBase,
47 |                                       idxA, idxB, memIdxA, memIdxB, padA, padB);
48 |     prescanBlockInt<storeSum>(s_dataInt, blockIndex, g_blockSums);
49 |     storeSharedChunkToMemInt <isNP2>(g_odata, s_dataInt, n,
50 |                                      idxA, idxB, memIdxA, memIdxB, padA, padB);
51 | }
35 | __global__ void uniformAddInt(int*  g_data, int *uniforms, int n, int blockOffset, int baseIndex);
36 | __global__ void uniformAdd(float*g_data, float *uniforms, int n, int blockOffset, int baseIndex);
37 | 
38 | 
39 | #endif/*_SCAN_KERN_CUH*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/features.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef CPPTL_JSON_FEATURES_H_INCLUDED
 7 | #define CPPTL_JSON_FEATURES_H_INCLUDED
 8 | 
 9 | #if !defined(JSON_IS_AMALGAMATION)
10 | #include "forwards.h"
11 | #endif // if !defined(JSON_IS_AMALGAMATION)
12 | 
13 | #pragma pack(push, 8)
14 | 
15 | namespace Json {
16 | 
17 | /** \brief Configuration passed to reader and writer.
18 |  * This configuration object can be used to force the Reader or Writer
19 |  * to behave in a standard conforming way.
20 |  */
21 | class JSON_API Features {
22 | public:
23 |   /** \brief A configuration that allows all features and assumes all strings
24 |    * are UTF-8.
25 |    * - C & C++ comments are allowed
26 |    * - Root object can be any JSON value
27 |    * - Assumes Value strings are encoded in UTF-8
28 |    */
29 |   static Features all();
30 | 
31 |   /** \brief A configuration that is strictly compatible with the JSON
32 |    * specification.
33 |    * - Comments are forbidden.
34 |    * - Root object must be either an array or an object value.
35 |    * - Assumes Value strings are encoded in UTF-8
36 |    */
37 |   static Features strictMode();
38 | 
39 |   /** \brief Initialize the configuration like JsonConfig::allFeatures;
40 |    */
41 |   Features();
42 | 
43 |   /// \c true if comments are allowed. Default: \c true.
44 |   bool allowComments_{true};
45 | 
46 |   /// \c true if root must be either an array or an object value. Default: \c
47 |   /// false.
48 |   bool strictRoot_{false};
49 | 
50 |   /// \c true if dropped null placeholders are allowed. Default: \c false.
51 |   bool allowDroppedNullPlaceholders_{false};
52 | 
53 |   /// \c true if numeric object keys are allowed. Default: \c false.
54 |   bool allowNumericKeys_{false};
55 | };
56 | 
57 | } // namespace Json
58 | 
59 | #pragma pack(pop)
60 | 
61 | #endif // CPPTL_JSON_FEATURES_H_INCLUDED
62 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Fast-SPH-framework
 2 | This framework provides a fast, general implementation of the GPU SPH method based on the uniform-grid approach.
 3 | 
 4 | DESCRIPTION
 5 | ===========
 6 | This project contains the source code for the papers ["Novel Hierarchical Strategies for SPH-centric Algorithms on GPGPU"](https://doi.org/10.1016/j.gmod.2020.101088)
 7 | and ["A General Novel Parallel Framework for SPH-centric Algorithms"](https://dl.acm.org/doi/10.1145/3321360).
 8 | 
 9 | This project offers fast optimization strategies based on a uniform grid. Compared to a well-optimized GPU SPH method based on the uniform grid, the method proposed in the papers is up to 3.5 times faster. As a result, it serves as a useful benchmark for further research on GPU SPH and facilitates meaningful comparisons.
10 | 
11 | 
12 | Source code contributor: [Kemeng Huang](https://kemenghuang.github.io), Jiming Ruan
13 | 
14 | **Note: this software is released under the MPLv2.0 license. For commercial use, please email authors for negotiation.**
15 | 
16 | ## BibTex 
17 | 
18 | If you find this project helpful, please cite the following papers.
19 | 
20 | 
21 | ```
22 | @article{HUANG2020101088,
23 |   title = {Novel hierarchical strategies for SPH-centric algorithms on GPGPU},
24 |   journal = {Graphical Models},
25 |   volume = {111},
26 |   pages = {101088},
27 |   year = {2020},
28 |   issn = {1524-0703},
29 |   doi = {https://doi.org/10.1016/j.gmod.2020.101088},
30 |   url = {https://www.sciencedirect.com/science/article/pii/S152407032030028X},
31 |   author = {Kemeng Huang and Zipeng Zhao and Chen Li and Changbo Wang and Hong Qin}
32 | }
33 | ```
34 | 
35 | 
36 | ```
37 | @article{10.1145/3321360,
38 |   author = {Huang, Kemeng and Ruan, Jiming and Zhao, Zipeng and Li, Chen and Wang, Changbo and Qin, Hong},
39 |   title = {A General Novel Parallel Framework for SPH-Centric Algorithms},
40 |   year = {2019},
41 |   issue_date = {May 2019},
42 |   publisher = {Association for Computing Machinery},
43 |   address = {New York, NY, USA},
44 |   volume = {2},
45 |   number = {1},
46 |   url = {https://doi.org/10.1145/3321360},
47 |   doi = {10.1145/3321360},
48 |   journal = {Proc. ACM Comput. Graph. Interact. Tech.},
49 |   month = {jun},
50 |   articleno = {7},
51 |   numpages = {16}
52 | }
53 | ```
54 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gl_texture.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // gl_texture.h
 3 | // Hybrid_Parallel_SPH
 4 | //
 5 | // created by ruanjm on 2016/05/01
 6 | // Copyright (c) 2016 ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _GL_TEXTURE_H
10 | #define _GL_TEXTURE_H
11 | 
12 | #include <vector>
13 | #include <GL\glew.h>
14 | #include "lodepng.h"
15 | 
16 | #define IMG_RGB			0
17 | #define IMG_RGBA		1
18 | #define IMG_LUM			2
19 | 
20 | // Decodes a PNG file with lodepng and uploads it as a 2D OpenGL texture.
21 | // NOTE(review): loadPNG() issues gl* calls, so it presumably needs a current
22 | // OpenGL context -- confirm at the call sites. The GL texture is not deleted
23 | // on destruction (unchanged from the previous implementation).
24 | class PNGTexture
25 | {
26 | public:
27 |     // Decodes `path` as RGBA with one byte per channel and (re)creates the
28 |     // texture from it. Returns false, leaving any previously loaded image and
29 |     // texture untouched, when lodepng reports an error.
30 |     bool loadPNG(const char *path){
31 |         std::vector<unsigned char> decoded;
32 |         unsigned int w = 0, h = 0;
33 | 
34 |         unsigned error = lodepng::decode(decoded, w, h, path);
35 |         if (error)
36 |         {
37 |             printf("can not decode %s\n", path);
38 |             return false;
39 |         }
40 | 
41 |         x_resolution_ = w;
42 |         y_resolution_ = h;
43 |         format_ = IMG_RGBA;
44 | 
45 |         // Adopt the decoded buffer instead of malloc + memcpy: no unchecked
46 |         // allocation, no &out[0] on an empty vector, and std::vector releases
47 |         // (and copies) the storage correctly on its own.
48 |         pixels_.swap(decoded);
49 |         size_ = static_cast<unsigned int>(pixels_.size());
50 | 
51 |         updateTexture();
52 | 
53 |         return true;
54 |     }
55 | 
56 |     // GL texture name; 0 until an image has been loaded successfully.
57 |     GLuint get_texture() const {
58 |         return texture_;
59 |     }
60 | 
61 | private:
62 |     // Deletes the current texture (if any) and builds a new one from pixels_.
63 |     void updateTexture(){
64 |         if (texture_) glDeleteTextures(1, &texture_);
65 | 
66 |         glGenTextures(1, &texture_);
67 |         glBindTexture(GL_TEXTURE_2D, texture_);
68 | 
69 |         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
70 |         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
71 |         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
72 |         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
73 | 
74 |         // Default keeps fmt defined even for an unexpected format_ value
75 |         // (the previous switch had no default and left it uninitialised).
76 |         GLenum fmt = GL_RGBA;
77 |         switch (format_) {
78 |         case IMG_RGB:   fmt = GL_RGB;       break;
79 |         case IMG_RGBA:  fmt = GL_RGBA;      break;
80 |         case IMG_LUM:   fmt = GL_LUMINANCE; break;
81 |         default:                            break;
82 |         }
83 | 
84 |         const unsigned char *texels = pixels_.empty() ? nullptr : &pixels_[0];
85 |         glTexImage2D(GL_TEXTURE_2D, 0, fmt, x_resolution_, y_resolution_, 0, fmt, GL_UNSIGNED_BYTE, texels);
86 |     }
87 | 
88 |     GLuint texture_ = 0;                 // GL texture name, 0 = none
89 |     unsigned int x_resolution_ = 0;      // image width in pixels
90 |     unsigned int y_resolution_ = 0;      // image height in pixels
91 |     unsigned int size_ = 0;              // number of bytes held in pixels_
92 |     unsigned int format_ = IMG_RGBA;     // one of IMG_RGB / IMG_RGBA / IMG_LUM
93 |     std::vector<unsigned char> pixels_;  // decoded image bytes
94 | };
86 | 
87 | #endif/*_GL_TEXTURE_H*/
88 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_math.cuh:
--------------------------------------------------------------------------------
 1 | //
 2 | // cuda_math.cuh
 3 | // Heterogeneous_SPH
 4 | //
 5 | // created by kmhuang and ruanjm on 2018/09/01
 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _CUDA_MATH_H
10 | #define _CUDA_MATH_H
11 | 
12 | #include <cuda_runtime.h>
13 | 
14 | #define  kFloatSmall    (1e-12f)
15 | 
16 | // Component-wise sum of two uint3 values.
17 | __host__ __device__
18 | inline uint3 operator+(const uint3 &a, const uint3 &b)
19 | {
20 |     uint3 sum;
21 |     sum.x = a.x + b.x;
22 |     sum.y = a.y + b.y;
23 |     sum.z = a.z + b.z;
24 |     return sum;
25 | }
21 | 
22 | // Adds a signed offset to a uint3, component by component. Each int
23 | // component is converted to unsigned by the usual arithmetic conversions,
24 | // so the addition is performed modulo 2^32.
25 | __host__ __device__
26 | inline uint3 operator+(const uint3 &a, const int3 &b)
27 | {
28 |     uint3 shifted;
29 |     shifted.x = a.x + b.x;
30 |     shifted.y = a.y + b.y;
31 |     shifted.z = a.z + b.z;
32 |     return shifted;
33 | }
27 | 
28 | // Component-wise sum of two int3 values.
29 | __host__ __device__
30 | inline int3 operator+(const int3 &a, const int3 &b)
31 | {
32 |     int3 sum;
33 |     sum.x = a.x + b.x;
34 |     sum.y = a.y + b.y;
35 |     sum.z = a.z + b.z;
36 |     return sum;
37 | }
33 | 
34 | // Component-wise sum of two ushort3 values. Each sum is computed in int
35 | // (integer promotion) and then narrowed back to unsigned short.
36 | __host__ __device__
37 | inline ushort3 operator+(const ushort3 &a, const ushort3 &b)
38 | {
39 |     ushort3 sum;
40 |     sum.x = a.x + b.x;
41 |     sum.y = a.y + b.y;
42 |     sum.z = a.z + b.z;
43 |     return sum;
44 | }
39 | 
40 | // Component-wise sum of two float3 vectors.
41 | __host__ __device__
42 | inline float3 operator+(const float3 &a, const float3 &b)
43 | {
44 |     float3 sum;
45 |     sum.x = a.x + b.x;
46 |     sum.y = a.y + b.y;
47 |     sum.z = a.z + b.z;
48 |     return sum;
49 | }
45 | 
46 | // Component-wise difference a - b of two float3 vectors.
47 | __host__ __device__
48 | inline float3 operator-(const float3 &a, const float3 &b)
49 | {
50 |     float3 diff;
51 |     diff.x = a.x - b.x;
52 |     diff.y = a.y - b.y;
53 |     diff.z = a.z - b.z;
54 |     return diff;
55 | }
51 | 
52 | // Scales every component of a float3 by the scalar b.
53 | __host__ __device__
54 | inline float3 operator*(const float3 &a, float b)
55 | {
56 |     float3 scaled;
57 |     scaled.x = a.x * b;
58 |     scaled.y = a.y * b;
59 |     scaled.z = a.z * b;
60 |     return scaled;
61 | }
57 | 
58 | // Component-wise (Hadamard) product of two float3 vectors; this is neither
59 | // a dot product nor a cross product.
60 | __host__ __device__
61 | inline float3 operator*(const float3 &a, const float3 &b)
62 | {
63 |     float3 product;
64 |     product.x = a.x * b.x;
65 |     product.y = a.y * b.y;
66 |     product.z = a.z * b.z;
67 |     return product;
68 | }
63 | 
64 | // Divides every component of a float3 by the scalar b (three separate
65 | // divisions; no check for b == 0).
66 | __host__ __device__
67 | inline float3 operator/(const float3 &a, float b)
68 | {
69 |     float3 quotient;
70 |     quotient.x = a.x / b;
71 |     quotient.y = a.y / b;
72 |     quotient.z = a.z / b;
73 |     return quotient;
74 | }
69 | 
70 | // In-place component-wise subtraction: a becomes a - b. Returns nothing,
71 | // so the expression cannot be chained.
72 | __host__ __device__
73 | inline void operator-=(float3 &a, const float3 &b)
74 | {
75 |     a.x = a.x - b.x;
76 |     a.y = a.y - b.y;
77 |     a.z = a.z - b.z;
78 | }
75 | 
76 | // In-place component-wise addition: a becomes a + b. Returns nothing,
77 | // so the expression cannot be chained.
78 | __host__ __device__
79 | inline void operator+=(float3 &a, const float3 &b)
80 | {
81 |     a.x = a.x + b.x;
82 |     a.y = a.y + b.y;
83 |     a.z = a.z + b.z;
84 | }
81 | 
82 | // In-place scaling: every component of a is multiplied by b. Returns
83 | // nothing, so the expression cannot be chained.
84 | __host__ __device__
85 | inline void operator*=(float3 &a, const float b)
86 | {
87 |     a.x = a.x * b;
88 |     a.y = a.y * b;
89 |     a.z = a.z * b;
90 | }
87 | 
88 | // Squared Euclidean distance between the xyz parts of two float4 values;
89 | // the w components are not read.
90 | __host__ __device__
91 | inline float distance_square(const float4 &a, const float4 &b)
92 | {
93 |     const float dx = a.x - b.x;
94 |     const float dy = a.y - b.y;
95 |     const float dz = a.z - b.z;
96 |     return dx * dx + dy * dy + dz * dz;
97 | }
96 | 
97 | #endif/*_CUDA_MATH_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/save_screen.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // save_screen.h
 3 | // Heterogeneous_SPH
 4 | //
 5 | // created by ruanjm on 03/10/15
 6 | // Copyright (c) 2015 ruanjm. All right reserved.
 7 | //
 8 | 
 9 | #ifndef _SAVE_SCREEN_H
10 | #define _SAVE_SCREEN_H
11 | 
12 | #include <GL\GL.h>
13 | #include <windows.h>
14 | #include <string>
15 | 
16 | #define BITMAP_ID 0x4D42        // the universal bitmap ID  
17 | 
18 | BITMAPINFOHEADER    bitmapInfoHeader;
19 | 
20 | // Writes a 24-bit uncompressed BMP file.
21 | //   width, height - image size in pixels; abs(height) rows are written and
22 | //                   height is stored in the header as given
23 | //   file_name     - output path, opened with fopen(..., "wb")
24 | //   bitmapData    - tightly packed RGB bytes, width * abs(height) * 3 in total.
25 | //                   As before, the red and blue channels are swapped IN PLACE
26 | //                   (BMP stores BGR), so the caller's buffer is modified.
27 | // Returns false when the arguments are unusable, the file cannot be opened,
28 | // or any write comes up short.
29 | // Declared inline because it is defined in a header.
30 | inline bool WriteBitmapFile(int width, int height, const std::string &file_name, unsigned char *bitmapData)
31 | {
32 |     if (width <= 0 || height == 0 || NULL == bitmapData)
33 |     {
34 |         return false;
35 |     }
36 | 
37 |     const int rows = abs(height);
38 |     const int rowBytes = width * 3;
39 |     // Every BMP scan line must occupy a multiple of four bytes on disk.
40 |     const int padBytes = (4 - rowBytes % 4) % 4;
41 |     const int imageBytes = (rowBytes + padBytes) * rows;
42 | 
43 |     BITMAPINFOHEADER infoHeader;
44 |     memset(&infoHeader, 0, sizeof(BITMAPINFOHEADER));
45 |     infoHeader.biSize = sizeof(BITMAPINFOHEADER);
46 |     infoHeader.biWidth = width;
47 |     infoHeader.biHeight = height;
48 |     infoHeader.biPlanes = 1;
49 |     infoHeader.biBitCount = 24;
50 |     infoHeader.biCompression = BI_RGB;
51 |     infoHeader.biSizeImage = imageBytes;
52 | 
53 |     BITMAPFILEHEADER fileHeader;
54 |     memset(&fileHeader, 0, sizeof(BITMAPFILEHEADER));
55 |     fileHeader.bfType = BITMAP_ID;   // "BM"
56 |     fileHeader.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
57 |     // bfSize is the size of the whole file, not of this header alone.
58 |     fileHeader.bfSize = fileHeader.bfOffBits + imageBytes;
59 | 
60 |     FILE *filePtr = fopen(file_name.c_str(), "wb");
61 |     if (NULL == filePtr)
62 |     {
63 |         return false;
64 |     }
65 | 
66 |     // RGB -> BGR, in place.
67 |     const int pixelBytes = rowBytes * rows;
68 |     for (int idx = 0; idx < pixelBytes; idx += 3)
69 |     {
70 |         const unsigned char red = bitmapData[idx];
71 |         bitmapData[idx] = bitmapData[idx + 2];
72 |         bitmapData[idx + 2] = red;
73 |     }
74 | 
75 |     bool ok = fwrite(&fileHeader, sizeof(BITMAPFILEHEADER), 1, filePtr) == 1
76 |            && fwrite(&infoHeader, sizeof(BITMAPINFOHEADER), 1, filePtr) == 1;
77 | 
78 |     // Emit the pixels row by row, appending the alignment padding.
79 |     static const unsigned char padding[3] = { 0, 0, 0 };
80 |     for (int row = 0; ok && row < rows; ++row)
81 |     {
82 |         const unsigned char *line = bitmapData + (size_t)row * rowBytes;
83 |         ok = fwrite(line, 1, rowBytes, filePtr) == (size_t)rowBytes;
84 |         if (ok && padBytes > 0)
85 |         {
86 |             ok = fwrite(padding, 1, padBytes, filePtr) == (size_t)padBytes;
87 |         }
88 |     }
89 | 
90 |     if (fclose(filePtr) != 0)
91 |     {
92 |         ok = false;
93 |     }
94 |     return ok;
95 | }
96 | 
97 | // Reads the width x height region at the lower-left corner of the current
98 | // read framebuffer as RGB bytes and saves it to "<file_name>.bmp".
99 | // Does nothing for non-positive sizes or when the buffer cannot be allocated;
100 | // the result of WriteBitmapFile is not reported (the function returns void).
101 | // Declared inline because it is defined in a header.
102 | inline void SaveScreenShot(int width, int height, const std::string &file_name)
103 | {
104 |     if (width <= 0 || height <= 0)
105 |     {
106 |         return;
107 |     }
108 | 
109 |     const size_t data_len = (size_t)height * width * 3;      // bytes
110 |     unsigned char *screen_data = (unsigned char*)malloc(data_len);
111 |     if (NULL == screen_data)
112 |     {
113 |         return;
114 |     }
115 |     memset(screen_data, 0, data_len);
116 | 
117 |     // With the default GL_PACK_ALIGNMENT of 4, glReadPixels pads every row to
118 |     // four bytes and would write past a width*height*3 buffer whenever
119 |     // width*3 is not a multiple of four. Request tightly packed rows and
120 |     // restore the previous setting afterwards.
121 |     GLint previous_alignment = 4;
122 |     glGetIntegerv(GL_PACK_ALIGNMENT, &previous_alignment);
123 |     glPixelStorei(GL_PACK_ALIGNMENT, 1);
124 |     glReadPixels(0, 0, width, height, GL_RGB, GL_UNSIGNED_BYTE, screen_data);
125 |     glPixelStorei(GL_PACK_ALIGNMENT, previous_alignment);
126 | 
127 |     WriteBitmapFile(width, height, file_name + ".bmp", screen_data);
128 | 
129 |     free(screen_data);
130 | }
77 | 
78 | 
79 | #endif/*_SAVE_SCREEN_H*/


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/assertions.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef CPPTL_JSON_ASSERTIONS_H_INCLUDED
 7 | #define CPPTL_JSON_ASSERTIONS_H_INCLUDED
 8 | 
 9 | #include <cstdlib>
10 | #include <sstream>
11 | 
12 | #if !defined(JSON_IS_AMALGAMATION)
13 | #include "config.h"
14 | #endif // if !defined(JSON_IS_AMALGAMATION)
15 | 
16 | /** It should not be possible for a maliciously designed file to
17 |  *  cause an abort() or seg-fault, so these macros are used only
18 |  *  for pre-condition violations and internal logic errors.
19 |  */
20 | #if JSON_USE_EXCEPTION
21 | 
22 | // @todo <= add detail about condition in exception
23 | #define JSON_ASSERT(condition)                                                 \
24 |   {                                                                            \
25 |     if (!(condition)) {                                                        \
26 |       Json::throwLogicError("assert json failed");                             \
27 |     }                                                                          \
28 |   }
29 | 
30 | #define JSON_FAIL_MESSAGE(message)                                             \
31 |   {                                                                            \
32 |     OStringStream oss;                                                         \
33 |     oss << message;                                                            \
34 |     Json::throwLogicError(oss.str());                                          \
35 |     abort();                                                                   \
36 |   }
37 | 
38 | #else // JSON_USE_EXCEPTION
39 | 
40 | #define JSON_ASSERT(condition) assert(condition)
41 | 
42 | // The call to assert() will show the failure message in debug builds. In
43 | // release builds we abort, for a core-dump or debugger.
44 | #define JSON_FAIL_MESSAGE(message)                                             \
45 |   {                                                                            \
46 |     OStringStream oss;                                                         \
47 |     oss << message;                                                            \
48 |     assert(false && oss.str().c_str());                                        \
49 |     abort();                                                                   \
50 |   }
51 | 
52 | #endif
53 | 
54 | #define JSON_ASSERT_MESSAGE(condition, message)                                \
55 |   if (!(condition)) {                                                          \
56 |     JSON_FAIL_MESSAGE(message);                                                \
57 |   }
58 | 
59 | #endif // CPPTL_JSON_ASSERTIONS_H_INCLUDED
60 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_hybrid_system.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // sph_hybrid_system.h
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #ifndef _SPH_HYBRID_SYSTEM_H
 10 | #define _SPH_HYBRID_SYSTEM_H
 11 | 
 12 | #include <memory>
 13 | #include <string>
 14 | #include <utility>
 15 | #include <vector>
 16 | #include <cuda_runtime.h>
 17 | #include <GL/glew.h>
 18 | #include "gl_texture.h"
 19 | #include "high_resolution_timer.h"
 20 | #include "sph_arrangement.cuh"
 21 | #include "sph_parameter.h"
 22 | #include "sph_particle.h"
 23 | 
 24 | typedef unsigned int uint;
 25 | 
 26 | namespace sph
 27 | {
 28 | 
 29 | const int kDefaultNumParticles = 65536;
 30 | 
 31 | // Plain description of a scene to simulate. Every field has a default so a
 32 | // default-constructed Scene never exposes indeterminate values (x, y and z
 33 | // were previously left uninitialised).
 34 | struct Scene
 35 | {
 36 |     // Pairs of float3 describing the fluid blocks
 37 |     // (presumably two opposite corners per block -- confirm against the loader).
 38 |     std::vector<std::pair<float3, float3>> fluid_blocks;
 39 |     float interval = 0.5f;
 40 |     float mass = 0.02f;
 41 |     uint recomm_nump = kDefaultNumParticles;
 42 |     float x = 0.0f, y = 0.0f, z = 0.0f;
 43 | };
 39 | 
 40 | // Holds the particle buffers and per-frame state of the simulation.
 41 | // Only declarations live here; the member functions are defined elsewhere.
 42 | // Every scalar and pointer member carries a default initialiser so that none
 43 | // of them is indeterminate if a constructor does not set it.
 44 | class HybridSystem
 45 | {
 46 | public:
 47 |     HybridSystem(const float3 &real_world_side, const float3 &sim_origin);
 48 |     ~HybridSystem();
 49 | 
 50 |     void tick();
 51 |     void setPause();
 52 |     bool isRunning();
 53 |     uint getNumParticles();
 54 |     float3 getPosition(uint idx);
 55 | 
 56 |     void insertParticles(unsigned int type);
 57 | 
 58 |     void drawParticles(float rad, int size);
 59 |     void drawInfo(GLdouble w, GLdouble h);
 60 |     int loop = 0;
 61 | 
 62 | private:
 63 |     void initializeScene(const std::string &file_name, Scene scene);
 64 |     void initializeScene2(const std::string &file_name);
 65 |     void resetBuffer(uint nump);
 66 |     void addParticle2(float3 position, float3 velocity, condition phase, float temperature);
 67 |     void addParticle(float3 position, float3 velocity = make_float3(0.0f, 0.0f, 0.0f), int color_type = 1);
 68 |     bool is_running_ = false;
 69 |     uint nump_ = 0U;
 70 |     uint buff_capacity_ = 0U;
 71 |     ParticleBufferObject host_buff_;
 72 |     ParticleBufferObject device_buff_;
 73 |     ParticleBufferObject device_buff_temp_;
 74 | 
 75 |     ParticleBufferObject device_buff_data_;
 76 | 
 77 |     SystemParameter sys_para_;
 78 |     // NOTE(review): raw pointer; the commented-out unique_ptr below suggests
 79 |     // this object owns it -- confirm it is released in ~HybridSystem().
 80 |     //std::unique_ptr<Arrangement> arrangement_;
 81 |     Arrangement *arrangement_ = nullptr;
 82 |     float particle_interval = 0.5f;
 83 |     HighResolutionTimerForWin frame_timer_;
 84 |     bool get_detailed_time_ = false;
 85 |     float total_time_ = 0.0f;
 86 |     float pre_time_ = 0.0f, density_time_ = 0.0f, force_time_ = 0.0f;
 87 |     bool generate_mesh_ = false;
 88 |     bool add_smoke_ = false;
 89 | 
 90 |     // render
 91 |     PNGTexture particle_texture_;
 92 |     GLuint position_vbo_ = 0;
 93 |     GLuint color_vbo_ = 0;
 94 | 
 95 |     // action
 96 |     void action1();
 97 |     bool action1_ = false;
 98 | 
 99 |     //sf add
100 |     float pcisph_density_factor = 0.0f;
101 | };
 96 | 
 97 | }
 98 | 
 99 | #endif/*_SPH_HYBRID_SYSTEM_H*/
100 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/allocator.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
 2 | // Distributed under MIT license, or public domain if desired and
 3 | // recognized in your jurisdiction.
 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
 5 | 
 6 | #ifndef CPPTL_JSON_ALLOCATOR_H_INCLUDED
 7 | #define CPPTL_JSON_ALLOCATOR_H_INCLUDED
 8 | 
 9 | #include <cstring>
10 | #include <memory>
11 | 
12 | #pragma pack(push, 8)
13 | 
14 | namespace Json {
15 | /**
16 |  * Allocator that zeroes memory before returning it to the global heap.
17 |  * Storage comes from the global operator new / operator delete.
18 |  */
19 | template <typename T> class SecureAllocator {
20 | public:
21 |   // Type definitions
22 |   using value_type = T;
23 |   using pointer = T*;
24 |   using const_pointer = const T*;
25 |   using reference = T&;
26 |   using const_reference = const T&;
27 |   using size_type = std::size_t;
28 |   using difference_type = std::ptrdiff_t;
29 | 
30 |   /**
31 |    * Allocate memory for N items using the standard allocator.
32 |    */
33 |   pointer allocate(size_type n) {
34 |     // allocate using "global operator new"
35 |     return static_cast<pointer>(::operator new(n * sizeof(T)));
36 |   }
37 | 
38 |   /**
39 |    * Release memory which was allocated for N items at pointer P.
40 |    *
41 |    * The memory block is filled with zeroes before being released.
42 |    * The wipe is done through a pointer-to-volatile so that every store is
43 |    * an observable side effect the optimizer has to keep. The former
44 |    * std::memset() call only had a volatile *pointer* (T* volatile), which
45 |    * does not prevent a compiler from dropping a memset to memory that is
46 |    * about to be freed.
47 |    */
48 |   void deallocate(volatile pointer p, size_type n) {
49 |     volatile unsigned char* bytes =
50 |         reinterpret_cast<volatile unsigned char*>(p);
51 |     const size_type total = n * sizeof(T);
52 |     for (size_type i = 0; i < total; ++i) {
53 |       bytes[i] = 0;
54 |     }
55 |     // free using "global operator delete"
56 |     ::operator delete(p);
57 |   }
58 | 
59 |   /**
60 |    * Construct an item in-place at pointer P.
61 |    */
62 |   template <typename... Args> void construct(pointer p, Args&&... args) {
63 |     // construct using "placement new" and "perfect forwarding"
64 |     ::new (static_cast<void*>(p)) T(std::forward<Args>(args)...);
65 |   }
66 | 
67 |   /** Largest N for which N * sizeof(T) still fits in size_type. */
68 |   size_type max_size() const { return size_t(-1) / sizeof(T); }
69 | 
70 |   pointer address(reference x) const { return std::addressof(x); }
71 | 
72 |   const_pointer address(const_reference x) const { return std::addressof(x); }
73 | 
74 |   /**
75 |    * Destroy an item in-place at pointer P.
76 |    */
77 |   void destroy(pointer p) {
78 |     // destroy using "explicit destructor"
79 |     p->~T();
80 |   }
81 | 
82 |   // Boilerplate
83 |   SecureAllocator() {}
84 |   template <typename U> SecureAllocator(const SecureAllocator<U>&) {}
85 |   template <typename U> struct rebind { using other = SecureAllocator<U>; };
86 | };
74 | 
75 | template <typename T, typename U>
76 | bool operator==(const SecureAllocator<T>&, const SecureAllocator<U>&) {
77 |   return true;
78 | }
79 | 
80 | template <typename T, typename U>
81 | bool operator!=(const SecureAllocator<T>&, const SecureAllocator<U>&) {
82 |   return false;
83 | }
84 | 
85 | } // namespace Json
86 | 
87 | #pragma pack(pop)
88 | 
89 | #endif // CPPTL_JSON_ALLOCATOR_H_INCLUDED
90 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_utils.cuh:
--------------------------------------------------------------------------------
 1 | //
 2 | // sph_utils.cuh
 3 | // Hybrid_Parallel_SPH
 4 | //
 5 | // created by kmhuang and ruanjm on 2018/09/01
 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
 7 | //
 8 | 
 9 | #ifndef _SPH_UTILS_CUH
10 | #define _SPH_UTILS_CUH
11 | 
12 | #include <math.h>
13 | #include <cuda_runtime.h>
14 | 
15 | namespace sph
16 | {
17 | 
18 | 	const int kInvalidCellIdx = 0xffffffff;
19 | 
20 | 	// Returns (a + b - 1) / b, i.e. a / b rounded up when both operands
21 | 	// are positive.
22 | 	__device__ __host__
23 | 		inline int ceil_int(int a, int b)
24 | 	{
25 | 		const int padded = a + b - 1;
26 | 		return padded / b;
27 | 	}
22 | 
23 | 	// Maps a position to the integer coordinate of the cell containing it:
24 | 	// floor(component / cell_size) per axis, converted to unsigned short.
25 | 	// pos.w is not read.
26 | 	// NOTE(review): a negative component yields a negative floor value whose
27 | 	// conversion to unsigned short is not checked here -- confirm positions
28 | 	// are kept non-negative by the caller.
29 | 	__device__
30 | 		inline ushort3 ParticlePos2CellPos(const float4 &pos, float cell_size)
31 | 	{
32 | 		const float cx = floorf(pos.x / cell_size);
33 | 		const float cy = floorf(pos.y / cell_size);
34 | 		const float cz = floorf(pos.z / cell_size);
35 | 		return make_ushort3(cx, cy, cz);
36 | 	}
30 | 	// Same mapping as ParticlePos2CellPos but on a grid four times finer per
31 | 	// axis: each component is multiplied by 4 / cell_size before the floor.
32 | 	// pos.w is not read.
33 | 	__device__
34 | 		inline ushort3 ParticlePos2CellPosM(const float4 &pos, float cell_size)
35 | 	{
36 | 		const float scale = 4.f / cell_size;
37 | 		const float cx = floorf(pos.x * scale);
38 | 		const float cy = floorf(pos.y * scale);
39 | 		const float cz = floorf(pos.z * scale);
40 | 		return make_ushort3(cx, cy, cz);
41 | 	}
38 | 	// Flattens a cell coordinate into a linear index with x varying fastest:
39 | 	//   idx = x + grid_size.x * (y + grid_size.y * z)
40 | 	// Returns kInvalidCellIdx when any component is >= the matching grid_size
41 | 	// component.
42 | 	// The components are unsigned, so the former "< 0" tests could never be
43 | 	// true and have been removed; the result is unchanged.
44 | 	// NOTE(review): a coordinate that was negative before its conversion to
45 | 	// unsigned short therefore cannot be recognised here -- confirm upstream.
46 | 	__device__
47 | 		inline int CellPos2CellIdx(const ushort3 &cell_pos, const ushort3 &grid_size)
48 | 	{
49 | 		if (cell_pos.x >= grid_size.x ||
50 | 			cell_pos.y >= grid_size.y ||
51 | 			cell_pos.z >= grid_size.z)
52 | 			return kInvalidCellIdx;
53 | 		return cell_pos.x + grid_size.x * (cell_pos.y + grid_size.y * cell_pos.z);
54 | 	}
47 | 
48 | 	// Linear cell index of the cell containing pos: ParticlePos2CellPos
49 | 	// followed by CellPos2CellIdx (kInvalidCellIdx when outside grid_size).
50 | 	__device__
51 | 		inline int ParticlePos2CellIdx(const float4 &pos, const ushort3 &grid_size, float cell_size)
52 | 	{
53 | 		return CellPos2CellIdx(ParticlePos2CellPos(pos, cell_size), grid_size);
54 | 	}
54 | 	// Linear index of a fine cell on the grid that is four times finer per
55 | 	// axis than grid_size. The low two bits of each coordinate select one of
56 | 	// the 4x4x4 fine cells inside a coarse cell, the remaining bits select
57 | 	// the coarse cell:
58 | 	//   coarse = X + grid_size.x * (Y + grid_size.y * Z)   with X = x >> 2, ...
59 | 	//   fine   = y + 4 * (z + 4 * x)                       with x = x & 3, ...
60 | 	//   id     = (coarse << 6) | fine
61 | 	// Returns kInvalidCellIdx when a coordinate is >= 4 * grid_size on its axis.
62 | 	// The components are unsigned, so the former "< 0" tests could never be
63 | 	// true and have been removed; the result is unchanged.
64 | 	__device__
65 | 		inline int CellPos2CellIdxM(const ushort3 &cell_pos, const ushort3 &grid_size)
66 | 	{
67 | 		if (cell_pos.x >= (grid_size.x << 2) ||
68 | 			cell_pos.y >= (grid_size.y << 2) ||
69 | 			cell_pos.z >= (grid_size.z << 2))
70 | 			return kInvalidCellIdx;
71 | 
72 | 		// position inside the coarse cell (0..3 per axis)
73 | 		const int x = cell_pos.x & 0x03;
74 | 		const int y = cell_pos.y & 0x03;
75 | 		const int z = cell_pos.z & 0x03;
76 | 		// coordinate of the coarse cell
77 | 		const int xx = (cell_pos.x >> 2);
78 | 		const int yy = (cell_pos.y >> 2);
79 | 		const int zz = (cell_pos.z >> 2);
80 | 
81 | 		const int idc = xx + grid_size.x * (yy + grid_size.y * zz);
82 | 
83 | 		// alternative bit-interleaved layout, kept for reference:
84 | 		//	int idi = (x << 4) | (y & 0x01) | ((y & 0x02) << 1) | ((z & 0x01) << 1) | ((z & 0x02) << 2);
85 | 
86 | 		const int idi = y + ((z + (x << 2)) << 2);
87 | 		return (idc << 6) | (idi);
88 | 	}
77 | 	// Fine-grid index of the cell containing pos: ParticlePos2CellPosM
78 | 	// followed by CellPos2CellIdxM (kInvalidCellIdx when outside the grid).
79 | 	__device__
80 | 		inline int ParticlePos2CellIdxM(const float4 &pos, const ushort3 &grid_size, float cell_size)
81 | 	{
82 | 		return CellPos2CellIdxM(ParticlePos2CellPosM(pos, cell_size), grid_size);
83 | 	}
83 | 	__device__
84 | 		inline ushort3 CellIdx2CellPos(int idx, const ushort3 &grid_size)
85 | 	{
86 | 			return make_ushort3(idx % grid_size.x,
87 | 				idx / grid_size.x % grid_size.y,
88 | 				idx / grid_size.x / grid_size.y);
89 | 		}
90 | 
91 | }
92 | 
93 | #endif/*_SPH_UTILS_CUH*/
94 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gl_main_header.h:
--------------------------------------------------------------------------------
 1 | #ifndef _GL_MAIN_HEADER_H
 2 | #define _GL_MAIN_HEADER_H
 3 | 
 4 | #define VNAME		4DF
 5 | #define VTYPE		float
 6 | 
 7 | class Vector4DF {
 8 | public:
 9 |     VTYPE x, y, z, w;
10 | 
11 |     Vector4DF &Set(const float xa, const float ya, const float za)	{ x = xa; y = ya; z = za; w = 1; return *this; }
12 |     Vector4DF &Set(const float xa, const float ya, const float za, const float wa)	{ x = xa; y = ya; z = za; w = wa; return *this; }
13 | 
14 |     // Constructors/Destructors
15 |     Vector4DF() { x = 0; y = 0; z = 0; w = 0; }
16 |     Vector4DF(const VTYPE xa, const VTYPE ya, const VTYPE za, const VTYPE wa);
17 | 
18 |     Vector4DF(const Vector4DF &op);
19 | 
20 |     // Member Functions
21 |     Vector4DF &operator= (const int op);
22 |     Vector4DF &operator= (const double op);
23 | 
24 |     Vector4DF &operator= (const Vector4DF &op);
25 | 
26 |     Vector4DF &operator+= (const int op);
27 |     Vector4DF &operator+= (const float op);
28 |     Vector4DF &operator+= (const double op);
29 | 
30 |     Vector4DF &operator+= (const Vector4DF &op);
31 | 
32 |     Vector4DF &operator-= (const int op);
33 |     Vector4DF &operator-= (const double op);
34 | 
35 |     Vector4DF &operator-= (const Vector4DF &op);
36 | 
37 |     Vector4DF &operator*= (const int op);
38 |     Vector4DF &operator*= (const double op);
39 | 
40 |     Vector4DF &operator*= (const Vector4DF &op);
41 |     Vector4DF &operator*= (const float* op);
42 | 
43 |     Vector4DF &operator/= (const int op);
44 |     Vector4DF &operator/= (const double op);
45 | 
46 |     // Slow operations - require temporary variables
47 |     Vector4DF operator+ (const int op)			{ return Vector4DF(x + float(op), y + float(op), z + float(op), w + float(op)); }
48 |     Vector4DF operator+ (const float op)		{ return Vector4DF(x + op, y + op, z + op, w*op); }
49 |     Vector4DF operator+ (const Vector4DF &op)	{ return Vector4DF(x + op.x, y + op.y, z + op.z, w + op.w); }
50 |     Vector4DF operator- (const int op)			{ return Vector4DF(x - float(op), y - float(op), z - float(op), w - float(op)); }
51 |     Vector4DF operator- (const float op)		{ return Vector4DF(x - op, y - op, z - op, w*op); }
52 |     Vector4DF operator- (const Vector4DF &op)	{ return Vector4DF(x - op.x, y - op.y, z - op.z, w - op.w); }
53 |     Vector4DF operator* (const int op)			{ return Vector4DF(x*float(op), y*float(op), z*float(op), w*float(op)); }
54 |     Vector4DF operator* (const float op)		{ return Vector4DF(x*op, y*op, z*op, w*op); }
55 |     Vector4DF operator* (const Vector4DF &op)	{ return Vector4DF(x*op.x, y*op.y, z*op.z, w*op.w); }
56 |     // --
57 | 
58 |     Vector4DF& Clamp(float xc, float yc, float zc, float wc)
59 |     {
60 |         x = (x > xc) ? xc : x;
61 |         y = (y > yc) ? yc : y;
62 |         z = (z > zc) ? zc : z;
63 |         w = (w > wc) ? wc : w;
64 |         return *this;
65 |     }
66 | 
67 |     Vector4DF &Cross(const Vector4DF &v);
68 | 
69 |     double Dot(const Vector4DF &v);
70 | 
71 |     double Dist(const Vector4DF &v);
72 | 
73 |     double DistSq(const Vector4DF &v);
74 | 
75 |     Vector4DF &Normalize(void);
76 |     double Length(void);
77 | 
78 |     VTYPE &X(void)				{ return x; }
79 |     VTYPE &Y(void)				{ return y; }
80 |     VTYPE &Z(void)				{ return z; }
81 |     VTYPE &W(void)				{ return w; }
82 |     const VTYPE &X(void) const	{ return x; }
83 |     const VTYPE &Y(void) const	{ return y; }
84 |     const VTYPE &Z(void) const	{ return z; }
85 |     const VTYPE &W(void) const	{ return w; }
86 |     VTYPE *Data(void)			{ return &x; }
87 | };
88 | 
89 | #undef VNAME
90 | #undef VTYPE
91 | 
92 | #endif/*_GL_MAIN_HEADER_H*/
93 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_arrangement.cuh:
--------------------------------------------------------------------------------
  1 | //
  2 | // sph_arrangement.cuh
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #ifndef _SPH_ARRANGEMENT_CUH
 10 | #define _SPH_ARRANGEMENT_CUH
 11 | 
 12 | #include "sph_particle.h"
 13 | 
 14 | namespace gpu_model { struct GPUModel; }
 15 | 
 16 | namespace sph
 17 | {
 18 | 
 19 | class Arrangement
 20 | {
 21 | public:
 22 |     Arrangement(ParticleBufferObject &buff_list,
 23 |                 ParticleBufferObject &buff_temp,
 24 |                
 25 |                 unsigned int nump, 
 26 | 				unsigned int nump_capacity,
 27 |                 float cell_size, 
 28 |                 ushort3 grid_size);
 29 | 
 30 |     ~Arrangement();
 31 | 
 32 |     // return middle value of 2 parallel framework
 33 |     int arrangeTRAMode();
 34 |     void arrangeSMSMode();
 35 | 	int arrangeHybridMode();
 36 |     int arrangeHybridMode9();
 37 |     void test();
 38 | 
 39 |     void sortParticles();
 40 |     void assignTasksFixedCTA();
 41 | 
 42 |     int* getDevCellStartIdx();
 43 |     int* getDevCellEndIdx();
 44 |     int getNumBlockSMSMode();
 45 |     BlockTask *getBlockTasks();
 46 | 
 47 | 	void resetNumParticle(unsigned int nump);
 48 | 
 49 |     
 50 |     int* getDevOffsetData() { return d_cell_offset_data; }
 51 |     int* getDevCellOffset() { return d_cell_offset_; }
 52 | 	int* getDevCellOffsetM() { return d_cell_offset_M; }
 53 |     int* getDevCellIndex() { return d_index_; }
 54 | 
 55 |     int* getDevCellNumP() { return d_cell_nump_; }
 56 | 
 57 |     unsigned int getNumC() { return numc_; }
 58 | 
 59 | 
 60 |     void CountingSortCUDA();
 61 |     void CountingSort_O();
 62 | 
 63 |     void CountingSortCUDA_Two();
 64 |     void CountingSortCUDA_Two9();
 65 |     void countNum();
 66 | 
 67 | 	void CountingSort_O_M();
 68 | 	void CountingSortCUDA_Two9_M();
 69 | 	int arrangeHybridMode9M();
 70 | private:
 71 |     void calculateHash();
 72 |     void calculateHashWithBlockReq();
 73 |     void sortHash();
 74 |     void sortIndexByHash();
 75 |     void reindexParticles();    // use index sorted by hash to reindex
 76 |     void reindexParticles2();   // use "particle offset in cell" and "prefix summed cell offset" to reindex
 77 |     void findCellRange();
 78 |     void findCellRangeAndHybridModeMiddleValue();
 79 |     void insertParticles();
 80 |     void arrangeBlockTasks();
 81 | 
 82 | 	
 83 | 
 84 |     void CSInsertParticles();
 85 |     void CSCountingSortFull();
 86 | 	void arrangeBlockTasksFixedM(int *hash, int *celloff, int *cellnum, BlockTask* d_task_array, int* d_cta_reqs, int* d_task_array_offset, int cta_size);
 87 |     void arrangeBlockTasksFixed(BlockTask* d_task_array, int* d_cta_reqs, int* d_task_array_offset, int cta_size);
 88 |     void arrangeBlockTasksFloat();
 89 | 
 90 |     void CSCalculateRequiredCTAsFixed(int *cat_offset, int* d_cta_reqs, int cta_size);
 91 | 
 92 | 
 93 |   
 94 | 
 95 |     ParticleBufferObject &buff_list_; // particle device buffer
 96 |     ParticleBufferObject &buff_temp_; // particle device buffer for replacement 
 97 |     unsigned int nump_;             // #particles
 98 | 	unsigned int nump_capacity_;
 99 |     unsigned int numc_;             // #cells
100 |     float cell_size_;
101 |     ushort3 grid_size_;
102 |     int middle_value_ = 0;
103 | 
104 |     int  h_num_cta_;
105 | 
106 |     int* d_num_cta_;
107 |     int* d_cell_offset_;            // [numc] the offset in memory of the particles in each cell
108 |     int* d_cell_nump_;              // [numc] the number of particles in each cell
109 |     int* d_p_offset_;
110 | 
111 |     int* d_p_offset_p;
112 | 
113 |     int* d_cell_offset_data;
114 | 
115 |     int *d_start_index_;            // [numc]device buffer, cell start index
116 |     int *d_end_index_;              // [numc]device buffer, cell end index
117 |     int *d_hash_;                   // [nump]
118 |     int *d_index_;                  // [nump]
119 |     
120 |     int *hashp;                   // [nump]
121 | 	int *d_hash_p;                   // [nump]
122 |    // int *indexp;
123 |     int *cell_num_;
124 |     int *cell_num_two;
125 |     int *cell_type;
126 | 	int* d_cell_nump_M;
127 | 	int* d_cell_offset_M;
128 | 
129 | 
130 |     int* d_task_array_offset_32_;   // [numc]result of prescan
131 |     int *d_block_reqs_;             // [numc]for SMS Mode
132 |     int *d_breqs_offset_;           // [numc]result of prescan
133 |     int *d_num_block_;              // [1]
134 |     int h_num_block_ = 0;
135 |     BlockTask *d_block_task_;       // [numb]
136 |     
137 |     int *d_middle_value_;           // [1]for Hybrid Mode
138 | 
139 |     gpu_model::GPUModel *p_gpu_model_ = nullptr;
140 | };
141 | 
142 | }
143 | 
144 | #endif/*_SPH_ARRANGEMENT_CUH*/
145 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj.filters:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup>
  4 |     <Filter Include="GPU Model">
  5 |       <UniqueIdentifier>{55d31f92-036c-4a57-b177-1d7d5d7355b6}</UniqueIdentifier>
  6 |     </Filter>
  7 |     <Filter Include="GPU Prescan">
  8 |       <UniqueIdentifier>{f758ad77-964e-4ddf-ad45-aba65e76286e}</UniqueIdentifier>
  9 |     </Filter>
 10 |     <Filter Include="Eulerian System">
 11 |       <UniqueIdentifier>{df5589b3-0608-4b75-8f82-5a3ca8af24bd}</UniqueIdentifier>
 12 |     </Filter>
 13 |     <Filter Include="Utils">
 14 |       <UniqueIdentifier>{9b82fa81-bedb-4faf-8d92-e1fa93169963}</UniqueIdentifier>
 15 |     </Filter>
 16 |     <Filter Include="SPH System">
 17 |       <UniqueIdentifier>{6e33e5de-acd1-4615-9555-f3cf68a606ef}</UniqueIdentifier>
 18 |     </Filter>
 19 |     <Filter Include="Marching Cubes">
 20 |       <UniqueIdentifier>{a08c016c-896a-4c51-b108-2eec44fe4591}</UniqueIdentifier>
 21 |     </Filter>
 22 |   </ItemGroup>
 23 |   <ItemGroup>
 24 |     <ClCompile Include="gl_main.cpp" />
 25 |     <ClCompile Include="gpu_model_reader.cpp">
 26 |       <Filter>GPU Model</Filter>
 27 |     </ClCompile>
 28 |     <ClCompile Include="sph_marching_cube.cpp">
 29 |       <Filter>Marching Cubes</Filter>
 30 |     </ClCompile>
 31 |     <ClCompile Include="sph_particle.cpp">
 32 |       <Filter>SPH System</Filter>
 33 |     </ClCompile>
 34 |     <ClCompile Include="pcisph_factor.cpp">
 35 |       <Filter>SPH System</Filter>
 36 |     </ClCompile>
 37 |     <ClCompile Include="sph_timer.cpp">
 38 |       <Filter>Utils</Filter>
 39 |     </ClCompile>
 40 |     <ClCompile Include="lodepng.cpp">
 41 |       <Filter>Utils</Filter>
 42 |     </ClCompile>
 43 |     <ClCompile Include="parameters.cpp" />
 44 |   </ItemGroup>
 45 |   <ItemGroup>
 46 |     <ClInclude Include="sph_hybrid_system.h">
 47 |       <Filter>SPH System</Filter>
 48 |     </ClInclude>
 49 |     <ClInclude Include="sph_particle.h">
 50 |       <Filter>SPH System</Filter>
 51 |     </ClInclude>
 52 |     <ClInclude Include="sph_parameter.h">
 53 |       <Filter>SPH System</Filter>
 54 |     </ClInclude>
 55 |     <ClInclude Include="sph_arrangement.cuh">
 56 |       <Filter>SPH System</Filter>
 57 |     </ClInclude>
 58 |     <ClInclude Include="sph_utils.cuh">
 59 |       <Filter>SPH System</Filter>
 60 |     </ClInclude>
 61 |     <ClInclude Include="sph_kernel.cuh">
 62 |       <Filter>SPH System</Filter>
 63 |     </ClInclude>
 64 |     <ClInclude Include="sph_kernel_shared_data.cuh">
 65 |       <Filter>SPH System</Filter>
 66 |     </ClInclude>
 67 |     <ClInclude Include="cuda_prescan\scan.cuh">
 68 |       <Filter>GPU Prescan</Filter>
 69 |     </ClInclude>
 70 |     <ClInclude Include="cuda_prescan\scan_kern.cuh">
 71 |       <Filter>GPU Prescan</Filter>
 72 |     </ClInclude>
 73 |     <ClInclude Include="gpu_model.cuh">
 74 |       <Filter>GPU Model</Filter>
 75 |     </ClInclude>
 76 |     <ClInclude Include="gpu_model.h">
 77 |       <Filter>GPU Model</Filter>
 78 |     </ClInclude>
 79 |     <ClInclude Include="gpu_model_reader.h">
 80 |       <Filter>GPU Model</Filter>
 81 |     </ClInclude>
 82 |     <ClInclude Include="gl_main_header.h" />
 83 |     <ClInclude Include="sph_marching_cube.h">
 84 |       <Filter>Marching Cubes</Filter>
 85 |     </ClInclude>
 86 |     <ClInclude Include="pcisph_factor.h">
 87 |       <Filter>SPH System</Filter>
 88 |     </ClInclude>
 89 |     <ClInclude Include="save_screen.h">
 90 |       <Filter>Utils</Filter>
 91 |     </ClInclude>
 92 |     <ClInclude Include="sph_timer.h">
 93 |       <Filter>Utils</Filter>
 94 |     </ClInclude>
 95 |     <ClInclude Include="gl_texture.h">
 96 |       <Filter>Utils</Filter>
 97 |     </ClInclude>
 98 |     <ClInclude Include="high_resolution_timer.h">
 99 |       <Filter>Utils</Filter>
100 |     </ClInclude>
101 |     <ClInclude Include="lodepng.h">
102 |       <Filter>Utils</Filter>
103 |     </ClInclude>
104 |     <ClInclude Include="parameters.h" />
105 |     <ClInclude Include="main.h" />
106 |     <ClInclude Include="nv_gui.h" />
107 |   </ItemGroup>
108 |   <ItemGroup>
109 |     <CudaCompile Include="sph_arrangement.cu">
110 |       <Filter>SPH System</Filter>
111 |     </CudaCompile>
112 |     <CudaCompile Include="sph_hybrid_system.cpp">
113 |       <Filter>SPH System</Filter>
114 |     </CudaCompile>
115 |     <CudaCompile Include="sph_kernel.cu">
116 |       <Filter>SPH System</Filter>
117 |     </CudaCompile>
118 |     <CudaCompile Include="cuda_prescan\prefix_sum.cu">
119 |       <Filter>GPU Prescan</Filter>
120 |     </CudaCompile>
121 |     <CudaCompile Include="cuda_prescan\scan.cu">
122 |       <Filter>GPU Prescan</Filter>
123 |     </CudaCompile>
124 |     <CudaCompile Include="gpu_model.cu">
125 |       <Filter>GPU Model</Filter>
126 |     </CudaCompile>
127 |   </ItemGroup>
128 | </Project>


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // gpu_model.h
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #ifndef _GPU_MODEL_H
 10 | #define _GPU_MODEL_H
 11 | 
 12 | #include <cuda_runtime_api.h>
 13 | 
 14 | #define I_ADD_SUB   0   // add.s32, sub.s32
 15 | #define I_MAD_MUL   1   // mul.xx.s32, mad.xx.s32, xx=lo
 16 | #define I_DIV_REM   2   // div.u32, rem.u32, div.s32, rem.s32
 17 | #define I_MIN_MAX   3   // min.u32, max.u32, min.s32, max.s32
 18 | #define I_ABS       4   // abs.s32
 19 | #define I_MUL24     5   // mul24.xx.u32, mul24.xx.s32, xx=lo
 20 | #define I_LOGICAL   6   // AND, OR and XOR
 21 | #define I_SHL_SHR   7   // shl.b32, shr.u32
 22 | #define I_SAD       8   // sad.u32, sad.s32
 23 | #define F_ADD_SUB   9   // add.f32, sub.f32
 24 | #define F_MAD_MUL   10  // mul.f32, fma.xx.f32, xx=rn
 25 | #define F_DIV       11  // div.xx.f32, xx=rn
 26 | #define F_DIVIDEF   12  // div.approx.f32
 27 | #define F_EXP2      13  // ex2.approx.f32
 28 | #define F_LOG2      14  // lg2.approx.f32
 29 | #define F_SIN_COS   15  // sin.approx.f32, cos.approx.f32
 30 | #define F_SQRT      16  // sqrt.xx.f32, xx=rn
 31 | #define F_RSQRT     17  // rsqrt.approx.f32
 32 | #define F_MIN_MAX   18  // min.f32, max.f32
 33 | #define F_RCP       19  // rcp.xx.f32, xx=rn
 34 | 
 35 | #define ARI_STAT_SIZE   20  // arithmetical insts
 36 | #define DEFAULT_INST    ARI_STAT_SIZE
 37 | 
 38 | #define GLOBAL_ACC	0
 39 | #define SHARED_ACC  1
 40 | #define LOCAL_ACC	2
 41 | #define CONST_ACC   3
 42 | 
 43 | #define MEM_STAT_SIZE   4
 44 | 
 45 | #define NO_RECOMMENDATION  -1
 46 | 
 47 | struct PTXBlockStatistic
 48 | {
 49 |     __host__ __device__
 50 |     inline PTXBlockStatistic operator*(unsigned int t) const{
 51 |         PTXBlockStatistic result;
 52 | 
 53 |         result.recommended_times = 1;
 54 | 
 55 |         result.num_insts = num_insts * t;
 56 |         result.num_unknown = num_unknown * t;
 57 |         result.num_sync = num_sync * t;
 58 |         result.num_bra = num_bra * t;
 59 |         for (size_t i = 0; i < ARI_STAT_SIZE; ++i){
 60 |             result.num_stat[i] = num_stat[i] * t;
 61 |         }
 62 |         for (size_t i = 0; i < MEM_STAT_SIZE; ++i){
 63 |             result.num_mem[i] = num_mem[i] * t;
 64 |         }
 65 | 
 66 |         return result;
 67 |     }
 68 | 
 69 |     __host__ __device__
 70 |     inline void operator*=(unsigned int t) {
 71 |         recommended_times = 1;
 72 |         num_insts *= t;
 73 |         num_unknown *= t;
 74 |         num_sync *= t;
 75 |         num_bra *= t;
 76 |         for (size_t i = 0; i < ARI_STAT_SIZE; ++i){
 77 |             num_stat[i] *= t;
 78 |         }
 79 |         for (size_t i = 0; i < MEM_STAT_SIZE; ++i){
 80 |             num_mem[i] *= t;
 81 |         }
 82 |     }
 83 | 
 84 |     __host__ __device__
 85 |     inline PTXBlockStatistic operator+(const PTXBlockStatistic &a) const{
 86 |         PTXBlockStatistic result;
 87 | 
 88 |         result.recommended_times = NO_RECOMMENDATION;
 89 | 
 90 |         result.num_insts = num_insts + a.num_insts;
 91 |         result.num_unknown = num_unknown + a.num_unknown;
 92 |         result.num_sync = num_sync + a.num_sync;
 93 |         result.num_bra = num_bra + a.num_bra;
 94 |         for (size_t i = 0; i < ARI_STAT_SIZE; ++i){
 95 |             result.num_stat[i] = num_stat[i] + a.num_stat[i];
 96 |         }
 97 |         for (size_t i = 0; i < MEM_STAT_SIZE; ++i){
 98 |             result.num_mem[i] = num_mem[i] + a.num_mem[i];
 99 |         }
100 | 
101 |         return result;
102 |     }
103 | 
104 |     __host__ __device__
105 |     inline void operator +=(const PTXBlockStatistic &a){
106 |         recommended_times = NO_RECOMMENDATION;
107 | 
108 |         num_insts += a.num_insts;
109 |         num_unknown += a.num_unknown;
110 |         num_sync += a.num_sync;
111 |         num_bra += a.num_bra;
112 |         for (size_t i = 0; i < ARI_STAT_SIZE; ++i){
113 |             num_stat[i] += a.num_stat[i];
114 |         }
115 |         for (size_t i = 0; i < MEM_STAT_SIZE; ++i){
116 |             num_mem[i] += a.num_mem[i];
117 |         }
118 |     }
119 | 
120 |     int recommended_times;
121 | 
122 |     // statistics
123 |     unsigned int num_insts;
124 |     unsigned int num_unknown;
125 |     unsigned int num_sync;
126 |     unsigned int num_bra;
127 |     unsigned int num_stat[ARI_STAT_SIZE];
128 |     unsigned int num_mem[MEM_STAT_SIZE];
129 | };
130 | 
// Kernel-dependent model parameters: three float fields are active, the
// commented-out fields below are currently disabled.
// NOTE(review): units of the latency/delay values are not stated here
// (presumably clock cycles) - TODO confirm against the code that fills them.
131 | struct KernelRelatedParas
132 | {
133 |     //float l2_hit_rate;          // L2 cache hit rate, get from NSIGHT profiler
134 |     //float num_uncoal_per_warp;  // #memory_transactions per warp(uncoalesced access)
135 |     float dram_lat;             // baseline DRAM access latency
136 |     float delta;                // transaction departure delay
137 |     float default_inst_lat;     // default instruction latency
138 |     //float block_size;           // #threads in a CTA
139 | };
140 | 
// Device-dependent model parameters. All active fields are stored as float,
// including the counts (simd_width, warp_size); the commented-out fields are
// currently disabled.
141 | struct GPUDeviceInfo
142 | {
143 |     //float freq;                 // GPU core clock frequency in GHz
144 |     //float mem_peak_bw;          // GPU memory bandwidth in GBps
145 |     float simd_width;           // #SPs per SM
146 |     float warp_size;            // #threads in a warp
147 |     //float transaction_size;     // transaction size for a DRAM request in Bytes
148 |     //float l2_lat;
149 |     float gamma;                // machine dependent parameter(for thread sync)
150 |     float ilp;                  // presumably an instruction-level parallelism factor - TODO confirm
151 |     float mlp;                  // presumably a memory-level parallelism factor - TODO confirm
152 | };
153 | 
// One float per arithmetic-instruction slot (ARI_STAT_SIZE entries) with
// unchecked subscript access.
struct InstructionInfo
{
    float inst_info[ARI_STAT_SIZE];

    // Read access: returns the value stored in slot i (no bounds check).
    __device__ __host__
    float operator[](unsigned int i) const {
        const float value = inst_info[i];
        return value;
    }

    // Write access: returns a reference to slot i (no bounds check).
    __device__ __host__
    float &operator[](unsigned int i){
        float &slot = inst_info[i];
        return slot;
    }
};
168 | 
169 | #endif/*_GPU_MODEL_H*/
170 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model_reader.cpp:
--------------------------------------------------------------------------------
  1 | //
  2 | // gpu_model_reader.cpp
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #include "gpu_model_reader.h"
 10 | #include <fstream>
 11 | #include <iostream>
 12 | #include "json/json.h"
 13 | #include "json/reader.h"
 14 | 
 15 | namespace gpu_model
 16 | {
 17 | 
 18 | unsigned int readPTXStatisticsFromFile(PTXBlockStatistic *&output, const std::string &func_name, const std::string &file_name)
 19 | {
 20 |     std::ifstream input(file_name, std::ios::binary);
 21 |     if (!input.is_open())
 22 |     {
 23 |         std::cout << "can not open " << file_name << std::endl;
 24 |         return 0;
 25 |     }
 26 | 
 27 |     unsigned int num_block = 0;
 28 | 
 29 |     Json::Reader js_reader;
 30 |     Json::Value root;
 31 | 
 32 |     if (js_reader.parse(input, root))
 33 |     {
 34 |         for (int i = 0; i < root.size(); ++i)
 35 |         {
 36 |             if (func_name == root[i]["function_name"].asString())
 37 |             {
 38 |                 Json::Value blocks = root[i]["block_array"];
 39 |                 num_block = blocks.size();
 40 |                 output = new PTXBlockStatistic[blocks.size()];
 41 |                 for (int j = 0; j < blocks.size(); ++j)
 42 |                 {
 43 |                     PTXBlockStatistic stat;
 44 | 
 45 |                     stat.num_insts = blocks[j]["num_insts"].asUInt();
 46 |                     stat.num_unknown = blocks[j]["num_unknown"].asUInt();
 47 |                     stat.num_sync = blocks[j]["num_sync"].asUInt();
 48 |                     stat.num_bra = blocks[j]["num_bra"].asUInt();
 49 |                     stat.num_stat[I_ADD_SUB] = blocks[j]["I_ADD_SUB"].asUInt();
 50 |                     stat.num_stat[I_MAD_MUL] = blocks[j]["I_MAD_MUL"].asUInt();
 51 |                     stat.num_stat[I_DIV_REM] = blocks[j]["I_DIV_REM"].asUInt();
 52 |                     stat.num_stat[I_MIN_MAX] = blocks[j]["I_MIN_MAX"].asUInt();
 53 |                     stat.num_stat[I_ABS] = blocks[j]["I_ABS"].asUInt();
 54 |                     stat.num_stat[I_MUL24] = blocks[j]["I_MUL24"].asUInt();
 55 |                     stat.num_stat[I_LOGICAL] = blocks[j]["I_LOGICAL"].asUInt();
 56 |                     stat.num_stat[I_SHL_SHR] = blocks[j]["I_SHL_SHR"].asUInt();
 57 |                     stat.num_stat[I_SAD] = blocks[j]["I_SAD"].asUInt();
 58 |                     stat.num_stat[F_ADD_SUB] = blocks[j]["F_ADD_SUB"].asUInt();
 59 |                     stat.num_stat[F_MAD_MUL] = blocks[j]["F_MAD_MUL"].asUInt();
 60 |                     stat.num_stat[F_DIV] = blocks[j]["F_DIV"].asUInt();
 61 |                     stat.num_stat[F_DIVIDEF] = blocks[j]["F_DIVIDEF"].asUInt();
 62 |                     stat.num_stat[F_EXP2] = blocks[j]["F_EXP2"].asUInt();
 63 |                     stat.num_stat[F_LOG2] = blocks[j]["F_LOG2"].asUInt();
 64 |                     stat.num_stat[F_SIN_COS] = blocks[j]["F_SIN_COS"].asUInt();
 65 |                     stat.num_stat[F_SQRT] = blocks[j]["F_SQRT"].asUInt();
 66 |                     stat.num_stat[F_RSQRT] = blocks[j]["F_RSQRT"].asUInt();
 67 |                     stat.num_stat[F_MIN_MAX] = blocks[j]["F_MIN_MAX"].asUInt();
 68 |                     stat.num_stat[F_RCP] = blocks[j]["F_RCP"].asUInt();
 69 |                     stat.num_mem[GLOBAL_ACC] = blocks[j]["GLOBAL_ACC"].asUInt();
 70 |                     stat.num_mem[SHARED_ACC] = blocks[j]["SHARED_ACC"].asUInt();
 71 |                     stat.num_mem[LOCAL_ACC] = blocks[j]["LOCAL_ACC"].asUInt();
 72 |                     stat.num_mem[CONST_ACC] = blocks[j]["CONST_ACC"].asUInt();
 73 | 
 74 |                     output[j] = stat;
 75 |                 }
 76 | 
 77 |                 break;
 78 |             }
 79 |         }
 80 | 
 81 |         // set recommendation
 82 |         if (num_block == 7) // SMS
 83 |         {
 84 |             output[0].recommended_times = 1;
 85 |             output[1].recommended_times = 27 / 2;
 86 |             output[2].recommended_times = 1;
 87 |             output[3].recommended_times = NO_RECOMMENDATION;
 88 |             output[4].recommended_times = NO_RECOMMENDATION;
 89 |             output[5].recommended_times = NO_RECOMMENDATION;
 90 |             output[6].recommended_times = 1;
 91 |         }
 92 | 
 93 |         if (num_block == 5) // traditional
 94 |         {
 95 |             output[0].recommended_times = 1;
 96 |             output[1].recommended_times = 27;
 97 |             output[2].recommended_times = NO_RECOMMENDATION;
 98 |             output[3].recommended_times = 27;
 99 |             output[4].recommended_times = 1;
100 |         }
101 |     }
102 | 
103 |     return num_block;
104 | }
105 | 
106 | void readInstructionLatencyFromFile(InstructionInfo &inst_info, const std::string &file_name)
107 | {
108 |     std::ifstream input(file_name, std::ios::binary);
109 |     if (!input.is_open())
110 |     {
111 |         std::cout << "can not open " << file_name << std::endl;
112 |         return;
113 |     }
114 | 
115 |     Json::Reader js_reader;
116 |     Json::Value root;
117 | 
118 |     if (js_reader.parse(input, root))
119 |     {
120 |         inst_info[I_ADD_SUB] = root["I_ADD_SUB"].asFloat();
121 |         inst_info[I_MAD_MUL] = root["I_MAD_MUL"].asFloat();
122 |         inst_info[I_DIV_REM] = root["I_DIV_REM"].asFloat();
123 |         inst_info[I_MIN_MAX] = root["I_MIN_MAX"].asFloat();
124 |         inst_info[I_ABS] = root["I_ABS"].asFloat();
125 |         inst_info[I_MUL24] = root["I_MUL24"].asFloat();
126 |         inst_info[I_LOGICAL] = root["I_LOGICAL"].asFloat();
127 |         inst_info[I_SHL_SHR] = root["I_SHL_SHR"].asFloat();
128 |         inst_info[I_SAD] = root["I_SAD"].asFloat();
129 |         inst_info[F_ADD_SUB] = root["F_ADD_SUB"].asFloat();
130 |         inst_info[F_MAD_MUL] = root["F_MAD_MUL"].asFloat();
131 |         inst_info[F_DIV] = root["F_DIV"].asFloat();
132 |         inst_info[F_DIVIDEF] = root["F_DIVIDEF"].asFloat();
133 |         inst_info[F_EXP2] = root["F_EXP2"].asFloat();
134 |         inst_info[F_LOG2] = root["F_LOG2"].asFloat();
135 |         inst_info[F_SIN_COS] = root["F_SIN_COS"].asFloat();
136 |         inst_info[F_SQRT] = root["F_SQRT"].asFloat();
137 |         inst_info[F_RSQRT] = root["F_RSQRT"].asFloat();
138 |         inst_info[F_MIN_MAX] = root["F_MIN_MAX"].asFloat();
139 |         inst_info[F_RCP] = root["F_RCP"].asFloat();
140 |     }
141 | }
142 | 
143 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/config.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
  2 | // Distributed under MIT license, or public domain if desired and
  3 | // recognized in your jurisdiction.
  4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
  5 | 
  6 | #ifndef JSON_CONFIG_H_INCLUDED
  7 | #define JSON_CONFIG_H_INCLUDED
  8 | #include <cstddef>
  9 | #include <cstdint>
 10 | #include <istream>
 11 | #include <memory>
 12 | #include <ostream>
 13 | #include <sstream>
 14 | #include <string>
 15 | #include <type_traits>
 16 | 
 17 | /// If defined, indicates that json library is embedded in CppTL library.
 18 | //# define JSON_IN_CPPTL 1
 19 | 
 20 | /// If defined, indicates that json may leverage CppTL library
 21 | //#  define JSON_USE_CPPTL 1
 22 | /// If defined, indicates that cpptl vector based map should be used instead of
 23 | /// std::map
 24 | /// as Value container.
 25 | //#  define JSON_USE_CPPTL_SMALLMAP 1
 26 | 
 27 | // If non-zero, the library uses exceptions to report bad input instead of C
 28 | // assertion macros. The default is to use exceptions.
 29 | #ifndef JSON_USE_EXCEPTION
 30 | #define JSON_USE_EXCEPTION 1
 31 | #endif
 32 | 
 33 | // Temporary, tracked for removal with issue #982.
 34 | #ifndef JSON_USE_NULLREF
 35 | #define JSON_USE_NULLREF 1
 36 | #endif
 37 | 
 38 | /// If defined, indicates that the source file is amalgamated
 39 | /// to prevent private header inclusion.
 40 | /// Remarks: it is automatically defined in the generated amalgamated header.
 41 | // #define JSON_IS_AMALGAMATION
 42 | 
 43 | #ifdef JSON_IN_CPPTL
 44 | #include <cpptl/config.h>
 45 | #ifndef JSON_USE_CPPTL
 46 | #define JSON_USE_CPPTL 1
 47 | #endif
 48 | #endif
 49 | 
 50 | #ifdef JSON_IN_CPPTL
 51 | #define JSON_API CPPTL_API
 52 | #elif defined(JSON_DLL_BUILD)
 53 | #if defined(_MSC_VER) || defined(__MINGW32__)
 54 | #define JSON_API __declspec(dllexport)
 55 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING
 56 | #elif defined(__GNUC__) || defined(__clang__)
 57 | #define JSON_API __attribute__((visibility("default")))
 58 | #endif // if defined(_MSC_VER)
 59 | #elif defined(JSON_DLL)
 60 | #if defined(_MSC_VER) || defined(__MINGW32__)
 61 | #define JSON_API __declspec(dllimport)
 62 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING
 63 | #endif // if defined(_MSC_VER)
 64 | #endif // ifdef JSON_IN_CPPTL
 65 | #if !defined(JSON_API)
 66 | #define JSON_API
 67 | #endif
 68 | 
 69 | #if defined(_MSC_VER) && _MSC_VER < 1800
 70 | #error                                                                         \
 71 |     "ERROR:  Visual Studio 12 (2013) with _MSC_VER=1800 is the oldest supported compiler with sufficient C++11 capabilities"
 72 | #endif
 73 | 
 74 | #if defined(_MSC_VER) && _MSC_VER < 1900
 75 | // As recommended at
 76 | // https://stackoverflow.com/questions/2915672/snprintf-and-visual-studio-2010
 77 | extern JSON_API int
 78 | msvc_pre1900_c99_snprintf(char* outBuf, size_t size, const char* format, ...);
 79 | #define jsoncpp_snprintf msvc_pre1900_c99_snprintf
 80 | #else
 81 | #define jsoncpp_snprintf std::snprintf
 82 | #endif
 83 | 
 84 | // If JSON_NO_INT64 is defined, then Json only support C++ "int" type for
 85 | // integer
 86 | // Storages, and 64 bits integer support is disabled.
 87 | // #define JSON_NO_INT64 1
 88 | 
 89 | // JSONCPP_OVERRIDE is maintained for backwards compatibility of external tools.
 90 | // C++11 should be used directly in JSONCPP.
 91 | #define JSONCPP_OVERRIDE override
 92 | 
 93 | #if __cplusplus >= 201103L
 94 | #define JSONCPP_NOEXCEPT noexcept
 95 | #define JSONCPP_OP_EXPLICIT explicit
 96 | #elif defined(_MSC_VER) && _MSC_VER < 1900
 97 | #define JSONCPP_NOEXCEPT throw()
 98 | #define JSONCPP_OP_EXPLICIT explicit
 99 | #elif defined(_MSC_VER) && _MSC_VER >= 1900
100 | #define JSONCPP_NOEXCEPT noexcept
101 | #define JSONCPP_OP_EXPLICIT explicit
102 | #else
103 | #define JSONCPP_NOEXCEPT throw()
104 | #define JSONCPP_OP_EXPLICIT
105 | #endif
106 | 
// JSONCPP_DEPRECATED(message): marks a declaration as deprecated using the
// attribute syntax of the detected compiler. Compilers are probed in the order
// clang, GCC, MSVC (clang also emulates the other two, so it must come first).
#if defined(__clang__)
#if __has_extension(attribute_deprecated_with_message)
#define JSONCPP_DEPRECATED(message) __attribute__((deprecated(message)))
#endif
#elif defined(__GNUC__)
#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405
// GCC 4.5 and later: the attribute takes a message.
#define JSONCPP_DEPRECATED(message) __attribute__((deprecated(message)))
#elif (__GNUC__ * 100 + __GNUC_MINOR__) >= 301
// GCC 3.1 up to 4.4: the message is dropped.
#define JSONCPP_DEPRECATED(message) __attribute__((__deprecated__))
#endif
#elif defined(_MSC_VER)
#define JSONCPP_DEPRECATED(message) __declspec(deprecated(message))
#endif

// No usable attribute was found above: expand to nothing.
#ifndef JSONCPP_DEPRECATED
#define JSONCPP_DEPRECATED(message)
#endif
125 | 
// Defined to 1 only when __GNUC__ is defined and is at least 6.
#if defined(__GNUC__) && (__GNUC__ >= 6)
#define JSON_USE_INT64_DOUBLE_CONVERSION 1
#endif
129 | 
130 | #if !defined(JSON_IS_AMALGAMATION)
131 | 
132 | #include "allocator.h"
133 | #include "version.h"
134 | 
135 | #endif // if !defined(JSON_IS_AMALGAMATION)
136 | 
137 | namespace Json {
138 | typedef int Int;
139 | typedef unsigned int UInt;
140 | #if defined(JSON_NO_INT64)
141 | typedef int LargestInt;
142 | typedef unsigned int LargestUInt;
143 | #undef JSON_HAS_INT64
144 | #else                 // if defined(JSON_NO_INT64)
145 | // For Microsoft Visual use specific types as long long is not supported
146 | #if defined(_MSC_VER) // Microsoft Visual Studio
147 | typedef __int64 Int64;
148 | typedef unsigned __int64 UInt64;
149 | #else                 // if defined(_MSC_VER) // Other platforms, use long long
150 | typedef int64_t Int64;
151 | typedef uint64_t UInt64;
152 | #endif                // if defined(_MSC_VER)
153 | typedef Int64 LargestInt;
154 | typedef UInt64 LargestUInt;
155 | #define JSON_HAS_INT64
156 | #endif // if defined(JSON_NO_INT64)
157 | 
158 | template <typename T>
159 | using Allocator = typename std::conditional<JSONCPP_USING_SECURE_MEMORY,
160 |                                             SecureAllocator<T>,
161 |                                             std::allocator<T>>::type;
162 | using String = std::basic_string<char, std::char_traits<char>, Allocator<char>>;
163 | using IStringStream = std::basic_istringstream<String::value_type,
164 |                                                String::traits_type,
165 |                                                String::allocator_type>;
166 | using OStringStream = std::basic_ostringstream<String::value_type,
167 |                                                String::traits_type,
168 |                                                String::allocator_type>;
169 | using IStream = std::istream;
170 | using OStream = std::ostream;
171 | } // namespace Json
172 | 
173 | // Legacy names (formerly macros).
174 | using JSONCPP_STRING = Json::String;
175 | using JSONCPP_ISTRINGSTREAM = Json::IStringStream;
176 | using JSONCPP_OSTRINGSTREAM = Json::OStringStream;
177 | using JSONCPP_ISTREAM = Json::IStream;
178 | using JSONCPP_OSTREAM = Json::OStream;
179 | 
180 | #endif // JSON_CONFIG_H_INCLUDED
181 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cuh:
--------------------------------------------------------------------------------
  1 | //
  2 | // sph_kernel.cuh
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #ifndef _SPH_KERNEL_CUH
 10 | #define _SPH_KERNEL_CUH
 11 | 
 12 | #include <cuda_runtime.h>
 13 | #include "sph_parameter.h"
 14 | #include "sph_particle.h"
 15 | #include "pcisph_factor.h"
 16 | 
 17 | namespace sph
 18 | {
 19 | 
 20 | struct ParticleIdxRange // [begin, end), zero-based numbering
 21 | {
 22 |     __host__ __device__
 23 |     ParticleIdxRange(){}
 24 |     __host__ __device__
 25 |     ParticleIdxRange(int b, int e) : begin(b), end(e) {}
 26 |     int begin, end;
 27 | };
 28 | 
 29 | 
 30 | void BuffInit(ParticleBufferList buff_list_n, int nm);
 31 | 
 32 | void transSysParaToDevice(const SystemParameter *host_para);
 33 | 
 34 | void initializeKernel();
 35 | 
 36 | void releaseKernel();
 37 | 
 38 | void find_max_P(int blocks, int tds, sumGrad *id_value, int numbers);
 39 | 
 40 | 
 41 | 
 42 | void computeMixDensityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 43 | 
 44 | void computeDriftVelocityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 45 | 
 46 | void computeVolumeFracTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 47 | 
 48 | void computeAccelTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 49 | 
 50 | void advanceMix(ParticleBufferList buff_list, int nump);
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | void computeDensityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 68 | 
 69 | void computeForceTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 70 | 
 71 | void computeDensitySMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 72 | 
 73 | void computeDensitySMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 74 | void computeDensityHybrid128n(int *cell_offset_M, ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 75 | 
 76 | void computeForceHybrid128n(int *cell_offset_M, ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 77 | //void computeDensityHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 78 | 
 79 | //void computeForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 80 | 
 81 | void computeForceSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 82 | 
 83 | void computeForceSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 84 | 
 85 | void computeOtherForceSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 86 | 
 87 | void computeOtherForceTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_number, BlockTask *block_task, int num_block);
 88 | 
 89 | void computeOtherForceSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 90 | 
 91 | void computeOtherForceHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 92 | 
 93 | 
 94 | void computeOtherForceHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 95 | 
 96 | void computeOtherForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
 97 | 
 98 | void computeOtherForceTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);
 99 | 
100 | void manualSetting(ParticleBufferList buff_list, int nump, int step);
101 | 
102 | void advance(ParticleBufferList buff_list, int nump);
103 | void advanceWave(ParticleBufferList buff_list, int nump, float time);
104 | //sf pcisph-----------------------
105 | 
106 | void advancePCI(ParticleBufferList buff_list, int nump);
107 | 
108 | float computeDensityErrorFactorTRA(float mass, float rest_density, float time_step, ParticleBufferList buff_list, int *cell_offset, int *cell_num, uint nump);
109 | 
110 | 
111 | void computeGradWValuesSimpleSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, sumGrad *particle_device);
112 | 
113 | void computeGradWValuesSimpleTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range, sumGrad *particle_device);
114 | 
115 | void predictionCorrectionStepSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, 
116 | 							float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed);
117 | 
118 | 
119 | 
120 | 
121 | 
122 | void predictionCorrectionStepTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_number, BlockTask *block_task, int num_block
123 |                                   , float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed);
124 | 
125 | 
126 | 
127 | 
128 | 
129 | void predictionCorrectionStepSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
130 |                                  float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed);
131 | 
132 | 
133 | void predictionCorrectionStepHybrid(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
134 |                                     float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed, ParticleIdxRange range);
135 | 
136 | void predictionCorrectionStepHybrid128(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
137 |                                     float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed, ParticleIdxRange range);
138 | 
139 | void predictionCorrectionStepHybrid128n(ParticleBufferList buff_list_n,int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
140 |                                        float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed, ParticleIdxRange range);
141 | 
142 | void predictionCorrectionStepTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, 
143 |                                  float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float	pcisph_max_density_error_allowed, ParticleIdxRange range);
144 | 
145 | 
146 | 
147 | void predictPositionAndVelocity(ParticleBufferList buff_list, unsigned int nump);
148 | void computePredictedDensityAndPressureSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
149 | 
150 | 
151 | 
152 | void computePredictedDensityAndPressureTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
153 | 
154 | 
155 | 
156 | void computePredictedDensityAndPressureSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
157 | 
158 | void computePredictedDensityAndPressureHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
159 | 
160 | void computePredictedDensityAndPressureHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
161 | 
162 | void computePredictedDensityAndPressureHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
163 | void computePredictedDensityAndPressureTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range, float pcisph_density_factor);
164 | 
165 | 
166 | 
167 | 
168 | void getMaxPredictedDensityCUDA(ParticleBufferList buff_list, float& max_predicted_density, unsigned int nump);
169 | void computeCorrectivePressureForce(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
170 | 
171 | 
172 | void computeCorrectivePressureForceTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
173 | 
174 | 
175 | 
176 | void computeCorrectivePressureForce64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
177 | 
178 | void computeCorrectivePressureForceHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
179 | 
180 | void computeCorrectivePressureForceHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
181 | void computeCorrectivePressureForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
182 | 
183 | void computeCorrectivePressureForceTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range);
184 | 
185 | //sf heat conduction-------------------
186 | void computeHeatFlux(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
187 | 
188 | void computeTemperatureAndPhaseTransAndGetVis(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
189 | 
190 | }
191 | 
192 | #endif/*_SPH_KERNEL_CUH*/
193 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_tra_arti_block_statistics.json:
--------------------------------------------------------------------------------
  1 | [
  2 |    {
  3 |       "block_array" : [
  4 |          {
  5 |             "CONST_ACC" : 11,
  6 |             "F_ADD_SUB" : 0,
  7 |             "F_DIV" : 3,
  8 |             "F_DIVIDEF" : 0,
  9 |             "F_EXP2" : 0,
 10 |             "F_LOG2" : 0,
 11 |             "F_MAD_MUL" : 0,
 12 |             "F_MIN_MAX" : 0,
 13 |             "F_RCP" : 0,
 14 |             "F_RSQRT" : 0,
 15 |             "F_SIN_COS" : 0,
 16 |             "F_SQRT" : 0,
 17 |             "GLOBAL_ACC" : 5,
 18 |             "I_ABS" : 0,
 19 |             "I_ADD_SUB" : 7,
 20 |             "I_DIV_REM" : 0,
 21 |             "I_LOGICAL" : 0,
 22 |             "I_MAD_MUL" : 3,
 23 |             "I_MIN_MAX" : 0,
 24 |             "I_MUL24" : 1,
 25 |             "I_SAD" : 0,
 26 |             "I_SHL_SHR" : 1,
 27 |             "LOCAL_ACC" : 0,
 28 |             "SHARED_ACC" : 0,
 29 |             "begin" : 0,
 30 |             "end" : 55,
 31 |             "name" : "arti - [0, 55]",
 32 |             "num_bra" : 1,
 33 |             "num_insts" : 52,
 34 |             "num_sync" : 0,
 35 |             "num_unknown" : 20
 36 |          },
 37 |          {
 38 |             "CONST_ACC" : 0,
 39 |             "F_ADD_SUB" : 0,
 40 |             "F_DIV" : 0,
 41 |             "F_DIVIDEF" : 0,
 42 |             "F_EXP2" : 0,
 43 |             "F_LOG2" : 0,
 44 |             "F_MAD_MUL" : 0,
 45 |             "F_MIN_MAX" : 0,
 46 |             "F_RCP" : 0,
 47 |             "F_RSQRT" : 0,
 48 |             "F_SIN_COS" : 0,
 49 |             "F_SQRT" : 0,
 50 |             "GLOBAL_ACC" : 2,
 51 |             "I_ABS" : 0,
 52 |             "I_ADD_SUB" : 4,
 53 |             "I_DIV_REM" : 0,
 54 |             "I_LOGICAL" : 0,
 55 |             "I_MAD_MUL" : 1,
 56 |             "I_MIN_MAX" : 0,
 57 |             "I_MUL24" : 0,
 58 |             "I_SAD" : 0,
 59 |             "I_SHL_SHR" : 1,
 60 |             "LOCAL_ACC" : 0,
 61 |             "SHARED_ACC" : 0,
 62 |             "begin" : 55,
 63 |             "end" : 81,
 64 |             "name" : "arti - [55, 81]",
 65 |             "num_bra" : 2,
 66 |             "num_insts" : 24,
 67 |             "num_sync" : 0,
 68 |             "num_unknown" : 14
 69 |          },
 70 |          {
 71 |             "CONST_ACC" : 0,
 72 |             "F_ADD_SUB" : 5,
 73 |             "F_DIV" : 0,
 74 |             "F_DIVIDEF" : 0,
 75 |             "F_EXP2" : 1,
 76 |             "F_LOG2" : 1,
 77 |             "F_MAD_MUL" : 4,
 78 |             "F_MIN_MAX" : 0,
 79 |             "F_RCP" : 0,
 80 |             "F_RSQRT" : 0,
 81 |             "F_SIN_COS" : 0,
 82 |             "F_SQRT" : 0,
 83 |             "GLOBAL_ACC" : 3,
 84 |             "I_ABS" : 0,
 85 |             "I_ADD_SUB" : 2,
 86 |             "I_DIV_REM" : 0,
 87 |             "I_LOGICAL" : 0,
 88 |             "I_MAD_MUL" : 0,
 89 |             "I_MIN_MAX" : 0,
 90 |             "I_MUL24" : 0,
 91 |             "I_SAD" : 0,
 92 |             "I_SHL_SHR" : 0,
 93 |             "LOCAL_ACC" : 0,
 94 |             "SHARED_ACC" : 0,
 95 |             "begin" : 81,
 96 |             "end" : 104,
 97 |             "name" : "arti - [81, 104]",
 98 |             "num_bra" : 2,
 99 |             "num_insts" : 22,
100 |             "num_sync" : 0,
101 |             "num_unknown" : 4
102 |          },
103 |          {
104 |             "CONST_ACC" : 0,
105 |             "F_ADD_SUB" : 1,
106 |             "F_DIV" : 0,
107 |             "F_DIVIDEF" : 0,
108 |             "F_EXP2" : 0,
109 |             "F_LOG2" : 0,
110 |             "F_MAD_MUL" : 0,
111 |             "F_MIN_MAX" : 0,
112 |             "F_RCP" : 0,
113 |             "F_RSQRT" : 0,
114 |             "F_SIN_COS" : 0,
115 |             "F_SQRT" : 0,
116 |             "GLOBAL_ACC" : 0,
117 |             "I_ABS" : 0,
118 |             "I_ADD_SUB" : 1,
119 |             "I_DIV_REM" : 0,
120 |             "I_LOGICAL" : 0,
121 |             "I_MAD_MUL" : 0,
122 |             "I_MIN_MAX" : 0,
123 |             "I_MUL24" : 0,
124 |             "I_SAD" : 0,
125 |             "I_SHL_SHR" : 0,
126 |             "LOCAL_ACC" : 0,
127 |             "SHARED_ACC" : 0,
128 |             "begin" : 104,
129 |             "end" : 109,
130 |             "name" : "arti - [104, 109]",
131 |             "num_bra" : 0,
132 |             "num_insts" : 4,
133 |             "num_sync" : 0,
134 |             "num_unknown" : 2
135 |          },
136 |          {
137 |             "CONST_ACC" : 5,
138 |             "F_ADD_SUB" : 1,
139 |             "F_DIV" : 0,
140 |             "F_DIVIDEF" : 1,
141 |             "F_EXP2" : 1,
142 |             "F_LOG2" : 1,
143 |             "F_MAD_MUL" : 4,
144 |             "F_MIN_MAX" : 0,
145 |             "F_RCP" : 0,
146 |             "F_RSQRT" : 0,
147 |             "F_SIN_COS" : 0,
148 |             "F_SQRT" : 0,
149 |             "GLOBAL_ACC" : 2,
150 |             "I_ABS" : 0,
151 |             "I_ADD_SUB" : 4,
152 |             "I_DIV_REM" : 0,
153 |             "I_LOGICAL" : 0,
154 |             "I_MAD_MUL" : 0,
155 |             "I_MIN_MAX" : 0,
156 |             "I_MUL24" : 0,
157 |             "I_SAD" : 0,
158 |             "I_SHL_SHR" : 1,
159 |             "LOCAL_ACC" : 0,
160 |             "SHARED_ACC" : 0,
161 |             "begin" : 109,
162 |             "end" : 136,
163 |             "name" : "arti - [109, 136]",
164 |             "num_bra" : 3,
165 |             "num_insts" : 26,
166 |             "num_sync" : 0,
167 |             "num_unknown" : 3
168 |          }
169 |       ],
170 |       "function_name" : "_Z21knBmComputeDensityTRAN3SPH18ParticleBufferListEPjS1_jS1_"
171 |    },
172 |    {
173 |       "block_array" : [
174 |          {
175 |             "CONST_ACC" : 11,
176 |             "F_ADD_SUB" : 0,
177 |             "F_DIV" : 3,
178 |             "F_DIVIDEF" : 0,
179 |             "F_EXP2" : 0,
180 |             "F_LOG2" : 0,
181 |             "F_MAD_MUL" : 0,
182 |             "F_MIN_MAX" : 0,
183 |             "F_RCP" : 0,
184 |             "F_RSQRT" : 0,
185 |             "F_SIN_COS" : 0,
186 |             "F_SQRT" : 0,
187 |             "GLOBAL_ACC" : 9,
188 |             "I_ABS" : 0,
189 |             "I_ADD_SUB" : 10,
190 |             "I_DIV_REM" : 0,
191 |             "I_LOGICAL" : 0,
192 |             "I_MAD_MUL" : 3,
193 |             "I_MIN_MAX" : 0,
194 |             "I_MUL24" : 1,
195 |             "I_SAD" : 0,
196 |             "I_SHL_SHR" : 2,
197 |             "LOCAL_ACC" : 0,
198 |             "SHARED_ACC" : 0,
199 |             "begin" : 0,
200 |             "end" : 70,
201 |             "name" : "arti - [0, 70]",
202 |             "num_bra" : 1,
203 |             "num_insts" : 67,
204 |             "num_sync" : 0,
205 |             "num_unknown" : 27
206 |          },
207 |          {
208 |             "CONST_ACC" : 4,
209 |             "F_ADD_SUB" : 0,
210 |             "F_DIV" : 0,
211 |             "F_DIVIDEF" : 0,
212 |             "F_EXP2" : 0,
213 |             "F_LOG2" : 0,
214 |             "F_MAD_MUL" : 0,
215 |             "F_MIN_MAX" : 0,
216 |             "F_RCP" : 0,
217 |             "F_RSQRT" : 0,
218 |             "F_SIN_COS" : 0,
219 |             "F_SQRT" : 0,
220 |             "GLOBAL_ACC" : 2,
221 |             "I_ABS" : 0,
222 |             "I_ADD_SUB" : 6,
223 |             "I_DIV_REM" : 0,
224 |             "I_LOGICAL" : 0,
225 |             "I_MAD_MUL" : 1,
226 |             "I_MIN_MAX" : 0,
227 |             "I_MUL24" : 0,
228 |             "I_SAD" : 0,
229 |             "I_SHL_SHR" : 2,
230 |             "LOCAL_ACC" : 0,
231 |             "SHARED_ACC" : 0,
232 |             "begin" : 70,
233 |             "end" : 117,
234 |             "name" : "arti - [70, 117]",
235 |             "num_bra" : 3,
236 |             "num_insts" : 46,
237 |             "num_sync" : 0,
238 |             "num_unknown" : 28
239 |          },
240 |          {
241 |             "CONST_ACC" : 6,
242 |             "F_ADD_SUB" : 13,
243 |             "F_DIV" : 0,
244 |             "F_DIVIDEF" : 1,
245 |             "F_EXP2" : 1,
246 |             "F_LOG2" : 1,
247 |             "F_MAD_MUL" : 24,
248 |             "F_MIN_MAX" : 0,
249 |             "F_RCP" : 1,
250 |             "F_RSQRT" : 0,
251 |             "F_SIN_COS" : 0,
252 |             "F_SQRT" : 1,
253 |             "GLOBAL_ACC" : 8,
254 |             "I_ABS" : 0,
255 |             "I_ADD_SUB" : 8,
256 |             "I_DIV_REM" : 0,
257 |             "I_LOGICAL" : 0,
258 |             "I_MAD_MUL" : 0,
259 |             "I_MIN_MAX" : 0,
260 |             "I_MUL24" : 0,
261 |             "I_SAD" : 0,
262 |             "I_SHL_SHR" : 0,
263 |             "LOCAL_ACC" : 0,
264 |             "SHARED_ACC" : 0,
265 |             "begin" : 117,
266 |             "end" : 191,
267 |             "name" : "arti - [117, 191]",
268 |             "num_bra" : 1,
269 |             "num_insts" : 73,
270 |             "num_sync" : 0,
271 |             "num_unknown" : 8
272 |          },
273 |          {
274 |             "CONST_ACC" : 0,
275 |             "F_ADD_SUB" : 3,
276 |             "F_DIV" : 0,
277 |             "F_DIVIDEF" : 0,
278 |             "F_EXP2" : 0,
279 |             "F_LOG2" : 0,
280 |             "F_MAD_MUL" : 0,
281 |             "F_MIN_MAX" : 0,
282 |             "F_RCP" : 0,
283 |             "F_RSQRT" : 0,
284 |             "F_SIN_COS" : 0,
285 |             "F_SQRT" : 0,
286 |             "GLOBAL_ACC" : 0,
287 |             "I_ABS" : 0,
288 |             "I_ADD_SUB" : 1,
289 |             "I_DIV_REM" : 0,
290 |             "I_LOGICAL" : 0,
291 |             "I_MAD_MUL" : 0,
292 |             "I_MIN_MAX" : 0,
293 |             "I_MUL24" : 0,
294 |             "I_SAD" : 0,
295 |             "I_SHL_SHR" : 0,
296 |             "LOCAL_ACC" : 0,
297 |             "SHARED_ACC" : 0,
298 |             "begin" : 191,
299 |             "end" : 203,
300 |             "name" : "arti - [191, 203]",
301 |             "num_bra" : 1,
302 |             "num_insts" : 10,
303 |             "num_sync" : 0,
304 |             "num_unknown" : 5
305 |          },
306 |          {
307 |             "CONST_ACC" : 8,
308 |             "F_ADD_SUB" : 0,
309 |             "F_DIV" : 0,
310 |             "F_DIVIDEF" : 2,
311 |             "F_EXP2" : 0,
312 |             "F_LOG2" : 0,
313 |             "F_MAD_MUL" : 21,
314 |             "F_MIN_MAX" : 0,
315 |             "F_RCP" : 0,
316 |             "F_RSQRT" : 0,
317 |             "F_SIN_COS" : 0,
318 |             "F_SQRT" : 1,
319 |             "GLOBAL_ACC" : 5,
320 |             "I_ABS" : 0,
321 |             "I_ADD_SUB" : 8,
322 |             "I_DIV_REM" : 0,
323 |             "I_LOGICAL" : 0,
324 |             "I_MAD_MUL" : 1,
325 |             "I_MIN_MAX" : 0,
326 |             "I_MUL24" : 2,
327 |             "I_SAD" : 0,
328 |             "I_SHL_SHR" : 1,
329 |             "LOCAL_ACC" : 0,
330 |             "SHARED_ACC" : 0,
331 |             "begin" : 203,
332 |             "end" : 276,
333 |             "name" : "arti - [203, 276]",
334 |             "num_bra" : 4,
335 |             "num_insts" : 71,
336 |             "num_sync" : 0,
337 |             "num_unknown" : 18
338 |          }
339 |       ],
340 |       "function_name" : "_Z19knBmComputeForceTRAN3SPH18ParticleBufferListEPjS1_jS1_"
341 |    }
342 | ]
343 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/prefix_sum.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NVIDIA Corporation and its licensors retain all intellectual property and 
  5 |  * proprietary rights in and to this software and related documentation and 
  6 |  * any modifications thereto.  Any use, reproduction, disclosure, or distribution 
  7 |  * of this software and related documentation without an express license 
  8 |  * agreement from NVIDIA Corporation is strictly prohibited.
  9 |  * 
 10 |  */
 11 | 
 12 | #ifndef _SCAN_BEST_KERNEL_CU_
 13 | #define _SCAN_BEST_KERNEL_CU_
 14 | 
 15 | // Define this to more rigorously avoid bank conflicts, 
 16 | // even at the lower (root) levels of the tree
 17 | // Note that due to the higher addressing overhead, performance 
 18 | // is lower with ZERO_BANK_CONFLICTS enabled.  It is provided
 19 | // as an example.
 20 | //#define ZERO_BANK_CONFLICTS 
 21 | 
 22 | #define LOG_NUM_BANKS	 4
 23 | 
 24 | #ifdef ZERO_BANK_CONFLICTS
 25 | #define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS + (index) >> (2*LOG_NUM_BANKS))
 26 | #else
 27 | #define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS)
 28 | #endif
 29 | 
 30 | ///////////////////////////////////////////////////////////////////////////////
 31 | // Work-efficient compute implementation of scan, one thread per 2 elements
 32 | // Work-efficient: O(log(n)) steps, and O(n) adds.
 33 | // Also shared storage efficient: Uses n + n/NUM_BANKS shared memory -- no ping-ponging
 34 | // Also avoids most bank conflicts using single-element offsets every NUM_BANKS elements.
 35 | //
 36 | // In addition, If ZERO_BANK_CONFLICTS is defined, uses 
 37 | //     n + n/NUM_BANKS + n/(NUM_BANKS*NUM_BANKS) 
 38 | // shared memory. If ZERO_BANK_CONFLICTS is defined, avoids ALL bank conflicts using 
 39 | // single-element offsets every NUM_BANKS elements, plus additional single-element offsets 
 40 | // after every NUM_BANKS^2 elements.
 41 | //
 42 | // Uses a balanced tree type algorithm.  See Blelloch, 1990 "Prefix Sums 
 43 | // and Their Applications", or Prins and Chatterjee PRAM course notes:
 44 | // https://www.cs.unc.edu/~prins/Classes/633/Handouts/pram.pdf
 45 | // 
 46 | // This work-efficient version is based on the algorithm presented in Guy Blelloch's
 47 | // excellent paper "Prefix sums and their applications".
 48 | // http://www.cs.cmu.edu/~blelloch/papers/Ble93.pdf
 49 | //
 50 | // Pro: Work Efficient, very few bank conflicts (or zero if ZERO_BANK_CONFLICTS is defined)
 51 | // Con: More instructions to compute bank-conflict-free shared memory addressing,
 52 | // and slightly more shared memory storage used.
 53 | //
 54 | 
 55 | template <bool isNP2> __device__ void loadSharedChunkFromMem (float *s_data, const float *g_idata, int n, int baseIndex, int& ai, int& bi, int& mem_ai, int& mem_bi, int& bankOffsetA, int& bankOffsetB )
 56 | {
 57 |     int thid = threadIdx.x;
 58 |     mem_ai = baseIndex + threadIdx.x;
 59 |     mem_bi = mem_ai + blockDim.x;
 60 | 
 61 |     ai = thid;
 62 |     bi = thid + blockDim.x;
 63 |     bankOffsetA = CONFLICT_FREE_OFFSET(ai);		    // compute spacing to avoid bank conflicts
 64 |     bankOffsetB = CONFLICT_FREE_OFFSET(bi);
 65 |     
 66 | 	s_data[ai + bankOffsetA] = g_idata[mem_ai];		// Cache the computational window in shared memory pad values beyond n with zeros
 67 |     
 68 |     if (isNP2) { // compile-time decision
 69 |         s_data[bi + bankOffsetB] = (bi < n) ? g_idata[mem_bi] : 0; 
 70 |     } else {
 71 |         s_data[bi + bankOffsetB] = g_idata[mem_bi]; 
 72 |     }
 73 | }
 74 | 
 75 | 
 76 | template <bool isNP2> __device__ void loadSharedChunkFromMemInt (int *s_data, const int *g_idata, int n, int baseIndex, int& ai, int& bi, int& mem_ai, int& mem_bi, int& bankOffsetA, int& bankOffsetB )
 77 | {
 78 |     int thid = threadIdx.x;
 79 |     mem_ai = baseIndex + threadIdx.x;
 80 |     mem_bi = mem_ai + blockDim.x;
 81 | 
 82 |     ai = thid;
 83 |     bi = thid + blockDim.x;
 84 |     bankOffsetA = CONFLICT_FREE_OFFSET(ai);		    // compute spacing to avoid bank conflicts
 85 |     bankOffsetB = CONFLICT_FREE_OFFSET(bi);
 86 |     
 87 | 	s_data[ai + bankOffsetA] = g_idata[mem_ai];		// Cache the computational window in shared memory pad values beyond n with zeros
 88 |     
 89 |     if (isNP2) { // compile-time decision
 90 |         s_data[bi + bankOffsetB] = (bi < n) ? g_idata[mem_bi] : 0; 
 91 |     } else {
 92 |         s_data[bi + bankOffsetB] = g_idata[mem_bi]; 
 93 |     }
 94 | }
 95 | 
 96 | template <bool isNP2> __device__ void storeSharedChunkToMem(float* g_odata, const float* s_data, int n, int ai, int bi, int mem_ai, int mem_bi,int bankOffsetA, int bankOffsetB)
 97 | {
 98 |     __syncthreads();
 99 | 
100 |     g_odata[mem_ai] = s_data[ai + bankOffsetA];			// write results to global memory
101 |     if (isNP2) { // compile-time decision
102 |         if (bi < n) g_odata[mem_bi] = s_data[bi + bankOffsetB]; 
103 |     } else {
104 |         g_odata[mem_bi] = s_data[bi + bankOffsetB]; 
105 |     }
106 | }
// Writes this thread's two int results from the padded shared buffer s_data
// back to global memory g_odata.
//
// ai/bi are the unpadded shared slots, bankOffsetA/bankOffsetB their padding,
// and mem_ai/mem_bi the destination indices in g_odata. With isNP2 == true the
// second element is written only when bi < n; the first is always written.
// Contains a __syncthreads(), so every thread of the block must call it.
template <bool isNP2>
__device__ void storeSharedChunkToMemInt(int* g_odata, const int* s_data, int n,
                                         int ai, int bi, int mem_ai, int mem_bi,
                                         int bankOffsetA, int bankOffsetB)
{
    // Block-wide barrier before shared memory is read back.
    __syncthreads();

    const int first = s_data[ai + bankOffsetA];
    g_odata[mem_ai] = first;

    // For isNP2 == false the condition is a compile-time `true`.
    if (!isNP2 || bi < n) {
        g_odata[mem_bi] = s_data[bi + bankOffsetB];
    }
}
118 | 
119 | 
// Zeroes the last element (unpadded index 2 * blockDim.x - 1) of the float
// shared buffer s_data. Only thread 0 of the block does any work.
//
// With storeSum == true the value found in that slot is first copied to
// g_blockSums[blockIndex]; with storeSum == false g_blockSums is not touched.
template <bool storeSum>
__device__ void clearLastElement(float* s_data, float *g_blockSums, int blockIndex)
{
    if (threadIdx.x != 0) {
        return;
    }

    // Padded position of the final element handled by this block.
    const int last = (blockDim.x << 1) - 1;
    const int slot = last + CONFLICT_FREE_OFFSET(last);

    if (storeSum) { // compile-time decision
        // Publish this block's total before it is overwritten.
        g_blockSums[blockIndex] = s_data[slot];
    }

    // Zero the last element so it propagates back to the front of the scan.
    s_data[slot] = 0;
}
132 | 
// Integer counterpart of clearLastElement. Executed by thread 0 only: optionally
// records the block total (the last element of the padded array) into
// g_blockSums[blockIndex], then zeroes that element so the zero propagates back
// to the front during the down-sweep.
template <bool storeSum> __device__ void clearLastElementInt ( int* s_data, int *g_blockSums, int blockIndex)
{
    if (threadIdx.x != 0) return;

    // logical index of the last of the 2 * blockDim.x elements, then add its padding
    int last = (blockDim.x << 1) - 1;
    last += CONFLICT_FREE_OFFSET(last);

    // storeSum is a template constant: the store is compiled out when false
    if (storeSum) g_blockSums[blockIndex] = s_data[last];

    s_data[last] = 0;
}
145 | 
146 | 
// Builds the partial sums in place, walking up the tree: at each level the number
// of active threads halves and the distance between combined elements doubles.
// Returns the stride reached after the final level, which the down-sweep
// (scanRootToLeaves) takes as its starting point.
__device__  static unsigned int buildSum(float *s_data)
{
    const unsigned int tid = threadIdx.x;
    unsigned int stride = 1;

    int active = blockDim.x;
    while (active > 0) {
        // results of the previous level must be visible to every thread
        __syncthreads();

        if (tid < active) {
            int left  = __mul24(__mul24(2, stride), tid) + stride - 1;
            int right = left + stride;
            left  += CONFLICT_FREE_OFFSET(left);
            right += CONFLICT_FREE_OFFSET(right);
            s_data[right] += s_data[left];
        }

        stride <<= 1;
        active >>= 1;
    }
    return stride;
}
// Integer counterpart of buildSum: builds the partial sums in place, walking up
// the tree. At each level the number of active threads halves and the distance
// between combined elements doubles. Returns the stride reached after the final
// level, which the down-sweep (scanRootToLeavesInt) takes as its starting point.
__device__ static unsigned int buildSumInt(int *s_data)
{
    const unsigned int tid = threadIdx.x;
    unsigned int stride = 1;

    int active = blockDim.x;
    while (active > 0) {
        // results of the previous level must be visible to every thread
        __syncthreads();

        if (tid < active) {
            int left  = __mul24(__mul24(2, stride), tid) + stride - 1;
            int right = left + stride;
            left  += CONFLICT_FREE_OFFSET(left);
            right += CONFLICT_FREE_OFFSET(right);
            s_data[right] += s_data[left];
        }

        stride <<= 1;
        active >>= 1;
    }
    return stride;
}
188 | 
// Walks back down the tree, turning the partial sums produced by buildSum into
// the scan in place. `stride` is the value returned by buildSum; it is halved at
// every level while the number of active threads doubles.
__device__ static void scanRootToLeaves(float *s_data, unsigned int stride)
{
    const unsigned int tid = threadIdx.x;

    int active = 1;
    while (active <= blockDim.x) {
        stride >>= 1;
        // results of the previous level must be visible to every thread
        __syncthreads();

        if (tid < active) {
            int left  = __mul24(__mul24(2, stride), tid) + stride - 1;
            int right = left + stride;
            left  += CONFLICT_FREE_OFFSET(left);
            right += CONFLICT_FREE_OFFSET(right);

            // the left child receives the parent's value, the right child the
            // parent's value plus the old left value
            const float oldLeft = s_data[left];
            s_data[left]   = s_data[right];
            s_data[right] += oldLeft;
        }

        active *= 2;
    }
}
210 | 
// Integer counterpart of scanRootToLeaves: walks back down the tree, turning the
// partial sums produced by buildSumInt into the scan in place. `stride` is the
// value returned by buildSumInt; it is halved at every level while the number of
// active threads doubles.
__device__ static void scanRootToLeavesInt(int *s_data, unsigned int stride)
{
    const unsigned int tid = threadIdx.x;

    int active = 1;
    while (active <= blockDim.x) {
        stride >>= 1;
        // results of the previous level must be visible to every thread
        __syncthreads();

        if (tid < active) {
            int left  = __mul24(__mul24(2, stride), tid) + stride - 1;
            int right = left + stride;
            left  += CONFLICT_FREE_OFFSET(left);
            right += CONFLICT_FREE_OFFSET(right);

            // the left child receives the parent's value, the right child the
            // parent's value plus the old left value
            const int oldLeft = s_data[left];
            s_data[left]   = s_data[right];
            s_data[right] += oldLeft;
        }

        active *= 2;
    }
}
232 | 
// Scans one block's data in place: up-sweep, clear the last element (optionally
// saving the block total), then down-sweep. A blockIndex of 0 means "use
// blockIdx.x" as the slot in blockSums that receives the block total.
template <bool storeSum> __device__ void prescanBlock(float *data, int blockIndex, float *blockSums)
{
    const int sumSlot = (blockIndex == 0) ? blockIdx.x : blockIndex;

    int stride = buildSum (data);
    clearLastElement<storeSum> (data, blockSums, sumSlot);
    scanRootToLeaves (data, stride);
}
// Integer counterpart of prescanBlock: up-sweep, clear the last element
// (optionally saving the block total), then down-sweep. A blockIndex of 0 means
// "use blockIdx.x" as the slot in blockSums that receives the block total.
template <bool storeSum> __device__ void prescanBlockInt (int *data, int blockIndex, int *blockSums)
{
    const int sumSlot = (blockIndex == 0) ? blockIdx.x : blockIndex;

    int stride = buildSumInt (data);
    clearLastElementInt <storeSum>(data, blockSums, sumSlot);
    scanRootToLeavesInt (data, stride);
}
245 | 
// Adds one per-block value to every element handled by that block.
//   g_data       array being updated in place
//   uniforms     per-block values; block b uses uniforms[b + blockOffset]
//   n            element count used to bound the second add of each thread
//   blockOffset  offset into uniforms for block 0 of this launch
//   baseIndex    index in g_data of the first element handled by this launch
// Each thread updates two elements, blockDim.x apart. The second element is
// touched only when threadIdx.x + blockDim.x < n: a partial last block must not
// read or write past the end of g_data.
__global__ static void uniformAdd(float *g_data, float *uniforms, int n, int blockOffset, int baseIndex)
{
    __shared__ float uni;
    if (threadIdx.x == 0) uni = uniforms[blockIdx.x + blockOffset];
    unsigned int address = __mul24(blockIdx.x, (blockDim.x << 1)) + baseIndex + threadIdx.x;

    // make thread 0's load of `uni` visible to the whole block
    __syncthreads();

    // note two adds per thread
    g_data[address] += uni;
    if (threadIdx.x + blockDim.x < n) {
        g_data[address + blockDim.x] += uni;
    }
}
// Integer counterpart of uniformAdd: adds one per-block value to every element
// handled by that block.
//   g_data       array being updated in place
//   uniforms     per-block values; block b uses uniforms[b + blockOffset]
//   n            element count used to bound the second add of each thread
//   blockOffset  offset into uniforms for block 0 of this launch
//   baseIndex    index in g_data of the first element handled by this launch
// Each thread updates two elements, blockDim.x apart. The second element is
// touched only when threadIdx.x + blockDim.x < n: a partial last block must not
// read or write past the end of g_data.
__global__ static void uniformAddInt(int *g_data, int *uniforms, int n, int blockOffset, int baseIndex)
{
    __shared__ int uni;
    if (threadIdx.x == 0) uni = uniforms[blockIdx.x + blockOffset];
    unsigned int address = __mul24(blockIdx.x, (blockDim.x << 1)) + baseIndex + threadIdx.x;

    // make thread 0's load of `uni` visible to the whole block
    __syncthreads();

    // note two adds per thread
    g_data[address] += uni;
    if (threadIdx.x + blockDim.x < n) {
        g_data[address + blockDim.x] += uni;
    }
}
268 | 
269 | 
270 | #endif // #ifndef _SCAN_BEST_KERNEL_CU_
271 | 
272 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/freeglut_ext.h:
--------------------------------------------------------------------------------
  1 | #ifndef  __FREEGLUT_EXT_H__
  2 | #define  __FREEGLUT_EXT_H__
  3 | 
  4 | /*
  5 |  * freeglut_ext.h
  6 |  *
  7 |  * The non-GLUT-compatible extensions to the freeglut library include file
  8 |  *
  9 |  * Copyright (c) 1999-2000 Pawel W. Olszta. All Rights Reserved.
 10 |  * Written by Pawel W. Olszta, <olszta@sourceforge.net>
 11 |  * Creation date: Thu Dec 2 1999
 12 |  *
 13 |  * Permission is hereby granted, free of charge, to any person obtaining a
 14 |  * copy of this software and associated documentation files (the "Software"),
 15 |  * to deal in the Software without restriction, including without limitation
 16 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 17 |  * and/or sell copies of the Software, and to permit persons to whom the
 18 |  * Software is furnished to do so, subject to the following conditions:
 19 |  *
 20 |  * The above copyright notice and this permission notice shall be included
 21 |  * in all copies or substantial portions of the Software.
 22 |  *
 23 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 24 |  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 25 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 26 |  * PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 27 |  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 28 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29 |  */
 30 | 
 31 | #ifdef __cplusplus
 32 |     extern "C" {
 33 | #endif
 34 | 
 35 | /*
 36 |  * Additional GLUT Key definitions for the Special key function
 37 |  */
 38 | #define GLUT_KEY_NUM_LOCK           0x006D
 39 | #define GLUT_KEY_BEGIN              0x006E
 40 | #define GLUT_KEY_DELETE             0x006F
 41 | #define GLUT_KEY_SHIFT_L            0x0070
 42 | #define GLUT_KEY_SHIFT_R            0x0071
 43 | #define GLUT_KEY_CTRL_L             0x0072
 44 | #define GLUT_KEY_CTRL_R             0x0073
 45 | #define GLUT_KEY_ALT_L              0x0074
 46 | #define GLUT_KEY_ALT_R              0x0075
 47 | 
 48 | /*
 49 |  * GLUT API Extension macro definitions -- behaviour when the user clicks on an "x" to close a window
 50 |  */
 51 | #define GLUT_ACTION_EXIT                         0
 52 | #define GLUT_ACTION_GLUTMAINLOOP_RETURNS         1
 53 | #define GLUT_ACTION_CONTINUE_EXECUTION           2
 54 | 
 55 | /*
 56 |  * Create a new rendering context when the user opens a new window?
 57 |  */
 58 | #define GLUT_CREATE_NEW_CONTEXT                  0
 59 | #define GLUT_USE_CURRENT_CONTEXT                 1
 60 | 
 61 | /*
 62 |  * Direct/Indirect rendering context options (has meaning only in Unix/X11)
 63 |  */
 64 | #define GLUT_FORCE_INDIRECT_CONTEXT              0
 65 | #define GLUT_ALLOW_DIRECT_CONTEXT                1
 66 | #define GLUT_TRY_DIRECT_CONTEXT                  2
 67 | #define GLUT_FORCE_DIRECT_CONTEXT                3
 68 | 
 69 | /*
 70 |  * GLUT API Extension macro definitions -- the glutGet parameters
 71 |  */
 72 | #define  GLUT_INIT_STATE                    0x007C
 73 | 
 74 | #define  GLUT_ACTION_ON_WINDOW_CLOSE        0x01F9
 75 | 
 76 | #define  GLUT_WINDOW_BORDER_WIDTH           0x01FA
 77 | #define  GLUT_WINDOW_BORDER_HEIGHT          0x01FB
 78 | #define  GLUT_WINDOW_HEADER_HEIGHT          0x01FB  /* Docs say it should always have been GLUT_WINDOW_BORDER_HEIGHT, keep this for backward compatibility */
 79 | 
 80 | #define  GLUT_VERSION                       0x01FC
 81 | 
 82 | #define  GLUT_RENDERING_CONTEXT             0x01FD
 83 | #define  GLUT_DIRECT_RENDERING              0x01FE
 84 | 
 85 | #define  GLUT_FULL_SCREEN                   0x01FF
 86 | 
 87 | #define  GLUT_SKIP_STALE_MOTION_EVENTS      0x0204
 88 | 
 89 | #define  GLUT_GEOMETRY_VISUALIZE_NORMALS    0x0205
 90 | 
 91 | #define  GLUT_STROKE_FONT_DRAW_JOIN_DOTS    0x0206  /* Draw dots between line segments of stroke fonts? */
 92 | 
 93 | /*
 94 |  * New tokens for glutInitDisplayMode.
 95 |  * Only one GLUT_AUXn bit may be used at a time.
 96 |  * Value 0x0400 is defined in OpenGLUT.
 97 |  */
 98 | #define  GLUT_AUX                           0x1000
 99 | 
100 | #define  GLUT_AUX1                          0x1000
101 | #define  GLUT_AUX2                          0x2000
102 | #define  GLUT_AUX3                          0x4000
103 | #define  GLUT_AUX4                          0x8000
104 | 
105 | /*
106 |  * Context-related flags, see fg_state.c
107 |  * Set the requested OpenGL version
108 |  */
109 | #define  GLUT_INIT_MAJOR_VERSION            0x0200
110 | #define  GLUT_INIT_MINOR_VERSION            0x0201
111 | #define  GLUT_INIT_FLAGS                    0x0202
112 | #define  GLUT_INIT_PROFILE                  0x0203
113 | 
114 | /*
115 |  * Flags for glutInitContextFlags, see fg_init.c
116 |  */
117 | #define  GLUT_DEBUG                         0x0001
118 | #define  GLUT_FORWARD_COMPATIBLE            0x0002
119 | 
120 | 
121 | /*
122 |  * Flags for glutInitContextProfile, see fg_init.c
123 |  */
124 | #define GLUT_CORE_PROFILE                   0x0001
125 | #define	GLUT_COMPATIBILITY_PROFILE          0x0002
126 | 
127 | /*
128 |  * Process loop function, see fg_main.c
129 |  */
130 | FGAPI void    FGAPIENTRY glutMainLoopEvent( void );
131 | FGAPI void    FGAPIENTRY glutLeaveMainLoop( void );
132 | FGAPI void    FGAPIENTRY glutExit         ( void );
133 | 
134 | /*
135 |  * Window management functions, see fg_window.c
136 |  */
137 | FGAPI void    FGAPIENTRY glutFullScreenToggle( void );
138 | FGAPI void    FGAPIENTRY glutLeaveFullScreen( void );
139 | 
140 | /*
141 |  * Menu functions
142 |  */
143 | FGAPI void    FGAPIENTRY glutSetMenuFont( int menuID, void* font );
144 | 
145 | /*
146 |  * Window-specific callback functions, see fg_callbacks.c
147 |  */
148 | FGAPI void    FGAPIENTRY glutMouseWheelFunc( void (* callback)( int, int, int, int ) );
149 | FGAPI void    FGAPIENTRY glutPositionFunc( void (* callback)( int, int ) );
150 | FGAPI void    FGAPIENTRY glutCloseFunc( void (* callback)( void ) );
151 | FGAPI void    FGAPIENTRY glutWMCloseFunc( void (* callback)( void ) );
152 | /* And also a destruction callback for menus */
153 | FGAPI void    FGAPIENTRY glutMenuDestroyFunc( void (* callback)( void ) );
154 | 
155 | /*
156 |  * State setting and retrieval functions, see fg_state.c
157 |  */
158 | FGAPI void    FGAPIENTRY glutSetOption ( GLenum option_flag, int value );
159 | FGAPI int *   FGAPIENTRY glutGetModeValues(GLenum mode, int * size);
160 | /* A.Donev: User-data manipulation */
161 | FGAPI void*   FGAPIENTRY glutGetWindowData( void );
162 | FGAPI void    FGAPIENTRY glutSetWindowData(void* data);
163 | FGAPI void*   FGAPIENTRY glutGetMenuData( void );
164 | FGAPI void    FGAPIENTRY glutSetMenuData(void* data);
165 | 
166 | /*
167 |  * Font stuff, see fg_font.c
168 |  */
169 | FGAPI int     FGAPIENTRY glutBitmapHeight( void* font );
170 | FGAPI GLfloat FGAPIENTRY glutStrokeHeight( void* font );
171 | FGAPI void    FGAPIENTRY glutBitmapString( void* font, const unsigned char *string );
172 | FGAPI void    FGAPIENTRY glutStrokeString( void* font, const unsigned char *string );
173 | 
174 | /*
175 |  * Geometry functions, see fg_geometry.c
176 |  */
177 | FGAPI void    FGAPIENTRY glutWireRhombicDodecahedron( void );
178 | FGAPI void    FGAPIENTRY glutSolidRhombicDodecahedron( void );
179 | FGAPI void    FGAPIENTRY glutWireSierpinskiSponge ( int num_levels, double offset[3], double scale );
180 | FGAPI void    FGAPIENTRY glutSolidSierpinskiSponge ( int num_levels, double offset[3], double scale );
181 | FGAPI void    FGAPIENTRY glutWireCylinder( double radius, double height, GLint slices, GLint stacks);
182 | FGAPI void    FGAPIENTRY glutSolidCylinder( double radius, double height, GLint slices, GLint stacks);
183 | 
184 | /*
185 |  * Rest of functions for rendering Newell's teaset, found in fg_teapot.c
186 |  * NB: front facing polygons have clockwise winding, not counter clockwise
187 |  */
188 | FGAPI void    FGAPIENTRY glutWireTeacup( double size );
189 | FGAPI void    FGAPIENTRY glutSolidTeacup( double size );
190 | FGAPI void    FGAPIENTRY glutWireTeaspoon( double size );
191 | FGAPI void    FGAPIENTRY glutSolidTeaspoon( double size );
192 | 
193 | /*
194 |  * Extension functions, see fg_ext.c
195 |  */
196 | typedef void (*GLUTproc)();
197 | FGAPI GLUTproc FGAPIENTRY glutGetProcAddress( const char *procName );
198 | 
199 | /*
200 |  * Multi-touch/multi-pointer extensions
201 |  */
202 | 
203 | #define GLUT_HAS_MULTI 1
204 | 
205 | /* TODO: add device_id parameter,
206 |    cf. http://sourceforge.net/mailarchive/forum.php?thread_name=20120518071314.GA28061%40perso.beuc.net&forum_name=freeglut-developer */
207 | FGAPI void FGAPIENTRY glutMultiEntryFunc( void (* callback)( int, int ) );
208 | FGAPI void FGAPIENTRY glutMultiButtonFunc( void (* callback)( int, int, int, int, int ) );
209 | FGAPI void FGAPIENTRY glutMultiMotionFunc( void (* callback)( int, int, int ) );
210 | FGAPI void FGAPIENTRY glutMultiPassiveFunc( void (* callback)( int, int, int ) );
211 | 
212 | /*
213 |  * Joystick functions, see fg_joystick.c
214 |  */
215 | /* USE OF THESE FUNCTIONS IS DEPRECATED !!!!! */
216 | /* If you have a serious need for these functions in your application, please either
217 |  * contact the "freeglut" developer community at freeglut-developer@lists.sourceforge.net,
218 |  * switch to the OpenGLUT library, or else port your joystick functionality over to PLIB's
219 |  * "js" library.
220 |  */
221 | int     glutJoystickGetNumAxes( int ident );
222 | int     glutJoystickGetNumButtons( int ident );
223 | int     glutJoystickNotWorking( int ident );
224 | float   glutJoystickGetDeadBand( int ident, int axis );
225 | void    glutJoystickSetDeadBand( int ident, int axis, float db );
226 | float   glutJoystickGetSaturation( int ident, int axis );
227 | void    glutJoystickSetSaturation( int ident, int axis, float st );
228 | void    glutJoystickSetMinRange( int ident, float *axes );
229 | void    glutJoystickSetMaxRange( int ident, float *axes );
230 | void    glutJoystickSetCenter( int ident, float *axes );
231 | void    glutJoystickGetMinRange( int ident, float *axes );
232 | void    glutJoystickGetMaxRange( int ident, float *axes );
233 | void    glutJoystickGetCenter( int ident, float *axes );
234 | 
235 | /*
236 |  * Initialization functions, see fg_init.c
237 |  */
238 | /* to get the typedef for va_list */
239 | #include <stdarg.h>
240 | FGAPI void    FGAPIENTRY glutInitContextVersion( int majorVersion, int minorVersion );
241 | FGAPI void    FGAPIENTRY glutInitContextFlags( int flags );
242 | FGAPI void    FGAPIENTRY glutInitContextProfile( int profile );
243 | FGAPI void    FGAPIENTRY glutInitErrorFunc( void (* callback)( const char *fmt, va_list ap ) );
244 | FGAPI void    FGAPIENTRY glutInitWarningFunc( void (* callback)( const char *fmt, va_list ap ) );
245 | 
246 | /* OpenGL >= 2.0 support */
247 | FGAPI void    FGAPIENTRY glutSetVertexAttribCoord3(GLint attrib);
248 | FGAPI void    FGAPIENTRY glutSetVertexAttribNormal(GLint attrib);
249 | FGAPI void    FGAPIENTRY glutSetVertexAttribTexCoord2(GLint attrib);
250 | 
251 | /* Mobile platforms lifecycle */
252 | FGAPI void    FGAPIENTRY glutInitContextFunc(void (* callback)());
253 | FGAPI void    FGAPIENTRY glutAppStatusFunc(void (* callback)(int));
254 | /* state flags that can be passed to callback set by glutAppStatusFunc */
255 | #define GLUT_APPSTATUS_PAUSE                0x0001
256 | #define GLUT_APPSTATUS_RESUME               0x0002
257 | 
258 | /*
259 |  * GLUT API macro definitions -- the display mode definitions
260 |  */
261 | #define  GLUT_CAPTIONLESS                   0x0400
262 | #define  GLUT_BORDERLESS                    0x0800
263 | #define  GLUT_SRGB                          0x1000
264 | 
265 | #ifdef __cplusplus
266 |     }
267 | #endif
268 | 
269 | /*** END OF FILE ***/
270 | 
271 | #endif /* __FREEGLUT_EXT_H__ */
272 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
  3 | *
  4 | * NVIDIA Corporation and its licensors retain all intellectual property and
  5 | * proprietary rights in and to this software and related documentation and
  6 | * any modifications thereto.  Any use, reproduction, disclosure, or distribution
  7 | * of this software and related documentation without an express license
  8 | * agreement from NVIDIA Corporation is strictly prohibited.
  9 | *
 10 | */
 11 | 
 12 | // includes, kernels
 13 | #include "scan.cuh"
 14 | #include <assert.h>
 15 | #include <math.h>
 16 | #include "..\cuda_call_check.h"
 17 | #include "scan_kern.cuh"
 18 | 
 19 | inline bool isPowerOfTwo(int n) { return ((n&(n - 1)) == 0); }
 20 | 
 21 | inline int floorPow2(int n) {
 22 | #ifdef WIN32
 23 |     return 1 << (int)logb((float)n);
 24 | #else
 25 |     int exp;
 26 |     frexp((float)n, &exp);
 27 |     return 1 << (exp - 1);
 28 | #endif
 29 | }
 30 | 
// Host array of per-level device buffers holding block sums for the float scan
// (filled by preallocBlockSums, released by deallocBlockSums).
float**			g_scanBlockSums = 0;
// Same, for the int scan (preallocBlockSumsInt / deallocBlockSumsInt).
int**			g_scanBlockSumsInt = 0;
// Element count passed to the last prealloc call; 0 means nothing is allocated.
// NOTE(review): shared by the float and int variants, so only one of them can
// be allocated at a time.
unsigned int	g_numEltsAllocated = 0;
// Number of entries in whichever block-sum array is currently allocated.
unsigned int	g_numLevelsAllocated = 0;
 35 | 
 36 | void preallocBlockSums(unsigned int maxNumElements)
 37 | {
 38 |     assert(g_numEltsAllocated == 0); // shouldn't be called 
 39 | 
 40 |     g_numEltsAllocated = maxNumElements;
 41 |     unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks
 42 |     unsigned int numElts = maxNumElements;
 43 |     int level = 0;
 44 | 
 45 |     do {
 46 |         unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize)));
 47 |         if (numBlocks > 1) level++;
 48 |         numElts = numBlocks;
 49 |     } while (numElts > 1);
 50 | 
 51 |     g_scanBlockSums = (float**)malloc(level * sizeof(float*));
 52 |     g_numLevelsAllocated = level;
 53 | 
 54 |     numElts = maxNumElements;
 55 |     level = 0;
 56 | 
 57 |     do {
 58 |         unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize)));
 59 |         if (numBlocks > 1)
 60 |             CUDA_SAFE_CALL(cudaMalloc((void**)&g_scanBlockSums[level++], numBlocks * sizeof(float)));
 61 |         numElts = numBlocks;
 62 |     } while (numElts > 1);
 63 | 
 64 | }
 65 | void preallocBlockSumsInt(unsigned int maxNumElements)
 66 | {
 67 |     assert(g_numEltsAllocated == 0); // shouldn't be called 
 68 | 
 69 |     g_numEltsAllocated = maxNumElements;
 70 |     unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks
 71 |     unsigned int numElts = maxNumElements;
 72 |     int level = 0;
 73 | 
 74 |     do {
 75 |         unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize)));
 76 |         if (numBlocks > 1) level++;
 77 |         numElts = numBlocks;
 78 |     } while (numElts > 1);
 79 | 
 80 |     g_scanBlockSumsInt = (int**)malloc(level * sizeof(int*));
 81 |     g_numLevelsAllocated = level;
 82 | 
 83 |     numElts = maxNumElements;
 84 |     level = 0;
 85 | 
 86 |     do {
 87 |         unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize)));
 88 |         if (numBlocks > 1) CUDA_SAFE_CALL(cudaMalloc((void**)&g_scanBlockSumsInt[level++], numBlocks * sizeof(int)));
 89 |         numElts = numBlocks;
 90 |     } while (numElts > 1);
 91 | }
 92 | 
 93 | void deallocBlockSums()
 94 | {
 95 |     if (g_scanBlockSums != 0x0) {
 96 |         for (unsigned int i = 0; i < g_numLevelsAllocated; i++)
 97 |             CUDA_SAFE_CALL(cudaFree(g_scanBlockSums[i]));
 98 | 
 99 |         free((void**)g_scanBlockSums);
100 |     }
101 | 
102 |     g_scanBlockSums = 0;
103 |     g_numEltsAllocated = 0;
104 |     g_numLevelsAllocated = 0;
105 | }
// Releases the int block-sum buffers created by preallocBlockSumsInt and resets
// the allocation bookkeeping. Safe to call when nothing is allocated.
void deallocBlockSumsInt()
{
    // Test the int array itself: checking the float array here meant the int
    // buffers were never freed when only preallocBlockSumsInt had been called.
    if (g_scanBlockSumsInt != 0x0) {
        for (unsigned int i = 0; i < g_numLevelsAllocated; i++)
            CUDA_SAFE_CALL(cudaFree(g_scanBlockSumsInt[i]));
        free((void**)g_scanBlockSumsInt);
    }

    g_scanBlockSumsInt = 0;
    g_numEltsAllocated = 0;
    g_numLevelsAllocated = 0;
}
118 | 
119 | 
120 | 
// Launches the exclusive prefix sum of numElements floats from inArray into
// outArray (the two may be the same buffer).
//   level  index into g_scanBlockSums used for this recursion depth; callers
//          start at 0.
// Arrays larger than one block (2 * BLOCK_SIZE elements) are scanned per block,
// the block totals are scanned recursively, and the scanned totals are then
// added back to every block. g_scanBlockSums must have been allocated for at
// least numElements elements whenever more than one block is needed.
void prescanArrayRecursive(float *outArray, const float *inArray, int numElements, int level)
{
    // Nothing to scan.
    if (numElements <= 0) return;

    // One element would give numThreads == 0 below, which is not a valid launch
    // configuration. The exclusive scan of a single element is just 0, and an
    // all-zero byte pattern is 0.0f.
    if (numElements == 1) {
        CUDA_SAFE_CALL(cudaMemset(outArray, 0, sizeof(float)));
        return;
    }

    unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks
    unsigned int numBlocks = max(1, (int)ceil((float)numElements / (2.f * blockSize)));
    unsigned int numThreads;

    if (numBlocks > 1)
        numThreads = blockSize;
    else if (isPowerOfTwo(numElements))
        numThreads = numElements / 2;
    else
        numThreads = floorPow2(numElements);

    unsigned int numEltsPerBlock = numThreads * 2;

    // if this is a non-power-of-2 array, the last block will be non-full
    // compute the smallest power of 2 able to compute its scan.
    unsigned int numEltsLastBlock = numElements - (numBlocks - 1) * numEltsPerBlock;
    unsigned int numThreadsLastBlock = max(1, numEltsLastBlock / 2);
    unsigned int np2LastBlock = 0;
    unsigned int sharedMemLastBlock = 0;

    if (numEltsLastBlock != numEltsPerBlock) {
        np2LastBlock = 1;
        if (!isPowerOfTwo(numEltsLastBlock)) numThreadsLastBlock = floorPow2(numEltsLastBlock);
        unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS;
        sharedMemLastBlock = sizeof(float) * (2 * numThreadsLastBlock + extraSpace);
    }

    // padding space is used to avoid shared memory bank conflicts
    unsigned int extraSpace = numEltsPerBlock / NUM_BANKS;
    unsigned int sharedMemSize = sizeof(float) * (numEltsPerBlock + extraSpace);

#ifdef DEBUG
    if (numBlocks > 1) assert(g_numEltsAllocated >= numElements);
#endif

    // setup execution parameters
    // if NP2, we process the last block separately
    dim3  grid(max(1, numBlocks - np2LastBlock), 1, 1);
    dim3  threads(numThreads, 1, 1);

    // execute the scan
    if (numBlocks > 1) {
        prescan<true, false><<<grid, threads, sharedMemSize>>>(outArray, inArray, g_scanBlockSums[level], numThreads * 2, 0, 0);
        if (np2LastBlock) {
            prescan<true, true><<<1, numThreadsLastBlock, sharedMemLastBlock>>>(outArray, inArray, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock);
        }

        // After scanning all the sub-blocks, we are mostly done.  But now we 
        // need to take all of the last values of the sub-blocks and scan those.  
        // This will give us a new value that must be added to each block to 
        // get the final results.
        // recursive (CPU) call
        prescanArrayRecursive(g_scanBlockSums[level], g_scanBlockSums[level], numBlocks, level + 1);

        uniformAdd<<<grid, threads>>>(outArray, g_scanBlockSums[level], numElements - numEltsLastBlock, 0, 0);
        if (np2LastBlock) {
            uniformAdd<<<1, numThreadsLastBlock>>>(outArray, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock);
        }
    }
    else if (isPowerOfTwo(numElements)) {
        prescan<false, false><<<grid, threads, sharedMemSize>>>(outArray, inArray, 0, numThreads * 2, 0, 0);
    }
    else {
        prescan<false, true><<<grid, threads, sharedMemSize>>>(outArray, inArray, 0, numElements, 0, 0);
    }
}
189 | 
// Launches the exclusive prefix sum of numElements ints from inArray into
// outArray (the two may be the same buffer).
//   level  index into g_scanBlockSumsInt used for this recursion depth; callers
//          start at 0.
// Arrays larger than one block (2 * BLOCK_SIZE elements) are scanned per block,
// the block totals are scanned recursively, and the scanned totals are then
// added back to every block. g_scanBlockSumsInt must have been allocated for at
// least numElements elements whenever more than one block is needed.
void prescanArrayRecursiveInt(int *outArray, const int *inArray, int numElements, int level)
{
    // Nothing to scan.
    if (numElements <= 0) return;

    // One element would give numThreads == 0 below, which is not a valid launch
    // configuration. The exclusive scan of a single element is just 0.
    if (numElements == 1) {
        CUDA_SAFE_CALL(cudaMemset(outArray, 0, sizeof(int)));
        return;
    }

    unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks
    unsigned int numBlocks = max(1, (int)ceil((float)numElements / (2.f * blockSize)));
    unsigned int numThreads;

    if (numBlocks > 1)
        numThreads = blockSize;
    else if (isPowerOfTwo(numElements))
        numThreads = numElements / 2;
    else
        numThreads = floorPow2(numElements);

    unsigned int numEltsPerBlock = numThreads * 2;

    // if this is a non-power-of-2 array, the last block will be non-full
    // compute the smallest power of 2 able to compute its scan.
    unsigned int numEltsLastBlock = numElements - (numBlocks - 1) * numEltsPerBlock;
    unsigned int numThreadsLastBlock = max(1, numEltsLastBlock / 2);
    unsigned int np2LastBlock = 0;
    unsigned int sharedMemLastBlock = 0;

    if (numEltsLastBlock != numEltsPerBlock) {
        np2LastBlock = 1;
        if (!isPowerOfTwo(numEltsLastBlock)) numThreadsLastBlock = floorPow2(numEltsLastBlock);
        unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS;
        // shared memory holds ints here, so size it with sizeof(int)
        sharedMemLastBlock = sizeof(int) * (2 * numThreadsLastBlock + extraSpace);
    }

    // padding space is used to avoid shared memory bank conflicts
    unsigned int extraSpace = numEltsPerBlock / NUM_BANKS;
    unsigned int sharedMemSize = sizeof(int) * (numEltsPerBlock + extraSpace);

#ifdef DEBUG
    if (numBlocks > 1) assert(g_numEltsAllocated >= numElements);
#endif

    // setup execution parameters
    // if NP2, we process the last block separately
    dim3  grid(max(1, numBlocks - np2LastBlock), 1, 1);
    dim3  threads(numThreads, 1, 1);

    // execute the scan
    if (numBlocks > 1) {
        prescanInt<true, false><<<grid, threads, sharedMemSize>>>(outArray, inArray, g_scanBlockSumsInt[level], numThreads * 2, 0, 0);
        if (np2LastBlock) {
            prescanInt<true, true><<<1, numThreadsLastBlock, sharedMemLastBlock>>>(outArray, inArray, g_scanBlockSumsInt[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock);
        }

        // After scanning all the sub-blocks, we are mostly done.  But now we 
        // need to take all of the last values of the sub-blocks and scan those.  
        // This will give us a new value that must be added to each block to 
        // get the final results.
        // recursive (CPU) call
        prescanArrayRecursiveInt(g_scanBlockSumsInt[level], g_scanBlockSumsInt[level], numBlocks, level + 1);

        uniformAddInt<<<grid, threads>>>(outArray, g_scanBlockSumsInt[level], numElements - numEltsLastBlock, 0, 0);
        if (np2LastBlock) {
            uniformAddInt<<<1, numThreadsLastBlock>>>(outArray, g_scanBlockSumsInt[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock);
        }
    }
    else if (isPowerOfTwo(numElements)) {
        prescanInt<false, false><<<grid, threads, sharedMemSize>>>(outArray, inArray, 0, numThreads * 2, 0, 0);
    }
    else {
        prescanInt<false, true><<<grid, threads, sharedMemSize>>>(outArray, inArray, 0, numElements, 0, 0);
    }
}
258 | 
259 | 
// Prefix sum of `num` floats from d_idata into d_odata (both device pointers).
// The temporary block-sum buffers are allocated before the scan and released
// again afterwards.
void prescanArray(float *d_odata, float *d_idata, int num)
{
    const int topLevel = 0;

    preallocBlockSums(num);                                   // scratch space for block totals
    prescanArrayRecursive(d_odata, d_idata, num, topLevel);   // perform the prefix sum
    deallocBlockSums();                                       // release the scratch space
}
// Prefix sum of `num` ints from d_idata into d_odata (both device pointers).
// The temporary block-sum buffers are allocated before the scan and released
// again afterwards.
void prescanArrayInt(int *d_odata, int *d_idata, int num)
{
    const int topLevel = 0;

    preallocBlockSumsInt(num);                                  // scratch space for block totals
    prescanArrayRecursiveInt(d_odata, d_idata, num, topLevel);  // perform the prefix sum
    deallocBlockSumsInt();                                      // release the scratch space
}
274 | 
// Device input / output buffers shared by the prefixSum* helpers below.
// Allocated by prefixSumToGPU and freed (and reset to NULL) by prefixSumFromGPU.
char* d_idata = NULL;
char* d_odata = NULL;
277 | 
// Scans `num` floats from the module-level d_idata buffer into d_odata.
// The buffers are expected to have been set up with prefixSumToGPU.
void prefixSum(int num)
{
    float* out = (float*)d_odata;
    float* in  = (float*)d_idata;
    prescanArray(out, in, num);
}
282 | 
// Scans `num` ints from the module-level d_idata buffer into d_odata.
// The buffers are expected to have been set up with prefixSumToGPU.
void prefixSumInt(int num)
{
    int* out = (int*)d_odata;
    int* in  = (int*)d_idata;
    prescanArrayInt(out, in, num);
}
287 | 
// Allocates the device input/output buffers (d_idata, d_odata) and uploads the
// host array into d_idata.
//   inArray  host data, num elements of siz bytes each
//   num      number of elements
//   siz      size of one element in bytes
void prefixSumToGPU(char* inArray, int num, int siz)
{
    // compute the byte count in size_t so large buffers do not overflow int
    const size_t bytes = (size_t)num * (size_t)siz;

    // Release buffers left over from an earlier upload that was never read
    // back; overwriting the pointers would leak them.
    if (d_idata != NULL) {
        CUDA_SAFE_CALL(cudaFree(d_idata));
        d_idata = NULL;
    }
    if (d_odata != NULL) {
        CUDA_SAFE_CALL(cudaFree(d_odata));
        d_odata = NULL;
    }

    CUDA_SAFE_CALL(cudaMalloc((void**)&d_idata, bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_odata, bytes));
    CUDA_SAFE_CALL(cudaMemcpy(d_idata, inArray, bytes, cudaMemcpyHostToDevice));
}
// Downloads the scan result from d_odata into the host array, then frees both
// device buffers and resets the pointers to NULL.
//   outArray  host destination, room for num elements of siz bytes each
//   num       number of elements
//   siz       size of one element in bytes
void prefixSumFromGPU(char* outArray, int num, int siz)
{
    // compute the byte count in size_t so large buffers do not overflow int
    const size_t bytes = (size_t)num * (size_t)siz;

    CUDA_SAFE_CALL(cudaMemcpy(outArray, d_odata, bytes, cudaMemcpyDeviceToHost));
    CUDA_SAFE_CALL(cudaFree(d_idata));
    CUDA_SAFE_CALL(cudaFree(d_odata));
    d_idata = NULL;
    d_odata = NULL;
}
302 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model.cu:
--------------------------------------------------------------------------------
  1 | //
  2 | // gpu_model.cu
  3 | // Hybrid_Parallel_SPH
  4 | //
  5 | // created by kmhuang and ruanjm on 2018/09/01
  6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
  7 | //
  8 | 
  9 | #include "gpu_model.cuh"
 10 | #include <device_launch_parameters.h>
 11 | #include "cuda_call_check.h"
 12 | #include "cuda_math.cuh"
 13 | #include "gpu_model.h"
 14 | #include "gpu_model_reader.h"
 15 | #include "sph_utils.cuh"
 16 | 
 17 | namespace gpu_model
 18 | {
 19 | 
 20 | using namespace sph;
 21 | 
// Names of the JSON input files (presumably instruction latencies and per-kernel
// PTX block statistics for the SMS and TRA variants — confirm against the reader).
const char *kLatencyFileName = "insts_latency.json";
const char *kPTXStatisticsFileNameSMS = "sph_sms_arti_block_statistics.json";
const char *kPTXStatisticsFileNameTRA = "sph_tra_arti_block_statistics.json";
// Mangled kernel names, presumably used as lookup keys into the statistics files.
// NOTE(review): the SMS names use namespace "sph" while the TRA names use "SPH"
// and a different parameter list — confirm they match the current kernels.
const char *kFunNameDensitySMS = "_ZN3sph19knComputeDensitySMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE";
const char *kFunNameForceSMS = "_ZN3sph17knComputeForceSMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE";
const char *kFunNameDensityTRA = "_Z21knBmComputeDensityTRAN3SPH18ParticleBufferListEPjS1_jS1_";
const char *kFunNameForceTRA = "_Z19knBmComputeForceTRAN3SPH18ParticleBufferListEPjS1_jS1_";

// Numeric identifiers for the four modelled kernels.
#define kDensityTRA     0
#define kForceTRA       1
#define kDensitySMS     2
#define kForceSMS       3

// presumably the index of the centre cell within a 3x3x3 neighbourhood — TODO confirm
const int kSelfID = 13U;
// presumably the number of cells in a 3x3x3 neighbourhood — TODO confirm
const int kNumNeighbor = 27U;
 37 | //const float v1_32 = 1.f / 32.f;
 38 | #define MIN(A, B) ((A) > (B) ? (B) : (A))   // smaller of A and B; arguments parenthesised so compound expressions expand correctly (each is still evaluated twice)
 39 | 
 40 | const int kDefaultNumThread = 256;   // threads per block used by both launch wrappers at the end of this file
 41 | __constant__ GPUDeviceInfo kDevGPUInfo;   // filled by allocateGPUModel through cudaMemcpyToSymbol
 42 | __constant__ InstructionInfo kDevInstsLatency;   // filled by allocateGPUModel; indexed per instruction class in knCalculateAvgLatency
 43 | 
 44 | struct GPUModel   // Pointers allocated with cudaMalloc in allocateGPUModel and released in freeGPUModel; passed by value to the hybrid kernel.
 45 | {
 46 |     PTXBlockStatistic *bs_tra_density;   // statistics read for kFunNameDensityTRA
 47 |     PTXBlockStatistic *bs_tra_force;     // statistics read for kFunNameForceTRA
 48 |     PTXBlockStatistic *bs_sms_density;   // statistics read for kFunNameDensitySMS
 49 |     PTXBlockStatistic *bs_sms_force;     // statistics read for kFunNameForceSMS
 50 |     // 4 entries produced by calculateStaticBlock, indexed by kDensityTRA..kForceSMS
 51 |     PTXBlockStatistic *static_block;
 52 |     // 4 entries filled by setKernelParameters, same indexing
 53 |     KernelRelatedParas *kn_paras;
 54 | };
 55 | 
 56 | /****************************** Kernel ******************************/
 57 | 
 58 | __global__
 59 | void knCalculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc)
 60 | {   // One thread per cell: stores in block_req how many block_size-sized blocks the cell's start..end range needs.
 61 |     const unsigned int cell = __umul24(blockDim.x, blockIdx.x) + threadIdx.x;
 62 |     if (cell >= numc) return;   // threads past the last cell have nothing to do
 63 | 
 64 |     const int first = cell_start[cell];
 65 |     const int last = cell_end[cell];
 66 |     // A start equal to kInvalidCellIdx yields 0; otherwise ceil_int(end - start, block_size).
 67 |     block_req[cell] = (kInvalidCellIdx == first) ? 0 : ceil_int(last - first, block_size);
 68 | }
 69 | 
 70 | __device__
 71 | float knCalculateAvgLatency(const PTXBlockStatistic &insts_count, float default_lat)
 72 | {   // Returns the latency-weighted instruction total divided by (num_insts - num_sync).
 73 |     float weighted = 0.0f;
 74 | 
 75 |     // Counters in num_stat are weighted by the matching entry of kDevInstsLatency.
 76 |     for (size_t k = 0; k < ARI_STAT_SIZE; ++k)
 77 |         weighted += (float)insts_count.num_stat[k] * kDevInstsLatency[k];
 78 | 
 79 |     // Counters in num_mem, plus num_bra and num_unknown, are all charged default_lat.
 80 |     for (size_t k = 0; k < MEM_STAT_SIZE; ++k)
 81 |         weighted += (float)insts_count.num_mem[k] * default_lat;
 82 |     weighted += (float)insts_count.num_bra * default_lat;
 83 |     weighted += (float)insts_count.num_unknown * default_lat;
 84 | 
 85 |     return weighted / (insts_count.num_insts - insts_count.num_sync);
 86 | }
 87 | 
 88 | __device__
 89 | float knCalculateKernelClock(PTXBlockStatistic *blocks, int *times, int num_inst_block, PTXBlockStatistic &basic_block, 
 90 |                              KernelRelatedParas &kn_para, float num_uncoal_per_warp, 
 91 |                              int num_blocks, int num_warps, int num_sms)
 92 | {   // Returns the modelled kernel cost t_comp + t_mem - min(t_comp, t_mem); nothing in this file calls it.
 93 |     PTXBlockStatistic insts_count = basic_block;
 94 |     for (size_t i = 0; i < num_inst_block; ++i)
 95 |     {
 96 |         if (NO_RECOMMENDATION == blocks[i].recommended_times)   // blocks with a recommendation are skipped here
 97 |         {
 98 |             insts_count += blocks[i] * times[i];   // scaled by the caller-supplied count for this block
 99 |         }
100 |     }
101 |     // average latency over the accumulated counts; DRAM latency grows by delta per uncoalesced access beyond the first
102 |     float avg_inst_lat = knCalculateAvgLatency(insts_count, kn_para.default_inst_lat);
103 |     float dram_lat = kn_para.dram_lat + (num_uncoal_per_warp - 1) * kn_para.delta;
104 | 
105 |     // compute ITILP
106 |     float itilp_max = 8 / (kDevGPUInfo.warp_size / kDevGPUInfo.simd_width);     // Eq.2-5
107 |     float itilp = MIN(kDevGPUInfo.ilp * num_warps, itilp_max);                   // Eq.2-4
108 |     // NOTE(review): allocateGPUModel uploads warp_size = 32 and simd_width = 64; if both fields are integers, 32 / 64 is 0 and Eq.2-5 divides by zero - confirm the field types.
109 |     // compute ITMLP (hard-coded here; kDevGPUInfo.mlp is not read)
110 |     float itmlp = 2;
111 | 
112 |     // compute execution time
113 |     float f_sync = kDevGPUInfo.gamma * dram_lat * insts_count.num_mem[GLOBAL_ACC] / insts_count.num_insts;  // Eq.2-8
114 |     float o_sync = ((insts_count.num_sync * num_blocks) / num_sms) * f_sync;                                // Eq.2-7
115 |     if (1 >= num_warps) o_sync = 0;   // no synchronisation overhead is charged for one warp or fewer
116 |     // NOTE(review): the divisions by num_sms in Eq.2-7 and Eq.2-3 truncate if the counters are integers - confirm the field types.
117 |     float w_serial = o_sync;
118 |     float w_parallel = (((insts_count.num_insts - insts_count.num_sync) * num_warps) / num_sms) * (avg_inst_lat / itilp);   // Eq.2-3
119 | 
120 |     float t_comp = w_serial + w_parallel;   // Eq.2-2
121 |     // num_warps appears in both the numerator and the denominator of the next expression
122 |     float t_mem = (insts_count.num_mem[GLOBAL_ACC] * num_warps) / (num_warps * itmlp) * dram_lat;   // Eq.2-11
123 | 
124 |     float t_overlap = MIN(t_comp, t_mem);
125 | 
126 |     //unsigned int idx = threadIdx.x + __umul24(blockDim.x, blockIdx.x);
127 |     //if (idx == 0)
128 |     //{
129 |     //    //printf("idx: %d, t_comp: %f, t_mem: %f, t_op: %f\n", idx, t_comp, t_mem, t_overlap);
130 |     //    //printf("avg_inst_lat: %f, dram_lat: %f\n", avg_inst_lat, dram_lat);
131 |     //    printf("%f, %f, %f\n", kn_para.default_inst_lat, kn_para.delta, kn_para.dram_lat);
132 |     //}
133 | 
134 |     return t_comp + t_mem - t_overlap;   // equals max(t_comp, t_mem): the overlapped part is counted once
135 | }
136 | 
137 | __global__
138 | void knCalculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size)
139 | {
140 |     // One thread per grid cell. cell_type, gm, cell_offset, cell_num and block_size are accepted but not read.
141 |     const unsigned int cell = blockDim.x * blockIdx.x + threadIdx.x;
142 |     const int total_cells = grid_size.x * grid_size.y * grid_size.z;
143 |     if (cell >= total_cells) return;
144 | 
145 |     const int own = d_cell_num[cell];
146 |     ushort3 here = CellIdx2CellPos(cell, grid_size);
147 | 
148 |     // Add the counts of the six axis neighbours; a neighbour is skipped when CellPos2CellIdx gives kInvalidCellIdx.
149 |     const int step[6][3] = { {-1, 0, 0}, {1, 0, 0}, {0, 1, 0}, {0, -1, 0}, {0, 0, -1}, {0, 0, 1} };
150 |     int around = own;
151 |     for (int k = 0; k < 6; ++k)
152 |     {
153 |         const int other = CellPos2CellIdx(here + make_ushort3(step[k][0], step[k][1], step[k][2]), grid_size);
154 |         if (kInvalidCellIdx != other) around += d_cell_num[other];
155 |     }
156 | 
157 |     // around < 96 with own < 15 requests no block. Otherwise the request is own / 32 rounded up,
158 |     // except that for around < 60 the bias is 27 instead of 31 (a remainder below 5 adds no block).
159 |     int blocks;
160 |     if (around < 96 && own < 15) blocks = 0;
161 |     else if (around < 60)        blocks = (own + 27) >> 5;
162 |     else                         blocks = (own + 31) >> 5;
163 |     block_req[cell] = blocks;
164 |     // Earlier tuning variants, kept for reference:
165 | //    block_req[idx] = nump_self < 15 ? 0 : (nump_self + 25) >> 5;
166 | //    if (totaln < 60){block_req[idx] = (nump_self + 25) >> 5;}else if (totaln < 95 && nump_self < 19){block_req[idx] = 0;}else{block_req[idx] = (nump_self + 31) >> 5;}
167 | //    block_req[idx] = (totaln < 95) ? 0 : (nump_self + 31) >> 5;
168 | //    block_req[idx] = 0;// (nump_self + 31) >> 5;
169 | }
170 | 
171 | /****************************** Interface ******************************/
172 | 
173 | void calculateStaticBlock(PTXBlockStatistic &static_block, PTXBlockStatistic *blocks, size_t num_blocks)
174 | {   // Zeroes static_block, then adds blocks[i] * recommended_times for every block that carries a recommendation.
175 |     std::memset(&static_block, 0, sizeof(static_block));   // parenthesised operand: `sizeof Type` without parentheses is not standard C++
176 |     if (nullptr == blocks) return;   // nothing to accumulate; the zeroed result stands
177 |     for (size_t i = 0; i < num_blocks; ++i)
178 |     {
179 |         if (NO_RECOMMENDATION != blocks[i].recommended_times)
180 |         {
181 |             static_block += blocks[i] * blocks[i].recommended_times;
182 |         }
183 |     }
184 | }
185 | 
186 | void setKernelParameters(KernelRelatedParas &kn_para, unsigned int type)
187 | {   // Writes the fixed dram_lat / delta / default_inst_lat constants of the given kernel type into kn_para.
188 |     int dram_lat, delta, inst_lat;
189 | 
190 |     // Select the constants for the requested kernel type.
191 |     switch (type)
192 |     {
193 |     case kDensityTRA:
194 |         dram_lat = 250; delta = 0; inst_lat = 12;
195 |         break;
196 |     case kForceTRA:
197 |         dram_lat = 230; delta = 0; inst_lat = 12;
198 |         break;
199 |     case kDensitySMS:
200 |         dram_lat = 240; delta = 10; inst_lat = 14;
201 |         break;
202 |     case kForceSMS:
203 |         dram_lat = 240; delta = 10; inst_lat = 10;
204 |         break;
205 |     default:
206 |         return;   // any other type leaves kn_para untouched
207 |     }
208 | 
209 |     // Store the selected constants.
210 |     kn_para.dram_lat = dram_lat;
211 |     kn_para.delta = delta;
212 |     kn_para.default_inst_lat = inst_lat;
213 | }
214 | 
215 | void allocateGPUModel(GPUModel *&gpu_model)
216 | {   // Creates the GPUModel handle: reads PTX statistics and instruction latencies from file and uploads them with the model constants.
217 |     unsigned int data_len[4] = { 0, 0, 0, 0 };   // entries returned by the reader, indexed by kernel-type id
218 |     PTXBlockStatistic *bs_tra_density = nullptr;
219 |     PTXBlockStatistic *bs_tra_force = nullptr;
220 |     PTXBlockStatistic *bs_sms_density = nullptr;
221 |     PTXBlockStatistic *bs_sms_force = nullptr;
222 |     PTXBlockStatistic static_block[4];
223 |     KernelRelatedParas kn_paras[4];
224 |     InstructionInfo insts_latency;
225 |     GPUDeviceInfo device_info;
226 | 
227 |     gpu_model = new GPUModel;
228 | 
229 |     // read block data and transfer to device
230 |     data_len[kDensityTRA] = readPTXStatisticsFromFile(bs_tra_density, kFunNameDensityTRA, kPTXStatisticsFileNameTRA);
231 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->bs_tra_density), data_len[kDensityTRA] * sizeof(PTXBlockStatistic)));
232 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->bs_tra_density, bs_tra_density, data_len[kDensityTRA] * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
233 |     data_len[kForceTRA] = readPTXStatisticsFromFile(bs_tra_force, kFunNameForceTRA, kPTXStatisticsFileNameTRA);
234 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->bs_tra_force), data_len[kForceTRA] * sizeof(PTXBlockStatistic)));
235 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->bs_tra_force, bs_tra_force, data_len[kForceTRA] * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
236 |     data_len[kDensitySMS] = readPTXStatisticsFromFile(bs_sms_density, kFunNameDensitySMS, kPTXStatisticsFileNameSMS);
237 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->bs_sms_density), data_len[kDensitySMS] * sizeof(PTXBlockStatistic)));
238 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->bs_sms_density, bs_sms_density, data_len[kDensitySMS] * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
239 |     data_len[kForceSMS] = readPTXStatisticsFromFile(bs_sms_force, kFunNameForceSMS, kPTXStatisticsFileNameSMS);
240 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->bs_sms_force), data_len[kForceSMS] * sizeof(PTXBlockStatistic)));
241 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->bs_sms_force, bs_sms_force, data_len[kForceSMS] * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
242 | 
243 |     // calculate static block (first 5 TRA / 7 SMS entries, clamped to what was actually read) and transfer to device
244 |     calculateStaticBlock(static_block[kDensityTRA], bs_tra_density, MIN(5U, data_len[kDensityTRA]));
245 |     calculateStaticBlock(static_block[kForceTRA], bs_tra_force, MIN(5U, data_len[kForceTRA]));
246 |     calculateStaticBlock(static_block[kDensitySMS], bs_sms_density, MIN(7U, data_len[kDensitySMS]));
247 |     calculateStaticBlock(static_block[kForceSMS], bs_sms_force, MIN(7U, data_len[kForceSMS]));
248 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->static_block), 4 * sizeof(PTXBlockStatistic)));
249 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->static_block, static_block, 4 * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
250 | 
251 |     // read instruction latency and transfer to device
252 |     readInstructionLatencyFromFile(insts_latency, kLatencyFileName);
253 |     CUDA_SAFE_CALL(cudaMemcpyToSymbol(kDevInstsLatency, &insts_latency, sizeof(InstructionInfo)));
254 | 
255 |     // set device info and transfer to device
256 |     device_info.simd_width = 64;
257 |     device_info.warp_size = 32;
258 |     device_info.gamma = 64;
259 |     device_info.ilp = 2;
260 |     device_info.mlp = 2;
261 |     CUDA_SAFE_CALL(cudaMemcpyToSymbol(kDevGPUInfo, &device_info, sizeof(GPUDeviceInfo)));
262 | 
263 |     // set kernel related paras and transfer to device
264 |     setKernelParameters(kn_paras[kDensityTRA], kDensityTRA);
265 |     setKernelParameters(kn_paras[kForceTRA], kForceTRA);
266 |     setKernelParameters(kn_paras[kDensitySMS], kDensitySMS);
267 |     setKernelParameters(kn_paras[kForceSMS], kForceSMS);
268 |     CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->kn_paras), 4 * sizeof(KernelRelatedParas)));
269 |     CUDA_SAFE_CALL(cudaMemcpy(gpu_model->kn_paras, kn_paras, 4 * sizeof(KernelRelatedParas), cudaMemcpyHostToDevice));
270 |     // release the host-side copies (delete[] on a null pointer is a no-op)
271 |     delete[] bs_tra_density;
272 |     delete[] bs_tra_force;
273 |     delete[] bs_sms_density;
274 |     delete[] bs_sms_force;
275 | }
276 | 
277 | void freeGPUModel(GPUModel *gpu_model)
278 | {   // Frees every device buffer referenced by gpu_model, then deletes the handle itself.
279 |     if (nullptr == gpu_model) return;   // a null handle is accepted, as it is by delete
280 |     CUDA_SAFE_CALL(cudaFree(gpu_model->bs_tra_density));
281 |     CUDA_SAFE_CALL(cudaFree(gpu_model->bs_tra_force));
282 |     CUDA_SAFE_CALL(cudaFree(gpu_model->bs_sms_density));
283 |     CUDA_SAFE_CALL(cudaFree(gpu_model->bs_sms_force));
284 |     CUDA_SAFE_CALL(cudaFree(gpu_model->kn_paras));
285 |     CUDA_SAFE_CALL(cudaFree(gpu_model->static_block));
286 |     delete gpu_model;
287 | }
288 | 
289 | void calculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc)
290 | {   // Launches knCalculateBlockRequirementSMSMode with one thread per cell, kDefaultNumThread threads per block.
291 |     if (numc <= 0) return;   // no cells: a zero-block launch would be an invalid configuration
292 |     int num_thread = kDefaultNumThread, num_block = ceil_int(numc, num_thread);
293 |     knCalculateBlockRequirementSMSMode<<<num_block, num_thread>>>(block_req, cell_start, cell_end, block_size, numc);
294 |     CUDA_SAFE_CALL(cudaGetLastError());   // a launch reports configuration errors only through the last-error state
295 | }
296 | 
297 | void calculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel *gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size)
298 | {   // Launches knCalculateBlockRequirementHybridMode with one thread per cell of grid_size; *gm is passed by value.
299 |     int numc = grid_size.x * grid_size.y * grid_size.z;
300 |     if (numc <= 0 || nullptr == gm) return;   // empty grid would be a zero-block launch; gm is dereferenced below
301 |     int num_thread = kDefaultNumThread;
302 |     int num_block = ceil_int(numc, num_thread);
303 |     knCalculateBlockRequirementHybridMode << <num_block, num_thread >> >(cell_type, d_cell_num, block_req, *gm, cell_offset, cell_num, grid_size, block_size);
304 |     CUDA_SAFE_CALL(cudaGetLastError());   // a launch reports configuration errors only through the last-error state
305 | }
306 | 
307 | }


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/writer.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
  2 | // Distributed under MIT license, or public domain if desired and
  3 | // recognized in your jurisdiction.
  4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
  5 | 
  6 | #ifndef JSON_WRITER_H_INCLUDED
  7 | #define JSON_WRITER_H_INCLUDED
  8 | 
  9 | #if !defined(JSON_IS_AMALGAMATION)
 10 | #include "value.h"
 11 | #endif // if !defined(JSON_IS_AMALGAMATION)
 12 | #include <ostream>
 13 | #include <string>
 14 | #include <vector>
 15 | 
 16 | // Disable warning C4251: <data member>: <type> needs to have dll-interface to
 17 | // be used by...
 18 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) && defined(_MSC_VER)
 19 | #pragma warning(push)
 20 | #pragma warning(disable : 4251)
 21 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING)
 22 | 
 23 | #pragma pack(push, 8)
 24 | 
 25 | namespace Json {
 26 | 
 27 | class Value;
 28 | 
 29 | /**
 30 | 
 31 | Usage:
 32 | \code
 33 |   using namespace Json;
 34 |   void writeToStdout(StreamWriter::Factory const& factory, Value const& value) {
 35 |     std::unique_ptr<StreamWriter> const writer(
 36 |       factory.newStreamWriter());
 37 |     writer->write(value, &std::cout);
 38 |     std::cout << std::endl;  // add lf and flush
 39 |   }
 40 | \endcode
 41 | */
 42 | class JSON_API StreamWriter {
 43 | protected:
 44 |   OStream* sout_; // not owned; will not delete
 45 | public:
 46 |   StreamWriter();
 47 |   virtual ~StreamWriter();
 48 |   /** Write Value into document as configured in sub-class.
 49 |       Does not take ownership of sout, but keeps a reference to it during the call.
 50 |       \pre sout != NULL
 51 |       \return zero on success (For now, we always return zero, so check the
 52 |      stream instead.) \throw std::exception possibly, depending on configuration
 53 |    */
 54 |   virtual int write(Value const& root, OStream* sout) = 0;
 55 | 
 56 |   /** \brief A simple abstract factory.
 57 |    */
 58 |   class JSON_API Factory {
 59 |   public:
 60 |     virtual ~Factory();
 61 |     /** \brief Allocate a StreamWriter via operator new().
 62 |      * \throw std::exception if something goes wrong (e.g. invalid settings)
 63 |      */
 64 |     virtual StreamWriter* newStreamWriter() const = 0;
 65 |   }; // Factory
 66 | };   // StreamWriter
 67 | 
 68 | /** \brief Write into stringstream, then return string, for convenience.
 69 |  * A StreamWriter will be created from the factory, used, and then deleted.
 70 |  */
 71 | String JSON_API writeString(StreamWriter::Factory const& factory,
 72 |                             Value const& root);
 73 | 
 74 | /** \brief Build a StreamWriter implementation.
 75 | 
 76 | Usage:
 77 | \code
 78 |   using namespace Json;
 79 |   Value value = ...;
 80 |   StreamWriterBuilder builder;
 81 |   builder["commentStyle"] = "None";
 82 |   builder["indentation"] = "   ";  // or whatever you like
 83 |   std::unique_ptr<Json::StreamWriter> writer(
 84 |       builder.newStreamWriter());
 85 |   writer->write(value, &std::cout);
 86 |   std::cout << std::endl;  // add lf and flush
 87 | \endcode
 88 | */
 89 | class JSON_API StreamWriterBuilder : public StreamWriter::Factory {
 90 | public:
 91 |   // Note: We use a Json::Value so that we can add data-members to this class
 92 |   // without a major version bump.
 93 |   /** Configuration of this builder.
 94 |     Available settings (case-sensitive):
 95 |     - "commentStyle": "None" or "All"
 96 |     - "indentation":  "<anything>".
 97 |       - Setting this to an empty string also omits newline characters.
 98 |     - "enableYAMLCompatibility": false or true
 99 |       - slightly change the whitespace around colons
100 |     - "dropNullPlaceholders": false or true
101 |       - Drop the "null" string from the writer's output for nullValues.
102 |         Strictly speaking, this is not valid JSON. But when the output is being
103 |         fed to a browser's JavaScript, it makes for smaller output and the
104 |         browser can handle the output just fine.
105 |     - "useSpecialFloats": false or true
106 |       - If true, outputs non-finite floating point values in the following way:
107 |         NaN values as "NaN", positive infinity as "Infinity", and negative
108 |         infinity as "-Infinity".
109 |     - "precision": int
110 |       - Number of precision digits for formatting of real values.
111 |     - "precisionType": "significant"(default) or "decimal"
112 |       - Type of precision for formatting of real values.
113 | 
114 |     You can examine 'settings_' yourself
115 |     to see the defaults. You can also write and read them just like any
116 |     JSON Value.
117 |     \sa setDefaults()
118 |     */
119 |   Json::Value settings_;
120 | 
121 |   StreamWriterBuilder();
122 |   ~StreamWriterBuilder() override;
123 | 
124 |   /** Creates a StreamWriter; overrides StreamWriter::Factory::newStreamWriter().
125 |    * \throw std::exception if something goes wrong (e.g. invalid settings)
126 |    */
127 |   StreamWriter* newStreamWriter() const override;
128 | 
129 |   /** \return true if 'settings' are legal and consistent;
130 |    *   otherwise, indicate bad settings via 'invalid'.
131 |    */
132 |   bool validate(Json::Value* invalid) const;
133 |   /** A simple way to update a specific setting.
134 |    */
135 |   Value& operator[](const String& key);
136 | 
137 |   /** Called by ctor, but you can use this to reset settings_.
138 |    * \pre 'settings' != NULL (but Json::null is fine)
139 |    * \remark Defaults:
140 |    * \snippet src/lib_json/json_writer.cpp StreamWriterBuilderDefaults
141 |    */
142 |   static void setDefaults(Json::Value* settings);
143 | };
144 | 
145 | /** \brief Abstract class for writers.
146 |  * \deprecated Use StreamWriter. (And really, this is an implementation detail.)
147 |  */
148 | class JSONCPP_DEPRECATED("Use StreamWriter instead") JSON_API Writer {
149 | public:
150 |   virtual ~Writer();
151 |   /// Serializes root and returns the resulting document as a String; implemented by FastWriter and StyledWriter.
152 |   virtual String write(const Value& root) = 0;
153 | };
154 | 
155 | /** \brief Outputs a Value in <a HREF="http://www.json.org">JSON</a> format
156 |  *without formatting (not human friendly).
157 |  *
158 |  * The JSON document is written in a single line. It is not intended for 'human'
159 |  *consumption,
160 |  * but may be useful to support feature such as RPC where bandwidth is limited.
161 |  * \sa Reader, Value
162 |  * \deprecated Use StreamWriterBuilder.
163 |  */
164 | #if defined(_MSC_VER)
165 | #pragma warning(push)
166 | #pragma warning(disable : 4996) // Deriving from deprecated class
167 | #endif
168 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API FastWriter
169 |     : public Writer {
170 | public:
171 |   FastWriter();
172 |   ~FastWriter() override = default;
173 |   /// NOTE(review): presumably the same effect as the builder setting "enableYAMLCompatibility" - confirm.
174 |   void enableYAMLCompatibility();
175 | 
176 |   /** \brief Drop the "null" string from the writer's output for nullValues.
177 |    * Strictly speaking, this is not valid JSON. But when the output is being
178 |    * fed to a browser's JavaScript, it makes for smaller output and the
179 |    * browser can handle the output just fine.
180 |    */
181 |   void dropNullPlaceholders();
182 |   /// NOTE(review): presumably suppresses the line feed at the end of write()'s result - confirm in the implementation.
183 |   void omitEndingLineFeed();
184 | 
185 | public: // overridden from Writer
186 |   String write(const Value& root) override;
187 | 
188 | private:
189 |   void writeValue(const Value& value);
190 |   // The three option flags below start out false (in-class initialisers).
191 |   String document_;
192 |   bool yamlCompatibilityEnabled_{false};
193 |   bool dropNullPlaceholders_{false};
194 |   bool omitEndingLineFeed_{false};
195 | };
196 | #if defined(_MSC_VER)
197 | #pragma warning(pop)
198 | #endif
199 | 
200 | /** \brief Writes a Value in <a HREF="http://www.json.org">JSON</a> format in a
201 |  *human friendly way.
202 |  *
203 |  * The rules for line breaks and indentation are as follows:
204 |  * - Object value:
205 |  *     - if empty, then print {} without indent and line break
206 |  *     - if not empty, then print '{', line break & indent, print one value per
207 |  *       line,
208 |  *       and then unindent and line break and print '}'.
209 |  * - Array value:
210 |  *     - if empty, then print [] without indent and line break
211 |  *     - if the array contains no object value, empty array or some other value
212 |  *       types,
213 |  *       and all the values fit on one line, then print the array on a single
214 |  *       line.
215 |  *     - otherwise, if the values do not fit on one line, or the array contains
216 |  *       an object or a non-empty array, then print one value per line.
217 |  *
218 |  * If the Value has comments, then they are output according to their
219 |  * #CommentPlacement.
220 |  *
221 |  * \sa Reader, Value, Value::setComment()
222 |  * \deprecated Use StreamWriterBuilder.
223 |  */
224 | #if defined(_MSC_VER)
225 | #pragma warning(push)
226 | #pragma warning(disable : 4996) // Deriving from deprecated class
227 | #endif
228 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API
229 |     StyledWriter : public Writer {
230 | public:
231 |   StyledWriter();
232 |   ~StyledWriter() override = default;
233 | 
234 | public: // overridden from Writer
235 |   /** \brief Serialize a Value in <a HREF="http://www.json.org">JSON</a> format.
236 |    * \param root Value to serialize.
237 |    * \return String containing the JSON document that represents the root value.
238 |    */
239 |   String write(const Value& root) override;
240 | 
241 | private:
242 |   void writeValue(const Value& value);
243 |   void writeArrayValue(const Value& value);
244 |   bool isMultilineArray(const Value& value);
245 |   void pushValue(const String& value);
246 |   void writeIndent();
247 |   void writeWithIndent(const String& value);
248 |   void indent();
249 |   void unindent();
250 |   void writeCommentBeforeValue(const Value& root);
251 |   void writeCommentAfterValueOnSameLine(const Value& root);
252 |   static bool hasCommentForValue(const Value& value);
253 |   static String normalizeEOL(const String& text);
254 | 
255 |   typedef std::vector<String> ChildValues;
256 |   // Formatting state; rightMargin_ defaults to 74, indentSize_ to 3 and addChildValues_ to false.
257 |   ChildValues childValues_;
258 |   String document_;
259 |   String indentString_;
260 |   unsigned int rightMargin_{74};
261 |   unsigned int indentSize_{3};
262 |   bool addChildValues_{false};
263 | };
264 | #if defined(_MSC_VER)
265 | #pragma warning(pop)
266 | #endif
267 | 
268 | /** \brief Writes a Value in <a HREF="http://www.json.org">JSON</a> format in a
269 |  human friendly way,
270 |      to a stream rather than to a string.
271 |  *
272 |  * The rules for line breaks and indentation are as follows:
273 |  * - Object value:
274 |  *     - if empty, then print {} without indent and line break
275 |  *     - if not empty, then print '{', line break & indent, print one value per
276 |  *       line,
277 |  *       and then unindent and line break and print '}'.
278 |  * - Array value:
279 |  *     - if empty, then print [] without indent and line break
280 |  *     - if the array contains no object value, empty array or some other value
281 |  *       types,
282 |  *       and all the values fit on one line, then print the array on a single
283 |  *       line.
284 |  *     - otherwise, if the values do not fit on one line, or the array contains
285 |  *       an object or a non-empty array, then print one value per line.
286 |  *
287 |  * If the Value has comments, then they are output according to their
288 |  * #CommentPlacement.
289 |  *
290 |  * \sa Reader, Value, Value::setComment()
291 |  * \deprecated Use StreamWriterBuilder.
292 |  */
293 | #if defined(_MSC_VER)
294 | #pragma warning(push)
295 | #pragma warning(disable : 4996) // Deriving from deprecated class
296 | #endif
297 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API
298 |     StyledStreamWriter {
299 | public:
300 |   /**
301 |    * \param indentation Each level will be indented by this amount extra.
302 |    */
303 |   StyledStreamWriter(String indentation = "\t");
304 |   ~StyledStreamWriter() = default;
305 | 
306 | public:
307 |   /** \brief Serialize a Value in <a HREF="http://www.json.org">JSON</a> format.
308 |    * \param out Stream to write to. (Can be ostringstream, e.g.)
309 |    * \param root Value to serialize.
310 |    * \note There is no point in deriving from Writer, since write() should not
311 |    * return a value.
312 |    */
313 |   void write(OStream& out, const Value& root);
314 | 
315 | private:
316 |   void writeValue(const Value& value);
317 |   void writeArrayValue(const Value& value);
318 |   bool isMultilineArray(const Value& value);
319 |   void pushValue(const String& value);
320 |   void writeIndent();
321 |   void writeWithIndent(const String& value);
322 |   void indent();
323 |   void unindent();
324 |   void writeCommentBeforeValue(const Value& root);
325 |   void writeCommentAfterValueOnSameLine(const Value& root);
326 |   static bool hasCommentForValue(const Value& value);
327 |   static String normalizeEOL(const String& text);
328 | 
329 |   typedef std::vector<String> ChildValues;
330 |   // NOTE(review): only rightMargin_ has an in-class initialiser; the pointer and both bit-fields are presumably set by the constructor - confirm.
331 |   ChildValues childValues_;
332 |   OStream* document_;   // NOTE(review): presumably the stream handed to write() - confirm
333 |   String indentString_;
334 |   unsigned int rightMargin_{74};
335 |   String indentation_;
336 |   bool addChildValues_ : 1;
337 |   bool indented_ : 1;
338 | };
339 | #if defined(_MSC_VER)
340 | #pragma warning(pop)
341 | #endif
342 | 
343 | #if defined(JSON_HAS_INT64)
344 | String JSON_API valueToString(Int value);
345 | String JSON_API valueToString(UInt value);
346 | #endif // if defined(JSON_HAS_INT64)
347 | String JSON_API valueToString(LargestInt value);
348 | String JSON_API valueToString(LargestUInt value);
349 | String JSON_API
350 | valueToString(double value,
351 |               unsigned int precision = Value::defaultRealPrecision,
352 |               PrecisionType precisionType = PrecisionType::significantDigits);
353 | String JSON_API valueToString(bool value);
354 | String JSON_API valueToQuotedString(const char* value);
355 | 
356 | /// \brief Output using the StyledStreamWriter.
357 | /// \see Json::operator>>()
358 | JSON_API OStream& operator<<(OStream&, const Value& root);
359 | 
360 | } // namespace Json
361 | 
362 | #pragma pack(pop)
363 | 
364 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) && defined(_MSC_VER)   // same condition as the matching push near the top of this header
365 | #pragma warning(pop)
366 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) && defined(_MSC_VER)
367 | 
368 | #endif // JSON_WRITER_H_INCLUDED
369 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <ItemGroup>
 22 |     <ClCompile Include="gl_main.cpp" />
 23 |     <ClCompile Include="gpu_model_reader.cpp" />
 24 |     <ClCompile Include="lodepng.cpp" />
 25 |     <ClCompile Include="parameters.cpp" />
 26 |     <ClCompile Include="pcisph_factor.cpp" />
 27 |     <ClCompile Include="sph_marching_cube.cpp" />
 28 |     <ClCompile Include="sph_particle.cpp" />
 29 |     <ClCompile Include="sph_timer.cpp" />
 30 |   </ItemGroup>
 31 |   <ItemGroup>
 32 |     <ClInclude Include="main.h" />
 33 |     <ClInclude Include="nv_gui.h" />
 34 |     <ClInclude Include="parameters.h" />
 35 |     <ClInclude Include="cuda_prescan\scan.cuh" />
 36 |     <ClInclude Include="cuda_prescan\scan_kern.cuh" />
 37 |     <ClInclude Include="gl_main_header.h" />
 38 |     <ClInclude Include="gl_texture.h" />
 39 |     <ClInclude Include="gpu_model.cuh" />
 40 |     <ClInclude Include="gpu_model.h" />
 41 |     <ClInclude Include="gpu_model_reader.h" />
 42 |     <ClInclude Include="high_resolution_timer.h" />
 43 |     <ClInclude Include="lodepng.h" />
 44 |     <ClInclude Include="pcisph_factor.h" />
 45 |     <ClInclude Include="save_screen.h" />
 46 |     <ClInclude Include="sph_arrangement.cuh" />
 47 |     <ClInclude Include="sph_hybrid_system.h" />
 48 |     <ClInclude Include="sph_kernel.cuh" />
 49 |     <ClInclude Include="sph_kernel_shared_data.cuh" />
 50 |     <ClInclude Include="sph_marching_cube.h" />
 51 |     <ClInclude Include="sph_parameter.h" />
 52 |     <ClInclude Include="sph_particle.h" />
 53 |     <ClInclude Include="sph_timer.h" />
 54 |     <ClInclude Include="sph_utils.cuh" />
 55 |   </ItemGroup>
 56 |   <ItemGroup>
 57 |     <CudaCompile Include="cuda_prescan\prefix_sum.cu" />
 58 |     <CudaCompile Include="cuda_prescan\scan.cu" />
 59 |     <CudaCompile Include="gpu_model.cu" />
 60 |     <CudaCompile Include="sph_arrangement.cu" />
 61 |     <CudaCompile Include="sph_hybrid_system.cpp" />
 62 |     <CudaCompile Include="sph_kernel.cu" />
 63 |   </ItemGroup>
 64 |   <PropertyGroup Label="Globals">
 65 |     <ProjectGuid>{BC50E9FA-E95F-4E72-9F2B-D45567958A71}</ProjectGuid>
 66 |     <RootNamespace>Hybrid_Fluid_Simulation</RootNamespace>
 67 |     <ProjectName>Hybrid_Fluid_Simulation</ProjectName>
 68 |   </PropertyGroup>
 69 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 70 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <!-- Debug|Win32: application target, debug libraries, multi-byte character set, v143 toolset -->
 71 |     <ConfigurationType>Application</ConfigurationType>
 72 |     <UseDebugLibraries>true</UseDebugLibraries>
 73 |     <CharacterSet>MultiByte</CharacterSet>
 74 |     <PlatformToolset>v143</PlatformToolset>
 75 |   </PropertyGroup>
 76 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <!-- Debug|x64: same values as Debug|Win32 -->
 77 |     <ConfigurationType>Application</ConfigurationType>
 78 |     <UseDebugLibraries>true</UseDebugLibraries>
 79 |     <CharacterSet>MultiByte</CharacterSet>
 80 |     <PlatformToolset>v143</PlatformToolset>
 81 |   </PropertyGroup>
 82 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <!-- Release|Win32: debug libraries off, whole-program optimization on -->
 83 |     <ConfigurationType>Application</ConfigurationType>
 84 |     <UseDebugLibraries>false</UseDebugLibraries>
 85 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 86 |     <CharacterSet>MultiByte</CharacterSet>
 87 |     <PlatformToolset>v143</PlatformToolset>
 88 |   </PropertyGroup>
 89 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> <!-- Release|x64: same values as Release|Win32 -->
 90 |     <ConfigurationType>Application</ConfigurationType>
 91 |     <UseDebugLibraries>false</UseDebugLibraries>
 92 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 93 |     <CharacterSet>MultiByte</CharacterSet>
 94 |     <PlatformToolset>v143</PlatformToolset>
 95 |   </PropertyGroup>
 96 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 97 |   <ImportGroup Label="ExtensionSettings"> <!-- CUDA build customization properties; pinned to CUDA 11.6, the same version as the .targets import in ExtensionTargets -->
 98 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.6.props" />
 99 |   </ImportGroup>
100 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Each configuration imports the per-user property sheet, and only when that file exists -->
101 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
102 |   </ImportGroup>
103 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
104 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
105 |   </ImportGroup>
106 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
107 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
108 |   </ImportGroup>
109 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
110 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
111 |   </ImportGroup>
112 |   <PropertyGroup Label="UserMacros" /> <!-- Empty: no user macros are defined in this project file -->
113 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Win32 search paths rely on GLEW_PATH, GLM_PATH and FREEGLUT_PATH, which are not defined in the visible part of this file; presumably environment variables, confirm -->
114 |     <LinkIncremental>true</LinkIncremental>
115 |     <IncludePath>$(GLEW_PATH)\include;$(GLM_PATH);$(FREEGLUT_PATH)\include;$(IncludePath)</IncludePath>
116 |     <LibraryPath>$(GLEW_PATH)\lib\Debug\Win32;$(FREEGLUT_PATH)\lib;$(LibraryPath)</LibraryPath>
117 |   </PropertyGroup>
118 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- NOTE(review): unlike Release|x64 this group sets no LibraryPath and omits json\include; confirm Debug|x64 still builds -->
119 |     <LinkIncremental>true</LinkIncremental>
120 |     <IncludePath>$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\include;$(IncludePath)</IncludePath>
121 |   </PropertyGroup>
122 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Same macro-based paths as Debug|Win32, with the Release GLEW library directory -->
123 |     <IncludePath>$(GLEW_PATH)\include;$(GLM_PATH);$(FREEGLUT_PATH)\include;$(IncludePath)</IncludePath>
124 |     <LibraryPath>$(GLEW_PATH)\lib\Release\Win32;$(FREEGLUT_PATH)\lib;$(LibraryPath)</LibraryPath>
125 |   </PropertyGroup>
126 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Uses the copies under Hybrid_Fluid_Simulation\GL_LIB and Hybrid_Fluid_Simulation\json, resolved from SolutionDir -->
127 |     <IncludePath>$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\json\include;$(IncludePath)</IncludePath>
128 |     <LibraryPath>$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\libs\release;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\libs\release;$(SolutionDir)\Hybrid_Fluid_Simulation\json;$(LibraryPath)</LibraryPath>
129 |   </PropertyGroup>
130 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <!-- Compiler, linker, post-build and CUDA settings for Debug|Win32 -->
131 |     <ClCompile>
132 |       <WarningLevel>Level3</WarningLevel>
133 |       <Optimization>Disabled</Optimization>
134 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
135 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <!-- static debug CRT (/MTd) -->
136 |       <AdditionalIncludeDirectories>$(JSONCPP_PATH)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <!-- JSONCPP_PATH is not defined in the visible part of this file; presumably an environment variable, confirm -->
137 |     </ClCompile>
138 |     <Link>
139 |       <GenerateDebugInformation>true</GenerateDebugInformation>
140 |       <SubSystem>Console</SubSystem>
141 |       <AdditionalDependencies>freeglut.lib;glew32d.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
142 |       <AdditionalOptions>/SAFESEH:NO %(AdditionalOptions)</AdditionalOptions>
143 |       <AdditionalLibraryDirectories>$(JSONCPP_PATH)\makefiles\msvc2010\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
144 |     </Link>
145 |     <PostBuildEvent> <!-- Echoes the copy command, then copies the cudart DLLs from the CUDA toolkit bin directory into OutDir -->
146 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
147 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
148 |     </PostBuildEvent>
149 |     <CudaCompile>
150 |       <GenerateRelocatableDeviceCode>false</GenerateRelocatableDeviceCode> <!-- NOTE(review): Release|Win32 sets this to true; confirm the difference is intended -->
151 |       <AdditionalOptions>-Xcompiler "/wd 4819" -keep %(AdditionalOptions)</AdditionalOptions> <!-- passes /wd 4819 to the host compiler; -keep retains nvcc intermediate files -->
152 |       <CodeGeneration>compute_52,sm_52</CodeGeneration>
153 |     </CudaCompile>
154 |   </ItemDefinitionGroup>
155 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <!-- Compiler, linker, post-build and CUDA settings for Debug|x64 -->
156 |     <ClCompile> <!-- No RuntimeLibrary is set here, so the toolset default applies -->
157 |       <WarningLevel>Level3</WarningLevel>
158 |       <Optimization>Disabled</Optimization>
159 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
160 |     </ClCompile>
161 |     <Link>
162 |       <GenerateDebugInformation>true</GenerateDebugInformation>
163 |       <SubSystem>Console</SubSystem>
164 |       <AdditionalDependencies>cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> <!-- NOTE(review): the other three configurations also list freeglut, glew, lib_json and cudadevrt; confirm Debug|x64 links without them -->
165 |     </Link>
166 |     <PostBuildEvent> <!-- Echoes the copy command, then copies the cudart DLLs from the CUDA toolkit bin directory into OutDir -->
167 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
168 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
169 |     </PostBuildEvent>
170 |     <CudaCompile> <!-- NOTE(review): only the 64-bit target is set; no CodeGeneration or AdditionalOptions, unlike the other configurations -->
171 |       <TargetMachinePlatform>64</TargetMachinePlatform>
172 |     </CudaCompile>
173 |   </ItemDefinitionGroup>
174 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <!-- Compiler, linker, post-build and CUDA settings for Release|Win32 -->
175 |     <ClCompile>
176 |       <WarningLevel>Level3</WarningLevel>
177 |       <Optimization>MaxSpeed</Optimization>
178 |       <FunctionLevelLinking>true</FunctionLevelLinking>
179 |       <IntrinsicFunctions>true</IntrinsicFunctions>
180 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
181 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary> <!-- static release CRT (/MT) -->
182 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
183 |     </ClCompile>
184 |     <Link>
185 |       <GenerateDebugInformation>true</GenerateDebugInformation> <!-- debug information is still generated in this Release configuration -->
186 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
187 |       <OptimizeReferences>true</OptimizeReferences>
188 |       <SubSystem>Console</SubSystem>
189 |       <AdditionalDependencies>freeglut.lib;glew32.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
190 |       <AdditionalOptions>/SAFESEH:NO %(AdditionalOptions)</AdditionalOptions>
191 |       <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
192 |       <IgnoreSpecificDefaultLibraries>%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
193 |     </Link>
194 |     <PostBuildEvent> <!-- Echoes the copy command, then copies the cudart DLLs from the CUDA toolkit bin directory into OutDir -->
195 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
196 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
197 |     </PostBuildEvent>
198 |     <CudaCompile>
199 |       <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode> <!-- NOTE(review): Debug|Win32 sets this to false; confirm the difference is intended -->
200 |       <AdditionalOptions>-Xcompiler "/wd 4819" -keep -use_fast_math %(AdditionalOptions)</AdditionalOptions> <!-- -use_fast_math applies to every CUDA source built in this configuration -->
201 |       <CodeGeneration>compute_52,sm_52</CodeGeneration>
202 |     </CudaCompile>
203 |   </ItemDefinitionGroup>
204 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <!-- Compiler, linker, post-build and CUDA settings for Release|x64 -->
205 |     <ClCompile>
206 |       <WarningLevel>Level3</WarningLevel>
207 |       <Optimization>MaxSpeed</Optimization>
208 |       <FunctionLevelLinking>true</FunctionLevelLinking>
209 |       <IntrinsicFunctions>true</IntrinsicFunctions>
210 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <!-- NOTE(review): the Win32 configurations also define _CRT_SECURE_NO_WARNINGS; confirm the omission here is intended -->
211 |       <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
212 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary> <!-- static release CRT (/MT) -->
213 |     </ClCompile>
214 |     <Link>
215 |       <GenerateDebugInformation>true</GenerateDebugInformation> <!-- debug information is still generated in this Release configuration -->
216 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
217 |       <OptimizeReferences>true</OptimizeReferences>
218 |       <SubSystem>Console</SubSystem>
219 |       <AdditionalDependencies>freeglut.lib;glew32.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
220 |       <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
221 |       <AdditionalOptions>/SAFESEH:NO %(AdditionalOptions)</AdditionalOptions>
222 |     </Link>
223 |     <PostBuildEvent> <!-- Echoes the copy command, then copies the cudart DLLs from the CUDA toolkit bin directory into OutDir -->
224 |       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
225 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
226 |     </PostBuildEvent>
227 |     <CudaCompile>
228 |       <TargetMachinePlatform>64</TargetMachinePlatform>
229 |       <AdditionalOptions>-Xcompiler "/wd 4819" -keep -use_fast_math %(AdditionalOptions)</AdditionalOptions> <!-- -use_fast_math applies to every CUDA source built in this configuration -->
230 |       <CodeGeneration>compute_52,sm_52</CodeGeneration>
231 |     </CudaCompile>
232 |   </ItemDefinitionGroup>
233 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
234 |   <ImportGroup Label="ExtensionTargets"> <!-- CUDA build customization targets; pinned to CUDA 11.6, the same version as the .props import in ExtensionSettings -->
235 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.6.targets" />
236 |   </ImportGroup>
237 | </Project>


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_sms_arti_block_statistics.json:
--------------------------------------------------------------------------------
  1 | [
  2 |    {
  3 |       "block_array" : [
  4 |          {
  5 |             "CONST_ACC" : 13,
  6 |             "F_ADD_SUB" : 0,
  7 |             "F_DIV" : 0,
  8 |             "F_DIVIDEF" : 0,
  9 |             "F_EXP2" : 0,
 10 |             "F_LOG2" : 0,
 11 |             "F_MAD_MUL" : 0,
 12 |             "F_MIN_MAX" : 0,
 13 |             "F_RCP" : 0,
 14 |             "F_RSQRT" : 0,
 15 |             "F_SIN_COS" : 0,
 16 |             "F_SQRT" : 0,
 17 |             "GLOBAL_ACC" : 11,
 18 |             "I_ABS" : 0,
 19 |             "I_ADD_SUB" : 20,
 20 |             "I_DIV_REM" : 0,
 21 |             "I_LOGICAL" : 0,
 22 |             "I_MAD_MUL" : 12,
 23 |             "I_MIN_MAX" : 0,
 24 |             "I_MUL24" : 1,
 25 |             "I_SAD" : 0,
 26 |             "I_SHL_SHR" : 4,
 27 |             "LOCAL_ACC" : 0,
 28 |             "SHARED_ACC" : 3,
 29 |             "begin" : 0,
 30 |             "end" : 132,
 31 |             "name" : "arti - [0, 132]",
 32 |             "num_bra" : 8,
 33 |             "num_insts" : 125,
 34 |             "num_sync" : 1,
 35 |             "num_unknown" : 52
 36 |          },
 37 |          {
 38 |             "CONST_ACC" : 0,
 39 |             "F_ADD_SUB" : 0,
 40 |             "F_DIV" : 0,
 41 |             "F_DIVIDEF" : 0,
 42 |             "F_EXP2" : 0,
 43 |             "F_LOG2" : 0,
 44 |             "F_MAD_MUL" : 0,
 45 |             "F_MIN_MAX" : 0,
 46 |             "F_RCP" : 0,
 47 |             "F_RSQRT" : 0,
 48 |             "F_SIN_COS" : 0,
 49 |             "F_SQRT" : 0,
 50 |             "GLOBAL_ACC" : 0,
 51 |             "I_ABS" : 0,
 52 |             "I_ADD_SUB" : 2,
 53 |             "I_DIV_REM" : 0,
 54 |             "I_LOGICAL" : 0,
 55 |             "I_MAD_MUL" : 0,
 56 |             "I_MIN_MAX" : 0,
 57 |             "I_MUL24" : 0,
 58 |             "I_SAD" : 0,
 59 |             "I_SHL_SHR" : 1,
 60 |             "LOCAL_ACC" : 0,
 61 |             "SHARED_ACC" : 4,
 62 |             "begin" : 132,
 63 |             "end" : 153,
 64 |             "name" : "arti - [132, 153]",
 65 |             "num_bra" : 2,
 66 |             "num_insts" : 18,
 67 |             "num_sync" : 0,
 68 |             "num_unknown" : 9
 69 |          },
 70 |          {
 71 |             "CONST_ACC" : 0,
 72 |             "F_ADD_SUB" : 0,
 73 |             "F_DIV" : 0,
 74 |             "F_DIVIDEF" : 0,
 75 |             "F_EXP2" : 0,
 76 |             "F_LOG2" : 0,
 77 |             "F_MAD_MUL" : 0,
 78 |             "F_MIN_MAX" : 0,
 79 |             "F_RCP" : 0,
 80 |             "F_RSQRT" : 0,
 81 |             "F_SIN_COS" : 0,
 82 |             "F_SQRT" : 0,
 83 |             "GLOBAL_ACC" : 0,
 84 |             "I_ABS" : 0,
 85 |             "I_ADD_SUB" : 0,
 86 |             "I_DIV_REM" : 0,
 87 |             "I_LOGICAL" : 0,
 88 |             "I_MAD_MUL" : 1,
 89 |             "I_MIN_MAX" : 0,
 90 |             "I_MUL24" : 0,
 91 |             "I_SAD" : 0,
 92 |             "I_SHL_SHR" : 0,
 93 |             "LOCAL_ACC" : 0,
 94 |             "SHARED_ACC" : 0,
 95 |             "begin" : 153,
 96 |             "end" : 160,
 97 |             "name" : "arti - [153, 160]",
 98 |             "num_bra" : 0,
 99 |             "num_insts" : 6,
100 |             "num_sync" : 0,
101 |             "num_unknown" : 5
102 |          },
103 |          {
104 |             "CONST_ACC" : 0,
105 |             "F_ADD_SUB" : 1,
106 |             "F_DIV" : 0,
107 |             "F_DIVIDEF" : 0,
108 |             "F_EXP2" : 0,
109 |             "F_LOG2" : 0,
110 |             "F_MAD_MUL" : 0,
111 |             "F_MIN_MAX" : 0,
112 |             "F_RCP" : 0,
113 |             "F_RSQRT" : 0,
114 |             "F_SIN_COS" : 0,
115 |             "F_SQRT" : 0,
116 |             "GLOBAL_ACC" : 3,
117 |             "I_ABS" : 0,
118 |             "I_ADD_SUB" : 6,
119 |             "I_DIV_REM" : 0,
120 |             "I_LOGICAL" : 0,
121 |             "I_MAD_MUL" : 1,
122 |             "I_MIN_MAX" : 1,
123 |             "I_MUL24" : 0,
124 |             "I_SAD" : 0,
125 |             "I_SHL_SHR" : 1,
126 |             "LOCAL_ACC" : 0,
127 |             "SHARED_ACC" : 8,
128 |             "begin" : 160,
129 |             "end" : 198,
130 |             "name" : "arti - [160, 198]",
131 |             "num_bra" : 4,
132 |             "num_insts" : 34,
133 |             "num_sync" : 2,
134 |             "num_unknown" : 7
135 |          },
136 |          {
137 |             "CONST_ACC" : 0,
138 |             "F_ADD_SUB" : 0,
139 |             "F_DIV" : 0,
140 |             "F_DIVIDEF" : 0,
141 |             "F_EXP2" : 0,
142 |             "F_LOG2" : 0,
143 |             "F_MAD_MUL" : 0,
144 |             "F_MIN_MAX" : 0,
145 |             "F_RCP" : 0,
146 |             "F_RSQRT" : 0,
147 |             "F_SIN_COS" : 0,
148 |             "F_SQRT" : 0,
149 |             "GLOBAL_ACC" : 0,
150 |             "I_ABS" : 0,
151 |             "I_ADD_SUB" : 3,
152 |             "I_DIV_REM" : 0,
153 |             "I_LOGICAL" : 0,
154 |             "I_MAD_MUL" : 0,
155 |             "I_MIN_MAX" : 0,
156 |             "I_MUL24" : 0,
157 |             "I_SAD" : 0,
158 |             "I_SHL_SHR" : 1,
159 |             "LOCAL_ACC" : 0,
160 |             "SHARED_ACC" : 4,
161 |             "begin" : 198,
162 |             "end" : 215,
163 |             "name" : "arti - [198, 215]",
164 |             "num_bra" : 2,
165 |             "num_insts" : 14,
166 |             "num_sync" : 0,
167 |             "num_unknown" : 4
168 |          },
169 |          {
170 |             "CONST_ACC" : 1,
171 |             "F_ADD_SUB" : 4,
172 |             "F_DIV" : 0,
173 |             "F_DIVIDEF" : 0,
174 |             "F_EXP2" : 0,
175 |             "F_LOG2" : 0,
176 |             "F_MAD_MUL" : 5,
177 |             "F_MIN_MAX" : 0,
178 |             "F_RCP" : 0,
179 |             "F_RSQRT" : 0,
180 |             "F_SIN_COS" : 0,
181 |             "F_SQRT" : 0,
182 |             "GLOBAL_ACC" : 0,
183 |             "I_ABS" : 0,
184 |             "I_ADD_SUB" : 2,
185 |             "I_DIV_REM" : 0,
186 |             "I_LOGICAL" : 0,
187 |             "I_MAD_MUL" : 0,
188 |             "I_MIN_MAX" : 0,
189 |             "I_MUL24" : 0,
190 |             "I_SAD" : 0,
191 |             "I_SHL_SHR" : 0,
192 |             "LOCAL_ACC" : 0,
193 |             "SHARED_ACC" : 3,
194 |             "begin" : 215,
195 |             "end" : 248,
196 |             "name" : "arti - [215, 248]",
197 |             "num_bra" : 3,
198 |             "num_insts" : 30,
199 |             "num_sync" : 0,
200 |             "num_unknown" : 12
201 |          },
202 |          {
203 |             "CONST_ACC" : 5,
204 |             "F_ADD_SUB" : 0,
205 |             "F_DIV" : 0,
206 |             "F_DIVIDEF" : 1,
207 |             "F_EXP2" : 0,
208 |             "F_LOG2" : 0,
209 |             "F_MAD_MUL" : 9,
210 |             "F_MIN_MAX" : 0,
211 |             "F_RCP" : 0,
212 |             "F_RSQRT" : 0,
213 |             "F_SIN_COS" : 0,
214 |             "F_SQRT" : 0,
215 |             "GLOBAL_ACC" : 2,
216 |             "I_ABS" : 0,
217 |             "I_ADD_SUB" : 2,
218 |             "I_DIV_REM" : 0,
219 |             "I_LOGICAL" : 0,
220 |             "I_MAD_MUL" : 0,
221 |             "I_MIN_MAX" : 0,
222 |             "I_MUL24" : 0,
223 |             "I_SAD" : 0,
224 |             "I_SHL_SHR" : 1,
225 |             "LOCAL_ACC" : 0,
226 |             "SHARED_ACC" : 0,
227 |             "begin" : 248,
228 |             "end" : 273,
229 |             "name" : "arti - [248, 273]",
230 |             "num_bra" : 1,
231 |             "num_insts" : 24,
232 |             "num_sync" : 0,
233 |             "num_unknown" : 3
234 |          }
235 |       ],
236 |       "function_name" : "_ZN3sph19knComputeDensitySMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE"
237 |    },
238 |    {
239 |       "block_array" : [
240 |          {
241 |             "CONST_ACC" : 16,
242 |             "F_ADD_SUB" : 0,
243 |             "F_DIV" : 0,
244 |             "F_DIVIDEF" : 0,
245 |             "F_EXP2" : 0,
246 |             "F_LOG2" : 0,
247 |             "F_MAD_MUL" : 0,
248 |             "F_MIN_MAX" : 0,
249 |             "F_RCP" : 0,
250 |             "F_RSQRT" : 0,
251 |             "F_SIN_COS" : 0,
252 |             "F_SQRT" : 0,
253 |             "GLOBAL_ACC" : 15,
254 |             "I_ABS" : 0,
255 |             "I_ADD_SUB" : 23,
256 |             "I_DIV_REM" : 0,
257 |             "I_LOGICAL" : 0,
258 |             "I_MAD_MUL" : 12,
259 |             "I_MIN_MAX" : 0,
260 |             "I_MUL24" : 1,
261 |             "I_SAD" : 0,
262 |             "I_SHL_SHR" : 5,
263 |             "LOCAL_ACC" : 0,
264 |             "SHARED_ACC" : 3,
265 |             "begin" : 0,
266 |             "end" : 146,
267 |             "name" : "arti - [0, 146]",
268 |             "num_bra" : 8,
269 |             "num_insts" : 139,
270 |             "num_sync" : 1,
271 |             "num_unknown" : 55
272 |          },
273 |          {
274 |             "CONST_ACC" : 0,
275 |             "F_ADD_SUB" : 0,
276 |             "F_DIV" : 0,
277 |             "F_DIVIDEF" : 0,
278 |             "F_EXP2" : 0,
279 |             "F_LOG2" : 0,
280 |             "F_MAD_MUL" : 0,
281 |             "F_MIN_MAX" : 0,
282 |             "F_RCP" : 0,
283 |             "F_RSQRT" : 0,
284 |             "F_SIN_COS" : 0,
285 |             "F_SQRT" : 0,
286 |             "GLOBAL_ACC" : 0,
287 |             "I_ABS" : 0,
288 |             "I_ADD_SUB" : 2,
289 |             "I_DIV_REM" : 0,
290 |             "I_LOGICAL" : 0,
291 |             "I_MAD_MUL" : 0,
292 |             "I_MIN_MAX" : 0,
293 |             "I_MUL24" : 0,
294 |             "I_SAD" : 0,
295 |             "I_SHL_SHR" : 1,
296 |             "LOCAL_ACC" : 0,
297 |             "SHARED_ACC" : 4,
298 |             "begin" : 146,
299 |             "end" : 166,
300 |             "name" : "arti - [146, 166]",
301 |             "num_bra" : 2,
302 |             "num_insts" : 17,
303 |             "num_sync" : 0,
304 |             "num_unknown" : 8
305 |          },
306 |          {
307 |             "CONST_ACC" : 0,
308 |             "F_ADD_SUB" : 0,
309 |             "F_DIV" : 0,
310 |             "F_DIVIDEF" : 0,
311 |             "F_EXP2" : 0,
312 |             "F_LOG2" : 0,
313 |             "F_MAD_MUL" : 0,
314 |             "F_MIN_MAX" : 0,
315 |             "F_RCP" : 0,
316 |             "F_RSQRT" : 0,
317 |             "F_SIN_COS" : 0,
318 |             "F_SQRT" : 0,
319 |             "GLOBAL_ACC" : 0,
320 |             "I_ABS" : 0,
321 |             "I_ADD_SUB" : 0,
322 |             "I_DIV_REM" : 0,
323 |             "I_LOGICAL" : 0,
324 |             "I_MAD_MUL" : 1,
325 |             "I_MIN_MAX" : 0,
326 |             "I_MUL24" : 0,
327 |             "I_SAD" : 0,
328 |             "I_SHL_SHR" : 0,
329 |             "LOCAL_ACC" : 0,
330 |             "SHARED_ACC" : 0,
331 |             "begin" : 166,
332 |             "end" : 181,
333 |             "name" : "arti - [166, 181]",
334 |             "num_bra" : 0,
335 |             "num_insts" : 14,
336 |             "num_sync" : 0,
337 |             "num_unknown" : 13
338 |          },
339 |          {
340 |             "CONST_ACC" : 2,
341 |             "F_ADD_SUB" : 12,
342 |             "F_DIV" : 0,
343 |             "F_DIVIDEF" : 1,
344 |             "F_EXP2" : 0,
345 |             "F_LOG2" : 0,
346 |             "F_MAD_MUL" : 21,
347 |             "F_MIN_MAX" : 0,
348 |             "F_RCP" : 1,
349 |             "F_RSQRT" : 0,
350 |             "F_SIN_COS" : 0,
351 |             "F_SQRT" : 1,
352 |             "GLOBAL_ACC" : 0,
353 |             "I_ABS" : 0,
354 |             "I_ADD_SUB" : 3,
355 |             "I_DIV_REM" : 0,
356 |             "I_LOGICAL" : 0,
357 |             "I_MAD_MUL" : 0,
358 |             "I_MIN_MAX" : 0,
359 |             "I_MUL24" : 0,
360 |             "I_SAD" : 0,
361 |             "I_SHL_SHR" : 0,
362 |             "LOCAL_ACC" : 0,
363 |             "SHARED_ACC" : 8,
364 |             "begin" : 181,
365 |             "end" : 245,
366 |             "name" : "arti - [181, 245]",
367 |             "num_bra" : 3,
368 |             "num_insts" : 61,
369 |             "num_sync" : 0,
370 |             "num_unknown" : 9
371 |          },
372 |          {
373 |             "CONST_ACC" : 0,
374 |             "F_ADD_SUB" : 0,
375 |             "F_DIV" : 0,
376 |             "F_DIVIDEF" : 0,
377 |             "F_EXP2" : 0,
378 |             "F_LOG2" : 0,
379 |             "F_MAD_MUL" : 0,
380 |             "F_MIN_MAX" : 0,
381 |             "F_RCP" : 0,
382 |             "F_RSQRT" : 0,
383 |             "F_SIN_COS" : 0,
384 |             "F_SQRT" : 0,
385 |             "GLOBAL_ACC" : 8,
386 |             "I_ABS" : 0,
387 |             "I_ADD_SUB" : 12,
388 |             "I_DIV_REM" : 0,
389 |             "I_LOGICAL" : 0,
390 |             "I_MAD_MUL" : 1,
391 |             "I_MIN_MAX" : 1,
392 |             "I_MUL24" : 0,
393 |             "I_SAD" : 0,
394 |             "I_SHL_SHR" : 2,
395 |             "LOCAL_ACC" : 0,
396 |             "SHARED_ACC" : 13,
397 |             "begin" : 245,
398 |             "end" : 297,
399 |             "name" : "arti - [245, 297]",
400 |             "num_bra" : 3,
401 |             "num_insts" : 49,
402 |             "num_sync" : 2,
403 |             "num_unknown" : 7
404 |          },
405 |          {
406 |             "CONST_ACC" : 0,
407 |             "F_ADD_SUB" : 0,
408 |             "F_DIV" : 0,
409 |             "F_DIVIDEF" : 0,
410 |             "F_EXP2" : 0,
411 |             "F_LOG2" : 0,
412 |             "F_MAD_MUL" : 0,
413 |             "F_MIN_MAX" : 0,
414 |             "F_RCP" : 0,
415 |             "F_RSQRT" : 0,
416 |             "F_SIN_COS" : 0,
417 |             "F_SQRT" : 0,
418 |             "GLOBAL_ACC" : 0,
419 |             "I_ABS" : 0,
420 |             "I_ADD_SUB" : 3,
421 |             "I_DIV_REM" : 0,
422 |             "I_LOGICAL" : 0,
423 |             "I_MAD_MUL" : 0,
424 |             "I_MIN_MAX" : 0,
425 |             "I_MUL24" : 0,
426 |             "I_SAD" : 0,
427 |             "I_SHL_SHR" : 1,
428 |             "LOCAL_ACC" : 0,
429 |             "SHARED_ACC" : 4,
430 |             "begin" : 297,
431 |             "end" : 315,
432 |             "name" : "arti - [297, 315]",
433 |             "num_bra" : 3,
434 |             "num_insts" : 15,
435 |             "num_sync" : 0,
436 |             "num_unknown" : 4
437 |          },
438 |          {
439 |             "CONST_ACC" : 9,
440 |             "F_ADD_SUB" : 0,
441 |             "F_DIV" : 3,
442 |             "F_DIVIDEF" : 1,
443 |             "F_EXP2" : 0,
444 |             "F_LOG2" : 0,
445 |             "F_MAD_MUL" : 30,
446 |             "F_MIN_MAX" : 0,
447 |             "F_RCP" : 0,
448 |             "F_RSQRT" : 0,
449 |             "F_SIN_COS" : 0,
450 |             "F_SQRT" : 1,
451 |             "GLOBAL_ACC" : 5,
452 |             "I_ABS" : 0,
453 |             "I_ADD_SUB" : 2,
454 |             "I_DIV_REM" : 0,
455 |             "I_LOGICAL" : 0,
456 |             "I_MAD_MUL" : 1,
457 |             "I_MIN_MAX" : 0,
458 |             "I_MUL24" : 0,
459 |             "I_SAD" : 0,
460 |             "I_SHL_SHR" : 1,
461 |             "LOCAL_ACC" : 0,
462 |             "SHARED_ACC" : 0,
463 |             "begin" : 315,
464 |             "end" : 385,
465 |             "name" : "arti - [315, 385]",
466 |             "num_bra" : 3,
467 |             "num_insts" : 66,
468 |             "num_sync" : 0,
469 |             "num_unknown" : 10
470 |          }
471 |       ],
472 |       "function_name" : "_ZN3sph17knComputeForceSMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE"
473 |    }
474 | ]
475 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/main.h:
--------------------------------------------------------------------------------
  1 | //--------------------------------------------------------------------------------
  2 | // NVIDIA(R) GVDB VOXELS
  3 | // Copyright 2017, NVIDIA Corporation. 
  4 | //
  5 | // Redistribution and use in source and binary forms, with or without modification, 
  6 | // are permitted provided that the following conditions are met:
  7 | // 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
  8 | // 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 
  9 | //    in the documentation and/or  other materials provided with the distribution.
 10 | // 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived 
 11 | //    from this software without specific prior written permission.
 12 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
 13 | // BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 
 14 | // SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 15 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 16 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
 17 | // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 18 | // 
 19 | // Version 1.0: Rama Hoetzlein, 5/1/2017
 20 | //----------------------------------------------------------------------------------
 21 | 
 22 | #ifndef __MAIN_H__
 23 | #define __MAIN_H__
 24 | 
 25 | #pragma warning(disable:4996) // preventing snprintf >> _snprintf_s
 26 | 
 27 | #include "sample_utils/platform.h"
 28 | 
 29 | // trick for pragma message so we can write:
 30 | // #pragma message(__FILE__"("S__LINE__"): blah")
 31 | #define S__(x) #x // stringify the argument exactly as written
 32 | #define S_(x) S__(x) // extra level so the argument is macro-expanded before it is stringified
 33 | #define S__LINE__ S_(__LINE__) // the current line number as a string literal
 34 | 
 35 | #include <stdio.h>
 36 | #include <stdlib.h>
 37 | #include <vector>
 38 | #include <string>
 39 | #include <map>
 40 | 
 41 | #ifdef WIN32
 42 | #ifdef MEMORY_LEAKS_CHECK // opt-in: route new/malloc through the CRT debug allocator so each allocation records file and line
 43 | #   pragma message("build will Check for Memory Leaks!")
 44 | #   define _CRTDBG_MAP_ALLOC
 45 | #   include <stdlib.h>
 46 | #   include <crtdbg.h>
 47 | // Placement form `new(file, line) T`: forwards to the four-argument ::operator new with block type _NORMAL_BLOCK.
 48 | inline void* operator new(size_t size, const char *file, int line)
 49 | {
 50 |     return ::operator new(size, _NORMAL_BLOCK, file, line);
 51 | }
 52 | // Placement delete with the same extra parameters as the operator new above; forwards to the four-argument ::operator delete.
 53 | inline void __cdecl operator delete(void *ptr, const char *file, int line)
 54 | {
 55 |     ::operator delete(ptr, _NORMAL_BLOCK, file, line);
 56 | }
 57 | #define DEBUG_NEW new( __FILE__, __LINE__)
 58 | #define MALLOC_DBG(x) _malloc_dbg((x), _NORMAL_BLOCK, __FILE__, __LINE__) // no trailing ';' so malloc(n) stays usable inside expressions
 59 | #define malloc(x) MALLOC_DBG(x)
 60 | #define new DEBUG_NEW
 61 | #endif
 62 | #endif
 63 | 
 64 | //----------------- to be declared in the code of the sample: so the sample can decide how to display messages
 65 | class NVPWindow {  // Window base class: input state, GL context flags and overridable event hooks.
 66 | public:
 67 |     enum ButtonAction {  // action value passed to mouse() and keyboard()
 68 |         BUTTON_RELEASE = 0,
 69 |         BUTTON_PRESS = 1,
 70 |         BUTTON_REPEAT = 2,
 71 |     };
 72 |     enum MouseButton  // button index; also the bit position used by MouseButtonFlag
 73 |     {
 74 |         MOUSE_BUTTON_LEFT = 0,
 75 |         MOUSE_BUTTON_RIGHT = 1,
 76 |         MOUSE_BUTTON_MIDDLE = 2,
 77 |         NUM_MOUSE_BUTTONIDX,  // number of button indices (3)
 78 |     };
 79 |     enum MouseButtonFlag  // one bit per MouseButton: flag = 1 << button index
 80 |     {
 81 |         MOUSE_BUTTONFLAG_NONE = 0,
 82 |         MOUSE_BUTTONFLAG_LEFT = (1 << MOUSE_BUTTON_LEFT),
 83 |         MOUSE_BUTTONFLAG_RIGHT = (1 << MOUSE_BUTTON_RIGHT),
 84 |         MOUSE_BUTTONFLAG_MIDDLE = (1 << MOUSE_BUTTON_MIDDLE)
 85 |     };
 86 |     enum KeyCode {  // printable US-layout keys use their ASCII code; non-printable keys start at 256
 87 |         KEY_UNKNOWN = -1,  // not a valid index into m_keyPressed / m_keyToggled
 88 |         KEY_SPACE = 32,
 89 |         KEY_APOSTROPHE = 39  /* ' */,
 90 |         KEY_LEFT_PARENTHESIS = 40  /* ( */,
 91 |         KEY_RIGHT_PARENTHESIS = 41  /* ) */,
 92 |         KEY_ASTERISK = 42  /* * */,
 93 |         KEY_PLUS = 43  /* + */,
 94 |         KEY_COMMA = 44  /* , */,
 95 |         KEY_MINUS = 45  /* - */,
 96 |         KEY_PERIOD = 46  /* . */,
 97 |         KEY_SLASH = 47  /* / */,
 98 |         KEY_0 = 48,
 99 |         KEY_1 = 49,
100 |         KEY_2 = 50,
101 |         KEY_3 = 51,
102 |         KEY_4 = 52,
103 |         KEY_5 = 53,
104 |         KEY_6 = 54,
105 |         KEY_7 = 55,
106 |         KEY_8 = 56,
107 |         KEY_9 = 57,
108 |         KEY_COLON = 58  /* : */,
109 |         KEY_SEMICOLON = 59  /* ; */,
110 |         KEY_LESS = 60  /* < */,
111 |         KEY_EQUAL = 61  /* = */,
112 |         KEY_GREATER = 62  /* > */,
113 |         KEY_A = 65,
114 |         KEY_B = 66,
115 |         KEY_C = 67,
116 |         KEY_D = 68,
117 |         KEY_E = 69,
118 |         KEY_F = 70,
119 |         KEY_G = 71,
120 |         KEY_H = 72,
121 |         KEY_I = 73,
122 |         KEY_J = 74,
123 |         KEY_K = 75,
124 |         KEY_L = 76,
125 |         KEY_M = 77,
126 |         KEY_N = 78,
127 |         KEY_O = 79,
128 |         KEY_P = 80,
129 |         KEY_Q = 81,
130 |         KEY_R = 82,
131 |         KEY_S = 83,
132 |         KEY_T = 84,
133 |         KEY_U = 85,
134 |         KEY_V = 86,
135 |         KEY_W = 87,
136 |         KEY_X = 88,
137 |         KEY_Y = 89,
138 |         KEY_Z = 90,
139 |         KEY_LEFT_BRACKET = 91  /* [ */,
140 |         KEY_BACKSLASH = 92  /* \ */,
141 |         KEY_RIGHT_BRACKET = 93  /* ] */,
142 |         KEY_GRAVE_ACCENT = 96  /* ` */,
143 |         KEY_WORLD_1 = 161 /* non-US #1 */,
144 |         KEY_WORLD_2 = 162 /* non-US #2 */,
145 |         /* Function keys */
146 |         KEY_ESCAPE = 256,
147 |         KEY_ENTER = 257,
148 |         KEY_TAB = 258,
149 |         KEY_BACKSPACE = 259,
150 |         KEY_INSERT = 260,
151 |         KEY_DELETE = 261,
152 |         KEY_RIGHT = 262,
153 |         KEY_LEFT = 263,
154 |         KEY_DOWN = 264,
155 |         KEY_UP = 265,
156 |         KEY_PAGE_UP = 266,
157 |         KEY_PAGE_DOWN = 267,
158 |         KEY_HOME = 268,
159 |         KEY_END = 269,
160 |         KEY_CAPS_LOCK = 280,
161 |         KEY_SCROLL_LOCK = 281,
162 |         KEY_NUM_LOCK = 282,
163 |         KEY_PRINT_SCREEN = 283,
164 |         KEY_PAUSE = 284,
165 |         KEY_F1 = 290,
166 |         KEY_F2 = 291,
167 |         KEY_F3 = 292,
168 |         KEY_F4 = 293,
169 |         KEY_F5 = 294,
170 |         KEY_F6 = 295,
171 |         KEY_F7 = 296,
172 |         KEY_F8 = 297,
173 |         KEY_F9 = 298,
174 |         KEY_F10 = 299,
175 |         KEY_F11 = 300,
176 |         KEY_F12 = 301,
177 |         KEY_F13 = 302,
178 |         KEY_F14 = 303,
179 |         KEY_F15 = 304,
180 |         KEY_F16 = 305,
181 |         KEY_F17 = 306,
182 |         KEY_F18 = 307,
183 |         KEY_F19 = 308,
184 |         KEY_F20 = 309,
185 |         KEY_F21 = 310,
186 |         KEY_F22 = 311,
187 |         KEY_F23 = 312,
188 |         KEY_F24 = 313,
189 |         KEY_F25 = 314,
190 |         KEY_KP_0 = 320,
191 |         KEY_KP_1 = 321,
192 |         KEY_KP_2 = 322,
193 |         KEY_KP_3 = 323,
194 |         KEY_KP_4 = 324,
195 |         KEY_KP_5 = 325,
196 |         KEY_KP_6 = 326,
197 |         KEY_KP_7 = 327,
198 |         KEY_KP_8 = 328,
199 |         KEY_KP_9 = 329,
200 |         KEY_KP_DECIMAL = 330,
201 |         KEY_KP_DIVIDE = 331,
202 |         KEY_KP_MULTIPLY = 332,
203 |         KEY_KP_SUBTRACT = 333,
204 |         KEY_KP_ADD = 334,
205 |         KEY_KP_ENTER = 335,
206 |         KEY_KP_EQUAL = 336,
207 |         KEY_LEFT_SHIFT = 340,
208 |         KEY_LEFT_CONTROL = 341,
209 |         KEY_LEFT_ALT = 342,
210 |         KEY_LEFT_SUPER = 343,
211 |         KEY_RIGHT_SHIFT = 344,
212 |         KEY_RIGHT_CONTROL = 345,
213 |         KEY_RIGHT_ALT = 346,
214 |         KEY_RIGHT_SUPER = 347,
215 |         KEY_MENU = 348,
216 |         KEY_LAST = KEY_MENU,  // largest key code; the key-state arrays hold KEY_LAST + 1 entries
217 |     };
218 |     enum KeyModifiers {  // bit flags; presumably OR-ed together in the `mods` arguments and m_mods (confirm against the implementation)
219 |         KMOD_SHIFT = 0x0001,
220 |         KMOD_CONTROL = 0x0002,
221 |         KMOD_ALT = 0x0004,
222 |         KMOD_SUPER = 0x0008,
223 |     };
224 |     typedef struct WINinternal* WINhandle;  // pointer to a struct that is only forward-declared here
225 |     typedef void(*NVPproc)(void);           // generic function-pointer type returned by sysGetProcAddress()
226 | 
227 |     // OpenGL specific: settings describing the context a window is created with
228 |     struct ContextFlags {
229 |         int         major;
230 |         int         minor;
231 |         int         MSAA;
232 |         int         depth;
233 |         int         stencil;
234 |         bool        debug;
235 |         bool        robust;
236 |         bool        core;
237 |         bool        forward;
238 |         bool        stereo;
239 |         NVPWindow*  share;
240 |         // Note: the parameter order below differs from the field order (_core is third); defaults are 3.0, non-core, no MSAA, depth 24, stencil 8.
241 |         ContextFlags(int _major = 3, int _minor = 0, bool _core = false, int _MSAA = 0, int _depth = 24, int _stencil = 8, bool _debug = false, bool _robust = false, bool _forward = false, bool _stereo = false, NVPWindow* _share = 0)
242 |         {
243 |             major = _major;
244 |             minor = _minor;
245 |             MSAA = _MSAA;
246 |             depth = _depth;
247 |             stencil = _stencil;
248 |             core = _core;
249 |             debug = _debug;
250 |             robust = _robust;
251 |             forward = _forward;
252 |             stereo = _stereo;
253 |             share = _share;
254 |         }
255 | 
256 |     };
257 |     unsigned int  m_debugFilter;
258 |     std::string   m_debugTitle;
259 | 
260 |     WINhandle     m_internal;
261 | 
262 |     int			m_renderCnt;  // written by postRedisplay()
263 |     int			m_curX, m_curY;  // written by setCurMouse(); -1 until then
264 |     int			m_wheel;
265 |     int			m_winSz[4];  // [0] = width, [1] = height (see setWinSz); [2] and [3] are not used in this header
266 |     int			m_mods;
267 |     ContextFlags	m_cflags;
268 |     bool			m_doSwap;
269 |     bool			m_active;
270 |     bool			m_vsync;
271 |     bool			m_keyPressed[KEY_LAST + 1];  // indexed by key code
272 |     bool			m_keyToggled[KEY_LAST + 1];  // indexed by key code
273 |     bool			m_fullscreen;
274 |     int			m_display_frame;
275 |     int			m_golden_frame;
276 |     int			m_screenquad_prog;
277 |     int			m_screenquad_vshader;
278 |     int			m_screenquad_fshader;
279 |     int			m_screenquad_vbo[3];
280 |     int			m_screenquad_utex1;
281 |     int			m_screenquad_utex2;
282 |     int			m_screenquad_utexflags;
283 |     int			m_screenquad_ucoords;
284 |     int			m_screenquad_uscreen;
285 |     // Starts with no key/mouse state, a zeroed window size, wheel and frame counter, and m_renderCnt = 1.
286 |     NVPWindow()
287 |         : m_debugFilter(0)   // initializers listed in member declaration order
288 |         , m_internal(0)
289 |         , m_renderCnt(1)
290 |     {
291 |         m_curX = m_curY = -1;
292 |         m_wheel = m_mods = 0;  // getWheel() must not return an indeterminate value
293 |         m_display_frame = 0;   // keeps isFirstFrame()/getDisplayFrame() well-defined on a fresh object
294 |         m_fullscreen = false;
295 |         memset(m_winSz, 0, sizeof(m_winSz));  // getWidth()/getHeight() read this before any setWinSz() call
296 |         memset(m_keyPressed, 0, sizeof(m_keyPressed)); memset(m_keyToggled, 0, sizeof(m_keyToggled));
297 |     }  // NOTE(review): m_doSwap, m_active, m_vsync, m_golden_frame and m_screenquad_* are still not initialized here; presumably set elsewhere before use (confirm)
298 |     bool isPressed(int key) const { return key >= 0 && key <= KEY_LAST && m_keyPressed[key]; }  // false for out-of-range codes such as KEY_UNKNOWN
299 |     bool onPress(int key) const { return key >= 0 && key <= KEY_LAST && m_keyPressed[key] && m_keyToggled[key]; }  // true only when both the pressed and toggled flags are set
300 |     virtual ~NVPWindow() {}  // polymorphic base: allow safe destruction through an NVPWindow pointer
301 |     // Accessors
302 |     inline void         setWinSz(int w, int h) { m_winSz[0] = w; m_winSz[1] = h; }
303 |     inline const int*   getWinSz() const { return m_winSz; }
304 |     inline int          getWidth() const { return m_winSz[0]; }
305 |     inline int          getHeight() const { return m_winSz[1]; }
306 |     inline int          getWheel() const { return m_wheel; }
307 |     inline int          getMods() const { return m_mods; }
308 |     inline void         setMods(int m) { m_mods = m; }
309 |     inline void         setCurMouse(int x, int y) { m_curX = x; m_curY = y; }
310 |     inline int          getCurX() const { return m_curX; }
311 |     inline int          getCurY() const { return m_curY; }
312 |     inline bool isFirstFrame() const	{ return m_display_frame == 0; }
313 |     inline int getDisplayFrame() const { return m_display_frame; }
314 | 
315 |     // activate and deactivate are not thread-safe, need to be wrapped in mutex if called from multiple threads
316 |     // invisible windows will not have any active callbacks, nor will they be affected by sysEvents
317 |     bool activate(int width, int height, const char* title, const ContextFlags* flags, int invisible = 0);
318 |     void deactivate();
319 | 
320 |     // compatibility hack
321 |     bool create(const char* title = NULL, const ContextFlags* cflags = 0, int width = 1024, int height = 768);
322 |     void setTitle(const char* title);
323 |     void maximize();
324 |     void resize_window(int w, int h);
325 |     void restore();
326 |     void minimize();
327 |     void postRedisplay(int n = 1) { m_renderCnt = n; }  // only stores n in m_renderCnt
328 |     void postQuit();
329 |     void makeContextCurrent();
330 |     void makeContextNonCurrent();
331 |     void swapBuffers();
332 |     void swapInterval(int i);
333 |     bool isOpen();
334 |     void vsync(bool state);
335 |     void setKeyPress(int key, bool state);
336 |     void setFullscreen(bool fullscreen);
337 |     void save_frame(char* fname);
338 | 
339 |     // from NVPWindow: overridable hooks; every default below is a no-op (init() and begin() return true)
340 |     virtual bool init() { return true; }
341 |     virtual void shutdown() {}
342 |     virtual void reshape(int w, int h) { }
343 |     virtual void motion(int x, int y, int dx, int dy) {}
344 |     virtual void mousewheel(int delta) {}
345 |     virtual void on_arg(std::string arg, std::string val) {}
346 |     virtual void mouse(MouseButton button, ButtonAction action, int mods, int x, int y) {}
347 |     virtual void keyboard(KeyCode key, ButtonAction action, int mods, int x, int y) {}
348 |     virtual void keyboardchar(unsigned char key, int mods, int x, int y) {}
349 |     virtual void display() {}
350 |     virtual bool begin() { return true; }
351 |     virtual void end() {}
352 | 
353 |     // from WindowProfiler
354 |     int run(const std::string &name, const std::string& shortname, int argc, const char** argv, int width, int height, int Major, int Minor, int GoldenFrame = 0);
355 |     void initGL();
356 |     void initScreenQuadGL();
357 |     void clearScreenGL();
358 |     void createScreenQuadGL(int* glid, int w, int h);
359 |     void renderScreenQuadGL(int glid, char inv1 = 0);
360 |     void compositeScreenQuadGL(int glid1, int glid2, char inv1 = 0, char inv2 = 0);
361 |     void renderScreenQuadGL(int glid1, int glid2, float x1, float y1, float x2, float y2, char inv1 = 0, char inv2 = 0);
362 | 
363 |     //////////////////////////////////////////////////////////////////////////
364 |     // system related (static: callable without a window instance)
365 |     static void     sysInit();
366 |     static void     sysDeinit();
367 |     static bool     sysPollEvents(bool bLoop);
368 |     static void     sysWaitEvents();
369 |     static NVPproc  sysGetProcAddress(const char* name);
370 |     static int      sysExtensionSupported(const char* name);
371 |     static double   sysGetTime(); // in seconds
372 |     static void     sysSleep(double seconds);
373 |     static void     sysVisibleConsole();
374 |     static std::string sysExePath();
375 | };
376 | 
377 | extern int  sample_main(int argc, const char**argv);  // declared here, defined elsewhere in the project
378 | 
379 | // sample-specific implementation, called by nvprintfLevel. For example to redirect the message to a specific window or part of the viewport
380 | extern void sample_print(int level, const char * fmt2);  // same function as the sample_print declaration a few lines below (only the parameter name differs)
381 | 
382 | extern void checkGL(char* msg);
383 | 
384 | // sample-specific implementation, called by nvprintf*. For example to redirect the message to a specific window or part of the viewport
385 | extern void sample_print(int level, const char * fmt);
386 | 
387 | void nvprintf(const char * fmt, ...);  // printf-style variadic signature
388 | void nvprintfLevel(int level, const char * fmt, ...);  // as nvprintf, with an explicit level argument
389 | void nvprintSetLevel(int l);
390 | int  nvprintGetLevel();
391 | void nvprintSetLogging(bool b);
392 | void nverror();
393 | 
394 | bool getFileLocation(char* filename, char* outpath);  // NOTE(review): presumably resolves filename and writes the result to outpath, returning false if not found; outpath capacity is not expressed in the signature (confirm)
395 | bool getFileLocation(char* filename, char* outpath, std::vector<std::string> paths);  // overload taking a list of paths; note the vector is passed by value
396 | 
397 | #endif
398 | 


--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/json/include/json/reader.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors
  2 | // Distributed under MIT license, or public domain if desired and
  3 | // recognized in your jurisdiction.
  4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
  5 | 
  6 | #ifndef CPPTL_JSON_READER_H_INCLUDED
  7 | #define CPPTL_JSON_READER_H_INCLUDED
  8 | 
  9 | #if !defined(JSON_IS_AMALGAMATION)
 10 | #include "features.h"
 11 | #include "value.h"
 12 | #endif // if !defined(JSON_IS_AMALGAMATION)
 13 | #include <deque>
 14 | #include <iosfwd>
 15 | #include <istream>
 16 | #include <stack>
 17 | #include <string>
 18 | 
 19 | // Disable warning C4251: <data member>: <type> needs to have dll-interface to
 20 | // be used by...
 21 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING)
 22 | #pragma warning(push)
 23 | #pragma warning(disable : 4251)
 24 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING)
 25 | 
 26 | #pragma pack(push, 8)
 27 | 
 28 | namespace Json {
 29 | 
 30 | /** \brief Unserialize a <a HREF="http://www.json.org">JSON</a> document into a
 31 |  *Value.
 32 |  *
 33 |  * \deprecated Use CharReader and CharReaderBuilder.
 34 |  */
 35 | class JSON_API Reader {
 36 | public:
 37 |   typedef char Char;
 38 |   typedef const Char* Location;  // a position inside the text is a plain pointer to a const Char
 39 | 
 40 |   /** \brief An error tagged with where in the JSON text it was encountered.
 41 |    *
 42 |    * The offsets give the [start, limit) range of bytes within the text. Note
 43 |    * that this is bytes, not codepoints.
 44 |    *
 45 |    */
 46 |   struct StructuredError {
 47 |     ptrdiff_t offset_start;
 48 |     ptrdiff_t offset_limit;
 49 |     String message;
 50 |   };
 51 | 
 52 |   /** \brief Constructs a Reader allowing all features
 53 |    * for parsing.
 54 |    */
 55 |   JSONCPP_DEPRECATED("Use CharReader and CharReaderBuilder instead")
 56 |   Reader();
 57 | 
 58 |   /** \brief Constructs a Reader allowing the specified feature set
 59 |    * for parsing.
 60 |    */
 61 |   JSONCPP_DEPRECATED("Use CharReader and CharReaderBuilder instead")
 62 |   Reader(const Features& features);
 63 | 
 64 |   /** \brief Read a Value from a <a HREF="http://www.json.org">JSON</a>
 65 |    * document.
 66 |    * \param document UTF-8 encoded string containing the document to read.
 67 |    * \param root [out] Contains the root value of the document if it was
 68 |    *             successfully parsed.
 69 |    * \param collectComments \c true to collect comment and allow writing them
 70 |    * back during
 71 |    *                        serialization, \c false to discard comments.
 72 |    *                        This parameter is ignored if
 73 |    * Features::allowComments_
 74 |    *                        is \c false.
 75 |    * \return \c true if the document was successfully parsed, \c false if an
 76 |    * error occurred.
 77 |    */
 78 |   bool
 79 |   parse(const std::string& document, Value& root, bool collectComments = true);
 80 | 
 81 |   /** \brief Read a Value from a <a HREF="http://www.json.org">JSON</a>
 82 |    document.
 83 |    * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the
 84 |    document to read.
 85 |    * \param endDoc Pointer on the end of the UTF-8 encoded string of the
 86 |    document to read.
 87 |    *               Must be >= beginDoc.
 88 |    * \param root [out] Contains the root value of the document if it was
 89 |    *             successfully parsed.
 90 |    * \param collectComments \c true to collect comment and allow writing them
 91 |    back during
 92 |    *                        serialization, \c false to discard comments.
 93 |    *                        This parameter is ignored if
 94 |    Features::allowComments_
 95 |    *                        is \c false.
 96 |    * \return \c true if the document was successfully parsed, \c false if an
 97 |    error occurred.
 98 |    */
 99 |   bool parse(const char* beginDoc,
100 |              const char* endDoc,
101 |              Value& root,
102 |              bool collectComments = true);
103 | 
104 |   /// \brief Parse from input stream.
105 |   /// \see Json::operator>>(std::istream&, Json::Value&).
106 |   bool parse(IStream& is, Value& root, bool collectComments = true);
107 | 
108 |   /** \brief Returns a user friendly string that lists errors in the parsed
109 |    * document.
110 |    * \return Formatted error message with the list of errors with their location
111 |    * in
112 |    *         the parsed document. An empty string is returned if no error
113 |    * occurred
114 |    *         during parsing.
115 |    * \deprecated Use getFormattedErrorMessages() instead (typo fix).
116 |    */
117 |   JSONCPP_DEPRECATED("Use getFormattedErrorMessages() instead.")
118 |   String getFormatedErrorMessages() const;
119 | 
120 |   /** \brief Returns a user friendly string that lists errors in the parsed
121 |    * document.
122 |    * \return Formatted error message with the list of errors with their location
123 |    * in
124 |    *         the parsed document. An empty string is returned if no error
125 |    * occurred
126 |    *         during parsing.
127 |    */
128 |   String getFormattedErrorMessages() const;
129 | 
130 |   /** \brief Returns a vector of structured errors encountered while parsing.
131 |    * \return A (possibly empty) vector of StructuredError objects. Currently
132 |    *         only one error can be returned, but the caller should tolerate
133 |    * multiple
134 |    *         errors.  This can occur if the parser recovers from a non-fatal
135 |    *         parse error and then encounters additional errors.
136 |    */
137 |   std::vector<StructuredError> getStructuredErrors() const;
138 | 
139 |   /** \brief Add a semantic error message.
140 |    * \param value JSON Value location associated with the error
141 |    * \param message The error message.
142 |    * \return \c true if the error was successfully added, \c false if the
143 |    * Value offset exceeds the document size.
144 |    */
145 |   bool pushError(const Value& value, const String& message);
146 | 
147 |   /** \brief Add a semantic error message with extra context.
148 |    * \param value JSON Value location associated with the error
149 |    * \param message The error message.
150 |    * \param extra Additional JSON Value location to contextualize the error
151 |    * \return \c true if the error was successfully added, \c false if either
152 |    * Value offset exceeds the document size.
153 |    */
154 |   bool pushError(const Value& value, const String& message, const Value& extra);
155 | 
156 |   /** \brief Return whether there are any errors.
157 |    * \return \c true if there are no errors to report, \c false if
158 |    * errors have occurred.
159 |    */
160 |   bool good() const;
161 | 
162 | private:
163 |   enum TokenType {  // stored in Token::type_
164 |     tokenEndOfStream = 0,
165 |     tokenObjectBegin,
166 |     tokenObjectEnd,
167 |     tokenArrayBegin,
168 |     tokenArrayEnd,
169 |     tokenString,
170 |     tokenNumber,
171 |     tokenTrue,
172 |     tokenFalse,
173 |     tokenNull,
174 |     tokenArraySeparator,
175 |     tokenMemberSeparator,
176 |     tokenComment,
177 |     tokenError
178 |   };
179 | 
180 |   class Token {  // a token kind plus two Location pointers into the text
181 |   public:
182 |     TokenType type_;
183 |     Location start_;
184 |     Location end_;
185 |   };
186 | 
187 |   class ErrorInfo {  // one recorded error: its token, message text and an extra Location (addError defaults it to nullptr)
188 |   public:
189 |     Token token_;
190 |     String message_;
191 |     Location extra_;
192 |   };
193 | 
194 |   typedef std::deque<ErrorInfo> Errors;  // type of errors_
195 | 
196 |   bool readToken(Token& token);
197 |   void skipSpaces();
198 |   bool match(Location pattern, int patternLength);
199 |   bool readComment();
200 |   bool readCStyleComment();
201 |   bool readCppStyleComment();
202 |   bool readString();
203 |   void readNumber();
204 |   bool readValue();
205 |   bool readObject(Token& token);
206 |   bool readArray(Token& token);
207 |   bool decodeNumber(Token& token);
208 |   bool decodeNumber(Token& token, Value& decoded);
209 |   bool decodeString(Token& token);
210 |   bool decodeString(Token& token, String& decoded);
211 |   bool decodeDouble(Token& token);
212 |   bool decodeDouble(Token& token, Value& decoded);
213 |   bool decodeUnicodeCodePoint(Token& token,
214 |                               Location& current,
215 |                               Location end,
216 |                               unsigned int& unicode);
217 |   bool decodeUnicodeEscapeSequence(Token& token,
218 |                                    Location& current,
219 |                                    Location end,
220 |                                    unsigned int& unicode);
221 |   bool addError(const String& message, Token& token, Location extra = nullptr);
222 |   bool recoverFromError(TokenType skipUntilToken);
223 |   bool addErrorAndRecover(const String& message,
224 |                           Token& token,
225 |                           TokenType skipUntilToken);
226 |   void skipUntilSpace();
227 |   Value& currentValue();
228 |   Char getNextChar();
229 |   void
230 |   getLocationLineAndColumn(Location location, int& line, int& column) const;
231 |   String getLocationLineAndColumn(Location location) const;
232 |   void addComment(Location begin, Location end, CommentPlacement placement);
233 |   void skipCommentTokens(Token& token);
234 | 
235 |   static bool containsNewLine(Location begin, Location end);
236 |   static String normalizeEOL(Location begin, Location end);
237 | 
238 |   typedef std::stack<Value*> Nodes;  // type of nodes_
239 |   Nodes nodes_;
240 |   Errors errors_;
241 |   String document_;
242 |   Location begin_{};  // the `{}` initializers below value-initialize: null pointers and false
243 |   Location end_{};
244 |   Location current_{};
245 |   Location lastValueEnd_{};
246 |   Value* lastValue_{};
247 |   String commentsBefore_;
248 |   Features features_;
249 |   bool collectComments_{};
250 | }; // Reader
251 | 
252 | /** Interface for reading JSON from a char array.
253 |  */
254 | class JSON_API CharReader {
255 | public:
256 |   virtual ~CharReader() = default;  // virtual so implementations can be destroyed through a CharReader pointer
257 |   /** \brief Read a Value from a <a HREF="http://www.json.org">JSON</a>
258 |    document.
259 |    * The document must be a UTF-8 encoded string containing the document to
260 |    read.
261 |    *
262 |    * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the
263 |    document to read.
264 |    * \param endDoc Pointer on the end of the UTF-8 encoded string of the
265 |    document to read.
266 |    *        Must be >= beginDoc.
267 |    * \param root [out] Contains the root value of the document if it was
268 |    *             successfully parsed.
269 |    * \param errs [out] Formatted error messages (if not NULL):
270 |    *        a user friendly string that lists errors in the parsed
271 |    * document.
272 |    * \return \c true if the document was successfully parsed, \c false if an
273 |    error occurred.
274 |    */
275 |   virtual bool parse(char const* beginDoc,
276 |                      char const* endDoc,
277 |                      Value* root,
278 |                      String* errs) = 0;
279 | 
280 |   class JSON_API Factory {  // abstract factory; CharReaderBuilder is the implementation declared in this header
281 |   public:
282 |     virtual ~Factory() = default;
283 |     /** \brief Allocate a CharReader via operator new().
284 |      * \throw std::exception if something goes wrong (e.g. invalid settings)
285 |      */
286 |     virtual CharReader* newCharReader() const = 0;  // NOTE(review): returns a raw pointer; presumably the caller owns it and must delete it (confirm)
287 |   }; // Factory
288 | };   // CharReader
289 | 
290 | /** \brief Build a CharReader implementation.
291 | 
292 | Usage:
293 | \code
294 |   using namespace Json;
295 |   CharReaderBuilder builder;
296 |   builder["collectComments"] = false;
297 |   Value value;
298 |   String errs;
299 |   bool ok = parseFromStream(builder, std::cin, &value, &errs);
300 | \endcode
301 | */
302 | class JSON_API CharReaderBuilder : public CharReader::Factory {
303 | public:
304 |   // Note: We use a Json::Value so that we can add data-members to this class
305 |   // without a major version bump.
306 |   /** Configuration of this builder.
307 |     The setting names are case-sensitive.
308 |     Available settings:
309 |     - `"collectComments": false or true`
310 |       - true to collect comments and allow writing them
311 |         back during serialization, false to discard comments.
312 |         This parameter is ignored if allowComments is false.
313 |     - `"allowComments": false or true`
314 |       - true if comments are allowed.
315 |     - `"strictRoot": false or true`
316 |       - true if root must be either an array or an object value
317 |     - `"allowDroppedNullPlaceholders": false or true`
318 |       - true if dropped null placeholders are allowed. (See
319 |     StreamWriterBuilder.)
320 |     - `"allowNumericKeys": false or true`
321 |       - true if numeric object keys are allowed.
322 |     - `"allowSingleQuotes": false or true`
323 |       - true if '' are allowed for strings (both keys and values)
324 |     - `"stackLimit": integer`
325 |       - Exceeding stackLimit (recursive depth of `readValue()`) will
326 |         cause an exception.
327 |       - This is a security issue (seg-faults caused by deeply nested JSON),
328 |         so the default is low.
329 |     - `"failIfExtra": false or true`
330 |       - If true, `parse()` returns false when extra non-whitespace trails
331 |         the JSON value in the input string.
332 |     - `"rejectDupKeys": false or true`
333 |       - If true, `parse()` returns false when a key is duplicated within an
334 |     object.
335 |     - `"allowSpecialFloats": false or true`
336 |       - If true, special float values (NaNs and infinities) are allowed
337 |         and their values can be restored losslessly.
338 | 
339 |     You can examine `settings_` yourself
340 |     to see the defaults. You can also write and read them just like any
341 |     JSON Value.
342 |     \sa setDefaults()
343 |     */
344 |   Json::Value settings_;
345 | 
346 |   CharReaderBuilder();
347 |   ~CharReaderBuilder() override;
348 | 
349 |   CharReader* newCharReader() const override;  // implements CharReader::Factory::newCharReader()
350 | 
351 |   /** \return true if 'settings' are legal and consistent;
352 |    *   otherwise, indicate bad settings via 'invalid'.
353 |    */
354 |   bool validate(Json::Value* invalid) const;
355 | 
356 |   /** A simple way to update a specific setting.
357 |    */
358 |   Value& operator[](const String& key);
359 | 
360 |   /** Called by ctor, but you can use this to reset settings_.
361 |    * \pre 'settings' != NULL (but Json::null is fine)
362 |    * \remark Defaults:
363 |    * \snippet src/lib_json/json_reader.cpp CharReaderBuilderDefaults
364 |    */
365 |   static void setDefaults(Json::Value* settings);
366 |   /** Same as old Features::strictMode().
367 |    * \pre 'settings' != NULL (but Json::null is fine)
368 |    * \remark Defaults:
369 |    * \snippet src/lib_json/json_reader.cpp CharReaderBuilderStrictMode
370 |    */
371 |   static void strictMode(Json::Value* settings);
372 | };
373 | 
374 | /** Consume entire stream and use its begin/end.
375 |  * Someday we might have a real StreamReader, but for now this
376 |  * is convenient.
377 |  */
378 | bool JSON_API parseFromStream(CharReader::Factory const&,  // NOTE(review): presumably used to create the CharReader that parses the buffered text (confirm)
379 |                               IStream&,                    // stream that is consumed entirely, per the comment above
380 |                               Value* root,                 // same name as the [out] root parameter of CharReader::parse
381 |                               String* errs);               // same name as the [out] errs parameter of CharReader::parse
382 | 
383 | /** \brief Read from 'sin' into 'root'.
384 | 
385 |  Always keep comments from the input JSON.
386 | 
387 |  This can be used to read a file into a particular sub-object.
388 |  For example:
389 |  \code
390 |  Json::Value root;
391 |  cin >> root["dir"]["file"];
392 |  cout << root;
393 |  \endcode
394 |  Result:
395 |  \verbatim
396 |  {
397 |  "dir": {
398 |      "file": {
399 |      // The input stream JSON would be nested here.
400 |      }
401 |  }
402 |  }
403 |  \endverbatim
404 |  \throw std::exception on parse error.
405 |  \see Json::operator<<()
406 | */
407 | JSON_API IStream& operator>>(IStream&, Value&);  // parameters are unnamed here; the comment above refers to them as 'sin' and 'root'
408 | 
409 | } // namespace Json
410 | 
411 | #pragma pack(pop)
412 | 
413 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING)
414 | #pragma warning(pop)
415 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING)
416 | 
417 | #endif // CPPTL_JSON_READER_H_INCLUDED
418 | 


--------------------------------------------------------------------------------