├── sph_fastest ├── Hybrid_Fluid_Simulation │ ├── nv_gui.h │ ├── ball32.png │ ├── gl_main.cpp │ ├── sph_kernel.cu │ ├── sph_particle.h │ ├── pcisph_factor.h │ ├── sph_parameter.h │ ├── GL_LIB │ │ ├── freeglut.dll │ │ ├── glew32.dll │ │ ├── glew_64 │ │ │ ├── dll │ │ │ │ ├── debug │ │ │ │ │ └── glew32d.dll │ │ │ │ └── release │ │ │ │ │ └── glew32.dll │ │ │ └── libs │ │ │ │ ├── debug │ │ │ │ └── glew32d.lib │ │ │ │ └── release │ │ │ │ └── glew32.lib │ │ └── freeglut_64 │ │ │ ├── dll │ │ │ ├── debug │ │ │ │ └── freeglutd.dll │ │ │ └── release │ │ │ │ └── freeglut.dll │ │ │ ├── libs │ │ │ ├── debug │ │ │ │ └── freeglutd.lib │ │ │ └── release │ │ │ │ └── freeglut.lib │ │ │ └── include │ │ │ └── GL │ │ │ ├── glut.h │ │ │ ├── freeglut.h │ │ │ └── freeglut_ext.h │ ├── json │ │ ├── lib_json.lib │ │ └── include │ │ │ ├── CMakeLists.txt │ │ │ ├── json │ │ │ ├── json.h │ │ │ ├── autolink.h │ │ │ ├── forwards.h │ │ │ ├── features.h │ │ │ ├── assertions.h │ │ │ ├── allocator.h │ │ │ ├── config.h │ │ │ ├── writer.h │ │ │ └── reader.h │ │ │ └── version.h │ ├── pcisph_factor.cpp │ ├── sph_arrangement.cu │ ├── sph_hybrid_system.cpp │ ├── sph_marching_cube.cpp │ ├── scene_default1.json │ ├── Shader │ │ ├── shader.vs │ │ └── shader.fs │ ├── sph_timer.h │ ├── scene_default.json │ ├── sph_timer.cpp │ ├── sph_header.h │ ├── insts_latency.json │ ├── sph_data.h │ ├── sph_marching_cube.h │ ├── Hybrid_Fluid_Simulation.vcxproj.user │ ├── gpu_model_reader.h │ ├── parameters.h │ ├── gpu_model.cuh │ ├── cuda_prescan │ │ ├── scan.cuh │ │ ├── scan_kern.cuh │ │ ├── prefix_sum.cu │ │ └── scan.cu │ ├── parameters.cpp │ ├── high_resolution_timer.h │ ├── cuda_call_check.h │ ├── gl_texture.h │ ├── cuda_math.cuh │ ├── save_screen.h │ ├── sph_hybrid_system.h │ ├── sph_utils.cuh │ ├── gl_main_header.h │ ├── sph_arrangement.cuh │ ├── Hybrid_Fluid_Simulation.vcxproj.filters │ ├── gpu_model.h │ ├── gpu_model_reader.cpp │ ├── sph_kernel.cuh │ ├── sph_tra_arti_block_statistics.json │ ├── gpu_model.cu │ ├── 
Hybrid_Fluid_Simulation.vcxproj │ ├── sph_sms_arti_block_statistics.json │ └── main.h └── Hybrid_Fluid_Simulation.sln ├── .gitignore └── README.md /sph_fastest/Hybrid_Fluid_Simulation/nv_gui.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/ball32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/ball32.png -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gl_main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/gl_main.cpp -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cu -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_particle.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_particle.h -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.h -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_parameter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_parameter.h -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew32.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew32.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/lib_json.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/json/lib_json.lib -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/pcisph_factor.cpp -------------------------------------------------------------------------------- 
/sph_fastest/Hybrid_Fluid_Simulation/sph_arrangement.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_arrangement.cu -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_hybrid_system.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_hybrid_system.cpp -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.cpp -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/debug/glew32d.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/debug/glew32d.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/release/glew32.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/dll/release/glew32.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/debug/glew32d.lib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/debug/glew32d.lib -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/release/glew32.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/glew_64/libs/release/glew32.lib -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/debug/freeglutd.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/debug/freeglutd.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/release/freeglut.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/dll/release/freeglut.dll -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/debug/freeglutd.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/debug/freeglutd.lib -------------------------------------------------------------------------------- 
/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/release/freeglut.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KemengHuang/Fast-General-GPU-SPH-framework/HEAD/sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/libs/release/freeglut.lib -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB INCLUDE_FILES "json/*.h") 2 | install(FILES 3 | ${INCLUDE_FILES} 4 | ${PROJECT_BINARY_DIR}/include/json/version.h 5 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/json) 6 | 7 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/scene_default1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluid_block" : [ 3 | { 4 | "begin_x" : 0.001, 5 | "begin_y" : 0.001, 6 | "begin_z" : 0.001, 7 | "end_x" : 0.55, 8 | "end_y" : 0.55, 9 | "end_z" : 0.55 10 | } 11 | ], 12 | "mass" : 0.000088, 13 | "interval" : 0.105, 14 | "recomm_nump" : 15500000 15 | } -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/Shader/shader.vs: -------------------------------------------------------------------------------- 1 | void main() 2 | { 3 | vec3 posEye = vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0)); 4 | float dist = length(posEye); 5 | gl_PointSize = 200.0/dist; 6 | gl_TexCoord[0] = gl_MultiTexCoord0; 7 | gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex; 8 | gl_FrontColor = gl_Color; 9 | } -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPHTIMER_H__ 2 | #define __SPHTIMER_H__ 
3 | 4 | #include 5 | 6 | class Timer 7 | { 8 | private: 9 | int frames; 10 | int update_time; 11 | int last_time; 12 | double FPS; 13 | 14 | public: 15 | Timer(); 16 | void update(); 17 | double get_fps(); 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/scene_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluid_block" : [ 3 | { 4 | "begin_x" : 0.091, 5 | "begin_y" : 0.091, 6 | "begin_z" : 0.091, 7 | "end_x" : 0.8, 8 | "end_y" : 0.8, 9 | "end_z" : 0.8 10 | } 11 | ], 12 | "mass" : 0.00027, 13 | "interval" : 0.15, 14 | "recomm_nump" : 15500000, 15 | "xx": 1.6, 16 | "yy": 1.6, 17 | "zz": 1.6 18 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | sph_fastest/.vs 2 | sph_fastest/Hybrid_Fluid_Simulation/x64 3 | sph_fastest/x64 4 | sph_fastest/saveScreen 5 | sph_fastest/*.zip 6 | sph_fastest/Hybrid_Fluid_Simulation/saveScreen* 7 | sph_fastest/Hybrid_Fluid_Simulation/saveSurface* 8 | sph_fastest/Hybrid_Fluid_Simulation/*.mp4 9 | sph_fastest/Hybrid_Fluid_Simulation/mesh 10 | sph_fastest/Hybrid_Fluid_Simulation/*.txt 11 | sph_fastest/Hybrid_Fluid_Simulation/Release 12 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_timer.cpp: -------------------------------------------------------------------------------- 1 | #include "sph_timer.h" 2 | 3 | Timer::Timer() 4 | { 5 | frames=0; 6 | update_time=1000; 7 | last_time=0; 8 | FPS=0; 9 | } 10 | 11 | void Timer::update() 12 | { 13 | frames++; 14 | 15 | if(GetTickCount()-last_time > update_time) 16 | { 17 | FPS=((double)frames/(double)(GetTickCount()-last_time))*1000.0; 18 | last_time=GetTickCount(); 19 | frames=0; 20 | } 21 | } 22 | 23 | double Timer::get_fps() 24 | { 25 | 
return FPS; 26 | } 27 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/Shader/shader.fs: -------------------------------------------------------------------------------- 1 | void main() 2 | { 3 | const vec3 lightDir = vec3(0.577, 0.577, 0.577); 4 | 5 | // calculate normal from texture coordinates 6 | vec3 N; 7 | N.xy = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0); 8 | float mag = dot(N.xy, N.xy); 9 | if (mag > 1.0) discard; // kill pixels outside circle 10 | N.z = sqrt(1.0-mag); 11 | 12 | // calculate lighting 13 | float diffuse = max(0.0, dot(lightDir, N)); 14 | 15 | gl_FragColor = gl_Color * diffuse; 16 | } -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_header.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPHHEADER_H__ 2 | #define __SPHHEADER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define PI 3.141592f 20 | #define INF 1E-12f 21 | #define BOUNDARY 0.01f 22 | 23 | #endif -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/json.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 
4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef JSON_JSON_H_INCLUDED 7 | #define JSON_JSON_H_INCLUDED 8 | 9 | #include "autolink.h" 10 | #include "features.h" 11 | #include "reader.h" 12 | #include "value.h" 13 | #include "writer.h" 14 | 15 | #endif // JSON_JSON_H_INCLUDED 16 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/insts_latency.json: -------------------------------------------------------------------------------- 1 | { 2 | "I_ADD_SUB" : 6.047, 3 | "I_MAD_MUL" : 13.164, 4 | "I_DIV_REM" : 255.544, 5 | "I_MIN_MAX" : 12.047, 6 | "I_ABS" : 15.086, 7 | "I_MUL24" : 19.3, 8 | "I_LOGICAL" : 0.145, 9 | "I_SHL_SHR" : 6.051, 10 | "I_SAD" : 6.074, 11 | "F_ADD_SUB" : 6.047, 12 | "F_MAD_MUL" : 6.047, 13 | "F_DIV" : 365.641, 14 | "F_DIVIDEF" : 34.07, 15 | "F_EXP2" : 40.117, 16 | "F_LOG2" : 34.07, 17 | "F_SIN_COS" : 15.094, 18 | "F_SQRT" : 130.840, 19 | "F_RSQRT" : 34.07, 20 | "F_MIN_MAX" : 12.074, 21 | "F_RCP" : 132.703, 22 | "DEFAULT" : 10.0 23 | } -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_data.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPHDATA_H__ 2 | #define __SPHDATA_H__ 3 | 4 | #include "sph_header.h" 5 | 6 | float window_width = 1000; 7 | float window_height = 750; 8 | 9 | float xRot = 0.0f; 10 | float yRot = 0.0f; 11 | float xTrans = 0; 12 | float yTrans = 0; 13 | float zTrans = -175.0; 14 | 15 | int psize = 12; 16 | int ox; 17 | int oy; 18 | int buttonState; 19 | float xRotLength = 0.0f; 20 | float yRotLength = 0.0f; 21 | 22 | float3 real_world_origin; 23 | float3 real_world_side; 24 | float3 sim_ratio; 25 | 26 | float world_width; 27 | float world_height; 28 | float world_length; 29 | 30 | #endif -------------------------------------------------------------------------------- 
/sph_fastest/Hybrid_Fluid_Simulation/sph_marching_cube.h: -------------------------------------------------------------------------------- 1 | // 2 | // sph_marching_cube.h 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by ruanjm on 22/04/16 6 | // Copyright (c) 2016 ruanjm. All rights reserved. 7 | // 8 | 9 | #ifndef _SPH_MARCHING_CUBE_H 10 | #define _SPH_MARCHING_CUBE_H 11 | 12 | #include 13 | #include "sph_parameter.h" 14 | 15 | namespace sph 16 | { 17 | 18 | bool generateMesh(float3 *pos, unsigned int nump, SystemParameter *sys_para, unsigned int loop_times); 19 | 20 | void outputMesh(SystemParameter *sys_para, unsigned int loop_times); 21 | 22 | } 23 | 24 | #endif/*_SPH_MARCHING_CUBE_H*/ 25 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Auto 5 | 6 | 7 | PATH=$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB 8 | WindowsLocalDebugger 9 | 10 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gpu_model_reader.h: -------------------------------------------------------------------------------- 1 | // 2 | // gpu_model_reader.h 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by kmhuang and ruanjm on 2018/09/01 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved. 
7 | // 8 | 9 | #ifndef _GPU_MODEL_READER_H 10 | #define _GPU_MODEL_READER_H 11 | 12 | #include 13 | #include "gpu_model.h" 14 | 15 | namespace gpu_model 16 | { 17 | 18 | unsigned int readPTXStatisticsFromFile(PTXBlockStatistic *&output, const std::string &func_name, const std::string &file_name); 19 | 20 | void readInstructionLatencyFromFile(InstructionInfo &inst_info, const std::string &file_name); 21 | 22 | } 23 | 24 | #endif/*_GPU_MODEL_READER_H*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef _PARAMETERS_H 2 | #define _PARAMETERS_H 3 | 4 | #include 5 | 6 | // common 7 | extern const bool DEBUG; 8 | extern const float TIME_STEP; 9 | extern const float3 WORLD_SIZE; 10 | extern const float3 GRAVITY; 11 | 12 | // SPH simulation 13 | extern const float KERNAL_RADIUS; 14 | extern const float MASS; 15 | extern const float VICOSITY_COEFFICIENT; 16 | extern const float REST_DENSITY; 17 | extern const float WALL_DAMPING; 18 | extern const float GAS_CONSTANT; 19 | extern const int pcisph_min_loops; 20 | extern const int pcisph_max_loops; 21 | extern const float pcisph_max_density_error_allowed; 22 | 23 | // Eulerian simulation 24 | extern const int eulerDim[3]; 25 | 26 | #endif /*_PARAMETERS_H*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/glut.h: -------------------------------------------------------------------------------- 1 | #ifndef __GLUT_H__ 2 | #define __GLUT_H__ 3 | 4 | /* 5 | * glut.h 6 | * 7 | * The freeglut library include file 8 | * 9 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 10 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 11 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 12 | * PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | */ 16 | 17 | #include "freeglut_std.h" 18 | 19 | /*** END OF FILE ***/ 20 | 21 | #endif /* __GLUT_H__ */ 22 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gpu_model.cuh: -------------------------------------------------------------------------------- 1 | // 2 | // gpu_model.cuh 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by kmhuang and ruanjm on 2018/09/01 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved. 7 | // 8 | 9 | #ifndef _GPU_MODEL_CUH 10 | #define _GPU_MODEL_CUH 11 | 12 | namespace gpu_model 13 | { 14 | 15 | struct GPUModel; 16 | 17 | void allocateGPUModel(GPUModel *&gm); 18 | 19 | void freeGPUModel(GPUModel *gm); 20 | 21 | void calculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc); 22 | 23 | void calculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel *gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size); 24 | 25 | } 26 | 27 | #endif/*_GPU_MODEL_CUH*/ 28 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/freeglut.h: -------------------------------------------------------------------------------- 1 | #ifndef __FREEGLUT_H__ 2 | #define __FREEGLUT_H__ 3 | 4 | /* 5 | * freeglut.h 6 | * 7 | * The freeglut library include file 8 | * 9 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 10 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 11 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 12 | * PAWEL W. 
OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | */ 16 | 17 | #include "freeglut_std.h" 18 | #include "freeglut_ext.h" 19 | 20 | /*** END OF FILE ***/ 21 | 22 | #endif /* __FREEGLUT_H__ */ 23 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/autolink.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef JSON_AUTOLINK_H_INCLUDED 7 | #define JSON_AUTOLINK_H_INCLUDED 8 | 9 | #include "config.h" 10 | 11 | #ifdef JSON_IN_CPPTL 12 | #include 13 | #endif 14 | 15 | #if !defined(JSON_NO_AUTOLINK) && !defined(JSON_DLL_BUILD) && \ 16 | !defined(JSON_IN_CPPTL) 17 | #define CPPTL_AUTOLINK_NAME "json" 18 | #undef CPPTL_AUTOLINK_DLL 19 | #ifdef JSON_DLL 20 | #define CPPTL_AUTOLINK_DLL 21 | #endif 22 | #include "autolink.h" 23 | #endif 24 | 25 | #endif // JSON_AUTOLINK_H_INCLUDED 26 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan.cuh: -------------------------------------------------------------------------------- 1 | // 2 | // scan.cuh 3 | // prefix_sum 4 | // 5 | // created by ruanjm on 16/12/15 6 | // Copyright (c) 2015 ruanjm. All rights reserved. 
7 | // 8 | 9 | #ifndef _SCAN_CUH 10 | #define _SCAN_CUH 11 | 12 | extern "C" 13 | { 14 | 15 | void prefixSumToGPU(char* inArray, int num, int siz); 16 | void prefixSumFromGPU(char* outArray, int num, int siz); 17 | void prefixSum(int num); 18 | void prefixSumInt(int num); 19 | void preallocBlockSumsInt(unsigned int num); 20 | void deallocBlockSumsInt(); 21 | void prescanArray(float* outArray, float* inArray, int numElements); 22 | void prescanArrayInt(int* outArray, int* inArray, int numElements); 23 | void prescanArrayRecursiveInt(int *outArray, const int *inArray, int numElements, int level); 24 | 25 | } 26 | 27 | #endif/*_SCAN_CUH*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/version.h: -------------------------------------------------------------------------------- 1 | // DO NOT EDIT. This file (and "version") is a template used by the build system 2 | // (either CMake or Meson) to generate a "version.h" header file. 3 | #ifndef JSON_VERSION_H_INCLUDED 4 | #define JSON_VERSION_H_INCLUDED 5 | 6 | #define JSONCPP_VERSION_STRING "1.9.0" 7 | #define JSONCPP_VERSION_MAJOR 1 8 | #define JSONCPP_VERSION_MINOR 9 9 | #define JSONCPP_VERSION_PATCH 0 10 | #define JSONCPP_VERSION_QUALIFIER 11 | #define JSONCPP_VERSION_HEXA ((JSONCPP_VERSION_MAJOR << 24) \ 12 | | (JSONCPP_VERSION_MINOR << 16) \ 13 | | (JSONCPP_VERSION_PATCH << 8)) 14 | 15 | #ifdef JSONCPP_USING_SECURE_MEMORY 16 | #undef JSONCPP_USING_SECURE_MEMORY 17 | #endif 18 | #define JSONCPP_USING_SECURE_MEMORY 0 19 | // If non-zero, the library zeroes any memory that it has allocated before 20 | // it frees its memory. 
21 | 22 | #endif // JSON_VERSION_H_INCLUDED 23 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/forwards.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef JSON_FORWARDS_H_INCLUDED 7 | #define JSON_FORWARDS_H_INCLUDED 8 | 9 | #if !defined(JSON_IS_AMALGAMATION) 10 | #include "config.h" 11 | #endif // if !defined(JSON_IS_AMALGAMATION) 12 | 13 | namespace Json { 14 | 15 | // writer.h 16 | class StreamWriter; 17 | class StreamWriterBuilder; 18 | class Writer; 19 | class FastWriter; 20 | class StyledWriter; 21 | class StyledStreamWriter; 22 | 23 | // reader.h 24 | class Reader; 25 | class CharReader; 26 | class CharReaderBuilder; 27 | 28 | // features.h 29 | class Features; 30 | 31 | // value.h 32 | typedef unsigned int ArrayIndex; 33 | class StaticString; 34 | class Path; 35 | class PathArgument; 36 | class Value; 37 | class ValueIteratorBase; 38 | class ValueIterator; 39 | class ValueConstIterator; 40 | 41 | } // namespace Json 42 | 43 | #endif // JSON_FORWARDS_H_INCLUDED 44 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/parameters.cpp: -------------------------------------------------------------------------------- 1 | #include "parameters.h" 2 | 3 | /////////////////////////////////////////////////////////////////////////////////// 4 | // common 5 | /////////////////////////////////////////////////////////////////////////////////// 6 | const bool DEBUG = false; 7 | const float TIME_STEP = 0.004; 8 | const float3 WORLD_SIZE = make_float3(4, 4, 4); 9 | const float3 GRAVITY = make_float3(0.0f, -9.8f, 
0.0f); 10 | 11 | /////////////////////////////////////////////////////////////////////////////////// 12 | // SPH simulation 13 | /////////////////////////////////////////////////////////////////////////////////// 14 | const float KERNAL_RADIUS = 0.03f; 15 | const float MASS = 0.002f; 16 | const float VICOSITY_COEFFICIENT = 10.0f; 17 | const float REST_DENSITY = 1000.0f; 18 | const float WALL_DAMPING = -0.5f; 19 | const float GAS_CONSTANT = 1.0f; 20 | 21 | const int pcisph_min_loops = 3; 22 | const int pcisph_max_loops = 5; 23 | const float pcisph_max_density_error_allowed = 10.0f; 24 | 25 | /////////////////////////////////////////////////////////////////////////////////// 26 | // Eulerian simulation 27 | /////////////////////////////////////////////////////////////////////////////////// 28 | const int eulerDim[3] = {16, 16, 16}; -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/high_resolution_timer.h: -------------------------------------------------------------------------------- 1 | // 2 | // high_resolution_timer.h 3 | // Heterogeneous_SPH 4 | // 5 | // created by ruanjm on 09/07/15 6 | // Copyright (c) 2015 ruanjm. All rights reserved. 
7 | // 8 | 9 | #ifndef _HIGH_RESOLUTION_TIMER_H 10 | #define _HIGH_RESOLUTION_TIMER_H 11 | 12 | class HighResolutionTimer 13 | { 14 | public: 15 | virtual void set_start() = 0; 16 | virtual void set_end() = 0; 17 | virtual float get_millisecond() = 0; 18 | }; 19 | 20 | #ifdef WIN32 21 | 22 | #include 23 | 24 | class HighResolutionTimerForWin : public HighResolutionTimer 25 | { 26 | public: 27 | 28 | HighResolutionTimerForWin(){ 29 | QueryPerformanceFrequency(&freq_); 30 | start_.QuadPart = 0; 31 | end_.QuadPart = 0; 32 | } 33 | 34 | void set_start(){ 35 | QueryPerformanceCounter(&start_); 36 | } 37 | 38 | void set_end(){ 39 | QueryPerformanceCounter(&end_); 40 | } 41 | 42 | float get_millisecond(){ 43 | return static_cast((end_.QuadPart - start_.QuadPart) * 1000 / (float)freq_.QuadPart); 44 | } 45 | 46 | private: 47 | LARGE_INTEGER freq_; 48 | LARGE_INTEGER start_, end_; 49 | }; 50 | 51 | #endif // WIN32 52 | 53 | #endif/*_HIGH_RESOLUTION_TIMER_H*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_call_check.h: -------------------------------------------------------------------------------- 1 | // 2 | // cuda_call_check.h 3 | // cuda texture tester 4 | // 5 | // created by ruanjm on 12/03/15 6 | // Copyright (c) 2015 ruanjm. All rights reserved. 
7 | // 8 | 9 | #ifndef _CUDA_CALL_CHECK_H 10 | #define _CUDA_CALL_CHECK_H 11 | 12 | #include 13 | #include 14 | 15 | #define CUDA_SAFE_CALL(err) cuda_safe_call_(err, __FILE__, __LINE__) 16 | #define CUDA_KERNEL_CHECK(err) cuda_kernel_check_(err, __FILE__, __LINE__) 17 | 18 | inline void cuda_safe_call_(cudaError err, const char *file_name, const int num_line) 19 | { 20 | if (cudaSuccess != err) 21 | { 22 | exit(0); 23 | std::cerr << file_name << "[" << num_line << "]: " 24 | << "CUDA Running API error[" << (int)err << "]: " 25 | << cudaGetErrorString(err) << std::endl; 26 | } 27 | } 28 | 29 | inline void cuda_kernel_check_(const char *error_msg, const char *file_name, const int num_line) 30 | { 31 | cudaError_t err = cudaDeviceSynchronize(); 32 | if (cudaSuccess != err) 33 | { 34 | exit(0); 35 | std::cerr << file_name << "[" << num_line << "]: " 36 | << (error_msg == nullptr ? "NONE" : error_msg) 37 | << "[" << (int)err << "]: " 38 | << cudaGetErrorString(err) << std::endl; 39 | } 40 | } 41 | 42 | #endif/*_CUDA_CALL_CHECK_H*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.30501.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Hybrid_Fluid_Simulation", "Hybrid_Fluid_Simulation\Hybrid_Fluid_Simulation.vcxproj", "{BC50E9FA-E95F-4E72-9F2B-D45567958A71}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|Win32.ActiveCfg = Debug|Win32 17 | 
{BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|Win32.Build.0 = Debug|Win32 18 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|x64.ActiveCfg = Debug|x64 19 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Debug|x64.Build.0 = Debug|x64 20 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|Win32.ActiveCfg = Release|Win32 21 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|Win32.Build.0 = Release|Win32 22 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|x64.ActiveCfg = Release|x64 23 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan_kern.cuh: -------------------------------------------------------------------------------- 1 | // 2 | // scan_kern.cuh 3 | // prefix_sum 4 | // 5 | // created by ruanjm on 16/12/15 6 | // Copyright (c) 2015 ruanjm. All rights reserved. 7 | // 8 | 9 | 10 | #ifndef _SCAN_KERN_CUH 11 | #define _SCAN_KERN_CUH 12 | 13 | #define max(a,b) (((a) > (b)) ? (a) : (b)) 14 | 15 | #define NUM_BANKS 32 16 | #define BLOCK_SIZE 256 17 | 18 | /* prefix sum */ 19 | #include "prefix_sum.cu" 20 | // NOTE: Template functions must be defined in the header 21 | template __global__ void prescan(float *g_odata, const float *g_idata, float *g_blockSums, int n, int blockIndex, int baseIndex) { 22 | int ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB; 23 | extern __shared__ float s_data[]; 24 | loadSharedChunkFromMem(s_data, g_idata, n, (baseIndex == 0) ? 
__mul24(blockIdx.x, (blockDim.x << 1)) : baseIndex, ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB); 25 | prescanBlock(s_data, blockIndex, g_blockSums); 26 | storeSharedChunkToMem(g_odata, s_data, n, ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB); 27 | } 28 | template __global__ void prescanInt(int *g_odata, const int *g_idata, int *g_blockSums, int n, int blockIndex, int baseIndex) { 29 | int ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB; 30 | extern __shared__ int s_dataInt[]; 31 | loadSharedChunkFromMemInt (s_dataInt, g_idata, n, (baseIndex == 0) ? __mul24(blockIdx.x, (blockDim.x << 1)) : baseIndex, ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB); 32 | prescanBlockInt(s_dataInt, blockIndex, g_blockSums); 33 | storeSharedChunkToMemInt (g_odata, s_dataInt, n, ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB); 34 | } 35 | __global__ void uniformAddInt(int* g_data, int *uniforms, int n, int blockOffset, int baseIndex); 36 | __global__ void uniformAdd(float*g_data, float *uniforms, int n, int blockOffset, int baseIndex); 37 | 38 | 39 | #endif/*_SCAN_KERN_CUH*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/features.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef CPPTL_JSON_FEATURES_H_INCLUDED 7 | #define CPPTL_JSON_FEATURES_H_INCLUDED 8 | 9 | #if !defined(JSON_IS_AMALGAMATION) 10 | #include "forwards.h" 11 | #endif // if !defined(JSON_IS_AMALGAMATION) 12 | 13 | #pragma pack(push, 8) 14 | 15 | namespace Json { 16 | 17 | /** \brief Configuration passed to reader and writer. 
/** \brief Configuration passed to reader and writer.
 * This configuration object can be used to force the Reader or Writer
 * to behave in a standard conforming way.
 *
 * Each option is a plain public data member; the defaults are given by the
 * in-class member initializers below.
 */
class JSON_API Features {
public:
  /** \brief A configuration that allows all features and assumes all strings
   * are UTF-8.
   * - C & C++ comments are allowed
   * - Root object can be any JSON value
   * - Assumes Value strings are encoded in UTF-8
   */
  static Features all();

  /** \brief A configuration that is strictly compatible with the JSON
   * specification.
   * - Comments are forbidden.
   * - Root object must be either an array or an object value.
   * - Assumes Value strings are encoded in UTF-8
   */
  static Features strictMode();

  /** \brief Initialize the configuration like JsonConfig::allFeatures.
   */
  Features();

  /// \c true if comments are allowed. Default: \c true.
  bool allowComments_{true};

  /// \c true if the root must be either an array or an object value.
  /// Default: \c false.
  bool strictRoot_{false};

  /// \c true if dropped null placeholders are allowed. Default: \c false.
  bool allowDroppedNullPlaceholders_{false};

  /// \c true if numeric object keys are allowed. Default: \c false.
  bool allowNumericKeys_{false};
};
3 | 4 | DESCRIPTION 5 | =========== 6 | This project is the source code of ["Novel Hierarchical Strategies for SPH-centric Algorithms on GPGPU"](https://doi.org/10.1016/j.gmod.2020.101088) 7 | and ["A General Novel Parallel Framework for SPH-centric Algorithms"](https://dl.acm.org/doi/10.1145/3321360). 8 | 9 | This project offers fast optimization strategies based on uniform grid. When compared to a well-optimized GPU SPH method based on the uniform grid, the method proposed in the papers demonstrates a significant speed improvement of up to 3.5 times. As a result, it serves as an excellent benchmark for conducting further research on GPU SPH and facilitates meaningful comparisons. 10 | 11 | 12 | Source code contributor: [Kemeng Huang](https://kemenghuang.github.io), Jiming Ruan 13 | 14 | **Note: this software is released under the MPLv2.0 license. For commercial use, please email authors for negotiation.** 15 | 16 | ## BibTex 17 | 18 | Please cite the following papers if it helps. 19 | 20 | 21 | ``` 22 | @article{HUANG2020101088, 23 | title = {Novel hierarchical strategies for SPH-centric algorithms on GPGPU}, 24 | journal = {Graphical Models}, 25 | volume = {111}, 26 | pages = {101088}, 27 | year = {2020}, 28 | issn = {1524-0703}, 29 | doi = {https://doi.org/10.1016/j.gmod.2020.101088}, 30 | url = {https://www.sciencedirect.com/science/article/pii/S152407032030028X}, 31 | author = {Kemeng Huang and Zipeng Zhao and Chen Li and Changbo Wang and Hong Qin} 32 | } 33 | ``` 34 | 35 | 36 | ``` 37 | @article{10.1145/3321360, 38 | author = {Huang, Kemeng and Ruan, Jiming and Zhao, Zipeng and Li, Chen and Wang, Changbo and Qin, Hong}, 39 | title = {A General Novel Parallel Framework for SPH-Centric Algorithms}, 40 | year = {2019}, 41 | issue_date = {May 2019}, 42 | publisher = {Association for Computing Machinery}, 43 | address = {New York, NY, USA}, 44 | volume = {2}, 45 | number = {1}, 46 | url = {https://doi.org/10.1145/3321360}, 47 | doi = {10.1145/3321360}, 48 | 
journal = {Proc. ACM Comput. Graph. Interact. Tech.}, 49 | month = {jun}, 50 | articleno = {7}, 51 | numpages = {16} 52 | } 53 | ``` 54 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gl_texture.h: -------------------------------------------------------------------------------- 1 | // 2 | // gl_texture.h 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by ruanjm on 2016/05/01 6 | // Copyright (c) 2016 ruanjm. All rights reserved. 7 | // 8 | 9 | #ifndef _GL_TEXTURE_H 10 | #define _GL_TEXTURE_H 11 | 12 | #include 13 | #include 14 | #include "lodepng.h" 15 | 16 | #define IMG_RGB 0 17 | #define IMG_RGBA 1 18 | #define IMG_LUM 2 19 | 20 | class PNGTexture 21 | { 22 | public: 23 | ~PNGTexture(){ 24 | if (data_) free(data_); 25 | } 26 | 27 | bool loadPNG(const char *path){ 28 | std::vector out; 29 | unsigned int w, h; 30 | 31 | unsigned error = lodepng::decode(out, w, h, path); 32 | if (error) 33 | { 34 | printf("can not decode %s\n", path); 35 | return false; 36 | } 37 | 38 | x_resolution_ = w; 39 | y_resolution_ = h; 40 | size_ = 4 * w * h; 41 | format_ = IMG_RGBA; 42 | 43 | if (data_) free(data_); 44 | data_ = (unsigned int*)malloc(size_); 45 | memcpy(data_, &out[0], size_); 46 | 47 | updateTexture(); 48 | 49 | return true; 50 | } 51 | 52 | GLuint get_texture(){ 53 | return texture_; 54 | } 55 | 56 | private: 57 | void updateTexture(){ 58 | if (texture_) glDeleteTextures(1, &texture_); 59 | 60 | glGenTextures(1, &texture_); 61 | glBindTexture(GL_TEXTURE_2D, texture_); 62 | 63 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); 64 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); 65 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 66 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 67 | 68 | GLenum fmt; 69 | int size; 70 | switch (format_) { 71 | case IMG_RGB: fmt = GL_RGB; size = 3; break; 72 | case IMG_RGBA: fmt = GL_RGBA; size = 4; 
break; 73 | case IMG_LUM: fmt = GL_LUMINANCE; size = 1; break; 74 | } 75 | 76 | glTexImage2D(GL_TEXTURE_2D, 0, fmt, x_resolution_, y_resolution_, 0, fmt, GL_UNSIGNED_BYTE, data_); 77 | } 78 | 79 | GLuint texture_ = 0; 80 | unsigned int x_resolution_; 81 | unsigned int y_resolution_; 82 | unsigned int size_; 83 | unsigned int format_; 84 | unsigned int *data_ = nullptr; 85 | }; 86 | 87 | #endif/*_GL_TEXTURE_H*/ 88 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_math.cuh: -------------------------------------------------------------------------------- 1 | // 2 | // cuda_math.cuh 3 | // Heterogeneous_SPH 4 | // 5 | // created by kmhuang and ruanjm on 2018/09/01 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved. 7 | // 8 | 9 | #ifndef _CUDA_MATH_H 10 | #define _CUDA_MATH_H 11 | 12 | #include 13 | 14 | #define kFloatSmall (1e-12f) 15 | 16 | __host__ __device__ 17 | inline uint3 operator+(const uint3 &a, const uint3 &b) 18 | { 19 | return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); 20 | } 21 | 22 | __host__ __device__ 23 | inline uint3 operator+(const uint3 &a, const int3 &b) 24 | { 25 | return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); 26 | } 27 | 28 | __host__ __device__ 29 | inline int3 operator+(const int3 &a, const int3 &b) 30 | { 31 | return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); 32 | } 33 | 34 | __host__ __device__ 35 | inline ushort3 operator+(const ushort3 &a, const ushort3 &b) 36 | { 37 | return make_ushort3(a.x + b.x, a.y + b.y, a.z + b.z); 38 | } 39 | 40 | __host__ __device__ 41 | inline float3 operator+(const float3 &a, const float3 &b) 42 | { 43 | return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); 44 | } 45 | 46 | __host__ __device__ 47 | inline float3 operator-(const float3 &a, const float3 &b) 48 | { 49 | return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); 50 | } 51 | 52 | __host__ __device__ 53 | inline float3 operator*(const float3 &a, float b) 54 | { 
// Component-wise (Hadamard) product of two float3 values.
__host__ __device__
inline float3 operator*(const float3 &a, const float3 &b)
{
    const float px = a.x * b.x;
    const float py = a.y * b.y;
    const float pz = a.z * b.z;
    return make_float3(px, py, pz);
}

// Divides every component of `a` by the scalar `b`.
__host__ __device__
inline float3 operator/(const float3 &a, float b)
{
    const float qx = a.x / b;
    const float qy = a.y / b;
    const float qz = a.z / b;
    return make_float3(qx, qy, qz);
}

// In-place component-wise subtraction: a = a - b.
__host__ __device__
inline void operator-=(float3 &a, const float3 &b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// In-place component-wise addition: a = a + b.
__host__ __device__
inline void operator+=(float3 &a, const float3 &b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// In-place scaling of every component of `a` by the scalar `b`.
__host__ __device__
inline void operator*=(float3 &a, const float b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
}

// Squared Euclidean distance between the xyz parts of two float4 values.
// The w components are not read.
__host__ __device__
inline float distance_square(const float4 &a, const float4 &b)
{
    const float dx = a.x - b.x;
    const float dy = a.y - b.y;
    const float dz = a.z - b.z;
    return dx * dx + dy * dy + dz * dz;
}
7 | // 8 | 9 | #ifndef _SAVE_SCREEN_H 10 | #define _SAVE_SCREEN_H 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #define BITMAP_ID 0x4D42 // the universal bitmap ID 17 | 18 | BITMAPINFOHEADER bitmapInfoHeader; 19 | 20 | bool WriteBitmapFile(int width, int height, const std::string &file_name, unsigned char *bitmapData) 21 | { 22 | BITMAPFILEHEADER bitmapFileHeader; 23 | memset(&bitmapFileHeader, 0, sizeof(BITMAPFILEHEADER)); 24 | bitmapFileHeader.bfSize = sizeof(BITMAPFILEHEADER); 25 | bitmapFileHeader.bfType = 0x4d42; //BM 26 | bitmapFileHeader.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER); 27 | 28 | BITMAPINFOHEADER bitmapInfoHeader; 29 | memset(&bitmapInfoHeader, 0, sizeof(BITMAPINFOHEADER)); 30 | bitmapInfoHeader.biSize = sizeof(BITMAPINFOHEADER); 31 | bitmapInfoHeader.biWidth = width; 32 | bitmapInfoHeader.biHeight = height; 33 | bitmapInfoHeader.biPlanes = 1; 34 | bitmapInfoHeader.biBitCount = 24; 35 | bitmapInfoHeader.biCompression = BI_RGB; 36 | bitmapInfoHeader.biSizeImage = width * abs(height) * 3; 37 | 38 | ////////////////////////////////////////////////////////////////////////// 39 | FILE * filePtr; 40 | unsigned char tempRGB; 41 | int imageIdx; 42 | 43 | for (imageIdx = 0; imageIdx < (int)bitmapInfoHeader.biSizeImage; imageIdx += 3) 44 | { 45 | tempRGB = bitmapData[imageIdx]; 46 | bitmapData[imageIdx] = bitmapData[imageIdx + 2]; 47 | bitmapData[imageIdx + 2] = tempRGB; 48 | } 49 | 50 | filePtr = fopen(file_name.c_str(), "wb"); 51 | if (NULL == filePtr) 52 | { 53 | return false; 54 | } 55 | 56 | fwrite(&bitmapFileHeader, sizeof(BITMAPFILEHEADER), 1, filePtr); 57 | 58 | fwrite(&bitmapInfoHeader, sizeof(BITMAPINFOHEADER), 1, filePtr); 59 | 60 | fwrite(bitmapData, bitmapInfoHeader.biSizeImage, 1, filePtr); 61 | 62 | fclose(filePtr); 63 | return true; 64 | } 65 | 66 | void SaveScreenShot(int width, int height, const std::string &file_name) 67 | { 68 | int data_len = height * width * 3; // bytes 69 | void *screen_data = 
malloc(data_len); 70 | memset(screen_data, 0, data_len); 71 | glReadPixels(0, 0, width, height, GL_RGB, GL_UNSIGNED_BYTE, screen_data); 72 | 73 | WriteBitmapFile(width, height, file_name + ".bmp", (unsigned char*)screen_data); 74 | 75 | free(screen_data); 76 | } 77 | 78 | 79 | #endif/*_SAVE_SCREEN_H*/ -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/assertions.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef CPPTL_JSON_ASSERTIONS_H_INCLUDED 7 | #define CPPTL_JSON_ASSERTIONS_H_INCLUDED 8 | 9 | #include 10 | #include 11 | 12 | #if !defined(JSON_IS_AMALGAMATION) 13 | #include "config.h" 14 | #endif // if !defined(JSON_IS_AMALGAMATION) 15 | 16 | /** It should not be possible for a maliciously designed file to 17 | * cause an abort() or seg-fault, so these macros are used only 18 | * for pre-condition violations and internal logic errors. 19 | */ 20 | #if JSON_USE_EXCEPTION 21 | 22 | // @todo <= add detail about condition in exception 23 | #define JSON_ASSERT(condition) \ 24 | { \ 25 | if (!(condition)) { \ 26 | Json::throwLogicError("assert json failed"); \ 27 | } \ 28 | } 29 | 30 | #define JSON_FAIL_MESSAGE(message) \ 31 | { \ 32 | OStringStream oss; \ 33 | oss << message; \ 34 | Json::throwLogicError(oss.str()); \ 35 | abort(); \ 36 | } 37 | 38 | #else // JSON_USE_EXCEPTION 39 | 40 | #define JSON_ASSERT(condition) assert(condition) 41 | 42 | // The call to assert() will show the failure message in debug builds. In 43 | // release builds we abort, for a core-dump or debugger. 
// Top-level simulation object declared in namespace sph.
//
// From this declaration: it owns host and device particle buffers, the
// system parameters, a pointer to an Arrangement, a frame timer and the GL
// resources used to draw the particles. All member functions are defined
// outside this header, so the notes below describe only what the
// declarations themselves show.
class HybridSystem
{
public:
    // NOTE(review): parameter names suggest the simulation domain extent and
    // origin in world units - confirm against the constructor definition.
    HybridSystem(const float3 &real_world_side, const float3 &sim_origin);
    ~HybridSystem();

    // NOTE(review): presumably advances the simulation by one step - confirm.
    void tick();
    void setPause();
    bool isRunning();
    uint getNumParticles();
    float3 getPosition(uint idx);

    void insertParticles(unsigned int type);

    void drawParticles(float rad, int size);
    void drawInfo(GLdouble w, GLdouble h);
    // Public counter with no in-class initializer; its meaning is not
    // visible from this header.
    int loop;

private:
    // Note: `scene` is taken by value, so each call copies the Scene.
    void initializeScene(const std::string &file_name, Scene scene);
    void initializeScene2(const std::string &file_name);
    void resetBuffer(uint nump);
    void addParticle2(float3 position, float3 velocity, condition phase, float temperature);
    // velocity defaults to the zero vector, color_type defaults to 1.
    void addParticle(float3 position, float3 velocity = make_float3(0.0f, 0.0f, 0.0f), int color_type = 1);
    bool is_running_ = false;
    uint nump_ = 0U;            // NOTE(review): presumably the current particle count - confirm
    uint buff_capacity_ = 0U;   // NOTE(review): presumably the allocated buffer capacity - confirm
    ParticleBufferObject host_buff_;
    ParticleBufferObject device_buff_;
    ParticleBufferObject device_buff_temp_;

    ParticleBufferObject device_buff_data_;

    SystemParameter sys_para_;
    //std::unique_ptr arrangement_;
    // Raw pointer with no in-class initializer; allocation and release are
    // not visible in this header.
    Arrangement *arrangement_;
    float particle_interval = 0.5f;
    HighResolutionTimerForWin frame_timer_;
    // The following timing values and flags have no in-class initializers.
    bool get_detailed_time_;
    float total_time_;
    float pre_time_, density_time_, force_time_;
    bool generate_mesh_;
    bool add_smoke_;

    // render
    PNGTexture particle_texture_;
    GLuint position_vbo_;
    GLuint color_vbo_;

    // action
    void action1();
    bool action1_ = false;

    //sf add
    float pcisph_density_factor;
};
4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef CPPTL_JSON_ALLOCATOR_H_INCLUDED 7 | #define CPPTL_JSON_ALLOCATOR_H_INCLUDED 8 | 9 | #include 10 | #include 11 | 12 | #pragma pack(push, 8) 13 | 14 | namespace Json { 15 | template class SecureAllocator { 16 | public: 17 | // Type definitions 18 | using value_type = T; 19 | using pointer = T*; 20 | using const_pointer = const T*; 21 | using reference = T&; 22 | using const_reference = const T&; 23 | using size_type = std::size_t; 24 | using difference_type = std::ptrdiff_t; 25 | 26 | /** 27 | * Allocate memory for N items using the standard allocator. 28 | */ 29 | pointer allocate(size_type n) { 30 | // allocate using "global operator new" 31 | return static_cast(::operator new(n * sizeof(T))); 32 | } 33 | 34 | /** 35 | * Release memory which was allocated for N items at pointer P. 36 | * 37 | * The memory block is filled with zeroes before being released. 38 | * The pointer argument is tagged as "volatile" to prevent the 39 | * compiler optimizing out this critical step. 40 | */ 41 | void deallocate(volatile pointer p, size_type n) { 42 | std::memset(p, 0, n * sizeof(T)); 43 | // free using "global operator delete" 44 | ::operator delete(p); 45 | } 46 | 47 | /** 48 | * Construct an item in-place at pointer P. 49 | */ 50 | template void construct(pointer p, Args&&... args) { 51 | // construct using "placement new" and "perfect forwarding" 52 | ::new (static_cast(p)) T(std::forward(args)...); 53 | } 54 | 55 | size_type max_size() const { return size_t(-1) / sizeof(T); } 56 | 57 | pointer address(reference x) const { return std::addressof(x); } 58 | 59 | const_pointer address(const_reference x) const { return std::addressof(x); } 60 | 61 | /** 62 | * Destroy an item in-place at pointer P. 
63 | */ 64 | void destroy(pointer p) { 65 | // destroy using "explicit destructor" 66 | p->~T(); 67 | } 68 | 69 | // Boilerplate 70 | SecureAllocator() {} 71 | template SecureAllocator(const SecureAllocator&) {} 72 | template struct rebind { using other = SecureAllocator; }; 73 | }; 74 | 75 | template 76 | bool operator==(const SecureAllocator&, const SecureAllocator&) { 77 | return true; 78 | } 79 | 80 | template 81 | bool operator!=(const SecureAllocator&, const SecureAllocator&) { 82 | return false; 83 | } 84 | 85 | } // namespace Json 86 | 87 | #pragma pack(pop) 88 | 89 | #endif // CPPTL_JSON_ALLOCATOR_H_INCLUDED 90 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_utils.cuh: -------------------------------------------------------------------------------- 1 | // 2 | // sph_utils.cuh 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by kmhuang and ruanjm on 2018/09/01 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved. 
// Sentinel returned by the index helpers for a cell outside the grid.
const int kInvalidCellIdx = 0xffffffff;

// Integer division of a by b, rounded up (for positive operands).
__device__ __host__
inline int ceil_int(int a, int b)
{
    const int biased = a + b - 1;
    return biased / b;
}

// Maps a particle position to integer cell coordinates using cells of edge
// length `cell_size`. Only x, y and z of `pos` are read.
// NOTE(review): the floored values are converted to unsigned short, so
// negative or very large coordinates are not representable - confirm that
// callers keep positions inside the grid.
__device__
inline ushort3 ParticlePos2CellPos(const float4 &pos, float cell_size)
{
    const float fx = floorf(pos.x / cell_size);
    const float fy = floorf(pos.y / cell_size);
    const float fz = floorf(pos.z / cell_size);
    return make_ushort3(fx, fy, fz);
}

// Same mapping as ParticlePos2CellPos, but on a grid four times finer along
// each axis (positions are scaled by 4 / cell_size before flooring).
__device__
inline ushort3 ParticlePos2CellPosM(const float4 &pos, float cell_size)
{
    const float scale = 4.f / cell_size;
    const float fx = floorf(pos.x * scale);
    const float fy = floorf(pos.y * scale);
    const float fz = floorf(pos.z * scale);
    return make_ushort3(fx, fy, fz);
}

// Flattens cell coordinates into a linear index (x varies fastest, then y,
// then z). Returns kInvalidCellIdx if any coordinate is >= the grid size.
// The coordinates are unsigned, so no lower-bound test is needed.
__device__
inline int CellPos2CellIdx(const ushort3 &cell_pos, const ushort3 &grid_size)
{
    const bool inside = cell_pos.x < grid_size.x &&
                        cell_pos.y < grid_size.y &&
                        cell_pos.z < grid_size.z;
    if (!inside)
    {
        return kInvalidCellIdx;
    }

    const int yz = cell_pos.y + grid_size.y * cell_pos.z;
    return cell_pos.x + grid_size.x * yz;
}

// Convenience wrapper: particle position -> linear cell index
// (kInvalidCellIdx when the cell lies outside the grid).
__device__
inline int ParticlePos2CellIdx(const float4 &pos, const ushort3 &grid_size, float cell_size)
{
    return CellPos2CellIdx(ParticlePos2CellPos(pos, cell_size), grid_size);
}
#define VNAME		4DF
#define VTYPE		float

// Four-component float vector (x, y, z, w).
//
// Member functions that are only declared here are defined elsewhere.
// The binary operators below return a new vector and apply the operation to
// all four components, w included.
class Vector4DF {
public:
    VTYPE x, y, z, w;

    // Sets x, y, z and forces w to 1.
    Vector4DF &Set(const float xa, const float ya, const float za) { x = xa; y = ya; z = za; w = 1; return *this; }
    // Sets all four components.
    Vector4DF &Set(const float xa, const float ya, const float za, const float wa) { x = xa; y = ya; z = za; w = wa; return *this; }

    // Constructors/Destructors
    Vector4DF() { x = 0; y = 0; z = 0; w = 0; }
    Vector4DF(const VTYPE xa, const VTYPE ya, const VTYPE za, const VTYPE wa);

    Vector4DF(const Vector4DF &op);

    // Member Functions
    Vector4DF &operator= (const int op);
    Vector4DF &operator= (const double op);

    Vector4DF &operator= (const Vector4DF &op);

    Vector4DF &operator+= (const int op);
    Vector4DF &operator+= (const float op);
    Vector4DF &operator+= (const double op);

    Vector4DF &operator+= (const Vector4DF &op);

    Vector4DF &operator-= (const int op);
    Vector4DF &operator-= (const double op);

    Vector4DF &operator-= (const Vector4DF &op);

    Vector4DF &operator*= (const int op);
    Vector4DF &operator*= (const double op);

    Vector4DF &operator*= (const Vector4DF &op);
    Vector4DF &operator*= (const float* op);

    Vector4DF &operator/= (const int op);
    Vector4DF &operator/= (const double op);

    // Slow operations - require temporary variables
    Vector4DF operator+ (const int op) { return Vector4DF(x + float(op), y + float(op), z + float(op), w + float(op)); }
    // w is offset like the other components (it was previously multiplied by
    // op, unlike the int and vector overloads).
    Vector4DF operator+ (const float op) { return Vector4DF(x + op, y + op, z + op, w + op); }
    Vector4DF operator+ (const Vector4DF &op) { return Vector4DF(x + op.x, y + op.y, z + op.z, w + op.w); }
    Vector4DF operator- (const int op) { return Vector4DF(x - float(op), y - float(op), z - float(op), w - float(op)); }
    // Same correction as operator+(float): w - op instead of w * op.
    Vector4DF operator- (const float op) { return Vector4DF(x - op, y - op, z - op, w - op); }
    Vector4DF operator- (const Vector4DF &op) { return Vector4DF(x - op.x, y - op.y, z - op.z, w - op.w); }
    Vector4DF operator* (const int op) { return Vector4DF(x*float(op), y*float(op), z*float(op), w*float(op)); }
    Vector4DF operator* (const float op) { return Vector4DF(x*op, y*op, z*op, w*op); }
    Vector4DF operator* (const Vector4DF &op) { return Vector4DF(x*op.x, y*op.y, z*op.z, w*op.w); }
    // --

    // Limits each component from above only: a component larger than its
    // bound is replaced by the bound; smaller values are left unchanged.
    Vector4DF& Clamp(float xc, float yc, float zc, float wc)
    {
        x = (x > xc) ? xc : x;
        y = (y > yc) ? yc : y;
        z = (z > zc) ? zc : z;
        w = (w > wc) ? wc : w;
        return *this;
    }

    Vector4DF &Cross(const Vector4DF &v);

    double Dot(const Vector4DF &v);

    double Dist(const Vector4DF &v);

    double DistSq(const Vector4DF &v);

    Vector4DF &Normalize(void);
    double Length(void);

    // Mutable component accessors.
    VTYPE &X(void) { return x; }
    VTYPE &Y(void) { return y; }
    VTYPE &Z(void) { return z; }
    VTYPE &W(void) { return w; }
    // Read-only component accessors.
    const VTYPE &X(void) const { return x; }
    const VTYPE &Y(void) const { return y; }
    const VTYPE &Z(void) const { return z; }
    const VTYPE &W(void) const { return w; }
    // Pointer to the first component (x).
    VTYPE *Data(void) { return &x; }
};

#undef VNAME
#undef VTYPE
// Declared in namespace sph. Going by its members, this class organises
// particles into grid cells (hashing, sorting, per-cell offsets and counts)
// and builds per-block task lists for the different execution modes.
// All non-inline member functions are defined outside this header, so the
// notes below are limited to what the declarations and the original
// comments show.
//
// The two particle buffers are held by reference: they must outlive this
// object, and the reference members make the class non-assignable.
class Arrangement
{
public:
    // buff_list / buff_temp are stored as references (see buff_list_ and
    // buff_temp_ below).
    // NOTE(review): the remaining parameters presumably initialise the
    // same-named members - confirm in the .cu file.
    Arrangement(ParticleBufferObject &buff_list,
                ParticleBufferObject &buff_temp,

                unsigned int nump,
                unsigned int nump_capacity,
                float cell_size,
                ushort3 grid_size);

    ~Arrangement();

    // return middle value of 2 parallel framework
    // NOTE(review): the original comment above presumably applies to the
    // int-returning arrange* functions - confirm.
    int arrangeTRAMode();
    void arrangeSMSMode();
    int arrangeHybridMode();
    int arrangeHybridMode9();
    void test();

    void sortParticles();
    void assignTasksFixedCTA();

    int* getDevCellStartIdx();
    int* getDevCellEndIdx();
    int getNumBlockSMSMode();
    BlockTask *getBlockTasks();

    void resetNumParticle(unsigned int nump);


    // Inline accessors: each returns the stored member as is.
    int* getDevOffsetData() { return d_cell_offset_data; }
    int* getDevCellOffset() { return d_cell_offset_; }
    int* getDevCellOffsetM() { return d_cell_offset_M; }
    int* getDevCellIndex() { return d_index_; }

    int* getDevCellNumP() { return d_cell_nump_; }

    unsigned int getNumC() { return numc_; }


    void CountingSortCUDA();
    void CountingSort_O();

    void CountingSortCUDA_Two();
    void CountingSortCUDA_Two9();
    void countNum();

    void CountingSort_O_M();
    void CountingSortCUDA_Two9_M();
    int arrangeHybridMode9M();
private:
    void calculateHash();
    void calculateHashWithBlockReq();
    void sortHash();
    void sortIndexByHash();
    void reindexParticles();        // use index sorted by hash to reindex
    void reindexParticles2();       // use "particle offset in cell" and "prefix summed cell offset" to reindex
    void findCellRange();
    void findCellRangeAndHybridModeMiddleValue();
    void insertParticles();
    void arrangeBlockTasks();



    void CSInsertParticles();
    void CSCountingSortFull();
    void arrangeBlockTasksFixedM(int *hash, int *celloff, int *cellnum, BlockTask* d_task_array, int* d_cta_reqs, int* d_task_array_offset, int cta_size);
    void arrangeBlockTasksFixed(BlockTask* d_task_array, int* d_cta_reqs, int* d_task_array_offset, int cta_size);
    void arrangeBlockTasksFloat();

    void CSCalculateRequiredCTAsFixed(int *cat_offset, int* d_cta_reqs, int cta_size);




    ParticleBufferObject &buff_list_;           // particle device buffer
    ParticleBufferObject &buff_temp_;           // particle device buffer for replacement
    unsigned int nump_;                         // #particles
    unsigned int nump_capacity_;
    unsigned int numc_;                         // #cells
    float cell_size_;
    ushort3 grid_size_;
    int middle_value_ = 0;

    int h_num_cta_;

    // NOTE(review): the d_ prefix presumably marks device pointers and h_
    // host-side values, following the original comments - confirm.
    int* d_num_cta_;
    int* d_cell_offset_;                        // [numc] the offset in memory of the particles in each cell
    int* d_cell_nump_;                          // [numc] the number of particles in each cell
    int* d_p_offset_;

    int* d_p_offset_p;

    int* d_cell_offset_data;

    int *d_start_index_;                        // [numc]device buffer, cell start index
    int *d_end_index_;                          // [numc]device buffer, cell end index
    int *d_hash_;                               // [nump]
    int *d_index_;                              // [nump]

    int *hashp;                                 // [nump]
    int *d_hash_p;                              // [nump]
    // int *indexp;
    int *cell_num_;
    int *cell_num_two;
    int *cell_type;
    int* d_cell_nump_M;
    int* d_cell_offset_M;


    int* d_task_array_offset_32_;               // [numc]result of prescan
    int *d_block_reqs_;                         // [numc]for SMS Mode
    int *d_breqs_offset_;                       // [numc]result of prescan
    int *d_num_block_;                          // [1]
    int h_num_block_ = 0;
    BlockTask *d_block_task_;                   // [numb]

    int *d_middle_value_;                       // [1]for Hybrid Mode

    // Only forward-declared in this header; defaults to nullptr.
    gpu_model::GPUModel *p_gpu_model_ = nullptr;
};
-------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {55d31f92-036c-4a57-b177-1d7d5d7355b6} 6 | 7 | 8 | {f758ad77-964e-4ddf-ad45-aba65e76286e} 9 | 10 | 11 | {df5589b3-0608-4b75-8f82-5a3ca8af24bd} 12 | 13 | 14 | {9b82fa81-bedb-4faf-8d92-e1fa93169963} 15 | 16 | 17 | {6e33e5de-acd1-4615-9555-f3cf68a606ef} 18 | 19 | 20 | {a08c016c-896a-4c51-b108-2eec44fe4591} 21 | 22 | 23 | 24 | 25 | 26 | GPU Model 27 | 28 | 29 | Marching Cubes 30 | 31 | 32 | SPH System 33 | 34 | 35 | SPH System 36 | 37 | 38 | Utils 39 | 40 | 41 | Utils 42 | 43 | 44 | 45 | 46 | 47 | SPH System 48 | 49 | 50 | SPH System 51 | 52 | 53 | SPH System 54 | 55 | 56 | SPH System 57 | 58 | 59 | SPH System 60 | 61 | 62 | SPH System 63 | 64 | 65 | SPH System 66 | 67 | 68 | GPU Prescan 69 | 70 | 71 | GPU Prescan 72 | 73 | 74 | GPU Model 75 | 76 | 77 | GPU Model 78 | 79 | 80 | GPU Model 81 | 82 | 83 | 84 | Marching Cubes 85 | 86 | 87 | SPH System 88 | 89 | 90 | Utils 91 | 92 | 93 | Utils 94 | 95 | 96 | Utils 97 | 98 | 99 | Utils 100 | 101 | 102 | Utils 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | SPH System 111 | 112 | 113 | SPH System 114 | 115 | 116 | SPH System 117 | 118 | 119 | GPU Prescan 120 | 121 | 122 | GPU Prescan 123 | 124 | 125 | GPU Model 126 | 127 | 128 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gpu_model.h: -------------------------------------------------------------------------------- 1 | // 2 | // gpu_model.h 3 | // Hybrid_Parallel_SPH 4 | // 5 | // created by kmhuang and ruanjm on 2018/09/01 6 | // Copyright (c) 2019 kmhuang and ruanjm. All rights reserved. 
//

#ifndef _GPU_MODEL_H
#define _GPU_MODEL_H

// NOTE(review): the header name of this #include was lost in extraction.
// The __host__/__device__ qualifiers used below come from the CUDA runtime
// headers, so <cuda_runtime.h> is the presumed target - confirm.
#include <cuda_runtime.h>

// Slots of PTXBlockStatistic::num_stat / InstructionInfo::inst_info.
#define I_ADD_SUB   0   // add.s32, sub.s32
#define I_MAD_MUL   1   // mul.xx.s32, mad.xx.s32, xx=lo
#define I_DIV_REM   2   // div.u32, rem.u32, div.s32, rem.s32
#define I_MIN_MAX   3   // min.u32, max.u32, min.s32, max.s32
#define I_ABS       4   // abs.s32
#define I_MUL24     5   // mul24.xx.u32, mul24.xx.s32, xx=lo
#define I_LOGICAL   6   // AND, OR and XOR
#define I_SHL_SHR   7   // shl.b32, shr.u32
#define I_SAD       8   // sad.u32, sad.s32
#define F_ADD_SUB   9   // add.f32, sub.f32
#define F_MAD_MUL   10  // mul.f32, fma.xx.f32, xx=rn
#define F_DIV       11  // div.xx.f32, xx=rn
#define F_DIVIDEF   12  // div.approx.f32
#define F_EXP2      13  // ex2.approx.f32
#define F_LOG2      14  // lg2.approx.f32
#define F_SIN_COS   15  // sin.approx.f32, cos.approx.f32
#define F_SQRT      16  // sqrt.xx.f32, xx=rn
#define F_RSQRT     17  // rsqrt.approx.f32
#define F_MIN_MAX   18  // min.f32, max.f32
#define F_RCP       19  // rcp.xx.f32, xx=rn

#define ARI_STAT_SIZE 20    // arithmetical insts
#define DEFAULT_INST ARI_STAT_SIZE

// Slots of PTXBlockStatistic::num_mem.
#define GLOBAL_ACC  0
#define SHARED_ACC  1
#define LOCAL_ACC   2
#define CONST_ACC   3

#define MEM_STAT_SIZE 4

#define NO_RECOMMENDATION -1

// Instruction counters of one PTX block.
//
// The struct deliberately has no constructor or default member initializers:
// a default-constructed object holds indeterminate values until every field
// has been assigned.
struct PTXBlockStatistic
{
    // Returns a statistic whose counters are this object's counters times t.
    // The result's recommended_times is always 1.
    __host__ __device__
    PTXBlockStatistic operator*(unsigned int t) const {
        PTXBlockStatistic scaled;

        scaled.recommended_times = 1;

        scaled.num_insts   = num_insts * t;
        scaled.num_unknown = num_unknown * t;
        scaled.num_sync    = num_sync * t;
        scaled.num_bra     = num_bra * t;
        for (size_t k = 0; k < ARI_STAT_SIZE; ++k) {
            scaled.num_stat[k] = num_stat[k] * t;
        }
        for (size_t k = 0; k < MEM_STAT_SIZE; ++k) {
            scaled.num_mem[k] = num_mem[k] * t;
        }

        return scaled;
    }

    // In-place form of operator*: every counter is multiplied by t and
    // recommended_times becomes 1.
    __host__ __device__
    void operator*=(unsigned int t) {
        *this = *this * t;
    }

    // Returns the counter-wise sum of this object and a.
    // The result's recommended_times is always NO_RECOMMENDATION.
    __host__ __device__
    PTXBlockStatistic operator+(const PTXBlockStatistic &a) const {
        PTXBlockStatistic total;

        total.recommended_times = NO_RECOMMENDATION;

        total.num_insts   = num_insts + a.num_insts;
        total.num_unknown = num_unknown + a.num_unknown;
        total.num_sync    = num_sync + a.num_sync;
        total.num_bra     = num_bra + a.num_bra;
        for (size_t k = 0; k < ARI_STAT_SIZE; ++k) {
            total.num_stat[k] = num_stat[k] + a.num_stat[k];
        }
        for (size_t k = 0; k < MEM_STAT_SIZE; ++k) {
            total.num_mem[k] = num_mem[k] + a.num_mem[k];
        }

        return total;
    }

    // In-place form of operator+: a's counters are added to this object and
    // recommended_times becomes NO_RECOMMENDATION.
    __host__ __device__
    void operator +=(const PTXBlockStatistic &a) {
        *this = *this + a;
    }

    int recommended_times;

    // statistics
    unsigned int num_insts;
    unsigned int num_unknown;
    unsigned int num_sync;
    unsigned int num_bra;
    unsigned int num_stat[ARI_STAT_SIZE];   // indexed by I_* / F_*
    unsigned int num_mem[MEM_STAT_SIZE];    // indexed by *_ACC
};

struct KernelRelatedParas
{
    //float l2_hit_rate;            // L2 cache hit rate, get from NSIGHT profiler
    //float num_uncoal_per_warp;    // #memory_transactions per warp(uncoalesced access)
    float dram_lat;                 // baseline DRAM access latency
    float delta;                    // transaction departure delay
    float default_inst_lat;         // default instruction latency
    //float block_size;             // #threads in a CTA
};

struct GPUDeviceInfo
{
    //float freq;               // GPU core clock frequency in GHz
    //float mem_peak_bw;        // GPU memory bandwidth in GBps
    float simd_width;           // #SPs per SM
    float warp_size;            // #threads in a warp
    //float transaction_size;   // transaction size for a DRAM request in Bytes
    //float l2_lat;
    float gamma;                // machine dependent parameter(for thread sync)
    float ilp;
    float mlp;
};

// Per-instruction values, one float per I_* / F_* slot.
// Indexing is unchecked: i must be smaller than ARI_STAT_SIZE.
struct InstructionInfo
{
    float inst_info[ARI_STAT_SIZE];

    __device__ __host__
    float operator[](unsigned int i) const {
        return inst_info[i];
    }

    __device__ __host__
    float &operator[](unsigned int i) {
        return inst_info[i];
    }
};

#endif/*_GPU_MODEL_H*/

--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/gpu_model_reader.cpp:
--------------------------------------------------------------------------------
//
// gpu_model_reader.cpp
// Hybrid_Parallel_SPH
//
// created by kmhuang and ruanjm on 2018/09/01
// Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
7 | // 8 | 9 | #include "gpu_model_reader.h" 10 | #include 11 | #include 12 | #include "json/json.h" 13 | #include "json/reader.h" 14 | 15 | namespace gpu_model 16 | { 17 | 18 | unsigned int readPTXStatisticsFromFile(PTXBlockStatistic *&output, const std::string &func_name, const std::string &file_name) 19 | { 20 | std::ifstream input(file_name, std::ios::binary); 21 | if (!input.is_open()) 22 | { 23 | std::cout << "can not open " << file_name << std::endl; 24 | return 0; 25 | } 26 | 27 | unsigned int num_block = 0; 28 | 29 | Json::Reader js_reader; 30 | Json::Value root; 31 | 32 | if (js_reader.parse(input, root)) 33 | { 34 | for (int i = 0; i < root.size(); ++i) 35 | { 36 | if (func_name == root[i]["function_name"].asString()) 37 | { 38 | Json::Value blocks = root[i]["block_array"]; 39 | num_block = blocks.size(); 40 | output = new PTXBlockStatistic[blocks.size()]; 41 | for (int j = 0; j < blocks.size(); ++j) 42 | { 43 | PTXBlockStatistic stat; 44 | 45 | stat.num_insts = blocks[j]["num_insts"].asUInt(); 46 | stat.num_unknown = blocks[j]["num_unknown"].asUInt(); 47 | stat.num_sync = blocks[j]["num_sync"].asUInt(); 48 | stat.num_bra = blocks[j]["num_bra"].asUInt(); 49 | stat.num_stat[I_ADD_SUB] = blocks[j]["I_ADD_SUB"].asUInt(); 50 | stat.num_stat[I_MAD_MUL] = blocks[j]["I_MAD_MUL"].asUInt(); 51 | stat.num_stat[I_DIV_REM] = blocks[j]["I_DIV_REM"].asUInt(); 52 | stat.num_stat[I_MIN_MAX] = blocks[j]["I_MIN_MAX"].asUInt(); 53 | stat.num_stat[I_ABS] = blocks[j]["I_ABS"].asUInt(); 54 | stat.num_stat[I_MUL24] = blocks[j]["I_MUL24"].asUInt(); 55 | stat.num_stat[I_LOGICAL] = blocks[j]["I_LOGICAL"].asUInt(); 56 | stat.num_stat[I_SHL_SHR] = blocks[j]["I_SHL_SHR"].asUInt(); 57 | stat.num_stat[I_SAD] = blocks[j]["I_SAD"].asUInt(); 58 | stat.num_stat[F_ADD_SUB] = blocks[j]["F_ADD_SUB"].asUInt(); 59 | stat.num_stat[F_MAD_MUL] = blocks[j]["F_MAD_MUL"].asUInt(); 60 | stat.num_stat[F_DIV] = blocks[j]["F_DIV"].asUInt(); 61 | stat.num_stat[F_DIVIDEF] = 
blocks[j]["F_DIVIDEF"].asUInt(); 62 | stat.num_stat[F_EXP2] = blocks[j]["F_EXP2"].asUInt(); 63 | stat.num_stat[F_LOG2] = blocks[j]["F_LOG2"].asUInt(); 64 | stat.num_stat[F_SIN_COS] = blocks[j]["F_SIN_COS"].asUInt(); 65 | stat.num_stat[F_SQRT] = blocks[j]["F_SQRT"].asUInt(); 66 | stat.num_stat[F_RSQRT] = blocks[j]["F_RSQRT"].asUInt(); 67 | stat.num_stat[F_MIN_MAX] = blocks[j]["F_MIN_MAX"].asUInt(); 68 | stat.num_stat[F_RCP] = blocks[j]["F_RCP"].asUInt(); 69 | stat.num_mem[GLOBAL_ACC] = blocks[j]["GLOBAL_ACC"].asUInt(); 70 | stat.num_mem[SHARED_ACC] = blocks[j]["SHARED_ACC"].asUInt(); 71 | stat.num_mem[LOCAL_ACC] = blocks[j]["LOCAL_ACC"].asUInt(); 72 | stat.num_mem[CONST_ACC] = blocks[j]["CONST_ACC"].asUInt(); 73 | 74 | output[j] = stat; 75 | } 76 | 77 | break; 78 | } 79 | } 80 | 81 | // set recommendation 82 | if (num_block == 7) // SMS 83 | { 84 | output[0].recommended_times = 1; 85 | output[1].recommended_times = 27 / 2; 86 | output[2].recommended_times = 1; 87 | output[3].recommended_times = NO_RECOMMENDATION; 88 | output[4].recommended_times = NO_RECOMMENDATION; 89 | output[5].recommended_times = NO_RECOMMENDATION; 90 | output[6].recommended_times = 1; 91 | } 92 | 93 | if (num_block == 5) // traditional 94 | { 95 | output[0].recommended_times = 1; 96 | output[1].recommended_times = 27; 97 | output[2].recommended_times = NO_RECOMMENDATION; 98 | output[3].recommended_times = 27; 99 | output[4].recommended_times = 1; 100 | } 101 | } 102 | 103 | return num_block; 104 | } 105 | 106 | void readInstructionLatencyFromFile(InstructionInfo &inst_info, const std::string &file_name) 107 | { 108 | std::ifstream input(file_name, std::ios::binary); 109 | if (!input.is_open()) 110 | { 111 | std::cout << "can not open " << file_name << std::endl; 112 | return; 113 | } 114 | 115 | Json::Reader js_reader; 116 | Json::Value root; 117 | 118 | if (js_reader.parse(input, root)) 119 | { 120 | inst_info[I_ADD_SUB] = root["I_ADD_SUB"].asFloat(); 121 | inst_info[I_MAD_MUL] = 
root["I_MAD_MUL"].asFloat(); 122 | inst_info[I_DIV_REM] = root["I_DIV_REM"].asFloat(); 123 | inst_info[I_MIN_MAX] = root["I_MIN_MAX"].asFloat(); 124 | inst_info[I_ABS] = root["I_ABS"].asFloat(); 125 | inst_info[I_MUL24] = root["I_MUL24"].asFloat(); 126 | inst_info[I_LOGICAL] = root["I_LOGICAL"].asFloat(); 127 | inst_info[I_SHL_SHR] = root["I_SHL_SHR"].asFloat(); 128 | inst_info[I_SAD] = root["I_SAD"].asFloat(); 129 | inst_info[F_ADD_SUB] = root["F_ADD_SUB"].asFloat(); 130 | inst_info[F_MAD_MUL] = root["F_MAD_MUL"].asFloat(); 131 | inst_info[F_DIV] = root["F_DIV"].asFloat(); 132 | inst_info[F_DIVIDEF] = root["F_DIVIDEF"].asFloat(); 133 | inst_info[F_EXP2] = root["F_EXP2"].asFloat(); 134 | inst_info[F_LOG2] = root["F_LOG2"].asFloat(); 135 | inst_info[F_SIN_COS] = root["F_SIN_COS"].asFloat(); 136 | inst_info[F_SQRT] = root["F_SQRT"].asFloat(); 137 | inst_info[F_RSQRT] = root["F_RSQRT"].asFloat(); 138 | inst_info[F_MIN_MAX] = root["F_MIN_MAX"].asFloat(); 139 | inst_info[F_RCP] = root["F_RCP"].asFloat(); 140 | } 141 | } 142 | 143 | } -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/config.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef JSON_CONFIG_H_INCLUDED 7 | #define JSON_CONFIG_H_INCLUDED 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | /// If defined, indicates that json library is embedded in CppTL library. 
18 | //# define JSON_IN_CPPTL 1 19 | 20 | /// If defined, indicates that json may leverage CppTL library 21 | //# define JSON_USE_CPPTL 1 22 | /// If defined, indicates that cpptl vector based map should be used instead of 23 | /// std::map 24 | /// as Value container. 25 | //# define JSON_USE_CPPTL_SMALLMAP 1 26 | 27 | // If non-zero, the library uses exceptions to report bad input instead of C 28 | // assertion macros. The default is to use exceptions. 29 | #ifndef JSON_USE_EXCEPTION 30 | #define JSON_USE_EXCEPTION 1 31 | #endif 32 | 33 | // Temporary, tracked for removal with issue #982. 34 | #ifndef JSON_USE_NULLREF 35 | #define JSON_USE_NULLREF 1 36 | #endif 37 | 38 | /// If defined, indicates that the source file is amalgamated 39 | /// to prevent private header inclusion. 40 | /// Remarks: it is automatically defined in the generated amalgamated header. 41 | // #define JSON_IS_AMALGAMATION 42 | 43 | #ifdef JSON_IN_CPPTL 44 | #include 45 | #ifndef JSON_USE_CPPTL 46 | #define JSON_USE_CPPTL 1 47 | #endif 48 | #endif 49 | 50 | #ifdef JSON_IN_CPPTL 51 | #define JSON_API CPPTL_API 52 | #elif defined(JSON_DLL_BUILD) 53 | #if defined(_MSC_VER) || defined(__MINGW32__) 54 | #define JSON_API __declspec(dllexport) 55 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 56 | #elif defined(__GNUC__) || defined(__clang__) 57 | #define JSON_API __attribute__((visibility("default"))) 58 | #endif // if defined(_MSC_VER) 59 | #elif defined(JSON_DLL) 60 | #if defined(_MSC_VER) || defined(__MINGW32__) 61 | #define JSON_API __declspec(dllimport) 62 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 63 | #endif // if defined(_MSC_VER) 64 | #endif // ifdef JSON_IN_CPPTL 65 | #if !defined(JSON_API) 66 | #define JSON_API 67 | #endif 68 | 69 | #if defined(_MSC_VER) && _MSC_VER < 1800 70 | #error \ 71 | "ERROR: Visual Studio 12 (2013) with _MSC_VER=1800 is the oldest supported compiler with sufficient C++11 capabilities" 72 | #endif 73 | 74 | #if defined(_MSC_VER) && _MSC_VER < 1900 75 | // 
As recommended at 76 | // https://stackoverflow.com/questions/2915672/snprintf-and-visual-studio-2010 77 | extern JSON_API int 78 | msvc_pre1900_c99_snprintf(char* outBuf, size_t size, const char* format, ...); 79 | #define jsoncpp_snprintf msvc_pre1900_c99_snprintf 80 | #else 81 | #define jsoncpp_snprintf std::snprintf 82 | #endif 83 | 84 | // If JSON_NO_INT64 is defined, then Json only support C++ "int" type for 85 | // integer 86 | // Storages, and 64 bits integer support is disabled. 87 | // #define JSON_NO_INT64 1 88 | 89 | // JSONCPP_OVERRIDE is maintained for backwards compatibility of external tools. 90 | // C++11 should be used directly in JSONCPP. 91 | #define JSONCPP_OVERRIDE override 92 | 93 | #if __cplusplus >= 201103L 94 | #define JSONCPP_NOEXCEPT noexcept 95 | #define JSONCPP_OP_EXPLICIT explicit 96 | #elif defined(_MSC_VER) && _MSC_VER < 1900 97 | #define JSONCPP_NOEXCEPT throw() 98 | #define JSONCPP_OP_EXPLICIT explicit 99 | #elif defined(_MSC_VER) && _MSC_VER >= 1900 100 | #define JSONCPP_NOEXCEPT noexcept 101 | #define JSONCPP_OP_EXPLICIT explicit 102 | #else 103 | #define JSONCPP_NOEXCEPT throw() 104 | #define JSONCPP_OP_EXPLICIT 105 | #endif 106 | 107 | #ifdef __clang__ 108 | #if __has_extension(attribute_deprecated_with_message) 109 | #define JSONCPP_DEPRECATED(message) __attribute__((deprecated(message))) 110 | #endif 111 | #elif defined __GNUC__ // not clang (gcc comes later since clang emulates gcc) 112 | #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) 113 | #define JSONCPP_DEPRECATED(message) __attribute__((deprecated(message))) 114 | #elif (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 115 | #define JSONCPP_DEPRECATED(message) __attribute__((__deprecated__)) 116 | #endif // GNUC version 117 | #elif defined(_MSC_VER) // MSVC (after clang because clang on Windows emulates 118 | // MSVC) 119 | #define JSONCPP_DEPRECATED(message) __declspec(deprecated(message)) 120 | #endif // __clang__ || __GNUC__ || _MSC_VER 121 | 122 
| #if !defined(JSONCPP_DEPRECATED) 123 | #define JSONCPP_DEPRECATED(message) 124 | #endif // if !defined(JSONCPP_DEPRECATED) 125 | 126 | #if __GNUC__ >= 6 127 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1 128 | #endif 129 | 130 | #if !defined(JSON_IS_AMALGAMATION) 131 | 132 | #include "allocator.h" 133 | #include "version.h" 134 | 135 | #endif // if !defined(JSON_IS_AMALGAMATION) 136 | 137 | namespace Json { 138 | typedef int Int; 139 | typedef unsigned int UInt; 140 | #if defined(JSON_NO_INT64) 141 | typedef int LargestInt; 142 | typedef unsigned int LargestUInt; 143 | #undef JSON_HAS_INT64 144 | #else // if defined(JSON_NO_INT64) 145 | // For Microsoft Visual use specific types as long long is not supported 146 | #if defined(_MSC_VER) // Microsoft Visual Studio 147 | typedef __int64 Int64; 148 | typedef unsigned __int64 UInt64; 149 | #else // if defined(_MSC_VER) // Other platforms, use long long 150 | typedef int64_t Int64; 151 | typedef uint64_t UInt64; 152 | #endif // if defined(_MSC_VER) 153 | typedef Int64 LargestInt; 154 | typedef UInt64 LargestUInt; 155 | #define JSON_HAS_INT64 156 | #endif // if defined(JSON_NO_INT64) 157 | 158 | template 159 | using Allocator = typename std::conditional, 161 | std::allocator>::type; 162 | using String = std::basic_string, Allocator>; 163 | using IStringStream = std::basic_istringstream; 166 | using OStringStream = std::basic_ostringstream; 169 | using IStream = std::istream; 170 | using OStream = std::ostream; 171 | } // namespace Json 172 | 173 | // Legacy names (formerly macros). 
using JSONCPP_STRING = Json::String;
using JSONCPP_ISTRINGSTREAM = Json::IStringStream;
using JSONCPP_OSTRINGSTREAM = Json::OStringStream;
using JSONCPP_ISTREAM = Json::IStream;
using JSONCPP_OSTREAM = Json::OStream;

#endif // JSON_CONFIG_H_INCLUDED

--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_kernel.cuh:
--------------------------------------------------------------------------------
//
// sph_kernel.cuh
// Hybrid_Parallel_SPH
//
// created by kmhuang and ruanjm on 2018/09/01
// Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
//

#ifndef _SPH_KERNEL_CUH
#define _SPH_KERNEL_CUH

// NOTE(review): the header name of this #include was lost in extraction.
// The __host__/__device__ qualifiers used below come from the CUDA runtime
// headers, so <cuda_runtime.h> is the presumed target - confirm.
#include <cuda_runtime.h>
#include "sph_parameter.h"
#include "sph_particle.h"
#include "pcisph_factor.h"

namespace sph
{

// Half-open particle index range [begin, end), zero-based numbering.
// The default constructor leaves begin and end uninitialized.
struct ParticleIdxRange
{
    __host__ __device__
    ParticleIdxRange(){}
    __host__ __device__
    ParticleIdxRange(int b, int e) : begin(b), end(e) {}
    int begin, end;
};

// Everything below is a declaration only; the definitions are not part of
// this header. Function name suffixes (TRA, TRAS, SMS, SMS64, Hybrid,
// Hybrid128, Hybrid128n) distinguish variants of the same step; the variants
// differ in whether they take a ParticleIdxRange, a BlockTask array with its
// block count, or both.

// ---- setup / teardown -------------------------------------------------------
void BuffInit(ParticleBufferList buff_list_n, int nm);

void transSysParaToDevice(const SystemParameter *host_para);

void initializeKernel();

void releaseKernel();

void find_max_P(int blocks, int tds, sumGrad *id_value, int numbers);

// ---- "Mix" steps ------------------------------------------------------------
void computeMixDensityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void computeDriftVelocityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void computeVolumeFracTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void computeAccelTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void advanceMix(ParticleBufferList buff_list, int nump);

// ---- density / force --------------------------------------------------------
void computeDensityTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void computeForceTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void computeDensitySMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeDensitySMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
void computeDensityHybrid128n(int *cell_offset_M, ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeForceHybrid128n(int *cell_offset_M, ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
//void computeDensityHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

//void computeForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeForceSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeForceSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeOtherForceSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeOtherForceTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_number, BlockTask *block_task, int num_block);

void computeOtherForceSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeOtherForceHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);


void computeOtherForceHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeOtherForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int* cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeOtherForceTRA(ParticleBufferList buff_list, ParticleIdxRange range, int *cell_offset, int *cell_num);

void manualSetting(ParticleBufferList buff_list, int nump, int step);

void advance(ParticleBufferList buff_list, int nump);
void advanceWave(ParticleBufferList buff_list, int nump, float time);
//sf pcisph-----------------------

void advancePCI(ParticleBufferList buff_list, int nump);

float computeDensityErrorFactorTRA(float mass, float rest_density, float time_step, ParticleBufferList buff_list, int *cell_offset, int *cell_num, uint nump);


void computeGradWValuesSimpleSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, sumGrad *particle_device);

void computeGradWValuesSimpleTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range, sumGrad *particle_device);

void predictionCorrectionStepSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed);

void predictionCorrectionStepTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_number, BlockTask *block_task, int num_block
    , float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed);

void predictionCorrectionStepSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed);


void predictionCorrectionStepHybrid(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed, ParticleIdxRange range);

void predictionCorrectionStepHybrid128(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed, ParticleIdxRange range);

void predictionCorrectionStepHybrid128n(ParticleBufferList buff_list_n,int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed, ParticleIdxRange range);

void predictionCorrectionStepTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num,
    float pcisph_density_factor, unsigned int nump, int pcisph_min_loop, int pcisph_max_loop, float pcisph_max_density_error_allowed, ParticleIdxRange range);



void predictPositionAndVelocity(ParticleBufferList buff_list, unsigned int nump);
void computePredictedDensityAndPressureSMS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);

void computePredictedDensityAndPressureTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);

void computePredictedDensityAndPressureSMS64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);

void computePredictedDensityAndPressureHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);

void computePredictedDensityAndPressureHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);

void computePredictedDensityAndPressureHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block, float pcisph_density_factor);
void computePredictedDensityAndPressureTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range, float pcisph_density_factor);

// Writes its result through the max_predicted_density reference.
void getMaxPredictedDensityCUDA(ParticleBufferList buff_list, float& max_predicted_density, unsigned int nump);
void computeCorrectivePressureForce(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);


void computeCorrectivePressureForceTRAS(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeCorrectivePressureForce64(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeCorrectivePressureForceHybrid(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeCorrectivePressureForceHybrid128(ParticleIdxRange range, ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);
void computeCorrectivePressureForceHybrid128n(ParticleIdxRange range, ParticleBufferList buff_list_n, int *cindex, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeCorrectivePressureForceTRA(ParticleBufferList buff_list, int *cell_offset, int *cell_num, ParticleIdxRange range);

//sf heat conduction-------------------
void computeHeatFlux(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

void computeTemperatureAndPhaseTransAndGetVis(ParticleBufferList buff_list, int *cell_offset, int *cell_num, BlockTask *block_task, int num_block);

}

#endif/*_SPH_KERNEL_CUH*/

--------------------------------------------------------------------------------
/sph_fastest/Hybrid_Fluid_Simulation/sph_tra_arti_block_statistics.json:
--------------------------------------------------------------------------------
[
   {
      "block_array" : [
         {
            "CONST_ACC" : 11,
            "F_ADD_SUB" : 0,
            "F_DIV" : 3,
            "F_DIVIDEF" : 0,
            "F_EXP2" : 0,
            "F_LOG2" : 0,
            "F_MAD_MUL" : 0,
            "F_MIN_MAX" : 0,
            "F_RCP" : 0,
            "F_RSQRT" : 0,
            "F_SIN_COS" : 0,
            "F_SQRT" : 0,
            "GLOBAL_ACC" : 5,
            "I_ABS" : 0,
            "I_ADD_SUB" : 7,
            "I_DIV_REM" : 0,
            "I_LOGICAL" : 0,
            "I_MAD_MUL" : 3,
            "I_MIN_MAX" : 0,
            "I_MUL24" : 1,
            "I_SAD" : 0,
            "I_SHL_SHR" : 1,
            "LOCAL_ACC" : 0,
            "SHARED_ACC" : 0,
            "begin" : 0,
            "end" : 55,
            "name" : "arti - [0, 55]",
            "num_bra" : 1,
            "num_insts" : 52,
            "num_sync" : 0,
            "num_unknown" : 20
         },
         {
            "CONST_ACC" : 0,
            "F_ADD_SUB" : 0,
            "F_DIV" : 0,
            "F_DIVIDEF" : 0,
            "F_EXP2" : 0,
            "F_LOG2" : 0,
            "F_MAD_MUL" : 0,
"F_MIN_MAX" : 0, 46 | "F_RCP" : 0, 47 | "F_RSQRT" : 0, 48 | "F_SIN_COS" : 0, 49 | "F_SQRT" : 0, 50 | "GLOBAL_ACC" : 2, 51 | "I_ABS" : 0, 52 | "I_ADD_SUB" : 4, 53 | "I_DIV_REM" : 0, 54 | "I_LOGICAL" : 0, 55 | "I_MAD_MUL" : 1, 56 | "I_MIN_MAX" : 0, 57 | "I_MUL24" : 0, 58 | "I_SAD" : 0, 59 | "I_SHL_SHR" : 1, 60 | "LOCAL_ACC" : 0, 61 | "SHARED_ACC" : 0, 62 | "begin" : 55, 63 | "end" : 81, 64 | "name" : "arti - [55, 81]", 65 | "num_bra" : 2, 66 | "num_insts" : 24, 67 | "num_sync" : 0, 68 | "num_unknown" : 14 69 | }, 70 | { 71 | "CONST_ACC" : 0, 72 | "F_ADD_SUB" : 5, 73 | "F_DIV" : 0, 74 | "F_DIVIDEF" : 0, 75 | "F_EXP2" : 1, 76 | "F_LOG2" : 1, 77 | "F_MAD_MUL" : 4, 78 | "F_MIN_MAX" : 0, 79 | "F_RCP" : 0, 80 | "F_RSQRT" : 0, 81 | "F_SIN_COS" : 0, 82 | "F_SQRT" : 0, 83 | "GLOBAL_ACC" : 3, 84 | "I_ABS" : 0, 85 | "I_ADD_SUB" : 2, 86 | "I_DIV_REM" : 0, 87 | "I_LOGICAL" : 0, 88 | "I_MAD_MUL" : 0, 89 | "I_MIN_MAX" : 0, 90 | "I_MUL24" : 0, 91 | "I_SAD" : 0, 92 | "I_SHL_SHR" : 0, 93 | "LOCAL_ACC" : 0, 94 | "SHARED_ACC" : 0, 95 | "begin" : 81, 96 | "end" : 104, 97 | "name" : "arti - [81, 104]", 98 | "num_bra" : 2, 99 | "num_insts" : 22, 100 | "num_sync" : 0, 101 | "num_unknown" : 4 102 | }, 103 | { 104 | "CONST_ACC" : 0, 105 | "F_ADD_SUB" : 1, 106 | "F_DIV" : 0, 107 | "F_DIVIDEF" : 0, 108 | "F_EXP2" : 0, 109 | "F_LOG2" : 0, 110 | "F_MAD_MUL" : 0, 111 | "F_MIN_MAX" : 0, 112 | "F_RCP" : 0, 113 | "F_RSQRT" : 0, 114 | "F_SIN_COS" : 0, 115 | "F_SQRT" : 0, 116 | "GLOBAL_ACC" : 0, 117 | "I_ABS" : 0, 118 | "I_ADD_SUB" : 1, 119 | "I_DIV_REM" : 0, 120 | "I_LOGICAL" : 0, 121 | "I_MAD_MUL" : 0, 122 | "I_MIN_MAX" : 0, 123 | "I_MUL24" : 0, 124 | "I_SAD" : 0, 125 | "I_SHL_SHR" : 0, 126 | "LOCAL_ACC" : 0, 127 | "SHARED_ACC" : 0, 128 | "begin" : 104, 129 | "end" : 109, 130 | "name" : "arti - [104, 109]", 131 | "num_bra" : 0, 132 | "num_insts" : 4, 133 | "num_sync" : 0, 134 | "num_unknown" : 2 135 | }, 136 | { 137 | "CONST_ACC" : 5, 138 | "F_ADD_SUB" : 1, 139 | "F_DIV" : 0, 140 | "F_DIVIDEF" : 1, 
141 | "F_EXP2" : 1, 142 | "F_LOG2" : 1, 143 | "F_MAD_MUL" : 4, 144 | "F_MIN_MAX" : 0, 145 | "F_RCP" : 0, 146 | "F_RSQRT" : 0, 147 | "F_SIN_COS" : 0, 148 | "F_SQRT" : 0, 149 | "GLOBAL_ACC" : 2, 150 | "I_ABS" : 0, 151 | "I_ADD_SUB" : 4, 152 | "I_DIV_REM" : 0, 153 | "I_LOGICAL" : 0, 154 | "I_MAD_MUL" : 0, 155 | "I_MIN_MAX" : 0, 156 | "I_MUL24" : 0, 157 | "I_SAD" : 0, 158 | "I_SHL_SHR" : 1, 159 | "LOCAL_ACC" : 0, 160 | "SHARED_ACC" : 0, 161 | "begin" : 109, 162 | "end" : 136, 163 | "name" : "arti - [109, 136]", 164 | "num_bra" : 3, 165 | "num_insts" : 26, 166 | "num_sync" : 0, 167 | "num_unknown" : 3 168 | } 169 | ], 170 | "function_name" : "_Z21knBmComputeDensityTRAN3SPH18ParticleBufferListEPjS1_jS1_" 171 | }, 172 | { 173 | "block_array" : [ 174 | { 175 | "CONST_ACC" : 11, 176 | "F_ADD_SUB" : 0, 177 | "F_DIV" : 3, 178 | "F_DIVIDEF" : 0, 179 | "F_EXP2" : 0, 180 | "F_LOG2" : 0, 181 | "F_MAD_MUL" : 0, 182 | "F_MIN_MAX" : 0, 183 | "F_RCP" : 0, 184 | "F_RSQRT" : 0, 185 | "F_SIN_COS" : 0, 186 | "F_SQRT" : 0, 187 | "GLOBAL_ACC" : 9, 188 | "I_ABS" : 0, 189 | "I_ADD_SUB" : 10, 190 | "I_DIV_REM" : 0, 191 | "I_LOGICAL" : 0, 192 | "I_MAD_MUL" : 3, 193 | "I_MIN_MAX" : 0, 194 | "I_MUL24" : 1, 195 | "I_SAD" : 0, 196 | "I_SHL_SHR" : 2, 197 | "LOCAL_ACC" : 0, 198 | "SHARED_ACC" : 0, 199 | "begin" : 0, 200 | "end" : 70, 201 | "name" : "arti - [0, 70]", 202 | "num_bra" : 1, 203 | "num_insts" : 67, 204 | "num_sync" : 0, 205 | "num_unknown" : 27 206 | }, 207 | { 208 | "CONST_ACC" : 4, 209 | "F_ADD_SUB" : 0, 210 | "F_DIV" : 0, 211 | "F_DIVIDEF" : 0, 212 | "F_EXP2" : 0, 213 | "F_LOG2" : 0, 214 | "F_MAD_MUL" : 0, 215 | "F_MIN_MAX" : 0, 216 | "F_RCP" : 0, 217 | "F_RSQRT" : 0, 218 | "F_SIN_COS" : 0, 219 | "F_SQRT" : 0, 220 | "GLOBAL_ACC" : 2, 221 | "I_ABS" : 0, 222 | "I_ADD_SUB" : 6, 223 | "I_DIV_REM" : 0, 224 | "I_LOGICAL" : 0, 225 | "I_MAD_MUL" : 1, 226 | "I_MIN_MAX" : 0, 227 | "I_MUL24" : 0, 228 | "I_SAD" : 0, 229 | "I_SHL_SHR" : 2, 230 | "LOCAL_ACC" : 0, 231 | "SHARED_ACC" : 0, 232 | 
"begin" : 70, 233 | "end" : 117, 234 | "name" : "arti - [70, 117]", 235 | "num_bra" : 3, 236 | "num_insts" : 46, 237 | "num_sync" : 0, 238 | "num_unknown" : 28 239 | }, 240 | { 241 | "CONST_ACC" : 6, 242 | "F_ADD_SUB" : 13, 243 | "F_DIV" : 0, 244 | "F_DIVIDEF" : 1, 245 | "F_EXP2" : 1, 246 | "F_LOG2" : 1, 247 | "F_MAD_MUL" : 24, 248 | "F_MIN_MAX" : 0, 249 | "F_RCP" : 1, 250 | "F_RSQRT" : 0, 251 | "F_SIN_COS" : 0, 252 | "F_SQRT" : 1, 253 | "GLOBAL_ACC" : 8, 254 | "I_ABS" : 0, 255 | "I_ADD_SUB" : 8, 256 | "I_DIV_REM" : 0, 257 | "I_LOGICAL" : 0, 258 | "I_MAD_MUL" : 0, 259 | "I_MIN_MAX" : 0, 260 | "I_MUL24" : 0, 261 | "I_SAD" : 0, 262 | "I_SHL_SHR" : 0, 263 | "LOCAL_ACC" : 0, 264 | "SHARED_ACC" : 0, 265 | "begin" : 117, 266 | "end" : 191, 267 | "name" : "arti - [117, 191]", 268 | "num_bra" : 1, 269 | "num_insts" : 73, 270 | "num_sync" : 0, 271 | "num_unknown" : 8 272 | }, 273 | { 274 | "CONST_ACC" : 0, 275 | "F_ADD_SUB" : 3, 276 | "F_DIV" : 0, 277 | "F_DIVIDEF" : 0, 278 | "F_EXP2" : 0, 279 | "F_LOG2" : 0, 280 | "F_MAD_MUL" : 0, 281 | "F_MIN_MAX" : 0, 282 | "F_RCP" : 0, 283 | "F_RSQRT" : 0, 284 | "F_SIN_COS" : 0, 285 | "F_SQRT" : 0, 286 | "GLOBAL_ACC" : 0, 287 | "I_ABS" : 0, 288 | "I_ADD_SUB" : 1, 289 | "I_DIV_REM" : 0, 290 | "I_LOGICAL" : 0, 291 | "I_MAD_MUL" : 0, 292 | "I_MIN_MAX" : 0, 293 | "I_MUL24" : 0, 294 | "I_SAD" : 0, 295 | "I_SHL_SHR" : 0, 296 | "LOCAL_ACC" : 0, 297 | "SHARED_ACC" : 0, 298 | "begin" : 191, 299 | "end" : 203, 300 | "name" : "arti - [191, 203]", 301 | "num_bra" : 1, 302 | "num_insts" : 10, 303 | "num_sync" : 0, 304 | "num_unknown" : 5 305 | }, 306 | { 307 | "CONST_ACC" : 8, 308 | "F_ADD_SUB" : 0, 309 | "F_DIV" : 0, 310 | "F_DIVIDEF" : 2, 311 | "F_EXP2" : 0, 312 | "F_LOG2" : 0, 313 | "F_MAD_MUL" : 21, 314 | "F_MIN_MAX" : 0, 315 | "F_RCP" : 0, 316 | "F_RSQRT" : 0, 317 | "F_SIN_COS" : 0, 318 | "F_SQRT" : 1, 319 | "GLOBAL_ACC" : 5, 320 | "I_ABS" : 0, 321 | "I_ADD_SUB" : 8, 322 | "I_DIV_REM" : 0, 323 | "I_LOGICAL" : 0, 324 | "I_MAD_MUL" : 1, 325 | 
"I_MIN_MAX" : 0, 326 | "I_MUL24" : 2, 327 | "I_SAD" : 0, 328 | "I_SHL_SHR" : 1, 329 | "LOCAL_ACC" : 0, 330 | "SHARED_ACC" : 0, 331 | "begin" : 203, 332 | "end" : 276, 333 | "name" : "arti - [203, 276]", 334 | "num_bra" : 4, 335 | "num_insts" : 71, 336 | "num_sync" : 0, 337 | "num_unknown" : 18 338 | } 339 | ], 340 | "function_name" : "_Z19knBmComputeForceTRAN3SPH18ParticleBufferListEPjS1_jS1_" 341 | } 342 | ] 343 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/prefix_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and 5 | * proprietary rights in and to this software and related documentation and 6 | * any modifications thereto. Any use, reproduction, disclosure, or distribution 7 | * of this software and related documentation without an express license 8 | * agreement from NVIDIA Corporation is strictly prohibited. 9 | * 10 | */ 11 | 12 | #ifndef _SCAN_BEST_KERNEL_CU_ 13 | #define _SCAN_BEST_KERNEL_CU_ 14 | 15 | // Define this to more rigorously avoid bank conflicts, 16 | // even at the lower (root) levels of the tree 17 | // Note that due to the higher addressing overhead, performance 18 | // is lower with ZERO_BANK_CONFLICTS enabled. It is provided 19 | // as an example. 20 | //#define ZERO_BANK_CONFLICTS 21 | 22 | #define LOG_NUM_BANKS 4 23 | 24 | #ifdef ZERO_BANK_CONFLICTS 25 | #define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS + (index) >> (2*LOG_NUM_BANKS)) 26 | #else 27 | #define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS) 28 | #endif 29 | 30 | /////////////////////////////////////////////////////////////////////////////// 31 | // Work-efficient compute implementation of scan, one thread per 2 elements 32 | // Work-efficient: O(log(n)) steps, and O(n) adds. 
33 | // Also shared storage efficient: Uses n + n/NUM_BANKS shared memory -- no ping-ponging 34 | // Also avoids most bank conflicts using single-element offsets every NUM_BANKS elements. 35 | // 36 | // In addition, If ZERO_BANK_CONFLICTS is defined, uses 37 | // n + n/NUM_BANKS + n/(NUM_BANKS*NUM_BANKS) 38 | // shared memory. If ZERO_BANK_CONFLICTS is defined, avoids ALL bank conflicts using 39 | // single-element offsets every NUM_BANKS elements, plus additional single-element offsets 40 | // after every NUM_BANKS^2 elements. 41 | // 42 | // Uses a balanced tree type algorithm. See Blelloch, 1990 "Prefix Sums 43 | // and Their Applications", or Prins and Chatterjee PRAM course notes: 44 | // https://www.cs.unc.edu/~prins/Classes/633/Handouts/pram.pdf 45 | // 46 | // This work-efficient version is based on the algorithm presented in Guy Blelloch's 47 | // excellent paper "Prefix sums and their applications". 48 | // http://www.cs.cmu.edu/~blelloch/papers/Ble93.pdf 49 | // 50 | // Pro: Work Efficient, very few bank conflicts (or zero if ZERO_BANK_CONFLICTS is defined) 51 | // Con: More instructions to compute bank-conflict-free shared memory addressing, 52 | // and slightly more shared memory storage used. 53 | // 54 | 55 | template __device__ void loadSharedChunkFromMem (float *s_data, const float *g_idata, int n, int baseIndex, int& ai, int& bi, int& mem_ai, int& mem_bi, int& bankOffsetA, int& bankOffsetB ) 56 | { 57 | int thid = threadIdx.x; 58 | mem_ai = baseIndex + threadIdx.x; 59 | mem_bi = mem_ai + blockDim.x; 60 | 61 | ai = thid; 62 | bi = thid + blockDim.x; 63 | bankOffsetA = CONFLICT_FREE_OFFSET(ai); // compute spacing to avoid bank conflicts 64 | bankOffsetB = CONFLICT_FREE_OFFSET(bi); 65 | 66 | s_data[ai + bankOffsetA] = g_idata[mem_ai]; // Cache the computational window in shared memory pad values beyond n with zeros 67 | 68 | if (isNP2) { // compile-time decision 69 | s_data[bi + bankOffsetB] = (bi < n) ? 
g_idata[mem_bi] : 0; 70 | } else { 71 | s_data[bi + bankOffsetB] = g_idata[mem_bi]; 72 | } 73 | } 74 | 75 | 76 | template __device__ void loadSharedChunkFromMemInt (int *s_data, const int *g_idata, int n, int baseIndex, int& ai, int& bi, int& mem_ai, int& mem_bi, int& bankOffsetA, int& bankOffsetB ) 77 | { 78 | int thid = threadIdx.x; 79 | mem_ai = baseIndex + threadIdx.x; 80 | mem_bi = mem_ai + blockDim.x; 81 | 82 | ai = thid; 83 | bi = thid + blockDim.x; 84 | bankOffsetA = CONFLICT_FREE_OFFSET(ai); // compute spacing to avoid bank conflicts 85 | bankOffsetB = CONFLICT_FREE_OFFSET(bi); 86 | 87 | s_data[ai + bankOffsetA] = g_idata[mem_ai]; // Cache the computational window in shared memory pad values beyond n with zeros 88 | 89 | if (isNP2) { // compile-time decision 90 | s_data[bi + bankOffsetB] = (bi < n) ? g_idata[mem_bi] : 0; 91 | } else { 92 | s_data[bi + bankOffsetB] = g_idata[mem_bi]; 93 | } 94 | } 95 | 96 | template __device__ void storeSharedChunkToMem(float* g_odata, const float* s_data, int n, int ai, int bi, int mem_ai, int mem_bi,int bankOffsetA, int bankOffsetB) 97 | { 98 | __syncthreads(); 99 | 100 | g_odata[mem_ai] = s_data[ai + bankOffsetA]; // write results to global memory 101 | if (isNP2) { // compile-time decision 102 | if (bi < n) g_odata[mem_bi] = s_data[bi + bankOffsetB]; 103 | } else { 104 | g_odata[mem_bi] = s_data[bi + bankOffsetB]; 105 | } 106 | } 107 | template __device__ void storeSharedChunkToMemInt (int* g_odata, const int* s_data, int n, int ai, int bi, int mem_ai, int mem_bi,int bankOffsetA, int bankOffsetB) 108 | { 109 | __syncthreads(); 110 | 111 | g_odata[mem_ai] = s_data[ai + bankOffsetA]; // write results to global memory 112 | if (isNP2) { // compile-time decision 113 | if (bi < n) g_odata[mem_bi] = s_data[bi + bankOffsetB]; 114 | } else { 115 | g_odata[mem_bi] = s_data[bi + bankOffsetB]; 116 | } 117 | } 118 | 119 | 120 | template __device__ void clearLastElement( float* s_data, float *g_blockSums, int blockIndex) 121 | { 122 
| if (threadIdx.x == 0) { 123 | int index = (blockDim.x << 1) - 1; 124 | index += CONFLICT_FREE_OFFSET(index); 125 | if (storeSum) { // compile-time decision 126 | // write this block's total sum to the corresponding index in the blockSums array 127 | g_blockSums[blockIndex] = s_data[index]; 128 | } 129 | s_data[index] = 0; // zero the last element in the scan so it will propagate back to the front 130 | } 131 | } 132 | 133 | template __device__ void clearLastElementInt ( int* s_data, int *g_blockSums, int blockIndex) 134 | { 135 | if (threadIdx.x == 0) { 136 | int index = (blockDim.x << 1) - 1; 137 | index += CONFLICT_FREE_OFFSET(index); 138 | if (storeSum) { // compile-time decision 139 | // write this block's total sum to the corresponding index in the blockSums array 140 | g_blockSums[blockIndex] = s_data[index]; 141 | } 142 | s_data[index] = 0; // zero the last element in the scan so it will propagate back to the front 143 | } 144 | } 145 | 146 | 147 | __device__ static unsigned int buildSum(float *s_data) 148 | { 149 | unsigned int thid = threadIdx.x; 150 | unsigned int stride = 1; 151 | 152 | // build the sum in place up the tree 153 | for (int d = blockDim.x; d > 0; d >>= 1) { 154 | __syncthreads(); 155 | 156 | if (thid < d) { 157 | int i = __mul24(__mul24(2, stride), thid); 158 | int ai = i + stride - 1; 159 | int bi = ai + stride; 160 | ai += CONFLICT_FREE_OFFSET(ai); 161 | bi += CONFLICT_FREE_OFFSET(bi); 162 | s_data[bi] += s_data[ai]; 163 | } 164 | stride *= 2; 165 | } 166 | return stride; 167 | } 168 | __device__ static unsigned int buildSumInt(int *s_data) 169 | { 170 | unsigned int thid = threadIdx.x; 171 | unsigned int stride = 1; 172 | 173 | // build the sum in place up the tree 174 | for (int d = blockDim.x; d > 0; d >>= 1) { 175 | __syncthreads(); 176 | if (thid < d) { 177 | int i = __mul24(__mul24(2, stride), thid); 178 | int ai = i + stride - 1; 179 | int bi = ai + stride; 180 | ai += CONFLICT_FREE_OFFSET(ai); 181 | bi += 
CONFLICT_FREE_OFFSET(bi); 182 | s_data[bi] += s_data[ai]; 183 | } 184 | stride *= 2; 185 | } 186 | return stride; 187 | } 188 | 189 | __device__ static void scanRootToLeaves(float *s_data, unsigned int stride) 190 | { 191 | unsigned int thid = threadIdx.x; 192 | 193 | // traverse down the tree building the scan in place 194 | for (int d = 1; d <= blockDim.x; d *= 2) { 195 | stride >>= 1; 196 | __syncthreads(); 197 | 198 | if (thid < d) { 199 | int i = __mul24(__mul24(2, stride), thid); 200 | int ai = i + stride - 1; 201 | int bi = ai + stride; 202 | ai += CONFLICT_FREE_OFFSET(ai); 203 | bi += CONFLICT_FREE_OFFSET(bi); 204 | float t = s_data[ai]; 205 | s_data[ai] = s_data[bi]; 206 | s_data[bi] += t; 207 | } 208 | } 209 | } 210 | 211 | __device__ static void scanRootToLeavesInt(int *s_data, unsigned int stride) 212 | { 213 | unsigned int thid = threadIdx.x; 214 | 215 | // traverse down the tree building the scan in place 216 | for (int d = 1; d <= blockDim.x; d *= 2) { 217 | stride >>= 1; 218 | __syncthreads(); 219 | 220 | if (thid < d) { 221 | int i = __mul24(__mul24(2, stride), thid); 222 | int ai = i + stride - 1; 223 | int bi = ai + stride; 224 | ai += CONFLICT_FREE_OFFSET(ai); 225 | bi += CONFLICT_FREE_OFFSET(bi); 226 | int t = s_data[ai]; 227 | s_data[ai] = s_data[bi]; 228 | s_data[bi] += t; 229 | } 230 | } 231 | } 232 | 233 | template __device__ void prescanBlock(float *data, int blockIndex, float *blockSums) 234 | { 235 | int stride = buildSum (data); // build the sum in place up the tree 236 | clearLastElement (data, blockSums, (blockIndex == 0) ? blockIdx.x : blockIndex); 237 | scanRootToLeaves (data, stride); // traverse down tree to build the scan 238 | } 239 | template __device__ void prescanBlockInt (int *data, int blockIndex, int *blockSums) 240 | { 241 | int stride = buildSumInt (data); // build the sum in place up the tree 242 | clearLastElementInt (data, blockSums, (blockIndex == 0) ? 
blockIdx.x : blockIndex); 243 | scanRootToLeavesInt (data, stride); // traverse down tree to build the scan 244 | } 245 | 246 | __global__ static void uniformAdd(float *g_data, float *uniforms, int n, int blockOffset, int baseIndex) 247 | { 248 | __shared__ float uni; 249 | if (threadIdx.x == 0) uni = uniforms[blockIdx.x + blockOffset]; 250 | unsigned int address = __mul24(blockIdx.x, (blockDim.x << 1)) + baseIndex + threadIdx.x; 251 | 252 | __syncthreads(); 253 | // note two adds per thread 254 | g_data[address] += uni; 255 | if (threadIdx.x + blockDim.x < n) g_data[address + blockDim.x] += uni; /* second add is guarded (not multiplied by the predicate) so a partial block never reads or writes past element n */ 256 | } 257 | __global__ static void uniformAddInt(int *g_data, int *uniforms, int n, int blockOffset, int baseIndex) 258 | { 259 | __shared__ int uni; 260 | if (threadIdx.x == 0) uni = uniforms[blockIdx.x + blockOffset]; 261 | unsigned int address = __mul24(blockIdx.x, (blockDim.x << 1)) + baseIndex + threadIdx.x; 262 | 263 | __syncthreads(); 264 | // note two adds per thread 265 | g_data[address] += uni; 266 | if (threadIdx.x + blockDim.x < n) g_data[address + blockDim.x] += uni; /* second add is guarded (not multiplied by the predicate) so a partial block never reads or writes past element n */ 267 | } 268 | 269 | 270 | #endif // #ifndef _SCAN_BEST_KERNEL_CU_ 271 | 272 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/GL_LIB/freeglut_64/include/GL/freeglut_ext.h: -------------------------------------------------------------------------------- 1 | #ifndef __FREEGLUT_EXT_H__ 2 | #define __FREEGLUT_EXT_H__ 3 | 4 | /* 5 | * freeglut_ext.h 6 | * 7 | * The non-GLUT-compatible extensions to the freeglut library include file 8 | * 9 | * Copyright (c) 1999-2000 Pawel W. Olszta. All Rights Reserved. 10 | * Written by Pawel W. 
Olszta, 11 | * Creation date: Thu Dec 2 1999 12 | * 13 | * Permission is hereby granted, free of charge, to any person obtaining a 14 | * copy of this software and associated documentation files (the "Software"), 15 | * to deal in the Software without restriction, including without limitation 16 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 17 | * and/or sell copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following conditions: 19 | * 20 | * The above copyright notice and this permission notice shall be included 21 | * in all copies or substantial portions of the Software. 22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 24 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 26 | * PAWEL W. OLSZTA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 27 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 | */ 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | /* 36 | * Additional GLUT Key definitions for the Special key function 37 | */ 38 | #define GLUT_KEY_NUM_LOCK 0x006D 39 | #define GLUT_KEY_BEGIN 0x006E 40 | #define GLUT_KEY_DELETE 0x006F 41 | #define GLUT_KEY_SHIFT_L 0x0070 42 | #define GLUT_KEY_SHIFT_R 0x0071 43 | #define GLUT_KEY_CTRL_L 0x0072 44 | #define GLUT_KEY_CTRL_R 0x0073 45 | #define GLUT_KEY_ALT_L 0x0074 46 | #define GLUT_KEY_ALT_R 0x0075 47 | 48 | /* 49 | * GLUT API Extension macro definitions -- behaviour when the user clicks on an "x" to close a window 50 | */ 51 | #define GLUT_ACTION_EXIT 0 52 | #define GLUT_ACTION_GLUTMAINLOOP_RETURNS 1 53 | #define GLUT_ACTION_CONTINUE_EXECUTION 2 54 | 55 | /* 56 | * Create a new rendering context when the user opens a new window? 
57 | */ 58 | #define GLUT_CREATE_NEW_CONTEXT 0 59 | #define GLUT_USE_CURRENT_CONTEXT 1 60 | 61 | /* 62 | * Direct/Indirect rendering context options (has meaning only in Unix/X11) 63 | */ 64 | #define GLUT_FORCE_INDIRECT_CONTEXT 0 65 | #define GLUT_ALLOW_DIRECT_CONTEXT 1 66 | #define GLUT_TRY_DIRECT_CONTEXT 2 67 | #define GLUT_FORCE_DIRECT_CONTEXT 3 68 | 69 | /* 70 | * GLUT API Extension macro definitions -- the glutGet parameters 71 | */ 72 | #define GLUT_INIT_STATE 0x007C 73 | 74 | #define GLUT_ACTION_ON_WINDOW_CLOSE 0x01F9 75 | 76 | #define GLUT_WINDOW_BORDER_WIDTH 0x01FA 77 | #define GLUT_WINDOW_BORDER_HEIGHT 0x01FB 78 | #define GLUT_WINDOW_HEADER_HEIGHT 0x01FB /* Docs say it should always have been GLUT_WINDOW_BORDER_HEIGHT, keep this for backward compatibility */ 79 | 80 | #define GLUT_VERSION 0x01FC 81 | 82 | #define GLUT_RENDERING_CONTEXT 0x01FD 83 | #define GLUT_DIRECT_RENDERING 0x01FE 84 | 85 | #define GLUT_FULL_SCREEN 0x01FF 86 | 87 | #define GLUT_SKIP_STALE_MOTION_EVENTS 0x0204 88 | 89 | #define GLUT_GEOMETRY_VISUALIZE_NORMALS 0x0205 90 | 91 | #define GLUT_STROKE_FONT_DRAW_JOIN_DOTS 0x0206 /* Draw dots between line segments of stroke fonts? */ 92 | 93 | /* 94 | * New tokens for glutInitDisplayMode. 95 | * Only one GLUT_AUXn bit may be used at a time. 96 | * Value 0x0400 is defined in OpenGLUT. 
97 | */ 98 | #define GLUT_AUX 0x1000 99 | 100 | #define GLUT_AUX1 0x1000 101 | #define GLUT_AUX2 0x2000 102 | #define GLUT_AUX3 0x4000 103 | #define GLUT_AUX4 0x8000 104 | 105 | /* 106 | * Context-related flags, see fg_state.c 107 | * Set the requested OpenGL version 108 | */ 109 | #define GLUT_INIT_MAJOR_VERSION 0x0200 110 | #define GLUT_INIT_MINOR_VERSION 0x0201 111 | #define GLUT_INIT_FLAGS 0x0202 112 | #define GLUT_INIT_PROFILE 0x0203 113 | 114 | /* 115 | * Flags for glutInitContextFlags, see fg_init.c 116 | */ 117 | #define GLUT_DEBUG 0x0001 118 | #define GLUT_FORWARD_COMPATIBLE 0x0002 119 | 120 | 121 | /* 122 | * Flags for glutInitContextProfile, see fg_init.c 123 | */ 124 | #define GLUT_CORE_PROFILE 0x0001 125 | #define GLUT_COMPATIBILITY_PROFILE 0x0002 126 | 127 | /* 128 | * Process loop function, see fg_main.c 129 | */ 130 | FGAPI void FGAPIENTRY glutMainLoopEvent( void ); 131 | FGAPI void FGAPIENTRY glutLeaveMainLoop( void ); 132 | FGAPI void FGAPIENTRY glutExit ( void ); 133 | 134 | /* 135 | * Window management functions, see fg_window.c 136 | */ 137 | FGAPI void FGAPIENTRY glutFullScreenToggle( void ); 138 | FGAPI void FGAPIENTRY glutLeaveFullScreen( void ); 139 | 140 | /* 141 | * Menu functions 142 | */ 143 | FGAPI void FGAPIENTRY glutSetMenuFont( int menuID, void* font ); 144 | 145 | /* 146 | * Window-specific callback functions, see fg_callbacks.c 147 | */ 148 | FGAPI void FGAPIENTRY glutMouseWheelFunc( void (* callback)( int, int, int, int ) ); 149 | FGAPI void FGAPIENTRY glutPositionFunc( void (* callback)( int, int ) ); 150 | FGAPI void FGAPIENTRY glutCloseFunc( void (* callback)( void ) ); 151 | FGAPI void FGAPIENTRY glutWMCloseFunc( void (* callback)( void ) ); 152 | /* And also a destruction callback for menus */ 153 | FGAPI void FGAPIENTRY glutMenuDestroyFunc( void (* callback)( void ) ); 154 | 155 | /* 156 | * State setting and retrieval functions, see fg_state.c 157 | */ 158 | FGAPI void FGAPIENTRY glutSetOption ( GLenum option_flag, int 
value ); 159 | FGAPI int * FGAPIENTRY glutGetModeValues(GLenum mode, int * size); 160 | /* A.Donev: User-data manipulation */ 161 | FGAPI void* FGAPIENTRY glutGetWindowData( void ); 162 | FGAPI void FGAPIENTRY glutSetWindowData(void* data); 163 | FGAPI void* FGAPIENTRY glutGetMenuData( void ); 164 | FGAPI void FGAPIENTRY glutSetMenuData(void* data); 165 | 166 | /* 167 | * Font stuff, see fg_font.c 168 | */ 169 | FGAPI int FGAPIENTRY glutBitmapHeight( void* font ); 170 | FGAPI GLfloat FGAPIENTRY glutStrokeHeight( void* font ); 171 | FGAPI void FGAPIENTRY glutBitmapString( void* font, const unsigned char *string ); 172 | FGAPI void FGAPIENTRY glutStrokeString( void* font, const unsigned char *string ); 173 | 174 | /* 175 | * Geometry functions, see fg_geometry.c 176 | */ 177 | FGAPI void FGAPIENTRY glutWireRhombicDodecahedron( void ); 178 | FGAPI void FGAPIENTRY glutSolidRhombicDodecahedron( void ); 179 | FGAPI void FGAPIENTRY glutWireSierpinskiSponge ( int num_levels, double offset[3], double scale ); 180 | FGAPI void FGAPIENTRY glutSolidSierpinskiSponge ( int num_levels, double offset[3], double scale ); 181 | FGAPI void FGAPIENTRY glutWireCylinder( double radius, double height, GLint slices, GLint stacks); 182 | FGAPI void FGAPIENTRY glutSolidCylinder( double radius, double height, GLint slices, GLint stacks); 183 | 184 | /* 185 | * Rest of functions for rendering Newell's teaset, found in fg_teapot.c 186 | * NB: front facing polygons have clockwise winding, not counter clockwise 187 | */ 188 | FGAPI void FGAPIENTRY glutWireTeacup( double size ); 189 | FGAPI void FGAPIENTRY glutSolidTeacup( double size ); 190 | FGAPI void FGAPIENTRY glutWireTeaspoon( double size ); 191 | FGAPI void FGAPIENTRY glutSolidTeaspoon( double size ); 192 | 193 | /* 194 | * Extension functions, see fg_ext.c 195 | */ 196 | typedef void (*GLUTproc)(); 197 | FGAPI GLUTproc FGAPIENTRY glutGetProcAddress( const char *procName ); 198 | 199 | /* 200 | * Multi-touch/multi-pointer extensions 201 | 
*/ 202 | 203 | #define GLUT_HAS_MULTI 1 204 | 205 | /* TODO: add device_id parameter, 206 | cf. http://sourceforge.net/mailarchive/forum.php?thread_name=20120518071314.GA28061%40perso.beuc.net&forum_name=freeglut-developer */ 207 | FGAPI void FGAPIENTRY glutMultiEntryFunc( void (* callback)( int, int ) ); 208 | FGAPI void FGAPIENTRY glutMultiButtonFunc( void (* callback)( int, int, int, int, int ) ); 209 | FGAPI void FGAPIENTRY glutMultiMotionFunc( void (* callback)( int, int, int ) ); 210 | FGAPI void FGAPIENTRY glutMultiPassiveFunc( void (* callback)( int, int, int ) ); 211 | 212 | /* 213 | * Joystick functions, see fg_joystick.c 214 | */ 215 | /* USE OF THESE FUNCTIONS IS DEPRECATED !!!!! */ 216 | /* If you have a serious need for these functions in your application, please either 217 | * contact the "freeglut" developer community at freeglut-developer@lists.sourceforge.net, 218 | * switch to the OpenGLUT library, or else port your joystick functionality over to PLIB's 219 | * "js" library. 
220 | */ 221 | int glutJoystickGetNumAxes( int ident ); 222 | int glutJoystickGetNumButtons( int ident ); 223 | int glutJoystickNotWorking( int ident ); 224 | float glutJoystickGetDeadBand( int ident, int axis ); 225 | void glutJoystickSetDeadBand( int ident, int axis, float db ); 226 | float glutJoystickGetSaturation( int ident, int axis ); 227 | void glutJoystickSetSaturation( int ident, int axis, float st ); 228 | void glutJoystickSetMinRange( int ident, float *axes ); 229 | void glutJoystickSetMaxRange( int ident, float *axes ); 230 | void glutJoystickSetCenter( int ident, float *axes ); 231 | void glutJoystickGetMinRange( int ident, float *axes ); 232 | void glutJoystickGetMaxRange( int ident, float *axes ); 233 | void glutJoystickGetCenter( int ident, float *axes ); 234 | 235 | /* 236 | * Initialization functions, see fg_init.c 237 | */ 238 | /* to get the typedef for va_list */ 239 | #include 240 | FGAPI void FGAPIENTRY glutInitContextVersion( int majorVersion, int minorVersion ); 241 | FGAPI void FGAPIENTRY glutInitContextFlags( int flags ); 242 | FGAPI void FGAPIENTRY glutInitContextProfile( int profile ); 243 | FGAPI void FGAPIENTRY glutInitErrorFunc( void (* callback)( const char *fmt, va_list ap ) ); 244 | FGAPI void FGAPIENTRY glutInitWarningFunc( void (* callback)( const char *fmt, va_list ap ) ); 245 | 246 | /* OpenGL >= 2.0 support */ 247 | FGAPI void FGAPIENTRY glutSetVertexAttribCoord3(GLint attrib); 248 | FGAPI void FGAPIENTRY glutSetVertexAttribNormal(GLint attrib); 249 | FGAPI void FGAPIENTRY glutSetVertexAttribTexCoord2(GLint attrib); 250 | 251 | /* Mobile platforms lifecycle */ 252 | FGAPI void FGAPIENTRY glutInitContextFunc(void (* callback)()); 253 | FGAPI void FGAPIENTRY glutAppStatusFunc(void (* callback)(int)); 254 | /* state flags that can be passed to callback set by glutAppStatusFunc */ 255 | #define GLUT_APPSTATUS_PAUSE 0x0001 256 | #define GLUT_APPSTATUS_RESUME 0x0002 257 | 258 | /* 259 | * GLUT API macro definitions -- the display 
mode definitions 260 | */ 261 | #define GLUT_CAPTIONLESS 0x0400 262 | #define GLUT_BORDERLESS 0x0800 263 | #define GLUT_SRGB 0x1000 264 | 265 | #ifdef __cplusplus 266 | } 267 | #endif 268 | 269 | /*** END OF FILE ***/ 270 | 271 | #endif /* __FREEGLUT_EXT_H__ */ 272 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/cuda_prescan/scan.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and 5 | * proprietary rights in and to this software and related documentation and 6 | * any modifications thereto. Any use, reproduction, disclosure, or distribution 7 | * of this software and related documentation without an express license 8 | * agreement from NVIDIA Corporation is strictly prohibited. 9 | * 10 | */ 11 | 12 | // includes, kernels 13 | #include "scan.cuh" 14 | #include 15 | #include 16 | #include "..\cuda_call_check.h" 17 | #include "scan_kern.cuh" 18 | 19 | inline bool isPowerOfTwo(int n) { return ((n&(n - 1)) == 0); } 20 | 21 | inline int floorPow2(int n) { 22 | #ifdef WIN32 23 | return 1 << (int)logb((float)n); 24 | #else 25 | int exp; 26 | frexp((float)n, &exp); 27 | return 1 << (exp - 1); 28 | #endif 29 | } 30 | 31 | float** g_scanBlockSums = 0; 32 | int** g_scanBlockSumsInt = 0; 33 | unsigned int g_numEltsAllocated = 0; 34 | unsigned int g_numLevelsAllocated = 0; 35 | 36 | void preallocBlockSums(unsigned int maxNumElements) 37 | { 38 | assert(g_numEltsAllocated == 0); // shouldn't be called 39 | 40 | g_numEltsAllocated = maxNumElements; 41 | unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks 42 | unsigned int numElts = maxNumElements; 43 | int level = 0; 44 | 45 | do { 46 | unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize))); 47 | if (numBlocks > 1) level++; 48 | 
numElts = numBlocks; 49 | } while (numElts > 1); 50 | 51 | g_scanBlockSums = (float**)malloc(level * sizeof(float*)); 52 | g_numLevelsAllocated = level; 53 | 54 | numElts = maxNumElements; 55 | level = 0; 56 | 57 | do { 58 | unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize))); 59 | if (numBlocks > 1) 60 | CUDA_SAFE_CALL(cudaMalloc((void**)&g_scanBlockSums[level++], numBlocks * sizeof(float))); 61 | numElts = numBlocks; 62 | } while (numElts > 1); 63 | 64 | } 65 | void preallocBlockSumsInt(unsigned int maxNumElements) 66 | { 67 | assert(g_numEltsAllocated == 0); // shouldn't be called 68 | 69 | g_numEltsAllocated = maxNumElements; 70 | unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks 71 | unsigned int numElts = maxNumElements; 72 | int level = 0; 73 | 74 | do { 75 | unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize))); 76 | if (numBlocks > 1) level++; 77 | numElts = numBlocks; 78 | } while (numElts > 1); 79 | 80 | g_scanBlockSumsInt = (int**)malloc(level * sizeof(int*)); 81 | g_numLevelsAllocated = level; 82 | 83 | numElts = maxNumElements; 84 | level = 0; 85 | 86 | do { 87 | unsigned int numBlocks = max(1, (int)ceil((float)numElts / (2.f * blockSize))); 88 | if (numBlocks > 1) CUDA_SAFE_CALL(cudaMalloc((void**)&g_scanBlockSumsInt[level++], numBlocks * sizeof(int))); 89 | numElts = numBlocks; 90 | } while (numElts > 1); 91 | } 92 | 93 | void deallocBlockSums() 94 | { 95 | if (g_scanBlockSums != 0x0) { 96 | for (unsigned int i = 0; i < g_numLevelsAllocated; i++) 97 | CUDA_SAFE_CALL(cudaFree(g_scanBlockSums[i])); 98 | 99 | free((void**)g_scanBlockSums); 100 | } 101 | 102 | g_scanBlockSums = 0; 103 | g_numEltsAllocated = 0; 104 | g_numLevelsAllocated = 0; 105 | } 106 | void deallocBlockSumsInt() 107 | { 108 | /* guard on the int table this function frees, not on the float table */ if (g_scanBlockSumsInt != 0x0) { 109 | for (unsigned int i = 0; i < g_numLevelsAllocated; i++) 110 | CUDA_SAFE_CALL(cudaFree(g_scanBlockSumsInt[i])); 111 | free((void**)g_scanBlockSumsInt); 112 
| } 113 | 114 | g_scanBlockSumsInt = 0; 115 | g_numEltsAllocated = 0; 116 | g_numLevelsAllocated = 0; 117 | } 118 | 119 | 120 | 121 | void prescanArrayRecursive(float *outArray, const float *inArray, int numElements, int level) 122 | { 123 | unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks 124 | unsigned int numBlocks = max(1, (int)ceil((float)numElements / (2.f * blockSize))); 125 | unsigned int numThreads; 126 | 127 | if (numBlocks > 1) 128 | numThreads = blockSize; 129 | else if (isPowerOfTwo(numElements)) 130 | numThreads = numElements / 2; 131 | else 132 | numThreads = floorPow2(numElements); 133 | 134 | unsigned int numEltsPerBlock = numThreads * 2; 135 | 136 | // if this is a non-power-of-2 array, the last block will be non-full 137 | // compute the smallest power of 2 able to compute its scan. 138 | unsigned int numEltsLastBlock = numElements - (numBlocks - 1) * numEltsPerBlock; 139 | unsigned int numThreadsLastBlock = max(1, numEltsLastBlock / 2); 140 | unsigned int np2LastBlock = 0; 141 | unsigned int sharedMemLastBlock = 0; 142 | 143 | if (numEltsLastBlock != numEltsPerBlock) { 144 | np2LastBlock = 1; 145 | if (!isPowerOfTwo(numEltsLastBlock)) numThreadsLastBlock = floorPow2(numEltsLastBlock); 146 | unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS; 147 | sharedMemLastBlock = sizeof(float) * (2 * numThreadsLastBlock + extraSpace); 148 | } 149 | 150 | // padding space is used to avoid shared memory bank conflicts 151 | unsigned int extraSpace = numEltsPerBlock / NUM_BANKS; 152 | unsigned int sharedMemSize = sizeof(float) * (numEltsPerBlock + extraSpace); 153 | 154 | #ifdef DEBUG 155 | if (numBlocks > 1) assert(g_numEltsAllocated >= numElements); 156 | #endif 157 | 158 | // setup execution parameters 159 | // if NP2, we process the last block separately 160 | dim3 grid(max(1, numBlocks - np2LastBlock), 1, 1); 161 | dim3 threads(numThreads, 1, 1); 162 | 163 | // execute the scan 164 | if (numBlocks > 1) { 165 | 
prescan<<>>(outArray, inArray, g_scanBlockSums[level], numThreads * 2, 0, 0); 166 | if (np2LastBlock) { 167 | prescan<<<1, numThreadsLastBlock, sharedMemLastBlock>>>(outArray, inArray, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); 168 | } 169 | 170 | // After scanning all the sub-blocks, we are mostly done. But now we 171 | // need to take all of the last values of the sub-blocks and scan those. 172 | // This will give us a new value that must be added to each block to 173 | // get the final results. 174 | // recursive (CPU) call 175 | prescanArrayRecursive(g_scanBlockSums[level], g_scanBlockSums[level], numBlocks, level + 1); 176 | 177 | uniformAdd<<>>(outArray, g_scanBlockSums[level], numElements - numEltsLastBlock, 0, 0); 178 | if (np2LastBlock) { 179 | uniformAdd<<<1, numThreadsLastBlock>>>(outArray, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); 180 | } 181 | } 182 | else if (isPowerOfTwo(numElements)) { 183 | prescan<<>>(outArray, inArray, 0, numThreads * 2, 0, 0); 184 | } 185 | else { 186 | prescan<<>>(outArray, inArray, 0, numElements, 0, 0); 187 | } 188 | } 189 | 190 | void prescanArrayRecursiveInt(int *outArray, const int *inArray, int numElements, int level) 191 | { 192 | unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks 193 | unsigned int numBlocks = max(1, (int)ceil((float)numElements / (2.f * blockSize))); 194 | unsigned int numThreads; 195 | 196 | if (numBlocks > 1) 197 | numThreads = blockSize; 198 | else if (isPowerOfTwo(numElements)) 199 | numThreads = numElements / 2; 200 | else 201 | numThreads = floorPow2(numElements); 202 | 203 | unsigned int numEltsPerBlock = numThreads * 2; 204 | 205 | // if this is a non-power-of-2 array, the last block will be non-full 206 | // compute the smallest power of 2 able to compute its scan. 
207 | unsigned int numEltsLastBlock = numElements - (numBlocks - 1) * numEltsPerBlock; 208 | unsigned int numThreadsLastBlock = max(1, numEltsLastBlock / 2); 209 | unsigned int np2LastBlock = 0; 210 | unsigned int sharedMemLastBlock = 0; 211 | 212 | if (numEltsLastBlock != numEltsPerBlock) { 213 | np2LastBlock = 1; 214 | if (!isPowerOfTwo(numEltsLastBlock)) numThreadsLastBlock = floorPow2(numEltsLastBlock); 215 | unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS; 216 | sharedMemLastBlock = sizeof(float) * (2 * numThreadsLastBlock + extraSpace); 217 | } 218 | 219 | // padding space is used to avoid shared memory bank conflicts 220 | unsigned int extraSpace = numEltsPerBlock / NUM_BANKS; 221 | unsigned int sharedMemSize = sizeof(float) * (numEltsPerBlock + extraSpace); 222 | 223 | #ifdef DEBUG 224 | if (numBlocks > 1) assert(g_numEltsAllocated >= numElements); 225 | #endif 226 | 227 | // setup execution parameters 228 | // if NP2, we process the last block separately 229 | dim3 grid(max(1, numBlocks - np2LastBlock), 1, 1); 230 | dim3 threads(numThreads, 1, 1); 231 | 232 | // execute the scan 233 | if (numBlocks > 1) { 234 | prescanInt<<>>(outArray, inArray, g_scanBlockSumsInt[level], numThreads * 2, 0, 0); 235 | if (np2LastBlock) { 236 | prescanInt<<<1, numThreadsLastBlock, sharedMemLastBlock>>>(outArray, inArray, g_scanBlockSumsInt[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); 237 | } 238 | 239 | // After scanning all the sub-blocks, we are mostly done. But now we 240 | // need to take all of the last values of the sub-blocks and scan those. 241 | // This will give us a new value that must be added to each block to 242 | // get the final results. 
243 | // recursive (CPU) call 244 | prescanArrayRecursiveInt(g_scanBlockSumsInt[level], g_scanBlockSumsInt[level], numBlocks, level + 1); 245 | 246 | uniformAddInt<<>>(outArray, g_scanBlockSumsInt[level], numElements - numEltsLastBlock, 0, 0); 247 | if (np2LastBlock) { 248 | uniformAddInt<<<1, numThreadsLastBlock>>>(outArray, g_scanBlockSumsInt[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); 249 | } 250 | } 251 | else if (isPowerOfTwo(numElements)) { 252 | prescanInt<<>>(outArray, inArray, 0, numThreads * 2, 0, 0); 253 | } 254 | else { 255 | prescanInt<<>>(outArray, inArray, 0, numElements, 0, 0); 256 | } 257 | } 258 | 259 | 260 | void prescanArray(float *d_odata, float *d_idata, int num) 261 | { 262 | // preform prefix sum 263 | preallocBlockSums(num); 264 | prescanArrayRecursive(d_odata, d_idata, num, 0); 265 | deallocBlockSums(); 266 | } 267 | void prescanArrayInt(int *d_odata, int *d_idata, int num) 268 | { 269 | // preform prefix sum 270 | preallocBlockSumsInt(num); 271 | prescanArrayRecursiveInt(d_odata, d_idata, num, 0); 272 | deallocBlockSumsInt(); 273 | } 274 | 275 | char* d_idata = NULL; 276 | char* d_odata = NULL; 277 | 278 | void prefixSum(int num) 279 | { 280 | prescanArray((float*)d_odata, (float*)d_idata, num); 281 | } 282 | 283 | void prefixSumInt(int num) 284 | { 285 | prescanArrayInt((int*)d_odata, (int*)d_idata, num); 286 | } 287 | 288 | void prefixSumToGPU(char* inArray, int num, int siz) 289 | { 290 | CUDA_SAFE_CALL(cudaMalloc((void**)&d_idata, num*siz)); 291 | CUDA_SAFE_CALL(cudaMalloc((void**)&d_odata, num*siz)); 292 | CUDA_SAFE_CALL(cudaMemcpy(d_idata, inArray, num*siz, cudaMemcpyHostToDevice)); 293 | } 294 | void prefixSumFromGPU(char* outArray, int num, int siz) 295 | { 296 | CUDA_SAFE_CALL(cudaMemcpy(outArray, d_odata, num*siz, cudaMemcpyDeviceToHost)); 297 | CUDA_SAFE_CALL(cudaFree(d_idata)); 298 | CUDA_SAFE_CALL(cudaFree(d_odata)); 299 | d_idata = NULL; 300 | d_odata = NULL; 301 | } 302 | 
-------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/gpu_model.cu: --------------------------------------------------------------------------------
//
//  gpu_model.cu
//  Hybrid_Parallel_SPH
//
//  created by kmhuang and ruanjm on 2018/09/01
//  Copyright (c) 2019 kmhuang and ruanjm. All rights reserved.
//

#include "gpu_model.cuh"
#include <cstring>
#include "cuda_call_check.h"
#include "cuda_math.cuh"
#include "gpu_model.h"
#include "gpu_model_reader.h"
#include "sph_utils.cuh"

namespace gpu_model
{

using namespace sph;

const char *kLatencyFileName = "insts_latency.json";
const char *kPTXStatisticsFileNameSMS = "sph_sms_arti_block_statistics.json";
const char *kPTXStatisticsFileNameTRA = "sph_tra_arti_block_statistics.json";
const char *kFunNameDensitySMS = "_ZN3sph19knComputeDensitySMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE";
const char *kFunNameForceSMS = "_ZN3sph17knComputeForceSMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE";
const char *kFunNameDensityTRA = "_Z21knBmComputeDensityTRAN3SPH18ParticleBufferListEPjS1_jS1_";
const char *kFunNameForceTRA = "_Z19knBmComputeForceTRAN3SPH18ParticleBufferListEPjS1_jS1_";

// Indices into the static_block / kn_paras arrays, one per modelled kernel.
#define kDensityTRA 0
#define kForceTRA   1
#define kDensitySMS 2
#define kForceSMS   3

const int kSelfID = 13U;
const int kNumNeighbor = 27U;
//const float v1_32 = 1.f / 32.f;
// Arguments are parenthesised so compound expressions expand safely.
#define MIN(A, B) (((A) > (B)) ? (B) : (A))

const int kDefaultNumThread = 256;
__constant__ GPUDeviceInfo kDevGPUInfo;
__constant__ InstructionInfo kDevInstsLatency;

// Device-side tables consumed by the performance model. Every pointer is a
// device allocation made in allocateGPUModel() and released in freeGPUModel().
struct GPUModel
{
    PTXBlockStatistic *bs_tra_density;
    PTXBlockStatistic *bs_tra_force;
    PTXBlockStatistic *bs_sms_density;
    PTXBlockStatistic *bs_sms_force;

    PTXBlockStatistic *static_block;   // 4 entries, indexed by kDensityTRA..kForceSMS

    KernelRelatedParas *kn_paras;      // 4 entries, indexed by kDensityTRA..kForceSMS
};

/****************************** Kernel ******************************/

/**
 * One thread per cell: writes the number of thread blocks the cell needs,
 * ceil_int(cell_end - cell_start, block_size), or 0 when cell_start holds
 * kInvalidCellIdx.
 *
 * Expects a 1D launch with at least numc threads in total.
 */
__global__
void knCalculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc)
{
    unsigned int idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= numc) return;

    int cs = cell_start[idx];
    int ce = cell_end[idx];
    block_req[idx] = cs == kInvalidCellIdx ? 0 : ceil_int(ce - cs, block_size);
}

/**
 * Average latency per non-sync instruction: arithmetic instructions are
 * weighted by kDevInstsLatency, while memory, branch and unknown instructions
 * all use default_lat.
 */
__device__
float knCalculateAvgLatency(const PTXBlockStatistic &insts_count, float default_lat)
{
    float total_time = 0.0f;
    for (size_t i = 0; i < ARI_STAT_SIZE; ++i)
    {
        total_time += (float)insts_count.num_stat[i] * kDevInstsLatency[i];
    }
    for (size_t i = 0; i < MEM_STAT_SIZE; ++i)
    {
        total_time += (float)insts_count.num_mem[i] * default_lat;
    }
    total_time += (float)insts_count.num_bra * default_lat;
    total_time += (float)insts_count.num_unknown * default_lat;

    return total_time / (insts_count.num_insts - insts_count.num_sync);
}

/**
 * Estimates the clock count of a kernel from its instruction statistics.
 *
 * Starts from basic_block and adds blocks[i] * times[i] for every block that
 * carries no recommended repetition count, then combines the compute and
 * memory terms as t_comp + t_mem - min(t_comp, t_mem).
 * The "Eq." tags refer to the equations of the model this code implements.
 */
__device__
float knCalculateKernelClock(PTXBlockStatistic *blocks, int *times, int num_inst_block, PTXBlockStatistic &basic_block,
                             KernelRelatedParas &kn_para, float num_uncoal_per_warp,
                             int num_blocks, int num_warps, int num_sms)
{
    PTXBlockStatistic insts_count = basic_block;
    for (int i = 0; i < num_inst_block; ++i)
    {
        if (NO_RECOMMENDATION == blocks[i].recommended_times)
        {
            insts_count += blocks[i] * times[i];
        }
    }

    float avg_inst_lat = knCalculateAvgLatency(insts_count, kn_para.default_inst_lat);
    float dram_lat = kn_para.dram_lat + (num_uncoal_per_warp - 1) * kn_para.delta;

    // compute ITILP
    // The ratio is formed in float: with simd_width larger than warp_size an
    // integer quotient would truncate to zero and the division would fault.
    float itilp_max = 8.0f / ((float)kDevGPUInfo.warp_size / (float)kDevGPUInfo.simd_width); // Eq.2-5
    float itilp = MIN(kDevGPUInfo.ilp * num_warps, itilp_max); // Eq.2-4

    // compute ITMLP
    float itmlp = 2;

    // compute execution time
    float f_sync = kDevGPUInfo.gamma * dram_lat * insts_count.num_mem[GLOBAL_ACC] / insts_count.num_insts; // Eq.2-8
    float o_sync = ((insts_count.num_sync * num_blocks) / num_sms) * f_sync; // Eq.2-7
    if (1 >= num_warps) o_sync = 0;

    float w_serial = o_sync;
    float w_parallel = (((insts_count.num_insts - insts_count.num_sync) * num_warps) / num_sms) * (avg_inst_lat / itilp); // Eq.2-3

    float t_comp = w_serial + w_parallel; // Eq.2-2

    float t_mem = (insts_count.num_mem[GLOBAL_ACC] * num_warps) / (num_warps * itmlp) * dram_lat; // Eq.2-11

    float t_overlap = MIN(t_comp, t_mem);

    //unsigned int idx = threadIdx.x + __umul24(blockDim.x, blockIdx.x);
    //if (idx == 0)
    //{
    //    //printf("idx: %d, t_comp: %f, t_mem: %f, t_op: %f\n", idx, t_comp, t_mem, t_overlap);
    //    //printf("avg_inst_lat: %f, dram_lat: %f\n", avg_inst_lat, dram_lat);
    //    printf("%f, %f, %f\n", kn_para.default_inst_lat, kn_para.delta, kn_para.dram_lat);
    //}

    return t_comp + t_mem - t_overlap;
}

/**
 * One thread per cell: chooses the number of thread blocks for the cell from
 * its own particle count and the summed count of the cell plus its six
 * face-adjacent neighbours.
 *
 *   total < 96 and self < 15  -> 0 blocks
 *   otherwise, total < 60     -> (self + 27) >> 5
 *   otherwise                 -> (self + 31) >> 5
 *
 * Only d_cell_num, block_req and grid_size are read or written here; the
 * remaining parameters are accepted but unused.
 * Expects a 1D launch with at least grid_size.x * y * z threads in total.
 */
__global__
void knCalculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size)
{
    unsigned int idx = threadIdx.x + blockDim.x*blockIdx.x;
    int numc = grid_size.x * grid_size.y * grid_size.z;
    if (idx >= numc) return;

    int nump_self = d_cell_num[idx];

    // Accumulate the particle counts of the six face neighbours that exist.
    int totaln = nump_self;
    ushort3 self_pos = CellIdx2CellPos(idx, grid_size);
    int nidx = CellPos2CellIdx(self_pos + make_ushort3(-1, 0, 0), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];
    nidx = CellPos2CellIdx(self_pos + make_ushort3(1, 0, 0), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];
    nidx = CellPos2CellIdx(self_pos + make_ushort3(0, 1, 0), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];
    nidx = CellPos2CellIdx(self_pos + make_ushort3(0, -1, 0), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];
    nidx = CellPos2CellIdx(self_pos + make_ushort3(0, 0, -1), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];
    nidx = CellPos2CellIdx(self_pos + make_ushort3(0, 0, 1), grid_size);
    if (kInvalidCellIdx != nidx) totaln += d_cell_num[nidx];

    // block_req[idx] = nump_self < 15 ? 0 : (nump_self + 25) >> 5;
    if (totaln < 96 && nump_self < 15)
    {
        block_req[idx] = 0;
    }
    else if (totaln < 60)
    {
        block_req[idx] = (nump_self + 27) >> 5;
    }
    else
    {
        block_req[idx] = (nump_self + 31) >> 5;
    }
    // if (totaln < 60){block_req[idx] = (nump_self + 25) >> 5;}else if (totaln < 95 && nump_self < 19){block_req[idx] = 0;}else{block_req[idx] = (nump_self + 31) >> 5;}
    // block_req[idx] = (totaln < 95) ? 0 : (nump_self + 31) >> 5;
    // block_req[idx] = 0;// (nump_self + 31) >> 5;
}

/****************************** Interface ******************************/

/**
 * Builds the static part of a kernel's instruction statistics: zeroes
 * static_block, then adds blocks[i] * recommended_times for every block that
 * carries a recommended repetition count.
 */
void calculateStaticBlock(PTXBlockStatistic &static_block, PTXBlockStatistic *blocks, size_t num_blocks)
{
    std::memset(&static_block, 0, sizeof(PTXBlockStatistic));

    for (size_t i = 0; i < num_blocks; ++i)
    {
        if (NO_RECOMMENDATION != blocks[i].recommended_times)
        {
            static_block += blocks[i] * blocks[i].recommended_times;
        }
    }
}

/**
 * Fills kn_para with the tuned latency constants for the kernel selected by
 * type (kDensityTRA, kForceTRA, kDensitySMS or kForceSMS). Any other value
 * leaves kn_para untouched.
 */
void setKernelParameters(KernelRelatedParas &kn_para, unsigned int type)
{
    switch (type)
    {
    case kDensityTRA:
        kn_para.dram_lat = 250;
        kn_para.delta = 0;
        kn_para.default_inst_lat = 12;
        break;
    case kForceTRA:
        kn_para.dram_lat = 230;
        kn_para.delta = 0;
        kn_para.default_inst_lat = 12;
        break;
    case kDensitySMS:
        kn_para.dram_lat = 240;
        kn_para.delta = 10;
        kn_para.default_inst_lat = 14;
        break;
    case kForceSMS:
        kn_para.dram_lat = 240;
        kn_para.delta = 10;
        kn_para.default_inst_lat = 10;
        break;
    default:
        break;
    }
}

// Copies `count` host block statistics into a newly allocated device array.
static PTXBlockStatistic *uploadBlockStatistics(const PTXBlockStatistic *host_blocks, unsigned int count)
{
    PTXBlockStatistic *dev_blocks = nullptr;
    CUDA_SAFE_CALL(cudaMalloc(&dev_blocks, count * sizeof(PTXBlockStatistic)));
    CUDA_SAFE_CALL(cudaMemcpy(dev_blocks, host_blocks, count * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));
    return dev_blocks;
}

/**
 * Creates a GPUModel and uploads everything the device-side model needs:
 * the per-kernel PTX block statistics read from the JSON files, the derived
 * static blocks, the instruction latencies, the device description and the
 * per-kernel parameters.
 *
 * @param gpu_model receives the newly allocated model; release it with
 *                  freeGPUModel()
 */
void allocateGPUModel(GPUModel *&gpu_model)
{
    PTXBlockStatistic *bs_tra_density = nullptr;
    PTXBlockStatistic *bs_tra_force = nullptr;
    PTXBlockStatistic *bs_sms_density = nullptr;
    PTXBlockStatistic *bs_sms_force = nullptr;
    PTXBlockStatistic static_block[4];
    KernelRelatedParas kn_paras[4];
    InstructionInfo insts_latency;
    GPUDeviceInfo device_info;

    gpu_model = new GPUModel;

    // read block data and transfer to device
    const unsigned int len_tra_density = readPTXStatisticsFromFile(bs_tra_density, kFunNameDensityTRA, kPTXStatisticsFileNameTRA);
    gpu_model->bs_tra_density = uploadBlockStatistics(bs_tra_density, len_tra_density);
    const unsigned int len_tra_force = readPTXStatisticsFromFile(bs_tra_force, kFunNameForceTRA, kPTXStatisticsFileNameTRA);
    gpu_model->bs_tra_force = uploadBlockStatistics(bs_tra_force, len_tra_force);
    const unsigned int len_sms_density = readPTXStatisticsFromFile(bs_sms_density, kFunNameDensitySMS, kPTXStatisticsFileNameSMS);
    gpu_model->bs_sms_density = uploadBlockStatistics(bs_sms_density, len_sms_density);
    const unsigned int len_sms_force = readPTXStatisticsFromFile(bs_sms_force, kFunNameForceSMS, kPTXStatisticsFileNameSMS);
    gpu_model->bs_sms_force = uploadBlockStatistics(bs_sms_force, len_sms_force);

    // calculate static block and transfer to device
    // Iterate over exactly the entries each read returned, so a statistics
    // file with fewer blocks than expected cannot be read out of bounds.
    calculateStaticBlock(static_block[kDensityTRA], bs_tra_density, len_tra_density);
    calculateStaticBlock(static_block[kForceTRA], bs_tra_force, len_tra_force);
    calculateStaticBlock(static_block[kDensitySMS], bs_sms_density, len_sms_density);
    calculateStaticBlock(static_block[kForceSMS], bs_sms_force, len_sms_force);
    CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->static_block), 4 * sizeof(PTXBlockStatistic)));
    CUDA_SAFE_CALL(cudaMemcpy(gpu_model->static_block, static_block, 4 * sizeof(PTXBlockStatistic), cudaMemcpyHostToDevice));

    // read instruction latency and transfer to device
    readInstructionLatencyFromFile(insts_latency, kLatencyFileName);
    CUDA_SAFE_CALL(cudaMemcpyToSymbol(kDevInstsLatency, &insts_latency, sizeof(InstructionInfo)));

    // set device info and transfer to device
    device_info.simd_width = 64;
    device_info.warp_size = 32;
    device_info.gamma = 64;
    device_info.ilp = 2;
    device_info.mlp = 2;
    CUDA_SAFE_CALL(cudaMemcpyToSymbol(kDevGPUInfo, &device_info, sizeof(GPUDeviceInfo)));

    // set kernel related paras and transfer to device
    setKernelParameters(kn_paras[kDensityTRA], kDensityTRA);
    setKernelParameters(kn_paras[kForceTRA], kForceTRA);
    setKernelParameters(kn_paras[kDensitySMS], kDensitySMS);
    setKernelParameters(kn_paras[kForceSMS], kForceSMS);
    CUDA_SAFE_CALL(cudaMalloc(&(gpu_model->kn_paras), 4 * sizeof(KernelRelatedParas)));
    CUDA_SAFE_CALL(cudaMemcpy(gpu_model->kn_paras, kn_paras, 4 * sizeof(KernelRelatedParas), cudaMemcpyHostToDevice));

    // delete[] on a null pointer is a no-op, so no guards are needed
    delete[] bs_tra_density;
    delete[] bs_tra_force;
    delete[] bs_sms_density;
    delete[] bs_sms_force;
}

/**
 * Releases every device allocation held by gpu_model and then the model
 * itself. A null pointer is ignored.
 */
void freeGPUModel(GPUModel *gpu_model)
{
    if (nullptr == gpu_model) return;

    CUDA_SAFE_CALL(cudaFree(gpu_model->bs_tra_density));
    CUDA_SAFE_CALL(cudaFree(gpu_model->bs_tra_force));
    CUDA_SAFE_CALL(cudaFree(gpu_model->bs_sms_density));
    CUDA_SAFE_CALL(cudaFree(gpu_model->bs_sms_force));
    CUDA_SAFE_CALL(cudaFree(gpu_model->kn_paras));
    CUDA_SAFE_CALL(cudaFree(gpu_model->static_block));

    delete gpu_model;
}

/**
 * Launches knCalculateBlockRequirementSMSMode over numc cells using
 * kDefaultNumThread threads per block. Does nothing when numc is not
 * positive. All pointers are device pointers.
 */
void calculateBlockRequirementSMSMode(int *block_req, int *cell_start, int *cell_end, int block_size, int numc)
{
    if (numc <= 0) return; // an empty grid is not a valid launch

    int num_thread = kDefaultNumThread;
    int num_block = ceil_int(numc, num_thread);

    knCalculateBlockRequirementSMSMode<<<num_block, num_thread>>>(block_req, cell_start, cell_end, block_size, numc);
    CUDA_SAFE_CALL(cudaGetLastError());
}

/**
 * Launches knCalculateBlockRequirementHybridMode over every cell of the grid
 * using kDefaultNumThread threads per block. Does nothing for an empty grid.
 * The model is passed to the kernel by value; all other pointers are device
 * pointers.
 */
void calculateBlockRequirementHybridMode(int *cell_type, int *d_cell_num, int *block_req, GPUModel *gm, int *cell_offset, int *cell_num, ushort3 grid_size, int block_size)
{
    int numc = grid_size.x * grid_size.y * grid_size.z;
    if (numc <= 0) return; // an empty grid is not a valid launch

    int num_thread = kDefaultNumThread;
    int num_block = ceil_int(numc, num_thread);

    knCalculateBlockRequirementHybridMode<<<num_block, num_thread>>>(cell_type, d_cell_num, block_req, *gm, cell_offset, cell_num, grid_size, block_size);
    CUDA_SAFE_CALL(cudaGetLastError());
}

}
-------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/writer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef JSON_WRITER_H_INCLUDED 7 | #define JSON_WRITER_H_INCLUDED 8 | 9 | #if !defined(JSON_IS_AMALGAMATION) 10 | #include "value.h" 11 | #endif // if !defined(JSON_IS_AMALGAMATION) 12 | #include 13 | #include 14 | #include 15 | 16 | // Disable warning C4251: : needs to have dll-interface to 17 | // be used by... 18 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) && defined(_MSC_VER) 19 | #pragma warning(push) 20 | #pragma warning(disable : 4251) 21 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 22 | 23 | #pragma pack(push, 8) 24 | 25 | namespace Json { 26 | 27 | class Value; 28 | 29 | /** 30 | 31 | Usage: 32 | \code 33 | using namespace Json; 34 | void writeToStdout(StreamWriter::Factory const& factory, Value const& value) { 35 | std::unique_ptr const writer( 36 | factory.newStreamWriter()); 37 | writer->write(value, &std::cout); 38 | std::cout << std::endl; // add lf and flush 39 | } 40 | \endcode 41 | */ 42 | class JSON_API StreamWriter { 43 | protected: 44 | OStream* sout_; // not owned; will not delete 45 | public: 46 | StreamWriter(); 47 | virtual ~StreamWriter(); 48 | /** Write Value into document as configured in sub-class. 
49 | Do not take ownership of sout, but maintain a reference during function. 50 | \pre sout != NULL 51 | \return zero on success (For now, we always return zero, so check the 52 | stream instead.) \throw std::exception possibly, depending on configuration 53 | */ 54 | virtual int write(Value const& root, OStream* sout) = 0; 55 | 56 | /** \brief A simple abstract factory. 57 | */ 58 | class JSON_API Factory { 59 | public: 60 | virtual ~Factory(); 61 | /** \brief Allocate a CharReader via operator new(). 62 | * \throw std::exception if something goes wrong (e.g. invalid settings) 63 | */ 64 | virtual StreamWriter* newStreamWriter() const = 0; 65 | }; // Factory 66 | }; // StreamWriter 67 | 68 | /** \brief Write into stringstream, then return string, for convenience. 69 | * A StreamWriter will be created from the factory, used, and then deleted. 70 | */ 71 | String JSON_API writeString(StreamWriter::Factory const& factory, 72 | Value const& root); 73 | 74 | /** \brief Build a StreamWriter implementation. 75 | 76 | Usage: 77 | \code 78 | using namespace Json; 79 | Value value = ...; 80 | StreamWriterBuilder builder; 81 | builder["commentStyle"] = "None"; 82 | builder["indentation"] = " "; // or whatever you like 83 | std::unique_ptr writer( 84 | builder.newStreamWriter()); 85 | writer->write(value, &std::cout); 86 | std::cout << std::endl; // add lf and flush 87 | \endcode 88 | */ 89 | class JSON_API StreamWriterBuilder : public StreamWriter::Factory { 90 | public: 91 | // Note: We use a Json::Value so that we can add data-members to this class 92 | // without a major version bump. 93 | /** Configuration of this builder. 94 | Available settings (case-sensitive): 95 | - "commentStyle": "None" or "All" 96 | - "indentation": "". 97 | - Setting this to an empty string also omits newline characters. 
98 | - "enableYAMLCompatibility": false or true 99 | - slightly change the whitespace around colons 100 | - "dropNullPlaceholders": false or true 101 | - Drop the "null" string from the writer's output for nullValues. 102 | Strictly speaking, this is not valid JSON. But when the output is being 103 | fed to a browser's JavaScript, it makes for smaller output and the 104 | browser can handle the output just fine. 105 | - "useSpecialFloats": false or true 106 | - If true, outputs non-finite floating point values in the following way: 107 | NaN values as "NaN", positive infinity as "Infinity", and negative 108 | infinity as "-Infinity". 109 | - "precision": int 110 | - Number of precision digits for formatting of real values. 111 | - "precisionType": "significant"(default) or "decimal" 112 | - Type of precision for formatting of real values. 113 | 114 | You can examine 'settings_` yourself 115 | to see the defaults. You can also write and read them just like any 116 | JSON Value. 117 | \sa setDefaults() 118 | */ 119 | Json::Value settings_; 120 | 121 | StreamWriterBuilder(); 122 | ~StreamWriterBuilder() override; 123 | 124 | /** 125 | * \throw std::exception if something goes wrong (e.g. invalid settings) 126 | */ 127 | StreamWriter* newStreamWriter() const override; 128 | 129 | /** \return true if 'settings' are legal and consistent; 130 | * otherwise, indicate bad settings via 'invalid'. 131 | */ 132 | bool validate(Json::Value* invalid) const; 133 | /** A simple way to update a specific setting. 134 | */ 135 | Value& operator[](const String& key); 136 | 137 | /** Called by ctor, but you can use this to reset settings_. 138 | * \pre 'settings' != NULL (but Json::null is fine) 139 | * \remark Defaults: 140 | * \snippet src/lib_json/json_writer.cpp StreamWriterBuilderDefaults 141 | */ 142 | static void setDefaults(Json::Value* settings); 143 | }; 144 | 145 | /** \brief Abstract class for writers. 146 | * \deprecated Use StreamWriter. 
(And really, this is an implementation detail.) 147 | */ 148 | class JSONCPP_DEPRECATED("Use StreamWriter instead") JSON_API Writer { 149 | public: 150 | virtual ~Writer(); 151 | 152 | virtual String write(const Value& root) = 0; 153 | }; 154 | 155 | /** \brief Outputs a Value in JSON format 156 | *without formatting (not human friendly). 157 | * 158 | * The JSON document is written in a single line. It is not intended for 'human' 159 | *consumption, 160 | * but may be useful to support feature such as RPC where bandwidth is limited. 161 | * \sa Reader, Value 162 | * \deprecated Use StreamWriterBuilder. 163 | */ 164 | #if defined(_MSC_VER) 165 | #pragma warning(push) 166 | #pragma warning(disable : 4996) // Deriving from deprecated class 167 | #endif 168 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API FastWriter 169 | : public Writer { 170 | public: 171 | FastWriter(); 172 | ~FastWriter() override = default; 173 | 174 | void enableYAMLCompatibility(); 175 | 176 | /** \brief Drop the "null" string from the writer's output for nullValues. 177 | * Strictly speaking, this is not valid JSON. But when the output is being 178 | * fed to a browser's JavaScript, it makes for smaller output and the 179 | * browser can handle the output just fine. 180 | */ 181 | void dropNullPlaceholders(); 182 | 183 | void omitEndingLineFeed(); 184 | 185 | public: // overridden from Writer 186 | String write(const Value& root) override; 187 | 188 | private: 189 | void writeValue(const Value& value); 190 | 191 | String document_; 192 | bool yamlCompatibilityEnabled_{false}; 193 | bool dropNullPlaceholders_{false}; 194 | bool omitEndingLineFeed_{false}; 195 | }; 196 | #if defined(_MSC_VER) 197 | #pragma warning(pop) 198 | #endif 199 | 200 | /** \brief Writes a Value in JSON format in a 201 | *human friendly way. 
202 | * 203 | * The rules for line break and indent are as follow: 204 | * - Object value: 205 | * - if empty then print {} without indent and line break 206 | * - if not empty the print '{', line break & indent, print one value per 207 | *line 208 | * and then unindent and line break and print '}'. 209 | * - Array value: 210 | * - if empty then print [] without indent and line break 211 | * - if the array contains no object value, empty array or some other value 212 | *types, 213 | * and all the values fit on one lines, then print the array on a single 214 | *line. 215 | * - otherwise, it the values do not fit on one line, or the array contains 216 | * object or non empty array, then print one value per line. 217 | * 218 | * If the Value have comments then they are outputed according to their 219 | *#CommentPlacement. 220 | * 221 | * \sa Reader, Value, Value::setComment() 222 | * \deprecated Use StreamWriterBuilder. 223 | */ 224 | #if defined(_MSC_VER) 225 | #pragma warning(push) 226 | #pragma warning(disable : 4996) // Deriving from deprecated class 227 | #endif 228 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API 229 | StyledWriter : public Writer { 230 | public: 231 | StyledWriter(); 232 | ~StyledWriter() override = default; 233 | 234 | public: // overridden from Writer 235 | /** \brief Serialize a Value in JSON format. 236 | * \param root Value to serialize. 237 | * \return String containing the JSON document that represents the root value. 
238 | */ 239 | String write(const Value& root) override; 240 | 241 | private: 242 | void writeValue(const Value& value); 243 | void writeArrayValue(const Value& value); 244 | bool isMultilineArray(const Value& value); 245 | void pushValue(const String& value); 246 | void writeIndent(); 247 | void writeWithIndent(const String& value); 248 | void indent(); 249 | void unindent(); 250 | void writeCommentBeforeValue(const Value& root); 251 | void writeCommentAfterValueOnSameLine(const Value& root); 252 | static bool hasCommentForValue(const Value& value); 253 | static String normalizeEOL(const String& text); 254 | 255 | typedef std::vector ChildValues; 256 | 257 | ChildValues childValues_; 258 | String document_; 259 | String indentString_; 260 | unsigned int rightMargin_{74}; 261 | unsigned int indentSize_{3}; 262 | bool addChildValues_{false}; 263 | }; 264 | #if defined(_MSC_VER) 265 | #pragma warning(pop) 266 | #endif 267 | 268 | /** \brief Writes a Value in JSON format in a 269 | human friendly way, 270 | to a stream rather than to a string. 271 | * 272 | * The rules for line break and indent are as follow: 273 | * - Object value: 274 | * - if empty then print {} without indent and line break 275 | * - if not empty the print '{', line break & indent, print one value per 276 | line 277 | * and then unindent and line break and print '}'. 278 | * - Array value: 279 | * - if empty then print [] without indent and line break 280 | * - if the array contains no object value, empty array or some other value 281 | types, 282 | * and all the values fit on one lines, then print the array on a single 283 | line. 284 | * - otherwise, it the values do not fit on one line, or the array contains 285 | * object or non empty array, then print one value per line. 286 | * 287 | * If the Value have comments then they are outputed according to their 288 | #CommentPlacement. 289 | * 290 | * \sa Reader, Value, Value::setComment() 291 | * \deprecated Use StreamWriterBuilder. 
292 | */ 293 | #if defined(_MSC_VER) 294 | #pragma warning(push) 295 | #pragma warning(disable : 4996) // Deriving from deprecated class 296 | #endif 297 | class JSONCPP_DEPRECATED("Use StreamWriterBuilder instead") JSON_API 298 | StyledStreamWriter { 299 | public: 300 | /** 301 | * \param indentation Each level will be indented by this amount extra. 302 | */ 303 | StyledStreamWriter(String indentation = "\t"); 304 | ~StyledStreamWriter() = default; 305 | 306 | public: 307 | /** \brief Serialize a Value in JSON format. 308 | * \param out Stream to write to. (Can be ostringstream, e.g.) 309 | * \param root Value to serialize. 310 | * \note There is no point in deriving from Writer, since write() should not 311 | * return a value. 312 | */ 313 | void write(OStream& out, const Value& root); 314 | 315 | private: 316 | void writeValue(const Value& value); 317 | void writeArrayValue(const Value& value); 318 | bool isMultilineArray(const Value& value); 319 | void pushValue(const String& value); 320 | void writeIndent(); 321 | void writeWithIndent(const String& value); 322 | void indent(); 323 | void unindent(); 324 | void writeCommentBeforeValue(const Value& root); 325 | void writeCommentAfterValueOnSameLine(const Value& root); 326 | static bool hasCommentForValue(const Value& value); 327 | static String normalizeEOL(const String& text); 328 | 329 | typedef std::vector ChildValues; 330 | 331 | ChildValues childValues_; 332 | OStream* document_; 333 | String indentString_; 334 | unsigned int rightMargin_{74}; 335 | String indentation_; 336 | bool addChildValues_ : 1; 337 | bool indented_ : 1; 338 | }; 339 | #if defined(_MSC_VER) 340 | #pragma warning(pop) 341 | #endif 342 | 343 | #if defined(JSON_HAS_INT64) 344 | String JSON_API valueToString(Int value); 345 | String JSON_API valueToString(UInt value); 346 | #endif // if defined(JSON_HAS_INT64) 347 | String JSON_API valueToString(LargestInt value); 348 | String JSON_API valueToString(LargestUInt value); 349 | String 
JSON_API 350 | valueToString(double value, 351 | unsigned int precision = Value::defaultRealPrecision, 352 | PrecisionType precisionType = PrecisionType::significantDigits); 353 | String JSON_API valueToString(bool value); 354 | String JSON_API valueToQuotedString(const char* value); 355 | 356 | /// \brief Output using the StyledStreamWriter. 357 | /// \see Json::operator>>() 358 | JSON_API OStream& operator<<(OStream&, const Value& root); 359 | 360 | } // namespace Json 361 | 362 | #pragma pack(pop) 363 | 364 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 365 | #pragma warning(pop) 366 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 367 | 368 | #endif // JSON_WRITER_H_INCLUDED 369 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/Hybrid_Fluid_Simulation.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | {BC50E9FA-E95F-4E72-9F2B-D45567958A71} 66 | Hybrid_Fluid_Simulation 67 | Hybrid_Fluid_Simulation 68 | 69 | 70 | 71 | Application 72 | true 73 | MultiByte 74 | v143 75 | 76 | 77 | Application 78 | true 79 | MultiByte 80 | v143 81 | 82 | 83 | Application 84 | false 85 | true 86 | MultiByte 87 | v143 88 | 89 | 90 | Application 91 | false 92 | true 93 | MultiByte 94 | v143 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | true 115 | $(GLEW_PATH)\include;$(GLM_PATH);$(FREEGLUT_PATH)\include;$(IncludePath) 116 | $(GLEW_PATH)\lib\Debug\Win32;$(FREEGLUT_PATH)\lib;$(LibraryPath) 117 | 118 | 119 | true 120 | 
$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\include;$(IncludePath) 121 | 122 | 123 | $(GLEW_PATH)\include;$(GLM_PATH);$(FREEGLUT_PATH)\include;$(IncludePath) 124 | $(GLEW_PATH)\lib\Release\Win32;$(FREEGLUT_PATH)\lib;$(LibraryPath) 125 | 126 | 127 | $(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\include;$(SolutionDir)\Hybrid_Fluid_Simulation\json\include;$(IncludePath) 128 | $(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\glew_64\libs\release;$(SolutionDir)\Hybrid_Fluid_Simulation\GL_LIB\freeglut_64\libs\release;$(SolutionDir)\Hybrid_Fluid_Simulation\json;$(LibraryPath) 129 | 130 | 131 | 132 | Level3 133 | Disabled 134 | WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 135 | MultiThreadedDebug 136 | $(JSONCPP_PATH)\include;%(AdditionalIncludeDirectories) 137 | 138 | 139 | true 140 | Console 141 | freeglut.lib;glew32d.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 142 | /SAFESEH:NO %(AdditionalOptions) 143 | $(JSONCPP_PATH)\makefiles\msvc2010\Debug;%(AdditionalLibraryDirectories) 144 | 145 | 146 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 147 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 148 | 149 | 150 | false 151 | -Xcompiler "/wd 4819" -keep %(AdditionalOptions) 152 | compute_52,sm_52 153 | 154 | 155 | 156 | 157 | Level3 158 | Disabled 159 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 160 | 161 | 162 | true 163 | Console 164 | cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 165 | 166 | 167 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 168 | copy 
"$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 169 | 170 | 171 | 64 172 | 173 | 174 | 175 | 176 | Level3 177 | MaxSpeed 178 | true 179 | true 180 | WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 181 | MultiThreaded 182 | %(AdditionalIncludeDirectories) 183 | 184 | 185 | true 186 | true 187 | true 188 | Console 189 | freeglut.lib;glew32.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 190 | /SAFESEH:NO %(AdditionalOptions) 191 | %(AdditionalLibraryDirectories) 192 | %(IgnoreSpecificDefaultLibraries) 193 | 194 | 195 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 196 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 197 | 198 | 199 | true 200 | -Xcompiler "/wd 4819" -keep -use_fast_math %(AdditionalOptions) 201 | compute_52,sm_52 202 | 203 | 204 | 205 | 206 | Level3 207 | MaxSpeed 208 | true 209 | true 210 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 211 | %(AdditionalIncludeDirectories) 212 | MultiThreaded 213 | 214 | 215 | true 216 | true 217 | true 218 | Console 219 | freeglut.lib;glew32.lib;lib_json.lib;cudadevrt.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 220 | %(AdditionalLibraryDirectories) 221 | /SAFESEH:NO %(AdditionalOptions) 222 | 223 | 224 | echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 225 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 226 | 227 | 228 | 64 229 | -Xcompiler "/wd 4819" -keep -use_fast_math %(AdditionalOptions) 230 | compute_52,sm_52 231 | 232 | 233 | 234 | 235 | 236 | 237 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/sph_sms_arti_block_statistics.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "block_array" : [ 4 | { 5 | "CONST_ACC" : 13, 6 | "F_ADD_SUB" : 0, 7 | "F_DIV" : 0, 8 | "F_DIVIDEF" : 0, 9 | "F_EXP2" : 0, 10 | "F_LOG2" : 0, 11 | "F_MAD_MUL" : 0, 12 | "F_MIN_MAX" : 0, 13 | "F_RCP" : 0, 14 | "F_RSQRT" : 0, 15 | "F_SIN_COS" : 0, 16 | "F_SQRT" : 0, 17 | "GLOBAL_ACC" : 11, 18 | "I_ABS" : 0, 19 | "I_ADD_SUB" : 20, 20 | "I_DIV_REM" : 0, 21 | "I_LOGICAL" : 0, 22 | "I_MAD_MUL" : 12, 23 | "I_MIN_MAX" : 0, 24 | "I_MUL24" : 1, 25 | "I_SAD" : 0, 26 | "I_SHL_SHR" : 4, 27 | "LOCAL_ACC" : 0, 28 | "SHARED_ACC" : 3, 29 | "begin" : 0, 30 | "end" : 132, 31 | "name" : "arti - [0, 132]", 32 | "num_bra" : 8, 33 | "num_insts" : 125, 34 | "num_sync" : 1, 35 | "num_unknown" : 52 36 | }, 37 | { 38 | "CONST_ACC" : 0, 39 | "F_ADD_SUB" : 0, 40 | "F_DIV" : 0, 41 | "F_DIVIDEF" : 0, 42 | "F_EXP2" : 0, 43 | "F_LOG2" : 0, 44 | "F_MAD_MUL" : 0, 45 | "F_MIN_MAX" : 0, 46 | "F_RCP" : 0, 47 | "F_RSQRT" : 0, 48 | "F_SIN_COS" : 0, 49 | "F_SQRT" : 0, 50 | "GLOBAL_ACC" : 0, 51 | "I_ABS" : 0, 52 | "I_ADD_SUB" : 2, 53 | "I_DIV_REM" : 0, 54 | "I_LOGICAL" : 0, 55 | "I_MAD_MUL" : 0, 56 | "I_MIN_MAX" : 0, 57 | "I_MUL24" : 0, 58 | "I_SAD" : 0, 59 | "I_SHL_SHR" : 1, 60 | "LOCAL_ACC" : 0, 61 | "SHARED_ACC" : 4, 62 | "begin" : 132, 63 | "end" : 153, 64 | "name" : "arti - [132, 153]", 65 | "num_bra" : 2, 66 | "num_insts" : 18, 67 | "num_sync" : 0, 68 | "num_unknown" : 9 69 | }, 70 | { 71 | "CONST_ACC" : 0, 72 | "F_ADD_SUB" : 0, 73 | "F_DIV" : 0, 74 | "F_DIVIDEF" : 0, 75 | "F_EXP2" : 0, 76 | "F_LOG2" : 0, 77 | "F_MAD_MUL" : 0, 78 | "F_MIN_MAX" : 0, 79 | "F_RCP" : 0, 80 | "F_RSQRT" : 0, 81 | "F_SIN_COS" : 0, 82 | "F_SQRT" : 0, 83 | "GLOBAL_ACC" : 0, 84 | "I_ABS" : 0, 85 | "I_ADD_SUB" : 0, 86 | "I_DIV_REM" : 0, 87 | "I_LOGICAL" : 0, 88 | "I_MAD_MUL" : 1, 89 | "I_MIN_MAX" : 0, 90 | "I_MUL24" : 0, 91 | "I_SAD" : 0, 92 | "I_SHL_SHR" : 0, 93 | "LOCAL_ACC" : 0, 94 | "SHARED_ACC" : 0, 95 | "begin" : 153, 96 | 
"end" : 160, 97 | "name" : "arti - [153, 160]", 98 | "num_bra" : 0, 99 | "num_insts" : 6, 100 | "num_sync" : 0, 101 | "num_unknown" : 5 102 | }, 103 | { 104 | "CONST_ACC" : 0, 105 | "F_ADD_SUB" : 1, 106 | "F_DIV" : 0, 107 | "F_DIVIDEF" : 0, 108 | "F_EXP2" : 0, 109 | "F_LOG2" : 0, 110 | "F_MAD_MUL" : 0, 111 | "F_MIN_MAX" : 0, 112 | "F_RCP" : 0, 113 | "F_RSQRT" : 0, 114 | "F_SIN_COS" : 0, 115 | "F_SQRT" : 0, 116 | "GLOBAL_ACC" : 3, 117 | "I_ABS" : 0, 118 | "I_ADD_SUB" : 6, 119 | "I_DIV_REM" : 0, 120 | "I_LOGICAL" : 0, 121 | "I_MAD_MUL" : 1, 122 | "I_MIN_MAX" : 1, 123 | "I_MUL24" : 0, 124 | "I_SAD" : 0, 125 | "I_SHL_SHR" : 1, 126 | "LOCAL_ACC" : 0, 127 | "SHARED_ACC" : 8, 128 | "begin" : 160, 129 | "end" : 198, 130 | "name" : "arti - [160, 198]", 131 | "num_bra" : 4, 132 | "num_insts" : 34, 133 | "num_sync" : 2, 134 | "num_unknown" : 7 135 | }, 136 | { 137 | "CONST_ACC" : 0, 138 | "F_ADD_SUB" : 0, 139 | "F_DIV" : 0, 140 | "F_DIVIDEF" : 0, 141 | "F_EXP2" : 0, 142 | "F_LOG2" : 0, 143 | "F_MAD_MUL" : 0, 144 | "F_MIN_MAX" : 0, 145 | "F_RCP" : 0, 146 | "F_RSQRT" : 0, 147 | "F_SIN_COS" : 0, 148 | "F_SQRT" : 0, 149 | "GLOBAL_ACC" : 0, 150 | "I_ABS" : 0, 151 | "I_ADD_SUB" : 3, 152 | "I_DIV_REM" : 0, 153 | "I_LOGICAL" : 0, 154 | "I_MAD_MUL" : 0, 155 | "I_MIN_MAX" : 0, 156 | "I_MUL24" : 0, 157 | "I_SAD" : 0, 158 | "I_SHL_SHR" : 1, 159 | "LOCAL_ACC" : 0, 160 | "SHARED_ACC" : 4, 161 | "begin" : 198, 162 | "end" : 215, 163 | "name" : "arti - [198, 215]", 164 | "num_bra" : 2, 165 | "num_insts" : 14, 166 | "num_sync" : 0, 167 | "num_unknown" : 4 168 | }, 169 | { 170 | "CONST_ACC" : 1, 171 | "F_ADD_SUB" : 4, 172 | "F_DIV" : 0, 173 | "F_DIVIDEF" : 0, 174 | "F_EXP2" : 0, 175 | "F_LOG2" : 0, 176 | "F_MAD_MUL" : 5, 177 | "F_MIN_MAX" : 0, 178 | "F_RCP" : 0, 179 | "F_RSQRT" : 0, 180 | "F_SIN_COS" : 0, 181 | "F_SQRT" : 0, 182 | "GLOBAL_ACC" : 0, 183 | "I_ABS" : 0, 184 | "I_ADD_SUB" : 2, 185 | "I_DIV_REM" : 0, 186 | "I_LOGICAL" : 0, 187 | "I_MAD_MUL" : 0, 188 | "I_MIN_MAX" : 0, 189 | 
"I_MUL24" : 0, 190 | "I_SAD" : 0, 191 | "I_SHL_SHR" : 0, 192 | "LOCAL_ACC" : 0, 193 | "SHARED_ACC" : 3, 194 | "begin" : 215, 195 | "end" : 248, 196 | "name" : "arti - [215, 248]", 197 | "num_bra" : 3, 198 | "num_insts" : 30, 199 | "num_sync" : 0, 200 | "num_unknown" : 12 201 | }, 202 | { 203 | "CONST_ACC" : 5, 204 | "F_ADD_SUB" : 0, 205 | "F_DIV" : 0, 206 | "F_DIVIDEF" : 1, 207 | "F_EXP2" : 0, 208 | "F_LOG2" : 0, 209 | "F_MAD_MUL" : 9, 210 | "F_MIN_MAX" : 0, 211 | "F_RCP" : 0, 212 | "F_RSQRT" : 0, 213 | "F_SIN_COS" : 0, 214 | "F_SQRT" : 0, 215 | "GLOBAL_ACC" : 2, 216 | "I_ABS" : 0, 217 | "I_ADD_SUB" : 2, 218 | "I_DIV_REM" : 0, 219 | "I_LOGICAL" : 0, 220 | "I_MAD_MUL" : 0, 221 | "I_MIN_MAX" : 0, 222 | "I_MUL24" : 0, 223 | "I_SAD" : 0, 224 | "I_SHL_SHR" : 1, 225 | "LOCAL_ACC" : 0, 226 | "SHARED_ACC" : 0, 227 | "begin" : 248, 228 | "end" : 273, 229 | "name" : "arti - [248, 273]", 230 | "num_bra" : 1, 231 | "num_insts" : 24, 232 | "num_sync" : 0, 233 | "num_unknown" : 3 234 | } 235 | ], 236 | "function_name" : "_ZN3sph19knComputeDensitySMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE" 237 | }, 238 | { 239 | "block_array" : [ 240 | { 241 | "CONST_ACC" : 16, 242 | "F_ADD_SUB" : 0, 243 | "F_DIV" : 0, 244 | "F_DIVIDEF" : 0, 245 | "F_EXP2" : 0, 246 | "F_LOG2" : 0, 247 | "F_MAD_MUL" : 0, 248 | "F_MIN_MAX" : 0, 249 | "F_RCP" : 0, 250 | "F_RSQRT" : 0, 251 | "F_SIN_COS" : 0, 252 | "F_SQRT" : 0, 253 | "GLOBAL_ACC" : 15, 254 | "I_ABS" : 0, 255 | "I_ADD_SUB" : 23, 256 | "I_DIV_REM" : 0, 257 | "I_LOGICAL" : 0, 258 | "I_MAD_MUL" : 12, 259 | "I_MIN_MAX" : 0, 260 | "I_MUL24" : 1, 261 | "I_SAD" : 0, 262 | "I_SHL_SHR" : 5, 263 | "LOCAL_ACC" : 0, 264 | "SHARED_ACC" : 3, 265 | "begin" : 0, 266 | "end" : 146, 267 | "name" : "arti - [0, 146]", 268 | "num_bra" : 8, 269 | "num_insts" : 139, 270 | "num_sync" : 1, 271 | "num_unknown" : 55 272 | }, 273 | { 274 | "CONST_ACC" : 0, 275 | "F_ADD_SUB" : 0, 276 | "F_DIV" : 0, 277 | "F_DIVIDEF" : 0, 278 | "F_EXP2" : 0, 279 | "F_LOG2" : 0, 280 | 
"F_MAD_MUL" : 0, 281 | "F_MIN_MAX" : 0, 282 | "F_RCP" : 0, 283 | "F_RSQRT" : 0, 284 | "F_SIN_COS" : 0, 285 | "F_SQRT" : 0, 286 | "GLOBAL_ACC" : 0, 287 | "I_ABS" : 0, 288 | "I_ADD_SUB" : 2, 289 | "I_DIV_REM" : 0, 290 | "I_LOGICAL" : 0, 291 | "I_MAD_MUL" : 0, 292 | "I_MIN_MAX" : 0, 293 | "I_MUL24" : 0, 294 | "I_SAD" : 0, 295 | "I_SHL_SHR" : 1, 296 | "LOCAL_ACC" : 0, 297 | "SHARED_ACC" : 4, 298 | "begin" : 146, 299 | "end" : 166, 300 | "name" : "arti - [146, 166]", 301 | "num_bra" : 2, 302 | "num_insts" : 17, 303 | "num_sync" : 0, 304 | "num_unknown" : 8 305 | }, 306 | { 307 | "CONST_ACC" : 0, 308 | "F_ADD_SUB" : 0, 309 | "F_DIV" : 0, 310 | "F_DIVIDEF" : 0, 311 | "F_EXP2" : 0, 312 | "F_LOG2" : 0, 313 | "F_MAD_MUL" : 0, 314 | "F_MIN_MAX" : 0, 315 | "F_RCP" : 0, 316 | "F_RSQRT" : 0, 317 | "F_SIN_COS" : 0, 318 | "F_SQRT" : 0, 319 | "GLOBAL_ACC" : 0, 320 | "I_ABS" : 0, 321 | "I_ADD_SUB" : 0, 322 | "I_DIV_REM" : 0, 323 | "I_LOGICAL" : 0, 324 | "I_MAD_MUL" : 1, 325 | "I_MIN_MAX" : 0, 326 | "I_MUL24" : 0, 327 | "I_SAD" : 0, 328 | "I_SHL_SHR" : 0, 329 | "LOCAL_ACC" : 0, 330 | "SHARED_ACC" : 0, 331 | "begin" : 166, 332 | "end" : 181, 333 | "name" : "arti - [166, 181]", 334 | "num_bra" : 0, 335 | "num_insts" : 14, 336 | "num_sync" : 0, 337 | "num_unknown" : 13 338 | }, 339 | { 340 | "CONST_ACC" : 2, 341 | "F_ADD_SUB" : 12, 342 | "F_DIV" : 0, 343 | "F_DIVIDEF" : 1, 344 | "F_EXP2" : 0, 345 | "F_LOG2" : 0, 346 | "F_MAD_MUL" : 21, 347 | "F_MIN_MAX" : 0, 348 | "F_RCP" : 1, 349 | "F_RSQRT" : 0, 350 | "F_SIN_COS" : 0, 351 | "F_SQRT" : 1, 352 | "GLOBAL_ACC" : 0, 353 | "I_ABS" : 0, 354 | "I_ADD_SUB" : 3, 355 | "I_DIV_REM" : 0, 356 | "I_LOGICAL" : 0, 357 | "I_MAD_MUL" : 0, 358 | "I_MIN_MAX" : 0, 359 | "I_MUL24" : 0, 360 | "I_SAD" : 0, 361 | "I_SHL_SHR" : 0, 362 | "LOCAL_ACC" : 0, 363 | "SHARED_ACC" : 8, 364 | "begin" : 181, 365 | "end" : 245, 366 | "name" : "arti - [181, 245]", 367 | "num_bra" : 3, 368 | "num_insts" : 61, 369 | "num_sync" : 0, 370 | "num_unknown" : 9 371 | }, 372 | { 373 
| "CONST_ACC" : 0, 374 | "F_ADD_SUB" : 0, 375 | "F_DIV" : 0, 376 | "F_DIVIDEF" : 0, 377 | "F_EXP2" : 0, 378 | "F_LOG2" : 0, 379 | "F_MAD_MUL" : 0, 380 | "F_MIN_MAX" : 0, 381 | "F_RCP" : 0, 382 | "F_RSQRT" : 0, 383 | "F_SIN_COS" : 0, 384 | "F_SQRT" : 0, 385 | "GLOBAL_ACC" : 8, 386 | "I_ABS" : 0, 387 | "I_ADD_SUB" : 12, 388 | "I_DIV_REM" : 0, 389 | "I_LOGICAL" : 0, 390 | "I_MAD_MUL" : 1, 391 | "I_MIN_MAX" : 1, 392 | "I_MUL24" : 0, 393 | "I_SAD" : 0, 394 | "I_SHL_SHR" : 2, 395 | "LOCAL_ACC" : 0, 396 | "SHARED_ACC" : 13, 397 | "begin" : 245, 398 | "end" : 297, 399 | "name" : "arti - [245, 297]", 400 | "num_bra" : 3, 401 | "num_insts" : 49, 402 | "num_sync" : 2, 403 | "num_unknown" : 7 404 | }, 405 | { 406 | "CONST_ACC" : 0, 407 | "F_ADD_SUB" : 0, 408 | "F_DIV" : 0, 409 | "F_DIVIDEF" : 0, 410 | "F_EXP2" : 0, 411 | "F_LOG2" : 0, 412 | "F_MAD_MUL" : 0, 413 | "F_MIN_MAX" : 0, 414 | "F_RCP" : 0, 415 | "F_RSQRT" : 0, 416 | "F_SIN_COS" : 0, 417 | "F_SQRT" : 0, 418 | "GLOBAL_ACC" : 0, 419 | "I_ABS" : 0, 420 | "I_ADD_SUB" : 3, 421 | "I_DIV_REM" : 0, 422 | "I_LOGICAL" : 0, 423 | "I_MAD_MUL" : 0, 424 | "I_MIN_MAX" : 0, 425 | "I_MUL24" : 0, 426 | "I_SAD" : 0, 427 | "I_SHL_SHR" : 1, 428 | "LOCAL_ACC" : 0, 429 | "SHARED_ACC" : 4, 430 | "begin" : 297, 431 | "end" : 315, 432 | "name" : "arti - [297, 315]", 433 | "num_bra" : 3, 434 | "num_insts" : 15, 435 | "num_sync" : 0, 436 | "num_unknown" : 4 437 | }, 438 | { 439 | "CONST_ACC" : 9, 440 | "F_ADD_SUB" : 0, 441 | "F_DIV" : 3, 442 | "F_DIVIDEF" : 1, 443 | "F_EXP2" : 0, 444 | "F_LOG2" : 0, 445 | "F_MAD_MUL" : 30, 446 | "F_MIN_MAX" : 0, 447 | "F_RCP" : 0, 448 | "F_RSQRT" : 0, 449 | "F_SIN_COS" : 0, 450 | "F_SQRT" : 1, 451 | "GLOBAL_ACC" : 5, 452 | "I_ABS" : 0, 453 | "I_ADD_SUB" : 2, 454 | "I_DIV_REM" : 0, 455 | "I_LOGICAL" : 0, 456 | "I_MAD_MUL" : 1, 457 | "I_MIN_MAX" : 0, 458 | "I_MUL24" : 0, 459 | "I_SAD" : 0, 460 | "I_SHL_SHR" : 1, 461 | "LOCAL_ACC" : 0, 462 | "SHARED_ACC" : 0, 463 | "begin" : 315, 464 | "end" : 385, 465 | "name" : 
"arti - [315, 385]", 466 | "num_bra" : 3, 467 | "num_insts" : 66, 468 | "num_sync" : 0, 469 | "num_unknown" : 10 470 | } 471 | ], 472 | "function_name" : "_ZN3sph17knComputeForceSMSENS_18ParticleBufferListEPiS1_PNS_9BlockTaskE" 473 | } 474 | ] 475 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/main.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------- 2 | // NVIDIA(R) GVDB VOXELS 3 | // Copyright 2017, NVIDIA Corporation. 4 | // 5 | // Redistribution and use in source and binary forms, with or without modification, 6 | // are permitted provided that the following conditions are met: 7 | // 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | // 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer 9 | // in the documentation and/or other materials provided with the distribution. 10 | // 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived 11 | // from this software without specific prior written permission. 12 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 13 | // BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT 14 | // SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 15 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 16 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 17 | // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 18 | // 19 | // Version 1.0: Rama Hoetzlein, 5/1/2017 20 | //---------------------------------------------------------------------------------- 21 | 22 | #ifndef __MAIN_H__ 23 | #define __MAIN_H__ 24 | 25 | #pragma warning(disable:4996) // preventing snprintf >> _snprintf_s 26 | 27 | #include "sample_utils/platform.h" 28 | 29 | // trick for pragma message so we can write: 30 | // #pragma message(__FILE__"("S__LINE__"): blah") 31 | #define S__(x) #x 32 | #define S_(x) S__(x) 33 | #define S__LINE__ S_(__LINE__) 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #ifdef WIN32 42 | #ifdef MEMORY_LEAKS_CHECK 43 | # pragma message("build will Check for Memory Leaks!") 44 | # define _CRTDBG_MAP_ALLOC 45 | # include 46 | # include 47 | inline void* operator new(size_t size, const char *file, int line) 48 | { 49 | return ::operator new(size, 1, file, line); 50 | } 51 | 52 | inline void __cdecl operator delete(void *ptr, const char *file, int line) 53 | { 54 | ::operator delete(ptr, _NORMAL_BLOCK, file, line); 55 | } 56 | 57 | #define DEBUG_NEW new( __FILE__, __LINE__) 58 | #define MALLOC_DBG(x) _malloc_dbg(x, 1, __FILE__, __LINE__); 59 | #define malloc(x) MALLOC_DBG(x) 60 | #define new DEBUG_NEW 61 | #endif 62 | #endif 63 | 64 | //----------------- to be declared in the code of the sample: so the sample can decide how to display messages 65 | class NVPWindow { 66 | public: 67 | enum ButtonAction { 68 | 
BUTTON_RELEASE = 0, 69 | BUTTON_PRESS = 1, 70 | BUTTON_REPEAT = 2, 71 | }; 72 | enum MouseButton 73 | { 74 | MOUSE_BUTTON_LEFT = 0, 75 | MOUSE_BUTTON_RIGHT = 1, 76 | MOUSE_BUTTON_MIDDLE = 2, 77 | NUM_MOUSE_BUTTONIDX, 78 | }; 79 | enum MouseButtonFlag 80 | { 81 | MOUSE_BUTTONFLAG_NONE = 0, 82 | MOUSE_BUTTONFLAG_LEFT = (1 << MOUSE_BUTTON_LEFT), 83 | MOUSE_BUTTONFLAG_RIGHT = (1 << MOUSE_BUTTON_RIGHT), 84 | MOUSE_BUTTONFLAG_MIDDLE = (1 << MOUSE_BUTTON_MIDDLE) 85 | }; 86 | enum KeyCode { 87 | KEY_UNKNOWN = -1, 88 | KEY_SPACE = 32, 89 | KEY_APOSTROPHE = 39 /* ' */, 90 | KEY_LEFT_PARENTHESIS = 40 /* ( */, 91 | KEY_RIGHT_PARENTHESIS = 41 /* ) */, 92 | KEY_ASTERISK = 42 /* * */, 93 | KEY_PLUS = 43 /* + */, 94 | KEY_COMMA = 44 /* , */, 95 | KEY_MINUS = 45 /* - */, 96 | KEY_PERIOD = 46 /* . */, 97 | KEY_SLASH = 47 /* / */, 98 | KEY_0 = 48, 99 | KEY_1 = 49, 100 | KEY_2 = 50, 101 | KEY_3 = 51, 102 | KEY_4 = 52, 103 | KEY_5 = 53, 104 | KEY_6 = 54, 105 | KEY_7 = 55, 106 | KEY_8 = 56, 107 | KEY_9 = 57, 108 | KEY_COLON = 58 /* : */, 109 | KEY_SEMICOLON = 59 /* ; */, 110 | KEY_LESS = 60 /* < */, 111 | KEY_EQUAL = 61 /* = */, 112 | KEY_GREATER = 62 /* > */, 113 | KEY_A = 65, 114 | KEY_B = 66, 115 | KEY_C = 67, 116 | KEY_D = 68, 117 | KEY_E = 69, 118 | KEY_F = 70, 119 | KEY_G = 71, 120 | KEY_H = 72, 121 | KEY_I = 73, 122 | KEY_J = 74, 123 | KEY_K = 75, 124 | KEY_L = 76, 125 | KEY_M = 77, 126 | KEY_N = 78, 127 | KEY_O = 79, 128 | KEY_P = 80, 129 | KEY_Q = 81, 130 | KEY_R = 82, 131 | KEY_S = 83, 132 | KEY_T = 84, 133 | KEY_U = 85, 134 | KEY_V = 86, 135 | KEY_W = 87, 136 | KEY_X = 88, 137 | KEY_Y = 89, 138 | KEY_Z = 90, 139 | KEY_LEFT_BRACKET = 91 /* [ */, 140 | KEY_BACKSLASH = 92 /* \ */, 141 | KEY_RIGHT_BRACKET = 93 /* ] */, 142 | KEY_GRAVE_ACCENT = 96 /* ` */, 143 | KEY_WORLD_1 = 161 /* non-US #1 */, 144 | KEY_WORLD_2 = 162 /* non-US #2 */, 145 | /* Function keys */ 146 | KEY_ESCAPE = 256, 147 | KEY_ENTER = 257, 148 | KEY_TAB = 258, 149 | KEY_BACKSPACE = 259, 150 | KEY_INSERT = 260, 
151 | KEY_DELETE = 261, 152 | KEY_RIGHT = 262, 153 | KEY_LEFT = 263, 154 | KEY_DOWN = 264, 155 | KEY_UP = 265, 156 | KEY_PAGE_UP = 266, 157 | KEY_PAGE_DOWN = 267, 158 | KEY_HOME = 268, 159 | KEY_END = 269, 160 | KEY_CAPS_LOCK = 280, 161 | KEY_SCROLL_LOCK = 281, 162 | KEY_NUM_LOCK = 282, 163 | KEY_PRINT_SCREEN = 283, 164 | KEY_PAUSE = 284, 165 | KEY_F1 = 290, 166 | KEY_F2 = 291, 167 | KEY_F3 = 292, 168 | KEY_F4 = 293, 169 | KEY_F5 = 294, 170 | KEY_F6 = 295, 171 | KEY_F7 = 296, 172 | KEY_F8 = 297, 173 | KEY_F9 = 298, 174 | KEY_F10 = 299, 175 | KEY_F11 = 300, 176 | KEY_F12 = 301, 177 | KEY_F13 = 302, 178 | KEY_F14 = 303, 179 | KEY_F15 = 304, 180 | KEY_F16 = 305, 181 | KEY_F17 = 306, 182 | KEY_F18 = 307, 183 | KEY_F19 = 308, 184 | KEY_F20 = 309, 185 | KEY_F21 = 310, 186 | KEY_F22 = 311, 187 | KEY_F23 = 312, 188 | KEY_F24 = 313, 189 | KEY_F25 = 314, 190 | KEY_KP_0 = 320, 191 | KEY_KP_1 = 321, 192 | KEY_KP_2 = 322, 193 | KEY_KP_3 = 323, 194 | KEY_KP_4 = 324, 195 | KEY_KP_5 = 325, 196 | KEY_KP_6 = 326, 197 | KEY_KP_7 = 327, 198 | KEY_KP_8 = 328, 199 | KEY_KP_9 = 329, 200 | KEY_KP_DECIMAL = 330, 201 | KEY_KP_DIVIDE = 331, 202 | KEY_KP_MULTIPLY = 332, 203 | KEY_KP_SUBTRACT = 333, 204 | KEY_KP_ADD = 334, 205 | KEY_KP_ENTER = 335, 206 | KEY_KP_EQUAL = 336, 207 | KEY_LEFT_SHIFT = 340, 208 | KEY_LEFT_CONTROL = 341, 209 | KEY_LEFT_ALT = 342, 210 | KEY_LEFT_SUPER = 343, 211 | KEY_RIGHT_SHIFT = 344, 212 | KEY_RIGHT_CONTROL = 345, 213 | KEY_RIGHT_ALT = 346, 214 | KEY_RIGHT_SUPER = 347, 215 | KEY_MENU = 348, 216 | KEY_LAST = KEY_MENU, 217 | }; 218 | enum KeyModifiers { 219 | KMOD_SHIFT = 0x0001, 220 | KMOD_CONTROL = 0x0002, 221 | KMOD_ALT = 0x0004, 222 | KMOD_SUPER = 0x0008, 223 | }; 224 | typedef struct WINinternal* WINhandle; 225 | typedef void(*NVPproc)(void); 226 | 227 | // OpenGL specific 228 | struct ContextFlags { 229 | int major; 230 | int minor; 231 | int MSAA; 232 | int depth; 233 | int stencil; 234 | bool debug; 235 | bool robust; 236 | bool core; 237 | bool forward; 238 
| bool stereo; 239 | NVPWindow* share; 240 | 241 | ContextFlags(int _major = 3, int _minor = 0, bool _core = false, int _MSAA = 0, int _depth = 24, int _stencil = 8, bool _debug = false, bool _robust = false, bool _forward = false, bool _stereo = false, NVPWindow* _share = 0) 242 | { 243 | major = _major; 244 | minor = _minor; 245 | MSAA = _MSAA; 246 | depth = _depth; 247 | stencil = _stencil; 248 | core = _core; 249 | debug = _debug; 250 | robust = _robust; 251 | forward = _forward; 252 | stereo = _stereo; 253 | share = _share; 254 | } 255 | 256 | }; 257 | unsigned int m_debugFilter; 258 | std::string m_debugTitle; 259 | 260 | WINhandle m_internal; 261 | 262 | int m_renderCnt; 263 | int m_curX, m_curY; 264 | int m_wheel; 265 | int m_winSz[4]; 266 | int m_mods; 267 | ContextFlags m_cflags; 268 | bool m_doSwap; 269 | bool m_active; 270 | bool m_vsync; 271 | bool m_keyPressed[KEY_LAST + 1]; 272 | bool m_keyToggled[KEY_LAST + 1]; 273 | bool m_fullscreen; 274 | int m_display_frame; 275 | int m_golden_frame; 276 | int m_screenquad_prog; 277 | int m_screenquad_vshader; 278 | int m_screenquad_fshader; 279 | int m_screenquad_vbo[3]; 280 | int m_screenquad_utex1; 281 | int m_screenquad_utex2; 282 | int m_screenquad_utexflags; 283 | int m_screenquad_ucoords; 284 | int m_screenquad_uscreen; 285 | 286 | NVPWindow() 287 | : m_renderCnt(1) 288 | , m_internal(0) 289 | , m_debugFilter(0) 290 | { 291 | m_curX = -1; 292 | m_curY = -1; 293 | m_mods = 0; 294 | m_fullscreen = false; 295 | memset(m_keyPressed, 0, sizeof(m_keyPressed)); 296 | memset(m_keyToggled, 0, sizeof(m_keyToggled)); 297 | } 298 | bool isPressed(int key) { return m_keyPressed[key]; } 299 | bool onPress(int key) { return m_keyPressed[key] && m_keyToggled[key]; } 300 | 301 | // Accessors 302 | inline void setWinSz(int w, int h) { m_winSz[0] = w; m_winSz[1] = h; } 303 | inline const int* getWinSz() const { return m_winSz; } 304 | inline int getWidth() const { return m_winSz[0]; } 305 | inline int getHeight() const { 
return m_winSz[1]; } 306 | inline const int getWheel() const { return m_wheel; } 307 | inline int getMods() const { return m_mods; } 308 | inline void setMods(int m) { m_mods = m; } 309 | inline void setCurMouse(int x, int y) { m_curX = x; m_curY = y; } 310 | inline int getCurX() { return m_curX; } 311 | inline int getCurY() { return m_curY; } 312 | inline bool isFirstFrame() { return m_display_frame == 0; } 313 | inline int getDisplayFrame() { return m_display_frame; } 314 | 315 | // activate and deactivate are not thread-safe, need to be wrapped in mutex if called from multiple threads 316 | // invisible windows will not have any active callbacks, nor will they be affected by sysEvents 317 | bool activate(int width, int height, const char* title, const ContextFlags* flags, int invisible = 0); 318 | void deactivate(); 319 | 320 | // compatibility hack 321 | bool create(const char* title = NULL, const ContextFlags* cflags = 0, int width = 1024, int height = 768); 322 | void setTitle(const char* title); 323 | void maximize(); 324 | void resize_window(int w, int h); 325 | void restore(); 326 | void minimize(); 327 | void postRedisplay(int n = 1) { m_renderCnt = n; } 328 | void postQuit(); 329 | void makeContextCurrent(); 330 | void makeContextNonCurrent(); 331 | void swapBuffers(); 332 | void swapInterval(int i); 333 | bool isOpen(); 334 | void vsync(bool state); 335 | void setKeyPress(int key, bool state); 336 | void setFullscreen(bool fullscreen); 337 | void save_frame(char* fname); 338 | 339 | // from NVPWindow 340 | virtual bool init() { return true; } 341 | virtual void shutdown() {} 342 | virtual void reshape(int w, int h) { } 343 | virtual void motion(int x, int y, int dx, int dy) {} 344 | virtual void mousewheel(int delta) {} 345 | virtual void on_arg(std::string arg, std::string val) {} 346 | virtual void mouse(MouseButton button, ButtonAction action, int mods, int x, int y) {} 347 | virtual void keyboard(KeyCode key, ButtonAction action, int mods, int x, 
int y) {} 348 | virtual void keyboardchar(unsigned char key, int mods, int x, int y) {} 349 | virtual void display() {} 350 | virtual bool begin() { return true; } 351 | virtual void end() {} 352 | 353 | // from WindowProfiler 354 | int run(const std::string &name, const std::string& shortname, int argc, const char** argv, int width, int height, int Major, int Minor, int GoldenFrame = 0); 355 | void initGL(); 356 | void initScreenQuadGL(); 357 | void clearScreenGL(); 358 | void createScreenQuadGL(int* glid, int w, int h); 359 | void renderScreenQuadGL(int glid, char inv1 = 0); 360 | void compositeScreenQuadGL(int glid1, int glid2, char inv1 = 0, char inv2 = 0); 361 | void renderScreenQuadGL(int glid1, int glid2, float x1, float y1, float x2, float y2, char inv1 = 0, char inv2 = 0); 362 | 363 | ////////////////////////////////////////////////////////////////////////// 364 | // system related 365 | static void sysInit(); 366 | static void sysDeinit(); 367 | static bool sysPollEvents(bool bLoop); 368 | static void sysWaitEvents(); 369 | static NVPproc sysGetProcAddress(const char* name); 370 | static int sysExtensionSupported(const char* name); 371 | static double sysGetTime(); // in seconds 372 | static void sysSleep(double seconds); 373 | static void sysVisibleConsole(); 374 | static std::string sysExePath(); 375 | }; 376 | 377 | extern int sample_main(int argc, const char**argv); 378 | 379 | // sample-specific implementation, called by nvprintfLevel. For example to redirect the message to a specific window or part of the viewport 380 | extern void sample_print(int level, const char * fmt2); 381 | 382 | extern void checkGL(char* msg); 383 | 384 | // sample-specific implementation, called by nvprintf*. 
For example to redirect the message to a specific window or part of the viewport 385 | extern void sample_print(int level, const char * fmt); 386 | 387 | void nvprintf(const char * fmt, ...); 388 | void nvprintfLevel(int level, const char * fmt, ...); 389 | void nvprintSetLevel(int l); 390 | int nvprintGetLevel(); 391 | void nvprintSetLogging(bool b); 392 | void nverror(); 393 | 394 | bool getFileLocation(char* filename, char* outpath); 395 | bool getFileLocation(char* filename, char* outpath, std::vector paths); 396 | 397 | #endif 398 | -------------------------------------------------------------------------------- /sph_fastest/Hybrid_Fluid_Simulation/json/include/json/reader.h: -------------------------------------------------------------------------------- 1 | // Copyright 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 2 | // Distributed under MIT license, or public domain if desired and 3 | // recognized in your jurisdiction. 4 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 | 6 | #ifndef CPPTL_JSON_READER_H_INCLUDED 7 | #define CPPTL_JSON_READER_H_INCLUDED 8 | 9 | #if !defined(JSON_IS_AMALGAMATION) 10 | #include "features.h" 11 | #include "value.h" 12 | #endif // if !defined(JSON_IS_AMALGAMATION) 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | // Disable warning C4251: : needs to have dll-interface to 20 | // be used by... 21 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 22 | #pragma warning(push) 23 | #pragma warning(disable : 4251) 24 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 25 | 26 | #pragma pack(push, 8) 27 | 28 | namespace Json { 29 | 30 | /** \brief Unserialize a JSON document into a 31 | *Value. 32 | * 33 | * \deprecated Use CharReader and CharReaderBuilder. 34 | */ 35 | class JSON_API Reader { 36 | public: 37 | typedef char Char; 38 | typedef const Char* Location; 39 | 40 | /** \brief An error tagged with where in the JSON text it was encountered. 
41 | * 42 | * The offsets give the [start, limit) range of bytes within the text. Note 43 | * that this is bytes, not codepoints. 44 | * 45 | */ 46 | struct StructuredError { 47 | ptrdiff_t offset_start; 48 | ptrdiff_t offset_limit; 49 | String message; 50 | }; 51 | 52 | /** \brief Constructs a Reader allowing all features 53 | * for parsing. 54 | */ 55 | JSONCPP_DEPRECATED("Use CharReader and CharReaderBuilder instead") 56 | Reader(); 57 | 58 | /** \brief Constructs a Reader allowing the specified feature set 59 | * for parsing. 60 | */ 61 | JSONCPP_DEPRECATED("Use CharReader and CharReaderBuilder instead") 62 | Reader(const Features& features); 63 | 64 | /** \brief Read a Value from a JSON 65 | * document. 66 | * \param document UTF-8 encoded string containing the document to read. 67 | * \param root [out] Contains the root value of the document if it was 68 | * successfully parsed. 69 | * \param collectComments \c true to collect comment and allow writing them 70 | * back during 71 | * serialization, \c false to discard comments. 72 | * This parameter is ignored if 73 | * Features::allowComments_ 74 | * is \c false. 75 | * \return \c true if the document was successfully parsed, \c false if an 76 | * error occurred. 77 | */ 78 | bool 79 | parse(const std::string& document, Value& root, bool collectComments = true); 80 | 81 | /** \brief Read a Value from a JSON 82 | document. 83 | * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the 84 | document to read. 85 | * \param endDoc Pointer on the end of the UTF-8 encoded string of the 86 | document to read. 87 | * Must be >= beginDoc. 88 | * \param root [out] Contains the root value of the document if it was 89 | * successfully parsed. 90 | * \param collectComments \c true to collect comment and allow writing them 91 | back during 92 | * serialization, \c false to discard comments. 93 | * This parameter is ignored if 94 | Features::allowComments_ 95 | * is \c false. 
96 | * \return \c true if the document was successfully parsed, \c false if an 97 | error occurred. 98 | */ 99 | bool parse(const char* beginDoc, 100 | const char* endDoc, 101 | Value& root, 102 | bool collectComments = true); 103 | 104 | /// \brief Parse from input stream. 105 | /// \see Json::operator>>(std::istream&, Json::Value&). 106 | bool parse(IStream& is, Value& root, bool collectComments = true); 107 | 108 | /** \brief Returns a user friendly string that list errors in the parsed 109 | * document. 110 | * \return Formatted error message with the list of errors with their location 111 | * in 112 | * the parsed document. An empty string is returned if no error 113 | * occurred 114 | * during parsing. 115 | * \deprecated Use getFormattedErrorMessages() instead (typo fix). 116 | */ 117 | JSONCPP_DEPRECATED("Use getFormattedErrorMessages() instead.") 118 | String getFormatedErrorMessages() const; 119 | 120 | /** \brief Returns a user friendly string that list errors in the parsed 121 | * document. 122 | * \return Formatted error message with the list of errors with their location 123 | * in 124 | * the parsed document. An empty string is returned if no error 125 | * occurred 126 | * during parsing. 127 | */ 128 | String getFormattedErrorMessages() const; 129 | 130 | /** \brief Returns a vector of structured erros encounted while parsing. 131 | * \return A (possibly empty) vector of StructuredError objects. Currently 132 | * only one error can be returned, but the caller should tolerate 133 | * multiple 134 | * errors. This can occur if the parser recovers from a non-fatal 135 | * parse error and then encounters additional errors. 136 | */ 137 | std::vector getStructuredErrors() const; 138 | 139 | /** \brief Add a semantic error message. 140 | * \param value JSON Value location associated with the error 141 | * \param message The error message. 
142 | * \return \c true if the error was successfully added, \c false if the 143 | * Value offset exceeds the document size. 144 | */ 145 | bool pushError(const Value& value, const String& message); 146 | 147 | /** \brief Add a semantic error message with extra context. 148 | * \param value JSON Value location associated with the error 149 | * \param message The error message. 150 | * \param extra Additional JSON Value location to contextualize the error 151 | * \return \c true if the error was successfully added, \c false if either 152 | * Value offset exceeds the document size. 153 | */ 154 | bool pushError(const Value& value, const String& message, const Value& extra); 155 | 156 | /** \brief Return whether there are any errors. 157 | * \return \c true if there are no errors to report \c false if 158 | * errors have occurred. 159 | */ 160 | bool good() const; 161 | 162 | private: 163 | enum TokenType { 164 | tokenEndOfStream = 0, 165 | tokenObjectBegin, 166 | tokenObjectEnd, 167 | tokenArrayBegin, 168 | tokenArrayEnd, 169 | tokenString, 170 | tokenNumber, 171 | tokenTrue, 172 | tokenFalse, 173 | tokenNull, 174 | tokenArraySeparator, 175 | tokenMemberSeparator, 176 | tokenComment, 177 | tokenError 178 | }; 179 | 180 | class Token { 181 | public: 182 | TokenType type_; 183 | Location start_; 184 | Location end_; 185 | }; 186 | 187 | class ErrorInfo { 188 | public: 189 | Token token_; 190 | String message_; 191 | Location extra_; 192 | }; 193 | 194 | typedef std::deque Errors; 195 | 196 | bool readToken(Token& token); 197 | void skipSpaces(); 198 | bool match(Location pattern, int patternLength); 199 | bool readComment(); 200 | bool readCStyleComment(); 201 | bool readCppStyleComment(); 202 | bool readString(); 203 | void readNumber(); 204 | bool readValue(); 205 | bool readObject(Token& token); 206 | bool readArray(Token& token); 207 | bool decodeNumber(Token& token); 208 | bool decodeNumber(Token& token, Value& decoded); 209 | bool decodeString(Token& token); 210 
| bool decodeString(Token& token, String& decoded); 211 | bool decodeDouble(Token& token); 212 | bool decodeDouble(Token& token, Value& decoded); 213 | bool decodeUnicodeCodePoint(Token& token, 214 | Location& current, 215 | Location end, 216 | unsigned int& unicode); 217 | bool decodeUnicodeEscapeSequence(Token& token, 218 | Location& current, 219 | Location end, 220 | unsigned int& unicode); 221 | bool addError(const String& message, Token& token, Location extra = nullptr); 222 | bool recoverFromError(TokenType skipUntilToken); 223 | bool addErrorAndRecover(const String& message, 224 | Token& token, 225 | TokenType skipUntilToken); 226 | void skipUntilSpace(); 227 | Value& currentValue(); 228 | Char getNextChar(); 229 | void 230 | getLocationLineAndColumn(Location location, int& line, int& column) const; 231 | String getLocationLineAndColumn(Location location) const; 232 | void addComment(Location begin, Location end, CommentPlacement placement); 233 | void skipCommentTokens(Token& token); 234 | 235 | static bool containsNewLine(Location begin, Location end); 236 | static String normalizeEOL(Location begin, Location end); 237 | 238 | typedef std::stack Nodes; 239 | Nodes nodes_; 240 | Errors errors_; 241 | String document_; 242 | Location begin_{}; 243 | Location end_{}; 244 | Location current_{}; 245 | Location lastValueEnd_{}; 246 | Value* lastValue_{}; 247 | String commentsBefore_; 248 | Features features_; 249 | bool collectComments_{}; 250 | }; // Reader 251 | 252 | /** Interface for reading JSON from a char array. 253 | */ 254 | class JSON_API CharReader { 255 | public: 256 | virtual ~CharReader() = default; 257 | /** \brief Read a Value from a JSON 258 | document. 259 | * The document must be a UTF-8 encoded string containing the document to 260 | read. 261 | * 262 | * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the 263 | document to read. 
264 | * \param endDoc Pointer on the end of the UTF-8 encoded string of the 265 | document to read. 266 | * Must be >= beginDoc. 267 | * \param root [out] Contains the root value of the document if it was 268 | * successfully parsed. 269 | * \param errs [out] Formatted error messages (if not NULL) 270 | * a user friendly string that lists errors in the parsed 271 | * document. 272 | * \return \c true if the document was successfully parsed, \c false if an 273 | error occurred. 274 | */ 275 | virtual bool parse(char const* beginDoc, 276 | char const* endDoc, 277 | Value* root, 278 | String* errs) = 0; 279 | 280 | class JSON_API Factory { 281 | public: 282 | virtual ~Factory() = default; 283 | /** \brief Allocate a CharReader via operator new(). 284 | * \throw std::exception if something goes wrong (e.g. invalid settings) 285 | */ 286 | virtual CharReader* newCharReader() const = 0; 287 | }; // Factory 288 | }; // CharReader 289 | 290 | /** \brief Build a CharReader implementation. 291 | 292 | Usage: 293 | \code 294 | using namespace Json; 295 | CharReaderBuilder builder; 296 | builder["collectComments"] = false; 297 | Value value; 298 | String errs; 299 | bool ok = parseFromStream(builder, std::cin, &value, &errs); 300 | \endcode 301 | */ 302 | class JSON_API CharReaderBuilder : public CharReader::Factory { 303 | public: 304 | // Note: We use a Json::Value so that we can add data-members to this class 305 | // without a major version bump. 306 | /** Configuration of this builder. 307 | These are case-sensitive. 308 | Available settings (case-sensitive): 309 | - `"collectComments": false or true` 310 | - true to collect comments and allow writing them 311 | back during serialization, false to discard comments. 312 | This parameter is ignored if allowComments is false. 313 | - `"allowComments": false or true` 314 | - true if comments are allowed.
315 | - `"strictRoot": false or true` 316 | - true if root must be either an array or an object value 317 | - `"allowDroppedNullPlaceholders": false or true` 318 | - true if dropped null placeholders are allowed. (See 319 | StreamWriterBuilder.) 320 | - `"allowNumericKeys": false or true` 321 | - true if numeric object keys are allowed. 322 | - `"allowSingleQuotes": false or true` 323 | - true if '' are allowed for strings (both keys and values) 324 | - `"stackLimit": integer` 325 | - Exceeding stackLimit (recursive depth of `readValue()`) will 326 | cause an exception. 327 | - This is a security issue (seg-faults caused by deeply nested JSON), 328 | so the default is low. 329 | - `"failIfExtra": false or true` 330 | - If true, `parse()` returns false when extra non-whitespace trails 331 | the JSON value in the input string. 332 | - `"rejectDupKeys": false or true` 333 | - If true, `parse()` returns false when a key is duplicated within an 334 | object. 335 | - `"allowSpecialFloats": false or true` 336 | - If true, special float values (NaNs and infinities) are allowed 337 | and their values are losslessly restorable. 338 | 339 | You can examine `settings_` yourself 340 | to see the defaults. You can also write and read them just like any 341 | JSON Value. 342 | \sa setDefaults() 343 | */ 344 | Json::Value settings_; 345 | 346 | CharReaderBuilder(); 347 | ~CharReaderBuilder() override; 348 | 349 | CharReader* newCharReader() const override; 350 | 351 | /** \return true if 'settings' are legal and consistent; 352 | * otherwise, indicate bad settings via 'invalid'. 353 | */ 354 | bool validate(Json::Value* invalid) const; 355 | 356 | /** A simple way to update a specific setting. 357 | */ 358 | Value& operator[](const String& key); 359 | 360 | /** Called by ctor, but you can use this to reset settings_.
361 | * \pre 'settings' != NULL (but Json::null is fine) 362 | * \remark Defaults: 363 | * \snippet src/lib_json/json_reader.cpp CharReaderBuilderDefaults 364 | */ 365 | static void setDefaults(Json::Value* settings); 366 | /** Same as old Features::strictMode(). 367 | * \pre 'settings' != NULL (but Json::null is fine) 368 | * \remark Defaults: 369 | * \snippet src/lib_json/json_reader.cpp CharReaderBuilderStrictMode 370 | */ 371 | static void strictMode(Json::Value* settings); 372 | }; 373 | 374 | /** Consume entire stream and use its begin/end. 375 | * Someday we might have a real StreamReader, but for now this 376 | * is convenient. 377 | */ 378 | bool JSON_API parseFromStream(CharReader::Factory const&, 379 | IStream&, 380 | Value* root, 381 | String* errs); 382 | 383 | /** \brief Read from 'sin' into 'root'. 384 | 385 | Always keep comments from the input JSON. 386 | 387 | This can be used to read a file into a particular sub-object. 388 | For example: 389 | \code 390 | Json::Value root; 391 | cin >> root["dir"]["file"]; 392 | cout << root; 393 | \endcode 394 | Result: 395 | \verbatim 396 | { 397 | "dir": { 398 | "file": { 399 | // The input stream JSON would be nested here. 400 | } 401 | } 402 | } 403 | \endverbatim 404 | \throw std::exception on parse error. 405 | \see Json::operator<<() 406 | */ 407 | JSON_API IStream& operator>>(IStream&, Value&); 408 | 409 | } // namespace Json 410 | 411 | #pragma pack(pop) 412 | 413 | #if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 414 | #pragma warning(pop) 415 | #endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) 416 | 417 | #endif // CPPTL_JSON_READER_H_INCLUDED 418 | --------------------------------------------------------------------------------