├── .gitignore ├── LICENSE-2.0.txt ├── README.md ├── Titan_jobScripts ├── analyse.sh ├── extract.sh ├── readme ├── runMW-A.py ├── runMWStrong.py ├── runPLStrong.py └── runPlummerWeak.py ├── images ├── deepfield1.ppm ├── deepfield1_1k.ppm ├── deepfield2.ppm ├── deepfield2_1k.ppm ├── deepfield3.ppm ├── deepfield3_1k.ppm ├── deepfield4.ppm ├── deepfield4_1k.ppm ├── deepfield5.ppm ├── deepfield5_1k.ppm ├── deepfield6.ppm └── deepfield6_1k.ppm ├── inputExamples ├── component_numbers.txt ├── galactics_mw_df │ ├── cordbh.dat.gz │ ├── dbh.dat.gz │ ├── denspsibulge.dat.gz │ ├── denspsihalo.dat.gz │ ├── freqdbh.dat.gz │ └── mr.dat.gz └── model3_child_compact.tipsy ├── runtime ├── CMakeLists.txt ├── CUDAkernels │ ├── build_tree.cu │ ├── compute_propertiesD.cu │ ├── depthSort.cu │ ├── dev_approximate_gravity.cu │ ├── dev_approximate_gravity_fermi.cu │ ├── dev_approximate_gravity_kepler.cu │ ├── dev_approximate_gravity_let.cu │ ├── dev_approximate_gravity_warp.cu │ ├── dev_approximate_gravity_warp_fermi.cu │ ├── dev_approximate_gravity_warp_new.cu │ ├── dev_direct_gravity.cu │ ├── parallel.cu │ ├── scanKernels.cu │ ├── sortKernels.cu │ ├── support_kernels.cu │ └── timestep.cu ├── Makefile_ogl ├── README.md ├── component_numbers.txt ├── include │ ├── BonsaiIO.h │ ├── BonsaiSharedData.h │ ├── FileIO.h │ ├── GL │ │ ├── freeglut.h │ │ ├── freeglut_ext.h │ │ ├── freeglut_std.h │ │ ├── glew.h │ │ ├── glut.h │ │ ├── glxew.h │ │ └── wglew.h │ ├── GpuArray.h │ ├── ICGenerators.h │ ├── IDType.h │ ├── MPIComm.h │ ├── SharedMemory.h │ ├── anyoption.h │ ├── b40c │ │ ├── radix_sort │ │ │ ├── downsweep │ │ │ │ ├── 6bit_prmt │ │ │ │ │ ├── cta.cuh │ │ │ │ │ ├── kernel_policy.cuh │ │ │ │ │ └── tile.cuh │ │ │ │ ├── cta.cuh │ │ │ │ ├── kernel.cuh │ │ │ │ ├── kernel_policy.cuh │ │ │ │ └── tex_ref.cuh │ │ │ ├── enactor.cuh │ │ │ ├── policy.cuh │ │ │ ├── sort_utils.cuh │ │ │ ├── spine │ │ │ │ ├── cta.cuh │ │ │ │ ├── kernel.cuh │ │ │ │ ├── kernel_policy.cuh │ │ │ │ └── tex_ref.cuh │ │ │ └── upsweep │ 
│ │ │ ├── cta.cuh │ │ │ │ ├── kernel.cuh │ │ │ │ ├── kernel_policy.cuh │ │ │ │ └── tile.cuh │ │ └── util │ │ │ ├── arch_dispatch.cuh │ │ │ ├── basic_utils.cuh │ │ │ ├── cta_work_distribution.cuh │ │ │ ├── cta_work_progress.cuh │ │ │ ├── cuda_properties.cuh │ │ │ ├── device_intrinsics.cuh │ │ │ ├── enactor_base.cuh │ │ │ ├── error_utils.cuh │ │ │ ├── global_barrier.cuh │ │ │ ├── io │ │ │ ├── gather_tile.cuh │ │ │ ├── initialize_tile.cuh │ │ │ ├── load_tile.cuh │ │ │ ├── load_tile_discontinuity.cuh │ │ │ ├── modified_load.cuh │ │ │ ├── modified_store.cuh │ │ │ ├── scatter_tile.cuh │ │ │ ├── store_tile.cuh │ │ │ └── two_phase_scatter_tile.cuh │ │ │ ├── kernel_props.cuh │ │ │ ├── kernel_runtime_stats.cuh │ │ │ ├── memset_kernel.cuh │ │ │ ├── multi_buffer.cuh │ │ │ ├── numeric_traits.cuh │ │ │ ├── operators.cuh │ │ │ ├── parameter_generation.cuh │ │ │ ├── ping_pong_storage.cuh │ │ │ ├── raking_grid.cuh │ │ │ ├── random_bits.cuh │ │ │ ├── reduction │ │ │ ├── cooperative_reduction.cuh │ │ │ ├── serial_reduce.cuh │ │ │ ├── soa │ │ │ │ ├── cooperative_soa_reduction.cuh │ │ │ │ ├── serial_soa_reduce.cuh │ │ │ │ └── warp_soa_reduce.cuh │ │ │ ├── tree_reduce.cuh │ │ │ └── warp_reduce.cuh │ │ │ ├── scan │ │ │ ├── cooperative_scan.cuh │ │ │ ├── serial_scan.cuh │ │ │ ├── soa │ │ │ │ ├── cooperative_soa_scan.cuh │ │ │ │ ├── serial_soa_scan.cuh │ │ │ │ └── warp_soa_scan.cuh │ │ │ └── warp_scan.cuh │ │ │ ├── soa_tuple.cuh │ │ │ ├── spine.cuh │ │ │ ├── srts_details.cuh │ │ │ ├── srts_grid.cuh │ │ │ ├── srts_soa_details.cuh │ │ │ ├── tex_vector.cuh │ │ │ └── vector_types.cuh │ ├── bonsai.h │ ├── build.h │ ├── dd2d.h │ ├── depthSort.h │ ├── devFunctionDefinitions.h │ ├── galactics.h │ ├── hostTreeBuild.h │ ├── log.h │ ├── logFileWriter.h │ ├── my_cuda.h │ ├── my_cuda_rt.h │ ├── my_ocl.h │ ├── node_specs.h │ ├── octree.h │ ├── plummer.h │ ├── postProcessModules.h │ ├── radix.h │ ├── render_particles.h │ ├── renderloop.h │ ├── tipsyIO.h │ ├── tipsydefs.h │ ├── tr.h │ ├── vector3.h │ └── 
vector_math.h ├── ioscript.sh ├── lib │ ├── linux │ │ ├── libGLEW.a │ │ └── libGLEW_x86_64.a │ ├── linux_fpic_glew │ │ ├── libGLEW.a │ │ └── libGLEW_x86_64.a │ ├── win32 │ │ ├── freeglut.dll │ │ ├── freeglut.lib │ │ ├── glew32.dll │ │ └── glew32.lib │ └── win64 │ │ ├── freeglut.dll │ │ ├── freeglut.lib │ │ ├── glew64.dll │ │ └── glew64.lib ├── paramsDDASYNC.txt ├── paramsMW.txt ├── paramsNew.txt ├── params_movie4k.txt ├── profileCommand ├── profiler.conf ├── profiling │ ├── bonsai_timing.h │ ├── cuxTimer.cu │ ├── cuxTimer.h │ ├── cuxTimer_host.cu │ ├── cuxTimer_private.h │ ├── cuxTimer_readme.txt │ ├── derived_atomic_functions.h │ └── warp_functions.h ├── renderer │ ├── Cubemap.cpp │ ├── Cubemap.h │ ├── GLSLProgram.cpp │ ├── GLSLProgram.h │ ├── SmokeRenderer.cpp │ ├── SmokeRenderer.h │ ├── SmokeShaders.cpp │ ├── SmokeShaders.h │ ├── framebufferObject.cpp │ ├── framebufferObject.h │ ├── loadPPM.cpp │ ├── loadPPM.h │ ├── nvMath.h │ ├── nvMatrix.h │ ├── nvQuaternion.h │ ├── nvVector.h │ ├── param.cpp │ ├── param.h │ ├── paramgl.cpp │ ├── paramgl.h │ └── timer.h ├── src │ ├── anyoption.cpp │ ├── bonsai_clrshm.cpp │ ├── bonsai_io.cpp │ ├── build.cpp │ ├── compute_properties.cpp │ ├── driver.cpp │ ├── gpu_iterate.cpp │ ├── hostConstruction.cpp │ ├── initorbit_standalone.cpp │ ├── libraryInterface.cpp │ ├── load_kernels.cpp │ ├── log.cpp │ ├── main.cpp │ ├── octree.cpp │ ├── parallel.cpp │ ├── render_particles.cpp │ ├── renderloop.cpp │ ├── sort_bodies_gpu.cpp │ ├── tipsyIO.cpp │ └── tr.c └── vizscript.sh └── tools ├── CMakeLists.txt ├── IO ├── BonsaiIO.cpp ├── BonsaiIO.h ├── IDType.h ├── Makefile ├── benchmark.cpp ├── cvt_amuseASCII2bonsai.cpp ├── cvt_bonsai2amuseASCII.cpp ├── cvt_bonsai2dumbp.cpp ├── cvt_tipsy2bonsai.cpp ├── cvt_tipsy2bonsaiExtended.cpp ├── cvt_tipsy_gtc12_2bonsai.cpp ├── main.cpp ├── readBonsai.cpp ├── readBonsaiExtended.cpp ├── read_tipsy.h └── tipsydefs.h ├── add_dust ├── DustRing.h ├── IC_MWM31 ├── Makefile ├── add_dust.cpp ├── anyoption.cpp ├── 
anyoption.h ├── initM31_MW.cpp ├── initorbit.cpp ├── initorbit_standalone_dust.cpp ├── kepler.h ├── tipsydefs.h └── vector3.h ├── bonsaiRenderer ├── BonsaiIO.h ├── BonsaiSharedData.h ├── CameraPath.h ├── IDType.h ├── Makefile ├── Makefile.in ├── Makefile_daint ├── RendererData.cpp ├── RendererData.h ├── RendererDataDistribute.cpp ├── SharedMemory.h ├── anyoption.cpp ├── anyoption.h ├── camera.orbit.rotations.txt ├── colorMap ├── color_map.bmp ├── cvtBmp2Ascii.cpp ├── density │ ├── Cubemap.cpp │ ├── Cubemap.h │ ├── GLSLProgram.cpp │ ├── GLSLProgram.h │ ├── GpuArray.h │ ├── SmokeShaders.cpp │ ├── SmokeShaders.h │ ├── depthSort.cpp │ ├── depthSort.h │ ├── framebufferObject.cpp │ ├── framebufferObject.h │ ├── loadPPM.cpp │ ├── loadPPM.h │ ├── nvMath.h │ ├── nvMatrix.h │ ├── nvQuaternion.h │ ├── nvVector.h │ ├── renderer.cpp │ ├── renderer.h │ ├── renderer_with_test.cpp │ ├── renderloop.cpp │ ├── renderloop.h │ ├── renderloop4k.cpp │ ├── timer.h │ └── tr.h ├── main.cpp ├── param.cpp ├── param.h ├── paramgl.cpp ├── paramgl.h ├── params.txt ├── particles │ ├── renderer.cpp │ ├── renderer.h │ ├── renderloop.cpp │ └── renderloop.h ├── project.c ├── smoke │ ├── Cubemap.cpp │ ├── Cubemap.h │ ├── GLSLProgram.cpp │ ├── GLSLProgram.h │ ├── GpuArray.h │ ├── SmokeShaders.cpp │ ├── SmokeShaders.h │ ├── depthSort.cpp │ ├── depthSort.cu │ ├── depthSort.h │ ├── framebufferObject.cpp │ ├── framebufferObject.h │ ├── loadPPM.cpp │ ├── loadPPM.h │ ├── nvMath.h │ ├── nvMatrix.h │ ├── nvQuaternion.h │ ├── nvVector.h │ ├── renderer.cpp │ ├── renderer.h │ ├── renderloop.cpp │ ├── renderloop.h │ ├── timer.h │ └── tr.h ├── splotch │ ├── Blending.h │ ├── GLSLProgram.cpp │ ├── GLSLProgram.h │ ├── MathArray.h │ ├── Splotch.h │ ├── Texture.h │ ├── Vertex.h │ ├── renderer.cpp │ ├── renderer.h │ ├── renderloop.cpp │ ├── renderloop.h │ └── splotch.cpp ├── tr.c └── vector_math.h ├── catalyst ├── BonsaiCatalystData.cpp ├── BonsaiCatalystData.h ├── CMakeLists.txt ├── Makefile ├── RendererData.cpp ├── 
RendererData.h ├── main.cpp ├── vtkBonsaiPipeline.cpp └── vtkBonsaiPipeline.h ├── common ├── BonsaiIO.h ├── BonsaiSharedData.h ├── IDType.h ├── SharedMemory.h ├── anyoption.cpp └── anyoption.h ├── cvt2grid ├── BonsaiIO.h ├── IDType.h ├── Makefile ├── Node.h ├── Particle.h ├── Tree.h ├── boundary.h ├── cvt2grid.cpp ├── cvt2gridTree.cpp ├── densCalc.cpp ├── key_table ├── morton_key.h ├── vector3.h └── wtime.h ├── density_estimator ├── Makefile ├── Node.h ├── Particle.h ├── README ├── boundary.h ├── density.cpp ├── density.h ├── img │ ├── snap000.png │ ├── snap050.png │ ├── snap100.png │ ├── snap150.png │ ├── snap200.png │ ├── snap250.png │ └── snap300.png ├── key_table ├── memalign_allocator.h ├── morton_key.h ├── ngb_fast.gz ├── ngb_slow.gz ├── plot2d.py ├── read_tipsy.cpp ├── read_tipsy.h ├── tipsydefs.h ├── vector3.h └── wtime.h ├── parallelIO ├── Makefile ├── bonsaiIO.h ├── sion_write_snapshot.h ├── tipsydefs.h ├── write_snapshot.h └── write_test.cpp ├── postProcessTools └── density │ ├── bmpVisualizer │ ├── Makefile │ ├── color_map.bmp │ ├── display.cpp │ ├── display.h │ ├── gen_image_voxel.cpp │ ├── voxel.cpp │ └── voxel.h │ ├── convDensBinToAscii.cpp │ ├── runDensityBMP.py │ └── runDensityGnuplotBinary.py ├── readSnap ├── Makefile ├── read_tipsy.cpp ├── read_tipsy.h └── tipsydefs.h ├── snapServe ├── CMakeLists.txt ├── filelist.example.txt └── main.cpp └── tarScripts ├── README ├── sortSnapshots.py └── tarSnapshots.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.cu_o 6 | 7 | # Compiled Dynamic libraries 8 | *.so 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | .*.swp 15 | 16 | #Temporary editor files 17 | *~ 18 | -------------------------------------------------------------------------------- /Titan_jobScripts/analyse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | filename=$1 
3 | ninter=`cat $filename|grep direct|awk '{if ($8 >= 31) {dir+= $10; apprx += $12; print dir/1e12," ", apprx/1e12}}'|tail -n 1` 4 | nflops=`cat $filename|grep direct|awk '{if ($8 >= 31) {dir+= $10*23; apprx += $12*65; print dir/1e12," ", apprx/1e12}}'|tail -n 1` 5 | time=`cat $filename|grep TOTAL|tail -n 1|awk '{print "TOTAL=", $4, " GRAV=", $6, " GPU=", $8}'` 6 | pflop=`echo $nflops $time | awk '{print "Performance [TFlop/s]: GPU= ", ($1+$2)/$8, " GPU+LET=", ($1+$2)/$6, " Effective=", ($1+$2)/$4}'` 7 | 8 | echo $ninter 9 | echo $nflops 10 | echo $time 11 | echo $pflop 12 | 13 | -------------------------------------------------------------------------------- /Titan_jobScripts/extract.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | awk '{if (NR==1) {dir=$1; app= $2}; if (NR==3) {tot= $2; grav= $4; gpu= $6}; if (NR==4) {print $4, $6, $8, tot, grav, gpu, dir,app} }' 3 | -------------------------------------------------------------------------------- /Titan_jobScripts/readme: -------------------------------------------------------------------------------- 1 | These are job script generators that we used to run jobs on Titan. 2 | runMWStrong.py requires an input file that can be downloaded from 3 | http://castle.strw.leidenuniv.nl/WPD09_test3_1B.tipsy [37GB] 4 | 5 | -------------------------------------------------------------------------------- /Titan_jobScripts/runMW-A.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | def makeJob(n): 6 | run = "runMW-A" 7 | path = os.getcwd() +"/"+run+"/"+str(n)+"/" 8 | print path 9 | os.system("mkdir -p "+path) 10 | fileOut = open(path+"jobScript", "w") 11 | fileOut.write("#!/bin/bash \n") 12 | fileOut.write("# Begin PBS directives \n") 13 | fileOut.write("#PBS -A ast032 \n") 14 | fileOut.write("#PBS -N run_" + run + "\n") 15 | fileOut.write("#PBS -j oe \n") 16 | fileOut.write("#PBS -l 
walltime=00:30:00,nodes="+str(n) +" \n") 17 | fileOut.write("#PBS -l gres=widow2%widow3 \n") 18 | fileOut.write("# End PBS directives and begin shell commands \n") 19 | 20 | fileOut.write("module load cmake\n") 21 | fileOut.write("module load vim/7.3\n") 22 | fileOut.write("module load cudatoolkit\n") 23 | fileOut.write("module load git\n") 24 | 25 | fileOut.write("module swap PrgEnv-pgi PrgEnv-gnu\n") 26 | fileOut.write("module module intel\n") 27 | fileOut.write("cd " + path +" \n") 28 | fileOut.write("export OMP_SET_NUM_THREADS=16\n") 29 | fileOut.write("date\n") 30 | fileOut.write("aprun -n${PBS_NUM_NODES} -d16 -cc 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -N 1 "); 31 | fileOut.write("/tmp/work/jbedorf/BonsaiEv/runtime/bonsai2-mw ") 32 | os.system("/bin/cp -f /tmp/work/jbedorf/BonsaiEv/runtime/galactics_mw_df/*.gz "+path) 33 | os.system("gzip -vfd "+path+"*.gz") 34 | fileOut.write(" --milkyway 16000000 --mwfork 16 -I 64 -T 2 -r 1 -o 0.4 -t 0.0078125 -e 0.005 --prepend-rank \n") 35 | # fileOut.write(" --milkyway 16000000 --mwfork 16 -I 64 -T 2 -r 1 -o 0.4 -t 0.00390625 -e 0.005 --prepend-rank \n") 36 | # fileOut.write(" --milkyway 16000000 --mwfork 16 -I 64 -T 2 -r 1 -o 0.4 -t 0.001953125 -e 0.005 --prepend-rank \n") 37 | # fileOut.write(" --milkyway 16000000 --mwfork 16 -I 64 -T 2 -r 1 -o 0.4 -t 0.0009765625 -e 0.005 --prepend-rank \n") 38 | 39 | fileOut.close() 40 | 41 | makeJob(1) 42 | makeJob(2) 43 | makeJob(4) 44 | makeJob(16) 45 | makeJob(64) 46 | makeJob(256) 47 | makeJob(1024) 48 | makeJob(2048) 49 | makeJob(4096) 50 | makeJob(8192) 51 | makeJob(16384) 52 | makeJob(18600) 53 | 54 | -------------------------------------------------------------------------------- /Titan_jobScripts/runMWStrong.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | def makeJob(n): 6 | run = "MW" 7 | path = os.getcwd() +"/"+run+"/"+str(n)+"/" 8 | print path 9 | os.system("mkdir -p "+path) 10 | fileOut = 
open(path+"jobScript", "w") 11 | fileOut.write("#!/bin/bash \n") 12 | fileOut.write("# Begin PBS directives \n") 13 | fileOut.write("#PBS -A ast032 \n") 14 | fileOut.write("#PBS -N run_" + run + "\n") 15 | fileOut.write("#PBS -j oe \n") 16 | fileOut.write("#PBS -l walltime=00:10:00,nodes="+str(n) +" \n") 17 | fileOut.write("#PBS -l gres=widow2%widow3 \n") 18 | fileOut.write("# End PBS directives and begin shell commands \n") 19 | 20 | fileOut.write("module load cmake\n") 21 | fileOut.write("module load vim/7.3\n") 22 | fileOut.write("module load cudatoolkit\n") 23 | fileOut.write("module load git\n") 24 | 25 | fileOut.write("module swap PrgEnv-pgi PrgEnv-gnu\n") 26 | fileOut.write("cd " + path +" \n") 27 | fileOut.write("export OMP_SET_NUM_THREADS=16\n") 28 | fileOut.write("date\n") 29 | fileOut.write("aprun -n${PBS_NUM_NODES} -d16 -cc 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -N 1 "); 30 | 31 | fileOut.write("/lustre/widow2/scratch/jbedorf/BonsaiGitStrong/runtime/bonsai2_slowdust_run "); 32 | fileOut.write("-i /tmp/work/jbedorf/1BTest/IC/WPD09_test3_1B.tipsy ") 33 | fileOut.write("-I 64 -T 2 -r 1 -o 0.4 -t 0.000001 -e 0.01 --prepend-rank\n") 34 | 35 | fileOut.close() 36 | 37 | makeJob(1) 38 | makeJob(2) 39 | makeJob(4) 40 | makeJob(16) 41 | makeJob(64) 42 | makeJob(128) 43 | makeJob(256) 44 | makeJob(512) 45 | makeJob(1024) 46 | makeJob(2048) 47 | makeJob(4096) 48 | makeJob(8192) 49 | makeJob(16384) 50 | makeJob(18600) 51 | 52 | -------------------------------------------------------------------------------- /Titan_jobScripts/runPLStrong.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | def makeJob(n): 6 | run = "PL" 7 | path = os.getcwd() +"/"+run+"/"+str(n)+"/" 8 | print path 9 | os.system("mkdir -p "+path) 10 | fileOut = open(path+"jobScript", "w") 11 | fileOut.write("#!/bin/bash \n") 12 | fileOut.write("# Begin PBS directives \n") 13 | fileOut.write("#PBS -A ast032 \n") 14 | 
fileOut.write("#PBS -N run_" + run + "\n") 15 | fileOut.write("#PBS -j oe \n") 16 | fileOut.write("#PBS -l walltime=00:10:00,nodes="+str(n) +" \n") 17 | fileOut.write("#PBS -l gres=widow2%widow3 \n") 18 | fileOut.write("# End PBS directives and begin shell commands \n") 19 | 20 | fileOut.write("module load cmake\n") 21 | fileOut.write("module load vim/7.3\n") 22 | fileOut.write("module load cudatoolkit\n") 23 | fileOut.write("module load git\n") 24 | 25 | fileOut.write("module swap PrgEnv-pgi PrgEnv-gnu\n") 26 | fileOut.write("cd " + path +" \n") 27 | fileOut.write("export OMP_SET_NUM_THREADS=16\n") 28 | fileOut.write("date\n") 29 | fileOut.write("aprun -n${PBS_NUM_NODES} -d16 -cc 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -N 1 "); 30 | 31 | fileOut.write("/lustre/widow2/scratch/jbedorf/BonsaiGitStrong/runtime/bonsai2_slowdust_run "); 32 | fileOut.write("-i /tmp/work/jbedorf/1BTest/IC/WPD09_test3_1B.tipsy ") 33 | nptcl = 16000000*64/n 34 | fileOut.write("-I 64 -T 2 -r 1 -o 0.4 -t 0.000001 -e 0.01 --prepend-rank --plummer "+str(nptcl)+"\n") 35 | 36 | fileOut.close() 37 | 38 | makeJob(1) 39 | makeJob(2) 40 | makeJob(4) 41 | makeJob(16) 42 | makeJob(64) 43 | makeJob(128) 44 | makeJob(256) 45 | makeJob(512) 46 | makeJob(1024) 47 | makeJob(2048) 48 | makeJob(4096) 49 | makeJob(8192) 50 | makeJob(16384) 51 | makeJob(18600) 52 | 53 | -------------------------------------------------------------------------------- /Titan_jobScripts/runPlummerWeak.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | def makeJob(n): 6 | run = "F" 7 | path = os.getcwd() +"/"+run+"/"+str(n)+"/" 8 | print path 9 | os.system("mkdir -p "+path) 10 | fileOut = open(path+"jobScript", "w") 11 | fileOut.write("#!/bin/bash \n") 12 | fileOut.write("# Begin PBS directives \n") 13 | fileOut.write("#PBS -A ast032 \n") 14 | fileOut.write("#PBS -N run_" + run + "\n") 15 | fileOut.write("#PBS -j oe \n") 16 | fileOut.write("#PBS -l 
walltime=00:30:00,nodes="+str(n) +" \n") 17 | fileOut.write("#PBS -l gres=widow2%widow3 \n") 18 | fileOut.write("# End PBS directives and begin shell commands \n") 19 | 20 | fileOut.write("module load cmake\n") 21 | fileOut.write("module load vim/7.3\n") 22 | fileOut.write("module load cudatoolkit\n") 23 | fileOut.write("module load git\n") 24 | 25 | fileOut.write("module swap PrgEnv-pgi PrgEnv-gnu\n") 26 | fileOut.write("cd " + path +" \n") 27 | fileOut.write("export OMP_SET_NUM_THREADS=16\n") 28 | fileOut.write("date\n") 29 | fileOut.write("aprun -n${PBS_NUM_NODES} -d16 -cc 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -N 1 "); 30 | fileOut.write("/tmp/work/jbedorf/BonsaiNew/runtime/bonsai2_jb_final ") 31 | fileOut.write("-i /tmp/work/jbedorf/1BTest/IC/WPD09_test3_1B.tipsy ") 32 | fileOut.write(" --plummer 16000000 -I 64 -T 2 -r 1 -o 0.4 -t 0.000001 -e 0.01 --prepend-rank \n") 33 | 34 | fileOut.close() 35 | 36 | makeJob(1) 37 | makeJob(2) 38 | makeJob(4) 39 | makeJob(16) 40 | makeJob(64) 41 | makeJob(256) 42 | makeJob(1024) 43 | makeJob(2048) 44 | makeJob(4096) 45 | makeJob(8192) 46 | makeJob(16384) 47 | makeJob(18600) 48 | 49 | -------------------------------------------------------------------------------- /images/deepfield1.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield1.ppm -------------------------------------------------------------------------------- /images/deepfield1_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield1_1k.ppm -------------------------------------------------------------------------------- /images/deepfield2.ppm: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield2.ppm -------------------------------------------------------------------------------- /images/deepfield2_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield2_1k.ppm -------------------------------------------------------------------------------- /images/deepfield3.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield3.ppm -------------------------------------------------------------------------------- /images/deepfield3_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield3_1k.ppm -------------------------------------------------------------------------------- /images/deepfield4.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield4.ppm -------------------------------------------------------------------------------- /images/deepfield4_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield4_1k.ppm -------------------------------------------------------------------------------- /images/deepfield5.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield5.ppm 
-------------------------------------------------------------------------------- /images/deepfield5_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield5_1k.ppm -------------------------------------------------------------------------------- /images/deepfield6.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield6.ppm -------------------------------------------------------------------------------- /images/deepfield6_1k.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/images/deepfield6_1k.ppm -------------------------------------------------------------------------------- /inputExamples/component_numbers.txt: -------------------------------------------------------------------------------- 1 | 9230 194 575 2 | 3 | #First line should contain three numbers that will be used 4 | #to compute the final particle numbers used 5 | #First number: number of halo particles 6 | #Second number: number of bulge particles 7 | #Third number: number of disk particles 8 | #Note that the ratio's are used so you do not need the accurate numbers for example: 9 | 90 2 8 10 | Will create 90% dark-matter, 2% bulge and 8% disk particles 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /inputExamples/galactics_mw_df/cordbh.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/cordbh.dat.gz -------------------------------------------------------------------------------- 
/inputExamples/galactics_mw_df/dbh.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/dbh.dat.gz -------------------------------------------------------------------------------- /inputExamples/galactics_mw_df/denspsibulge.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/denspsibulge.dat.gz -------------------------------------------------------------------------------- /inputExamples/galactics_mw_df/denspsihalo.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/denspsihalo.dat.gz -------------------------------------------------------------------------------- /inputExamples/galactics_mw_df/freqdbh.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/freqdbh.dat.gz -------------------------------------------------------------------------------- /inputExamples/galactics_mw_df/mr.dat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/galactics_mw_df/mr.dat.gz -------------------------------------------------------------------------------- /inputExamples/model3_child_compact.tipsy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/inputExamples/model3_child_compact.tipsy 
-------------------------------------------------------------------------------- /runtime/README.md: -------------------------------------------------------------------------------- 1 | CUDA Runtime API version of Bonsai 2 | 3 | 4 | To quickly test the single GPU code: 5 | ``` 6 | cmake -DUSE_CUB=0 -DUSE_DUST=0 -DUSE_MPI=0 -DUSE_MPIMT=0 7 | # Optionally run ccmake . to modify the CMake settings 8 | make 9 | ulimit -s unlimited 10 | ./bonsai2_slowdust -i model3_child_compact.tipsy -T 10 11 | ``` 12 | 13 | It should give something like: 14 | iter=160 : time= 10 Etot= -417.1994462 Ekin= 430.589 Epot= -847.788 : de= -0.00416569 ( 0.00416569 ) d(de)= -0 ( 0.00031664 ) t_sim= 0.545423 sec 15 | 16 | 17 | To generate a Milky Way galaxy you need the galactics.parallel fork of John Dubinski's galactics code. Once you have the tarball, do the following in this folder: 18 | 1) tar xzf galactics.parallel.tar.gz 19 | 2) cd galactic.parallel/src 20 | 3) make -f Makefile.[ifort/gnu] use ifort if you have Intel Fortran compiler (3x faster than gfortran), otherwise use Makefile.gnu 21 | 4) cp libgengalaxy.a ../../ 22 | 5) cd ../../ 23 | 6) 24 | cmake -DUSE_GALACTICS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_COMPILER=mpicxx 25 | or 26 | cmake -DUSE_GALACTICS_IFORT=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_COMPILER=mpicxx 27 | 28 | 7) (not needed, but kept here for reference) ccmake . 29 | change USE_GALACTICS from OFF to ON 30 | change USE_GALACTICS_IFORT from OFF to ON if you use Intel Fortran compiler 31 | type c, c, g 32 | 8) make -j 33 | 34 | To run galactics, you need the DF data; these are located in galactics_mw_df. Copy them to the execution folder and unzip 35 | 1) cd my_exec_folder 36 | 2) cp path_to_bonsai/runtime/galactics_mw_df/*gz . 
37 | 3) gzip -vd *.gz 38 | 4) mpirun -np 4 path_to_bonsai/runtime/bonsai2_slowdust --milkyway 500000 -o 0.4 -T 10 -r 1 --eps 0.1 -t 0.001 2>&1 | tee log_MWa 39 | 40 | Units: 41 | basic: distance = 1 kpc, speed= 100 km/s 42 | derived: mass= distance*speed^2/G= 2.324876e9 Msun, time= sqrt(distance^3/G/mass) = 9.778145 Myr 43 | 44 | Enjoy the results! 45 | 46 | 47 | -------------------------------------------------------------------------------- /runtime/component_numbers.txt: -------------------------------------------------------------------------------- 1 | 90 2 8 2 | -------------------------------------------------------------------------------- /runtime/include/BonsaiSharedData.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "IDType.h" 5 | #ifdef BONSAI_CATALYST_STDLIB 6 | #include 7 | #define bonsaistd boost 8 | #define jb_to_string boost::lexical_cast 9 | #else 10 | #define jb_to_string std::to_string 11 | #endif 12 | 13 | struct BonsaiSharedHeader 14 | { 15 | float tCurrent; 16 | size_t nBodies; 17 | char fileName[256]; 18 | bool handshake; 19 | bool done_writing; 20 | }; 21 | 22 | struct BonsaiSharedData 23 | { 24 | IDType ID; 25 | float x,y,z,mass; 26 | float vx,vy,vz,vw; 27 | float rho,h; 28 | }; 29 | 30 | struct BonsaiSharedQuickHeader : public BonsaiSharedHeader 31 | { 32 | static std::string sharedFile(const int rank, const int pid) 33 | { 34 | const std::string fn = "/BonsaiQuickHeader-"+jb_to_string(pid)+"-"+jb_to_string(rank); 35 | return fn; 36 | } 37 | }; 38 | 39 | struct BonsaiSharedQuickData : public BonsaiSharedData 40 | { 41 | static std::string sharedFile(const int rank, const int pid) 42 | { 43 | const std::string fn = "/BonsaiQuickData-"+jb_to_string(pid)+"-"+jb_to_string(rank); 44 | return fn; 45 | } 46 | }; 47 | 48 | struct BonsaiSharedSnapHeader : public BonsaiSharedHeader 49 | { 50 | static std::string sharedFile(const int rank, const int pid) 51 | { 52 | 
const std::string fn = "/BonsaiSnapHeader-"+jb_to_string(pid)+"-"+jb_to_string(rank); 53 | return fn; 54 | } 55 | }; 56 | 57 | struct BonsaiSharedSnapData : public BonsaiSharedData 58 | { 59 | static std::string sharedFile(const int rank, const int pid) 60 | { 61 | const std::string fn = "/BonsaiSnapData-"+jb_to_string(pid)+"-"+jb_to_string(rank); 62 | return fn; 63 | } 64 | }; 65 | -------------------------------------------------------------------------------- /runtime/include/FileIO.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Functions to read / write to various file formats 4 | * 5 | * 6 | * 7 | */ 8 | 9 | #pragma once 10 | 11 | 12 | 13 | #include "IDType.h" 14 | 15 | #ifdef USE_MPI 16 | #include "BonsaiIO.h" 17 | #endif 18 | 19 | /************* data exchange containers for async IO ***************/ 20 | 21 | struct IOSharedData_t 22 | { 23 | volatile bool writingFinished; 24 | volatile float t_current; 25 | volatile int nBodies; 26 | unsigned long long * volatile IDs; 27 | real4 * volatile Pos, * volatile Vel; 28 | IOSharedData_t() : writingFinished(true), nBodies(0), IDs(NULL), Pos(NULL), Vel(NULL) {} 29 | void malloc(const int n) volatile 30 | { 31 | assert(nBodies == 0); 32 | nBodies = n; 33 | IDs = (unsigned long long*volatile)::malloc(n*sizeof(unsigned long long)); 34 | Pos = (real4*volatile)::malloc(n*sizeof(real4)); 35 | Vel = (real4*volatile)::malloc(n*sizeof(real4)); 36 | } 37 | void free() volatile 38 | { 39 | assert(nBodies > 0); 40 | nBodies = 0; 41 | ::free(IDs); 42 | ::free(Pos); 43 | ::free(Vel); 44 | } 45 | ~IOSharedData_t() 46 | { 47 | if (nBodies > 0) 48 | free(); 49 | } 50 | }; 51 | 52 | 53 | extern volatile IOSharedData_t ioSharedData; 54 | 55 | 56 | template 57 | static void lHandShake(SharedMemoryBase &header) 58 | { 59 | header.acquireLock(); 60 | header[0].handshake = false; 61 | header.releaseLock(); 62 | 63 | while (!header[0].handshake) 64 | usleep(10000); 65 | 66 | 
/*
 * 64-bit identifier with a type tag packed into the upper 16 bits.
 *
 *   bits 63..48 : type (see getType / setType)
 *   bits 47..0  : ID   (see getID / setID)
 *
 * All accessors are volatile-qualified so they can be used on volatile
 * instances; the assignment operators return void for the same reason
 * (no reference to a volatile object is handed back).
 */
class IDType
{
  private:
    uint64_t _IDTypePacked;
  public:
    IDType() : _IDTypePacked(0) {}
#if 0 /* eg: unsafe methods. Marked for removal in future commits */
    IDType(const uint64_t ID) : _IDTypePacked(ID) {}
    uint64_t getPacked() const { return _IDTypePacked; }
#endif
    void operator=(const IDType &id) volatile
    {
      _IDTypePacked = id._IDTypePacked;
    }
    void operator=(const volatile IDType &id) volatile
    {
      _IDTypePacked = id._IDTypePacked;
    }
    /* Returns the raw packed value (type and ID together). */
    uint64_t get() const volatile
    {
      return _IDTypePacked;
    }
    /* Returns the lower 48 bits. */
    uint64_t getID() const volatile
    {
      return _IDTypePacked & ~0xFFFF000000000000ULL;
    }
    /* Returns the upper 16 bits, shifted down. */
    uint32_t getType() const volatile
    {
      return static_cast<uint32_t>(_IDTypePacked >> 48);
    }
    /* Replaces the lower 48 bits; bits of ID above bit 47 are discarded. */
    void setID(const int64_t ID) volatile
    {
      const uint32_t type = getType();
      _IDTypePacked = (ID & ~0xFFFF000000000000ULL) | (static_cast<uint64_t>(type) << 48);
    }
    /* Replaces the upper 16 bits; only the low 16 bits of type survive the shift. */
    void setType(const int type) volatile
    {
      const uint64_t ID = getID();
      _IDTypePacked = ID | (static_cast<uint64_t>(type) << 48);
    }
};
/**
 * Radix sort downsweep scan kernel entry point.
 *
 * Thread 0 of each block computes the block's work range into shared memory,
 * the block synchronizes, and then every thread constructs a Cta over the
 * shared storage and the key/value/spine pointers and processes that range.
 *
 * NOTE(review): the template argument lists of `Cta` (in the typedef) and of
 * `util::CtaWorkDistribution` (in the parameter list) appear to have been
 * lost from this copy of the file -- restore them from the upstream source.
 */
template <
	typename KernelPolicy,
	typename SizeT,
	typename KeyType,
	typename ValueType>
__launch_bounds__ (KernelPolicy::THREADS, KernelPolicy::MIN_CTA_OCCUPANCY)
__global__
void Kernel(
	SizeT *d_spine,
	KeyType *d_keys0,
	KeyType *d_keys1,
	ValueType *d_values0,
	ValueType *d_values1,
	util::CtaWorkDistribution work_decomposition)
{
	// CTA abstraction type
	typedef Cta Cta;

	// Shared memory pool (one instance per thread block)
	__shared__ typename Cta::SmemStorage smem_storage;

	// Only one thread fills in the shared work limits and texture offsets
	if (threadIdx.x == 0) {

		// Determine our threadblock's work range
		work_decomposition.GetCtaWorkLimits(
			smem_storage.work_limits,
			KernelPolicy::LOG_TILE_ELEMENTS);

		// Element offsets converted to texture units
		smem_storage.tex_offset =
			smem_storage.work_limits.offset / Cta::ELEMENTS_PER_TEX;

		smem_storage.tex_offset_limit =
			smem_storage.work_limits.guarded_offset / Cta::ELEMENTS_PER_TEX;
	}

	// Sync to acquire work limits (written by thread 0 above)
	__syncthreads();

	Cta cta(
		smem_storage,
		d_keys0,
		d_keys1,
		d_values0,
		d_values1,
		d_spine);

	cta.ProcessWorkRange(smem_storage.work_limits);
}
/**
 * Types of scattering strategies.
 *
 * The numeric values are spelled out; they are the same values the
 * enumerators received implicitly before (0, 1, 2).
 */
enum ScatterStrategy {
	SCATTER_DIRECT         = 0,
	SCATTER_TWO_PHASE      = 1,
	SCATTER_WARP_TWO_PHASE = 2
};
47 | */ 48 | template < 49 | int _RADIX_BITS, 50 | int _CURRENT_BIT, 51 | int _CURRENT_PASS, 52 | int _MIN_CTA_OCCUPANCY, 53 | int _LOG_THREADS, 54 | int _LOG_THREAD_ELEMENTS, 55 | util::io::ld::CacheModifier _READ_MODIFIER, 56 | util::io::st::CacheModifier _WRITE_MODIFIER, 57 | ScatterStrategy _SCATTER_STRATEGY, 58 | bool _SMEM_8BYTE_BANKS, 59 | bool _EARLY_EXIT> 60 | struct KernelPolicy 61 | { 62 | enum { 63 | RADIX_BITS = _RADIX_BITS, 64 | CURRENT_BIT = _CURRENT_BIT, 65 | CURRENT_PASS = _CURRENT_PASS, 66 | MIN_CTA_OCCUPANCY = _MIN_CTA_OCCUPANCY, 67 | LOG_THREADS = _LOG_THREADS, 68 | LOG_THREAD_ELEMENTS = _LOG_THREAD_ELEMENTS, 69 | SMEM_8BYTE_BANKS = _SMEM_8BYTE_BANKS, 70 | EARLY_EXIT = _EARLY_EXIT, 71 | 72 | THREADS = 1 << LOG_THREADS, 73 | LOG_TILE_ELEMENTS = LOG_THREADS + LOG_THREAD_ELEMENTS, 74 | }; 75 | 76 | static const util::io::ld::CacheModifier READ_MODIFIER = _READ_MODIFIER; 77 | static const util::io::st::CacheModifier WRITE_MODIFIER = _WRITE_MODIFIER; 78 | static const ScatterStrategy SCATTER_STRATEGY = _SCATTER_STRATEGY; 79 | 80 | }; 81 | 82 | 83 | 84 | } // namespace downsweep 85 | } // namespace radix_sort 86 | } // namespace b40c 87 | 88 | -------------------------------------------------------------------------------- /runtime/include/b40c/radix_sort/policy.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/******************************************************************************
 * Dispatch policy
 ******************************************************************************/

/**
 * Dispatch policy: republishes its four template arguments as
 * compile-time constants of the same names.
 */
template <
	int  TUNE_ARCH_,
	int  RADIX_BITS_,
	bool UNIFORM_SMEM_ALLOCATION_,
	bool UNIFORM_GRID_SIZE_>
struct DispatchPolicy
{
	enum {
		TUNE_ARCH               = TUNE_ARCH_,
		RADIX_BITS              = RADIX_BITS_,
		UNIFORM_SMEM_ALLOCATION = UNIFORM_SMEM_ALLOCATION_,
		UNIFORM_GRID_SIZE       = UNIFORM_GRID_SIZE_,
	};
};


/******************************************************************************
 * Pass policy
 ******************************************************************************/

/**
 * Pass policy: bundles the upsweep, spine, downsweep and dispatch policy
 * types under fixed member typedef names.
 */
template <
	typename UpsweepPolicyT,
	typename SpinePolicyT,
	typename DownsweepPolicyT,
	typename DispatchPolicyT>
struct PassPolicy
{
	typedef UpsweepPolicyT   UpsweepPolicy;
	typedef SpinePolicyT     SpinePolicy;
	typedef DownsweepPolicyT DownsweepPolicy;
	typedef DispatchPolicyT  DispatchPolicy;
};
/**
 * Radix sort spine scan kernel entry point.
 *
 * Only block 0 does any work: every other block returns immediately.
 * Block 0 constructs a Cta over the shared storage and the in/out pointers
 * and processes spine_elements elements.
 *
 * NOTE(review): the template argument list of `Cta` in the typedef appears
 * to have been lost from this copy of the file -- restore it from upstream.
 */
template <
	typename KernelPolicy,
	typename T,
	typename SizeT>
__launch_bounds__ (KernelPolicy::THREADS, 1)
__global__
void Kernel(
	T *d_in,
	T *d_out,
	SizeT spine_elements)
{
	// CTA abstraction type
	typedef Cta Cta;

	// Shared memory pool (one instance per thread block)
	__shared__ typename Cta::SmemStorage smem_storage;

	// Only CTA-0 needs to run
	if (blockIdx.x > 0) return;

	Cta cta(smem_storage, d_in, d_out);
	cta.ProcessWorkRange(spine_elements);
}
63 | } // namespace radix_sort 64 | } // namespace b40c 65 | 66 | -------------------------------------------------------------------------------- /runtime/include/b40c/radix_sort/spine/kernel_policy.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * 17 | * For more information, see our Google Code project site: 18 | * http://code.google.com/p/back40computing/ 19 | * 20 | ******************************************************************************/ 21 | 22 | /****************************************************************************** 23 | * Configuration policy for radix sort spine scan kernel 24 | ******************************************************************************/ 25 | 26 | #pragma once 27 | 28 | #include 29 | #include 30 | 31 | namespace b40c { 32 | namespace radix_sort { 33 | namespace spine { 34 | 35 | 36 | /** 37 | * Spine tuning policy. 
38 | */ 39 | template < 40 | int _LOG_THREADS, 41 | int _LOG_LOAD_VEC_SIZE, 42 | int _LOG_LOADS_PER_TILE, 43 | util::io::ld::CacheModifier _READ_MODIFIER, 44 | util::io::st::CacheModifier _WRITE_MODIFIER> 45 | struct KernelPolicy 46 | { 47 | enum { 48 | LOG_THREADS = _LOG_THREADS, 49 | LOG_LOAD_VEC_SIZE = _LOG_LOAD_VEC_SIZE, 50 | LOG_LOADS_PER_TILE = _LOG_LOADS_PER_TILE, 51 | 52 | THREADS = 1 << LOG_THREADS, 53 | LOG_TILE_ELEMENTS = LOG_THREADS + LOG_LOAD_VEC_SIZE + LOG_LOADS_PER_TILE, 54 | }; 55 | 56 | static const util::io::ld::CacheModifier READ_MODIFIER = _READ_MODIFIER; 57 | static const util::io::st::CacheModifier WRITE_MODIFIER = _WRITE_MODIFIER; 58 | }; 59 | 60 | 61 | } // namespace spine 62 | } // namespace radix_sort 63 | } // namespace b40c 64 | 65 | -------------------------------------------------------------------------------- /runtime/include/b40c/radix_sort/spine/tex_ref.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
/**
 * Templated texture reference for spine.
 *
 * Holds one static texture reference (`ref`) and a helper that binds a
 * device pointer to it.
 *
 * NOTE(review): several angle-bracket spans appear to have been lost from
 * this copy of the file -- the `template` headers, the arguments of
 * `texture`, of `cudaCreateChannelDesc` and of `TexSpine` in the static
 * member definition. Restore them from the upstream source.
 */
template
struct TexSpine
{
	typedef texture TexRef;

	static TexRef ref;

	/**
	 * Bind `bytes` bytes starting at d_spine to `ref`.
	 * Returns cudaSuccess, or the value produced by util::B40CPerror for the
	 * failed cudaBindTexture call.
	 */
	static cudaError_t BindTexture(void *d_spine, size_t bytes)
	{
		cudaError_t retval = cudaSuccess;
		do {
			cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc();

			// Bind the spine texture reference.
			// The assignment inside the condition is intentional: a non-zero
			// result leaves the do/while(0) early.
			// presumably B40CPerror reports the message and returns the error it was given -- verify
			if (retval = util::B40CPerror(cudaBindTexture(
				0,
				ref,
				d_spine,
				tex_desc,
				bytes),
					"cudaBindTexture TexSpine failed", __FILE__, __LINE__)) break;

		} while (0);

		return retval;
	}

};

// Reference definition (out-of-class definition of the static member `ref`)
template
typename TexSpine::TexRef TexSpine::ref;
/**
 * Radix sort upsweep reduction kernel entry point.
 *
 * Selects the input key buffer from the parity of KernelPolicy::CURRENT_PASS
 * (odd pass: d_out_keys, even pass: d_in_keys), computes this block's work
 * range, and hands both to a Cta together with the spine pointer.
 *
 * NOTE(review): the template argument lists of `Cta`,
 * `util::CtaWorkDistribution` and `util::CtaWorkLimits` appear to have been
 * lost from this copy of the file -- restore them from the upstream source.
 */
template <
	typename KernelPolicy,
	typename SizeT,
	typename KeyType>
__launch_bounds__ (KernelPolicy::THREADS, KernelPolicy::MIN_CTA_OCCUPANCY)
__global__
void Kernel(
	SizeT *d_spine,
	KeyType *d_in_keys,
	KeyType *d_out_keys,
	util::CtaWorkDistribution work_decomposition)
{

	// CTA abstraction type
	typedef Cta Cta;

	// Shared memory pool (one instance per thread block)
	__shared__ typename Cta::SmemStorage smem_storage;

	// Determine where to read our input: odd passes read d_out_keys
	KeyType *d_keys = (KernelPolicy::CURRENT_PASS & 0x1) ?
		d_out_keys :
		d_in_keys;

	// Determine our threadblock's work range (computed by every thread into a local)
	util::CtaWorkLimits work_limits;
	work_decomposition.GetCtaWorkLimits(
		work_limits,
		KernelPolicy::LOG_TILE_ELEMENTS);

	Cta cta(smem_storage, d_keys, d_spine);
	cta.ProcessWorkRange(work_limits);
}
16 | * 17 | * For more information, see our Google Code project site: 18 | * http://code.google.com/p/back40computing/ 19 | * 20 | ******************************************************************************/ 21 | 22 | /****************************************************************************** 23 | * Configuration policy for radix sort upsweep reduction kernel 24 | ******************************************************************************/ 25 | 26 | #pragma once 27 | 28 | #include 29 | #include 30 | 31 | namespace b40c { 32 | namespace radix_sort { 33 | namespace upsweep { 34 | 35 | /** 36 | * Radix sort upsweep reduction tuning policy. 37 | */ 38 | template < 39 | int _RADIX_BITS, 40 | int _CURRENT_BIT, 41 | int _CURRENT_PASS, 42 | int _MIN_CTA_OCCUPANCY, 43 | int _LOG_THREADS, 44 | int _LOG_LOAD_VEC_SIZE, 45 | int _LOG_LOADS_PER_TILE, 46 | util::io::ld::CacheModifier _READ_MODIFIER, 47 | util::io::st::CacheModifier _WRITE_MODIFIER, 48 | bool _EARLY_EXIT> 49 | struct KernelPolicy 50 | { 51 | enum { 52 | RADIX_BITS = _RADIX_BITS, 53 | CURRENT_BIT = _CURRENT_BIT, 54 | CURRENT_PASS = _CURRENT_PASS, 55 | MIN_CTA_OCCUPANCY = _MIN_CTA_OCCUPANCY, 56 | LOG_THREADS = _LOG_THREADS, 57 | LOG_LOAD_VEC_SIZE = _LOG_LOAD_VEC_SIZE, 58 | LOG_LOADS_PER_TILE = _LOG_LOADS_PER_TILE, 59 | EARLY_EXIT = _EARLY_EXIT, 60 | 61 | THREADS = 1 << LOG_THREADS, 62 | LOG_TILE_ELEMENTS = LOG_THREADS + LOG_LOAD_VEC_SIZE + LOG_LOADS_PER_TILE, 63 | }; 64 | 65 | static const util::io::ld::CacheModifier READ_MODIFIER = _READ_MODIFIER; 66 | static const util::io::st::CacheModifier WRITE_MODIFIER = _WRITE_MODIFIER; 67 | }; 68 | 69 | 70 | 71 | } // namespace upsweep 72 | } // namespace radix_sort 73 | } // namespace b40c 74 | 75 | -------------------------------------------------------------------------------- /runtime/include/b40c/util/memset_kernel.cuh: -------------------------------------------------------------------------------- 1 | 
/****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * 17 | * For more information, see our Google Code project site: 18 | * http://code.google.com/p/back40computing/ 19 | * 20 | * Thanks! 21 | * 22 | ******************************************************************************/ 23 | 24 | /****************************************************************************** 25 | * Simple Memset Kernel 26 | ******************************************************************************/ 27 | 28 | #pragma once 29 | 30 | namespace b40c { 31 | namespace util { 32 | 33 | /** 34 | * Memset a device vector. 
35 | */ 36 | template 37 | __global__ void MemsetKernel(T *d_out, T value, int length) 38 | { 39 | const int STRIDE = gridDim.x * blockDim.x; 40 | for (int idx = (blockIdx.x * blockDim.x) + threadIdx.x; idx < length; idx += STRIDE) { 41 | d_out[idx] = value; 42 | } 43 | } 44 | 45 | 46 | } // namespace util 47 | } // namespace b40c 48 | 49 | -------------------------------------------------------------------------------- /runtime/include/b40c/util/operators.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * 17 | * For more information, see our Google Code project site: 18 | * http://code.google.com/p/back40computing/ 19 | * 20 | * Thanks! 21 | * 22 | ******************************************************************************/ 23 | 24 | /****************************************************************************** 25 | * Simple reduction operators 26 | ******************************************************************************/ 27 | 28 | #pragma once 29 | 30 | namespace b40c { 31 | namespace util { 32 | 33 | /** 34 | * Static operator wrapping structure. 35 | * 36 | * (N.B. due to an NVCC/cudafe 4.0 regression, we can't specify static templated 37 | * functions inside other types...) 
38 | */ 39 | template 40 | struct Operators 41 | { 42 | /** 43 | * Empty default transform function 44 | */ 45 | static __device__ __forceinline__ void NopTransform(T &val) {} 46 | 47 | }; 48 | 49 | 50 | /** 51 | * Default equality functor 52 | */ 53 | template 54 | struct Equality 55 | { 56 | __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) 57 | { 58 | return a == b; 59 | } 60 | }; 61 | 62 | 63 | /** 64 | * Default sum functor 65 | */ 66 | template 67 | struct Sum 68 | { 69 | // Binary reduction 70 | __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) 71 | { 72 | return a + b; 73 | } 74 | 75 | // Identity 76 | __host__ __device__ __forceinline__ T operator()() 77 | { 78 | return (T) 0; 79 | } 80 | }; 81 | 82 | 83 | 84 | 85 | } // namespace util 86 | } // namespace b40c 87 | 88 | -------------------------------------------------------------------------------- /runtime/include/b40c/util/ping_pong_storage.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright 2010-2012 Duane Merrill 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * 17 | * For more information, see our Google Code project site: 18 | * http://code.google.com/p/back40computing/ 19 | * 20 | * Thanks! 
21 | * 22 | ******************************************************************************/ 23 | 24 | 25 | /****************************************************************************** 26 | * Storage wrapper for double-buffered vectors (deprecated). 27 | ******************************************************************************/ 28 | 29 | #pragma once 30 | 31 | #include 32 | 33 | namespace b40c { 34 | namespace util { 35 | 36 | /** 37 | * Ping-pong buffer (a.k.a. page-flip, double-buffer, etc.). 38 | * Deprecated: see b40c::util::DoubleBuffer instead. 39 | */ 40 | template < 41 | typename KeyType, 42 | typename ValueType = util::NullType> 43 | struct PingPongStorage : DoubleBuffer 44 | { 45 | typedef DoubleBuffer ParentType; 46 | 47 | // Constructor 48 | PingPongStorage() : ParentType() {} 49 | 50 | // Constructor 51 | PingPongStorage( 52 | KeyType* keys) : ParentType(keys) {} 53 | 54 | // Constructor 55 | PingPongStorage( 56 | KeyType* keys, 57 | ValueType* values) : ParentType(keys, values) {} 58 | 59 | // Constructor 60 | PingPongStorage( 61 | KeyType* keys0, 62 | KeyType* keys1, 63 | ValueType* values0, 64 | ValueType* values1) : ParentType(keys0, keys1, values0, values1) {} 65 | }; 66 | 67 | 68 | } // namespace util 69 | } // namespace b40c 70 | 71 | -------------------------------------------------------------------------------- /runtime/include/b40c/util/random_bits.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2010-2012 Duane Merrill 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * For more information, see our Google Code project site: 17 | * http://code.google.com/p/back40computing/ 18 | * 19 | * Thanks! 20 | ******************************************************************************/ 21 | 22 | /****************************************************************************** 23 | * Random bits generator 24 | ******************************************************************************/ 25 | 26 | #pragma once 27 | 28 | #include 29 | 30 | namespace b40c { 31 | namespace util { 32 | 33 | /** 34 | * Generates random 32-bit keys. 35 | * 36 | * We always take the second-order byte from rand() because the higher-order 37 | * bits returned by rand() are commonly considered more uniformly distributed 38 | * than the lower-order bits. 39 | * 40 | * We can decrease the entropy level of keys by adopting the technique 41 | * of Thearling and Smith in which keys are computed from the bitwise AND of 42 | * multiple random samples: 43 | * 44 | * entropy_reduction | Effectively-unique bits per key 45 | * ----------------------------------------------------- 46 | * -1 | 0 47 | * 0 | 32 48 | * 1 | 25.95 49 | * 2 | 17.41 50 | * 3 | 10.78 51 | * 4 | 6.42 52 | * ... | ... 
/**
 * Fills key with random bits.
 *
 * Every byte of the key is built from bits 7..14 of rand().  Each byte is the
 * bitwise AND of (entropy_reduction + 1) such samples, so larger values of
 * entropy_reduction yield keys with fewer set bits.  With a negative
 * entropy_reduction no sample is taken and every byte is 0xff.
 *
 * If lower_key_bits lies in [0, 8 * sizeof(K)), all bits at or above that
 * position are cleared.  Any other value leaves the key unmasked.  "Lower"
 * is defined in terms of little-endian byte order, which is what the
 * previous integer-based masking produced on little-endian hosts.
 *
 * The key is regenerated until it compares equal to itself, which rejects
 * NaNs when K is a floating point type.
 *
 * NOTE: for a floating point K, a negative entropy_reduction without masking
 * produces an all-ones pattern (a NaN) on every attempt, so the loop never
 * terminates.  Callers must not combine the two.
 */
template <typename K>
void RandomBits(K &key, int entropy_reduction = 0, int lower_key_bits = sizeof(K) * 8)
{
	const unsigned int NUM_UCHARS = sizeof(K);
	const int KEY_BITS = (int) (sizeof(K) * 8);
	unsigned char key_bits[NUM_UCHARS];

	do {

		for (unsigned int j = 0; j < NUM_UCHARS; j++) {
			unsigned char quarterword = 0xff;
			for (int i = 0; i <= entropy_reduction; i++) {
				quarterword &= (rand() >> 7);
			}
			key_bits[j] = quarterword;
		}

		// Mask byte-wise instead of round-tripping through a 64-bit integer:
		// the round trip overflowed the temporary for keys wider than 8 bytes.
		if ((lower_key_bits >= 0) && (lower_key_bits < KEY_BITS)) {
			unsigned int first_cleared = lower_key_bits / 8;
			const int partial_bits = lower_key_bits % 8;
			if (partial_bits != 0) {
				// Keep only the low partial_bits of the boundary byte
				key_bits[first_cleared] &= (unsigned char) ((1u << partial_bits) - 1);
				first_cleared++;
			}
			for (unsigned int j = first_cleared; j < NUM_UCHARS; j++) {
				key_bits[j] = 0;
			}
		}

		memcpy(&key, key_bits, sizeof(K));

	} while (key != key);		// avoids NaNs when generating random floating point numbers
}
21 | * 22 | ******************************************************************************/ 23 | 24 | /****************************************************************************** 25 | * Kernel utilities loading for CTA-sized tiles of data from memory 26 | ******************************************************************************/ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | namespace b40c { 35 | namespace util { 36 | 37 | /** 38 | * Texture vector types for reading ELEMENTS consecutive elements of T per thread 39 | */ 40 | template 41 | struct TexVector 42 | { 43 | enum { 44 | TEX_VEC_SIZE = (NumericTraits::BUILT_IN) ? 45 | 4 : // cast as vec-4 for non-built-ins (don't actually use!) 46 | (sizeof(T) > 4) ? 47 | (ELEMENTS % 2 == 1) ? // 64-bit built-in types 48 | 2 : // cast as vec-2 ints (odd) 49 | 4 : // cast as vec-4 ints (multiple of two) 50 | (ELEMENTS % 2 == 1) ? // 32-bit built-in types 51 | 1 : // vec-1 (odd) 52 | (ELEMENTS % 4 == 0) ? 53 | 4 : // vec-4 (multiple of 4) 54 | 2, // vec-2 (multiple of 2) 55 | }; 56 | 57 | // Texture base type 58 | typedef typename If<(NumericTraits::BUILT_IN), 59 | char, // use char for non-built-ins (don't actually use!) 60 | typename If<(sizeof(T) > 4), 61 | int, // use int for 64-bit built-in types 62 | T>::Type>::Type TexBase; // use T for other built-in types 63 | 64 | // Texture vector type 65 | typedef typename util::VecType::Type VecType; 66 | 67 | // Number of T loaded per texture load 68 | enum { 69 | ELEMENTS_PER_TEX = sizeof(VecType) / sizeof(T), 70 | }; 71 | 72 | // Texture reference type 73 | typedef texture TexRef; 74 | }; 75 | 76 | 77 | } // namespace util 78 | } // namespace b40c 79 | 80 | -------------------------------------------------------------------------------- /runtime/include/bonsai.h: -------------------------------------------------------------------------------- 1 | // BONSAI.H 2 | // 3 | // Every CUDA file includes this. 
It contains the wrapper for __global__ 4 | // functions to allow easy redefinition of names and types. 5 | // 6 | // The wrapper defaults evaluates to a standard definition. 7 | // 8 | #ifndef BONSAI_H 9 | #define BONSAI_H 10 | 11 | // Macro to map between separate compilation and non-separate compilation names 12 | #ifndef KERNEL_DECLARE // Avoids redefinition errors from multiple includes 13 | #ifndef KERNEL_SEPARATE // Separate declaration option 14 | #define KERNEL_NAME(funcname) funcname 15 | #define KERNEL_DECLARE(funcname) extern "C" __global__ void KERNEL_NAME(funcname) 16 | #else 17 | #define KERNEL_NAME(funcname) gpu_ ## funcname 18 | #define KERNEL_DECLARE(funcname) __global__ void KERNEL_NAME(funcname) 19 | #endif // KERNEL_SEPARATE 20 | #endif // KERNEL_DECLARE 21 | 22 | 23 | // These are the call-out routines to do kernel launches separately from when 24 | // embedded inside classes. It allows for alternate launch paths. 25 | #include "my_cuda_rt.h" 26 | class octree; 27 | 28 | void build_tree_node_levels(octree &tree, 29 | my_dev::dev_mem &validList, 30 | my_dev::dev_mem &compactList, 31 | my_dev::dev_mem &levelOffset, 32 | my_dev::dev_mem &maxLevel, 33 | cudaStream_t stream); 34 | 35 | 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /runtime/include/build.h: -------------------------------------------------------------------------------- 1 | #ifndef __BUILD_H__ 2 | #define __BUILD_H__ 3 | 4 | void build_tree_node_levels(octree &tree, 5 | my_dev::dev_mem &validList, 6 | my_dev::dev_mem &compactList, 7 | my_dev::dev_mem &levelOffset, 8 | my_dev::dev_mem &maxLevel, 9 | cudaStream_t stream); 10 | 11 | #endif -------------------------------------------------------------------------------- /runtime/include/depthSort.h: -------------------------------------------------------------------------------- 1 | #include "vector_math.h" 2 | 3 | extern "C" void initCUDA(); 4 | extern "C" void 
depthSortCUDA(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles); 5 | extern "C" void assignColors(float4 *colors, ulonglong1 *ids, int numParticles, 6 | float2 *density, float maxDensity, 7 | float4 color2, float4 color3, float4 color4, 8 | float4 starColor, float4 bulgeColor, float4 darkMatterColor, float4 dustColor, 9 | int m_brightFreq, float4 t_current); 10 | -------------------------------------------------------------------------------- /runtime/include/log.h: -------------------------------------------------------------------------------- 1 | 2 | #define ENABLE_LOG 1 3 | 4 | #if ENABLE_LOG 5 | extern bool ENABLE_RUNTIME_LOG; 6 | extern bool PREPEND_RANK; 7 | #endif 8 | 9 | #if ENABLE_LOG 10 | #ifdef WIN32 11 | #define LOG(fmt, ...) {if (ENABLE_RUNTIME_LOG) printf(fmt, __VA_ARGS__);} 12 | #else 13 | 14 | 15 | 16 | #ifdef USE_MPI 17 | extern void prependrankLOG(const char *fmt, ...); 18 | #define LOG(...) {if (ENABLE_RUNTIME_LOG) if(PREPEND_RANK) prependrankLOG(__VA_ARGS__); else printf(__VA_ARGS__);} 19 | #else 20 | #define LOG(...) {if (ENABLE_RUNTIME_LOG) printf(__VA_ARGS__);} 21 | #endif 22 | 23 | // 24 | // 25 | #endif 26 | 27 | 28 | //#define LOGF(file, fmt, ...) {if (ENABLE_RUNTIME_LOG) fprintf(file, fmt, __VA_ARGS__);} 29 | 30 | 31 | #ifdef USE_MPI 32 | extern void prependrankLOGF(const char *fmt, ...); 33 | #define LOGF(file, ...) {if (ENABLE_RUNTIME_LOG) if(PREPEND_RANK) prependrankLOGF(__VA_ARGS__); else fprintf(file, __VA_ARGS__);} 34 | #else 35 | //##__VA_ARGS__ is for GCC only, if using something else use __VA_ARGS__ and try to fix the locations where VA_ARGS is empty 36 | #define LOGF(file, fmt, ...) {if (ENABLE_RUNTIME_LOG) fprintf(file, fmt,##__VA_ARGS__);} 37 | #endif 38 | 39 | 40 | #else 41 | #define LOG(fmt, ...) ((void)0) 42 | #define LOGF(file, fmt, ...) 
((void)0) 43 | #endif 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | #if 0 52 | #define ENABLE_LOG 1 53 | 54 | #if ENABLE_LOG 55 | extern bool ENABLE_RUNTIME_LOG; 56 | #endif 57 | 58 | #if ENABLE_LOG 59 | #ifdef WIN32 60 | #define LOG(fmt, ...) {if (ENABLE_RUNTIME_LOG) printf(fmt, __VA_ARGS__);} 61 | #else 62 | #define LOG(...) {if (ENABLE_RUNTIME_LOG) printf(__VA_ARGS__);} 63 | #endif 64 | #define LOGF(file, fmt, ...) {if (ENABLE_RUNTIME_LOG) fprintf(file, fmt, __VA_ARGS__);} 65 | #else 66 | #define LOG(fmt, ...) ((void)0) 67 | #define LOGF(file, fmt, ...) ((void)0) 68 | #endif 69 | #endif 70 | -------------------------------------------------------------------------------- /runtime/include/logFileWriter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef USE_MPI 9 | #include 10 | #else 11 | // #define MPI_Comm int 12 | typedef int MPI_Comm; 13 | #endif 14 | 15 | struct LOGFILEWRITER 16 | { 17 | private: 18 | 19 | int procId, nProc, fileId; 20 | const int fullNProc; 21 | 22 | #ifdef USE_MPI 23 | const MPI_Comm mpi_comm; 24 | #else 25 | const int mpi_comm; 26 | #endif 27 | 28 | 29 | ofstream logFile; 30 | 31 | 32 | private: 33 | 34 | void GatherLogData(const std::string &logData, std::string &fullLog) 35 | { 36 | #ifdef USE_MPI 37 | //Gather sizes 38 | std::vector logSizes(nProc); 39 | int logSize = logData.size(); 40 | MPI_Gather(&logSize, 1, MPI_INT, &logSizes[0], 1, MPI_INT, 0, mpi_comm); 41 | 42 | //Compute displacements 43 | std::vector logDispl(nProc+1,0); 44 | for (int i = 0; i < nProc; i++) 45 | logDispl[i+1] = logDispl[i] + logSizes[i]; 46 | 47 | //Receive the data 48 | const int logRecv = logDispl[nProc]; 49 | fullLog.resize(logRecv); 50 | MPI_Gatherv( 51 | (void*)&logData[0], logSize, MPI_BYTE, 52 | (void*)&fullLog[0], &logSizes[0], &logDispl[0], MPI_BYTE, 0, mpi_comm); 53 | #else 54 | fullLog = logData; 55 | #endif 56 | } 57 | 58 | 59 | public: 60 | 
61 | /* sample_keys must be sorted by Key in an increaing order, otherwise 62 | * assignKeyToProc will fail */ 63 | LOGFILEWRITER(const int _fullNProc, 64 | const MPI_Comm &_mpi_comm, 65 | const MPI_Comm &_mpi_comm2) : 66 | fullNProc(_fullNProc),mpi_comm(_mpi_comm) 67 | { 68 | //Get the local rank and number of ranks 69 | #ifdef USE_MPI 70 | MPI_Comm_rank (_mpi_comm, &procId); 71 | MPI_Comm_size (_mpi_comm, &nProc); 72 | MPI_Comm_rank (_mpi_comm2, &fileId); 73 | #else 74 | procId = 0; 75 | nProc = 1; 76 | fileId = 0; 77 | #endif 78 | 79 | char fileName[64]; 80 | sprintf(fileName, "gpuLog.log-%d-%d", fullNProc, fileId); 81 | 82 | logFile.open(fileName); 83 | } 84 | 85 | ~LOGFILEWRITER() 86 | { 87 | logFile.close(); 88 | } 89 | 90 | void updateLogData(const std::string &logData) 91 | { 92 | //Write the data 93 | std::string fullLog; 94 | GatherLogData(logData, fullLog); 95 | if(procId == 0) { 96 | logFile << fullLog; 97 | } 98 | } 99 | 100 | 101 | }; 102 | -------------------------------------------------------------------------------- /runtime/include/render_particles.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
// Particle renderer interface. Only declarations are visible here; the
// behaviour of every out-of-line member is defined in the implementation file.
class ParticleRenderer
{
public:
    ParticleRenderer();
    ~ParticleRenderer();

    // Position input, available as single and double precision overloads.
    void setPositions(float *pos, int numParticles);
    void setPositions(double *pos, int numParticles);
    // color has four components (presumably RGBA -- confirm in the implementation).
    void setBaseColor(float color[4]);
    void setColors(float *color, int numParticles);
    // NOTE(review): fp64 presumably flags double precision data in the
    // buffer object -- confirm in the implementation.
    void setPBO(unsigned int pbo, int numParticles, bool fp64);

    // Drawing modes accepted by display(); PARTICLE_NUM_MODES is the number
    // of modes, not a mode itself.
    enum DisplayMode
    {
        PARTICLE_POINTS,
        PARTICLE_SPRITES,
        PARTICLE_SPRITES_COLOR,
        PARTICLE_NUM_MODES
    };

    // Draws using the given mode; defaults to PARTICLE_POINTS.
    void display(DisplayMode mode = PARTICLE_POINTS);

    // Store the requested sizes; nothing else happens in these setters.
    void setPointSize(float size)  { m_pointSize = size; }
    void setSpriteSize(float size) { m_spriteSize = size; }

    void resetPBO();

protected: // methods
    void _initGL();
    void _createTexture(int resolution);
    void _drawPoints(bool color = false);


protected: // data
    float  *m_pos;
    double *m_pos_fp64;
    int m_numParticles;

    float m_pointSize;      // written by setPointSize()
    float m_spriteSize;     // written by setSpriteSize()

    // Unsigned integer handles (presumably OpenGL object names -- confirm
    // in the implementation).
    unsigned int m_vertexShader;
    unsigned int m_vertexShaderPoints;
    unsigned int m_pixelShader;
    unsigned int m_programPoints;
    unsigned int m_programSprites;
    unsigned int m_texture;
    unsigned int m_pbo;
    unsigned int m_vboColor;

    float m_baseColor[4];

    bool m_bFp64Positions;
};
#ifndef TIPSYDEFS_H
#define TIPSYDEFS_H

#include <string.h>   // memcpy, used by the 64-bit ID accessors below

#define MAXDIM 3
#define forever for(;;)

typedef float Real;

// Gas (SPH) particle record.
struct gas_particle {
    Real mass;
    Real pos[MAXDIM];
    Real vel[MAXDIM];
    Real rho;
    Real temp;
    Real hsmooth;
    Real metals ;
    Real phi ;
} ;

//struct gas_particle *gas_particles;

// Dark matter particle record; the phi slot is used to hold a 32-bit ID.
struct dark_particle {
    Real mass;
    Real pos[MAXDIM];
    Real vel[MAXDIM];
    Real eps;
    int phi ;
public:
    int  getID() const {return phi;}
    void setID(int ID) { phi = ID; }
} ;

// Star particle record; the phi slot is used to hold a 32-bit ID.
struct star_particle {
    Real mass;
    Real pos[MAXDIM];
    Real vel[MAXDIM];
    Real metals ;
    Real tform ;
    Real eps;
    int phi ;
public:
    int  getID() const {return phi;}
    void setID(int ID) { phi = ID; }
} ;


//V2 structures use 64 bit integers for particle IDs,
//otherwise they take up the same space for compatibility

struct dark_particleV2 {
    Real mass;
    Real pos[MAXDIM];
    Real vel[MAXDIM];
private:
    int _ID[2]; //replaces phi and eps
public:
    // _ID is only 4-byte aligned inside the record, so the 64-bit value is
    // copied with memcpy instead of being read through a casted pointer
    // (which is a misaligned access and breaks strict aliasing).
    unsigned long long getID() const {
      unsigned long long id;
      memcpy(&id, _ID, sizeof(id));
      return id;
    }
    void setID(unsigned long long ID) { memcpy(_ID, &ID, sizeof(ID)); }
    // Second 32-bit word of the ID storage, i.e. the slot that holds phi
    // (the ID) in the V1 layout.
    int getID_V1() const {return _ID[1];}
//    Real eps;
} ;
struct star_particleV2 {
    Real mass;
    Real pos[MAXDIM];
    Real vel[MAXDIM];
    Real metals ;
    Real tform ;
private:
    int _ID[2]; //replaces phi and eps
public:
    // See dark_particleV2: memcpy avoids the misaligned, aliasing-unsafe cast.
    unsigned long long getID() const {
      unsigned long long id;
      memcpy(&id, _ID, sizeof(id));
      return id;
    }
    void setID(unsigned long long ID) { memcpy(_ID, &ID, sizeof(ID)); }
    int getID_V1() const {return _ID[1];}
//    Real eps;
//    int ID; //replaces phi and eps
} ;


// File header of the original format.
struct dump {
    double time ;
    int nbodies ;
    int ndim ;
    int nsph ;
    int ndark ;
    int nstar ;
} ;

// File header with an additional version field.
struct dumpV2 {
    double time ;
    int nbodies ;
    int ndim ;
    int nsph ;
    int ndark ;
    int nstar ;
    int version;
} ;


typedef struct dump header ;

#endif
https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/linux/libGLEW_x86_64.a -------------------------------------------------------------------------------- /runtime/lib/linux_fpic_glew/libGLEW.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/linux_fpic_glew/libGLEW.a -------------------------------------------------------------------------------- /runtime/lib/linux_fpic_glew/libGLEW_x86_64.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/linux_fpic_glew/libGLEW_x86_64.a -------------------------------------------------------------------------------- /runtime/lib/win32/freeglut.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win32/freeglut.dll -------------------------------------------------------------------------------- /runtime/lib/win32/freeglut.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win32/freeglut.lib -------------------------------------------------------------------------------- /runtime/lib/win32/glew32.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win32/glew32.dll -------------------------------------------------------------------------------- /runtime/lib/win32/glew32.lib: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win32/glew32.lib -------------------------------------------------------------------------------- /runtime/lib/win64/freeglut.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win64/freeglut.dll -------------------------------------------------------------------------------- /runtime/lib/win64/freeglut.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win64/freeglut.lib -------------------------------------------------------------------------------- /runtime/lib/win64/glew64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win64/glew64.dll -------------------------------------------------------------------------------- /runtime/lib/win64/glew64.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/runtime/lib/win64/glew64.lib -------------------------------------------------------------------------------- /runtime/paramsDDASYNC.txt: -------------------------------------------------------------------------------- 1 | render_params_points 2 | render_params_volumetric 3 | slices 1 4 | displayed_slices 1 5 | sprite_size 0.074 6 | scale_[log] 0 7 | dust_scale 12 8 | dust_alpha 0.316 9 | light_color_r 0.032 10 | light_color_g 0.036 11 | light_color_b 0.036 12 | alpha 0.1 13 | shadow_alpha 0.1 14 | transmission 0.004 15 | indirect_lighting 0.5 16 | fog 0.001 17 | over_bright_multiplier 3.6 18 | star_brightness 0.44 19 | image_brightness 1.3 
20 | image_gamma 0.454545 21 | blur_radius 0.98 22 | blur_passes 4 23 | source_intensity 0.5 24 | star_blur_radius 40 25 | star_threshold 1.2 26 | star_power 1.2 27 | star_intensity 0 28 | glow_radius 25.2 29 | glow_intensity 0.1 30 | flare_intensity 0 31 | flare_threshold 0.72 32 | flare_radius 40.8 33 | skybox_brightness 0.24 34 | render_params_splotch 35 | star_scale_[log] 0 36 | star_alpha______ 1 37 | dm_scale___[log] -0.4 38 | dm_alpha________ 0.1 39 | max_size_[log]__ 0 40 | alpha___________ 0.02 41 | transmission____ 0.001 42 | brightness_[pre] 0.08 43 | gamma_[pre] 0.4 44 | brightness_[post] 1 45 | gamma_[post] 1 46 | render_params_splotch_sorted 47 | star_scale_[log] 0 48 | star_alpha______ 1 49 | dm_scale___[log] -0.4 50 | dm_alpha________ 0.1 51 | max_size_[log]__ 0 52 | alpha___________ 0.02 53 | transmission____ 0.001 54 | brightness_[pre] 0.08 55 | gamma_[pre] 0.4 56 | brightness_[post] 1 57 | gamma_[post] 1 58 | render_params_volumetric_new 59 | star_scale_[log] 0 60 | star_alpha______ 1 61 | dm_scale___[log] -0.4 62 | dm_alpha________ 0.1 63 | max_size_[log]__ 0 64 | alpha___________ 0.02 65 | transmission____ 0.001 66 | brightness_[pre] 0.08 67 | gamma_[pre] 0.4 68 | brightness_[post] 1 69 | gamma_[post] 1 70 | slices 1 71 | displayed_slices 1 72 | sprite_size 0.074 73 | scale_[log] 0 74 | dust_scale 12 75 | dust_alpha 0.316 76 | light_color_r 0.032 77 | light_color_g 0.036 78 | light_color_b 0.036 79 | alpha 0.1 80 | shadow_alpha 0.1 81 | transmission 0.001 82 | indirect_lighting 0.5 83 | fog 0.001 84 | over_bright_multiplier 3.6 85 | star_brightness 0.44 86 | image_brightness 1.3 87 | image_gamma 0.454545 88 | blur_radius 0.98 89 | blur_passes 4 90 | source_intensity 0.5 91 | star_blur_radius 40 92 | star_threshold 1.2 93 | star_power 1.2 94 | star_intensity 0 95 | glow_radius 25.2 96 | glow_intensity 0.1 97 | flare_intensity 0 98 | flare_threshold 0.72 99 | flare_radius 40.8 100 | skybox_brightness 0.24 101 | 
-------------------------------------------------------------------------------- /runtime/paramsMW.txt: -------------------------------------------------------------------------------- 1 | render_params_points 2 | render_params_volumetric 3 | slices 1 4 | displayed_slices 1 5 | sprite_size 0.0416 6 | scale_[log] -0.464 7 | dust_scale 12 8 | dust_alpha 0.316 9 | light_color_r 0.032 10 | light_color_g 0.036 11 | light_color_b 0.036 12 | alpha 0.1 13 | shadow_alpha 0.1 14 | transmission 0.001 15 | indirect_lighting 0.5 16 | fog 0.001 17 | over_bright_multiplier 3.6 18 | star_brightness 0.44 19 | image_brightness 1.3 20 | image_gamma 0.454545 21 | blur_radius 0.98 22 | blur_passes 0 23 | source_intensity 1.0 24 | star_blur_radius 16.4 25 | star_threshold 0.4 26 | star_power 0.9 27 | star_intensity 0.504 28 | glow_radius 17.6 29 | glow_intensity 0.24 30 | flare_intensity 0.044 31 | flare_threshold 0.44 32 | flare_radius 8.8 33 | skybox_brightness 0.24 34 | render_params_splotch 35 | star_scale_[log] 0 36 | star_alpha______ 1 37 | dm_scale___[log] -0.4 38 | dm_alpha________ 0.1 39 | max_size_[log]__ 0 40 | alpha___________ 0.02 41 | transmission____ 0.001 42 | brightness_[pre] 0.08 43 | gamma_[pre] 0.4 44 | brightness_[post] 1 45 | gamma_[post] 1 46 | render_params_splotch_sorted 47 | star_scale_[log] 0 48 | star_alpha______ 1 49 | dm_scale___[log] -0.4 50 | dm_alpha________ 0.1 51 | max_size_[log]__ 0 52 | alpha___________ 0.02 53 | transmission____ 0.001 54 | brightness_[pre] 0.08 55 | gamma_[pre] 0.4 56 | brightness_[post] 1 57 | gamma_[post] 1 58 | render_params_volumetric_new 59 | star_scale_[log] 0 60 | star_alpha______ 1 61 | dm_scale___[log] -0.4 62 | dm_alpha________ 0.1 63 | max_size_[log]__ 0 64 | alpha___________ 0.02 65 | transmission____ 0.001 66 | brightness_[pre] 0.08 67 | gamma_[pre] 0.4 68 | brightness_[post] 1 69 | gamma_[post] 1 70 | slices 1 71 | displayed_slices 1 72 | sprite_size 0.0416 73 | scale_[log] -0.464 74 | dust_scale 12 75 | dust_alpha 
0.316 76 | light_color_r 0.032 77 | light_color_g 0.036 78 | light_color_b 0.036 79 | alpha 0.1 80 | shadow_alpha 0.1 81 | transmission 0.001 82 | indirect_lighting 0.5 83 | fog 0.001 84 | over_bright_multiplier 3.6 85 | star_brightness 0.44 86 | image_brightness 1.3 87 | image_gamma 0.454545 88 | blur_radius 0.98 89 | blur_passes 0 90 | source_intensity 1.0 91 | star_blur_radius 16.4 92 | star_threshold 0.4 93 | star_power 0.9 94 | star_intensity 0.504 95 | glow_radius 17.6 96 | glow_intensity 0.24 97 | flare_intensity 0.044 98 | flare_threshold 0.44 99 | flare_radius 8.8 100 | skybox_brightness 0.24 101 | -------------------------------------------------------------------------------- /runtime/paramsNew.txt: -------------------------------------------------------------------------------- 1 | render_params_points 2 | render_params_volumetric 3 | slices 256 4 | displayed_slices 256 5 | sprite_size 0.074 6 | scale_[log] 0 7 | dust_scale 12 8 | dust_alpha 0.316 9 | light_color_r 0.032 10 | light_color_g 0.036 11 | light_color_b 0.036 12 | alpha 0.1 13 | shadow_alpha 0.1 14 | transmission 0.0022 15 | indirect_lighting 0.5 16 | fog 0.001 17 | over_bright_multiplier 3.6 18 | star_brightness 0.44 19 | image_brightness 0.952 20 | image_gamma 0.584 21 | blur_radius 0.98 22 | blur_passes 4 23 | source_intensity 0.5 24 | star_blur_radius 40 25 | star_threshold 1.2 26 | star_power 1.2 27 | star_intensity 0 28 | glow_radius 25.2 29 | glow_intensity 0.1 30 | flare_intensity 0 31 | flare_threshold 0.72 32 | flare_radius 40.8 33 | skybox_brightness 0.24 34 | render_params_splotch 35 | star_scale_[log] -0.384 36 | star_alpha______ 1 37 | dm_scale___[log] -0.712 38 | dm_alpha________ 0.088 39 | max_size_[log]__ 0.056 40 | alpha___________ 0.027 41 | transmission____ 0.0022 42 | brightness_[pre] 0.036 43 | gamma_[pre] 0.592 44 | brightness_[post] 0.364 45 | gamma_[post] 1 46 | render_params_splotch_sorted 47 | star_scale_[log] -0.384 48 | star_alpha______ 1 49 | dm_scale___[log] 
-0.712 50 | dm_alpha________ 0.088 51 | max_size_[log]__ 0.056 52 | alpha___________ 0.027 53 | transmission____ 0.0022 54 | brightness_[pre] 0.036 55 | gamma_[pre] 0.592 56 | brightness_[post] 0.364 57 | gamma_[post] 1 58 | render_params_volumetric_new 59 | star_scale_[log] -0.384 60 | star_alpha______ 1 61 | dm_scale___[log] -0.712 62 | dm_alpha________ 0.088 63 | max_size_[log]__ 0.056 64 | alpha___________ 0.027 65 | transmission____ 0.0022 66 | brightness_[pre] 0.036 67 | gamma_[pre] 0.592 68 | brightness_[post] 0.364 69 | gamma_[post] 1 70 | slices 256 71 | displayed_slices 256 72 | sprite_size 0.074 73 | scale_[log] 0 74 | dust_scale 12 75 | dust_alpha 0.316 76 | light_color_r 0.032 77 | light_color_g 0.036 78 | light_color_b 0.036 79 | alpha 0.1 80 | shadow_alpha 0.1 81 | transmission 0.0022 82 | indirect_lighting 0.5 83 | fog 0.001 84 | over_bright_multiplier 3.6 85 | star_brightness 0.44 86 | image_brightness 0.952 87 | image_gamma 0.584 88 | blur_radius 0.98 89 | blur_passes 4 90 | source_intensity 0.5 91 | star_blur_radius 40 92 | star_threshold 1.2 93 | star_power 1.2 94 | star_intensity 0 95 | glow_radius 25.2 96 | glow_intensity 0.1 97 | flare_intensity 0 98 | flare_threshold 0.72 99 | flare_radius 40.8 100 | skybox_brightness 0.24 101 | -------------------------------------------------------------------------------- /runtime/params_movie4k.txt: -------------------------------------------------------------------------------- 1 | render_params_points 2 | render_params_volumetric 3 | slices 1 4 | displayed_slices 1 5 | sprite_size 0.0232 6 | scale_[log] -0.464 7 | dust_scale 12 8 | dust_alpha 0.316 9 | light_color_r 0.032 10 | light_color_g 0.036 11 | light_color_b 0.036 12 | alpha 0.096 13 | shadow_alpha 0.1 14 | transmission 0.0006 15 | indirect_lighting 0.5 16 | fog 0.001 17 | over_bright_multiplier 3.6 18 | star_brightness 0.44 19 | image_brightness 0.944 20 | image_gamma 0.496 21 | blur_radius 0.98 22 | blur_passes 0 23 | source_intensity 1.064 24 
| star_blur_radius 19.6 25 | star_threshold 1.4 26 | star_power 0.9 27 | star_intensity 0.488 28 | glow_radius 22.8 29 | glow_intensity 0.46 30 | flare_intensity 0.044 31 | flare_threshold 0.44 32 | flare_radius 8.8 33 | skybox_brightness 0.24 34 | render_params_splotch 35 | star_scale_[log] -0.44 36 | star_alpha______ 1 37 | dm_scale___[log] -0.4 38 | dm_alpha________ 0.1 39 | max_size_[log]__ 0 40 | alpha___________ 0.021 41 | transmission____ 0.0006 42 | brightness_[pre] 0.08 43 | gamma_[pre] 0.4 44 | brightness_[post] 1 45 | gamma_[post] 1 46 | render_params_splotch_sorted 47 | star_scale_[log] -0.44 48 | star_alpha______ 1 49 | dm_scale___[log] -0.4 50 | dm_alpha________ 0.1 51 | max_size_[log]__ 0 52 | alpha___________ 0.021 53 | transmission____ 0.0006 54 | brightness_[pre] 0.08 55 | gamma_[pre] 0.4 56 | brightness_[post] 1 57 | gamma_[post] 1 58 | render_params_volumetric_new 59 | star_scale_[log] -0.44 60 | star_alpha______ 1 61 | dm_scale___[log] -0.4 62 | dm_alpha________ 0.1 63 | max_size_[log]__ 0 64 | alpha___________ 0.021 65 | transmission____ 0.0006 66 | brightness_[pre] 0.08 67 | gamma_[pre] 0.4 68 | brightness_[post] 1 69 | gamma_[post] 1 70 | slices 1 71 | displayed_slices 1 72 | sprite_size 0.0232 73 | scale_[log] -0.464 74 | dust_scale 12 75 | dust_alpha 0.316 76 | light_color_r 0.032 77 | light_color_g 0.036 78 | light_color_b 0.036 79 | alpha 0.096 80 | shadow_alpha 0.1 81 | transmission 0.0006 82 | indirect_lighting 0.5 83 | fog 0.001 84 | over_bright_multiplier 3.6 85 | star_brightness 0.44 86 | image_brightness 0.944 87 | image_gamma 0.496 88 | blur_radius 0.98 89 | blur_passes 0 90 | source_intensity 1.064 91 | star_blur_radius 19.6 92 | star_threshold 1.4 93 | star_power 0.9 94 | star_intensity 0.488 95 | glow_radius 22.8 96 | glow_intensity 0.46 97 | flare_intensity 0.044 98 | flare_threshold 0.44 99 | flare_radius 8.8 100 | skybox_brightness 0.24 101 | -------------------------------------------------------------------------------- 
/runtime/profileCommand: -------------------------------------------------------------------------------- 1 | CUDA_PROFILE_CSV=1 CUDA_PROFILE=1 CUDA_PROFILE_CONFIG=./profiler.conf CUDA_PROFILE_LOG=profLog.csv mpirun -np 2 ./main ~/inputData/plummer/plum1M.dumbp bla 0 0.0625 0 0.05 0.5 2 | -------------------------------------------------------------------------------- /runtime/profiler.conf: -------------------------------------------------------------------------------- 1 | gpustarttimestamp 2 | gridsize 3 | threadblocksize 4 | dynsmemperblock 5 | stasmemperblock 6 | regperthread 7 | memTransferSize 8 | streamid 9 | memtransferhostmemtype 10 | localblocksize 11 | -------------------------------------------------------------------------------- /runtime/profiling/bonsai_timing.h: -------------------------------------------------------------------------------- 1 | #ifndef _BONSAI_TIMING_H 2 | #define _BONSAI_TIMING_H 3 | 4 | #include 5 | 6 | #define CUXTIMER_DISABLE 7 | 8 | #include "../profiling/cuxTimer_host.cu" 9 | 10 | #ifdef CUXTIMER_DISABLE 11 | #define PROF_HOOK(name) 12 | #define CUXTIMER(...) 
13 | #else 14 | #define PROF_HOOK(name) \ 15 | extern void name ## _init(); \ 16 | extern void name ## _display(FILE *fp=stdout, int csv=false, int show_headings=true); 17 | #endif 18 | 19 | PROF_HOOK(build_tree); 20 | PROF_HOOK(compute_propertiesD); 21 | PROF_HOOK(dev_approximate_gravity); 22 | PROF_HOOK(parallel); 23 | PROF_HOOK(sortKernels); 24 | PROF_HOOK(timestep); 25 | 26 | #undef PROF_HOOK 27 | 28 | #ifdef CUXTIMER_DISABLE 29 | #define PROF_MODULE(name) 30 | #else 31 | #define PROF_MODULE(name) \ 32 | extern void name ## _init() { cudaxTimerReset(); } \ 33 | extern void name ## _display(FILE *fp, int csv, int show_headings) { cudaxTimerDisplay(fp, csv, show_headings); } 34 | #endif 35 | 36 | #endif // _BONSAI_TIMING_H 37 | -------------------------------------------------------------------------------- /runtime/renderer/Cubemap.h: -------------------------------------------------------------------------------- 1 | GLuint loadCubemap(const char *filenameFormat); 2 | GLuint loadCubemapCross(const char *filename); 3 | -------------------------------------------------------------------------------- /runtime/renderer/GLSLProgram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | // Simple class to contain GLSL shaders/programs 13 | 14 | #ifndef GLSL_PROGRAM_H 15 | #define GLSL_PROGRAM_H 16 | 17 | #include 18 | #include 19 | 20 | class GLSLProgram 21 | { 22 | public: 23 | // construct program from strings 24 | GLSLProgram(const char *vsource, const char *fsource); 25 | GLSLProgram(const char *vsource, const char *gsource, const char *fsource, 26 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 27 | ~GLSLProgram(); 28 | 29 | void enable(); 30 | void disable(); 31 | 32 | void setUniform1f(const GLchar *name, GLfloat x); 33 | void setUniform2f(const GLchar *name, GLfloat x, GLfloat y); 34 | void setUniform3f(const char *name, float x, float y, float z); 35 | void setUniform4f(const char *name, float x, float y, float z, float w); 36 | void setUniformfv(const GLchar *name, GLfloat *v, int elementSize, int count=1); 37 | void setUniformMatrix4fv(const GLchar *name, GLfloat *m, bool transpose); 38 | 39 | void bindTexture(const char *name, GLuint tex, GLenum target, GLint unit); 40 | 41 | inline GLuint getProgId() { return mProg; } 42 | 43 | private: 44 | GLuint compileProgram(const char *vsource, const char *gsource, const char *fsource, 45 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 46 | GLuint mProg; 47 | }; 48 | 49 | #endif -------------------------------------------------------------------------------- /runtime/renderer/SmokeShaders.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | extern const char *particleVS, *simpleVS; 13 | extern const char *particleSpherePS, *simplePS, *particlePS, *particleAAPS, *particleShadowPS; 14 | extern const char *mblurVS, *mblurGS; 15 | extern const char *passThruVS, *transformVS, *texture2DPS; 16 | extern const char *blurPS, *blur3x3PS, *blur2PS; 17 | extern const char *starFilterPS, *compositePS; 18 | extern const char *thresholdPS; 19 | extern const char *downSample4PS, *downSample2PS; 20 | extern const char *gaussianBlurPS; 21 | extern const char *floorVS, *floorPS; 22 | extern const char *volumeVS, *volumePS; 23 | extern const char *skyboxVS, *skyboxPS; -------------------------------------------------------------------------------- /runtime/renderer/loadPPM.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "loadPPM.h" 4 | 5 | Image *loadPPM(const char *filename) 6 | { 7 | char buff[16]; 8 | Image *result; 9 | FILE *fp; 10 | int maxval; 11 | 12 | fp = fopen(filename, "rb"); 13 | if (!fp) 14 | { 15 | fprintf(stderr, "Unable to open file `%s'\n", filename); 16 | return 0; 17 | } 18 | 19 | if (!fgets(buff, sizeof(buff), fp)) 20 | { 21 | fprintf(stderr, "Error opening file '%s'\n", filename); 22 | return 0; 23 | } 24 | 25 | if (buff[0] != 'P' || buff[1] != '6') 26 | { 27 | fprintf(stderr, "Invalid image format (must be `P6')\n"); 28 | return 0; 29 | } 30 | 31 | int c = fgetc(fp); 32 | if (c == '#') { 33 | // skip comment 34 | char str[256]; 35 | fgets(str, 256, fp); 36 | } else { 37 | ungetc(c, fp); 38 | } 39 | 40 | result = (Image *) malloc(sizeof(Image)); 41 | if (!result) 42 | { 43 | fprintf(stderr, "Unable to allocate memory\n"); 44 | exit(1); 45 | } 46 | 47 | int r = 0; 48 | if (8 == sizeof(void*)) 49 | r = fscanf(fp, "%llu %llu", &result->width, &result->height); 50 | else 51 | r = fscanf(fp, "%lu %lu", &result->width, &result->height); 52 | 53 | if (r != 2) 54 | { 55 | fprintf(stderr, "Error loading image 
`%s'\n", filename); 56 | return 0; 57 | } 58 | while (fgetc(fp) != '\n'); 59 | 60 | if (fscanf(fp, "%d", &maxval) != 1) 61 | { 62 | fprintf(stderr, "Error loading image `%s'\n", filename); 63 | return 0; 64 | } 65 | while (fgetc(fp) != '\n'); 66 | 67 | result->data = (unsigned char *) malloc(3 * result->width * result->height); 68 | if (!result->data) 69 | { 70 | fprintf(stderr, "Unable to allocate memory\n"); 71 | exit(1); 72 | } 73 | 74 | if (fread(result->data, 3 * result->width, result->height, fp) != result->height) 75 | { 76 | fprintf(stderr, "Error loading image `%s'\n", filename); 77 | return 0; 78 | } 79 | 80 | fprintf(stdout, "Loaded `%s', %d x %d\n", filename, result->width, result->height); 81 | 82 | fclose(fp); 83 | 84 | return result; 85 | } 86 | -------------------------------------------------------------------------------- /runtime/renderer/loadPPM.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | size_t width, height; 3 | unsigned char *data; 4 | } Image; 5 | 6 | Image *loadPPM(const char *filename); 7 | -------------------------------------------------------------------------------- /runtime/renderer/param.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | #include 13 | 14 | const Param dummy("error"); 15 | -------------------------------------------------------------------------------- /runtime/renderer/timer.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 4 | // 5 | // Please refer to the NVIDIA end user license agreement (EULA) associated 6 | // with this source code for terms and conditions that govern your use of 7 | // this software. Any use, reproduction, disclosure, or distribution of 8 | // this software and related documentation outside the terms of the EULA 9 | // is strictly prohibited. 10 | // 11 | ///////////////////////////////////////////////////////////////////////////// 12 | 13 | #ifndef TIMER_H 14 | #define TIMER_H 15 | 16 | #include 17 | 18 | #ifdef _WIN32 19 | #ifndef WIN32_LEAN_AND_MEAN 20 | #define WIN32_LEAN_AND_MEAN 21 | #endif 22 | #include 23 | #else 24 | #include 25 | #endif 26 | 27 | #ifdef _WIN32 28 | double PCFreq = 0.0; 29 | __int64 timerStart = 0; 30 | #else 31 | struct timeval timerStart; 32 | #endif 33 | 34 | void StartTimer() 35 | { 36 | #ifdef _WIN32 37 | LARGE_INTEGER li; 38 | 39 | if (!QueryPerformanceFrequency(&li)) 40 | { 41 | printf("QueryPerformanceFrequency failed!\n"); 42 | } 43 | 44 | PCFreq = (double)li.QuadPart/1000.0; 45 | QueryPerformanceCounter(&li); 46 | timerStart = li.QuadPart; 47 | #else 48 | gettimeofday(&timerStart, NULL); 49 | #endif 50 | } 51 | 52 | // time elapsed in ms 53 | double GetTimer() 54 | { 55 | #ifdef _WIN32 56 | LARGE_INTEGER li; 57 | QueryPerformanceCounter(&li); 58 | return (double)(li.QuadPart-timerStart)/PCFreq; 59 | #else 60 | struct timeval timerStop, timerElapsed; 61 | gettimeofday(&timerStop, NULL); 62 | timersub(&timerStop, &timerStart, &timerElapsed); 63 | return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0; 64 | 
#endif 65 | } 66 | #endif // TIMER_H 67 | 68 | -------------------------------------------------------------------------------- /runtime/src/bonsai_clrshm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include /* For mode constants */ 9 | #include /* For O_* constants */ 10 | 11 | int getFolderContent(std::string folder, std::vector &files) 12 | { 13 | DIR *dp; 14 | struct dirent *dirp; 15 | if((dp = opendir(folder.c_str())) == NULL) 16 | { 17 | fprintf(stderr,"Error( %d ) opening %s \n", errno, folder.c_str()); 18 | return errno; 19 | } 20 | 21 | while ((dirp = readdir(dp)) != NULL) 22 | { 23 | files.push_back(std::string(dirp->d_name)); 24 | } 25 | closedir(dp); 26 | return 0; 27 | } 28 | 29 | 30 | int main(int argc, char * argv[]) 31 | { 32 | // using ShmQHeader = SharedMemoryServer; 33 | // using ShmQData = SharedMemoryServer; 34 | // using ShmSHeader = SharedMemoryServer; 35 | // using ShmSData = SharedMemoryServer; 36 | // const int n = argc > 1 ? 
atoi(argv[1]) : 1; 37 | // fprintf(stderr, "cleaning for n= %d\n", n); 38 | // for (int i = 0; i < n; i++) 39 | // { 40 | //#if 0 41 | // fprintf(stderr, "clear: %s \n", ShmQHeader::type::sharedFile(i)); 42 | // fprintf(stderr, "clear: %s \n", ShmQData ::type::sharedFile(i)); 43 | // fprintf(stderr, "clear: %s \n", ShmSHeader::type::sharedFile(i)); 44 | // fprintf(stderr, "clear: %s \n", ShmSData ::type::sharedFile(i)); 45 | //#endif 46 | // //ShmQHeader shmQHeader(ShmQHeader::type::sharedFile(i), 1); 47 | // //ShmQData shmQData (ShmQData ::type::sharedFile(i), 1); 48 | // // ShmSHeader shmSHeader(ShmSHeader::type::sharedFile(i), 1); 49 | // // ShmSData shmSData (ShmSData ::type::sharedFile(i), 1); 50 | // } 51 | 52 | 53 | fprintf(stderr,"Cleaning up Bonsai's shared memory buffers\n"); 54 | //Get all the shared memory files 55 | std::string folder("/dev/shm"); //Or on Debian /run/shm also works 56 | std::vector files; 57 | getFolderContent(folder,files); 58 | 59 | //Check if there are any files created by Bonsai 60 | for(auto &p : files) 61 | { 62 | if(p.find("Bonsai") !=std::string::npos) 63 | { 64 | fprintf(stderr,"Removing %s \n", p.c_str()); 65 | shm_unlink(p.c_str()); 66 | } 67 | } 68 | 69 | fprintf(stderr,"Finished cleaning\n"); 70 | 71 | return 0; 72 | } 73 | 74 | -------------------------------------------------------------------------------- /runtime/src/log.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * log.cpp 3 | * 4 | * Created on: Jun 8, 2012 5 | * Author: jbedorf 6 | */ 7 | 8 | 9 | #ifdef USE_MPI 10 | #include 11 | #endif 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | // ******************************************** // 18 | // These functions add the process ID and total // 19 | // number of processes to the output lines. // 20 | // Mainly for parallel debug purposes. 
// 21 | // ******************************************** // 22 | 23 | #if ENABLE_LOG 24 | #ifdef USE_MPI 25 | #include 26 | 27 | extern bool ENABLE_RUNTIME_LOG; 28 | extern bool PREPEND_RANK; 29 | extern int PREPEND_RANK_PROCID; 30 | extern int PREPEND_RANK_NPROCS; 31 | 32 | const char prependPattern[] = {"[Proc: %d (%d)]\t"}; 33 | 34 | char stderrBUFF[4096]; 35 | 36 | //Standard out, note we write to a buffer to make 37 | //sure the whole line is output in one flush 38 | extern void prependrankLOG(const char *fmt, ...) 39 | { 40 | // bounded writes: a message longer than stderrBUFF is truncated instead of overflowing the buffer 41 | va_list ap; 42 | va_start(ap, fmt); 43 | 44 | snprintf(stderrBUFF, sizeof(stderrBUFF), prependPattern, PREPEND_RANK_PROCID, PREPEND_RANK_NPROCS); 45 | int len = strlen(stderrBUFF); 46 | vsnprintf(stderrBUFF+len, sizeof(stderrBUFF)-len, fmt, ap); 47 | va_end(ap); 48 | printf("%s",stderrBUFF); 49 | } 50 | // Same as prependrankLOG, but the assembled line is written to stderr 51 | extern void prependrankLOGF(const char *fmt, ...) 52 | { 53 | // char stderrBUFF[4096]; 54 | va_list ap; 55 | va_start(ap, fmt); 56 | 57 | snprintf(stderrBUFF, sizeof(stderrBUFF), prependPattern, PREPEND_RANK_PROCID, PREPEND_RANK_NPROCS); 58 | int len = strlen(stderrBUFF); 59 | vsnprintf(stderrBUFF+len, sizeof(stderrBUFF)-len, fmt, ap); 60 | va_end(ap); 61 | fprintf(stderr, "%s", stderrBUFF); 62 | } 63 | #endif //USEMPI 64 | #endif //ENABLE LOG 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /runtime/vizscript.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ulimit -s unlimited 3 | export CUDA_VISIBLE_DEVICES=1 4 | mpirun hostname -s > /tmp/hostfile 5 | nhost=`cat /tmp/hostfile|wc -l` 6 | np=$(($nhost*2)) 7 | echo "Nhost= $nhost Np= $np" 8 | mpirun -np $nhost ./bonsai_clrshm $np 9 | mpirun -hostfile /tmp/hostfile -np $np -loadbalance bash -c ' 10 | ulimit -s unlimited && 11 | vglrun -d :0.0 ./bonsai_driver << EOF 12 | ./bonsai2_slowdust -f ./dataIn/snap__00510.0000.bonsai --reducebodies 1 -t 0.015625 -T 1000 --quickdump 0.125 --quickratio 0.2 --usempiio --noquicksync 13 | ./renderer -I --reduceDM 0 -d --noquicksync 
14 | EOF 15 | ' 16 | sleep 1 17 | mpirun -np $nhost ./bonsai_clrshm $np 18 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(bonsai_tools) 3 | 4 | #-------------------------------------------------- 5 | # Setup Output directories 6 | #-------------------------------------------------- 7 | SET (CMAKE_RUNTIME_OUTPUT_DIRECTORY 8 | ${PROJECT_BINARY_DIR}/bin CACHE PATH "Single Directory for all Executables." 9 | ) 10 | SET (CMAKE_LIBRARY_OUTPUT_DIRECTORY 11 | ${PROJECT_BINARY_DIR}/bin CACHE PATH "Single Directory for all Libraries" 12 | ) 13 | SET (CMAKE_ARCHIVE_OUTPUT_DIRECTORY 14 | ${PROJECT_BINARY_DIR}/bin CACHE PATH "Single Directory for all static libraries." 15 | ) 16 | 17 | #-------------------------------------------------- 18 | # On mac, if we use cuda we need stdlibc++ 19 | # add an option to use boost where std:: isn't available 20 | #-------------------------------------------------- 21 | option(BONSAI_CATALYST_USE_STDLIB OFF "Use stdlibc++ instead of libc++") 22 | if(BONSAI_CATALYST_USE_STDLIB) 23 | add_definitions(-DBONSAI_CATALYST_STDLIB) 24 | #----------------------------------------------- 25 | # Boost 26 | #----------------------------------------------- 27 | find_package( Boost 1.54.0 ) 28 | include_directories(${Boost_INCLUDE_DIR}) 29 | LINK_DIRECTORIES(${Boost_LIBRARY_DIR}) 30 | endif() 31 | 32 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common) 33 | 34 | add_library(bonsai_tools_common 35 | common/anyoption.cpp 36 | ) 37 | 38 | add_subdirectory(catalyst) 39 | add_subdirectory(snapServe) 40 | 41 | -------------------------------------------------------------------------------- /tools/IO/BonsaiIO.h: -------------------------------------------------------------------------------- 1 | ../../runtime/include/BonsaiIO.h 
-------------------------------------------------------------------------------- /tools/IO/IDType.h: -------------------------------------------------------------------------------- 1 | ../../runtime/include/IDType.h -------------------------------------------------------------------------------- /tools/IO/Makefile: -------------------------------------------------------------------------------- 1 | CXX = mpicxx 2 | CC = mpicc 3 | LD = mpicxx 4 | 5 | OMPFLAGS = -fopenmp 6 | #OMPFLAGS += -D_GLIBCXX_PARALLEL 7 | 8 | OFLAGS = -O3 -g -Wall 9 | # OMPFLAGS= 10 | 11 | CXXFLAGS = -fPIC $(OFLAGS) $(OMPFLAGS) -std=c++11 12 | CXXFLAGS += -Werror 13 | CXXFLAGS += -Wno-literal-suffix # ignores warning in openmpi 1.6.5 14 | 15 | 16 | 17 | LDFLAGS = 18 | 19 | SRCPATH = ./ 20 | 21 | SRC1 = cvt_tipsy2bonsai.cpp 22 | SRC2 = readBonsai.cpp 23 | SRC3 = benchmark.cpp 24 | SRC4 = cvt_tipsy_gtc12_2bonsai.cpp 25 | SRC5 = cvt_bonsai2dumbp.cpp 26 | SRC6 = cvt_amuseASCII2bonsai.cpp 27 | SRC7 = cvt_bonsai2amuseASCII.cpp 28 | OBJ1 = $(SRC1:%.cpp=%.o) 29 | OBJ2 = $(SRC2:%.cpp=%.o) 30 | OBJ3 = $(SRC3:%.cpp=%.o) 31 | OBJ4 = $(SRC4:%.cpp=%.o) 32 | OBJ5 = $(SRC5:%.cpp=%.o) 33 | OBJ6 = $(SRC6:%.cpp=%.o) 34 | OBJ7 = $(SRC7:%.cpp=%.o) 35 | 36 | PROG1 = cvt_tipsy2bonsai 37 | PROG2 = readBonsai 38 | PROG3 = benchmark 39 | PROG4 = cvt_tipsy_gtc12_2bonsai 40 | PROG5 = cvt_bonsai2dumbp 41 | PROG6 = cvt_tipsy2bonsaiExtended 42 | PROG7 = readBonsaiExtended 43 | PROG8 = cvt_amuseASCII2bonsai 44 | PROG9 = cvt_bonsai2amuseASCII 45 | RM = /bin/rm 46 | 47 | all: $(PROG1) $(PROG2) $(PROG3) $(PROG4) $(PROG5) $(PROG6) $(PROG7) $(PROG8) $(PROG9) 48 | 49 | 50 | $(PROG1): $(OBJ1) 51 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 52 | $(PROG2): $(OBJ2) 53 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 54 | $(PROG3): $(OBJ3) 55 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 56 | $(PROG4): $(OBJ4) 57 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 58 | 59 | 60 | 61 | %.o: $(SRCPATH)/%.cpp 62 | $(CXX) $(CXXFLAGS) -c $< -o $@ 63 | 64 | 65 | clean: 66 
| /bin/rm -rf *.o $(PROG1) $(PROG2) $(PROG3) $(OBJ1) $(OBJ2) $(OBJ3) $(PROG4) $(OBJ4) 67 | 68 | $(OBJ1): BonsaiIO.h read_tipsy.h 69 | $(OBJ2): BonsaiIO.h 70 | $(OBJ3): BonsaiIO.h 71 | $(OBJ4): BonsaiIO.h read_tipsy.h 72 | -------------------------------------------------------------------------------- /tools/IO/main.cpp: -------------------------------------------------------------------------------- 1 | #include "BonsaiIO.h" 2 | 3 | struct real2 { float x,y;}; 4 | struct real4 { float x,y,z,w;}; 5 | typedef unsigned long long uulong; 6 | 7 | void writeSnapshot( 8 | real4 *bodyPositions, 9 | real4 *bodyVelocities, 10 | uulong* bodyIds, 11 | const int n, 12 | const int nDomains, 13 | const std::string &fileName, 14 | const float time, 15 | const MPI_Comm &comm, 16 | const int nRank, const int myRank); 17 | 18 | void readSnapshot( 19 | std::vector &bodyPositions, 20 | std::vector &bodyVelocities, 21 | std::vector &bodyID, 22 | std::vector &rhohList, 23 | const std::string &fileName, 24 | const MPI_Comm &comm, 25 | const int nRank, 26 | const int myRank, 27 | int &NTotal2, 28 | int &NFirst, int &NSecond, int &NThird, 29 | std::vector &dustPositions, std::vector &dustVelocities, 30 | std::vector &dustIDs, 31 | const int reduce_bodies_factor, 32 | const int reduce_dust_factor, 33 | const bool restart); 34 | 35 | int main(int argc, char * argv[]) 36 | { 37 | return 0; 38 | }; 39 | -------------------------------------------------------------------------------- /tools/add_dust/IC_MWM31: -------------------------------------------------------------------------------- 1 | 1.0 2 | 1.0 3 | 20.0 4 | 778.0 5 | -90.0 6 | 0.0 7 | -30.0 8 | 240.0 9 | -------------------------------------------------------------------------------- /tools/add_dust/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | CC = gcc 3 | LD = g++ 4 | F90 = gfortran 5 | 6 | OMPFLAGS = -fopenmp 7 | OMPFLAGS += -D_GLIBCXX_PARALLEL 8 | 9 | OFLAGS = -O3 -g 
-Wall 10 | # OMPFLAGS= 11 | 12 | CXXFLAGS = -fPIC $(OFLAGS) -Wstrict-aliasing=2 $(OMPFLAGS) 13 | 14 | 15 | 16 | LDFLAGS = 17 | 18 | SRCPATH = ./ 19 | SRC = add_dust.cpp anyoption.cpp 20 | OBJ = $(SRC:%.cpp=%.o) 21 | 22 | SRC1 = initorbit.cpp anyoption.cpp 23 | OBJ1 = $(SRC1:%.cpp=%.o) 24 | 25 | SRC2 = initM31_MW.cpp anyoption.cpp 26 | OBJ2 = $(SRC2:%.cpp=%.o) 27 | 28 | PROG = add_dust 29 | PROG1 = initorbit 30 | PROG2 = initM31_MW 31 | 32 | RM = /bin/rm 33 | 34 | all: $(PROG) $(PROG1) $(PROG2) 35 | 36 | 37 | $(PROG): $(OBJ) 38 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 39 | 40 | $(PROG1): $(OBJ1) 41 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 42 | 43 | $(PROG2): $(OBJ2) 44 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 45 | 46 | %.o: $(SRCPATH)/%.cpp 47 | $(CXX) $(CXXFLAGS) -c $< -o $@ 48 | 49 | 50 | clean: 51 | /bin/rm -rf *.o $(PROG) $(PROG1) $(PROG2) 52 | 53 | $(OBJ): DustRing.h vector3.h tipsydefs.h 54 | $(OBJ1): vector3.h tipsydefs.h 55 | $(OBJ2): vector3.h tipsydefs.h kepler.h 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /tools/add_dust/kepler.h: -------------------------------------------------------------------------------- 1 | #ifndef __KEPLER_H__ 2 | #define __KEPLER_H__ 3 | 4 | #include 5 | #include 6 | #include "vector3.h" 7 | 8 | struct Kepler 9 | { 10 | typedef vector3 vec3; 11 | vec3 pos, vel; 12 | double M; 13 | 14 | double get_dt(const double eps) const 15 | { 16 | const double R = pos.abs(); 17 | return eps * std::sqrt(R*R*R/M); 18 | } 19 | 20 | vec3 get_acc() const 21 | { 22 | const double R = pos.abs(); 23 | return -M/(R*R*R)*pos; 24 | } 25 | 26 | Kepler( 27 | const vec3 &_pos, 28 | const vec3 &_vel, 29 | const double _M, 30 | const double T, 31 | const double eps = 1.0e-5) : 32 | pos(_pos), vel(_vel), M(_M) 33 | { 34 | double t = 0; 35 | double dt = get_dt(eps); 36 | while (t < T) 37 | { 38 | pos += vel*dt*0.5; 39 | vel += get_acc()*dt; 40 | pos += vel*dt*0.5; 41 | t += dt; 42 | dt = 
get_dt(eps); 43 | } 44 | } 45 | 46 | }; 47 | 48 | 49 | #endif // __KEPLER_H__ 50 | -------------------------------------------------------------------------------- /tools/add_dust/tipsydefs.h: -------------------------------------------------------------------------------- 1 | #ifndef TIPSYDEFS_H 2 | #define TIPSYDEFS_H 3 | 4 | #define MAXDIM 3 5 | #define forever for(;;) 6 | 7 | typedef float Real; 8 | 9 | struct gas_particle { 10 | Real mass; 11 | Real pos[MAXDIM]; 12 | Real vel[MAXDIM]; 13 | Real rho; 14 | Real temp; 15 | Real hsmooth; 16 | Real metals ; 17 | Real phi ; 18 | } ; 19 | 20 | //struct gas_particle *gas_particles; 21 | 22 | struct dark_particle { 23 | Real mass; 24 | Real pos[MAXDIM]; 25 | Real vel[MAXDIM]; 26 | Real eps; 27 | int phi ; 28 | } ; 29 | 30 | //struct dark_particle *dark_particles; 31 | 32 | struct star_particle { 33 | Real mass; 34 | Real pos[MAXDIM]; 35 | Real vel[MAXDIM]; 36 | Real metals ; 37 | Real tform ; 38 | Real eps; 39 | int phi ; 40 | } ; 41 | 42 | //struct star_particle *star_particles; 43 | 44 | struct dump { 45 | double time ; 46 | int nbodies ; 47 | int ndim ; 48 | int nsph ; 49 | int ndark ; 50 | int nstar ; 51 | } ; 52 | 53 | typedef struct dump header ; 54 | 55 | #endif 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/BonsaiIO.h: -------------------------------------------------------------------------------- 1 | ../IO/BonsaiIO.h -------------------------------------------------------------------------------- /tools/bonsaiRenderer/BonsaiSharedData.h: -------------------------------------------------------------------------------- 1 | ../../runtime/include/BonsaiSharedData.h -------------------------------------------------------------------------------- /tools/bonsaiRenderer/IDType.h: -------------------------------------------------------------------------------- 1 | ../IO/IDType.h 
-------------------------------------------------------------------------------- /tools/bonsaiRenderer/Makefile: -------------------------------------------------------------------------------- 1 | CXX = mpicxx 2 | CC = mpicc 3 | LD = mpicxx 4 | F90 = mpif90 5 | 6 | CFLAGS = -Wall -Werror 7 | CUDA_TK = /usr/local/cuda 8 | # CUDA_TK = $(HOME)/usr/local/cuda-5.5 9 | 10 | CXXFLAGS += -Wall -fopenmp -std=c++11 -fPIE -ffast-math 11 | CXXFLAGS += -Werror 12 | #ignore warnings caused by openmpi 13 | CXXFLAGS += -Wno-literal-suffix 14 | #ignore warnings in a constructor when initialization is not done in the order the variables are declared 15 | CXXFLAGS += -Wno-reorder 16 | CXXFLAGS += -Wno-main # ignore warnings on main() 17 | 18 | 19 | OPT += -O3 -g 20 | 21 | CFLAGS += $(OPT) -fPIE 22 | CXXFLAGS += $(OPT) 23 | 24 | CXXFLAGS += -mavx 25 | 26 | 27 | NVCC=$(CUDA_TK)/bin/nvcc 28 | NVCCFLAGS=-arch=sm_35 29 | 30 | ENGINE=particles 31 | ENGINE=smoke 32 | ENGINE=density 33 | #ENGINE=splotch 34 | 35 | 36 | 37 | INCLUDE_FLAGS= -I./ -I./$(ENGINE)/ 38 | INCLUDE_FLAGS += -I$(CUDA_TK)/include 39 | LDFLAGS = -lGL -lGLU -lglut -lGLEW -fopenmp -lrt 40 | LDFLAGS += -pie -rdynamic -lpthread 41 | 42 | 43 | OBJ = main.o anyoption.o $(ENGINE)/renderer.o paramgl.o RendererData.o RendererDataDistribute.o 44 | 45 | include Makefile.in 46 | 47 | ifeq ($(ICET),1) 48 | ICETPATH=../../IceT 49 | INCLUDE_FLAGS += -I$(ICETPATH)/include 50 | LDFLAGS += -L$(ICETPATH)/lib -lIceTCore -lIceTMPI 51 | CXXFLAGS += -DUSE_ICET 52 | endif 53 | 54 | ifeq ($(APPLEOSX),1) 55 | INCLUDE_FLAGS += -I/opt/local/include 56 | CXXFLAGS +=-Wno-deprecated-declarations 57 | LDFLAGS = -framework OpenGL -L/opt/local/lib -lglut -lglew -fopenmp 58 | endif 59 | 60 | 61 | ifeq ($(ENGINE),smoke) 62 | OBJ += tr.o 63 | OBJ += smoke/GLSLProgram.o 64 | OBJ += smoke/SmokeShaders.o 65 | OBJ += smoke/framebufferObject.o 66 | # OBJ += smoke/depthSort.cu_o 67 | OBJ += smoke/depthSort.o 68 | endif 69 | ifeq ($(ENGINE),density) 70 | OBJ += tr.o 
71 | OBJ += density/GLSLProgram.o 72 | OBJ += density/SmokeShaders.o 73 | OBJ += density/framebufferObject.o 74 | # OBJ += density/depthSort.cu_o 75 | OBJ += density/depthSort.o 76 | endif 77 | ifeq ($(ENGINE),splotch) 78 | OBJ += splotch/GLSLProgram.o 79 | OBJ += splotch/splotch.o 80 | endif 81 | ifeq ($(ENGINE),splotchGL) 82 | OBJ += splotch/GLSLProgram.o 83 | OBJ += splotchGL/splotch.o 84 | endif 85 | 86 | PROG= renderer 87 | 88 | all: $(PROG) 89 | 90 | $(PROG): $(OBJ) 91 | $(LD) $^ -o $@ $(LDFLAGS) 92 | 93 | %.o: %.cpp 94 | $(CXX) $(CXXFLAGS) $(INCLUDE_FLAGS) -c $< -o $@ 95 | 96 | %.o: %.c 97 | $(CC) $(CFLAGS) $(INCLUDE_FLAGS) -c $< -o $@ 98 | 99 | %.cu_o: %.cu 100 | $(NVCC) $(NVCCFLAGS) $(INCLUDE_FLAGS) -c $< -o $@ 101 | 102 | clean: 103 | /bin/rm -rf $(OBJ) $(PROG) 104 | 105 | clean_bak: 106 | find . -name '*~' -exec /bin/rm -rf '{}' \; 107 | 108 | 109 | 110 | $(OBJ): CameraPath.h anyoption.h paramgl.h param.h RendererData.h $(ENGINE)/renderloop.h $(ENGINE)/renderer.h vector_math.h \ 111 | splotch/Blending.h splotch/GLSLProgram.h splotch/MathArray.h splotch/renderer.h splotch/renderloop.h splotch/Splotch.h splotch/Texture.h splotch/Vertex.h 112 | 113 | 114 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/Makefile.in: -------------------------------------------------------------------------------- 1 | ifeq ($(DDDBG),1) 2 | CXXFLAGS += -DDDBG 3 | endif 4 | 5 | ifeq ($(FP16),0) 6 | CXXFLAGS += -DTEX_FLOAT32 7 | else 8 | CXXFLAGS += -DTEX_FLOAT16 9 | ifeq ($(F16C),1) 10 | CXXFLAGS += -DF16C 11 | endif 12 | endif 13 | 14 | ifeq ($(COPY),1) 15 | CXXFLAGS += -D__COMPOSITE_COPY 16 | endif 17 | 18 | ifeq ($(ASYNC_OMP),0) 19 | else 20 | CXXFLAGS += -DASYNC_OMP 21 | endif 22 | 23 | ifeq ($(NONBLOCK),0) 24 | ifeq ($(PROFILE),1) 25 | CXXFLAGS += -D__COMPOSITE_PROFILE 26 | endif 27 | else 28 | CXXFLAGS += -D__COMPOSITE_NONBLOCK 29 | endif 30 | 31 | ifeq ($(MPIMT),0) 32 | CXXFLAGS += 33 | else 34 | CXXFLAGS += -D_MPIMT 
35 | endif 36 | 37 | 38 | ifeq ($(PNG),1) 39 | CXXFLAGS += -D_PNG 40 | LDFLAGS += -lpng 41 | endif 42 | 43 | ifeq ($(4K),1) 44 | OBJ += $(ENGINE)/renderloop4k.o 45 | else 46 | OBJ += $(ENGINE)/renderloop.o 47 | endif 48 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/SharedMemory.h: -------------------------------------------------------------------------------- 1 | ../../runtime/include/SharedMemory.h -------------------------------------------------------------------------------- /tools/bonsaiRenderer/color_map.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/bonsaiRenderer/color_map.bmp -------------------------------------------------------------------------------- /tools/bonsaiRenderer/cvtBmp2Ascii.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct BmpHeader 6 | { 7 | 8 | unsigned int bfSize; 9 | unsigned short bfReserved1; 10 | unsigned short bfReserved2; 11 | unsigned int bfOffBits; 12 | 13 | unsigned int biSize; 14 | unsigned int biWidth; 15 | unsigned int biHeight; 16 | unsigned short biPlanes; 17 | unsigned short biBitCount; 18 | unsigned int biCompression; 19 | unsigned int biSizeImage; 20 | unsigned int biXPixPerMeter; 21 | unsigned int biYPixPerMeter; 22 | unsigned int biClrUsed; 23 | unsigned int biClrImporant; 24 | 25 | }; 26 | 27 | int main(int argc, char * argv[]) 28 | { 29 | fprintf(stderr, "Usage: %s < input.bmp > output.cpp\n", 30 | argv[0]); 31 | fprintf(stderr, " Reading BMP ... 
\n"); 32 | 33 | BmpHeader bmp; 34 | char bfType[2]; 35 | 36 | FILE *instream = stdin; 37 | fread( &bfType, sizeof(char), 2, instream); 38 | fread( &bmp, sizeof(BmpHeader), 1, instream); 39 | const int width = (int)bmp.biWidth; 40 | const int height = (int)bmp.biHeight; 41 | fprintf(stderr, "width= %d height= %d\n", width, height); 42 | std::vector color_array(width*height*3); 43 | fread( &color_array[0], sizeof(unsigned char), width*height*3, instream); 44 | 45 | FILE *ostream = stdout; 46 | fprintf(ostream, "static float colorMap[%d][%d][3] = \n{\n", height, width); 47 | for (int h = 0; h < height; h++) 48 | { 49 | fprintf(ostream, " { "); 50 | for (int w = 0; w < width; w++) 51 | { 52 | fprintf(ostream, "{%d.0f,%d.0f,%d.0f}%c", 53 | (int)color_array[0 + 3*(w+h*width)], 54 | (int)color_array[1 + 3*(w+h*width)], 55 | (int)color_array[2 + 3*(w+h*width)], 56 | w < width-1 ? ',':' '); 57 | } 58 | fprintf(ostream, "},\n"); 59 | } 60 | fprintf(ostream, "};\n"); 61 | 62 | 63 | 64 | 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/Cubemap.h: -------------------------------------------------------------------------------- 1 | GLuint loadCubemap(char *filenameFormat); 2 | GLuint loadCubemapCross(char *filename); 3 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/GLSLProgram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | // Simple class to contain GLSL shaders/programs 13 | 14 | #ifndef GLSL_PROGRAM_H 15 | #define GLSL_PROGRAM_H 16 | 17 | #include 18 | #include 19 | 20 | class GLSLProgram 21 | { 22 | public: 23 | // construct program from strings 24 | GLSLProgram(const char *vsource, const char *fsource); 25 | GLSLProgram(const char *vsource, const char *gsource, const char *fsource, 26 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 27 | ~GLSLProgram(); 28 | 29 | void enable(); 30 | void disable(); 31 | 32 | GLint getAttribLoc(const char *name); 33 | void setUniform1f(const GLchar *name, GLfloat x); 34 | void setUniform2f(const GLchar *name, GLfloat x, GLfloat y); 35 | void setUniform3f(const char *name, float x, float y, float z); 36 | void setUniform4f(const char *name, float x, float y, float z, float w); 37 | void setUniformfv(const GLchar *name, GLfloat *v, int elementSize, int count=1); 38 | void setUniformMatrix4fv(const GLchar *name, GLfloat *m, bool transpose); 39 | 40 | void bindTexture(const char *name, GLuint tex, GLenum target, GLint unit); 41 | 42 | inline GLuint getProgId() { return mProg; } 43 | 44 | private: 45 | GLuint compileProgram(const char *vsource, const char *gsource, const char *fsource, 46 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 47 | GLuint mProg; 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/SmokeShaders.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. 
Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | extern const char *particleVS, *simpleVS; 13 | extern const char *particleSpherePS, *simplePS, *particlePS, *particleAAPS, *particleShadowPS; 14 | extern const char *mblurVS, *mblurGS; 15 | extern const char *passThruVS, *transformVS, *texture2DPS; 16 | extern const char *blurPS, *blur3x3PS, *blur2PS; 17 | extern const char *starFilterPS, *compositePS; 18 | extern const char *thresholdPS; 19 | extern const char *downSample4PS, *downSample2PS; 20 | extern const char *gaussianBlurPS; 21 | extern const char *floorVS, *floorPS; 22 | extern const char *volumeVS, *volumePS; 23 | extern const char *skyboxVS, *skyboxPS; 24 | extern const char *splotchPS, *splotchGS, *splotchVS, *splotch2texPS; 25 | extern const char *volnewPS, *volnewGS, *volnewVS, *volnew2texPS, *volnewCompositePS; 26 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/depthSort.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "vector_math.h" 6 | 7 | 8 | #if 0 9 | // calculate eye-space depth for each particle 10 | KERNEL_DECLARE(calcDepthKernel)(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles) 11 | { 12 | int i = blockIdx.x*blockDim.x + threadIdx.x; 13 | if (i >= numParticles) return; 14 | 15 | float4 p = pos[i]; 16 | float z = dot(make_float4(p.x, p.y, p.z, 1.0f), modelViewZ); 17 | 18 | depth[i] = z; 19 | indices[i] = i; 20 | } 21 | #endif 22 | 23 | typedef std::pair KeyVal_t; 24 | struct Cmp 25 | { 26 | bool operator()(const KeyVal_t &lhs, const KeyVal_t &rhs) 27 | { 28 | return lhs.first < rhs.first; 29 | } 30 | }; 31 | void sort_by_key(float* keys, int* values, int count) 32 | { 33 | std::vector pairs(count); 34 | #pragma omp 
parallel for 35 | for (int i = 0; i < count; i++) 36 | pairs[i] = std::make_pair(keys[i], values[i]); 37 | #if 1 38 | __gnu_parallel::sort(pairs.begin(), pairs.end(), Cmp()); 39 | #endif 40 | #pragma omp parallel for 41 | for (int i = 0; i < count; i++) 42 | { 43 | keys [i] = pairs[i].first; 44 | values[i] = pairs[i].second; 45 | } 46 | } 47 | 48 | extern "C" 49 | void depthSortCUDA(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles) 50 | { 51 | for (int i = 0; i < numParticles; i++) 52 | { 53 | float4 p = pos[i]; 54 | float z = dot(make_float4(p.x, p.y, p.z, 1.0f), modelViewZ); 55 | 56 | depth[i] = z; 57 | indices[i] = i; 58 | } 59 | sort_by_key(depth, indices, numParticles); 60 | } 61 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/depthSort.h: -------------------------------------------------------------------------------- 1 | #include "vector_math.h" 2 | 3 | extern "C" void initCUDA(); 4 | extern "C" void depthSortCUDA(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles); 5 | extern "C" void assignColors(float4 *colors, ulonglong1 *ids, int numParticles, 6 | float4 color2, float4 color3, float4 color4, 7 | float4 starColor, float4 bulgeColor, float4 darkMatterColor, float4 dustColor, 8 | int m_brightFreq, float4 t_current); 9 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/loadPPM.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "loadPPM.h" 4 | 5 | Image *loadPPM(const char *filename) 6 | { 7 | char buff[16]; 8 | Image *result; 9 | FILE *fp; 10 | int maxval; 11 | 12 | fp = fopen(filename, "rb"); 13 | if (!fp) 14 | { 15 | fprintf(stderr, "Unable to open file `%s'\n", filename); 16 | return 0; 17 | } 18 | 19 | if (!fgets(buff, sizeof(buff), fp)) 20 | { 21 | fprintf(stderr, "Error opening file '%s'\n", 
filename); 22 | return 0; 23 | } 24 | 25 | if (buff[0] != 'P' || buff[1] != '6') 26 | { 27 | fprintf(stderr, "Invalid image format (must be `P6')\n"); 28 | return 0; 29 | } 30 | 31 | int c = fgetc(fp); 32 | if (c == '#') { 33 | // skip comment 34 | char str[256]; 35 | fgets(str, 256, fp); 36 | } else { 37 | ungetc(c, fp); 38 | } 39 | 40 | result = (Image *) malloc(sizeof(Image)); 41 | if (!result) 42 | { 43 | fprintf(stderr, "Unable to allocate memory\n"); 44 | exit(1); 45 | } 46 | 47 | int r = 0; 48 | if (8 == sizeof(void*)) 49 | r = fscanf(fp, "%llu %llu", &result->width, &result->height); 50 | else 51 | r = fscanf(fp, "%lu %lu", &result->width, &result->height); 52 | 53 | if (r != 2) 54 | { 55 | fprintf(stderr, "Error loading image `%s'\n", filename); 56 | return 0; 57 | } 58 | while (fgetc(fp) != '\n'); 59 | 60 | if (fscanf(fp, "%d", &maxval) != 1) 61 | { 62 | fprintf(stderr, "Error loading image `%s'\n", filename); 63 | return 0; 64 | } 65 | while (fgetc(fp) != '\n'); 66 | 67 | result->data = (unsigned char *) malloc(3 * result->width * result->height); 68 | if (!result->data) 69 | { 70 | fprintf(stderr, "Unable to allocate memory\n"); 71 | exit(1); 72 | } 73 | 74 | if (fread(result->data, 3 * result->width, result->height, fp) != result->height) 75 | { 76 | fprintf(stderr, "Error loading image `%s'\n", filename); 77 | return 0; 78 | } 79 | 80 | fprintf(stdout, "Loaded `%s', %d x %d\n", filename, result->width, result->height); 81 | 82 | fclose(fp); 83 | 84 | return result; 85 | } 86 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/loadPPM.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | size_t width, height; 3 | unsigned char *data; 4 | } Image; 5 | 6 | Image *loadPPM(const char *filename); 7 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/renderloop.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _RENDERLOOP_H_ 2 | #define _RENDERLOOP_H_ 3 | 4 | #include "RendererData.h" 5 | #include 6 | #include 7 | 8 | void initAppRenderer(int argc, char** argv, 9 | const int rank, const int nrank, const MPI_Comm &comm, 10 | RendererData &data, 11 | const char *fulleScreenMode /* = "" */, 12 | const bool stereo /* = false */, 13 | std::function &updateFunc, 14 | const std::string imagefn); 15 | #endif // _RENDERLOOP_H_ 16 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/density/timer.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 4 | // 5 | // Please refer to the NVIDIA end user license agreement (EULA) associated 6 | // with this source code for terms and conditions that govern your use of 7 | // this software. Any use, reproduction, disclosure, or distribution of 8 | // this software and related documentation outside the terms of the EULA 9 | // is strictly prohibited. 
//
/////////////////////////////////////////////////////////////////////////////

#ifndef TIMER_H
#define TIMER_H

#include <stdio.h>

#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#else
#include <sys/time.h>
#endif

// Timer state. Everything in this header has internal linkage so that it can
// be included from more than one translation unit without multiple-definition
// link errors; each translation unit therefore has its own independent timer.
#ifdef _WIN32
static double  PCFreq = 0.0;     // performance-counter ticks per millisecond
static __int64 timerStart = 0;   // counter value captured by StartTimer()
#else
static struct timeval timerStart; // wall-clock time captured by StartTimer()
#endif

// Records "now" as the reference point for subsequent GetTimer() calls.
static inline void StartTimer()
{
#ifdef _WIN32
  LARGE_INTEGER li;

  if (!QueryPerformanceFrequency(&li))
  {
    printf("QueryPerformanceFrequency failed!\n");
  }

  PCFreq = (double)li.QuadPart/1000.0;
  QueryPerformanceCounter(&li);
  timerStart = li.QuadPart;
#else
  gettimeofday(&timerStart, NULL);
#endif
}

// Returns the time elapsed since the last StartTimer() call, in milliseconds.
static inline double GetTimer()
{
#ifdef _WIN32
  LARGE_INTEGER li;
  QueryPerformanceCounter(&li);
  return (double)(li.QuadPart-timerStart)/PCFreq;
#else
  struct timeval timerStop, timerElapsed;
  gettimeofday(&timerStop, NULL);
  timersub(&timerStop, &timerStart, &timerElapsed);
  return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0;
#endif
}
#endif // TIMER_H
9 | * 10 | */ 11 | 12 | #include 13 | 14 | const Param dummy("error"); 15 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/params.txt: -------------------------------------------------------------------------------- 1 | render_params_points 2 | render_params_volumetric 3 | slices 256 4 | displayed_slices 256 5 | sprite_size 0.074 6 | scale_[log] 0 7 | dust_scale 12 8 | dust_alpha 0.316 9 | light_color_r 0.032 10 | light_color_g 0.036 11 | light_color_b 0.036 12 | alpha 0.1 13 | shadow_alpha 0.1 14 | transmission 0.004 15 | indirect_lighting 0.5 16 | fog 0.001 17 | over_bright_multiplier 3.6 18 | star_brightness 0.44 19 | image_brightness 1.3 20 | image_gamma 0.454545 21 | blur_radius 0.98 22 | blur_passes 4 23 | source_intensity 0.5 24 | star_blur_radius 40 25 | star_threshold 1.2 26 | star_power 1.2 27 | star_intensity 0 28 | glow_radius 25.2 29 | glow_intensity 0.1 30 | flare_intensity 0 31 | flare_threshold 0.72 32 | flare_radius 40.8 33 | skybox_brightness 0.24 34 | render_params_splotch 35 | star_scale_[log] 0 36 | star_alpha______ 1 37 | dm_scale___[log] -0.4 38 | dm_alpha________ 0.1 39 | max_size_[log]__ 0 40 | alpha___________ 0.02 41 | transmission____ 0.001 42 | brightness_[pre] 0.08 43 | gamma_[pre] 0.4 44 | brightness_[post] 1 45 | gamma_[post] 1 46 | render_params_splotch_sorted 47 | star_scale_[log] 0 48 | star_alpha______ 1 49 | dm_scale___[log] -0.4 50 | dm_alpha________ 0.1 51 | max_size_[log]__ 0 52 | alpha___________ 0.02 53 | transmission____ 0.001 54 | brightness_[pre] 0.08 55 | gamma_[pre] 0.4 56 | brightness_[post] 1 57 | gamma_[post] 1 58 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/particles/renderer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 
3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | #ifndef __RENDER_PARTICLES__ 13 | #define __RENDER_PARTICLES__ 14 | 15 | #include "paramgl.h" 16 | void checkGLErrors(const char *s); 17 | 18 | class ParticleRenderer 19 | { 20 | public: 21 | ParticleRenderer(); 22 | ~ParticleRenderer(); 23 | 24 | void setPositions(float *pos, int numParticles); 25 | void setPositions(double *pos, int numParticles); 26 | void setBaseColor(float color[4]); 27 | void setColors(float *color, int numParticles); 28 | void setPBO(unsigned int pbo, int numParticles, bool fp64); 29 | 30 | enum DisplayMode 31 | { 32 | PARTICLE_POINTS, 33 | PARTICLE_SPRITES, 34 | PARTICLE_SPRITES_COLOR, 35 | PARTICLE_NUM_MODES 36 | }; 37 | 38 | void display(DisplayMode mode = PARTICLE_POINTS); 39 | 40 | void setPointSize(float size) { m_pointSize = size; } 41 | void setSpriteSize(float size) { m_spriteSize = size; } 42 | 43 | void resetPBO(); 44 | ParamListGL *getParams() { return m_params; } 45 | 46 | protected: // methods 47 | void _initGL(); 48 | void _createTexture(int resolution); 49 | void _drawPoints(bool color = false); 50 | void initParams(); 51 | 52 | 53 | protected: // data 54 | float *m_pos; 55 | double *m_pos_fp64; 56 | int m_numParticles; 57 | 58 | float m_pointSize; 59 | float m_spriteSize; 60 | 61 | unsigned int m_vertexShader; 62 | unsigned int m_vertexShaderPoints; 63 | unsigned int m_pixelShader; 64 | unsigned int m_programPoints; 65 | unsigned int m_programSprites; 66 | unsigned int m_texture; 67 | unsigned int m_pbo; 68 | unsigned int m_vboColor; 69 | 70 | float m_baseColor[4]; 71 | 72 | bool m_bFp64Positions; 73 | ParamListGL *m_params; 74 | float m_testParam; 75 | }; 76 | 77 | 
#endif //__ RENDER_PARTICLES__ 78 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/particles/renderloop.h: -------------------------------------------------------------------------------- 1 | #ifndef _RENDERLOOP_H_ 2 | #define _RENDERLOOP_H_ 3 | 4 | #include "RendererData.h" 5 | 6 | void initAppRenderer(int argc, char** argv, 7 | RendererData &data); 8 | #define PARTICLESRENDERER 9 | #endif // _RENDERLOOP_H_ 10 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/Cubemap.h: -------------------------------------------------------------------------------- 1 | GLuint loadCubemap(char *filenameFormat); 2 | GLuint loadCubemapCross(char *filename); 3 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/GLSLProgram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | // Simple class to contain GLSL shaders/programs 13 | 14 | #ifndef GLSL_PROGRAM_H 15 | #define GLSL_PROGRAM_H 16 | 17 | #include 18 | #include 19 | 20 | class GLSLProgram 21 | { 22 | public: 23 | // construct program from strings 24 | GLSLProgram(const char *vsource, const char *fsource); 25 | GLSLProgram(const char *vsource, const char *gsource, const char *fsource, 26 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 27 | ~GLSLProgram(); 28 | 29 | void enable(); 30 | void disable(); 31 | 32 | void setUniform1f(const GLchar *name, GLfloat x); 33 | void setUniform2f(const GLchar *name, GLfloat x, GLfloat y); 34 | void setUniform3f(const char *name, float x, float y, float z); 35 | void setUniform4f(const char *name, float x, float y, float z, float w); 36 | void setUniformfv(const GLchar *name, GLfloat *v, int elementSize, int count=1); 37 | void setUniformMatrix4fv(const GLchar *name, GLfloat *m, bool transpose); 38 | 39 | void bindTexture(const char *name, GLuint tex, GLenum target, GLint unit); 40 | 41 | inline GLuint getProgId() { return mProg; } 42 | 43 | private: 44 | GLuint compileProgram(const char *vsource, const char *gsource, const char *fsource, 45 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 46 | GLuint mProg; 47 | }; 48 | 49 | #endif -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/SmokeShaders.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
// libstdc++'s parallel mode only links when OpenMP is enabled; every other
// build uses std::sort below.
#if defined(_OPENMP) && defined(__GLIBCXX__)
#include <parallel/algorithm>
#endif

// (key, value) pair, e.g. (depth, index) as passed in by depthSortCUDA.
typedef std::pair<float, int> KeyVal_t;

// Strict weak ordering on the key only; the value does not affect the order.
struct Cmp
{
  bool operator()(const KeyVal_t &lhs, const KeyVal_t &rhs) const
  {
    return lhs.first < rhs.first;
  }
};

// Sorts `keys[0..count)` into ascending order and applies the same
// permutation to `values[0..count)`, both in place.
// The relative order of equal keys is unspecified. A non-positive `count`
// is a no-op.
void sort_by_key(float* keys, int* values, int count)
{
  // A negative count would turn into an enormous unsigned vector size.
  if (count <= 0)
    return;

  std::vector<KeyVal_t> pairs(count);
#pragma omp parallel for
  for (int i = 0; i < count; i++)
    pairs[i] = std::make_pair(keys[i], values[i]);

#if defined(_OPENMP) && defined(__GLIBCXX__)
  __gnu_parallel::sort(pairs.begin(), pairs.end(), Cmp());
#else
  std::sort(pairs.begin(), pairs.end(), Cmp());
#endif

#pragma omp parallel for
  for (int i = 0; i < count; i++)
  {
    keys  [i] = pairs[i].first;
    values[i] = pairs[i].second;
  }
}
47 | void depthSortCUDA(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles) 48 | { 49 | for (int i = 0; i < numParticles; i++) 50 | { 51 | float4 p = pos[i]; 52 | float z = dot(make_float4(p.x, p.y, p.z, 1.0f), modelViewZ); 53 | 54 | depth[i] = z; 55 | indices[i] = i; 56 | } 57 | sort_by_key(depth, indices, numParticles); 58 | } 59 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/depthSort.h: -------------------------------------------------------------------------------- 1 | #include "vector_math.h" 2 | 3 | extern "C" void initCUDA(); 4 | extern "C" void depthSortCUDA(float4 *pos, float *depth, int *indices, float4 modelViewZ, int numParticles); 5 | extern "C" void assignColors(float4 *colors, ulonglong1 *ids, int numParticles, 6 | float4 color2, float4 color3, float4 color4, 7 | float4 starColor, float4 bulgeColor, float4 darkMatterColor, float4 dustColor, 8 | int m_brightFreq, float4 t_current); 9 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/loadPPM.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "loadPPM.h" 4 | 5 | Image *loadPPM(const char *filename) 6 | { 7 | char buff[16]; 8 | Image *result; 9 | FILE *fp; 10 | int maxval; 11 | 12 | fp = fopen(filename, "rb"); 13 | if (!fp) 14 | { 15 | fprintf(stderr, "Unable to open file `%s'\n", filename); 16 | return 0; 17 | } 18 | 19 | if (!fgets(buff, sizeof(buff), fp)) 20 | { 21 | fprintf(stderr, "Error opening file '%s'\n", filename); 22 | return 0; 23 | } 24 | 25 | if (buff[0] != 'P' || buff[1] != '6') 26 | { 27 | fprintf(stderr, "Invalid image format (must be `P6')\n"); 28 | return 0; 29 | } 30 | 31 | int c = fgetc(fp); 32 | if (c == '#') { 33 | // skip comment 34 | char str[256]; 35 | fgets(str, 256, fp); 36 | } else { 37 | ungetc(c, fp); 38 | } 39 | 40 | result = (Image *) 
malloc(sizeof(Image)); 41 | if (!result) 42 | { 43 | fprintf(stderr, "Unable to allocate memory\n"); 44 | exit(1); 45 | } 46 | 47 | int r = 0; 48 | if (8 == sizeof(void*)) 49 | r = fscanf(fp, "%llu %llu", &result->width, &result->height); 50 | else 51 | r = fscanf(fp, "%lu %lu", &result->width, &result->height); 52 | 53 | if (r != 2) 54 | { 55 | fprintf(stderr, "Error loading image `%s'\n", filename); 56 | return 0; 57 | } 58 | while (fgetc(fp) != '\n'); 59 | 60 | if (fscanf(fp, "%d", &maxval) != 1) 61 | { 62 | fprintf(stderr, "Error loading image `%s'\n", filename); 63 | return 0; 64 | } 65 | while (fgetc(fp) != '\n'); 66 | 67 | result->data = (unsigned char *) malloc(3 * result->width * result->height); 68 | if (!result->data) 69 | { 70 | fprintf(stderr, "Unable to allocate memory\n"); 71 | exit(1); 72 | } 73 | 74 | if (fread(result->data, 3 * result->width, result->height, fp) != result->height) 75 | { 76 | fprintf(stderr, "Error loading image `%s'\n", filename); 77 | return 0; 78 | } 79 | 80 | fprintf(stdout, "Loaded `%s', %d x %d\n", filename, result->width, result->height); 81 | 82 | fclose(fp); 83 | 84 | return result; 85 | } 86 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/loadPPM.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | size_t width, height; 3 | unsigned char *data; 4 | } Image; 5 | 6 | Image *loadPPM(const char *filename); 7 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/smoke/renderloop.h: -------------------------------------------------------------------------------- 1 | #ifndef _RENDERLOOP_H_ 2 | #define _RENDERLOOP_H_ 3 | 4 | #include "RendererData.h" 5 | 6 | void initAppRenderer(int argc, char** argv, 7 | RendererData &data, 8 | const char *fulleScreenMode = "", 9 | const bool stereo = false); 10 | #endif // _RENDERLOOP_H_ 11 | 
/////////////////////////////////////////////////////////////////////////////
//
// Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
//
// Please refer to the NVIDIA end user license agreement (EULA) associated
// with this source code for terms and conditions that govern your use of
// this software. Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
//
/////////////////////////////////////////////////////////////////////////////

#ifndef TIMER_H
#define TIMER_H

#include <stdio.h>

#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#else
#include <sys/time.h>
#endif

// Timer state. Everything in this header has internal linkage so that it can
// be included from more than one translation unit without multiple-definition
// link errors; each translation unit therefore has its own independent timer.
#ifdef _WIN32
static double  PCFreq = 0.0;     // performance-counter ticks per millisecond
static __int64 timerStart = 0;   // counter value captured by StartTimer()
#else
static struct timeval timerStart; // wall-clock time captured by StartTimer()
#endif

// Records "now" as the reference point for subsequent GetTimer() calls.
static inline void StartTimer()
{
#ifdef _WIN32
  LARGE_INTEGER li;

  if (!QueryPerformanceFrequency(&li))
  {
    printf("QueryPerformanceFrequency failed!\n");
  }

  PCFreq = (double)li.QuadPart/1000.0;
  QueryPerformanceCounter(&li);
  timerStart = li.QuadPart;
#else
  gettimeofday(&timerStart, NULL);
#endif
}

// Returns the time elapsed since the last StartTimer() call, in milliseconds.
static inline double GetTimer()
{
#ifdef _WIN32
  LARGE_INTEGER li;
  QueryPerformanceCounter(&li);
  return (double)(li.QuadPart-timerStart)/PCFreq;
#else
  struct timeval timerStop, timerElapsed;
  gettimeofday(&timerStop, NULL);
  timersub(&timerStop, &timerStart, &timerElapsed);
  return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0;
#endif
}
#endif // TIMER_H
-------------------------------------------------------------------------------- /tools/bonsaiRenderer/splotch/Blending.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class Blending 4 | { 5 | public: 6 | enum Type {ONE, ZERO, SRC_ALPHA, ONE_MINUS_SRC_ALPHA}; 7 | 8 | private: 9 | template 10 | static float4 blendColor(const float4 col, const float4 src, const float4 dst) 11 | { 12 | float4 res = col; 13 | switch (TYPE) 14 | { 15 | case ONE_MINUS_SRC_ALPHA: 16 | res.x *= 1.0f - src.w; 17 | res.y *= 1.0f - src.w; 18 | res.z *= 1.0f - src.w; 19 | res.w *= 1.0f - src.w; 20 | break; 21 | case SRC_ALPHA: 22 | res.x *= src.w; 23 | res.y *= src.w; 24 | res.z *= src.w; 25 | res.w *= src.w; 26 | break; 27 | case ZERO: 28 | res.x = res.y = res.z = res.w = 0; 29 | break; 30 | case ONE: 31 | break; 32 | default: 33 | assert(0); 34 | } 35 | return res; 36 | } 37 | 38 | public: 39 | 40 | template 41 | static float4 getColor(const float4 d, const float4 s) 42 | { 43 | float4 res; 44 | 45 | const float4 src = blendColor(s,s,d); 46 | const float4 dst = blendColor(d,s,d); 47 | 48 | res.x = src.x + dst.x; 49 | res.y = src.y + dst.y; 50 | res.z = src.z + dst.z; 51 | res.w = src.w + dst.w; 52 | 53 | #if 0 54 | using std::min; 55 | res.w = min(1.0f, res.w); 56 | #endif 57 | 58 | return res; 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /tools/bonsaiRenderer/splotch/GLSLProgram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | // Simple class to contain GLSL shaders/programs 13 | 14 | #ifndef GLSL_PROGRAM_H 15 | #define GLSL_PROGRAM_H 16 | 17 | #include 18 | #include 19 | 20 | class GLSLProgram 21 | { 22 | public: 23 | // construct program from strings 24 | GLSLProgram(const char *vsource, const char *fsource); 25 | GLSLProgram(const char *vsource, const char *gsource, const char *fsource, 26 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 27 | ~GLSLProgram(); 28 | 29 | void enable(); 30 | void disable(); 31 | 32 | void setUniform1f(const GLchar *name, GLfloat x); 33 | void setUniform2f(const GLchar *name, GLfloat x, GLfloat y); 34 | void setUniform3f(const char *name, float x, float y, float z); 35 | void setUniform4f(const char *name, float x, float y, float z, float w); 36 | void setUniformfv(const GLchar *name, GLfloat *v, int elementSize, int count=1); 37 | void setUniformMatrix4fv(const GLchar *name, GLfloat *m, bool transpose); 38 | 39 | void bindTexture(const char *name, GLuint tex, GLenum target, GLint unit); 40 | 41 | inline GLuint getProgId() { return mProg; } 42 | 43 | private: 44 | GLuint compileProgram(const char *vsource, const char *gsource, const char *fsource, 45 | GLenum gsInput = GL_POINTS, GLenum gsOutput = GL_TRIANGLE_STRIP); 46 | GLuint mProg; 47 | }; 48 | 49 | #endif -------------------------------------------------------------------------------- /tools/bonsaiRenderer/splotch/MathArray.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | template 4 | class MathArray 5 | { 6 | private: 7 | T data[K]; 8 | template 9 | static void foreach(Func F) { for (int i = 0; i < K; i++) F(i); } 10 | 11 | template 12 | static MathArray& foreach(MathArray &res, Func F) { for (int i = 0; i < K; i++) F(i); return res;} 13 | 14 | 15 | public: 16 | 17 | int size() const {return K; } 18 | MathArray() {} 19 | explicit MathArray(const T &v) { for (auto &x : data) x = v; } 20 | T& 
operator[](const int i) { return data[i]; } 21 | T operator[](const int i) const {return data[i]; } 22 | 23 | MathArray &operator-(MathArray rhs) { return foreach(rhs, [&](int i) {rhs[i] = -rhs[i];}); } 24 | 25 | MathArray& operator+=(const MathArray& rhs) { foreach([&](int i){ data[i] += rhs[i]; }); return *this; } 26 | MathArray& operator-=(const MathArray& rhs) { foreach([&](int i){ data[i] -= rhs[i]; }); return *this; } 27 | MathArray& operator*=(const MathArray& rhs) { foreach([&](int i){ data[i] *= rhs[i]; }); return *this; } 28 | MathArray& operator/=(const MathArray& rhs) { foreach([&](int i){ data[i] /= rhs[i]; }); return *this; } 29 | friend MathArray operator+(MathArray rhs, const MathArray& lhs) { return (rhs += lhs); } 30 | friend MathArray operator-(MathArray rhs, const MathArray& lhs) { return (rhs -= lhs); } 31 | friend MathArray operator*(MathArray rhs, const MathArray& lhs) { return (rhs *= lhs); } 32 | friend MathArray operator/(MathArray rhs, const MathArray& lhs) { return (rhs /= lhs); } 33 | 34 | MathArray& operator+=(const T& rhs) { return foreach(*this, [&](int i){ data[i] += rhs; }); } 35 | MathArray& operator-=(const T& rhs) { return foreach(*this, [&](int i){ data[i] -= rhs; }); } 36 | MathArray& operator*=(const T& rhs) { return foreach(*this, [&](int i){ data[i] *= rhs; }); } 37 | MathArray& operator/=(const T& rhs) { return foreach(*this, [&](int i){ data[i] /= rhs; }); } 38 | friend MathArray operator+(MathArray rhs, const T& lhs) { return (rhs += lhs); } 39 | friend MathArray operator-(MathArray rhs, const T& lhs) { return (rhs -= lhs); } 40 | friend MathArray operator*(MathArray rhs, const T& lhs) { return (rhs *= lhs); } 41 | friend MathArray operator/(MathArray rhs, const T& lhs) { return (rhs /= lhs); } 42 | 43 | friend MathArray operator+(const T& rhs, MathArray lhs) { return (lhs += rhs); } 44 | friend MathArray operator*(const T& rhs, MathArray lhs) { return (lhs *= rhs); } 45 | friend MathArray operator-(const T& rhs, 
// Owning lookup tables sampled with normalised coordinates in [0,1].
//
// Texture1D and Texture2D each keep a private heap copy of the samples handed
// to the constructor and return a linearly (1D) or bilinearly (2D)
// interpolated value.  T must support `T * float` and `T + T`, and must be
// default-constructible (an empty texture samples as T()).
//
// Both classes implement copy construction and copy assignment as deep
// copies, so copies never share (and never double-delete) the sample array.

template <typename T>
class Texture1D
{
  private:
    T  *data;    // private copy of the samples; NULL when the texture is empty
    int count;   // number of samples stored in `data`

    // Return a newly allocated copy of src[0..n), or NULL if there is nothing to copy.
    static T* clone(const T *src, const int n)
    {
      if (src == NULL || n <= 0) return NULL;
      T *dst = new T[n];
      for (int i = 0; i < n; i++)
        dst[i] = src[i];
      return dst;
    }

  public:
    // Empty texture: every lookup returns T().
    Texture1D() : data(NULL), count(0) {}

    // Copy `_size` samples from `_data`.
    Texture1D(const T *_data, const int _size) :
      data(clone(_data, _size)), count(data ? _size : 0) {}

    Texture1D(const Texture1D &src) :
      data(clone(src.data, src.count)), count(src.count) {}

    ~Texture1D()
    {
      delete[] data;   // delete[] on NULL is a no-op
    }

    Texture1D& operator=(const Texture1D &src)
    {
      if (this != &src)
      {
        // Copy before releasing so a throwing allocation leaves *this intact.
        T *fresh = clone(src.data, src.count);
        delete[] data;
        data  = fresh;
        count = src.count;
      }
      return *this;
    }

    // Sample at s (clamped to [0,1]); s = 0 maps to the first sample and
    // s = 1 to the last, with linear interpolation in between.
    T operator()(float s) const
    {
      using std::max;
      using std::min;
      if (count <= 0) return T();
      s = max(0.0f,min(1.0f,s));
      const float last = static_cast<float>(count-1);
      const float x  = s*last;
      const float x1 = floor(x);
      const float x2 = min(x1+1.0f,last);
      const float w  = x - x1;   // fractional position between x1 and x2

      const T c1 = data[static_cast<int>(x1)];
      const T c2 = data[static_cast<int>(x2)];

      // The original returned c1 + c2*(x-x2), which is not an interpolation
      // (it yields e.g. -5 halfway between samples 0 and 10).
      return c1*(1.0f-w) + c2*w;
    }
};

template <typename T>
class Texture2D
{
  private:
    T  *data;     // private row-major copy of the samples; NULL when empty
    int width;    // samples per row
    int height;   // number of rows

    // Return a newly allocated copy of src[0..n), or NULL if there is nothing to copy.
    static T* clone(const T *src, const int n)
    {
      if (src == NULL || n <= 0) return NULL;
      T *dst = new T[n];
      for (int i = 0; i < n; i++)
        dst[i] = src[i];
      return dst;
    }

  public:
    // Empty texture: every lookup returns T().
    Texture2D() : data(NULL), width(0), height(0) {}

    // Copy `_width*_height` row-major samples from `_data`.
    Texture2D(const T *_data, const int _width, const int _height) :
      data(clone(_data, _width*_height)),
      width (data ? _width  : 0),
      height(data ? _height : 0) {}

    Texture2D(const Texture2D &src) :
      data(clone(src.data, src.width*src.height)),
      width(src.width), height(src.height) {}

    ~Texture2D()
    {
      delete[] data;
    }

    // Deep-copy assignment.  The original deleted `data` and then assigned to
    // *this from inside operator=, i.e. it called itself without bound.
    Texture2D& operator=(const Texture2D &src)
    {
      if (this != &src)
      {
        T *fresh = clone(src.data, src.width*src.height);
        delete[] data;
        data   = fresh;
        width  = src.width;
        height = src.height;
      }
      return *this;
    }

    // Bilinear sample at (s,t), each clamped to [0,1].
    //
    // NOTE(review): as in the original, coordinates are scaled by (width-2)
    // and (height-2), so s = 1 / t = 1 land on the second-to-last column/row
    // and the last one only ever receives zero weight.  That mapping is kept
    // unchanged for existing callers -- confirm whether (width-1) was meant.
    T operator()(float s, float t) const
    {
      using std::max;
      using std::min;
      if (data == NULL) return T();
      s = max(0.0f,min(1.0f,s));
      t = max(0.0f,min(1.0f,t));
      // max(0, ...) keeps the index in range when width or height is 1.
      const float x = max(0.0f, s*static_cast<float>(width -2));
      const float y = max(0.0f, t*static_cast<float>(height-2));
      const float x1 = floor(x);
      const float y1 = floor(y);
      const float x2 = min(x1+1.0f,static_cast<float>(width -1));
      const float y2 = min(y1+1.0f,static_cast<float>(height-1));
      const float wx = x - x1;   // weight of the x2 column
      const float wy = y - y1;   // weight of the y2 row

      const int ix1 = static_cast<int>(x1), ix2 = static_cast<int>(x2);
      const int iy1 = static_cast<int>(y1), iy2 = static_cast<int>(y2);

      const T c11 = data[iy1*width + ix1];
      const T c12 = data[iy2*width + ix1];
      const T c21 = data[iy1*width + ix2];
      const T c22 = data[iy2*width + ix2];

      return
        c11*(1.0f-wx)*(1.0f-wy) +
        c21*(     wx)*(1.0f-wy) +
        c12*(1.0f-wx)*(     wy) +
        c22*(     wx)*(     wy) ;
    }
};
// Particle renderer interface.  Only the declaration is visible in this
// header; comments describe what the declarations show and hedge the rest.
class ParticleRenderer
{
  public:
    ParticleRenderer();
    ~ParticleRenderer();

    // Two overloads: single- and double-precision position arrays.
    // NOTE(review): the element layout of `pos` (e.g. 3 or 4 values per
    // particle) is not visible here -- confirm in the implementation.
    void setPositions(float *pos, int numParticles);
    void setPositions(double *pos, int numParticles);
    void setBaseColor(float color[4]);
    void setColors(float *color, int numParticles);
    // `pbo` is an unsigned handle; `fp64` presumably states whether the
    // buffer holds double-precision positions -- TODO confirm.
    void setPBO(unsigned int pbo, int numParticles, bool fp64);

    // Rendering modes accepted by display(); PARTICLE_NUM_MODES is the count
    // of the modes listed before it.
    enum DisplayMode
    {
      PARTICLE_POINTS,
      PARTICLE_SPRITES,
      PARTICLE_SPRITES_COLOR,
      PARTICLE_NUM_MODES
    };

    // Draw using the given mode (defaults to PARTICLE_POINTS).
    void display(DisplayMode mode = PARTICLE_POINTS);

    // Inline setters: store the value in m_pointSize / m_spriteSize.
    void setPointSize(float size)  { m_pointSize = size; }
    void setSpriteSize(float size) { m_spriteSize = size; }

    void resetPBO();
    // Returns the stored m_params pointer (ownership not visible here).
    ParamListGL *getParams() { return m_params; }

  protected: // methods
    void _initGL();
    void _createTexture(int resolution);
    void _drawPoints(bool color = false);
    void initParams();


  protected: // data
    float *m_pos;          // presumably the array given to setPositions(float*) -- verify
    double *m_pos_fp64;    // presumably the array given to setPositions(double*) -- verify
    int m_numParticles;

    float m_pointSize;     // written by setPointSize()
    float m_spriteSize;    // written by setSpriteSize()

    // Unsigned handles; the names suggest GL shader/program/texture/buffer
    // objects -- confirm against _initGL().
    unsigned int m_vertexShader;
    unsigned int m_vertexShaderPoints;
    unsigned int m_pixelShader;
    unsigned int m_programPoints;
    unsigned int m_programSprites;
    unsigned int m_texture;
    unsigned int m_pbo;
    unsigned int m_vboColor;

    float m_baseColor[4];

    bool m_bFp64Positions;
    ParamListGL *m_params; // returned by getParams()
    float m_testParam;
};
void initAppRenderer(int argc, char** argv, 7 | RendererData &data); 8 | #define PARTICLESRENDERER 9 | #endif // _RENDERLOOP_H_ 10 | -------------------------------------------------------------------------------- /tools/catalyst/BonsaiCatalystData.h: -------------------------------------------------------------------------------- 1 | #ifndef _BONSAI_CATALYST_DATA_ 2 | #define _BONSAI_CATALYST_DATA_ 3 | 4 | #pragma once 5 | 6 | #include "RendererData.h" 7 | #include "vtkSmartPointer.h" 8 | 9 | class vtkBonsaiPipeline; 10 | class vtkCPProcessor; 11 | class vtkCPDataDescription; 12 | class vtkPolyData; 13 | 14 | class BonsaiCatalystData : public RendererData 15 | { 16 | public: 17 | BonsaiCatalystData(const int rank, const int nrank, const MPI_Comm &comm); 18 | ~BonsaiCatalystData(); 19 | 20 | // virtual methods 21 | virtual void coProcess(double time, unsigned int timeStep); 22 | 23 | vtkSmartPointer coProcessor; 24 | vtkSmartPointer coProcessorData; 25 | // IsTimeDataSet is meant to be used to make sure that 26 | // needtocoprocessthistimestep() is called before 27 | // calling any of the other coprocessing functions. 
# Build rules for the `catalyst` tool.
#
# Optional switches (set on the make command line):
#   ICET=1      build against IceT found in $(ICETPATH)
#   APPLEOSX=1  use the MacPorts toolchain/paths instead of the Linux defaults

CXX = mpicxx
CC  = mpicc
LD  = mpicxx
F90 = mpif90

CFLAGS  = -Wall -Werror
CUDA_TK = /usr/local/cuda

CXXFLAGS  = -Wall -fopenmp -std=c++11 -fPIE
CXXFLAGS += -Werror
# ignore warnings caused by openmpi
CXXFLAGS += -Wno-literal-suffix
# ignore warnings in a constructor when initialization is not done in the order the variables are declared
CXXFLAGS += -Wno-reorder
# ignore warnings on main()
CXXFLAGS += -Wno-main


OPT += -O3 -g

CFLAGS   += $(OPT) -fPIE
CXXFLAGS += $(OPT)



# The original used $(CUDATK), which is never defined (the variable above is
# CUDA_TK), so NVCC expanded to "/bin/nvcc".
NVCC      = $(CUDA_TK)/bin/nvcc
NVCCFLAGS = -arch=sm_35

INCLUDE_FLAGS  = -I./ -I./$(ENGINE)/
INCLUDE_FLAGS += -I$(CUDA_TK)/include
LDFLAGS  = -lGL -lGLU -lglut -lGLEW -fopenmp -lrt
LDFLAGS += -pie
LDFLAGS += -rdynamic

ifeq ($(ICET),1)
  ICETPATH = ../../IceT
  INCLUDE_FLAGS += -I$(ICETPATH)/include
  LDFLAGS  += -L$(ICETPATH)/lib -lIceTCore -lIceTMPI
  CXXFLAGS += -DUSE_ICET
endif

ifeq ($(APPLEOSX),1)
  INCLUDE_FLAGS += -I/opt/local/include
  CXXFLAGS += -Wno-deprecated-declarations
  LDFLAGS   = -framework OpenGL -L/opt/local/lib -lglut -lglew -fopenmp -fpie
  LDFLAGS  += -L/opt/local/lib/gcc48 -lgomp -stdlib=libstdc++
  CXX = OMPI_CXX=g++-mp-4.8 mpicxx
  CC  = OMPI_CXX=g++-mp-4.8 mpicxx
  LD  = OMPI_CXX=clang++ mpicxx
else
  CXXFLAGS += -mavx
endif

OBJ  = main.o anyoption.o RendererData.o
PROG = catalyst

# These targets do not name files; keep make from being confused by a stray
# file called "all" or "clean".
.PHONY: all clean clean_bak

all: $(PROG)

$(PROG): $(OBJ)
	$(LD) $^ -o $@ $(LDFLAGS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) $(INCLUDE_FLAGS) -c $< -o $@

%.o: %.c
	$(CC) $(CFLAGS) $(INCLUDE_FLAGS) -c $< -o $@

%.cu_o: %.cu
	$(NVCC) $(NVCCFLAGS) $(INCLUDE_FLAGS) -c $< -o $@

clean:
	/bin/rm -rf $(OBJ) $(PROG)

clean_bak:
	find . -name '*~' -exec /bin/rm -rf '{}' \;



$(OBJ): anyoption.h RendererData.h
# Build rules for the cvt2grid tools: cvt2grid, cvt2gridTree and densCalc.
# Each program is built from a single source file of the same name.

CXX = mpicxx
CC  = mpicc
LD  = mpicxx

OMPFLAGS  = -fopenmp
#OMPFLAGS += -D_GLIBCXX_PARALLEL

OFLAGS = -O3 -g -Wall
# OMPFLAGS=

CXXFLAGS =  -fPIC $(OFLAGS) $(OMPFLAGS) -std=c++11



LDFLAGS =

SRCPATH = ./

SRC1 = cvt2grid.cpp
SRC2 = cvt2gridTree.cpp
SRC3 = densCalc.cpp
OBJ1 = $(SRC1:%.cpp=%.o)
OBJ2 = $(SRC2:%.cpp=%.o)
OBJ3 = $(SRC3:%.cpp=%.o)

PROG1 = cvt2grid
PROG2 = cvt2gridTree
PROG3 = densCalc

RM = /bin/rm

# Targets that do not name files.
.PHONY: all clean

all: $(PROG1) $(PROG2) $(PROG3)


$(PROG1): $(OBJ1)
	$(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS)
$(PROG2): $(OBJ2)
	$(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS)
$(PROG3): $(OBJ3)
	$(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS)




%.o: $(SRCPATH)/%.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@


# The original clean rule left the densCalc binary ($(PROG3)) behind and
# ignored the RM variable defined above.
clean:
	$(RM) -rf *.o $(PROG1) $(PROG2) $(PROG3) $(OBJ1) $(OBJ2) $(OBJ3)

$(OBJ1): BonsaiIO.h IDType.h Node.h Particle.h Tree.h boundary.h morton_key.h vector3.h wtime.h
$(OBJ2): BonsaiIO.h IDType.h Node.h Particle.h Tree.h boundary.h morton_key.h vector3.h wtime.h
$(OBJ3): BonsaiIO.h IDType.h Node.h Particle.h Tree.h boundary.h morton_key.h vector3.h wtime.h
*/ 60 | const Particle operator = (const Particle &rhs) 61 | { 62 | typedef float v4sf __attribute__ ((vector_size(16))); 63 | v4sf *lp =(v4sf *)this; 64 | v4sf *rp =(v4sf *)(&rhs); 65 | lp[0] = rp[0]; 66 | lp[1] = rp[1]; 67 | lp[2] = rp[2]; 68 | lp[3] = rp[3]; 69 | 70 | return *this; 71 | } 72 | #endif 73 | }; 74 | 75 | 76 | #if 0 77 | namespace std 78 | { 79 | template <> 80 | inline void iter_swap (Particle::Iterator a, Particle::Iterator b) 81 | { 82 | typedef float v4sf __attribute__ ((vector_size(16))); 83 | v4sf *ap =(v4sf *)&(*a); 84 | v4sf *bp =(v4sf *)&(*b); 85 | v4sf tmpa0 = ap[0]; 86 | v4sf tmpa1 = ap[1]; 87 | v4sf tmpa2 = ap[2]; 88 | v4sf tmpa3 = ap[3]; 89 | v4sf tmpb0 = bp[0]; 90 | v4sf tmpb1 = bp[1]; 91 | v4sf tmpb2 = bp[2]; 92 | v4sf tmpb3 = bp[3]; 93 | ap[0] = tmpb0; 94 | ap[1] = tmpb1; 95 | ap[2] = tmpb2; 96 | ap[3] = tmpb3; 97 | bp[0] = tmpa0; 98 | bp[1] = tmpa1; 99 | bp[2] = tmpa2; 100 | bp[3] = tmpa3; 101 | } 102 | } 103 | #endif 104 | 105 | #endif /* __PARTICLE_H__ */ 106 | -------------------------------------------------------------------------------- /tools/cvt2grid/boundary.h: -------------------------------------------------------------------------------- 1 | #include "vector3.h" 2 | 3 | template 4 | struct boundary{ 5 | typedef vector3 vec; 6 | vec min, max; 7 | boundary() : min(HUGE), max(-HUGE) {} 8 | boundary(const vec &_min, const vec &_max) : min(_min), max(_max) {} 9 | boundary(const vec &pos, const REAL &h = 0.0) : min(pos - vec(h)), max(pos + vec(h)) {} 10 | 11 | static const boundary merge(const boundary &a, const boundary &b){ 12 | return boundary(mineach(a.min, b.min), maxeach(a.max, b.max)); 13 | } 14 | void merge(const boundary &b){ 15 | *this = merge(*this, b); 16 | } 17 | friend bool not_overlapped(const boundary &a, const boundary &b){ 18 | return (a.max.x < b.min.x) || (b.max.x < a.min.x) 19 | || (a.max.y < b.min.y) || (b.max.y < a.min.y) 20 | || (a.max.z < b.min.z) || (b.max.z < a.min.z); 21 | } 22 | friend bool 
overlapped(const boundary &a, const boundary &b){ 23 | return !not_overlapped(a, b); 24 | } 25 | const vec center() const { 26 | return REAL(0.5) * (max + min); 27 | } 28 | const vec hlen() const { 29 | return REAL(0.5) * (max - min); 30 | } 31 | const REAL separation2_from(const vec &pos) const{ 32 | vec dr = center() - pos; 33 | dr = dr.abseach() - hlen(); 34 | dr = vec::maxeach(dr, vec(0.0)); 35 | return dr.norm2(); 36 | } 37 | }; 38 | #if 0 39 | template <> 40 | struct boundary{ 41 | typedef vector3 vec; 42 | typedef float v4sf __attribute__ ((vector_size(16))); 43 | 44 | vec min; 45 | float p0; 46 | vec max; 47 | float p1; 48 | 49 | boundary() : min(HUGE), p0(0.f), max(-HUGE), p1(0.f) {} 50 | boundary(const vec &_min, const vec &_max) : min(_min), p0(0.f), max(_max), p1(0.f) {} 51 | boundary(const vec &pos, float h = 0.f) : 52 | min(pos - vec(h)), p0(0.f), max(pos + vec(h)), p1(0.f) {} 53 | boundary(v4sf _min, v4sf _max){ 54 | *(v4sf *)&min = _min; 55 | *(v4sf *)&max = _max; 56 | } 57 | 58 | static const boundary merge(const boundary &a, const boundary &b){ 59 | return boundary( 60 | __builtin_ia32_minps(*(v4sf *)&a.min, *(v4sf *)&b.min), 61 | __builtin_ia32_maxps(*(v4sf *)&a.max, *(v4sf *)&b.max)); 62 | } 63 | void merge(const boundary &b){ 64 | *this = merge(*this, b); 65 | } 66 | friend bool not_overlapped(const boundary &a, const boundary &b){ 67 | return __builtin_ia32_movmskps( 68 | (v4sf)(__builtin_ia32_cmpltps( 69 | *(v4sf *)&a.max, *(v4sf *)&b.min))) 70 | || __builtin_ia32_movmskps( 71 | (v4sf)(__builtin_ia32_cmpltps( 72 | *(v4sf *)&b.max, *(v4sf *)&a.min))); 73 | } 74 | friend bool overlapped(const boundary &a, const boundary &b){ 75 | return !not_overlapped(a, b); 76 | } 77 | }; 78 | #endif 79 | -------------------------------------------------------------------------------- /tools/cvt2grid/key_table: -------------------------------------------------------------------------------- 1 | 00000000, 2 | 00000001, 3 | 00000010, 4 | 00000011, 5 | 00000100, 
6 | 00000101, 7 | 00000110, 8 | 00000111, 9 | 00001000, 10 | 00001001, 11 | 00001010, 12 | 00001011, 13 | 00001100, 14 | 00001101, 15 | 00001110, 16 | 00001111, 17 | 00010000, 18 | 00010001, 19 | 00010010, 20 | 00010011, 21 | 00010100, 22 | 00010101, 23 | 00010110, 24 | 00010111, 25 | 00011000, 26 | 00011001, 27 | 00011010, 28 | 00011011, 29 | 00011100, 30 | 00011101, 31 | 00011110, 32 | 00011111, 33 | 00100000, 34 | 00100001, 35 | 00100010, 36 | 00100011, 37 | 00100100, 38 | 00100101, 39 | 00100110, 40 | 00100111, 41 | 00101000, 42 | 00101001, 43 | 00101010, 44 | 00101011, 45 | 00101100, 46 | 00101101, 47 | 00101110, 48 | 00101111, 49 | 00110000, 50 | 00110001, 51 | 00110010, 52 | 00110011, 53 | 00110100, 54 | 00110101, 55 | 00110110, 56 | 00110111, 57 | 00111000, 58 | 00111001, 59 | 00111010, 60 | 00111011, 61 | 00111100, 62 | 00111101, 63 | 00111110, 64 | 00111111, 65 | 01000000, 66 | 01000001, 67 | 01000010, 68 | 01000011, 69 | 01000100, 70 | 01000101, 71 | 01000110, 72 | 01000111, 73 | 01001000, 74 | 01001001, 75 | 01001010, 76 | 01001011, 77 | 01001100, 78 | 01001101, 79 | 01001110, 80 | 01001111, 81 | 01010000, 82 | 01010001, 83 | 01010010, 84 | 01010011, 85 | 01010100, 86 | 01010101, 87 | 01010110, 88 | 01010111, 89 | 01011000, 90 | 01011001, 91 | 01011010, 92 | 01011011, 93 | 01011100, 94 | 01011101, 95 | 01011110, 96 | 01011111, 97 | 01100000, 98 | 01100001, 99 | 01100010, 100 | 01100011, 101 | 01100100, 102 | 01100101, 103 | 01100110, 104 | 01100111, 105 | 01101000, 106 | 01101001, 107 | 01101010, 108 | 01101011, 109 | 01101100, 110 | 01101101, 111 | 01101110, 112 | 01101111, 113 | 01110000, 114 | 01110001, 115 | 01110010, 116 | 01110011, 117 | 01110100, 118 | 01110101, 119 | 01110110, 120 | 01110111, 121 | 01111000, 122 | 01111001, 123 | 01111010, 124 | 01111011, 125 | 01111100, 126 | 01111101, 127 | 01111110, 128 | 01111111, 129 | -------------------------------------------------------------------------------- /tools/cvt2grid/morton_key.h: 
// Return the current wall-clock time in seconds (with microsecond
// resolution) as reported by gettimeofday().  Intended for measuring elapsed
// intervals: take the difference of two calls.
//
// Declared `inline` because this function is defined in a header: without it
// every translation unit that includes the header emits its own external
// definition and the link fails with a multiple-definition error.
inline double wtime(){
  struct timeval tv;
  gettimeofday(&tv, NULL);
  // tv_sec holds whole seconds, tv_usec the microsecond remainder.
  return tv.tv_sec + 1.e-6 * tv.tv_usec;
}
27 | 28 | RM = /bin/rm 29 | 30 | all: $(PROG) $(PROG1) 31 | 32 | 33 | $(PROG): $(OBJ) 34 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 35 | 36 | $(PROG1): $(OBJ1) 37 | $(LD) $(LDFLAGS) $^ -o $@ $(OMPFLAGS) 38 | 39 | %.o: $(SRCPATH)/%.cpp 40 | $(CXX) $(CXXFLAGS) -c $< -o $@ 41 | 42 | 43 | clean: 44 | /bin/rm -rf *.o $(PROG) $(PROG1) 45 | 46 | $(OBJ): boundary.h density.h memalign_allocator.h morton_key.h Node.h Particle.h vector3.h wtime.h 47 | $(OBJ1): boundary.h density.h memalign_allocator.h morton_key.h Node.h Particle.h vector3.h wtime.h read_tipsy.h 48 | 49 | build_tree.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 50 | compute_properties.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 51 | compute_propertiesD.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 52 | dev_approximate_gravity.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 53 | dev_approximate_gravity_let.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 54 | timestep.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 55 | sortKernels.ptx: $(CUDAKERNELSPATH)/scanKernels.cu $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 56 | parallel.ptx: $(CUDAKERNELSPATH)/support_kernels.cu $(INCLUDEPATH)/node_specs.h 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /tools/density_estimator/Particle.h: -------------------------------------------------------------------------------- 1 | #ifndef __PARTICLE_H__ 2 | #define __PARTICLE_H__ 3 | 4 | #include 5 | #include "vector3.h" 6 | #include "morton_key.h" 7 | 8 | class Particle 9 | { 10 | public: 11 | 12 | typedef std::vector Vector; 13 | typedef Vector:: iterator Iterator; 14 | typedef Vector::const_iterator constIterator; 15 | 16 | int ID; // 1 1 17 | vec3 pos; // 3 4 18 | float mass; // 1 5 19 | 20 | protected: 21 | float h; /* range */ // 1 6 
22 | 23 | public: 24 | float density; // 1 7 25 | int nnb; // 1 8 26 | morton_key key; // 2 10 27 | 28 | protected: 29 | float hinv; // 1 11 30 | int iPad[5]; 31 | 32 | public: 33 | 34 | /********* methods ******/ 35 | 36 | Particle(const int _ID, const vec3 &_pos, const float _mass, const float _h = 0.0f) : 37 | ID(_ID), pos(_pos), mass(_mass), h(_h), density(0.0f), nnb(0) { assert(sizeof(Particle) == 16*sizeof(float)); } 38 | 39 | void set_h(const float _h) 40 | { 41 | h = _h; 42 | hinv = 1.0f/h; 43 | } 44 | float get_h() const {return h;} 45 | float get_hinv() const {return hinv;} 46 | 47 | void compute_key(const vec3 &origin, const float size) 48 | { 49 | key = morton_key(pos - origin, size); 50 | }; 51 | 52 | int octkey(const int rshift) 53 | { 54 | return 7 & (key.val >> rshift); 55 | } 56 | 57 | #if 0 /* does not work w/ OpenMP, not sure why ... */ 58 | const Particle operator = (const Particle &rhs) 59 | { 60 | typedef float v4sf __attribute__ ((vector_size(16))); 61 | v4sf *lp =(v4sf *)this; 62 | v4sf *rp =(v4sf *)(&rhs); 63 | lp[0] = rp[0]; 64 | lp[1] = rp[1]; 65 | lp[2] = rp[2]; 66 | lp[3] = rp[3]; 67 | 68 | return *this; 69 | } 70 | #endif 71 | }; 72 | 73 | 74 | #if 0 75 | namespace std 76 | { 77 | template <> 78 | inline void iter_swap (Particle::Iterator a, Particle::Iterator b) 79 | { 80 | typedef float v4sf __attribute__ ((vector_size(16))); 81 | v4sf *ap =(v4sf *)&(*a); 82 | v4sf *bp =(v4sf *)&(*b); 83 | v4sf tmpa0 = ap[0]; 84 | v4sf tmpa1 = ap[1]; 85 | v4sf tmpa2 = ap[2]; 86 | v4sf tmpa3 = ap[3]; 87 | v4sf tmpb0 = bp[0]; 88 | v4sf tmpb1 = bp[1]; 89 | v4sf tmpb2 = bp[2]; 90 | v4sf tmpb3 = bp[3]; 91 | ap[0] = tmpb0; 92 | ap[1] = tmpb1; 93 | ap[2] = tmpb2; 94 | ap[3] = tmpb3; 95 | bp[0] = tmpa0; 96 | bp[1] = tmpa1; 97 | bp[2] = tmpa2; 98 | bp[3] = tmpa3; 99 | } 100 | } 101 | #endif 102 | 103 | #endif /* __PARTICLE_H__ */ 104 | -------------------------------------------------------------------------------- /tools/density_estimator/README: 
-------------------------------------------------------------------------------- 1 | -- Density estimator C++ library. 2 | 3 | See density.cpp as example 4 | $ make 5 | $ gzip -cd ngb_fast.gz | ./density > ngb_fast.dens 6 | -- Density done in 0.040385 sec [ 2.47503e+06 ptcl/sec ] 7 | 8 | $ gzip -cd ngb_slow.gz | ./density > ngb_slow.dens 9 | -- Density done in 0.0576272 sec [ 1.77937e+06 ptcl/sec ] 10 | 11 | Timings done on i7 2600K, 3.5GHz with 4 omp threads and ~100k particles 12 | 13 | -------------------------------------------------------------------------------- /tools/density_estimator/boundary.h: -------------------------------------------------------------------------------- 1 | #include "vector3.h" 2 | 3 | template 4 | struct boundary{ 5 | typedef vector3 vec; 6 | vec min, max; 7 | boundary() : min(HUGE), max(-HUGE) {} 8 | boundary(const vec &_min, const vec &_max) : min(_min), max(_max) {} 9 | boundary(const vec &pos, const REAL &h = 0.0) : min(pos - vec(h)), max(pos + vec(h)) {} 10 | 11 | static const boundary merge(const boundary &a, const boundary &b){ 12 | return boundary(mineach(a.min, b.min), maxeach(a.max, b.max)); 13 | } 14 | void merge(const boundary &b){ 15 | *this = merge(*this, b); 16 | } 17 | friend bool not_overlapped(const boundary &a, const boundary &b){ 18 | return (a.max.x < b.min.x) || (b.max.x < a.min.x) 19 | || (a.max.y < b.min.y) || (b.max.y < a.min.y) 20 | || (a.max.z < b.min.z) || (b.max.z < a.min.z); 21 | } 22 | friend bool overlapped(const boundary &a, const boundary &b){ 23 | return !not_overlapped(a, b); 24 | } 25 | const vec center() const { 26 | return REAL(0.5) * (max + min); 27 | } 28 | const vec hlen() const { 29 | return REAL(0.5) * (max - min); 30 | } 31 | const REAL separation2_from(const vec &pos) const{ 32 | vec dr = center() - pos; 33 | dr = dr.abseach() - hlen(); 34 | dr = vec::maxeach(dr, vec(0.0)); 35 | return dr.norm2(); 36 | } 37 | }; 38 | #if 0 39 | template <> 40 | struct boundary{ 41 | typedef vector3 vec; 
42 | typedef float v4sf __attribute__ ((vector_size(16))); 43 | 44 | vec min; 45 | float p0; 46 | vec max; 47 | float p1; 48 | 49 | boundary() : min(HUGE), p0(0.f), max(-HUGE), p1(0.f) {} 50 | boundary(const vec &_min, const vec &_max) : min(_min), p0(0.f), max(_max), p1(0.f) {} 51 | boundary(const vec &pos, float h = 0.f) : 52 | min(pos - vec(h)), p0(0.f), max(pos + vec(h)), p1(0.f) {} 53 | boundary(v4sf _min, v4sf _max){ 54 | *(v4sf *)&min = _min; 55 | *(v4sf *)&max = _max; 56 | } 57 | 58 | static const boundary merge(const boundary &a, const boundary &b){ 59 | return boundary( 60 | __builtin_ia32_minps(*(v4sf *)&a.min, *(v4sf *)&b.min), 61 | __builtin_ia32_maxps(*(v4sf *)&a.max, *(v4sf *)&b.max)); 62 | } 63 | void merge(const boundary &b){ 64 | *this = merge(*this, b); 65 | } 66 | friend bool not_overlapped(const boundary &a, const boundary &b){ 67 | return __builtin_ia32_movmskps( 68 | (v4sf)(__builtin_ia32_cmpltps( 69 | *(v4sf *)&a.max, *(v4sf *)&b.min))) 70 | || __builtin_ia32_movmskps( 71 | (v4sf)(__builtin_ia32_cmpltps( 72 | *(v4sf *)&b.max, *(v4sf *)&a.min))); 73 | } 74 | friend bool overlapped(const boundary &a, const boundary &b){ 75 | return !not_overlapped(a, b); 76 | } 77 | }; 78 | #endif 79 | -------------------------------------------------------------------------------- /tools/density_estimator/density.cpp: -------------------------------------------------------------------------------- 1 | #include "density.h" 2 | 3 | std::vector Node::ptcl; 4 | std::vector Node::Node_heap; 5 | std::vector > Node::pair_list; 6 | 7 | int main(int argc, char * argv[]) 8 | { 9 | Particle::Vector ptcl; 10 | 11 | int idum, nbody; 12 | std::cin >> idum >> nbody; 13 | 14 | std::vector nnb(nbody); 15 | ptcl.reserve(nbody); 16 | 17 | for(int i=0; i> pos >> h >> nnb[i]; 21 | ptcl.push_back(Particle(i, pos, 1.0, 2.0*h)); 22 | } 23 | fprintf(stderr, "nbody= %d \n", nbody); 24 | 25 | Node::allocate(nbody, nbody); 26 | Density density(ptcl, nbody); 27 | 28 | int ngb_min = 
nbody; 29 | int ngb_max = 0; 30 | double ngb_mean = 0; 31 | double ngb_mean2 = 0; 32 | 33 | int imax = 0; 34 | #if 0 35 | fprintf(stdout, "%d\n", nbody); 36 | #endif 37 | for (int i = 0; i < nbody; i++) 38 | { 39 | std::vector &ptcl = Node::ptcl; 40 | const Particle &p = ptcl[i]; 41 | #if 0 42 | if (!(p.nnb == nnb[p.ID])) 43 | { 44 | fprintf(stderr, "i= %d: nnb_exact= %d nnb_compute= %d\n", 45 | i, nnb[p.ID], p.nnb); 46 | } 47 | #endif 48 | 49 | if (p.nnb > 128) 50 | { 51 | #if 0 52 | fprintf(stderr, "i= %d: nnb_exact= %d nnb_compute= %d h= %g pos= %g %g %g\n", 53 | i, nnb[ID], p.nnb, p.h, p.pos.x, p.pos.y, p.pos.z); 54 | #endif 55 | imax++; 56 | } 57 | 58 | ngb_min = std::min(ngb_min, p.nnb); 59 | ngb_max = std::max(ngb_max, p.nnb); 60 | ngb_mean += p.nnb; 61 | ngb_mean2 += p.nnb*p.nnb; 62 | fprintf(stdout, " %d %g %g %g %g \n", p.ID, p.pos.x, p.pos.y, p.pos.z, p.density); 63 | 64 | } 65 | ngb_mean *= 1.0/(float)nbody; 66 | ngb_mean2 *= 1.0/(float)nbody; 67 | fprintf(stderr, " imax= %d \n", imax); 68 | fprintf(stderr, " nmin= %d nmax= %d nmean= %g = %g\n", 69 | ngb_min, ngb_max, 70 | ngb_mean, 71 | std::sqrt(ngb_mean2 - ngb_mean*ngb_mean)); 72 | 73 | return 0; 74 | } 75 | 76 | 77 | -------------------------------------------------------------------------------- /tools/density_estimator/density.h: -------------------------------------------------------------------------------- 1 | #ifndef __DENSITY_H__ 2 | #define __DENSITY_H__ 3 | 4 | #include 5 | #include "Node.h" 6 | #include "wtime.h" 7 | 8 | 9 | #if 0 10 | #define SLOW 11 | #endif 12 | 13 | struct Density 14 | { 15 | typedef boundary Boundary; 16 | 17 | Particle::Vector density; 18 | Boundary BBox; /* bounding box */ 19 | 20 | struct cmp_particle_key 21 | { 22 | bool operator () (const Particle &a, const Particle &b) 23 | { 24 | return a.key.val < b.key.val; 25 | } 26 | }; 27 | 28 | struct cmp_particle_ID 29 | { 30 | bool operator () (const Particle &a, const Particle &b) 31 | { 32 | return a.ID < b.ID; 33 | } 
34 | }; 35 | 36 | Density(const Particle::Vector &ptcl_in, const int Nuse, const int Nngb = 32) 37 | { 38 | const double t0 = wtime(); 39 | fprintf(stderr, "Nuse= %d \n", Nuse); 40 | 41 | std::vector &ptcl = Node::ptcl; 42 | ptcl.reserve(Nuse); 43 | const int Nin = ptcl_in.size(); 44 | assert(Nuse <= Nin); 45 | 46 | const float fac = std::max(1.0f, (float)Nin/(float)Nuse); 47 | 48 | 49 | /* import particles and compute the Bounding Box */ 50 | for (int i = 0; i < Nuse; i++) 51 | { 52 | ptcl.push_back(ptcl_in[(int)(i * fac)]); 53 | BBox.merge(Boundary(ptcl.back().pos)); 54 | } 55 | std::cerr << BBox.min << std::endl; 56 | std::cerr << BBox.max << std::endl; 57 | const vec3 vsize = BBox.hlen(); 58 | const float rsize = std::max(vsize.x, std::max(vsize.x, vsize.y)) * 2.0f; 59 | 60 | /* now build the tree */ 61 | 62 | const int nbody = Nuse; 63 | 64 | for (int i = 0; i < nbody; i++) 65 | ptcl[i].compute_key(BBox.min, rsize); 66 | 67 | std::sort(ptcl.begin(), ptcl.end(), cmp_particle_key()); 68 | 69 | Node::Node_heap.push_back(Node()); 70 | Node &root = Node::Node_heap[0]; 71 | for (int i = 0; i < nbody; i++) 72 | root.push_particle(i, 60); 73 | 74 | #if 1 /* if h's are not know this set-up estimated range */ 75 | const float volume = rsize*rsize*rsize; 76 | root.set_init_h(float(Nngb), volume); 77 | #endif 78 | root.make_boundary(); 79 | 80 | #ifdef SLOW 81 | 82 | #pragma omp parallel for 83 | for(int i=0; i group_list; 89 | root.find_group_Node(2000, group_list); 90 | #pragma omp parallel for schedule(dynamic) 91 | for(int i=0; i<(int)group_list.size(); i++) 92 | *group_list[i] << root; 93 | #else 94 | root << root; 95 | #endif /* _OPENMP */ 96 | 97 | #endif /* SLOW */ 98 | 99 | const double t1 = wtime(); 100 | fprintf(stderr, " -- Density done in %g sec [ %g ptcl/sec ]\n", t1 - t0, Nuse/(t1 - t0)); 101 | 102 | }; 103 | }; 104 | 105 | 106 | #endif /* __DENSITY_H__ */ 107 | -------------------------------------------------------------------------------- 
/tools/density_estimator/img/snap000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap000.png -------------------------------------------------------------------------------- /tools/density_estimator/img/snap050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap050.png -------------------------------------------------------------------------------- /tools/density_estimator/img/snap100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap100.png -------------------------------------------------------------------------------- /tools/density_estimator/img/snap150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap150.png -------------------------------------------------------------------------------- /tools/density_estimator/img/snap200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap200.png -------------------------------------------------------------------------------- /tools/density_estimator/img/snap250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap250.png 
-------------------------------------------------------------------------------- /tools/density_estimator/img/snap300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treecode/Bonsai/581fa8e70501ce85660c7eac0d61c0e5c5bece4a/tools/density_estimator/img/snap300.png -------------------------------------------------------------------------------- /tools/density_estimator/key_table: -------------------------------------------------------------------------------- 1 | 00000000, 2 | 00000001, 3 | 00000010, 4 | 00000011, 5 | 00000100, 6 | 00000101, 7 | 00000110, 8 | 00000111, 9 | 00001000, 10 | 00001001, 11 | 00001010, 12 | 00001011, 13 | 00001100, 14 | 00001101, 15 | 00001110, 16 | 00001111, 17 | 00010000, 18 | 00010001, 19 | 00010010, 20 | 00010011, 21 | 00010100, 22 | 00010101, 23 | 00010110, 24 | 00010111, 25 | 00011000, 26 | 00011001, 27 | 00011010, 28 | 00011011, 29 | 00011100, 30 | 00011101, 31 | 00011110, 32 | 00011111, 33 | 00100000, 34 | 00100001, 35 | 00100010, 36 | 00100011, 37 | 00100100, 38 | 00100101, 39 | 00100110, 40 | 00100111, 41 | 00101000, 42 | 00101001, 43 | 00101010, 44 | 00101011, 45 | 00101100, 46 | 00101101, 47 | 00101110, 48 | 00101111, 49 | 00110000, 50 | 00110001, 51 | 00110010, 52 | 00110011, 53 | 00110100, 54 | 00110101, 55 | 00110110, 56 | 00110111, 57 | 00111000, 58 | 00111001, 59 | 00111010, 60 | 00111011, 61 | 00111100, 62 | 00111101, 63 | 00111110, 64 | 00111111, 65 | 01000000, 66 | 01000001, 67 | 01000010, 68 | 01000011, 69 | 01000100, 70 | 01000101, 71 | 01000110, 72 | 01000111, 73 | 01001000, 74 | 01001001, 75 | 01001010, 76 | 01001011, 77 | 01001100, 78 | 01001101, 79 | 01001110, 80 | 01001111, 81 | 01010000, 82 | 01010001, 83 | 01010010, 84 | 01010011, 85 | 01010100, 86 | 01010101, 87 | 01010110, 88 | 01010111, 89 | 01011000, 90 | 01011001, 91 | 01011010, 92 | 01011011, 93 | 01011100, 94 | 01011101, 95 | 01011110, 96 | 01011111, 97 | 01100000, 98 | 01100001, 99 | 
/*
 * 63-bit Morton (Z-order) key built from a 3-component position.
 *
 * NOTE(review): the template parameter list on the next line appears to have
 * been stripped from this copy; the body uses the names VEC and REAL, but
 * their declaration order cannot be seen here.  The line is left as found.
 */
template 
struct morton_key{
  typedef unsigned long long key_t;
  key_t val;  /* interleaved key; 0 for a default-constructed key */

  morton_key() : val(0) {}

  /*
   * vec  : position, indexed with [0], [1], [2]
   * size : extent that is mapped onto 2^20 integer cells per axis
   */
  morton_key(const VEC &vec, const REAL &size){
    /* "key_table" holds 128 literals written with a leading 0, i.e. OCTAL
     * constants whose digits are the binary digits of the index.  Each octal
     * digit occupies 3 bits, so table[k] is the 7 bits of k spread out to
     * every third bit position (21 bits wide). */
    static key_t table[128] = {
#include "key_table"
    };
    const REAL scale = (1<<20) / size;
    int xi = int(vec[0] * scale);
    int yi = int(vec[1] * scale);
    int zi = int(vec[2] * scale);
    /* each integer coordinate must fit in 21 bits (three 7-bit table lookups) */
    assert((xi >> 21) == 0);
    assert((yi >> 21) == 0);
    assert((zi >> 21) == 0);
    /* spread bits 0-6, 7-13 and 14-20 of each coordinate; every 7-bit group
     * expands to 21 bits, hence the shifts by 21 and 42 */
    key_t xkey = (table[xi&127]) | (table[(xi>>7)&127] << 21) | (table[(xi>>14)&127] << 42);
    key_t ykey = (table[yi&127]) | (table[(yi>>7)&127] << 21) | (table[(yi>>14)&127] << 42);
    key_t zkey = (table[zi&127]) | (table[(zi>>7)&127] << 21) | (table[(zi>>14)&127] << 42);
    /* interleave: within every 3-bit group x is the high bit, z the low bit */
    val = (xkey<<2) | (ykey<<1) | zkey;
  }
};
def plot2D(x, y, z, zmin=None, zmax=None, xlim=None, ylim=None, nx=200, ny=200):
    """Show a filled-contour map of scattered (x, y, z) samples.

    The samples are interpolated onto a regular ``nx`` x ``ny`` grid spanning
    the data range and drawn with 32 contour levels, a colour bar, and then
    ``plt.show()`` is called.

    Args:
        x, y: sample coordinates.
        z: sample values; values outside ``[zmin, zmax]`` are clamped.
        zmin, zmax: colour range; default to ``min(z)`` / ``max(z)``.
        xlim, ylim: ``(low, high)`` axis limits; default to ``(-6, 6)``.
        nx, ny: grid resolution along x and y.
    """
    # `is None` rather than `== None`: the latter is an element-wise
    # comparison (and an error inside `if`) when an array is passed.
    if zmin is None:
        zmin = min(z)
    if zmax is None:
        zmax = max(z)

    # Clamp into a new list so the caller's data is not modified.
    z = [max(min(value, zmax), zmin) for value in z]

    xi = np.linspace(min(x), max(x), nx)
    yi = np.linspace(min(y), max(y), ny)
    # NOTE(review): `mlab.griddata` is believed to be absent from recent
    # matplotlib releases -- confirm the matplotlib version this script targets.
    zi = plt.mlab.griddata(x, y, z, xi, yi)

    # The original drew the same data twice; the second call (this one)
    # painted over the first, so only it is kept.
    plt.contourf(xi, yi, zi, 32, norm=plt.Normalize(zmin, zmax))

    if xlim is None:
        plt.xlim(-6.0, +6.0)
    else:
        plt.xlim(xlim[0], xlim[1])

    if ylim is None:
        plt.ylim(-6.0, +6.0)
    else:
        plt.ylim(ylim[0], ylim[1])

    plt.colorbar()
    plt.show()
ylim=[-100,100]) 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /tools/density_estimator/read_tipsy.cpp: -------------------------------------------------------------------------------- 1 | #include "read_tipsy.h" 2 | #include "density.h" 3 | 4 | 5 | std::vector Node::ptcl; 6 | std::vector Node::Node_heap; 7 | std::vector > Node::pair_list; 8 | 9 | #if 1 10 | #define DENSDM 11 | #endif 12 | 13 | int main(int argc, char * argv[]) 14 | { 15 | ReadTipsy data; 16 | Particle::Vector ptcl_star, ptcl_dm; 17 | 18 | const int nbody = data.NTotal; 19 | 20 | ptcl_dm .reserve(nbody); 21 | ptcl_star.reserve(nbody); 22 | 23 | for(int i=0; i &ptcl = Node::ptcl; 55 | const Particle &p = ptcl[i]; 56 | 57 | if (p.nnb > 128) 58 | { 59 | #if 0 60 | fprintf(stderr, "i= %d: nnb_exact= %d nnb_compute= %d h= %g pos= %g %g %g\n", 61 | i, nnb[ID], p.nnb, p.h, p.pos.x, p.pos.y, p.pos.z); 62 | #endif 63 | imax++; 64 | } 65 | 66 | ngb_min = std::min(ngb_min, p.nnb); 67 | ngb_max = std::max(ngb_max, p.nnb); 68 | ngb_mean += p.nnb; 69 | ngb_mean2 += p.nnb*p.nnb; 70 | if (p.nnb < 3) 71 | { 72 | nzero++; 73 | continue; 74 | } 75 | fprintf(stdout, " %d %g %g %g %g %d %d\n", 76 | p.ID, p.pos.x, p.pos.y, p.pos.z, p.density, 77 | #ifdef DENSDM 78 | 0, 79 | #else 80 | 1, 81 | #endif 82 | p.nnb); 83 | 84 | } 85 | ngb_mean *= 1.0/(float)N; 86 | ngb_mean2 *= 1.0/(float)N; 87 | fprintf(stderr, " imax= %d nzero= %d\n", imax, nzero); 88 | fprintf(stderr, " nmin= %d nmax= %d nmean= %g = %g\n", 89 | ngb_min, ngb_max, 90 | ngb_mean, 91 | std::sqrt(ngb_mean2 - ngb_mean*ngb_mean)); 92 | 93 | return 0; 94 | } 95 | 96 | 97 | -------------------------------------------------------------------------------- /tools/density_estimator/tipsydefs.h: -------------------------------------------------------------------------------- 1 | #ifndef TIPSYDEFS_H 2 | #define TIPSYDEFS_H 3 | 4 | #define MAXDIM 3 5 | #define forever for(;;) 6 | 7 | typedef float Real; 8 | 
/*
 * Wall-clock time in seconds (with microsecond resolution) as reported by
 * gettimeofday().
 *
 * Declared `inline` because this function is defined in a header: without it
 * every translation unit that includes the header emits its own external
 * definition and the link fails with a multiple-definition error.
 */
inline double wtime(){
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec + 1.e-6 * tv.tv_usec;
}
/*
 * Interface sketch for a particle-attribute file reader/writer.
 *
 * Every member function defined here only checks the open/read/write state
 * and returns; the actual I/O is described in comments, not implemented.
 *
 * NOTE(review): the template parameter lists and std::vector element types
 * appear to have been stripped from this copy (bare `template` / `std::vector`
 * below).  They are left exactly as found.
 */
struct BonsaiIO
{
  BonsaiIO() { /* do some sanity tests before open */}
  ~BonsaiIO() { /* do some sanity tests before close */}

  /* State queries and setters; declared only, defined elsewhere (if at all). */
  bool isFileOpened();
  bool isFileOpenedForRead();
  bool isFileOpenedForWrite();
  void setFileOpenedForRead();
  void setFileOpenedForWrite();

  template 
    bool openFile(const std::string &fileName, const char mode, std::vector &IDList)
    {
      assert(!isFileOpened());
      /* opens the file "fileName"; mode is 'r' for read or 'w' for write */
      /* if mode is 'r', IDList is populated with particle IDs from the file
       * if mode is 'w', IDList must contain unique & immutable particle IDs */
      if (mode == 'r')
        setFileOpenedForRead();
      else if (mode == 'w')
        setFileOpenedForWrite();
      else
        assert(0);  /* any other mode is a programming error */
      return isFileOpened(); /* true if successful */
    }

  template 
    bool writeAttribute(const std::string &attributeName, const std::vector &attributeData)
    {
      assert(isFileOpenedForWrite());
      /* NOTE(review): `attrbiteData` is not declared anywhere in view (the
       * parameter is spelled `attributeData`), and `IDList` is neither a
       * parameter of this function nor a member of the struct -- this
       * assertion looks like it cannot compile as written; confirm and fix
       * once it is decided where the ID list is stored. */
      assert(attrbiteData.size() == IDList.size());
      /* adds particle attribute, e.g.
       *   writeAttribute("position", positions);
       *   writeAttribute("mass", masses);
       */
      return true; /* true if successful */
    }

  bool closeFile()
  {
    assert(isFileOpened());
    /* write file */
    return true; /* true if successful */
  }

  bool getAttributeList(std::vector &attributeNameList)
  {
    assert(isFileOpenedForRead());
    /* reads attribute list from the file */
    return true;
  }

  template 
    bool readAttribute(const std::string &attributeName, std::vector &attributeData)
    {
      assert(isFileOpenedForRead());
      /* returns the attribute for all particles from the list */
      return true;
    }

  template 
    bool readAttribute(const std::string &attributeName, const std::vector &IDList, std::vector &attributeData)
    {
      assert(isFileOpenedForRead());
      /* returns the attribute for the particles with the desired IDs */
      return true;
    }

};
/*
 * Wall-clock time in seconds, with microsecond resolution, from
 * gettimeofday().
 *
 * The timezone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified when it is non-null, and the value was never used here.
 */
static inline double rtc(void)
{
  struct timeval Tvalue;

  gettimeofday(&Tvalue, NULL);
  return (double) Tvalue.tv_sec +
    1.e-6*((double) Tvalue.tv_usec);
}
atoi(argv[1]) : 1000000; 31 | fprintf(stderr, " -- writing %d particles -- \n", n); 32 | assert(n > 16); 33 | 34 | MPI_Init(&argc, &argv); 35 | int rank, nrank; 36 | MPI_Comm_rank (MPI_COMM_WORLD, &rank); 37 | MPI_Comm_size (MPI_COMM_WORLD, &nrank); 38 | 39 | const MPI_Comm MPI_WORKING_WORLD = MPI_COMM_WORLD; 40 | 41 | std::vector pos(n), vel(n); 42 | std::vector IDs(n); 43 | 44 | for (int i = 0; i < n ; i++) 45 | { 46 | const float fi = i; 47 | pos[i] = (real4){ fi, fi+1.0f, fi-1.0f, -fi-1.0f}; 48 | vel[i] = (real4){2.0f*fi, 2.0f*fi+1.0f, 2.0f*fi-1.0f, -2.0f*fi-1.0f}; 49 | IDs[i] = 3*i-2; 50 | } 51 | 52 | const float time = 0.125; 53 | 54 | std::string fileName; fileName.resize(256); 55 | MPI_Barrier(MPI_WORKING_WORLD); 56 | const double t0 = rtc(); 57 | 58 | #ifndef _SION_ 59 | sprintf(&fileName[0], "%s_%010.4f-%d", "naive_test", time, rank); 60 | const size_t nbytes = write_snapshot( 61 | &pos[0], &vel[0], &IDs[0], n, fileName, time, 62 | rank, nrank, MPI_WORKING_WORLD); 63 | #else 64 | sprintf(&fileName[0], "%s_%010.4f-%d", "sion_test", time, nrank); 65 | const size_t nbytes = sion_write_snapshot( 66 | &pos[0], &vel[0], &IDs[0], n, fileName, time, 67 | rank, nrank, MPI_WORKING_WORLD); 68 | #endif 69 | 70 | MPI_Barrier(MPI_WORKING_WORLD); 71 | const double t1 = rtc(); 72 | 73 | if (rank == 0) 74 | fprintf(stderr, " -- writing took %g sec -- BW= %g MB/s\n", 75 | (t1-t0), nbytes/1e6/(t1-t0)); 76 | 77 | 78 | MPI_Finalize(); 79 | } 80 | -------------------------------------------------------------------------------- /tools/postProcessTools/density/bmpVisualizer/Makefile: -------------------------------------------------------------------------------- 1 | CC = g++ 2 | CFLAGS = -Wall -O3 -funroll-loops -ffast-math 3 | 4 | 5 | GEN_IMAGE_VOXEL = gen_image_voxel.o voxel.o display.o 6 | 7 | all: gen_image_voxel 8 | 9 | 10 | *.o: *.h Makefile 11 | 12 | .cpp.o: 13 | $(CC) $(CFLAGS) -o $@ -c $< 14 | 15 | 16 | .PHONY: gen_image_voxel 17 | gen_image_voxel: $(GEN_IMAGE_VOXEL) 
/*
 * Writes a Voxel data set to a BMP image through a colour map.
 *
 * Only the declaration is visible here; the member functions are defined in
 * another file.
 */
class Display{

 private:

  int width;   // image width
  int height;  // image height
  int depth;   // image depth

 public:

  // Voxel data to draw.
  // NOTE(review): gen_image_voxel.cpp deletes the Voxel itself after
  // deleting the Display, so this pointer is presumably non-owning -- confirm
  // against the destructor.
  Voxel *voxel;

  // Construct with explicit image dimensions.
  Display( const int i_width, const int i_height, const int i_depth, Voxel *const i_voxel);
  // Construct from the voxel data alone (dimensions are not passed in).
  Display( Voxel *const i_voxel);
  virtual ~Display();

  // Write the image to `ofile`; `colormap` / `colormap_size` describe the
  // colour lookup data supplied by the caller.
  virtual void oImageBmp( const char *ofile, const unsigned char *colormap,
			  const int colormap_size) const;


};
int height, 64 | unsigned char *color_array, const char *ofile); 65 | void readBmp( const char *infile, 66 | unsigned char *color_array, 67 | int &width, int &height); 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /tools/postProcessTools/density/bmpVisualizer/gen_image_voxel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * By Tomoaki Ishiyama 3 | * 4 | * June 3, edit by JB compatible with Bonsai output 5 | * 6 | */ 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "voxel.h" 13 | #include "display.h" 14 | 15 | 16 | using namespace std; 17 | 18 | const int CHARMAX = 256; 19 | 20 | 21 | int main( int argc, char **argv){ 22 | 23 | int nfile = 1; 24 | char inputfile[CHARMAX]; 25 | char outputfile[CHARMAX]; 26 | char outputfile2[CHARMAX]; 27 | char colormap_file[CHARMAX]; 28 | 29 | fprintf(stderr,"Usage: %s inputfile outputfile color_map_file \n", argv[0]); 30 | 31 | 32 | if(argc != 4 ) 33 | { 34 | exit(0); 35 | } 36 | 37 | sprintf(inputfile,"%s", argv[1]); 38 | sprintf(outputfile,"%s", argv[2]); 39 | sprintf(colormap_file,"%s", argv[3]); 40 | 41 | cerr << "Processing: " << inputfile << std::endl; 42 | 43 | int csize = 256; 44 | int cmap_size = csize * csize * 3; 45 | unsigned char *colormap_array = new unsigned char[cmap_size]; 46 | int cwidth, cheight; 47 | readBmp( colormap_file, colormap_array, cwidth, cheight); 48 | assert( cwidth == csize); 49 | assert( cheight == csize); 50 | 51 | //Create top view 52 | { 53 | Voxel *voxel = new Voxel( nfile, inputfile, 0 ); 54 | 55 | voxel->convertLinear( 0, 60000.0); 56 | voxel->convertLinear2( 0, 3000.0); 57 | voxel->convertLog( 0, 0); 58 | voxel->convertLinear( 0, 255.0); 59 | voxel->convertLinear2( 0, 255.0); 60 | 61 | Display *display = new Display( voxel); 62 | sprintf(outputfile2, "%s-top.bmp", outputfile); 63 | display->oImageBmp( outputfile2, colormap_array, csize); 64 | delete display; 65 | delete voxel; 
66 | } 67 | 68 | //Create front view 69 | { 70 | Voxel *voxel = new Voxel( nfile, inputfile, 2 ); 71 | 72 | voxel->convertLinear( 0, 60000.0); 73 | voxel->convertLinear2( 0, 3000.0); 74 | voxel->convertLog( 0, 0); 75 | voxel->convertLinear( 0, 255.0); 76 | voxel->convertLinear2( 0, 255.0); 77 | 78 | Display *display = new Display( voxel); 79 | sprintf(outputfile2, "%s-front.bmp", outputfile); 80 | display->oImageBmp( outputfile2, colormap_array, csize); 81 | delete display; 82 | delete voxel; 83 | } 84 | 85 | 86 | 87 | 88 | } 89 | 90 | 91 | -------------------------------------------------------------------------------- /tools/postProcessTools/density/bmpVisualizer/voxel.h: -------------------------------------------------------------------------------- 1 | #ifndef _VOXEL_INCLUDED 2 | #define _VOXEL_INCLUDED 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | using namespace std; 13 | 14 | 15 | class Voxel{ 16 | 17 | public: 18 | 19 | int xvoxel; 20 | int yvoxel; 21 | int zvoxel; 22 | int nvoxel; 23 | 24 | double *val; // value of voxel 25 | double *val2; // value of voxel 26 | 27 | 28 | Voxel(); 29 | Voxel( const int nfile, const char *fname, const int idxOffset); 30 | virtual ~Voxel(); 31 | virtual void initVoxel(); 32 | virtual void resetVoxel(); 33 | virtual int getVoxelIndex( const int x, const int y, const int z) const; 34 | 35 | virtual double getMaxVal() const; 36 | virtual double getMinVal() const; 37 | virtual double getMaxVal2() const; 38 | virtual double getMinVal2() const; 39 | virtual void convertLinear( const double left, const double right); 40 | virtual void convertLinear2( const double left, const double right); 41 | virtual void convertLog( const double left, const double right); 42 | 43 | 44 | double & operator ()(int x, int y, int z); 45 | 46 | 47 | }; 48 | 49 | 50 | 51 | inline int Voxel::getVoxelIndex( const int x, const int y, const int z) const{ 52 | return x*yvoxel*zvoxel + y*zvoxel + z; 53 | } 
54 | 55 | 56 | 57 | inline double & Voxel::operator ()(int x, int y, int z){ 58 | return val[z+zvoxel*(y+yvoxel*x)]; 59 | //return val[x+xvoxel*y]; 60 | } 61 | 62 | 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /tools/postProcessTools/density/convDensBinToAscii.cpp: -------------------------------------------------------------------------------- 1 | // g++ convDensBinToAscii.cpp -O3 convDensBinToAscii 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | int main(int argc, char** argv) 9 | { 10 | if(argc != 3) 11 | { 12 | fprintf(stderr,"Usage: %s infile outfile\n",argv[0]); 13 | exit(0); 14 | } 15 | 16 | FILE *fin = fopen( argv[1], "rb"); 17 | FILE *fout = fopen( argv[2], "w"); 18 | 19 | if(fin == NULL) 20 | { 21 | printf("Failed to open file: %s \n", argv[1]); 22 | exit(0); 23 | } 24 | if(fout == NULL) 25 | { 26 | printf("Failed to open file: %s \n", argv[2]); 27 | exit(0); 28 | } 29 | 30 | int header[4]; double range[6]; 31 | fread( header, sizeof(int), 4, fin); 32 | fread( range, sizeof(double), 6, fin); 33 | if( header[0] != -1){ 34 | cerr << "this format can not be read" << endl; 35 | } 36 | int nx = header[1]; int ny = header[2]; int nz = header[3]; 37 | 38 | int nynz = nx * ny; 39 | cerr << nx << "\t" << ny << "\t" << nz << endl; 40 | cerr << range[0] << "\t" << range[1] << "\t" 41 | << range[2] << "\t" << range[3] << "\t" 42 | << range[4] << "\t" << range[5] << endl; 43 | 44 | 45 | typedef struct pack_f5{ 46 | float f[5]; 47 | }pack_f5; 48 | pack_f5 *voxel_part = new pack_f5[nynz]; 49 | float tempF = 0.0; 50 | 51 | fprintf(fout, "# Time %f \n", range[0]); 52 | fprintf(fout, "# X Y DTop DVTop DFront DVFront Rphi\n"); 53 | 54 | fread( voxel_part, sizeof(pack_f5), nynz, fin); //Read the density xy,vxy,xz,vxz 55 | 56 | for( int k=0; k 2 | #include "read_tipsy.h" 3 | 4 | 5 | int main(int argc, char * argv[]) 6 | { 7 | 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | MPI_Init(&argc, 
&argv); 12 | 13 | int nranks, rank; 14 | MPI_Comm_size(comm, &nranks); 15 | MPI_Comm_rank(comm, &rank); 16 | 17 | 18 | if (argc < 3) 19 | { 20 | if (rank == 0) 21 | { 22 | fprintf(stderr, " ------------------------------------------------------------------------\n"); 23 | fprintf(stderr, " Usage: \n"); 24 | fprintf(stderr, " %s baseName nDomains reduceFactorFirst[1] reduceFactorSecond[1]\n", argv[0]); 25 | fprintf(stderr, " ------------------------------------------------------------------------\n"); 26 | } 27 | exit(-1); 28 | } 29 | 30 | const std::string baseName(argv[1]); 31 | const int nDomains = atoi(argv[2]); 32 | 33 | int reduceFactorFirst = 1; 34 | int reduceFactorSecond = 1; 35 | 36 | if (argc > 3) 37 | reduceFactorFirst = atoi(argv[3]); 38 | if (argc > 4) 39 | reduceFactorSecond = atoi(argv[4]); 40 | 41 | if (rank == 0) 42 | { 43 | fprintf(stderr, " reduceFactorFirst= %d\n", reduceFactorFirst); 44 | fprintf(stderr, " reduceFactorSecond= %d\n", reduceFactorSecond); 45 | } 46 | 47 | #if 0 48 | reduceFactorFirst = 10; 49 | reduceFactorSecond = 2; 50 | #endif 51 | 52 | ReadTipsy data( 53 | baseName, 54 | rank, nranks, 55 | nDomains, 56 | reduceFactorFirst, 57 | reduceFactorSecond); 58 | 59 | long long nFirstLocal = data.firstID.size(); 60 | long long nSecondLocal = data.secondID.size(); 61 | 62 | long long nFirst, nSecond; 63 | MPI_Allreduce(&nFirstLocal, &nFirst, 1, MPI_LONG, MPI_SUM, comm); 64 | MPI_Allreduce(&nSecondLocal, &nSecond, 1, MPI_LONG, MPI_SUM, comm); 65 | 66 | if (rank == 0) 67 | { 68 | fprintf(stderr, " nFirst = %lld \n", nFirst); 69 | fprintf(stderr, " nSecond= %lld \n", nSecond); 70 | fprintf(stderr, " nTotal= %lld \n", nFirst + nSecond); 71 | } 72 | 73 | 74 | MPI_Finalize(); 75 | 76 | 77 | 78 | 79 | return 0; 80 | } 81 | 82 | 83 | -------------------------------------------------------------------------------- /tools/readSnap/tipsydefs.h: -------------------------------------------------------------------------------- 1 | #ifndef TIPSYDEFS_H 
2 | #define TIPSYDEFS_H 3 | 4 | #define MAXDIM 3 5 | #define forever for(;;) 6 | 7 | typedef float Real; 8 | 9 | struct gas_particle { 10 | Real mass; 11 | Real pos[MAXDIM]; 12 | Real vel[MAXDIM]; 13 | Real rho; 14 | Real temp; 15 | Real hsmooth; 16 | Real metals ; 17 | Real phi ; 18 | } ; 19 | 20 | //struct gas_particle *gas_particles; 21 | 22 | struct dark_particle { 23 | Real mass; 24 | Real pos[MAXDIM]; 25 | Real vel[MAXDIM]; 26 | Real eps; 27 | int phi ; 28 | public: 29 | int getID() const {return phi;} 30 | void setID(int ID) { phi = ID; } 31 | } ; 32 | 33 | struct star_particle { 34 | Real mass; 35 | Real pos[MAXDIM]; 36 | Real vel[MAXDIM]; 37 | Real metals ; 38 | Real tform ; 39 | Real eps; 40 | int phi ; 41 | public: 42 | int getID() const {return phi;} 43 | void setID(int ID) { phi = ID; } 44 | } ; 45 | 46 | 47 | //V2 structures use 64 bit integers for particle storage 48 | //otherwise they take up the same space for compatibility 49 | 50 | struct dark_particleV2 { 51 | Real mass; 52 | Real pos[MAXDIM]; 53 | Real vel[MAXDIM]; 54 | private: 55 | int _ID[2]; //replaces phi and eps 56 | public: 57 | unsigned long long getID() const {return *(unsigned long long*)_ID;} 58 | void setID(unsigned long long ID) { *(unsigned long long*)_ID = ID; } 59 | int getID_V1() const {return _ID[1];} 60 | // Real eps; 61 | } ; 62 | struct star_particleV2 { 63 | Real mass; 64 | Real pos[MAXDIM]; 65 | Real vel[MAXDIM]; 66 | Real metals ; 67 | Real tform ; 68 | private: 69 | int _ID[2]; //replaces phi and eps 70 | public: 71 | unsigned long long getID() const {return *(unsigned long long*)_ID;} 72 | void setID(unsigned long long ID) { *(unsigned long long*)_ID = ID; } 73 | int getID_V1() const {return _ID[1];} 74 | // Real eps; 75 | // int ID; //replaces phi and eps 76 | } ; 77 | 78 | 79 | struct dump { 80 | double time ; 81 | int nbodies ; 82 | int ndim ; 83 | int nsph ; 84 | int ndark ; 85 | int nstar ; 86 | } ; 87 | 88 | struct dumpV2 { 89 | double time ; 90 | int nbodies ; 
91 | int ndim ; 92 | int nsph ; 93 | int ndark ; 94 | int nstar ; 95 | int version; 96 | } ; 97 | 98 | 99 | typedef struct dump header ; 100 | 101 | #endif 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /tools/snapServe/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(bonsai_snapserve) 3 | 4 | 5 | if (BONSAI_CATALYST_OSX_GCC) 6 | # 7 | # special mpi for JB's GCC compile on mac 8 | # can be ignored as it is only used if -DBONSAI_CATALYST_OSX_GCC 9 | # is set on command line 10 | # 11 | set(MPI_ROOT /Users/biddisco/apps/mpich-3.1) 12 | set(MPI_DIR /Users/biddisco/apps/mpich-3.1) 13 | set(MPI_C_INCLUDE_PATH /Users/biddisco/apps/mpich-3.1/include) 14 | set(MPI_C_LIBRARIES /Users/biddisco/apps/mpich-3.1/lib/libmpich.dylib) 15 | set(MPI_CXX_INCLUDE_PATH /Users/biddisco/apps/mpich-3.1/include) 16 | set(MPI_CXX_LIBRARIES /Users/biddisco/apps/mpich-3.1/lib/libmpichcxx.dylib;/Users/biddisco/apps/mpich-3.1/lib/libp) 17 | set(MPI_C_COMPILER /usr/local/bin/mpicc) 18 | set(MPI_EXTRA_LIBRARY /Users/biddisco/apps/mpich-3.1/lib/libpmpich.dylib;/Users/biddisco/apps/mpich-3.1/lib/libmpi) 19 | set(MPI_LIBRARY /Users/biddisco/apps/mpich-3.1/lib/libmpichcxx.dylib) 20 | else() 21 | # 22 | # Don't need this if using mpicxx 23 | # 24 | find_package(MPI) 25 | endif() 26 | 27 | include_directories(${MPI_C_INCLUDE_PATH}) 28 | 29 | # 30 | # Flags to get code compiling with clang on mac 31 | # 32 | message("compiler is ${CMAKE_CXX_COMPILER_ID}") 33 | if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 34 | add_definitions(-DBONSAI_CATALYST_CLANG) 35 | else() 36 | add_definitions("-Wno-literal-suffix") # FIX for OpenMPI with gcc491 37 | add_definitions("-Wno-main") # Do not generate warning on main 38 | add_definitions("-Werror") 39 | add_definitions("-fPIE") 40 | endif() 41 | 42 | # 43 | # Make sure c++11 is selected, but 
don't clobber user flags 44 | # if they have been set by hand. Assume user knows what they're doing 45 | # 46 | if ("${CMAKE_CXX_FLAGS}" STREQUAL "") 47 | set(CMAKE_CXX_FLAGS "-std=c++11" CACHE STRING "CXX flags" FORCE) 48 | endif() 49 | 50 | # 51 | # source files 52 | # 53 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../common) 54 | set(SRC_FILES 55 | main.cpp 56 | ) 57 | 58 | add_library(bonsai_tools_common 59 | ${CMAKE_CURRENT_SOURCE_DIR}/../common/anyoption.cpp 60 | ) 61 | 62 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 63 | 64 | # 65 | # exe 66 | # 67 | add_executable(snapServe ${SRC_FILES}) 68 | target_link_libraries(snapServe bonsai_tools_common ${MPI_C_LIBRARIES} -lrt -rdynamic -pie) 69 | -------------------------------------------------------------------------------- /tools/snapServe/filelist.example.txt: -------------------------------------------------------------------------------- 1 | /home/evghenii/data/MWM31-00500-rhoh.bonsai 2 | /home/evghenii/data/1_10_HR2_6_R500_SNAP300-rhoh.bonsai 3 | /home/evghenii/data/1_10_HR2_6_R500_SNAP300-rhoh.bonsai 4 | /home/evghenii/data/MWM31-00500-rhoh.bonsai 5 | -------------------------------------------------------------------------------- /tools/tarScripts/README: -------------------------------------------------------------------------------- 1 | These two scripts prepare data for moving to tapes: 2 | 3 | 1: First sort data into time-steps: 4 | $ python tarScripts/sortSnapshots.py path_to_snapshots_produced_by_bonsai snapshot_prefix 5 | e.g. 6 | $ python ../tarScripts/sortSnapshots.py ./ MW 7 | 8 | 2: Tar files: 9 | $ python tarScripts/tarSnapshots.py path_to_snapshots_produced_by_sortSnapshots split_size_in_GB 10 | e.g. 11 | $ python tarScripts/tarSnapshots.py ./MW 200 12 | 13 | 3: Copy folder ./MW.tarball to tapes 14 | 15 | 16 | To unpack: 17 | $ cd ./MW.tarball 18 | $ tar xf 00000.0000/archive.tar -F ./newVolumeScript.sh 19 | Preparing volume 2 of 00000.0000/archive.tar. 
# NOTE: in the flattened dump, the physical lines replaced here also carry the
# tail of tools/tarScripts/README and the dump's file separators.  That text is
# kept verbatim in the comment lines below so that no content is lost.
#
# 20 | Preparing volume 3 of 00000.0000/archive.tar-2.
# 21 | Preparing volume 4 of 00000.0000/archive.tar-3.
# 22 | Preparing volume 5 of 00000.0000/archive.tar-4.
# 23 | ..
# 24 | $ rm -rf 00000.0000/archive.tar* [to remove archive files and create free space]
# 25 |
# 26 |
# 27 | Note: newVolumeScript is generated by tarSnapshots.py inside ./MW.tarball
# 28 |
# 29 |
# 30 |
# -------------------------------------------------------------------------------- /tools/tarScripts/sortSnapshots.py: --------------------------------------------------------------------------------
#!/bin/python
"""Sort Bonsai snapshot files into one folder per snapshot.

Files named ``<path>/<prefix>_<snapshot>-<proc>`` are moved to
``<path>/<prefix>/<snapshot>/<prefix>_<snapshot>-<proc>``.

The script runs unchanged on Python 2 and Python 3: it relies on
``subprocess`` instead of the Python-2-only ``commands`` module and only
uses the function-call form of ``print`` with a single argument.
"""

import subprocess
import sys


def dPrint(line):
    """Print *line* to stdout (debug trace helper) and return 0."""
    print(line)
    return 0


def _getStatusOutput(command):
    """Run *command* through the shell and return ``(exit_status, output)``.

    stderr is merged into the captured output and one trailing newline is
    stripped, mirroring what ``commands.getstatusoutput`` used to provide.
    """
    proc = subprocess.Popen(command, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output = proc.communicate()[0]
    if output.endswith("\n"):
        output = output[:-1]
    return proc.returncode, output


def execCommand(command):
    """Execute a shell command and return ``(status, output)``.

    If the command exits with a non-zero status, the status and the captured
    output are printed and the process terminates via ``sys.exit(-1)``;
    callers therefore only ever see a result whose status is 0.
    """
    result = _getStatusOutput(command)
    if result[0] != 0:
        print("Error occured while executing system command %s " % command)
        print("status= %d:" % result[0])
        print("error = %s:" % result[1])
        sys.exit(-1)

    return result


def countNumProcs(prefix):
    """Return the number of entries listed by ``ls <prefix>00000.0000-*``.

    A failing ``ls`` (for example when nothing matches) terminates the
    program inside execCommand, so no extra status check is needed here.
    """
    command = "ls %s" % (prefix + "00000.0000-*")
    dPrint(command)

    output = execCommand(command)[1]
    return len(output.split('\n'))


def countNumSnapshots(prefix):
    """Return the snapshot names found for process 0.

    Lists ``<prefix>*-0`` and strips the leading *prefix* and the trailing
    ``-0`` from every entry.  Only the two ends of each name are trimmed, so
    a ``-0`` that happens to occur inside the directory part of *prefix* no
    longer corrupts the result.
    """
    command = "ls %s" % (prefix + "*-0")
    dPrint(command)

    listing = execCommand(command)[1]

    snapshots = []
    for name in listing.split('\n'):
        if name.startswith(prefix):
            name = name[len(prefix):]
        if name.endswith("-0"):
            name = name[:-2]
        snapshots.append(name)
    dPrint(snapshots)

    return snapshots


def usage(appName, exitCode):
    """Print the command-line help for *appName* and exit with *exitCode*."""
    print("Usage: ")
    print("%s path_to_snapshots prefix_of_a_snapshot" % appName)
    print(" --- Example: ")
    # The application name was previously never substituted into this line.
    print("%s MilkyWayPD MW" % appName)
    sys.exit(exitCode)


def main(argv):
    """Move every snapshot file into its per-snapshot folder.

    *argv* is the full argument vector: program name, path to the snapshots
    and the snapshot prefix.
    """
    if len(argv) < 3:
        usage(argv[0], -1)

    path = argv[1]
    prefix0 = argv[2]

    prefix = path + "/" + prefix0 + "_"

    dPrint(prefix0)
    dPrint(prefix)

    nProc = countNumProcs(prefix)
    print("Found snapshots for %d procs " % nProc)

    snapshots = countNumSnapshots(prefix)
    print("Found %d snapshots per proc " % len(snapshots))

    rootFolder = path + "/" + prefix0
    execCommand("mkdir %s" % rootFolder)

    for snap in snapshots:
        destFolder = rootFolder + "/" + snap
        mkFolder = "mkdir %s" % destFolder
        print(mkFolder)
        execCommand(mkFolder)
        for proc in range(nProc):
            procSuffix = "-%d" % proc
            srcFile = prefix + snap + procSuffix
            dstFile = destFolder + "/" + prefix0 + "_" + snap + procSuffix
            mvFile = "mv %s %s" % (srcFile, dstFile)
            print(mvFile)
            execCommand(mvFile)


if __name__ == "__main__":
    main(sys.argv)

# --------------------------------------------------------------------------------