├── ver0 ├── types.hpp ├── main.cpp ├── Particle.hpp ├── Makefile ├── cpu_time.hpp ├── GSimulation.hpp └── GSimulation.cpp ├── ver1 ├── types.hpp ├── main.cpp ├── Particle.hpp ├── cpu_time.hpp ├── Makefile ├── GSimulation.hpp └── GSimulation.cpp ├── ver3 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp ├── GSimulation.cpp └── GSimulation-moveout.cpp ├── ver4 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp ├── GSimulation.cpp └── GSimulation-moveout.cpp ├── ver5 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp └── GSimulation.cpp ├── ver6 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp └── GSimulation.cpp ├── ver7 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp └── GSimulation.cpp ├── ver8 ├── types.hpp ├── main.cpp ├── cpu_time.hpp ├── Makefile ├── Particle.hpp ├── GSimulation.hpp └── GSimulation.cpp ├── ver2 ├── types.hpp ├── main.cpp ├── Particle.hpp ├── cpu_time.hpp ├── Makefile ├── GSimulation.hpp └── GSimulation.cpp └── README.md /ver0/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details.
16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver1/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver3/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver4/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver5/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version.
11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver6/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver7/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop".
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver8/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | typedef float real_type; -------------------------------------------------------------------------------- /ver2/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | typedef float real_type; 22 | -------------------------------------------------------------------------------- /ver0/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details.
16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver1/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver2/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver3/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver4/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver5/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver6/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver7/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver8/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include 22 | 23 | #include "GSimulation.hpp" 24 | /* Program entry point. With at least one argument, argv[1] is converted with atoi() and passed to sim.set_number_of_particles(); argv[2] is converted and passed to sim.set_number_of_steps() only when exactly two arguments are given (argc==3). With no arguments neither setter is called. sim.start() is then called and 0 is returned. */ 25 | int main(int argc, char** argv) 26 | { 27 | int N; //number of particles 28 | int nstep; //number of integration steps 29 | 30 | GSimulation sim; 31 | 32 | if(argc>1) 33 | { 34 | N=atoi(argv[1]); 35 | sim.set_number_of_particles(N); 36 | if(argc==3) 37 | { 38 | nstep=atoi(argv[2]); 39 | sim.set_number_of_steps(nstep); 40 | } 41 | } 42 | 43 | sim.start(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /ver0/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | /* Per-particle state: pos, vel and acc are 3-element real_type arrays and mass is a single real_type. The constructor calls init(), which sets every one of these fields to zero. */ 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /ver1/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | /* Per-particle state: pos, vel and acc are 3-element real_type arrays and mass is a single real_type. The constructor calls init(), which sets every one of these fields to zero. */ 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /ver2/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | /* Per-particle state: pos, vel and acc are 3-element real_type arrays and mass is a single real_type. The constructor calls init(), which sets every one of these fields to zero. */ 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /ver0/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | INCLUDES = 4 | 5 | CXXFLAGS = $(COMPFLAGS) 6 | 7 | SOURCES = GSimulation.cpp main.cpp 8 | 9 | ADVPRJ = "./adv-ver0" 10 | 11 | ifeq ($(REPORT), yes) 12 | CXXFLAGS+=-qopt-report=5 13 | ifeq ($(FILTER), yes) 14 | CXXFLAGS+=-qopt-report-phase=vec -qopt-report-filter="GSimulation.cpp,125-175" 15 | endif 16 | endif 17 | 18 | .SUFFIXES: .o .cpp 19 | 20 | ########################################## 21 | OBJSC = $(SOURCES:.cpp=.o) 22 | ########################################## 23 | 24 | EXEC=nbody.x 25 | 26 | all: cpu 27 | 28 | %.o: %.cpp 29 | $(info ) 30 | $(info Compiling the object file for CPU: ) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 32 | 33 | cpu: $(OBJSC) 34 | $(info ) 35 | $(info Linking the CPU executable:) 36 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 37 | 38 | run: 39 | $(info ) 40 | $(info Run the default test case on CPU: ) 41 | ./nbody.x 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt 45 | 46 | #---------------------------------------------------------------- 47 | #---------- Intel Advisor Analysis ------------------------------ 48 | #---------------------------------------------------------------- 49 | 50 | survey: 51 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 52 | 53 | roofline: 54 | advixe-cl -collect survey -project-dir
$(ADVPRJ) -- ./nbody.x 55 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 56 | 57 | open-gui: 58 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 59 | 60 | clean-results: 61 | rm -rf $(ADVPRJ) 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /ver0/cpu_time.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | #ifndef _CPUTIME_HPP 22 | #define _CPUTIME_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | // Wall-clock timer: readTime() returns seconds since 1.1.1970 as a double (tv_sec + tv_usec*1e-6).
29 | /* Timer helper built on gettimeofday(). readTime() stores tv_sec + tv_usec*1e-6 (i.e. seconds, as a double) in wctime and returns it; start() and stop() both just return a fresh readTime() value, so no elapsed time is computed inside this class. */ 30 | class CPUTime { 31 | private: 32 | double wctime; 33 | 34 | inline double readTime() 35 | { 36 | struct timeval tp; 37 | 38 | gettimeofday(&tp,NULL); 39 | wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6; 40 | return wctime; 41 | } 42 | public: 43 | CPUTime() : wctime(0.0) { } 44 | 45 | inline double start() { return readTime(); } 46 | inline double stop() { return readTime(); } 47 | 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /ver1/cpu_time.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | 21 | #ifndef _CPUTIME_HPP 22 | #define _CPUTIME_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | // Wall-clock timer: readTime() returns seconds since 1.1.1970 as a double (tv_sec + tv_usec*1e-6).
// CPUTime -- wall-clock stopwatch helper (ver1/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver2/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver3/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver4/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver5/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver6/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
// CPUTime -- wall-clock stopwatch helper (ver7/cpu_time.hpp).
//
// Despite the name, no CPU time is measured: every reading comes from
// gettimeofday() and is the current wall-clock time as a double, in seconds
// since the Unix epoch, with the microsecond field folded into the fraction.
// start() and stop() both just take a fresh reading; presumably the caller
// subtracts the two values to get an elapsed interval.
class CPUTime {
private:
  double wctime;  // most recent reading, in seconds

  // Samples the clock, caches the value in wctime and returns it.
  double readTime()
  {
    struct timeval tp;

    // POSIX leaves a non-null timezone argument unspecified, so pass null.
    gettimeofday(&tp, nullptr);
    wctime = static_cast<double>(tp.tv_sec)
           + static_cast<double>(tp.tv_usec) * 1.e-6;
    return wctime;
  }

public:
  CPUTime() : wctime(0.0) { }

  // Both return the current wall-clock time in seconds.  Member functions
  // defined inside the class are implicitly inline, so the keyword is dropped.
  double start() { return readTime(); }
  double stop()  { return readTime(); }
};
29 | 30 | class CPUTime { 31 | private: 32 | double wctime; 33 | 34 | inline double readTime() 35 | { 36 | struct timeval tp; 37 | 38 | gettimeofday(&tp,NULL); 39 | wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6; 40 | return wctime; 41 | } 42 | public: 43 | CPUTime() : wctime(0.0) { } 44 | 45 | inline double start() { return readTime(); } 46 | inline double stop() { return readTime(); } 47 | 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /ver1/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | #OPTFLAGS = -xCORE-AVX2 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 5 | 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 7 | INCLUDES = 8 | 9 | ADVPRJ = "./adv-ver2" 10 | 11 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 12 | 13 | SOURCES = GSimulation.cpp main.cpp 14 | 15 | .SUFFIXES: .o .cpp 16 | 17 | ########################################## 18 | OBJSC = $(SOURCES:.cpp=.o) 19 | ########################################## 20 | 21 | EXEC=nbody.x 22 | 23 | all: clean cpu 24 | 25 | %.o: %.cpp 26 | $(info ) 27 | $(info Compiling the object file for CPU: ) 28 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 29 | 30 | cpu: $(OBJSC) 31 | $(info ) 32 | $(info Linking the CPU executable: ) 33 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 34 | 35 | run: 36 | $(info ) 37 | $(info Run the default test case on CPU: ) 38 | ./nbody.x 39 | 40 | asm:GSimulation.cpp 41 | $(info) 42 | $(info Generate assembly code: ) 43 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 44 | 45 | clean: 46 | rm -f $(OBJSC) nbody.x *.optrpt *.s 47 | 48 | #---------------------------------------------------------------- 49 | #---------- Intel Advisor Analysis ------------------------------ 50 | #---------------------------------------------------------------- 51 | 52 | survey: 53 | advixe-cl -collect survey 
-project-dir $(ADVPRJ) -- ./nbody.x 54 | 55 | roofline: 56 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 57 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 58 | 59 | open-gui: 60 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 61 | 62 | clean-results: 63 | rm -rf $(ADVPRJ) 64 | 65 | -------------------------------------------------------------------------------- /ver2/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver2" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | ########################################## 15 | OBJSC = $(SOURCES:.cpp=.o) 16 | ########################################## 17 | 18 | EXEC=nbody.x 19 | 20 | all: clean cpu 21 | 22 | %.o: %.cpp 23 | $(info ) 24 | $(info Compiling the object file for CPU: ) 25 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 26 | 27 | cpu: $(OBJSC) 28 | $(info ) 29 | $(info Linking the CPU executable:) 30 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 31 | 32 | run: 33 | $(info ) 34 | $(info Run the default test case on CPU: ) 35 | ./nbody.x 36 | 37 | asm:GSimulation.cpp 38 | $(info) 39 | $(info Generate assembly code: ) 40 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 41 | 42 | clean: 43 | rm -f $(OBJSC) nbody.x *.optrpt *.s 44 | 45 | 46 | #---------------------------------------------------------------- 47 | #---------- Intel Advisor Analysis ------------------------------ 48 | #---------------------------------------------------------------- 49 | 50 | survey: 51 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 52 | 53 | roofline: 54 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 55 | advixe-cl -collect tripcounts 
-flop -project-dir $(ADVPRJ) -- ./nbody.x 56 | 57 | map: 58 | advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 59 | 60 | open-gui: 61 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 62 | 63 | clean-results: 64 | rm -rf $(ADVPRJ) 65 | 66 | -------------------------------------------------------------------------------- /ver4/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver4" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | 15 | ########################################## 16 | OBJSC = $(SOURCES:.cpp=.o) 17 | ########################################## 18 | 19 | EXEC=nbody.x 20 | 21 | all: clean cpu 22 | 23 | %.o: %.cpp 24 | $(info ) 25 | $(info Compiling the object file for CPU: ) 26 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 27 | 28 | cpu: $(OBJSC) 29 | $(info ) 30 | $(info Linking the CPU executable:) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 32 | 33 | run: 34 | $(info ) 35 | $(info Run the default test case on CPU: ) 36 | ./nbody.x 37 | 38 | asm:GSimulation.cpp 39 | $(info) 40 | $(info Generate assembly code: ) 41 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt *.s 45 | 46 | 47 | #---------------------------------------------------------------- 48 | #---------- Intel Advisor Analysis ------------------------------ 49 | #---------------------------------------------------------------- 50 | 51 | survey: 52 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 53 | 54 | roofline: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 57 | 58 | map: 59 
| advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 60 | 61 | open-gui: 62 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 63 | 64 | clean-results: 65 | rm -rf $(ADVPRJ) 66 | 67 | -------------------------------------------------------------------------------- /ver5/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver5" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | 15 | ########################################## 16 | OBJSC = $(SOURCES:.cpp=.o) 17 | ########################################## 18 | 19 | EXEC=nbody.x 20 | 21 | all: clean cpu 22 | 23 | %.o: %.cpp 24 | $(info ) 25 | $(info Compiling the object file for CPU: ) 26 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 27 | 28 | cpu: $(OBJSC) 29 | $(info ) 30 | $(info Linking the CPU executable:) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 32 | 33 | run: 34 | $(info ) 35 | $(info Run the default test case on CPU: ) 36 | ./nbody.x 37 | 38 | asm:GSimulation.cpp 39 | $(info) 40 | $(info Generate assembly code: ) 41 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt *.s 45 | 46 | 47 | #---------------------------------------------------------------- 48 | #---------- Intel Advisor Analysis ------------------------------ 49 | #---------------------------------------------------------------- 50 | 51 | survey: 52 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 53 | 54 | roofline: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 57 | 58 | map: 59 | advixe-cl -collect map -mark-up-list=1 -project-dir 
$(ADVPRJ) -- ./nbody.x 60 | 61 | open-gui: 62 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 63 | 64 | clean-results: 65 | rm -rf $(ADVPRJ) 66 | 67 | -------------------------------------------------------------------------------- /ver6/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver6" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | 15 | ########################################## 16 | OBJSC = $(SOURCES:.cpp=.o) 17 | ########################################## 18 | 19 | EXEC=nbody.x 20 | 21 | all: clean cpu 22 | 23 | %.o: %.cpp 24 | $(info ) 25 | $(info Compiling the object file for CPU: ) 26 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 27 | 28 | cpu: $(OBJSC) 29 | $(info ) 30 | $(info Linking the CPU executable:) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 32 | 33 | run: 34 | $(info ) 35 | $(info Run the default test case on CPU: ) 36 | ./nbody.x 37 | 38 | asm:GSimulation.cpp 39 | $(info) 40 | $(info Generate assembly code: ) 41 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt *.s 45 | 46 | 47 | #---------------------------------------------------------------- 48 | #---------- Intel Advisor Analysis ------------------------------ 49 | #---------------------------------------------------------------- 50 | 51 | survey: 52 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 53 | 54 | roofline: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 57 | 58 | map: 59 | advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 60 | 61 | open-gui: 62 | advixe-gui 
$(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 63 | 64 | clean-results: 65 | rm -rf $(ADVPRJ) 66 | 67 | -------------------------------------------------------------------------------- /ver7/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 -qopenmp 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver7" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | 15 | ########################################## 16 | OBJSC = $(SOURCES:.cpp=.o) 17 | ########################################## 18 | 19 | EXEC=nbody.x 20 | 21 | all: clean cpu 22 | 23 | %.o: %.cpp 24 | $(info ) 25 | $(info Compiling the object file for CPU: ) 26 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 27 | 28 | cpu: $(OBJSC) 29 | $(info ) 30 | $(info Linking the CPU executable:) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 32 | 33 | run: 34 | $(info ) 35 | $(info Run the default test case on CPU: ) 36 | ./nbody.x 37 | 38 | asm:GSimulation.cpp 39 | $(info) 40 | $(info Generate assembly code: ) 41 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt *.s 45 | 46 | 47 | #---------------------------------------------------------------- 48 | #---------- Intel Advisor Analysis ------------------------------ 49 | #---------------------------------------------------------------- 50 | 51 | survey: 52 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 53 | 54 | roofline: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 57 | 58 | map: 59 | advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 60 | 61 | open-gui: 62 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 
63 | 64 | clean-results: 65 | rm -rf $(ADVPRJ) 66 | 67 | -------------------------------------------------------------------------------- /ver8/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 -qopenmp 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver8" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | 15 | ########################################## 16 | OBJSC = $(SOURCES:.cpp=.o) 17 | ########################################## 18 | 19 | EXEC=nbody.x 20 | 21 | all: clean cpu 22 | 23 | %.o: %.cpp 24 | $(info ) 25 | $(info Compiling the object file for CPU: ) 26 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 27 | 28 | cpu: $(OBJSC) 29 | $(info ) 30 | $(info Linking the CPU executable:) 31 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 32 | 33 | run: 34 | $(info ) 35 | $(info Run the default test case on CPU: ) 36 | ./nbody.x 37 | 38 | asm:GSimulation.cpp 39 | $(info) 40 | $(info Generate assembly code: ) 41 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 42 | 43 | clean: 44 | rm -f $(OBJSC) nbody.x *.optrpt *.s 45 | 46 | 47 | #---------------------------------------------------------------- 48 | #---------- Intel Advisor Analysis ------------------------------ 49 | #---------------------------------------------------------------- 50 | 51 | survey: 52 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 53 | 54 | roofline: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 57 | 58 | map: 59 | advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 60 | 61 | open-gui: 62 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 63 | 64 | clean-results: 65 | rm -rf $(ADVPRJ) 66 
| 67 | -------------------------------------------------------------------------------- /ver3/Makefile: -------------------------------------------------------------------------------- 1 | CXX = icpc 2 | COMPFLAGS = -g -std=c++11 -O2 3 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high 4 | 5 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175" 6 | INCLUDES = 7 | 8 | ADVPRJ = "./adv-ver3" 9 | 10 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS) 11 | 12 | SOURCES = GSimulation.cpp main.cpp 13 | 14 | ifeq ($(SIMD), yes) 15 | CXXFLAGS+= -DSIMD 16 | endif 17 | 18 | ########################################## 19 | OBJSC = $(SOURCES:.cpp=.o) 20 | ########################################## 21 | 22 | EXEC=nbody.x 23 | 24 | all: clean cpu 25 | 26 | %.o: %.cpp 27 | $(info ) 28 | $(info Compiling the object file for CPU: ) 29 | $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 30 | 31 | cpu: $(OBJSC) 32 | $(info ) 33 | $(info Linking the CPU executable:) 34 | $(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC) 35 | 36 | run: 37 | $(info ) 38 | $(info Run the default test case on CPU: ) 39 | ./nbody.x 40 | 41 | asm:GSimulation.cpp 42 | $(info) 43 | $(info Generate assembly code: ) 44 | $(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S 45 | 46 | clean: 47 | rm -f $(OBJSC) nbody.x *.optrpt *.s 48 | 49 | 50 | #---------------------------------------------------------------- 51 | #---------- Intel Advisor Analysis ------------------------------ 52 | #---------------------------------------------------------------- 53 | 54 | survey: 55 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 56 | 57 | roofline: 58 | advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x 59 | advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x 60 | 61 | map: 62 | advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./nbody.x 63 | 64 | open-gui: 65 | advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 & 66 | 67 | clean-results: 68 | rm -rf $(ADVPRJ) 69 
| 70 | -------------------------------------------------------------------------------- /ver3/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver4/Particle.hpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver5/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes 
which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver6/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver7/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver8/Particle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _PARTICLE_HPP 22 | #define _PARTICLE_HPP 23 | #include 24 | #include "types.hpp" 25 | 26 | struct Particle 27 | { 28 | public: 29 | Particle() { init();} 30 | void init() 31 | { 32 | pos[0] = 0.; pos[1] = 0.; pos[2] = 0.; 33 | vel[0] = 0.; vel[1] = 0.; vel[2] = 0.; 34 | acc[0] = 0.; acc[1] = 0.; acc[2] = 0.; 35 | mass = 0.; 36 | } 37 | real_type pos[3]; 38 | real_type vel[3]; 39 | real_type acc[3]; 40 | real_type mass; 41 | }; 42 | 43 | struct ParticleSoA 44 | { 45 | public: 46 | ParticleSoA() { init();} 47 | void init() 48 | { 49 | pos_x = NULL; pos_y = NULL; pos_z = NULL; 50 | vel_x = NULL; vel_y = NULL; vel_z = NULL; 51 | acc_x = NULL; acc_y = NULL; acc_z = NULL; 52 | mass = NULL; 53 | } 54 | real_type *pos_x, *pos_y, *pos_z; 55 | real_type *vel_x, *vel_y, *vel_z; 56 | real_type *acc_x, *acc_y, *acc_z; 57 | real_type *mass; 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /ver0/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | void init(); 43 | void set_number_of_particles(int N); 44 | void set_number_of_steps(int N); 45 | void start(); 46 | 47 | private: 48 | Particle *particles; 49 | 50 | int _npart; //number of particles 51 | int _nsteps; //number of integration steps 52 | real_type _tstep; //time step of the simulation 53 | 54 | int _sfreq; //sample frequency 55 | 56 | real_type _kenergy; //kinetic energy 57 | 58 | double _totTime; //total time of the simulation 59 | double _totFlops; //total number of flops 60 | 61 | void init_pos(); 62 | void init_vel(); 63 | void init_acc(); 64 | void init_mass(); 65 | 66 | inline void set_npart(const int &N){ _npart = N; } 67 | inline int get_npart() const {return _npart; } 68 | 69 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 70 | inline real_type get_tstep() const {return _tstep; } 71 | 72 | inline void set_nsteps(const int &n){ _nsteps = n; } 73 | inline int get_nsteps() const {return _nsteps; } 74 | 75 | inline void set_sfreq(const int 
&sf){ _sfreq = sf; } 76 | inline int get_sfreq() const {return _sfreq; } 77 | 78 | void print_header(); 79 | 80 | }; 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /ver1/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation // gravity simulation: holds the particle pointer, run parameters and timing totals 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | // Not copyable: 'particles' is a raw pointer and the class declares its own destructor, so a member-wise copy would leave two objects sharing one pointer. 43 | GSimulation(const GSimulation&) = delete; 44 | GSimulation& operator=(const GSimulation&) = delete; 45 | 46 | void init(); 47 | void set_number_of_particles(int N); 48 | void set_number_of_steps(int N); 49 | void start(); 50 | 51 | private: 52 | Particle *particles = nullptr; //starts out null instead of indeterminate 53 | 54 | int _npart; //number of particles 55 | int _nsteps; //number of integration steps 56 | real_type _tstep; //time step of the simulation 57 | 58 | int _sfreq; //sample frequency 59 | 60 | real_type _kenergy; //kinetic energy 61 | 62 | double _totTime; //total time of the simulation 63 | double _totFlops; //total number of flops 64 | 65 | void init_pos(); 66 | void init_vel(); 67 | void init_acc(); 68 | void init_mass(); 69 | 70 | inline void set_npart(const int &N){ _npart = N; } 71 | inline int get_npart() const {return _npart; } 72 | 73 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 74 | inline real_type get_tstep() const {return _tstep; } 75 | 76 | inline void set_nsteps(const int &n){ _nsteps = n; } 77 | inline int get_nsteps() const {return _nsteps; } 78 | 79 | inline void set_sfreq(const int &sf){ _sfreq = sf; } 80 | inline int get_sfreq() const {return _sfreq; } 81 | 82 | void print_header(); 83 | 84 | }; 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /ver2/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop".
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | void init(); 43 | void set_number_of_particles(int N); 44 | void set_number_of_steps(int N); 45 | void start(); 46 | 47 | private: 48 | Particle *particles; 49 | 50 | int _npart; //number of particles 51 | int _nsteps; //number of integration steps 52 | real_type _tstep; //time step of the simulation 53 | 54 | int _sfreq; //sample frequency 55 | 56 | real_type _kenergy; //kinetic energy 57 | 58 | double _totTime; //total time of the simulation 59 | double _totFlops; //total number of flops 60 | 61 | void init_pos(); 62 | void init_vel(); 63 | void init_acc(); 64 | void init_mass(); 65 | 66 | inline void set_npart(const int &N){ _npart = N; } 67 | inline int get_npart() const {return _npart; } 68 | 69 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 70 | inline real_type get_tstep() const {return _tstep; } 71 | 72 | inline void set_nsteps(const int &n){ _nsteps = n; } 73 | inline int get_nsteps() const {return _nsteps; } 74 | 75 | inline void set_sfreq(const int 
&sf){ _sfreq = sf; } 76 | inline int get_sfreq() const {return _sfreq; } 77 | 78 | void print_header(); 79 | 80 | }; 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /ver3/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation // gravity simulation: holds the particle pointer, run parameters and timing totals 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | // Not copyable: 'particles' is a raw pointer and the class declares its own destructor, so a member-wise copy would leave two objects sharing one pointer. 43 | GSimulation(const GSimulation&) = delete; 44 | GSimulation& operator=(const GSimulation&) = delete; 45 | 46 | void init(); 47 | void set_number_of_particles(int N); 48 | void set_number_of_steps(int N); 49 | void start(); 50 | 51 | private: 52 | ParticleSoA *particles = nullptr; //starts out null instead of indeterminate 53 | 54 | int _npart; //number of particles 55 | int _nsteps; //number of integration steps 56 | real_type _tstep; //time step of the simulation 57 | 58 | int _sfreq; //sample frequency 59 | 60 | real_type _kenergy; //kinetic energy 61 | 62 | double _totTime; //total time of the simulation 63 | double _totFlops; //total number of flops 64 | 65 | void init_pos(); 66 | void init_vel(); 67 | void init_acc(); 68 | void init_mass(); 69 | 70 | inline void set_npart(const int &N){ _npart = N; } 71 | inline int get_npart() const {return _npart; } 72 | 73 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 74 | inline real_type get_tstep() const {return _tstep; } 75 | 76 | inline void set_nsteps(const int &n){ _nsteps = n; } 77 | inline int get_nsteps() const {return _nsteps; } 78 | 79 | inline void set_sfreq(const int &sf){ _sfreq = sf; } 80 | inline int get_sfreq() const {return _sfreq; } 81 | 82 | void print_header(); 83 | 84 | }; 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /ver4/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop".
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | void init(); 43 | void set_number_of_particles(int N); 44 | void set_number_of_steps(int N); 45 | void start(); 46 | 47 | private: 48 | ParticleSoA *particles; 49 | 50 | int _npart; //number of particles 51 | int _nsteps; //number of integration steps 52 | real_type _tstep; //time step of the simulation 53 | 54 | int _sfreq; //sample frequency 55 | 56 | real_type _kenergy; //kinetic energy 57 | 58 | double _totTime; //total time of the simulation 59 | double _totFlops; //total number of flops 60 | 61 | void init_pos(); 62 | void init_vel(); 63 | void init_acc(); 64 | void init_mass(); 65 | 66 | inline void set_npart(const int &N){ _npart = N; } 67 | inline int get_npart() const {return _npart; } 68 | 69 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 70 | inline real_type get_tstep() const {return _tstep; } 71 | 72 | inline void set_nsteps(const int &n){ _nsteps = n; } 73 | inline int get_nsteps() const {return _nsteps; } 74 | 75 | inline void set_sfreq(const int 
&sf){ _sfreq = sf; } 76 | inline int get_sfreq() const {return _sfreq; } 77 | 78 | void print_header(); 79 | 80 | }; 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /ver5/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation // gravity simulation: holds the particle pointer, run parameters and timing totals 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | // Not copyable: 'particles' is a raw pointer and the class declares its own destructor, so a member-wise copy would leave two objects sharing one pointer. 43 | GSimulation(const GSimulation&) = delete; 44 | GSimulation& operator=(const GSimulation&) = delete; 45 | 46 | void init(); 47 | void set_number_of_particles(int N); 48 | void set_number_of_steps(int N); 49 | void start(); 50 | 51 | private: 52 | ParticleSoA *particles = nullptr; //starts out null instead of indeterminate 53 | 54 | int _npart; //number of particles 55 | int _nsteps; //number of integration steps 56 | real_type _tstep; //time step of the simulation 57 | 58 | int _sfreq; //sample frequency 59 | 60 | real_type _kenergy; //kinetic energy 61 | 62 | double _totTime; //total time of the simulation 63 | double _totFlops; //total number of flops 64 | 65 | void init_pos(); 66 | void init_vel(); 67 | void init_acc(); 68 | void init_mass(); 69 | 70 | inline void set_npart(const int &N){ _npart = N; } 71 | inline int get_npart() const {return _npart; } 72 | 73 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 74 | inline real_type get_tstep() const {return _tstep; } 75 | 76 | inline void set_nsteps(const int &n){ _nsteps = n; } 77 | inline int get_nsteps() const {return _nsteps; } 78 | 79 | inline void set_sfreq(const int &sf){ _sfreq = sf; } 80 | inline int get_sfreq() const {return _sfreq; } 81 | 82 | void print_header(); 83 | 84 | }; 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /ver6/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop".
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | void init(); 43 | void set_number_of_particles(int N); 44 | void set_number_of_steps(int N); 45 | void start(); 46 | 47 | private: 48 | ParticleSoA *particles; 49 | 50 | int _npart; //number of particles 51 | int _nsteps; //number of integration steps 52 | real_type _tstep; //time step of the simulation 53 | 54 | int _sfreq; //sample frequency 55 | 56 | real_type _kenergy; //kinetic energy 57 | 58 | double _totTime; //total time of the simulation 59 | double _totFlops; //total number of flops 60 | 61 | void init_pos(); 62 | void init_vel(); 63 | void init_acc(); 64 | void init_mass(); 65 | 66 | inline void set_npart(const int &N){ _npart = N; } 67 | inline int get_npart() const {return _npart; } 68 | 69 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 70 | inline real_type get_tstep() const {return _tstep; } 71 | 72 | inline void set_nsteps(const int &n){ _nsteps = n; } 73 | inline int get_nsteps() const {return _nsteps; } 74 | 75 | inline void set_sfreq(const int 
&sf){ _sfreq = sf; } 76 | inline int get_sfreq() const {return _sfreq; } 77 | 78 | void print_header(); 79 | 80 | }; 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /ver7/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation // gravity simulation: holds the particle pointer, run parameters and timing totals 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | // Not copyable: 'particles' is a raw pointer and the class declares its own destructor, so a member-wise copy would leave two objects sharing one pointer. 43 | GSimulation(const GSimulation&) = delete; 44 | GSimulation& operator=(const GSimulation&) = delete; 45 | 46 | void init(); 47 | void set_number_of_particles(int N); 48 | void set_number_of_steps(int N); 49 | void start(); 50 | 51 | private: 52 | ParticleSoA *particles = nullptr; //starts out null instead of indeterminate 53 | 54 | int _npart; //number of particles 55 | int _nsteps; //number of integration steps 56 | real_type _tstep; //time step of the simulation 57 | 58 | int _sfreq; //sample frequency 59 | 60 | real_type _kenergy; //kinetic energy 61 | 62 | double _totTime; //total time of the simulation 63 | double _totFlops; //total number of flops 64 | 65 | void init_pos(); 66 | void init_vel(); 67 | void init_acc(); 68 | void init_mass(); 69 | 70 | inline void set_npart(const int &N){ _npart = N; } 71 | inline int get_npart() const {return _npart; } 72 | 73 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 74 | inline real_type get_tstep() const {return _tstep; } 75 | 76 | inline void set_nsteps(const int &n){ _nsteps = n; } 77 | inline int get_nsteps() const {return _nsteps; } 78 | 79 | inline void set_sfreq(const int &sf){ _sfreq = sf; } 80 | inline int get_sfreq() const {return _sfreq; } 81 | 82 | void print_header(); 83 | 84 | }; 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /ver8/GSimulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop".
4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #ifndef _GSIMULATION_HPP 22 | #define _GSIMULATION_HPP 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include "Particle.hpp" 35 | 36 | class GSimulation 37 | { 38 | public: 39 | GSimulation(); 40 | ~GSimulation(); 41 | 42 | void init(); 43 | void set_number_of_particles(int N); 44 | void set_number_of_steps(int N); 45 | void start(); 46 | 47 | private: 48 | ParticleSoA *particles; 49 | 50 | int _npart; //number of particles 51 | int _nsteps; //number of integration steps 52 | real_type _tstep; //time step of the simulation 53 | 54 | int _sfreq; //sample frequency 55 | 56 | real_type _kenergy; //kinetic energy 57 | 58 | double _totTime; //total time of the simulation 59 | double _totFlops; //total number of flops 60 | 61 | void init_pos(); 62 | void init_vel(); 63 | void init_acc(); 64 | void init_mass(); 65 | 66 | inline void set_npart(const int &N){ _npart = N; } 67 | inline int get_npart() const {return _npart; } 68 | 69 | inline void set_tstep(const real_type &dt){ _tstep = dt; } 70 | inline real_type get_tstep() const {return _tstep; } 71 | 72 | inline void set_nsteps(const int &n){ _nsteps = n; } 73 | inline int get_nsteps() const {return _nsteps; } 74 | 75 | inline void set_sfreq(const int 
&sf){ _sfreq = sf; } 76 | inline int get_sfreq() const {return _sfreq; } 77 | 78 | void print_header(); 79 | 80 | }; 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /ver0/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; i unif_d(-1.0,1.0); 63 | 64 | for(int i=0; i (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; i 2) 187 | { 188 | av += gflops*get_sfreq()/(ts1 - ts0); 189 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 190 | } 191 | 192 | ts0 = 0; 193 | ts1 = 0; 194 | } 195 | 196 | } //end of the time step loop 197 | 198 | const double t1 = time.stop(); 199 | _totTime = (t1-t0); 200 | _totFlops = gflops*get_nsteps(); 201 | 202 | av/=(double)(nf-2); 203 | dev=sqrt(dev/(double)(nf-2)-av*av); 204 | 205 | int nthreads=1; 206 | 207 | std::cout << std::endl; 208 | std::cout << "# Number Threads : " << nthreads << std::endl; 209 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 210 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 211 | std::cout << "===============================" << std::endl; 212 | 213 | } 214 | 215 | 216 | void GSimulation :: print_header() 217 | { 218 | 219 | std::cout << " nPart = " << get_npart() << "; " 220 | << "nSteps = " << get_nsteps() << "; " 221 | << "dt = " << get_tstep() << std::endl; 222 | 223 | std::cout <<
"------------------------------------------------" << std::endl; 224 | std::cout << " " 225 | << std::left << std::setw(8) << "s" 226 | << std::left << std::setw(8) << "dt" 227 | << std::left << std::setw(12) << "kenergy" 228 | << std::left << std::setw(12) << "time (s)" 229 | << std::left << std::setw(12) << "GFlops" 230 | << std::endl; 231 | std::cout << "------------------------------------------------" << std::endl; 232 | 233 | 234 | } 235 | 236 | GSimulation :: ~GSimulation() 237 | { 238 | delete[] particles; //array form of delete; NOTE(review): assumes particles comes from new Particle[...] (the allocation is not visible in this excerpt) - confirm 239 | } 240 | -------------------------------------------------------------------------------- /ver1/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; i unif_d(-1.0,1.0); 63 | 64 | for(int i=0; i (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; i 2) 187 | { 188 | av += gflops*get_sfreq()/(ts1 - ts0); 189 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 190 | } 191 | 192 | ts0 = 0; 193 | ts1 = 0; 194 | } 195 | 196 | } //end of the time step loop 197 | 198 | const double t1 = time.stop(); 199 | _totTime = (t1-t0); 200 | _totFlops = gflops*get_nsteps(); 201 | 202 | av/=(double)(nf-2); 203 | dev=sqrt(dev/(double)(nf-2)-av*av); 204 | 205 | int nthreads=1; 206 | 207 | std::cout << std::endl; 208 | std::cout << "# Number Threads : " << nthreads << std::endl; 209 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 210 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 211 | std::cout << "===============================" << std::endl; 212 | 213 | } 214 | 215 | 216 | void GSimulation :: print_header() 217 | { 218 | 219 | std::cout << " nPart = " << get_npart() << "; " 220 | << "nSteps = " << get_nsteps() << "; " 221 | << "dt = " << get_tstep() << std::endl; 222 | 223 | std::cout <<
"------------------------------------------------" << std::endl; 224 | std::cout << " " 225 | << std::left << std::setw(8) << "s" 226 | << std::left << std::setw(8) << "dt" 227 | << std::left << std::setw(12) << "kenergy" 228 | << std::left << std::setw(12) << "time (s)" 229 | << std::left << std::setw(12) << "GFlops" 230 | << std::endl; 231 | std::cout << "------------------------------------------------" << std::endl; 232 | 233 | 234 | } 235 | 236 | GSimulation :: ~GSimulation() 237 | { 238 | delete[] particles; //array form of delete; NOTE(review): assumes particles comes from new Particle[...] (the allocation is not visible in this excerpt) - confirm 239 | } 240 | -------------------------------------------------------------------------------- /ver2/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optimization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; i unif_d(-1.0,1.0); 63 | 64 | for(int i=0; i (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; i 2) 187 | { 188 | av += gflops*get_sfreq()/(ts1 - ts0); 189 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 190 | } 191 | 192 | ts0 = 0; 193 | ts1 = 0; 194 | } 195 | 196 | } //end of the time step loop 197 | 198 | const double t1 = time.stop(); 199 | _totTime = (t1-t0); 200 | _totFlops = gflops*get_nsteps(); 201 | 202 | av/=(double)(nf-2); 203 | dev=sqrt(dev/(double)(nf-2)-av*av); 204 | 205 | int nthreads=1; 206 | 207 | std::cout << std::endl; 208 | std::cout << "# Number Threads : " << nthreads << std::endl; 209 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 210 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 211 | std::cout << "===============================" << std::endl; 212 | 213 | } 214 | 215 | 216 | void GSimulation :: print_header() 217 | { 218 | 219 | std::cout << " nPart = " << get_npart() << "; " 220 | << "nSteps = " << get_nsteps() << "; " 221 | << "dt = " << get_tstep() << std::endl; 222 | 223 | std::cout << 
"------------------------------------------------" << std::endl; 224 | std::cout << " " 225 | << std::left << std::setw(8) << "s" 226 | << std::left << std::setw(8) << "dt" 227 | << std::left << std::setw(12) << "kenergy" 228 | << std::left << std::setw(12) << "time (s)" 229 | << std::left << std::setw(12) << "GFlops" 230 | << std::endl; 231 | std::cout << "------------------------------------------------" << std::endl; 232 | 233 | 234 | } 235 | 236 | GSimulation :: ~GSimulation() 237 | { 238 | delete particles; 239 | } 240 | -------------------------------------------------------------------------------- /ver4/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | //allocate particles 103 | particles = new ParticleSoA; 104 | 105 | particles->pos_x = new real_type[n]; 106 | particles->pos_y = new real_type[n]; 107 | particles->pos_z = new 
real_type[n]; 108 | particles->vel_x = new real_type[n]; 109 | particles->vel_y = new real_type[n]; 110 | particles->vel_z = new real_type[n]; 111 | particles->acc_x = new real_type[n]; 112 | particles->acc_y = new real_type[n]; 113 | particles->acc_z = new real_type[n]; 114 | particles->mass = new real_type[n]; 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1.e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. ); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const double t0 = time.start(); 137 | for (int s=1; s<=get_nsteps(); ++s) 138 | { 139 | ts0 += time.start(); 140 | for (j = 0; j < n; j++)// update acceleration 141 | { 142 | #pragma omp simd 143 | for (i = 0; i < n; i++) 144 | { 145 | real_type dx, dy, dz; 146 | real_type distanceSqr = 0.0f; 147 | real_type distanceInv = 0.0f; 148 | 149 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 150 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 151 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 152 | 153 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 154 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 155 | 156 | particles->acc_x[i] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 157 | particles->acc_y[i] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 158 | particles->acc_z[i] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 159 | } 160 | } 161 | energy = 0; 162 | 163 | for (i = 0; i < n; ++i)// update position 164 | { 165 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 166 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 167 | 
particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 168 | 169 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 170 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 171 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 172 | 173 | particles->acc_x[i] = 0.; 174 | particles->acc_y[i] = 0.; 175 | particles->acc_z[i] = 0.; 176 | 177 | energy += particles->mass[i] * ( 178 | particles->vel_x[i]*particles->vel_x[i] + 179 | particles->vel_y[i]*particles->vel_y[i] + 180 | particles->vel_z[i]*particles->vel_z[i]); //7flops 181 | } 182 | 183 | _kenergy = 0.5 * energy; 184 | 185 | ts1 += time.stop(); 186 | if(!(s%get_sfreq()) ) 187 | { 188 | nf += 1; 189 | std::cout << " " 190 | << std::left << std::setw(8) << s 191 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 192 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 193 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 194 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 195 | << std::endl; 196 | if(nf > 2) 197 | { 198 | av += gflops*get_sfreq()/(ts1 - ts0); 199 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 200 | } 201 | 202 | ts0 = 0; 203 | ts1 = 0; 204 | } 205 | 206 | } //end of the time step loop 207 | 208 | const double t1 = time.stop(); 209 | _totTime = (t1-t0); 210 | _totFlops = gflops*get_nsteps(); 211 | 212 | av/=(double)(nf-2); 213 | dev=sqrt(dev/(double)(nf-2)-av*av); 214 | 215 | int nthreads=1; 216 | 217 | std::cout << std::endl; 218 | std::cout << "# Number Threads : " << nthreads << std::endl; 219 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 220 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 221 | std::cout << "===============================" << std::endl; 222 | 223 | } 224 | 225 | 226 | void GSimulation :: print_header() 227 | { 228 | 229 | std::cout << " nPart = " << get_npart() << "; " 230 | << 
"nSteps = " << get_nsteps() << "; " 231 | << "dt = " << get_tstep() << std::endl; 232 | 233 | std::cout << "------------------------------------------------" << std::endl; 234 | std::cout << " " 235 | << std::left << std::setw(8) << "s" 236 | << std::left << std::setw(8) << "dt" 237 | << std::left << std::setw(12) << "kenergy" 238 | << std::left << std::setw(12) << "time (s)" 239 | << std::left << std::setw(12) << "GFlops" 240 | << std::endl; 241 | std::cout << "------------------------------------------------" << std::endl; 242 | 243 | 244 | } 245 | 246 | GSimulation :: ~GSimulation() 247 | { 248 | delete [] particles->pos_x; 249 | delete [] particles->pos_y; 250 | delete [] particles->pos_z; 251 | delete [] particles->vel_x; 252 | delete [] particles->vel_y; 253 | delete [] particles->vel_z; 254 | delete [] particles->acc_x; 255 | delete [] particles->acc_y; 256 | delete [] particles->acc_z; 257 | delete [] particles->mass; 258 | delete particles; 259 | } 260 | -------------------------------------------------------------------------------- /ver3/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | //allocate particles 103 | particles = new ParticleSoA; 104 | 105 | particles->pos_x = new real_type[n]; 106 | particles->pos_y = new real_type[n]; 107 | particles->pos_z = new 
real_type[n]; 108 | particles->vel_x = new real_type[n]; 109 | particles->vel_y = new real_type[n]; 110 | particles->vel_z = new real_type[n]; 111 | particles->acc_x = new real_type[n]; 112 | particles->acc_y = new real_type[n]; 113 | particles->acc_z = new real_type[n]; 114 | particles->mass = new real_type[n]; 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. ); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const double t0 = time.start(); 137 | for (int s=1; s<=get_nsteps(); ++s) 138 | { 139 | ts0 += time.start(); 140 | for (i = 0; i < n; i++)// update acceleration 141 | { 142 | #ifdef SIMD 143 | #pragma omp simd 144 | #endif 145 | for (j = 0; j < n; j++) 146 | { 147 | real_type dx, dy, dz; 148 | real_type distanceSqr = 0.0f; 149 | real_type distanceInv = 0.0f; 150 | 151 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 152 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 153 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 154 | 155 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 156 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 157 | 158 | particles->acc_x[i] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 159 | particles->acc_y[i] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 160 | particles->acc_z[i] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 161 | } 162 | } 163 | energy = 0; 164 | 165 | for (i = 0; i < n; ++i)// update position 166 | { 167 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 168 | particles->vel_y[i] += particles->acc_y[i] * dt; 
//2flops 169 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 170 | 171 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 172 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 173 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 174 | 175 | particles->acc_x[i] = 0.; 176 | particles->acc_y[i] = 0.; 177 | particles->acc_z[i] = 0.; 178 | 179 | energy += particles->mass[i] * ( 180 | particles->vel_x[i]*particles->vel_x[i] + 181 | particles->vel_y[i]*particles->vel_y[i] + 182 | particles->vel_z[i]*particles->vel_z[i]); //7flops 183 | } 184 | 185 | _kenergy = 0.5 * energy; 186 | 187 | ts1 += time.stop(); 188 | if(!(s%get_sfreq()) ) 189 | { 190 | nf += 1; 191 | std::cout << " " 192 | << std::left << std::setw(8) << s 193 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 194 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 195 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 196 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 197 | << std::endl; 198 | if(nf > 2) 199 | { 200 | av += gflops*get_sfreq()/(ts1 - ts0); 201 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 202 | } 203 | 204 | ts0 = 0; 205 | ts1 = 0; 206 | } 207 | 208 | } //end of the time step loop 209 | 210 | const double t1 = time.stop(); 211 | _totTime = (t1-t0); 212 | _totFlops = gflops*get_nsteps(); 213 | 214 | av/=(double)(nf-2); 215 | dev=sqrt(dev/(double)(nf-2)-av*av); 216 | 217 | int nthreads=1; 218 | 219 | std::cout << std::endl; 220 | std::cout << "# Number Threads : " << nthreads << std::endl; 221 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 222 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 223 | std::cout << "===============================" << std::endl; 224 | 225 | } 226 | 227 | 228 | void GSimulation :: print_header() 229 | { 230 | 231 | std::cout << " nPart = " << get_npart() << "; " 
232 | << "nSteps = " << get_nsteps() << "; " 233 | << "dt = " << get_tstep() << std::endl; 234 | 235 | std::cout << "------------------------------------------------" << std::endl; 236 | std::cout << " " 237 | << std::left << std::setw(8) << "s" 238 | << std::left << std::setw(8) << "dt" 239 | << std::left << std::setw(12) << "kenergy" 240 | << std::left << std::setw(12) << "time (s)" 241 | << std::left << std::setw(12) << "GFlops" 242 | << std::endl; 243 | std::cout << "------------------------------------------------" << std::endl; 244 | 245 | 246 | } 247 | 248 | GSimulation :: ~GSimulation() 249 | { 250 | delete [] particles->pos_x; 251 | delete [] particles->pos_y; 252 | delete [] particles->pos_z; 253 | delete [] particles->vel_x; 254 | delete [] particles->vel_y; 255 | delete [] particles->vel_z; 256 | delete [] particles->acc_x; 257 | delete [] particles->acc_y; 258 | delete [] particles->acc_z; 259 | delete [] particles->mass; 260 | delete particles; 261 | } 262 | -------------------------------------------------------------------------------- /ver4/GSimulation-moveout.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | //allocate particles 103 | particles = new ParticleSoA; 104 | 105 | particles->pos_x = new real_type[n]; 106 | particles->pos_y = new real_type[n]; 107 | particles->pos_z = new 
real_type[n]; 108 | particles->vel_x = new real_type[n]; 109 | particles->vel_y = new real_type[n]; 110 | particles->vel_z = new real_type[n]; 111 | particles->acc_x = new real_type[n]; 112 | particles->acc_y = new real_type[n]; 113 | particles->acc_z = new real_type[n]; 114 | particles->mass = new real_type[n]; 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1.e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. ); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const double t0 = time.start(); 137 | for (int s=1; s<=get_nsteps(); ++s) 138 | { 139 | ts0 += time.start(); 140 | for (i = 0; i < n; i++)// update acceleration 141 | { 142 | real_type ax_i = particles->acc_x[i]; 143 | real_type ay_i = particles->acc_y[i]; 144 | real_type az_i = particles->acc_z[i]; 145 | for (j = 0; j < n; j++) 146 | { 147 | real_type dx, dy, dz; 148 | real_type distanceSqr = 0.0f; 149 | real_type distanceInv = 0.0f; 150 | 151 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 152 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 153 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 154 | 155 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 156 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 157 | 158 | ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 159 | ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 160 | az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 161 | } 162 | particles->acc_x[i] = ax_i; 163 | particles->acc_y[i] = ay_i; 164 | particles->acc_z[i] = az_i; 165 | } 166 | energy = 0; 167 | 168 | for (i = 0; i < n; ++i)// update 
position 169 | { 170 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 171 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 172 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 173 | 174 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 175 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 176 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 177 | 178 | particles->acc_x[i] = 0.; 179 | particles->acc_y[i] = 0.; 180 | particles->acc_z[i] = 0.; 181 | 182 | energy += particles->mass[i] * ( 183 | particles->vel_x[i]*particles->vel_x[i] + 184 | particles->vel_y[i]*particles->vel_y[i] + 185 | particles->vel_z[i]*particles->vel_z[i]); //7flops 186 | } 187 | 188 | _kenergy = 0.5 * energy; 189 | 190 | ts1 += time.stop(); 191 | if(!(s%get_sfreq()) ) 192 | { 193 | nf += 1; 194 | std::cout << " " 195 | << std::left << std::setw(8) << s 196 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 197 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 198 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 199 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 200 | << std::endl; 201 | if(nf > 2) 202 | { 203 | av += gflops*get_sfreq()/(ts1 - ts0); 204 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 205 | } 206 | 207 | ts0 = 0; 208 | ts1 = 0; 209 | } 210 | 211 | } //end of the time step loop 212 | 213 | const double t1 = time.stop(); 214 | _totTime = (t1-t0); 215 | _totFlops = gflops*get_nsteps(); 216 | 217 | av/=(double)(nf-2); 218 | dev=sqrt(dev/(double)(nf-2)-av*av); 219 | 220 | int nthreads=1; 221 | 222 | std::cout << std::endl; 223 | std::cout << "# Number Threads : " << nthreads << std::endl; 224 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 225 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 226 | std::cout << "===============================" << 
std::endl; 227 | 228 | } 229 | 230 | 231 | void GSimulation :: print_header() 232 | { 233 | 234 | std::cout << " nPart = " << get_npart() << "; " 235 | << "nSteps = " << get_nsteps() << "; " 236 | << "dt = " << get_tstep() << std::endl; 237 | 238 | std::cout << "------------------------------------------------" << std::endl; 239 | std::cout << " " 240 | << std::left << std::setw(8) << "s" 241 | << std::left << std::setw(8) << "dt" 242 | << std::left << std::setw(12) << "kenergy" 243 | << std::left << std::setw(12) << "time (s)" 244 | << std::left << std::setw(12) << "GFlops" 245 | << std::endl; 246 | std::cout << "------------------------------------------------" << std::endl; 247 | 248 | 249 | } 250 | 251 | GSimulation :: ~GSimulation() 252 | { 253 | delete [] particles->pos_x; 254 | delete [] particles->pos_y; 255 | delete [] particles->pos_z; 256 | delete [] particles->vel_x; 257 | delete [] particles->vel_y; 258 | delete [] particles->vel_z; 259 | delete [] particles->acc_x; 260 | delete [] particles->acc_y; 261 | delete [] particles->acc_z; 262 | delete [] particles->mass; 263 | delete particles; 264 | } 265 | -------------------------------------------------------------------------------- /ver3/GSimulation-moveout.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 
16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | //allocate particles 103 | particles = new ParticleSoA; 104 | 105 
| particles->pos_x = new real_type[n]; 106 | particles->pos_y = new real_type[n]; 107 | particles->pos_z = new real_type[n]; 108 | particles->vel_x = new real_type[n]; 109 | particles->vel_y = new real_type[n]; 110 | particles->vel_z = new real_type[n]; 111 | particles->acc_x = new real_type[n]; 112 | particles->acc_y = new real_type[n]; 113 | particles->acc_z = new real_type[n]; 114 | particles->mass = new real_type[n]; 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. ); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const double t0 = time.start(); 137 | for (int s=1; s<=get_nsteps(); ++s) 138 | { 139 | ts0 += time.start(); 140 | for (i = 0; i < n; i++)// update acceleration 141 | { 142 | real_type ax_i = particles->acc_x[i]; 143 | real_type ay_i = particles->acc_y[i]; 144 | real_type az_i = particles->acc_z[i]; 145 | for (j = 0; j < n; j++) 146 | { 147 | real_type dx, dy, dz; 148 | real_type distanceSqr = 0.0f; 149 | real_type distanceInv = 0.0f; 150 | 151 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 152 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 153 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 154 | 155 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 156 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 157 | 158 | ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 159 | ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 160 | az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 161 | } 162 | particles->acc_x[i] = ax_i; 163 | particles->acc_y[i] = 
ay_i; 164 | particles->acc_z[i] = az_i; 165 | } 166 | energy = 0; 167 | 168 | for (i = 0; i < n; ++i)// update position 169 | { 170 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 171 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 172 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 173 | 174 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 175 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 176 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 177 | 178 | particles->acc_x[i] = 0.; 179 | particles->acc_y[i] = 0.; 180 | particles->acc_z[i] = 0.; 181 | 182 | energy += particles->mass[i] * ( 183 | particles->vel_x[i]*particles->vel_x[i] + 184 | particles->vel_y[i]*particles->vel_y[i] + 185 | particles->vel_z[i]*particles->vel_z[i]); //7flops 186 | } 187 | 188 | _kenergy = 0.5 * energy; 189 | 190 | ts1 += time.stop(); 191 | if(!(s%get_sfreq()) ) 192 | { 193 | nf += 1; 194 | std::cout << " " 195 | << std::left << std::setw(8) << s 196 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 197 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 198 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 199 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 200 | << std::endl; 201 | if(nf > 2) 202 | { 203 | av += gflops*get_sfreq()/(ts1 - ts0); 204 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 205 | } 206 | 207 | ts0 = 0; 208 | ts1 = 0; 209 | } 210 | 211 | } //end of the time step loop 212 | 213 | const double t1 = time.stop(); 214 | _totTime = (t1-t0); 215 | _totFlops = gflops*get_nsteps(); 216 | 217 | av/=(double)(nf-2); 218 | dev=sqrt(dev/(double)(nf-2)-av*av); 219 | 220 | int nthreads=1; 221 | 222 | std::cout << std::endl; 223 | std::cout << "# Number Threads : " << nthreads << std::endl; 224 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 225 | std::cout << "# Average 
Perfomance : " << av << " +- " << dev << std::endl; 226 | std::cout << "===============================" << std::endl; 227 | 228 | } 229 | 230 | 231 | void GSimulation :: print_header() 232 | { 233 | 234 | std::cout << " nPart = " << get_npart() << "; " 235 | << "nSteps = " << get_nsteps() << "; " 236 | << "dt = " << get_tstep() << std::endl; 237 | 238 | std::cout << "------------------------------------------------" << std::endl; 239 | std::cout << " " 240 | << std::left << std::setw(8) << "s" 241 | << std::left << std::setw(8) << "dt" 242 | << std::left << std::setw(12) << "kenergy" 243 | << std::left << std::setw(12) << "time (s)" 244 | << std::left << std::setw(12) << "GFlops" 245 | << std::endl; 246 | std::cout << "------------------------------------------------" << std::endl; 247 | 248 | 249 | } 250 | 251 | GSimulation :: ~GSimulation() 252 | { 253 | delete [] particles->pos_x; 254 | delete [] particles->pos_y; 255 | delete [] particles->pos_z; 256 | delete [] particles->vel_x; 257 | delete [] particles->vel_y; 258 | delete [] particles->vel_z; 259 | delete [] particles->acc_x; 260 | delete [] particles->acc_y; 261 | delete [] particles->acc_z; 262 | delete [] particles->mass; 263 | delete particles; 264 | } 265 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Demo Session for Intel® Advisor and Intel® Compiler C++ 2 | This is an example code based on a simple N-body simulation of a distribution of point masses placed 3 | at location r_1,...,r_N and have masses m_1,...,m_N. The position of the particles after a specified 4 | time is computed using a finite difference methods for ordinary differential equation. 5 | 6 | ## Implementation 7 | For each particle the position, the velocity, the acceleration and the mass is stored in a C-like 8 | structure and for an N particles case, an array of this structure is allocated. 
This is the 9 | simple data-structure which is very close to the physical representation of a particle mass. 10 | The file `Particle.hpp` contains the implementation of such data-structure. 11 | 12 | For each particle indexed by i, the acceleration is computed as a_i = sum_j G*m_j*(r_j-r_i)/|r_j-r_i|^3, and this 13 | value is used to update the velocity and position using the Euler integration scheme. 14 | Furthermore the total kinetic energy of the particles' group is computed. 15 | The file `GSimulation.cpp` contains the implementation of the algorithm. 16 | 17 | ## Directory structure of the Demo 18 | The demo consists of several directories, which correspond to the different 19 | optimization steps taken to enable vectorization and OpenMP multi-threading of the code. 20 | Each directory has its own makefile to compile and run the test case. 21 | To compile the code type `make`, and to run the simulation type `make run`. 22 | As benchmark, the simulation starts with 16000 particles and 10 integration steps. One can 23 | change the default giving the number of particles and the number of integration steps using 24 | the command line arguments: 25 | `./nbody.x < # of particles> < # of integration>` 26 | 27 | Try to change the number of particles and observe how the performance changes. 28 | 29 | ## Different versions 30 | To start the demo, go to the folder `ver0`, compile and run the test.
31 | 32 | ### Initial version: ver0 33 | The typical output of the simulation is: 34 | ``` 35 | Run the default test case on CPU: 36 | ./nbody.x 37 | =============================== 38 | Initialize Gravity Simulation 39 | nPart = 16000; nSteps = 10; dt = 0.1 40 | ------------------------------------------------ 41 | s dt kenergy time (s) GFlops 42 | ------------------------------------------------ 43 | 1 0.1 26.405 1.7966 4.1324 44 | 2 0.2 313.77 1.5309 4.8498 45 | 3 0.3 926.56 1.5311 4.8489 46 | 4 0.4 1866.4 1.5313 4.8484 47 | 5 0.5 3135.6 1.5315 4.8479 48 | 6 0.6 4737.6 1.5309 4.8497 49 | 7 0.7 6676.6 1.5312 4.8487 50 | 8 0.8 8957.7 1.5311 4.849 51 | 9 0.9 11587 1.5314 4.848 52 | 10 1 14572 1.5309 4.8495 53 | 54 | # Number Threads : 1 55 | # Total Time (s) : 15.577 56 | # Average Perfomance : 4.8488 +- 0.00062286 57 | =============================== 58 | 59 | ``` 60 | 61 | The output shows some useful information. Columnwise: s is the 62 | number of steps; dt is the physical time taking into account the physical 63 | time integration step; kenergy is the kinetic energy of the group of particles; 64 | time is the computational time taken by that time step; GFlops is the 65 | number of giga flops per second. 66 | N.B. The GFlops is an estimation done by looking into the code and counting 67 | the number of math operations according to the algorithm. This is used only 68 | as standard metric for comparison. More realistic numbers can be measured 69 | in a different way (Roofline model of Intel® Advisor). 70 | 71 | Following the five steps of code modernization, 72 | https://software.intel.com/en-us/articles/what-is-code-modernization 73 | we can improve the performance of the code.
74 | 75 | - describe the Intel® Advisor result 76 | - compile the code with processor specific optimization: -xSSE4.2, -xAVX, -xCORE-AVX2, -xCORE-AVX512, -xMIC-AVX512 77 | - generate the compiler report and describe the different options: 78 | - -qopt-report[=N]: default level is 2 79 | - -qopt-report-phase=: default is all 80 | - -qopt-report-file=stdout | stderr | filename 81 | - -qopt-report-filter="GSimulation.cpp,130-204" 82 | 83 | Then show how verbose the compiler report is and use filtering. 84 | 85 | ### ver1 86 | Solution of the ver0. The optimizations are: -O2 -xAVX or higher. 87 | The Makefile is the only difference. Here we generate higher vectorized code and 88 | produce the compiler report. 89 | One should run this version in the same way as before and: 90 | - show the new performance numbers 91 | - describe the Intel® Advisor result 92 | - generate the compiler report 93 | - explain FP conversions and precision of constants, variables and math functions 94 | 95 | ### ver2 96 | Solution of the ver1. The difference is in the GSimulation.cpp file where the consistent 97 | computation with floats is made (constants and SQRT function). 98 | One should run this version in the same way as before and: 99 | - show the new performance numbers 100 | - describe the Intel® Advisor result 101 | - generate the compiler report 102 | - explain the remark #25085: Preprocess Loopnests: Moving Out Load and Store and 103 | remark #15415: vectorization support: non-unit strided load was generated for the variable 104 | .... 105 | remark #15300: LOOP WAS VECTORIZED 106 | remark #15452: unmasked strided loads: 6 107 | remark #15475: --- begin vector cost summary --- 108 | remark #15476: scalar cost: 115 109 | remark #15477: vector cost: 26.750 110 | remark #15478: estimated potential speedup: 4.070 111 | remark #15488: --- end vector cost summary --- 112 | ....
113 | - explain vectorization gather/scatter 114 | - explain AoS and SoA differences 115 | 116 | ### ver3 117 | Solution of the ver2. The differences are in: 118 | - Particle.hpp: the new SoA data structure is implemented 119 | - GSimulation.hpp: modified the data member according to SoA 120 | - GSimulation.cpp: allocation and reference to SoA 121 | 122 | One should run this version in the same way as before and: 123 | - show the new performance numbers 124 | - describe the Intel® Advisor result 125 | - generate the compiler report 126 | - explain the remark #15344: loop was not vectorized: vector dependence prevents vectorization 127 | remark #15346: vector dependence: assumed ANTI dependence between ... and ... 128 | remark #15346: vector dependence: assumed FLOW dependence between ... and ... 129 | - explain the vectorization and how much we gain using it 130 | - refer to the Intel® compiler autovectorization guide and explain the requirements 131 | for autovectorization 132 | - explain #pragma simd 133 | - explain #pragma simd reduction 134 | - modify in the `Makefile` the CXXFLAGS adding the OMPFLAGS at line 8, recompile and run 135 | remark #15301: OpenMP SIMD LOOP WAS VECTORIZED 136 | - at this point running the code shows wrong results (Warning with SIMD, be aware of the full control) 137 | - try to use #pragma simd reduction (solution in the file GSimulation-simd.cpp) 138 | NB remember that the simd reduction is not allowed on `particles->acc_x[i]` 139 | Solution: 140 | - cp GSimulation.cpp GSimulation.cpp.bkp 141 | - cp GSimulation-simd.cpp GSimulation.cpp 142 | recompile and run 143 | remark #15301: OpenMP SIMD LOOP WAS VECTORIZED 144 | - rerun and show that the result is now correct 145 | 146 | ### ver4 147 | This is the clean solution of the ver3 after all modifications done live in the 148 | previous session.
149 | One should run this version in the same way as before and: 150 | - show the new performance numbers 151 | - describe the Intel® Advisor result 152 | - generate the compiler report 153 | - explain remark #15389: vectorization support: reference ... has `unaligned` access 154 | - explain the data alignment with examples and the alignment size (16/32/64 bytes) 155 | - explain peel and remainder loops 156 | 157 | ### ver5 158 | This is the solution of the ver4, with all the allocations replaced by the memory 159 | alignment allocation function. 160 | Running this version allows to see that even modifying the memory allocation functions, 161 | the data is not aligned. One needs to use the function `__assume_aligned(...)`. 162 | Recompile the code adding the option: -DASALIGN. 163 | One should run again this version with the alignment option and: 164 | - show the new performance numbers 165 | - describe the Intel® Advisor result 166 | - generate the compiler report 167 | 168 | This concludes the basic vectorization part of the demo. 169 | At this point, only two topics are missing: 170 | - advanced cache optimization (loop-tiling) (ver6) 171 | - enabling OpenMP (ver7) 172 | 173 | ### ver6 174 | This is the cache optimized version of the code, without OpenMP. 175 | The performance depends on the size of the tile and the number of particles. 176 | One should run again this version and: 177 | - describe in detail what is this kind of optimization and how it depends on the tile size 178 | - show the new performance numbers 179 | - describe the Intel® Advisor result 180 | - generate the compiler report 181 | 182 | ### ver7 183 | This is the version of the code with OpenMP. Play with the number of threads, 184 | openmp scheduling and threads affinity. 185 | 186 | ### ver8 187 | This is the version of the code with OpenMP and cache tiling. 188 | One can also play with the floating point model -fp-model fast=2, for example and 189 | look for further performance improvements.
190 | -------------------------------------------------------------------------------- /ver5/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution 
unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | const int alignment = 64; 103 | particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment); 104 | 105 | particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 106 | particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 107 | particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 108 | particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 109 | particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 110 | particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 111 | particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 112 | particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 113 | particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 114 | particles->mass = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1.e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double 
ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. ); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const double t0 = time.start(); 137 | for (int s=1; s<=get_nsteps(); ++s) 138 | { 139 | ts0 += time.start(); 140 | for (i = 0; i < n; i++)// update acceleration 141 | { 142 | __assume_aligned(particles->pos_x, alignment); 143 | __assume_aligned(particles->pos_y, alignment); 144 | __assume_aligned(particles->pos_z, alignment); 145 | __assume_aligned(particles->acc_x, alignment); 146 | __assume_aligned(particles->acc_y, alignment); 147 | __assume_aligned(particles->acc_z, alignment); 148 | __assume_aligned(particles->mass, alignment); 149 | 150 | real_type ax_i = particles->acc_x[i]; 151 | real_type ay_i = particles->acc_y[i]; 152 | real_type az_i = particles->acc_z[i]; 153 | for (j = 0; j < n; j++) 154 | { 155 | real_type dx, dy, dz; 156 | real_type distanceSqr = 0.0f; 157 | real_type distanceInv = 0.0f; 158 | 159 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 160 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 161 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 162 | 163 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 164 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 165 | 166 | ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 167 | ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 168 | az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 169 | } 170 | particles->acc_x[i] = ax_i; 171 | particles->acc_y[i] = ay_i; 172 | particles->acc_z[i] = az_i; 173 | } 174 | energy = 0; 175 | 176 | for (i = 0; i < n; ++i)// update position 177 | { 178 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 179 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 180 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 181 | 182 | 
particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 183 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 184 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 185 | 186 | particles->acc_x[i] = 0.; 187 | particles->acc_y[i] = 0.; 188 | particles->acc_z[i] = 0.; 189 | 190 | energy += particles->mass[i] * ( 191 | particles->vel_x[i]*particles->vel_x[i] + 192 | particles->vel_y[i]*particles->vel_y[i] + 193 | particles->vel_z[i]*particles->vel_z[i]); //7flops 194 | } 195 | 196 | _kenergy = 0.5 * energy; 197 | 198 | ts1 += time.stop(); 199 | if(!(s%get_sfreq()) ) 200 | { 201 | nf += 1; 202 | std::cout << " " 203 | << std::left << std::setw(8) << s 204 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 205 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 206 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 207 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 208 | << std::endl; 209 | if(nf > 2) 210 | { 211 | av += gflops*get_sfreq()/(ts1 - ts0); 212 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 213 | } 214 | 215 | ts0 = 0; 216 | ts1 = 0; 217 | } 218 | 219 | } //end of the time step loop 220 | 221 | const double t1 = time.stop(); 222 | _totTime = (t1-t0); 223 | _totFlops = gflops*get_nsteps(); 224 | 225 | av/=(double)(nf-2); 226 | dev=sqrt(dev/(double)(nf-2)-av*av); 227 | 228 | int nthreads=1; 229 | 230 | std::cout << std::endl; 231 | std::cout << "# Number Threads : " << nthreads << std::endl; 232 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 233 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 234 | std::cout << "===============================" << std::endl; 235 | 236 | } 237 | 238 | 239 | void GSimulation :: print_header() 240 | { 241 | 242 | std::cout << " nPart = " << get_npart() << "; " 243 | << "nSteps = " << get_nsteps() << "; " 244 | << "dt = " << get_tstep() << 
std::endl; 245 | 246 | std::cout << "------------------------------------------------" << std::endl; 247 | std::cout << " " 248 | << std::left << std::setw(8) << "s" 249 | << std::left << std::setw(8) << "dt" 250 | << std::left << std::setw(12) << "kenergy" 251 | << std::left << std::setw(12) << "time (s)" 252 | << std::left << std::setw(12) << "GFlops" 253 | << std::endl; 254 | std::cout << "------------------------------------------------" << std::endl; 255 | 256 | 257 | } 258 | 259 | GSimulation :: ~GSimulation() 260 | { 261 | _mm_free(particles->pos_x); 262 | _mm_free(particles->pos_y); 263 | _mm_free(particles->pos_z); 264 | _mm_free(particles->vel_x); 265 | _mm_free(particles->vel_y); 266 | _mm_free(particles->vel_z); 267 | _mm_free(particles->acc_x); 268 | _mm_free(particles->acc_y); 269 | _mm_free(particles->acc_z); 270 | _mm_free(particles->mass); 271 | _mm_free(particles); 272 | 273 | } 274 | -------------------------------------------------------------------------------- /ver7/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | 45 | void GSimulation :: init_pos() 46 | { 47 | std::random_device rd; //random number generator 48 | std::mt19937 gen(42); 49 | std::uniform_real_distribution unif_d(0,1.0); 50 | 51 | for(int i=0; ipos_x[i] = unif_d(gen); 54 | particles->pos_y[i] = unif_d(gen); 55 | particles->pos_z[i] = unif_d(gen); 56 | } 57 | } 58 | 59 | void GSimulation :: init_vel() 60 | { 61 | std::random_device rd; //random number generator 62 | std::mt19937 gen(42); 63 | std::uniform_real_distribution unif_d(-1.0,1.0); 64 | 65 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 69 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 70 | } 71 | } 72 | 73 | void GSimulation :: init_acc() 74 | { 75 | for(int i=0; iacc_x[i] = 0.f; 78 | particles->acc_y[i] = 0.f; 79 | particles->acc_z[i] = 0.f; 80 | } 81 | } 82 | 83 | void GSimulation :: init_mass() 84 | { 85 | real_type n = static_cast (get_npart()); 86 | std::random_device rd; //random number generator 87 | std::mt19937 gen(42); 88 | std::uniform_real_distribution unif_d(0.0,1.0); 89 | 90 | for(int i=0; imass[i] = n * unif_d(gen); 93 | } 94 | } 95 | 96 | void GSimulation :: start() 97 | { 98 | real_type energy; 99 | real_type dt = get_tstep(); 100 | int n = get_npart(); 101 | int i,j; 102 | 103 | const int alignment = 64; 104 | particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment); 105 | 106 | particles->pos_x = (real_type*) 
_mm_malloc(n*sizeof(real_type),alignment); 107 | particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 108 | particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 109 | particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 110 | particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 111 | particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 112 | particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 113 | particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 114 | particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 115 | particles->mass = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 116 | 117 | init_pos(); 118 | init_vel(); 119 | init_acc(); 120 | init_mass(); 121 | 122 | print_header(); 123 | 124 | _totTime = 0.; 125 | 126 | const float softeningSquared = 1.e-3f; 127 | const float G = 6.67259e-11f; 128 | 129 | CPUTime time; 130 | double ts0 = 0; 131 | double ts1 = 0; 132 | double nd = double(n); 133 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. 
); 134 | double av=0.0, dev=0.0; 135 | int nf = 0; 136 | 137 | const double t0 = time.start(); 138 | for (int s=1; s<=get_nsteps(); ++s) 139 | { 140 | ts0 += time.start(); 141 | #pragma omp parallel for 142 | for (i = 0; i < n; i++)// update acceleration 143 | { 144 | __assume_aligned(particles->pos_x, alignment); 145 | __assume_aligned(particles->pos_y, alignment); 146 | __assume_aligned(particles->pos_z, alignment); 147 | __assume_aligned(particles->acc_x, alignment); 148 | __assume_aligned(particles->acc_y, alignment); 149 | __assume_aligned(particles->acc_z, alignment); 150 | __assume_aligned(particles->mass, alignment); 151 | 152 | real_type ax_i = particles->acc_x[i]; 153 | real_type ay_i = particles->acc_y[i]; 154 | real_type az_i = particles->acc_z[i]; 155 | 156 | for (j = 0; j < n; j++) 157 | { 158 | real_type dx, dy, dz; 159 | real_type distanceSqr = 0.0f; 160 | real_type distanceInv = 0.0f; 161 | 162 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 163 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 164 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 165 | 166 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 167 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 168 | 169 | ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 170 | ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 171 | az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 172 | } 173 | particles->acc_x[i] = ax_i; 174 | particles->acc_y[i] = ay_i; 175 | particles->acc_z[i] = az_i; 176 | } 177 | energy = 0; 178 | #pragma omp parallel for reduction(+:energy) 179 | for (i = 0; i < n; ++i)// update position 180 | { 181 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 182 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 183 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 184 | 185 | particles->pos_x[i] 
+= particles->vel_x[i] * dt; //2flops 186 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 187 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 188 | 189 | particles->acc_x[i] = 0.; 190 | particles->acc_y[i] = 0.; 191 | particles->acc_z[i] = 0.; 192 | 193 | energy += particles->mass[i] * ( 194 | particles->vel_x[i]*particles->vel_x[i] + 195 | particles->vel_y[i]*particles->vel_y[i] + 196 | particles->vel_z[i]*particles->vel_z[i]); //7flops 197 | } 198 | 199 | _kenergy = 0.5 * energy; 200 | 201 | ts1 += time.stop(); 202 | if(!(s%get_sfreq()) ) 203 | { 204 | nf += 1; 205 | std::cout << " " 206 | << std::left << std::setw(8) << s 207 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 208 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 209 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 210 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 211 | << std::endl; 212 | if(nf > 2) 213 | { 214 | av += gflops*get_sfreq()/(ts1 - ts0); 215 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 216 | } 217 | 218 | ts0 = 0; 219 | ts1 = 0; 220 | } 221 | 222 | } //end of the time step loop 223 | 224 | const double t1 = time.stop(); 225 | _totTime = (t1-t0); 226 | _totFlops = gflops*get_nsteps(); 227 | 228 | av/=(double)(nf-2); 229 | dev=sqrt(dev/(double)(nf-2)-av*av); 230 | 231 | int nthreads=1; 232 | #pragma omp parallel 233 | nthreads=omp_get_num_threads(); 234 | 235 | std::cout << std::endl; 236 | std::cout << "# Number Threads : " << nthreads << std::endl; 237 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 238 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 239 | std::cout << "===============================" << std::endl; 240 | 241 | } 242 | 243 | 244 | void GSimulation :: print_header() 245 | { 246 | 247 | std::cout << " nPart = " << get_npart() << "; " 248 | << "nSteps = " << get_nsteps() << 
"; " 249 | << "dt = " << get_tstep() << std::endl; 250 | 251 | std::cout << "------------------------------------------------" << std::endl; 252 | std::cout << " " 253 | << std::left << std::setw(8) << "s" 254 | << std::left << std::setw(8) << "dt" 255 | << std::left << std::setw(12) << "kenergy" 256 | << std::left << std::setw(12) << "time (s)" 257 | << std::left << std::setw(12) << "GFlops" 258 | << std::endl; 259 | std::cout << "------------------------------------------------" << std::endl; 260 | 261 | 262 | } 263 | 264 | GSimulation :: ~GSimulation() 265 | { 266 | _mm_free(particles->pos_x); 267 | _mm_free(particles->pos_y); 268 | _mm_free(particles->pos_z); 269 | _mm_free(particles->vel_x); 270 | _mm_free(particles->vel_y); 271 | _mm_free(particles->vel_z); 272 | _mm_free(particles->acc_x); 273 | _mm_free(particles->acc_y); 274 | _mm_free(particles->acc_z); 275 | _mm_free(particles->mass); 276 | _mm_free(particles); 277 | 278 | } 279 | -------------------------------------------------------------------------------- /ver6/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | const int alignment = 64; 103 | particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment); 104 | 105 | particles->pos_x = (real_type*) 
_mm_malloc(n*sizeof(real_type),alignment); 106 | particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 107 | particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 108 | particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 109 | particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 110 | particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 111 | particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 112 | particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 113 | particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 114 | particles->mass = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1.e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. 
); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const int tileSize = 8; 137 | 138 | const double t0 = time.start(); 139 | for (int s=1; s<=get_nsteps(); ++s) 140 | { 141 | ts0 += time.start(); 142 | for (int ii = 0; ii < n; ii += tileSize ) 143 | { 144 | real_type acc_xtile[tileSize]; 145 | real_type acc_ytile[tileSize] ; 146 | real_type acc_ztile[tileSize]; 147 | #pragma omp simd 148 | for(int s=0; spos_x, alignment); 155 | __assume_aligned(particles->pos_y, alignment); 156 | __assume_aligned(particles->pos_z, alignment); 157 | __assume_aligned(particles->acc_x, alignment); 158 | __assume_aligned(particles->acc_y, alignment); 159 | __assume_aligned(particles->acc_z, alignment); 160 | __assume_aligned(particles->mass, alignment); 161 | 162 | real_type ax_i = particles->acc_x[i]; 163 | real_type ay_i = particles->acc_y[i]; 164 | real_type az_i = particles->acc_z[i]; 165 | #pragma omp simd 166 | for (j = 0; j < n; j++) 167 | { 168 | for (int i = ii; i < ii + tileSize; i++) 169 | { 170 | real_type dx, dy, dz; 171 | real_type distanceSqr = 0.0f; 172 | real_type distanceInv = 0.0f; 173 | 174 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 175 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 176 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 177 | 178 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 179 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 180 | 181 | acc_xtile[i-ii] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 182 | acc_ytile[i-ii] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 183 | acc_ztile[i-ii] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 184 | } 185 | } 186 | #pragma omp simd 187 | for(int s=0; sacc_x[s+ii] = acc_xtile[s]; 190 | particles->acc_y[s+ii] = acc_ytile[s]; 191 | particles->acc_z[s+ii] = acc_ztile[s]; 192 | } 193 | } 194 | energy = 0; 195 | 196 | for (i = 0; i < 
n; ++i)// update position 197 | { 198 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 199 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 200 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 201 | 202 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 203 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 204 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 205 | 206 | particles->acc_x[i] = 0.; 207 | particles->acc_y[i] = 0.; 208 | particles->acc_z[i] = 0.; 209 | 210 | energy += particles->mass[i] * ( 211 | particles->vel_x[i]*particles->vel_x[i] + 212 | particles->vel_y[i]*particles->vel_y[i] + 213 | particles->vel_z[i]*particles->vel_z[i]); //7flops 214 | } 215 | 216 | _kenergy = 0.5 * energy; 217 | 218 | ts1 += time.stop(); 219 | if(!(s%get_sfreq()) ) 220 | { 221 | nf += 1; 222 | std::cout << " " 223 | << std::left << std::setw(8) << s 224 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 225 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 226 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 227 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 228 | << std::endl; 229 | if(nf > 2) 230 | { 231 | av += gflops*get_sfreq()/(ts1 - ts0); 232 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 233 | } 234 | 235 | ts0 = 0; 236 | ts1 = 0; 237 | } 238 | 239 | } //end of the time step loop 240 | 241 | const double t1 = time.stop(); 242 | _totTime = (t1-t0); 243 | _totFlops = gflops*get_nsteps(); 244 | 245 | av/=(double)(nf-2); 246 | dev=sqrt(dev/(double)(nf-2)-av*av); 247 | 248 | int nthreads=1; 249 | 250 | std::cout << std::endl; 251 | std::cout << "# Number Threads : " << nthreads << std::endl; 252 | std::cout << "# Total Time (s) : " << _totTime << std::endl; 253 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 254 | std::cout << 
"===============================" << std::endl; 255 | 256 | } 257 | 258 | 259 | void GSimulation :: print_header() 260 | { 261 | 262 | std::cout << " nPart = " << get_npart() << "; " 263 | << "nSteps = " << get_nsteps() << "; " 264 | << "dt = " << get_tstep() << std::endl; 265 | 266 | std::cout << "------------------------------------------------" << std::endl; 267 | std::cout << " " 268 | << std::left << std::setw(8) << "s" 269 | << std::left << std::setw(8) << "dt" 270 | << std::left << std::setw(12) << "kenergy" 271 | << std::left << std::setw(12) << "time (s)" 272 | << std::left << std::setw(12) << "GFlops" 273 | << std::endl; 274 | std::cout << "------------------------------------------------" << std::endl; 275 | 276 | 277 | } 278 | 279 | GSimulation :: ~GSimulation() 280 | { 281 | _mm_free(particles->pos_x); 282 | _mm_free(particles->pos_y); 283 | _mm_free(particles->pos_z); 284 | _mm_free(particles->vel_x); 285 | _mm_free(particles->vel_y); 286 | _mm_free(particles->vel_z); 287 | _mm_free(particles->acc_x); 288 | _mm_free(particles->acc_y); 289 | _mm_free(particles->acc_z); 290 | _mm_free(particles->mass); 291 | _mm_free(particles); 292 | } 293 | -------------------------------------------------------------------------------- /ver8/GSimulation.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of the example codes which have been used 3 | for the "Code Optmization Workshop". 4 | 5 | Copyright (C) 2016 Fabio Baruffa 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | #include "GSimulation.hpp" 22 | #include "cpu_time.hpp" 23 | 24 | GSimulation :: GSimulation() 25 | { 26 | std::cout << "===============================" << std::endl; 27 | std::cout << " Initialize Gravity Simulation" << std::endl; 28 | set_npart(16000); 29 | set_nsteps(10); 30 | set_tstep(0.1); 31 | set_sfreq(1); 32 | } 33 | 34 | void GSimulation :: set_number_of_particles(int N) 35 | { 36 | set_npart(N); 37 | } 38 | 39 | void GSimulation :: set_number_of_steps(int N) 40 | { 41 | set_nsteps(N); 42 | } 43 | 44 | void GSimulation :: init_pos() 45 | { 46 | std::random_device rd; //random number generator 47 | std::mt19937 gen(42); 48 | std::uniform_real_distribution unif_d(0,1.0); 49 | 50 | for(int i=0; ipos_x[i] = unif_d(gen); 53 | particles->pos_y[i] = unif_d(gen); 54 | particles->pos_z[i] = unif_d(gen); 55 | } 56 | } 57 | 58 | void GSimulation :: init_vel() 59 | { 60 | std::random_device rd; //random number generator 61 | std::mt19937 gen(42); 62 | std::uniform_real_distribution unif_d(-1.0,1.0); 63 | 64 | for(int i=0; ivel_x[i] = unif_d(gen) * 1.0e-3f; 67 | particles->vel_y[i] = unif_d(gen) * 1.0e-3f; 68 | particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 69 | } 70 | } 71 | 72 | void GSimulation :: init_acc() 73 | { 74 | for(int i=0; iacc_x[i] = 0.f; 77 | particles->acc_y[i] = 0.f; 78 | particles->acc_z[i] = 0.f; 79 | } 80 | } 81 | 82 | void GSimulation :: init_mass() 83 | { 84 | real_type n = static_cast (get_npart()); 85 | std::random_device rd; //random number generator 86 | std::mt19937 gen(42); 87 | std::uniform_real_distribution unif_d(0.0,1.0); 88 | 89 | for(int i=0; imass[i] = n * unif_d(gen); 92 | } 93 | } 94 | 95 | void GSimulation :: start() 96 | { 97 | real_type energy; 98 | real_type dt = get_tstep(); 99 | int n = get_npart(); 100 | int i,j; 101 | 102 | const 
int alignment = 64; 103 | particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment); 104 | 105 | particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 106 | particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 107 | particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 108 | particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 109 | particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 110 | particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 111 | particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 112 | particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 113 | particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 114 | particles->mass = (real_type*) _mm_malloc(n*sizeof(real_type),alignment); 115 | 116 | init_pos(); 117 | init_vel(); 118 | init_acc(); 119 | init_mass(); 120 | 121 | print_header(); 122 | 123 | _totTime = 0.; 124 | 125 | const float softeningSquared = 1.e-3f; 126 | const float G = 6.67259e-11f; 127 | 128 | CPUTime time; 129 | double ts0 = 0; 130 | double ts1 = 0; 131 | double nd = double(n); 132 | double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. 
); 133 | double av=0.0, dev=0.0; 134 | int nf = 0; 135 | 136 | const int tileSize = 8; 137 | 138 | const double t0 = time.start(); 139 | for (int s=1; s<=get_nsteps(); ++s) 140 | { 141 | ts0 += time.start(); 142 | #pragma omp parallel for 143 | for (int ii = 0; ii < n; ii += tileSize ) 144 | { 145 | real_type acc_xtile[tileSize]; 146 | real_type acc_ytile[tileSize] ; 147 | real_type acc_ztile[tileSize]; 148 | #pragma omp simd 149 | for(int s=0; spos_x, alignment); 156 | __assume_aligned(particles->pos_y, alignment); 157 | __assume_aligned(particles->pos_z, alignment); 158 | __assume_aligned(particles->acc_x, alignment); 159 | __assume_aligned(particles->acc_y, alignment); 160 | __assume_aligned(particles->acc_z, alignment); 161 | __assume_aligned(particles->mass, alignment); 162 | 163 | real_type ax_i = particles->acc_x[i]; 164 | real_type ay_i = particles->acc_y[i]; 165 | real_type az_i = particles->acc_z[i]; 166 | #pragma omp simd 167 | for (j = 0; j < n; j++) 168 | { 169 | for (int i = ii; i < ii + tileSize; i++) 170 | { 171 | real_type dx, dy, dz; 172 | real_type distanceSqr = 0.0f; 173 | real_type distanceInv = 0.0f; 174 | 175 | dx = particles->pos_x[j] - particles->pos_x[i]; //1flop 176 | dy = particles->pos_y[j] - particles->pos_y[i]; //1flop 177 | dz = particles->pos_z[j] - particles->pos_z[i]; //1flop 178 | 179 | distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops 180 | distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt 181 | 182 | acc_xtile[i-ii] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 183 | acc_ytile[i-ii] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 184 | acc_ztile[i-ii] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops 185 | } 186 | } 187 | #pragma omp simd 188 | for(int s=0; sacc_x[s+ii] = acc_xtile[s]; 191 | particles->acc_y[s+ii] = acc_ytile[s]; 192 | particles->acc_z[s+ii] = acc_ztile[s]; 193 | } 194 | } 195 | energy = 
0; 196 | #pragma omp parallel for reduction(+:energy) 197 | for (i = 0; i < n; ++i)// update position 198 | { 199 | particles->vel_x[i] += particles->acc_x[i] * dt; //2flops 200 | particles->vel_y[i] += particles->acc_y[i] * dt; //2flops 201 | particles->vel_z[i] += particles->acc_z[i] * dt; //2flops 202 | 203 | particles->pos_x[i] += particles->vel_x[i] * dt; //2flops 204 | particles->pos_y[i] += particles->vel_y[i] * dt; //2flops 205 | particles->pos_z[i] += particles->vel_z[i] * dt; //2flops 206 | 207 | particles->acc_x[i] = 0.; 208 | particles->acc_y[i] = 0.; 209 | particles->acc_z[i] = 0.; 210 | 211 | energy += particles->mass[i] * ( 212 | particles->vel_x[i]*particles->vel_x[i] + 213 | particles->vel_y[i]*particles->vel_y[i] + 214 | particles->vel_z[i]*particles->vel_z[i]); //7flops 215 | } 216 | 217 | _kenergy = 0.5 * energy; 218 | 219 | ts1 += time.stop(); 220 | if(!(s%get_sfreq()) ) 221 | { 222 | nf += 1; 223 | std::cout << " " 224 | << std::left << std::setw(8) << s 225 | << std::left << std::setprecision(5) << std::setw(8) << s*get_tstep() 226 | << std::left << std::setprecision(5) << std::setw(12) << _kenergy 227 | << std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0) 228 | << std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0) 229 | << std::endl; 230 | if(nf > 2) 231 | { 232 | av += gflops*get_sfreq()/(ts1 - ts0); 233 | dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0)); 234 | } 235 | 236 | ts0 = 0; 237 | ts1 = 0; 238 | } 239 | 240 | } //end of the time step loop 241 | 242 | const double t1 = time.stop(); 243 | _totTime = (t1-t0); 244 | _totFlops = gflops*get_nsteps(); 245 | 246 | av/=(double)(nf-2); 247 | dev=sqrt(dev/(double)(nf-2)-av*av); 248 | 249 | int nthreads=1; 250 | #pragma omp parallel 251 | nthreads=omp_get_num_threads(); 252 | 253 | std::cout << std::endl; 254 | std::cout << "# Number Threads : " << nthreads << std::endl; 255 | std::cout << "# Total Time (s) : " << _totTime << 
std::endl; 256 | std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl; 257 | std::cout << "===============================" << std::endl; 258 | 259 | } 260 | 261 | 262 | void GSimulation :: print_header() 263 | { 264 | 265 | std::cout << " nPart = " << get_npart() << "; " 266 | << "nSteps = " << get_nsteps() << "; " 267 | << "dt = " << get_tstep() << std::endl; 268 | 269 | std::cout << "------------------------------------------------" << std::endl; 270 | std::cout << " " 271 | << std::left << std::setw(8) << "s" 272 | << std::left << std::setw(8) << "dt" 273 | << std::left << std::setw(12) << "kenergy" 274 | << std::left << std::setw(12) << "time (s)" 275 | << std::left << std::setw(12) << "GFlops" 276 | << std::endl; 277 | std::cout << "------------------------------------------------" << std::endl; 278 | 279 | 280 | } 281 | 282 | GSimulation :: ~GSimulation() 283 | { 284 | _mm_free(particles->pos_x); 285 | _mm_free(particles->pos_y); 286 | _mm_free(particles->pos_z); 287 | _mm_free(particles->vel_x); 288 | _mm_free(particles->vel_y); 289 | _mm_free(particles->vel_z); 290 | _mm_free(particles->acc_x); 291 | _mm_free(particles->acc_y); 292 | _mm_free(particles->acc_z); 293 | _mm_free(particles->mass); 294 | _mm_free(particles); 295 | } 296 | --------------------------------------------------------------------------------