├── ver0
    ├── types.hpp
    ├── main.cpp
    ├── Particle.hpp
    ├── Makefile
    ├── cpu_time.hpp
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver1
    ├── types.hpp
    ├── main.cpp
    ├── Particle.hpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver3
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    ├── GSimulation.cpp
    └── GSimulation-moveout.cpp
├── ver4
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    ├── GSimulation.cpp
    └── GSimulation-moveout.cpp
├── ver5
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver6
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver7
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver8
    ├── types.hpp
    ├── main.cpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── Particle.hpp
    ├── GSimulation.hpp
    └── GSimulation.cpp
├── ver2
    ├── types.hpp
    ├── main.cpp
    ├── Particle.hpp
    ├── cpu_time.hpp
    ├── Makefile
    ├── GSimulation.hpp
    └── GSimulation.cpp
└── README.md


/ver0/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver1/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver3/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver4/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver5/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver6/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver7/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver8/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float


--------------------------------------------------------------------------------
/ver2/types.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | typedef float real_type;  // scalar alias: real_type is single-precision float
22 | 


--------------------------------------------------------------------------------
/ver0/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver1/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver2/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver3/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver4/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver5/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver6/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver7/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver8/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #include <iostream>
22 | 
23 | #include "GSimulation.hpp"
24 | 
25 | int main(int argc, char** argv) // Entry point. Usage: <prog> [N [nstep]]
26 | {
27 |   int N;			// number of particles; assigned only when argv[1] is present
28 |   int nstep; 		// number of integration steps; assigned only when argc == 3
29 |   // With no arguments neither setter runs, so sim keeps its own initial settings.
30 |   GSimulation sim;
31 |   // NOTE(review): atoi() reports no errors (unparsable text gives 0) and <cstdlib> is not included directly here.
32 |   if(argc>1)
33 |   {
34 |     N=atoi(argv[1]);
35 |     sim.set_number_of_particles(N);  
36 |     if(argc==3) // exactly two arguments; with three or more, argv[2] is ignored
37 |     {
38 |       nstep=atoi(argv[2]);
39 |       sim.set_number_of_steps(nstep);  
40 |     }
41 |   }
42 |   // Run the simulation; GSimulation presumably comes from GSimulation.hpp (not shown here).
43 |   sim.start();
44 | 
45 |   return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/ver0/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle  // Per-particle record: three 3-element real_type arrays plus one real_type scalar.
27 | {
28 |   public:  // redundant in a struct (members are public by default)
29 |     Particle() { init();}  // default construction zeroes every field via init()
30 |     void init()  // reset all ten values to zero; the 0. literals are double and convert to real_type
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];  // presumably position components (x, y, z) - units not shown
38 |     real_type vel[3];  // presumably velocity components
39 |     real_type acc[3];  // presumably acceleration components
40 |     real_type mass;  // presumably the particle mass
41 | };
42 | 
43 | #endif


--------------------------------------------------------------------------------
/ver1/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle  // Per-particle record: three 3-element real_type arrays plus one real_type scalar.
27 | {
28 |   public:  // redundant in a struct (members are public by default)
29 |     Particle() { init();}  // default construction zeroes every field via init()
30 |     void init()  // reset all ten values to zero; the 0. literals are double and convert to real_type
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];  // presumably position components (x, y, z) - units not shown
38 |     real_type vel[3];  // presumably velocity components
39 |     real_type acc[3];  // presumably acceleration components
40 |     real_type mass;  // presumably the particle mass
41 | };
42 | 
43 | #endif


--------------------------------------------------------------------------------
/ver2/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle  // Per-particle record: three 3-element real_type arrays plus one real_type scalar.
27 | {
28 |   public:  // redundant in a struct (members are public by default)
29 |     Particle() { init();}  // default construction zeroes every field via init()
30 |     void init()  // reset all ten values to zero; the 0. literals are double and convert to real_type
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];  // presumably position components (x, y, z) - units not shown
38 |     real_type vel[3];  // presumably velocity components
39 |     real_type acc[3];  // presumably acceleration components
40 |     real_type mass;  // presumably the particle mass
41 | };
42 | 
43 | #endif


--------------------------------------------------------------------------------
/ver0/Makefile:
--------------------------------------------------------------------------------
 1 | CXX = icpc
 2 | COMPFLAGS =  -g -std=c++11 -O2
 3 |  INCLUDES = 
 4 | # Default goal is `all` (-> cpu). REPORT=yes / FILTER=yes on the command line extend CXXFLAGS (see the ifeq block).
 5 | CXXFLAGS = $(COMPFLAGS) 
 6 | 
 7 | SOURCES = GSimulation.cpp main.cpp
 8 | # Project directory passed to the advixe-cl / advixe-gui targets at the bottom of this file.
 9 | ADVPRJ = "./adv-ver0"
10 | # REPORT=yes appends -qopt-report=5; FILTER=yes (honoured only inside REPORT=yes) adds the vec-phase filter for GSimulation.cpp lines 125-175.
11 | ifeq ($(REPORT), yes)
12 | 	CXXFLAGS+=-qopt-report=5
13 | ifeq ($(FILTER), yes) 
14 | 	CXXFLAGS+=-qopt-report-phase=vec -qopt-report-filter="GSimulation.cpp,125-175"
15 | endif
16 | endif
17 | # NOTE(review): there is no .PHONY declaration although no recipe below creates a file named after its target.
18 | .SUFFIXES: .o .cpp
19 | 
20 | ##########################################
21 | OBJSC = $(SOURCES:.cpp=.o)
22 | ##########################################
23 | 
24 | EXEC=nbody.x
25 | # NOTE(review): run, clean and the Advisor targets spell out nbody.x instead of using $(EXEC).
26 | all: cpu
27 | # Pattern rule: compile each .cpp into the matching .o with $(CXXFLAGS).
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 
32 | # cpu: link all objects into $(EXEC).
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 	
38 | run: 
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./nbody.x 
42 | 	
43 | clean: 
44 | 	rm -f $(OBJSC) nbody.x *.optrpt
45 | # NOTE(review): run has no prerequisite, so it does not build the executable first.
46 | #----------------------------------------------------------------
47 | #---------- Intel Advisor Analysis ------------------------------
48 | #----------------------------------------------------------------
49 | # survey: one `advixe-cl -collect survey` run of ./nbody.x into $(ADVPRJ).
50 | survey:
51 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x
52 | # roofline: the same survey collection followed by a tripcounts collection with -flop.
53 | roofline:
54 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./nbody.x
55 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./nbody.x
56 | # open-gui: start advixe-gui on the project file in the background, discarding its output.
57 | open-gui:
58 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
59 | # clean-results: delete the whole $(ADVPRJ) directory.
60 | clean-results:
61 | 	rm -rf $(ADVPRJ)
62 |  
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/ver0/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver1/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver2/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver3/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver4/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver5/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver6/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver7/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver8/cpu_time.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _CPUTIME_HPP
22 | #define _CPUTIME_HPP
23 | 
24 | #include <sys/time.h>
25 | #include <sys/types.h>
26 | #include <sys/resource.h>
27 | 
28 | // Wall-clock timer: each reading is the time since the Unix epoch (1970-01-01), in seconds, as a double.
29 | // Readings come from gettimeofday(), so despite the class name this is elapsed time, not CPU time.
30 | class CPUTime {
31 | private:
32 |     double wctime;  // most recent reading, in seconds
33 |     // Reads the current time of day, stores it in wctime and returns it.
34 |     inline double readTime() 
35 |     {
36 |       struct timeval tp;
37 | 
38 |       gettimeofday(&tp,NULL);  // no timezone requested; the return value is not checked
39 |       wctime = (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;  // whole seconds plus microseconds scaled to seconds
40 |       return wctime;
41 |     }
42 | public:
43 |     CPUTime() : wctime(0.0) { }
44 |     // start() and stop() are identical: each returns a fresh reading. Callers presumably subtract the two.
45 |     inline double start() { return readTime(); }
46 |     inline double stop()  { return readTime(); }
47 |     
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ver1/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver1 n-body example (Intel C++ compiler, AVX-512 build).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2
 4 | #OPTFLAGS = -xCORE-AVX2
 5 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 6 | 
 7 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 8 | INCLUDES =
 9 | 
10 | # Intel Advisor project directory used by the analysis targets below.
11 | ADVPRJ = "./adv-ver1"
12 | 
13 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
14 | 
15 | SOURCES = GSimulation.cpp main.cpp
16 | 
17 | .SUFFIXES: .o .cpp
18 | 
19 | # None of these targets produce a file of the same name.
20 | .PHONY: all cpu run asm clean survey roofline open-gui clean-results
21 | 
22 | ##########################################
23 | OBJSC = $(SOURCES:.cpp=.o)
24 | ##########################################
25 | 
26 | EXEC=nbody.x
27 | 
28 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
29 | all: clean cpu
30 | 
31 | %.o: %.cpp
32 | 	$(info )
33 | 	$(info Compiling the object file for CPU: )
34 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
35 | 
36 | cpu: $(OBJSC)
37 | 	$(info )
38 | 	$(info Linking the CPU executable: )
39 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
40 | 
41 | run:
42 | 	$(info )
43 | 	$(info Run the default test case on CPU: )
44 | 	./$(EXEC)
45 | 
46 | asm:GSimulation.cpp
47 | 	$(info )
48 | 	$(info Generate assembly code: )
49 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
50 | 
51 | clean:
52 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
53 | 
54 | #----------------------------------------------------------------
55 | #---------- Intel Advisor Analysis ------------------------------
56 | #----------------------------------------------------------------
57 | 
58 | survey:
59 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
60 | 
61 | roofline:
62 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
63 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
64 | 
65 | open-gui:
66 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
67 | 
68 | clean-results:
69 | 	rm -rf $(ADVPRJ)
70 | 


--------------------------------------------------------------------------------
/ver2/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver2 n-body example (Intel C++ compiler, AVX-512 build).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver2"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver4/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver4 n-body example (Intel C++ compiler, AVX-512 build).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver4"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver5/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver5 n-body example (Intel C++ compiler, AVX-512 build).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver5"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver6/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver6 n-body example (Intel C++ compiler, AVX-512 build).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver6"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver7/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver7 n-body example (Intel C++ compiler, AVX-512 build with OpenMP enabled).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2 -qopenmp
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver7"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver8/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver8 n-body example (Intel C++ compiler, AVX-512 build with OpenMP enabled).
 2 | CXX = icpc
 3 | COMPFLAGS = -g -std=c++11 -O2 -qopenmp
 4 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 5 | 
 6 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 7 | INCLUDES =
 8 | 
 9 | # Intel Advisor project directory used by the analysis targets below.
10 | ADVPRJ = "./adv-ver8"
11 | 
12 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
13 | 
14 | SOURCES = GSimulation.cpp main.cpp
15 | 
16 | # None of these targets produce a file of the same name.
17 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
18 | 
19 | ##########################################
20 | OBJSC = $(SOURCES:.cpp=.o)
21 | ##########################################
22 | 
23 | EXEC=nbody.x
24 | 
25 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
26 | all: clean cpu
27 | 
28 | %.o: %.cpp
29 | 	$(info )
30 | 	$(info Compiling the object file for CPU: )
31 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
32 | 
33 | cpu: $(OBJSC)
34 | 	$(info )
35 | 	$(info Linking the CPU executable:)
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
37 | 
38 | run:
39 | 	$(info )
40 | 	$(info Run the default test case on CPU: )
41 | 	./$(EXEC)
42 | 
43 | asm:GSimulation.cpp
44 | 	$(info )
45 | 	$(info Generate assembly code: )
46 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
47 | 
48 | clean:
49 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
50 | 
51 | 
52 | #----------------------------------------------------------------
53 | #---------- Intel Advisor Analysis ------------------------------
54 | #----------------------------------------------------------------
55 | 
56 | survey:
57 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
58 | 
59 | roofline:
60 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
61 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
62 | 
63 | map:
64 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
65 | 
66 | open-gui:
67 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
68 | 
69 | clean-results:
70 | 	rm -rf $(ADVPRJ)
71 | 


--------------------------------------------------------------------------------
/ver3/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for the ver3 n-body example (Intel C++ compiler, AVX-512 build).
 2 | #   make SIMD=yes  compiles with -DSIMD
 3 | CXX = icpc
 4 | COMPFLAGS = -g -std=c++11 -O2
 5 | OPTFLAGS = -xCORE-AVX512 -qopt-zmm-usage=high
 6 | 
 7 | REPFLAGS = -qopt-report=5 -qopt-report-filter="GSimulation.cpp,125-175"
 8 | INCLUDES =
 9 | 
10 | # Intel Advisor project directory used by the analysis targets below.
11 | ADVPRJ = "./adv-ver3"
12 | 
13 | CXXFLAGS = $(COMPFLAGS) $(OPTFLAGS) $(REPFLAGS)
14 | 
15 | SOURCES = GSimulation.cpp main.cpp
16 | 
17 | ifeq ($(SIMD), yes)
18 |         CXXFLAGS+= -DSIMD
19 | endif
20 | 
21 | # None of these targets produce a file of the same name.
22 | .PHONY: all cpu run asm clean survey roofline map open-gui clean-results
23 | 
24 | ##########################################
25 | OBJSC = $(SOURCES:.cpp=.o)
26 | ##########################################
27 | 
28 | EXEC=nbody.x
29 | 
30 | # Always rebuilds from scratch. clean and cpu are not ordered, so avoid make -j.
31 | all: clean cpu
32 | 
33 | %.o: %.cpp
34 | 	$(info )
35 | 	$(info Compiling the object file for CPU: )
36 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
37 | 
38 | cpu: $(OBJSC)
39 | 	$(info )
40 | 	$(info Linking the CPU executable:)
41 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $(EXEC) $(OBJSC)
42 | 
43 | run:
44 | 	$(info )
45 | 	$(info Run the default test case on CPU: )
46 | 	./$(EXEC)
47 | 
48 | asm:GSimulation.cpp
49 | 	$(info )
50 | 	$(info Generate assembly code: )
51 | 	$(CXX) $(COMPFLAGS) $(OPTFLAGS) $^ -S
52 | 
53 | clean:
54 | 	rm -f $(OBJSC) $(EXEC) *.optrpt *.s
55 | 
56 | 
57 | #----------------------------------------------------------------
58 | #---------- Intel Advisor Analysis ------------------------------
59 | #----------------------------------------------------------------
60 | 
61 | survey:
62 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
63 | 
64 | roofline:
65 | 	advixe-cl -collect survey -project-dir $(ADVPRJ) -- ./$(EXEC)
66 | 	advixe-cl -collect tripcounts -flop -project-dir $(ADVPRJ) -- ./$(EXEC)
67 | 
68 | map:
69 | 	advixe-cl -collect map -mark-up-list=1 -project-dir $(ADVPRJ) -- ./$(EXEC)
70 | 
71 | open-gui:
72 | 	advixe-gui $(ADVPRJ)/$(ADVPRJ).advixeproj >/dev/null 2>&1 &
73 | 
74 | clean-results:
75 | 	rm -rf $(ADVPRJ)
76 | 


--------------------------------------------------------------------------------
/ver3/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver4/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver5/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver6/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver7/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver8/Particle.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _PARTICLE_HPP
22 | #define _PARTICLE_HPP
23 | #include <cmath>
24 | #include "types.hpp"
25 | 
26 | struct Particle
27 | {
28 |   public:
29 |     Particle() { init();}
30 |     void init() 
31 |     {
32 |       pos[0] = 0.; pos[1] = 0.; pos[2] = 0.;
33 |       vel[0] = 0.; vel[1] = 0.; vel[2] = 0.;
34 |       acc[0] = 0.; acc[1] = 0.; acc[2] = 0.;
35 |       mass   = 0.;
36 |     }
37 |     real_type pos[3];
38 |     real_type vel[3];
39 |     real_type acc[3];  
40 |     real_type mass;
41 | };
42 | 
43 | struct ParticleSoA
44 | {
45 |   public:
46 |     ParticleSoA() { init();}
47 |     void init() 
48 |     {
49 |       pos_x = NULL; pos_y = NULL; pos_z = NULL;
50 |       vel_x = NULL; vel_y = NULL; vel_z = NULL;
51 |       acc_x = NULL; acc_y = NULL; acc_z = NULL;
52 |       mass  = NULL;
53 |     }
54 |     real_type *pos_x, *pos_y, *pos_z;
55 |     real_type *vel_x, *vel_y, *vel_z;
56 |     real_type *acc_x, *acc_y, *acc_z;  
57 |     real_type *mass;
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/ver0/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   Particle *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;	//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;	//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver1/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   Particle *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;	//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;	//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver2/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   Particle *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;	//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;	//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver3/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   ParticleSoA *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;		//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;		//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver4/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   ParticleSoA *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;		//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;		//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver5/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | class GSimulation 
37 | {
38 | public:
39 |   GSimulation();
40 |   ~GSimulation();
41 |   
42 |   void init();
43 |   void set_number_of_particles(int N);
44 |   void set_number_of_steps(int N);
45 |   void start();
46 |   
47 | private:
48 |   ParticleSoA *particles;
49 |   
50 |   int       _npart;		//number of particles
51 |   int	    _nsteps;		//number of integration steps
52 |   real_type _tstep;		//time step of the simulation
53 | 
54 |   int	    _sfreq;		//sample frequency
55 |   
56 |   real_type _kenergy;		//kinetic energy
57 |   
58 |   double _totTime;		//total time of the simulation
59 |   double _totFlops;		//total number of flops 
60 |    
61 |   void init_pos();	
62 |   void init_vel();
63 |   void init_acc();
64 |   void init_mass();
65 |     
66 |   inline void set_npart(const int &N){ _npart = N; }
67 |   inline int get_npart() const {return _npart; }
68 |   
69 |   inline void set_tstep(const real_type &dt){ _tstep = dt; }
70 |   inline real_type get_tstep() const {return _tstep; }
71 |   
72 |   inline void set_nsteps(const int &n){ _nsteps = n; }
73 |   inline int get_nsteps() const {return _nsteps; }
74 |   
75 |   inline void set_sfreq(const int &sf){ _sfreq = sf; }
76 |   inline int get_sfreq() const {return _sfreq; }
77 |   
78 |   void print_header();
79 |   
80 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver6/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | // Gravity N-body simulation driver. Stores the run parameters (particle
37 | // count, number of steps, time step, sampling frequency), a pointer to the
38 | // particle storage, and the energy/timing/flop bookkeeping members.
39 | class GSimulation
40 | {
41 | public:
42 |   GSimulation();
43 |   ~GSimulation();
44 | 
45 |   // The class holds a raw pointer and declares a destructor, so an implicit
46 |   // member-wise copy would leave two objects sharing one `particles` pointer.
47 |   // Copying is therefore disabled.
48 |   GSimulation(const GSimulation &) = delete;
49 |   GSimulation &operator=(const GSimulation &) = delete;
50 | 
51 |   void init();
52 |   void set_number_of_particles(int N);
53 |   void set_number_of_steps(int N);
54 |   void start();
55 | 
56 | private:
57 |   ParticleSoA *particles = nullptr;  // particle storage, null until assigned
58 | 
59 |   int       _npart  = 0;    // number of particles
60 |   int       _nsteps = 0;    // number of integration steps
61 |   real_type _tstep  = 0;    // time step of the simulation
62 | 
63 |   int       _sfreq  = 1;    // sample frequency
64 | 
65 |   real_type _kenergy = 0;   // kinetic energy
66 | 
67 |   double _totTime  = 0.;    // total time of the simulation
68 |   double _totFlops = 0.;    // total number of flops
69 | 
70 |   // initialisation helpers: declared here, defined out of line
71 |   void init_pos();
72 |   void init_vel();
73 |   void init_acc();
74 |   void init_mass();
75 | 
76 |   // trivial accessors for the run parameters (implicitly inline)
77 |   void set_npart(const int &N) { _npart = N; }
78 |   int get_npart() const { return _npart; }
79 | 
80 |   void set_tstep(const real_type &dt) { _tstep = dt; }
81 |   real_type get_tstep() const { return _tstep; }
82 | 
83 |   void set_nsteps(const int &n) { _nsteps = n; }
84 |   int get_nsteps() const { return _nsteps; }
85 | 
86 |   void set_sfreq(const int &sf) { _sfreq = sf; }
87 |   int get_sfreq() const { return _sfreq; }
88 | 
89 |   void print_header();
90 | 
91 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver7/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | // Gravity N-body simulation driver. Stores the run parameters (particle
37 | // count, number of steps, time step, sampling frequency), a pointer to the
38 | // particle storage, and the energy/timing/flop bookkeeping members.
39 | class GSimulation
40 | {
41 | public:
42 |   GSimulation();
43 |   ~GSimulation();
44 | 
45 |   // The class holds a raw pointer and declares a destructor, so an implicit
46 |   // member-wise copy would leave two objects sharing one `particles` pointer.
47 |   // Copying is therefore disabled.
48 |   GSimulation(const GSimulation &) = delete;
49 |   GSimulation &operator=(const GSimulation &) = delete;
50 | 
51 |   void init();
52 |   void set_number_of_particles(int N);
53 |   void set_number_of_steps(int N);
54 |   void start();
55 | 
56 | private:
57 |   ParticleSoA *particles = nullptr;  // particle storage, null until assigned
58 | 
59 |   int       _npart  = 0;    // number of particles
60 |   int       _nsteps = 0;    // number of integration steps
61 |   real_type _tstep  = 0;    // time step of the simulation
62 | 
63 |   int       _sfreq  = 1;    // sample frequency
64 | 
65 |   real_type _kenergy = 0;   // kinetic energy
66 | 
67 |   double _totTime  = 0.;    // total time of the simulation
68 |   double _totFlops = 0.;    // total number of flops
69 | 
70 |   // initialisation helpers: declared here, defined out of line
71 |   void init_pos();
72 |   void init_vel();
73 |   void init_acc();
74 |   void init_mass();
75 | 
76 |   // trivial accessors for the run parameters (implicitly inline)
77 |   void set_npart(const int &N) { _npart = N; }
78 |   int get_npart() const { return _npart; }
79 | 
80 |   void set_tstep(const real_type &dt) { _tstep = dt; }
81 |   real_type get_tstep() const { return _tstep; }
82 | 
83 |   void set_nsteps(const int &n) { _nsteps = n; }
84 |   int get_nsteps() const { return _nsteps; }
85 | 
86 |   void set_sfreq(const int &sf) { _sfreq = sf; }
87 |   int get_sfreq() const { return _sfreq; }
88 | 
89 |   void print_header();
90 | 
91 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver8/GSimulation.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This file is part of the example codes which have been used
 3 |     for the "Code Optimization Workshop".
 4 |     
 5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
 6 | 
 7 |     This program is free software: you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation, either version 3 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License
18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 |  */
20 | 
21 | #ifndef _GSIMULATION_HPP
22 | #define _GSIMULATION_HPP
23 | 
24 | #include <random>
25 | #include <iomanip>
26 | #include <iostream>
27 | #include <fstream>
28 | #include <string>
29 | #include <sstream>
30 | #include <stdlib.h>
31 | 
32 | #include <omp.h>
33 | 
34 | #include "Particle.hpp"
35 | 
36 | // Gravity N-body simulation driver. Stores the run parameters (particle
37 | // count, number of steps, time step, sampling frequency), a pointer to the
38 | // particle storage, and the energy/timing/flop bookkeeping members.
39 | class GSimulation
40 | {
41 | public:
42 |   GSimulation();
43 |   ~GSimulation();
44 | 
45 |   // The class holds a raw pointer and declares a destructor, so an implicit
46 |   // member-wise copy would leave two objects sharing one `particles` pointer.
47 |   // Copying is therefore disabled.
48 |   GSimulation(const GSimulation &) = delete;
49 |   GSimulation &operator=(const GSimulation &) = delete;
50 | 
51 |   void init();
52 |   void set_number_of_particles(int N);
53 |   void set_number_of_steps(int N);
54 |   void start();
55 | 
56 | private:
57 |   ParticleSoA *particles = nullptr;  // particle storage, null until assigned
58 | 
59 |   int       _npart  = 0;    // number of particles
60 |   int       _nsteps = 0;    // number of integration steps
61 |   real_type _tstep  = 0;    // time step of the simulation
62 | 
63 |   int       _sfreq  = 1;    // sample frequency
64 | 
65 |   real_type _kenergy = 0;   // kinetic energy
66 | 
67 |   double _totTime  = 0.;    // total time of the simulation
68 |   double _totFlops = 0.;    // total number of flops
69 | 
70 |   // initialisation helpers: declared here, defined out of line
71 |   void init_pos();
72 |   void init_vel();
73 |   void init_acc();
74 |   void init_mass();
75 | 
76 |   // trivial accessors for the run parameters (implicitly inline)
77 |   void set_npart(const int &N) { _npart = N; }
78 |   int get_npart() const { return _npart; }
79 | 
80 |   void set_tstep(const real_type &dt) { _tstep = dt; }
81 |   real_type get_tstep() const { return _tstep; }
82 | 
83 |   void set_nsteps(const int &n) { _nsteps = n; }
84 |   int get_nsteps() const { return _nsteps; }
85 | 
86 |   void set_sfreq(const int &sf) { _sfreq = sf; }
87 |   int get_sfreq() const { return _sfreq; }
88 | 
89 |   void print_header();
90 | 
91 | };
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/ver0/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | // Builds a simulation with the default run parameters (16000 particles,
 25 | // 10 steps, time step 0.1, sample frequency 1) and prints a start banner.
 26 | GSimulation :: GSimulation()
 27 | {
 28 |   // In this file only start() allocates `particles`, while the destructor
 29 |   // always releases it. Begin from a null pointer so that destroying an
 30 |   // object that was never started does not free an indeterminate address.
 31 |   particles = nullptr;
 32 | 
 33 |   // keep the result members defined even if start() is never called
 34 |   _kenergy  = 0;
 35 |   _totTime  = 0.;
 36 |   _totFlops = 0.;
 37 | 
 38 |   std::cout << "===============================" << std::endl;
 39 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 40 |   set_npart(16000); 
 41 |   set_nsteps(10);
 42 |   set_tstep(0.1); 
 43 |   set_sfreq(1);
 44 | }
 33 | 
 34 | // Public setter for the particle count: forwards N to set_npart().
 35 | void GSimulation::set_number_of_particles(int N)
 36 | {
 37 |   this->set_npart(N);
 38 | }
 38 | 
 39 | // Public setter for the number of integration steps: forwards N to
 40 | // set_nsteps().
 41 | void GSimulation::set_number_of_steps(int N)
 42 | {
 43 |   this->set_nsteps(N);
 44 | }
 43 | 
 44 | // Gives every particle a pseudo-random position: each coordinate is drawn
 45 | // uniformly from [0, 1). The Mersenne Twister is seeded with the constant
 46 | // 42, so the positions are identical from run to run.
 47 | void GSimulation :: init_pos()  
 48 | {
 49 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 50 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 51 |   
 52 |   const int count = get_npart();
 53 |   for(int i=0; i<count; ++i)
 54 |   {
 55 |     // one draw per component, in x, y, z order
 56 |     particles[i].pos[0] = unif_d(gen);
 57 |     particles[i].pos[1] = unif_d(gen);
 58 |     particles[i].pos[2] = unif_d(gen);
 59 |   }
 60 | }
 57 | 
 58 | // Gives every particle a pseudo-random velocity: each component is drawn
 59 | // uniformly from [-1, 1) and scaled by 1e-3. The generator uses the fixed
 60 | // seed 42, so the velocities are identical from run to run.
 61 | // NOTE(review): init_pos() and init_mass() seed their generators with the
 62 | // same constant, so all three fields are built from the same raw sequence.
 63 | void GSimulation :: init_vel()  
 64 | {
 65 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 66 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 67 | 
 68 |   const int count = get_npart();
 69 |   for(int i=0; i<count; ++i)
 70 |   {
 71 |     // one draw per component, in x, y, z order
 72 |     particles[i].vel[0] = unif_d(gen) * 1.0e-3f;
 73 |     particles[i].vel[1] = unif_d(gen) * 1.0e-3f;
 74 |     particles[i].vel[2] = unif_d(gen) * 1.0e-3f; 
 75 |   }
 76 | }
 71 | 
 72 | // Clears the three acceleration components of every particle.
 73 | void GSimulation :: init_acc() 
 74 | {
 75 |   const int count = get_npart();
 76 |   for(int p=0; p<count; ++p)
 77 |   {
 78 |     for(int axis=0; axis<3; ++axis)
 79 |     {
 80 |       particles[p].acc[axis] = 0.f;
 81 |     }
 82 |   }
 83 | }
 81 | 
 82 | // Assigns every particle a pseudo-random mass: a value drawn uniformly from
 83 | // [0, 1) multiplied by the particle count. The generator uses the fixed
 84 | // seed 42, so the masses are identical from run to run.
 85 | void GSimulation :: init_mass() 
 86 | {
 87 |   const int count = get_npart();
 88 |   const real_type n = static_cast<real_type> (count);   // mass scale factor
 89 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 90 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 91 | 
 92 |   for(int i=0; i<count; ++i)
 93 |   {
 94 |     particles[i].mass = n * unif_d(gen); 
 95 |   }
 96 | }
 94 | 
 95 | // Runs the complete benchmark. Allocates the particle array, fills it through
 96 | // the init_* helpers and prints the table header. Then, for each time step,
 97 | // it accumulates the softened pairwise acceleration over all (i, j) pairs,
 98 | // advances velocities and positions by dt, resets the accelerations and
 99 | // computes the kinetic energy. Every get_sfreq() steps one row with the step
100 | // index, s*dt, kinetic energy, timing and GFlops rate is printed. A summary
101 | // (total time, average rate +- deviation) closes the run.
102 | void GSimulation :: start() 
103 | {
104 |   real_type energy;
105 |   real_type dt = get_tstep();
106 |   int n = get_npart();
107 |   int i,j;
108 |   
109 |   //allocate particles
110 |   particles = new Particle[n];
111 |  
112 |   init_pos();
113 |   init_vel();
114 |   init_acc();
115 |   init_mass();
116 |   
117 |   print_header();
118 |   
119 |   _totTime = 0.; 
120 |   
121 |   // prevents explosion in the case the particles are really close to each other 
122 |   const double softeningSquared = 1e-3;
123 |   const double G = 6.67259e-11;
124 |   
125 |   CPUTime time;
126 |   double ts0 = 0;
127 |   double ts1 = 0;
128 |   double nd = double(n);
129 |   // flop estimate for one time step, in units of 1e9 operations
130 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
131 |   double av=0.0, dev=0.0;   // sum and sum of squares of the sampled rates
132 |   int nf = 0;               // number of rows printed so far
133 |   
134 |   const double t0 = time.start();
135 |   for (int s=1; s<=get_nsteps(); ++s)
136 |   {   
137 |     ts0 += time.start(); 
138 |     for (i = 0; i < n; i++)// update acceleration
139 |     {
140 |       for (j = 0; j < n; j++)
141 |       {
142 |           real_type dx, dy, dz;
143 |           real_type distanceSqr = 0.0;
144 |           real_type distanceInv = 0.0;
145 | 
146 |           dx = particles[j].pos[0] - particles[i].pos[0];   //1flop
147 |           dy = particles[j].pos[1] - particles[i].pos[1];   //1flop
148 |           dz = particles[j].pos[2] - particles[i].pos[2];   //1flop
149 | 
150 |           distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;   //6flops
151 |           distanceInv = 1.0 / sqrt(distanceSqr);                    //1div+1sqrt
152 | 
153 |           particles[i].acc[0] += dx * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
154 |           particles[i].acc[1] += dy * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
155 |           particles[i].acc[2] += dz * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
156 | 
157 |       }
158 |     }
159 |     energy = 0;
160 | 
161 |     for (i = 0; i < n; ++i)// update position and velocity
162 |     {
163 |       particles[i].vel[0] += particles[i].acc[0] * dt;   //2flops
164 |       particles[i].vel[1] += particles[i].acc[1] * dt;   //2flops
165 |       particles[i].vel[2] += particles[i].acc[2] * dt;   //2flops
166 | 
167 |       particles[i].pos[0] += particles[i].vel[0] * dt;   //2flops
168 |       particles[i].pos[1] += particles[i].vel[1] * dt;   //2flops
169 |       particles[i].pos[2] += particles[i].vel[2] * dt;   //2flops
170 | 
171 |       // the acceleration is rebuilt from scratch on the next step
172 |       particles[i].acc[0] = 0.;
173 |       particles[i].acc[1] = 0.;
174 |       particles[i].acc[2] = 0.;
175 | 
176 |       energy += particles[i].mass * (
177 |                 particles[i].vel[0]*particles[i].vel[0] + 
178 |                 particles[i].vel[1]*particles[i].vel[1] +
179 |                 particles[i].vel[2]*particles[i].vel[2]); //7flops
180 |     }
181 |   
182 |     _kenergy = 0.5 * energy; 
183 |     
184 |     ts1 += time.stop();
185 |     if(!(s%get_sfreq()) ) 
186 |     {
187 |       nf += 1;      
188 |       std::cout << " " 
189 |                 <<  std::left << std::setw(8)  << s
190 |                 <<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
191 |                 <<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
192 |                 <<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
193 |                 <<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
194 |                 <<  std::endl;
195 |       // the first two samples are left out of the statistics
196 |       if(nf > 2) 
197 |       {
198 |         av  += gflops*get_sfreq()/(ts1 - ts0);
199 |         dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
200 |       }
201 |       
202 |       ts0 = 0;
203 |       ts1 = 0;
204 |     }
205 |   
206 |   } //end of the time step loop
207 |   
208 |   const double t1 = time.stop();
209 |   _totTime  = (t1-t0);
210 |   _totFlops = gflops*get_nsteps();
211 |   
212 |   // Only nf-2 samples were accumulated (see the nf > 2 test in the loop).
213 |   // With fewer than three printed rows nothing was accumulated, and dividing
214 |   // by nf-2 would produce NaN/inf; av and dev are then left at 0.
215 |   const int nsamples = nf - 2;
216 |   if(nsamples > 0)
217 |   {
218 |     av /= (double)nsamples;
219 |     // rounding can push the variance of near-identical samples slightly
220 |     // below zero; clamp it so that sqrt() does not return NaN
221 |     const double variance = dev/(double)nsamples - av*av;
222 |     dev = (variance > 0.0) ? sqrt(variance) : 0.0;
223 |   }
224 |   
225 |   int nthreads=1;
226 | 
227 |   std::cout << std::endl;
228 |   std::cout << "# Number Threads     : " << nthreads << std::endl;
229 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
230 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
231 |   std::cout << "===============================" << std::endl;
232 | 
233 | }
214 | 
215 | 
216 | // Prints the run parameters, then the column titles of the per-step table
217 | // framed by two horizontal rules. Column widths are 8, 8, 12, 12, 12.
218 | void GSimulation :: print_header()
219 | {
220 |   const char *rule = "------------------------------------------------";
221 | 
222 |   std::cout << " nPart = " << get_npart()  << "; ";
223 |   std::cout << "nSteps = " << get_nsteps() << "; ";
224 |   std::cout << "dt = "     << get_tstep()  << std::endl;
225 | 
226 |   std::cout << rule << std::endl;
227 | 
228 |   std::cout << " ";
229 |   std::cout << std::left << std::setw(8)  << "s";
230 |   std::cout << std::left << std::setw(8)  << "dt";
231 |   std::cout << std::left << std::setw(12) << "kenergy";
232 |   std::cout << std::left << std::setw(12) << "time (s)";
233 |   std::cout << std::left << std::setw(12) << "GFlops";
234 |   std::cout << std::endl;
235 | 
236 |   std::cout << rule << std::endl;
237 | }
235 | 
236 | // Releases the particle array. start() creates it with new[], so it has to
237 | // be released with delete[]; a plain delete on an array is undefined
238 | // behaviour.
239 | GSimulation :: ~GSimulation()
240 | {
241 |   delete[] particles;
242 | }
240 | 


--------------------------------------------------------------------------------
/ver1/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | // Builds a simulation with the default run parameters (16000 particles,
 25 | // 10 steps, time step 0.1, sample frequency 1) and prints a start banner.
 26 | GSimulation :: GSimulation()
 27 | {
 28 |   // In this file only start() allocates `particles`, while the destructor
 29 |   // always releases it. Begin from a null pointer so that destroying an
 30 |   // object that was never started does not free an indeterminate address.
 31 |   particles = nullptr;
 32 | 
 33 |   // keep the result members defined even if start() is never called
 34 |   _kenergy  = 0;
 35 |   _totTime  = 0.;
 36 |   _totFlops = 0.;
 37 | 
 38 |   std::cout << "===============================" << std::endl;
 39 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 40 |   set_npart(16000); 
 41 |   set_nsteps(10);
 42 |   set_tstep(0.1); 
 43 |   set_sfreq(1);
 44 | }
 33 | 
 34 | // Public setter for the particle count: forwards N to set_npart().
 35 | void GSimulation::set_number_of_particles(int N)
 36 | {
 37 |   this->set_npart(N);
 38 | }
 38 | 
 39 | // Public setter for the number of integration steps: forwards N to
 40 | // set_nsteps().
 41 | void GSimulation::set_number_of_steps(int N)
 42 | {
 43 |   this->set_nsteps(N);
 44 | }
 43 | 
 44 | // Gives every particle a pseudo-random position: each coordinate is drawn
 45 | // uniformly from [0, 1). The Mersenne Twister is seeded with the constant
 46 | // 42, so the positions are identical from run to run.
 47 | void GSimulation :: init_pos()  
 48 | {
 49 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 50 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 51 |   
 52 |   const int count = get_npart();
 53 |   for(int i=0; i<count; ++i)
 54 |   {
 55 |     // one draw per component, in x, y, z order
 56 |     particles[i].pos[0] = unif_d(gen);
 57 |     particles[i].pos[1] = unif_d(gen);
 58 |     particles[i].pos[2] = unif_d(gen);
 59 |   }
 60 | }
 57 | 
 58 | // Gives every particle a pseudo-random velocity: each component is drawn
 59 | // uniformly from [-1, 1) and scaled by 1e-3. The generator uses the fixed
 60 | // seed 42, so the velocities are identical from run to run.
 61 | // NOTE(review): init_pos() and init_mass() seed their generators with the
 62 | // same constant, so all three fields are built from the same raw sequence.
 63 | void GSimulation :: init_vel()  
 64 | {
 65 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 66 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 67 | 
 68 |   const int count = get_npart();
 69 |   for(int i=0; i<count; ++i)
 70 |   {
 71 |     // one draw per component, in x, y, z order
 72 |     particles[i].vel[0] = unif_d(gen) * 1.0e-3f;
 73 |     particles[i].vel[1] = unif_d(gen) * 1.0e-3f;
 74 |     particles[i].vel[2] = unif_d(gen) * 1.0e-3f; 
 75 |   }
 76 | }
 71 | 
 72 | // Clears the three acceleration components of every particle.
 73 | void GSimulation :: init_acc() 
 74 | {
 75 |   const int count = get_npart();
 76 |   for(int p=0; p<count; ++p)
 77 |   {
 78 |     for(int axis=0; axis<3; ++axis)
 79 |     {
 80 |       particles[p].acc[axis] = 0.f;
 81 |     }
 82 |   }
 83 | }
 81 | 
 82 | // Assigns every particle a pseudo-random mass: a value drawn uniformly from
 83 | // [0, 1) multiplied by the particle count. The generator uses the fixed
 84 | // seed 42, so the masses are identical from run to run.
 85 | void GSimulation :: init_mass() 
 86 | {
 87 |   const int count = get_npart();
 88 |   const real_type n = static_cast<real_type> (count);   // mass scale factor
 89 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 90 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 91 | 
 92 |   for(int i=0; i<count; ++i)
 93 |   {
 94 |     particles[i].mass = n * unif_d(gen); 
 95 |   }
 96 | }
 94 | 
 95 | // Runs the complete benchmark. Allocates the particle array, fills it through
 96 | // the init_* helpers and prints the table header. Then, for each time step,
 97 | // it accumulates the softened pairwise acceleration over all (i, j) pairs,
 98 | // advances velocities and positions by dt, resets the accelerations and
 99 | // computes the kinetic energy. Every get_sfreq() steps one row with the step
100 | // index, s*dt, kinetic energy, timing and GFlops rate is printed. A summary
101 | // (total time, average rate +- deviation) closes the run.
102 | void GSimulation :: start() 
103 | {
104 |   real_type energy;
105 |   real_type dt = get_tstep();
106 |   int n = get_npart();
107 |   int i,j;
108 |   
109 |   //allocate particles
110 |   particles = new Particle[n];
111 |  
112 |   init_pos();
113 |   init_vel();
114 |   init_acc();
115 |   init_mass();
116 |   
117 |   print_header();
118 |   
119 |   _totTime = 0.; 
120 |   
121 |   // prevents explosion in the case the particles are really close to each other 
122 |   const double softeningSquared = 1e-3;
123 |   const double G = 6.67259e-11;
124 |   
125 |   CPUTime time;
126 |   double ts0 = 0;
127 |   double ts1 = 0;
128 |   double nd = double(n);
129 |   // flop estimate for one time step, in units of 1e9 operations
130 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
131 |   double av=0.0, dev=0.0;   // sum and sum of squares of the sampled rates
132 |   int nf = 0;               // number of rows printed so far
133 |   
134 |   const double t0 = time.start();
135 |   for (int s=1; s<=get_nsteps(); ++s)
136 |   {   
137 |     ts0 += time.start(); 
138 |     for (i = 0; i < n; i++)// update acceleration
139 |     {
140 |       for (j = 0; j < n; j++)
141 |       {
142 |           real_type dx, dy, dz;
143 |           real_type distanceSqr = 0.0;
144 |           real_type distanceInv = 0.0;
145 | 
146 |           dx = particles[j].pos[0] - particles[i].pos[0];   //1flop
147 |           dy = particles[j].pos[1] - particles[i].pos[1];   //1flop
148 |           dz = particles[j].pos[2] - particles[i].pos[2];   //1flop
149 | 
150 |           distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;   //6flops
151 |           distanceInv = 1.0 / sqrt(distanceSqr);                    //1div+1sqrt
152 | 
153 |           particles[i].acc[0] += dx * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
154 |           particles[i].acc[1] += dy * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
155 |           particles[i].acc[2] += dz * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
156 | 
157 |       }
158 |     }
159 |     energy = 0;
160 | 
161 |     for (i = 0; i < n; ++i)// update position and velocity
162 |     {
163 |       particles[i].vel[0] += particles[i].acc[0] * dt;   //2flops
164 |       particles[i].vel[1] += particles[i].acc[1] * dt;   //2flops
165 |       particles[i].vel[2] += particles[i].acc[2] * dt;   //2flops
166 | 
167 |       particles[i].pos[0] += particles[i].vel[0] * dt;   //2flops
168 |       particles[i].pos[1] += particles[i].vel[1] * dt;   //2flops
169 |       particles[i].pos[2] += particles[i].vel[2] * dt;   //2flops
170 | 
171 |       // the acceleration is rebuilt from scratch on the next step
172 |       particles[i].acc[0] = 0.;
173 |       particles[i].acc[1] = 0.;
174 |       particles[i].acc[2] = 0.;
175 | 
176 |       energy += particles[i].mass * (
177 |                 particles[i].vel[0]*particles[i].vel[0] + 
178 |                 particles[i].vel[1]*particles[i].vel[1] +
179 |                 particles[i].vel[2]*particles[i].vel[2]); //7flops
180 |     }
181 |   
182 |     _kenergy = 0.5 * energy; 
183 |     
184 |     ts1 += time.stop();
185 |     if(!(s%get_sfreq()) ) 
186 |     {
187 |       nf += 1;      
188 |       std::cout << " " 
189 |                 <<  std::left << std::setw(8)  << s
190 |                 <<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
191 |                 <<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
192 |                 <<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
193 |                 <<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
194 |                 <<  std::endl;
195 |       // the first two samples are left out of the statistics
196 |       if(nf > 2) 
197 |       {
198 |         av  += gflops*get_sfreq()/(ts1 - ts0);
199 |         dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
200 |       }
201 |       
202 |       ts0 = 0;
203 |       ts1 = 0;
204 |     }
205 |   
206 |   } //end of the time step loop
207 |   
208 |   const double t1 = time.stop();
209 |   _totTime  = (t1-t0);
210 |   _totFlops = gflops*get_nsteps();
211 |   
212 |   // Only nf-2 samples were accumulated (see the nf > 2 test in the loop).
213 |   // With fewer than three printed rows nothing was accumulated, and dividing
214 |   // by nf-2 would produce NaN/inf; av and dev are then left at 0.
215 |   const int nsamples = nf - 2;
216 |   if(nsamples > 0)
217 |   {
218 |     av /= (double)nsamples;
219 |     // rounding can push the variance of near-identical samples slightly
220 |     // below zero; clamp it so that sqrt() does not return NaN
221 |     const double variance = dev/(double)nsamples - av*av;
222 |     dev = (variance > 0.0) ? sqrt(variance) : 0.0;
223 |   }
224 |   
225 |   int nthreads=1;
226 | 
227 |   std::cout << std::endl;
228 |   std::cout << "# Number Threads     : " << nthreads << std::endl;
229 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
230 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
231 |   std::cout << "===============================" << std::endl;
232 | 
233 | }
214 | 
215 | 
216 | // Prints the run parameters, then the column titles of the per-step table
217 | // framed by two horizontal rules. Column widths are 8, 8, 12, 12, 12.
218 | void GSimulation :: print_header()
219 | {
220 |   const char *rule = "------------------------------------------------";
221 | 
222 |   std::cout << " nPart = " << get_npart()  << "; ";
223 |   std::cout << "nSteps = " << get_nsteps() << "; ";
224 |   std::cout << "dt = "     << get_tstep()  << std::endl;
225 | 
226 |   std::cout << rule << std::endl;
227 | 
228 |   std::cout << " ";
229 |   std::cout << std::left << std::setw(8)  << "s";
230 |   std::cout << std::left << std::setw(8)  << "dt";
231 |   std::cout << std::left << std::setw(12) << "kenergy";
232 |   std::cout << std::left << std::setw(12) << "time (s)";
233 |   std::cout << std::left << std::setw(12) << "GFlops";
234 |   std::cout << std::endl;
235 | 
236 |   std::cout << rule << std::endl;
237 | }
235 | 
236 | // Releases the particle array. start() creates it with new[], so it has to
237 | // be released with delete[]; a plain delete on an array is undefined
238 | // behaviour.
239 | GSimulation :: ~GSimulation()
240 | {
241 |   delete[] particles;
242 | }
240 | 


--------------------------------------------------------------------------------
/ver2/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | // Builds a simulation with the default run parameters (16000 particles,
 25 | // 10 steps, time step 0.1, sample frequency 1) and prints a start banner.
 26 | GSimulation :: GSimulation()
 27 | {
 28 |   // In this file only start() allocates `particles`, while the destructor
 29 |   // always releases it. Begin from a null pointer so that destroying an
 30 |   // object that was never started does not free an indeterminate address.
 31 |   particles = nullptr;
 32 | 
 33 |   // keep the result members defined even if start() is never called
 34 |   _kenergy  = 0;
 35 |   _totTime  = 0.;
 36 |   _totFlops = 0.;
 37 | 
 38 |   std::cout << "===============================" << std::endl;
 39 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 40 |   set_npart(16000); 
 41 |   set_nsteps(10);
 42 |   set_tstep(0.1); 
 43 |   set_sfreq(1);
 44 | }
 33 | 
 34 | // Public setter for the particle count: forwards N to set_npart().
 35 | void GSimulation::set_number_of_particles(int N)
 36 | {
 37 |   this->set_npart(N);
 38 | }
 38 | 
 39 | // Public setter for the number of integration steps: forwards N to
 40 | // set_nsteps().
 41 | void GSimulation::set_number_of_steps(int N)
 42 | {
 43 |   this->set_nsteps(N);
 44 | }
 43 | 
 44 | // Gives every particle a pseudo-random position: each coordinate is drawn
 45 | // uniformly from [0, 1). The Mersenne Twister is seeded with the constant
 46 | // 42, so the positions are identical from run to run.
 47 | void GSimulation :: init_pos()  
 48 | {
 49 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 50 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 51 |   
 52 |   const int count = get_npart();
 53 |   for(int i=0; i<count; ++i)
 54 |   {
 55 |     // one draw per component, in x, y, z order
 56 |     particles[i].pos[0] = unif_d(gen);
 57 |     particles[i].pos[1] = unif_d(gen);
 58 |     particles[i].pos[2] = unif_d(gen);
 59 |   }
 60 | }
 57 | 
 58 | // Gives every particle a pseudo-random velocity: each component is drawn
 59 | // uniformly from [-1, 1) and scaled by 1e-3. The generator uses the fixed
 60 | // seed 42, so the velocities are identical from run to run.
 61 | // NOTE(review): init_pos() and init_mass() seed their generators with the
 62 | // same constant, so all three fields are built from the same raw sequence.
 63 | void GSimulation :: init_vel()  
 64 | {
 65 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 66 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 67 | 
 68 |   const int count = get_npart();
 69 |   for(int i=0; i<count; ++i)
 70 |   {
 71 |     // one draw per component, in x, y, z order
 72 |     particles[i].vel[0] = unif_d(gen) * 1.0e-3f;
 73 |     particles[i].vel[1] = unif_d(gen) * 1.0e-3f;
 74 |     particles[i].vel[2] = unif_d(gen) * 1.0e-3f; 
 75 |   }
 76 | }
 71 | 
 72 | // Clears the three acceleration components of every particle.
 73 | void GSimulation :: init_acc() 
 74 | {
 75 |   const int count = get_npart();
 76 |   for(int p=0; p<count; ++p)
 77 |   {
 78 |     for(int axis=0; axis<3; ++axis)
 79 |     {
 80 |       particles[p].acc[axis] = 0.f;
 81 |     }
 82 |   }
 83 | }
 81 | 
 82 | // Assigns every particle a pseudo-random mass: a value drawn uniformly from
 83 | // [0, 1) multiplied by the particle count. The generator uses the fixed
 84 | // seed 42, so the masses are identical from run to run.
 85 | void GSimulation :: init_mass() 
 86 | {
 87 |   const int count = get_npart();
 88 |   const real_type n = static_cast<real_type> (count);   // mass scale factor
 89 |   std::mt19937 gen(42);   // fixed seed, so no std::random_device is required
 90 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 91 | 
 92 |   for(int i=0; i<count; ++i)
 93 |   {
 94 |     particles[i].mass = n * unif_d(gen); 
 95 |   }
 96 | }
 94 | 
 95 | // Runs the complete benchmark. Allocates the particle array, fills it through
 96 | // the init_* helpers and prints the table header. Then, for each time step,
 97 | // it accumulates the softened pairwise acceleration over all (i, j) pairs,
 98 | // advances velocities and positions by dt, resets the accelerations and
 99 | // computes the kinetic energy. Every get_sfreq() steps one row with the step
100 | // index, s*dt, kinetic energy, timing and GFlops rate is printed. A summary
101 | // (total time, average rate +- deviation) closes the run.
102 | void GSimulation :: start() 
103 | {
104 |   real_type energy;
105 |   real_type dt = get_tstep();
106 |   int n = get_npart();
107 |   int i,j;
108 |   
109 |   //allocate particles
110 |   particles = new Particle[n];
111 |  
112 |   init_pos();
113 |   init_vel();
114 |   init_acc();
115 |   init_mass();
116 |   
117 |   print_header();
118 |   
119 |   _totTime = 0.; 
120 |   
121 |   // prevents explosion in the case the particles are really close to each other 
122 |   const float softeningSquared = 1e-3f;
123 |   const float G = 6.67259e-11f;
124 |   
125 |   CPUTime time;
126 |   double ts0 = 0;
127 |   double ts1 = 0;
128 |   double nd = double(n);
129 |   // flop estimate for one time step, in units of 1e9 operations
130 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
131 |   double av=0.0, dev=0.0;   // sum and sum of squares of the sampled rates
132 |   int nf = 0;               // number of rows printed so far
133 |   
134 |   const double t0 = time.start();
135 |   for (int s=1; s<=get_nsteps(); ++s)
136 |   {   
137 |     ts0 += time.start(); 
138 |     for (i = 0; i < n; i++)// update acceleration
139 |     {
140 |       for (j = 0; j < n; j++)
141 |       {
142 |           real_type dx, dy, dz;
143 |           real_type distanceSqr = 0.0f;
144 |           real_type distanceInv = 0.0f;
145 | 
146 |           dx = particles[j].pos[0] - particles[i].pos[0];   //1flop
147 |           dy = particles[j].pos[1] - particles[i].pos[1];   //1flop
148 |           dz = particles[j].pos[2] - particles[i].pos[2];   //1flop
149 | 
150 |           distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;   //6flops
151 |           distanceInv = 1.0 / sqrt(distanceSqr);                    //1div+1sqrt
152 | 
153 |           particles[i].acc[0] += dx * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
154 |           particles[i].acc[1] += dy * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
155 |           particles[i].acc[2] += dz * G * particles[j].mass * distanceInv * distanceInv * distanceInv;   //6flops
156 | 
157 |       }
158 |     }
159 |     energy = 0;
160 | 
161 |     for (i = 0; i < n; ++i)// update position and velocity
162 |     {
163 |       particles[i].vel[0] += particles[i].acc[0] * dt;   //2flops
164 |       particles[i].vel[1] += particles[i].acc[1] * dt;   //2flops
165 |       particles[i].vel[2] += particles[i].acc[2] * dt;   //2flops
166 | 
167 |       particles[i].pos[0] += particles[i].vel[0] * dt;   //2flops
168 |       particles[i].pos[1] += particles[i].vel[1] * dt;   //2flops
169 |       particles[i].pos[2] += particles[i].vel[2] * dt;   //2flops
170 | 
171 |       // the acceleration is rebuilt from scratch on the next step
172 |       particles[i].acc[0] = 0.;
173 |       particles[i].acc[1] = 0.;
174 |       particles[i].acc[2] = 0.;
175 | 
176 |       energy += particles[i].mass * (
177 |                 particles[i].vel[0]*particles[i].vel[0] + 
178 |                 particles[i].vel[1]*particles[i].vel[1] +
179 |                 particles[i].vel[2]*particles[i].vel[2]); //7flops
180 |     }
181 |   
182 |     _kenergy = 0.5 * energy; 
183 |     
184 |     ts1 += time.stop();
185 |     if(!(s%get_sfreq()) ) 
186 |     {
187 |       nf += 1;      
188 |       std::cout << " " 
189 |                 <<  std::left << std::setw(8)  << s
190 |                 <<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
191 |                 <<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
192 |                 <<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
193 |                 <<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
194 |                 <<  std::endl;
195 |       // the first two samples are left out of the statistics
196 |       if(nf > 2) 
197 |       {
198 |         av  += gflops*get_sfreq()/(ts1 - ts0);
199 |         dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
200 |       }
201 |       
202 |       ts0 = 0;
203 |       ts1 = 0;
204 |     }
205 |   
206 |   } //end of the time step loop
207 |   
208 |   const double t1 = time.stop();
209 |   _totTime  = (t1-t0);
210 |   _totFlops = gflops*get_nsteps();
211 |   
212 |   // Only nf-2 samples were accumulated (see the nf > 2 test in the loop).
213 |   // With fewer than three printed rows nothing was accumulated, and dividing
214 |   // by nf-2 would produce NaN/inf; av and dev are then left at 0.
215 |   const int nsamples = nf - 2;
216 |   if(nsamples > 0)
217 |   {
218 |     av /= (double)nsamples;
219 |     // rounding can push the variance of near-identical samples slightly
220 |     // below zero; clamp it so that sqrt() does not return NaN
221 |     const double variance = dev/(double)nsamples - av*av;
222 |     dev = (variance > 0.0) ? sqrt(variance) : 0.0;
223 |   }
224 |   
225 |   int nthreads=1;
226 | 
227 |   std::cout << std::endl;
228 |   std::cout << "# Number Threads     : " << nthreads << std::endl;
229 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
230 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
231 |   std::cout << "===============================" << std::endl;
232 | 
233 | }
214 | 
215 | 
216 | // Prints the run parameters, then the column titles of the per-step table
217 | // framed by two horizontal rules. Column widths are 8, 8, 12, 12, 12.
218 | void GSimulation :: print_header()
219 | {
220 |   const char *rule = "------------------------------------------------";
221 | 
222 |   std::cout << " nPart = " << get_npart()  << "; ";
223 |   std::cout << "nSteps = " << get_nsteps() << "; ";
224 |   std::cout << "dt = "     << get_tstep()  << std::endl;
225 | 
226 |   std::cout << rule << std::endl;
227 | 
228 |   std::cout << " ";
229 |   std::cout << std::left << std::setw(8)  << "s";
230 |   std::cout << std::left << std::setw(8)  << "dt";
231 |   std::cout << std::left << std::setw(12) << "kenergy";
232 |   std::cout << std::left << std::setw(12) << "time (s)";
233 |   std::cout << std::left << std::setw(12) << "GFlops";
234 |   std::cout << std::endl;
235 | 
236 |   std::cout << rule << std::endl;
237 | }
235 | 
236 | GSimulation :: ~GSimulation()
237 | {
238 |   delete particles;
239 | }
240 | 


--------------------------------------------------------------------------------
/ver4/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   //allocate particles
103 |   particles = new ParticleSoA;
104 |   
105 |   particles->pos_x = new real_type[n];
106 |   particles->pos_y = new real_type[n];
107 |   particles->pos_z = new real_type[n];
108 |   particles->vel_x = new real_type[n];
109 |   particles->vel_y = new real_type[n];
110 |   particles->vel_z = new real_type[n];
111 |   particles->acc_x = new real_type[n];
112 |   particles->acc_y = new real_type[n];
113 |   particles->acc_z = new real_type[n];
114 |   particles->mass  = new real_type[n]; 
115 | 
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1.e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const double t0 = time.start();
137 |   for (int s=1; s<=get_nsteps(); ++s)
138 |   {   
139 |    ts0 += time.start(); 
140 |    for (j = 0; j < n; j++)// update acceleration
141 |    {
142 | #pragma omp simd   
143 |     for (i = 0; i < n; i++)
144 |      {
145 |          real_type dx, dy, dz;
146 | 	 real_type distanceSqr = 0.0f;
147 | 	 real_type distanceInv = 0.0f;
148 | 		  
149 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
150 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
151 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
152 | 	
153 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
154 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
155 | 
156 | 	 particles->acc_x[i] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
157 | 	 particles->acc_y[i] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
158 | 	 particles->acc_z[i] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
159 |      }
160 |    }
161 |    energy = 0;
162 | 
163 |    for (i = 0; i < n; ++i)// update position
164 |    {
165 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
166 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
167 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
168 | 	  
169 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
170 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
171 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
172 | 
173 |      particles->acc_x[i] = 0.;
174 |      particles->acc_y[i] = 0.;
175 |      particles->acc_z[i] = 0.;
176 | 	
177 |      energy += particles->mass[i] * (
178 | 	       particles->vel_x[i]*particles->vel_x[i] + 
179 |                particles->vel_y[i]*particles->vel_y[i] +
180 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
181 |    }
182 |   
183 |     _kenergy = 0.5 * energy; 
184 |     
185 |     ts1 += time.stop();
186 |     if(!(s%get_sfreq()) ) 
187 |     {
188 |       nf += 1;      
189 |       std::cout << " " 
190 | 		<<  std::left << std::setw(8)  << s
191 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
192 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
193 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
194 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
195 | 		<<  std::endl;
196 |       if(nf > 2) 
197 |       {
198 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
199 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
200 |       }
201 |       
202 |       ts0 = 0;
203 |       ts1 = 0;
204 |     }
205 |   
206 |   } //end of the time step loop
207 |   
208 |   const double t1 = time.stop();
209 |   _totTime  = (t1-t0);
210 |   _totFlops = gflops*get_nsteps();
211 |   
212 |   av/=(double)(nf-2);
213 |   dev=sqrt(dev/(double)(nf-2)-av*av);
214 |   
215 |   int nthreads=1;
216 | 
217 |   std::cout << std::endl;
218 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
219 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
220 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
221 |   std::cout << "===============================" << std::endl;
222 | 
223 | }
224 | 
225 | 
226 | void GSimulation :: print_header()
227 | {
228 | 	    
229 |   std::cout << " nPart = " << get_npart()  << "; " 
230 | 	    << "nSteps = " << get_nsteps() << "; " 
231 | 	    << "dt = "     << get_tstep()  << std::endl;
232 | 	    
233 |   std::cout << "------------------------------------------------" << std::endl;
234 |   std::cout << " " 
235 | 	    <<  std::left << std::setw(8)  << "s"
236 | 	    <<  std::left << std::setw(8)  << "dt"
237 | 	    <<  std::left << std::setw(12) << "kenergy"
238 | 	    <<  std::left << std::setw(12) << "time (s)"
239 | 	    <<  std::left << std::setw(12) << "GFlops"
240 | 	    <<  std::endl;
241 |   std::cout << "------------------------------------------------" << std::endl;
242 | 
243 | 
244 | }
245 | 
246 | GSimulation :: ~GSimulation()
247 | {
248 |   delete [] particles->pos_x;
249 |   delete [] particles->pos_y;
250 |   delete [] particles->pos_z;
251 |   delete [] particles->vel_x;
252 |   delete [] particles->vel_y;
253 |   delete [] particles->vel_z;
254 |   delete [] particles->acc_x;
255 |   delete [] particles->acc_y;
256 |   delete [] particles->acc_z;
257 |   delete [] particles->mass;
258 |   delete particles;
259 | }
260 | 


--------------------------------------------------------------------------------
/ver3/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   //allocate particles
103 |   particles = new ParticleSoA;
104 |   
105 |   particles->pos_x = new real_type[n];
106 |   particles->pos_y = new real_type[n];
107 |   particles->pos_z = new real_type[n];
108 |   particles->vel_x = new real_type[n];
109 |   particles->vel_y = new real_type[n];
110 |   particles->vel_z = new real_type[n];
111 |   particles->acc_x = new real_type[n];
112 |   particles->acc_y = new real_type[n];
113 |   particles->acc_z = new real_type[n];
114 |   particles->mass  = new real_type[n]; 
115 | 
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const double t0 = time.start();
137 |   for (int s=1; s<=get_nsteps(); ++s)
138 |   {   
139 |    ts0 += time.start(); 
140 |    for (i = 0; i < n; i++)// update acceleration
141 |    {
142 | #ifdef SIMD
143 | #pragma omp simd 
144 | #endif
145 |      for (j = 0; j < n; j++)
146 |      {
147 |          real_type dx, dy, dz;
148 | 	 real_type distanceSqr = 0.0f;
149 | 	 real_type distanceInv = 0.0f;
150 | 		  
151 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
152 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
153 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
154 | 	
155 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
156 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
157 | 
158 | 	 particles->acc_x[i] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
159 | 	 particles->acc_y[i] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
160 | 	 particles->acc_z[i] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
161 |      }
162 |    }
163 |    energy = 0;
164 | 
165 |    for (i = 0; i < n; ++i)// update position
166 |    {
167 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
168 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
169 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
170 | 	  
171 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
172 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
173 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
174 | 
175 |      particles->acc_x[i] = 0.;
176 |      particles->acc_y[i] = 0.;
177 |      particles->acc_z[i] = 0.;
178 | 	
179 |      energy += particles->mass[i] * (
180 | 	       particles->vel_x[i]*particles->vel_x[i] + 
181 |                particles->vel_y[i]*particles->vel_y[i] +
182 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
183 |    }
184 |   
185 |     _kenergy = 0.5 * energy; 
186 |     
187 |     ts1 += time.stop();
188 |     if(!(s%get_sfreq()) ) 
189 |     {
190 |       nf += 1;      
191 |       std::cout << " " 
192 | 		<<  std::left << std::setw(8)  << s
193 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
194 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
195 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
196 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
197 | 		<<  std::endl;
198 |       if(nf > 2) 
199 |       {
200 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
201 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
202 |       }
203 |       
204 |       ts0 = 0;
205 |       ts1 = 0;
206 |     }
207 |   
208 |   } //end of the time step loop
209 |   
210 |   const double t1 = time.stop();
211 |   _totTime  = (t1-t0);
212 |   _totFlops = gflops*get_nsteps();
213 |   
214 |   av/=(double)(nf-2);
215 |   dev=sqrt(dev/(double)(nf-2)-av*av);
216 |   
217 |   int nthreads=1;
218 | 
219 |   std::cout << std::endl;
220 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
221 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
222 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
223 |   std::cout << "===============================" << std::endl;
224 | 
225 | }
226 | 
227 | 
228 | void GSimulation :: print_header()
229 | {
230 | 	    
231 |   std::cout << " nPart = " << get_npart()  << "; " 
232 | 	    << "nSteps = " << get_nsteps() << "; " 
233 | 	    << "dt = "     << get_tstep()  << std::endl;
234 | 	    
235 |   std::cout << "------------------------------------------------" << std::endl;
236 |   std::cout << " " 
237 | 	    <<  std::left << std::setw(8)  << "s"
238 | 	    <<  std::left << std::setw(8)  << "dt"
239 | 	    <<  std::left << std::setw(12) << "kenergy"
240 | 	    <<  std::left << std::setw(12) << "time (s)"
241 | 	    <<  std::left << std::setw(12) << "GFlops"
242 | 	    <<  std::endl;
243 |   std::cout << "------------------------------------------------" << std::endl;
244 | 
245 | 
246 | }
247 | 
248 | GSimulation :: ~GSimulation()
249 | {
250 |   delete [] particles->pos_x;
251 |   delete [] particles->pos_y;
252 |   delete [] particles->pos_z;
253 |   delete [] particles->vel_x;
254 |   delete [] particles->vel_y;
255 |   delete [] particles->vel_z;
256 |   delete [] particles->acc_x;
257 |   delete [] particles->acc_y;
258 |   delete [] particles->acc_z;
259 |   delete [] particles->mass;
260 |   delete particles;
261 | }
262 | 


--------------------------------------------------------------------------------
/ver4/GSimulation-moveout.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   //allocate particles
103 |   particles = new ParticleSoA;
104 |   
105 |   particles->pos_x = new real_type[n];
106 |   particles->pos_y = new real_type[n];
107 |   particles->pos_z = new real_type[n];
108 |   particles->vel_x = new real_type[n];
109 |   particles->vel_y = new real_type[n];
110 |   particles->vel_z = new real_type[n];
111 |   particles->acc_x = new real_type[n];
112 |   particles->acc_y = new real_type[n];
113 |   particles->acc_z = new real_type[n];
114 |   particles->mass  = new real_type[n]; 
115 | 
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1.e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const double t0 = time.start();
137 |   for (int s=1; s<=get_nsteps(); ++s)
138 |   {   
139 |    ts0 += time.start(); 
140 |    for (i = 0; i < n; i++)// update acceleration
141 |    {
142 |      real_type ax_i = particles->acc_x[i];
143 |      real_type ay_i = particles->acc_y[i];
144 |      real_type az_i = particles->acc_z[i];
145 |      for (j = 0; j < n; j++)
146 |      {
147 |          real_type dx, dy, dz;
148 | 	 real_type distanceSqr = 0.0f;
149 | 	 real_type distanceInv = 0.0f;
150 | 		  
151 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
152 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
153 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
154 | 	
155 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
156 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
157 | 
158 | 	 ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
159 | 	 ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
160 | 	 az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
161 |      }
162 |      particles->acc_x[i] = ax_i;
163 |      particles->acc_y[i] = ay_i;
164 |      particles->acc_z[i] = az_i;
165 |    }
166 |    energy = 0;
167 | 
168 |    for (i = 0; i < n; ++i)// update position
169 |    {
170 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
171 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
172 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
173 | 	  
174 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
175 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
176 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
177 | 
178 |      particles->acc_x[i] = 0.;
179 |      particles->acc_y[i] = 0.;
180 |      particles->acc_z[i] = 0.;
181 | 	
182 |      energy += particles->mass[i] * (
183 | 	       particles->vel_x[i]*particles->vel_x[i] + 
184 |                particles->vel_y[i]*particles->vel_y[i] +
185 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
186 |    }
187 |   
188 |     _kenergy = 0.5 * energy; 
189 |     
190 |     ts1 += time.stop();
191 |     if(!(s%get_sfreq()) ) 
192 |     {
193 |       nf += 1;      
194 |       std::cout << " " 
195 | 		<<  std::left << std::setw(8)  << s
196 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
197 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
198 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
199 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
200 | 		<<  std::endl;
201 |       if(nf > 2) 
202 |       {
203 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
204 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
205 |       }
206 |       
207 |       ts0 = 0;
208 |       ts1 = 0;
209 |     }
210 |   
211 |   } //end of the time step loop
212 |   
213 |   const double t1 = time.stop();
214 |   _totTime  = (t1-t0);
215 |   _totFlops = gflops*get_nsteps();
216 |   
217 |   av/=(double)(nf-2);
218 |   dev=sqrt(dev/(double)(nf-2)-av*av);
219 |   
220 |   int nthreads=1;
221 | 
222 |   std::cout << std::endl;
223 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
224 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
225 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
226 |   std::cout << "===============================" << std::endl;
227 | 
228 | }
229 | 
230 | 
231 | void GSimulation :: print_header()
232 | {
233 | 	    
234 |   std::cout << " nPart = " << get_npart()  << "; " 
235 | 	    << "nSteps = " << get_nsteps() << "; " 
236 | 	    << "dt = "     << get_tstep()  << std::endl;
237 | 	    
238 |   std::cout << "------------------------------------------------" << std::endl;
239 |   std::cout << " " 
240 | 	    <<  std::left << std::setw(8)  << "s"
241 | 	    <<  std::left << std::setw(8)  << "dt"
242 | 	    <<  std::left << std::setw(12) << "kenergy"
243 | 	    <<  std::left << std::setw(12) << "time (s)"
244 | 	    <<  std::left << std::setw(12) << "GFlops"
245 | 	    <<  std::endl;
246 |   std::cout << "------------------------------------------------" << std::endl;
247 | 
248 | 
249 | }
250 | 
251 | GSimulation :: ~GSimulation()
252 | {
253 |   delete [] particles->pos_x;
254 |   delete [] particles->pos_y;
255 |   delete [] particles->pos_z;
256 |   delete [] particles->vel_x;
257 |   delete [] particles->vel_y;
258 |   delete [] particles->vel_z;
259 |   delete [] particles->acc_x;
260 |   delete [] particles->acc_y;
261 |   delete [] particles->acc_z;
262 |   delete [] particles->mass;
263 |   delete particles;
264 | }
265 | 


--------------------------------------------------------------------------------
/ver3/GSimulation-moveout.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optimization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()  
 45 | {
 46 |   std::random_device rd;	//random number generator
 47 |   std::mt19937 gen(42);      
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 |   
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()  
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f; 
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc() 
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f; 
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass() 
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen); 
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   //allocate particles
103 |   particles = new ParticleSoA;
104 |   
105 |   particles->pos_x = new real_type[n];
106 |   particles->pos_y = new real_type[n];
107 |   particles->pos_z = new real_type[n];
108 |   particles->vel_x = new real_type[n];
109 |   particles->vel_y = new real_type[n];
110 |   particles->vel_z = new real_type[n];
111 |   particles->acc_x = new real_type[n];
112 |   particles->acc_y = new real_type[n];
113 |   particles->acc_z = new real_type[n];
114 |   particles->mass  = new real_type[n]; 
115 | 
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const double t0 = time.start();
137 |   for (int s=1; s<=get_nsteps(); ++s)
138 |   {   
139 |    ts0 += time.start(); 
140 |    for (i = 0; i < n; i++)// update acceleration
141 |    {
142 |      real_type ax_i = particles->acc_x[i];
143 |      real_type ay_i = particles->acc_y[i];
144 |      real_type az_i = particles->acc_z[i];
145 |      for (j = 0; j < n; j++)
146 |      {
147 |          real_type dx, dy, dz;
148 | 	 real_type distanceSqr = 0.0f;
149 | 	 real_type distanceInv = 0.0f;
150 | 		  
151 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
152 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
153 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
154 | 	
155 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
156 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
157 | 
158 | 	 ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
159 | 	 ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
160 | 	 az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
161 |      }
162 |      particles->acc_x[i] = ax_i;
163 |      particles->acc_y[i] = ay_i;
164 |      particles->acc_z[i] = az_i;
165 |    }
166 |    energy = 0;
167 | 
168 |    for (i = 0; i < n; ++i)// update position
169 |    {
170 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
171 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
172 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
173 | 	  
174 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
175 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
176 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
177 | 
178 |      particles->acc_x[i] = 0.;
179 |      particles->acc_y[i] = 0.;
180 |      particles->acc_z[i] = 0.;
181 | 	
182 |      energy += particles->mass[i] * (
183 | 	       particles->vel_x[i]*particles->vel_x[i] + 
184 |                particles->vel_y[i]*particles->vel_y[i] +
185 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
186 |    }
187 |   
188 |     _kenergy = 0.5 * energy; 
189 |     
190 |     ts1 += time.stop();
191 |     if(!(s%get_sfreq()) ) 
192 |     {
193 |       nf += 1;      
194 |       std::cout << " " 
195 | 		<<  std::left << std::setw(8)  << s
196 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
197 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
198 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
199 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
200 | 		<<  std::endl;
201 |       if(nf > 2) 
202 |       {
203 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
204 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
205 |       }
206 |       
207 |       ts0 = 0;
208 |       ts1 = 0;
209 |     }
210 |   
211 |   } //end of the time step loop
212 |   
213 |   const double t1 = time.stop();
214 |   _totTime  = (t1-t0);
215 |   _totFlops = gflops*get_nsteps();
216 |   
217 |   av/=(double)(nf-2);
218 |   dev=sqrt(dev/(double)(nf-2)-av*av);
219 |   
220 |   int nthreads=1;
221 | 
222 |   std::cout << std::endl;
223 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
224 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
225 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
226 |   std::cout << "===============================" << std::endl;
227 | 
228 | }
229 | 
230 | 
231 | void GSimulation :: print_header()
232 | {
233 | 	    
234 |   std::cout << " nPart = " << get_npart()  << "; " 
235 | 	    << "nSteps = " << get_nsteps() << "; " 
236 | 	    << "dt = "     << get_tstep()  << std::endl;
237 | 	    
238 |   std::cout << "------------------------------------------------" << std::endl;
239 |   std::cout << " " 
240 | 	    <<  std::left << std::setw(8)  << "s"
241 | 	    <<  std::left << std::setw(8)  << "dt"
242 | 	    <<  std::left << std::setw(12) << "kenergy"
243 | 	    <<  std::left << std::setw(12) << "time (s)"
244 | 	    <<  std::left << std::setw(12) << "GFlops"
245 | 	    <<  std::endl;
246 |   std::cout << "------------------------------------------------" << std::endl;
247 | 
248 | 
249 | }
250 | 
251 | GSimulation :: ~GSimulation()
252 | {
253 |   delete [] particles->pos_x;
254 |   delete [] particles->pos_y;
255 |   delete [] particles->pos_z;
256 |   delete [] particles->vel_x;
257 |   delete [] particles->vel_y;
258 |   delete [] particles->vel_z;
259 |   delete [] particles->acc_x;
260 |   delete [] particles->acc_y;
261 |   delete [] particles->acc_z;
262 |   delete [] particles->mass;
263 |   delete particles;
264 | }
265 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Demo Session for Intel® Advisor and Intel® Compiler C++
This is an example code based on a simple N-body simulation of a distribution of point masses placed
at locations r_1,...,r_N with masses m_1,...,m_N. The position of the particles after a specified
time is computed using a finite difference method for ordinary differential equations.
  5 | 
  6 | ## Implementation
  7 | For each particle the position, the velocity, the acceleration and the mass is stored in a C-like
  8 | structure and for an N particles case, an array of this structure is allocated. This is the 
  9 | simple data-structure which is very close to the physical representation of a particle mass.
 10 | The file `Particle.hpp` contains the implementation of such data-structure.
 11 | 
For each particle indexed by i, the acceleration is computed as a_i = sum_j G*m_j*(r_j-r_i)/|r_j-r_i|^3
(a small softening term is added to the squared distance), and this
value is used to update the velocity and position using the Euler integration scheme.
Furthermore the total kinetic energy of the group of particles is computed.
 15 | The file `GSimulation.cpp` contains the implementation of the algorithm.
 16 | 
 17 | ## Directory structure of the Demo
The demo consists of several directories, which correspond to the different
optimization steps taken to enable vectorization and OpenMP multi-threading of the code.
Each directory has its own makefile to compile and run the test case.
To compile the code type `make`, and to run the simulation type `make run`.
 22 | As benchmark, the simulation starts with 2000 particles and 500 integration steps. One can
 23 | change the default giving the number of particles and the number of integration steps using
 24 | the command line argument:
 25 | `./nbody.x < # of particles> < # of integration>`
 26 | 
 27 | Try to change the number of particles and observe how the performance changes.
 28 | 
 29 | ## Different versions
 30 | To start the demo, go to the folder `ver0`, compile and run the test.
 31 | 
### Initial version: ver0
 33 | The typical output of the simulation is:
 34 | ```
 35 | Run the default test case on CPU:
 36 | ./nbody.x
 37 | ===============================
 38 |  Initialize Gravity Simulation
 39 |  nPart = 16000; nSteps = 10; dt = 0.1
 40 | ------------------------------------------------
 41 |  s       dt      kenergy     time (s)    GFlops
 42 | ------------------------------------------------
 43 |  1       0.1     26.405      1.7966      4.1324
 44 |  2       0.2     313.77      1.5309      4.8498
 45 |  3       0.3     926.56      1.5311      4.8489
 46 |  4       0.4     1866.4      1.5313      4.8484
 47 |  5       0.5     3135.6      1.5315      4.8479
 48 |  6       0.6     4737.6      1.5309      4.8497
 49 |  7       0.7     6676.6      1.5312      4.8487
 50 |  8       0.8     8957.7      1.5311      4.849
 51 |  9       0.9     11587       1.5314      4.848
 52 |  10      1       14572       1.5309      4.8495
 53 | 
 54 | # Number Threads     : 1
 55 | # Total Time (s)     : 15.577
 56 | # Average Perfomance : 4.8488 +- 0.00062286
 57 | ===============================
 58 | 
 59 | ```
 60 | 
Some useful information is printed on output. Columnwise: s is the
number of steps; dt is the physical time taking into account the physical
time integration step; kenergy is the kinetic energy of the group of particles;
time is the computational time taken since the previous report; GFlops is the
number of giga flops per second.
N.B. The GFlops value is an estimate obtained by looking into the code and counting
the number of math operations according to the algorithm. It is used only
as a standard metric for comparison. More realistic numbers can be measured
in a different way (Roofline model of Intel® Advisor).
 70 | 
 71 | Following the five steps of code modernization, 
 72 | https://software.intel.com/en-us/articles/what-is-code-modernization
 73 | we can improve the performance of the code.
 74 | 
 75 | - describe the Intel® Advisor result
 76 | - compile the code with processor specific optimization: -xSSE4.2, -xAVX, -xCORE-AVX2, -xCORE-AVX512, -xMIC-AVX512
 77 | - generate the compiler report and describe the different options: 
 78 | -  -qopt-report[=N]: default level is 2
 79 | -  -qopt-report-phase=<vec,loop,openmp,...>: default is all
 80 | -  -qopt-report-file=stdout | stderr | filename
 81 | -  -qopt-report-filter="GSimulation.cpp,130-204"
 82 | 
 83 | Then show how verbose is the compiler report and use filtering.
 84 | 
 85 | ### ver1 
Solution of the ver0. The optimizations are: -O2 -xAVX or higher.
 87 | The Makefile is the only difference. Here we generate higher vectorized code and
 88 | produce the compiler report.
 89 | One should run this version in the same way as before and:
 90 | - show the new performance numbers
 91 | - describe the Intel® Advisor result
 92 | - generate the compiler report
 93 | - explain FP conversions and precision of constants, variables and math functions
 94 | 
 95 | ### ver2
 96 | Solution of the ver1. The difference is in the GSimulation.cpp file where the consistent
 97 | computation with floats is made (constants and SQRT function).
 98 | One should run this version in the same way as before and:
 99 | - show the new performance numbers
100 | - describe the Intel® Advisor result
101 | - generate the compiler report
102 | - explain the remark #25085: Preprocess Loopnests: Moving Out Load and Store and 
103 |   remark #15415: vectorization support: non-unit strided load was generated for the variable
104 |   ....
105 |   remark #15300: LOOP WAS VECTORIZED
106 |   remark #15452: unmasked strided loads: 6 
107 |   remark #15475: --- begin vector cost summary ---
108 |   remark #15476: scalar cost: 115 
109 |   remark #15477: vector cost: 26.750 
110 |   remark #15478: estimated potential speedup: 4.070 
111 |   remark #15488: --- end vector cost summary ---
112 |   ....
113 | - explain vectorization gather/scatter
114 | - explain AoS and SoA differences
115 | 
116 | ### ver3
117 | Solution of the ver2. The differences are in:
118 | - Particle.hpp: the new SoA data structure is implemented
119 | - GSimulation.hpp: modified the data member according to SoA
120 | - GSimulation.cpp: allocation and reference to SoA
121 | 
122 | One should run this version in the same way as before and:
123 | - show the new performance numbers
124 | - describe the Intel® Advisor result
125 | - generate the compiler report
126 | - explain the remark #15344: loop was not vectorized: vector dependence prevents vectorization
127 |   remark #15346: vector dependence: assumed ANTI dependence between ... and ...
128 |   remark #15346: vector dependence: assumed FLOW dependence between ... and ...
129 | - explain the vectorization and how much we gain using it
130 | - refer to the Intel® compiler autovectorization guide and explain the requirements
131 |   for autovectorization
132 | - explain #pragma simd
133 | - explain #pragma simd reduction
134 | - modify in the `Makefile` the CXXFLAGS adding the OMPFLAGS at line 8, recompile and run
135 |   remark #15301: OpenMP SIMD LOOP WAS VECTORIZED
136 | - at this point running the code shows wrong results (Warning with SIMD, be aware of the full control)
- try to use #pragma simd reduction (solution in the file GSimulation-simd.cpp)
  NB remember that the simd reduction is not allowed on `particles->acc_x[i]`
139 |   Solution: 
140 |     - cp GSimulation.cpp GSimulation.cpp.bkp
141 |     - cp GSimulation-simd.cpp GSimulation.cpp
142 |   recompile and run
143 |   remark #15301: OpenMP SIMD LOOP WAS VECTORIZED
144 | - rerun and show that the result is now correct
145 | 
146 | ### ver4
147 | This is the clean solution of the ver3 after all modification done live in the
148 | previous session.
149 | One should run this version in the same way as before and:
150 | - show the new performance numbers
151 | - describe the Intel® Advisor result
152 | - generate the compiler report
153 | - explain remark #15389: vectorization support: reference ... has `unaligned` access 
154 | - explain the data alignment with examples and the alignment size (16/32/64 bytes)
- explain peel and remainder loops
156 | 
### ver5
158 | This is the solution of the ver4, with all the allocations replaced by the memory
159 | alignment allocation function.
Running this version shows that even after modifying the memory allocation functions,
the compiler still treats the data as not aligned. One needs to use the function `__assume_aligned(...)`.
162 | Recompile the code adding the option: -DASALIGN.
163 | One should run again this version with the alignment option and:
164 | - show the new performance numbers
165 | - describe the Intel® Advisor result
166 | - generate the compiler report
167 | 
168 | This concludes the basic vectorization part of the demo.
169 | At this point, only two topics are missing:
170 | - advanced cache optimization (loop-tiling) (ver6)
171 | - enabling OpenMP (ver7)
172 | 
173 | ### ver6
174 | This is the cache optimized version of the code, without OpenMP.
The performance depends on the size of the tile and the number of particles.
One should run this version again and:
- describe in detail what this kind of optimization is and how it depends on the tile size
178 | - show the new performance numbers
179 | - describe the Intel® Advisor result
180 | - generate the compiler report
181 | 
182 | ### ver7
183 | This is the version of the code with OpenMP. Play with the number of threads,
184 | openmp scheduling and threads affinity.
185 | 
186 | ### ver8
187 | This is the version of the code with OpenMP and cache tiling.
188 | One can also play with the floating point model -fp-model fast=2, for example and
189 | look for further performance improvements.
190 | 


--------------------------------------------------------------------------------
/ver5/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optmization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |  
102 |   const int alignment = 64;
103 |   particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment);
104 | 
105 |   particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
106 |   particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
107 |   particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
108 |   particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
109 |   particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
110 |   particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
111 |   particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
112 |   particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
113 |   particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
114 |   particles->mass  = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
115 |  
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |  
125 |   const float softeningSquared = 1.e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const double t0 = time.start();
137 |   for (int s=1; s<=get_nsteps(); ++s)
138 |   {   
139 |    ts0 += time.start(); 
140 |    for (i = 0; i < n; i++)// update acceleration
141 |    {
142 |      __assume_aligned(particles->pos_x, alignment);
143 |      __assume_aligned(particles->pos_y, alignment);
144 |      __assume_aligned(particles->pos_z, alignment);
145 |      __assume_aligned(particles->acc_x, alignment);
146 |      __assume_aligned(particles->acc_y, alignment);
147 |      __assume_aligned(particles->acc_z, alignment);
148 |      __assume_aligned(particles->mass, alignment);
149 | 
150 |      real_type ax_i = particles->acc_x[i];
151 |      real_type ay_i = particles->acc_y[i];
152 |      real_type az_i = particles->acc_z[i];
153 |      for (j = 0; j < n; j++)
154 |      {
155 |          real_type dx, dy, dz;
156 | 	 real_type distanceSqr = 0.0f;
157 | 	 real_type distanceInv = 0.0f;
158 | 		  
159 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
160 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
161 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
162 | 	
163 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
164 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
165 | 
166 | 	 ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
167 | 	 ay_i  += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
168 | 	 az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
169 |      }
170 |      particles->acc_x[i] = ax_i;
171 |      particles->acc_y[i] = ay_i;
172 |      particles->acc_z[i] = az_i;
173 |    }
174 |    energy = 0;
175 | 
176 |    for (i = 0; i < n; ++i)// update position
177 |    {
178 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
179 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
180 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
181 | 	  
182 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
183 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
184 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
185 | 
186 |      particles->acc_x[i] = 0.;
187 |      particles->acc_y[i] = 0.;
188 |      particles->acc_z[i] = 0.;
189 | 	
190 |      energy += particles->mass[i] * (
191 | 	       particles->vel_x[i]*particles->vel_x[i] + 
192 |                particles->vel_y[i]*particles->vel_y[i] +
193 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
194 |    }
195 |   
196 |     _kenergy = 0.5 * energy; 
197 |     
198 |     ts1 += time.stop();
199 |     if(!(s%get_sfreq()) ) 
200 |     {
201 |       nf += 1;      
202 |       std::cout << " " 
203 | 		<<  std::left << std::setw(8)  << s
204 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
205 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
206 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
207 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
208 | 		<<  std::endl;
209 |       if(nf > 2) 
210 |       {
211 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
212 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
213 |       }
214 |       
215 |       ts0 = 0;
216 |       ts1 = 0;
217 |     }
218 |   
219 |   } //end of the time step loop
220 |   
221 |   const double t1 = time.stop();
222 |   _totTime  = (t1-t0);
223 |   _totFlops = gflops*get_nsteps();
224 |   
225 |   av/=(double)(nf-2);
226 |   dev=sqrt(dev/(double)(nf-2)-av*av);
227 |   
228 |   int nthreads=1;
229 | 
230 |   std::cout << std::endl;
231 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
232 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
233 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
234 |   std::cout << "===============================" << std::endl;
235 | 
236 | }
237 | 
238 | 
239 | void GSimulation :: print_header()
240 | {
241 | 	    
242 |   std::cout << " nPart = " << get_npart()  << "; " 
243 | 	    << "nSteps = " << get_nsteps() << "; " 
244 | 	    << "dt = "     << get_tstep()  << std::endl;
245 | 	    
246 |   std::cout << "------------------------------------------------" << std::endl;
247 |   std::cout << " " 
248 | 	    <<  std::left << std::setw(8)  << "s"
249 | 	    <<  std::left << std::setw(8)  << "dt"
250 | 	    <<  std::left << std::setw(12) << "kenergy"
251 | 	    <<  std::left << std::setw(12) << "time (s)"
252 | 	    <<  std::left << std::setw(12) << "GFlops"
253 | 	    <<  std::endl;
254 |   std::cout << "------------------------------------------------" << std::endl;
255 | 
256 | 
257 | }
258 | 
259 | GSimulation :: ~GSimulation()
260 | {
261 |   _mm_free(particles->pos_x);
262 |   _mm_free(particles->pos_y);
263 |   _mm_free(particles->pos_z);
264 |   _mm_free(particles->vel_x);
265 |   _mm_free(particles->vel_y);
266 |   _mm_free(particles->vel_z);
267 |   _mm_free(particles->acc_x);
268 |   _mm_free(particles->acc_y);
269 |   _mm_free(particles->acc_z);
270 |   _mm_free(particles->mass);
271 |   _mm_free(particles);
272 | 
273 | }
274 | 


--------------------------------------------------------------------------------
/ver7/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optmization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | 
 45 | void GSimulation :: init_pos()
 46 | {
 47 |   std::random_device rd;        //random number generator
 48 |   std::mt19937 gen(42);
 49 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 50 | 
 51 |   for(int i=0; i<get_npart(); ++i)
 52 |   {
 53 |     particles->pos_x[i] = unif_d(gen);
 54 |     particles->pos_y[i] = unif_d(gen);
 55 |     particles->pos_z[i] = unif_d(gen);
 56 |   }
 57 | }
 58 | 
 59 | void GSimulation :: init_vel()
 60 | {
 61 |   std::random_device rd;        //random number generator
 62 |   std::mt19937 gen(42);
 63 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 64 | 
 65 |   for(int i=0; i<get_npart(); ++i)
 66 |   {
 67 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 69 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 70 |   }
 71 | }
 72 | 
 73 | void GSimulation :: init_acc()
 74 | {
 75 |   for(int i=0; i<get_npart(); ++i)
 76 |   {
 77 |     particles->acc_x[i] = 0.f;
 78 |     particles->acc_y[i] = 0.f;
 79 |     particles->acc_z[i] = 0.f;
 80 |   }
 81 | }
 82 | 
 83 | void GSimulation :: init_mass()
 84 | {
 85 |   real_type n   = static_cast<real_type> (get_npart());
 86 |   std::random_device rd;        //random number generator
 87 |   std::mt19937 gen(42);
 88 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 89 | 
 90 |   for(int i=0; i<get_npart(); ++i)
 91 |   {
 92 |     particles->mass[i] = n * unif_d(gen);
 93 |   }
 94 | }
 95 | 
 96 | void GSimulation :: start() 
 97 | {
 98 |   real_type energy;
 99 |   real_type dt = get_tstep();
100 |   int n = get_npart();
101 |   int i,j;
102 |  
103 |   const int alignment = 64;
104 |   particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment);
105 | 
106 |   particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
107 |   particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
108 |   particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
109 |   particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
110 |   particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
111 |   particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
112 |   particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
113 |   particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
114 |   particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
115 |   particles->mass  = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
116 |  
117 |   init_pos();	
118 |   init_vel();
119 |   init_acc();
120 |   init_mass();
121 |   
122 |   print_header();
123 |   
124 |   _totTime = 0.; 
125 |   
126 |   const float softeningSquared = 1.e-3f;
127 |   const float G = 6.67259e-11f;
128 |   
129 |   CPUTime time;
130 |   double ts0 = 0;
131 |   double ts1 = 0;
132 |   double nd = double(n);
133 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
134 |   double av=0.0, dev=0.0;
135 |   int nf = 0;
136 |   
137 |   const double t0 = time.start();
138 |   for (int s=1; s<=get_nsteps(); ++s)
139 |   {   
140 |    ts0 += time.start();
141 | #pragma omp parallel for 
142 |    for (i = 0; i < n; i++)// update acceleration
143 |    {
144 |      __assume_aligned(particles->pos_x, alignment);
145 |      __assume_aligned(particles->pos_y, alignment);
146 |      __assume_aligned(particles->pos_z, alignment);
147 |      __assume_aligned(particles->acc_x, alignment);
148 |      __assume_aligned(particles->acc_y, alignment);
149 |      __assume_aligned(particles->acc_z, alignment);
150 |      __assume_aligned(particles->mass, alignment);
151 |      
152 |      real_type ax_i = particles->acc_x[i];
153 |      real_type ay_i = particles->acc_y[i];
154 |      real_type az_i = particles->acc_z[i];
155 |      
156 |      for (j = 0; j < n; j++)
157 |      {
158 |          real_type dx, dy, dz;
159 | 	 real_type distanceSqr = 0.0f;
160 | 	 real_type distanceInv = 0.0f;
161 | 		  
162 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
163 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
164 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
165 | 	
166 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
167 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
168 | 
169 | 	 ax_i += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
170 | 	 ay_i += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
171 | 	 az_i += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
172 |      }
173 |      particles->acc_x[i] = ax_i;
174 |      particles->acc_y[i] = ay_i;
175 |      particles->acc_z[i] = az_i;
176 |    }
177 |    energy = 0;
178 | #pragma omp parallel for reduction(+:energy) 
179 |    for (i = 0; i < n; ++i)// update position
180 |    {
181 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
182 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
183 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
184 | 	  
185 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
186 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
187 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
188 | 
189 |      particles->acc_x[i] = 0.;
190 |      particles->acc_y[i] = 0.;
191 |      particles->acc_z[i] = 0.;
192 | 	
193 |      energy += particles->mass[i] * (
194 | 	       particles->vel_x[i]*particles->vel_x[i] + 
195 |                particles->vel_y[i]*particles->vel_y[i] +
196 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
197 |    }
198 |   
199 |     _kenergy = 0.5 * energy; 
200 |     
201 |     ts1 += time.stop();
202 |     if(!(s%get_sfreq()) ) 
203 |     {
204 |       nf += 1;      
205 |       std::cout << " " 
206 | 		<<  std::left << std::setw(8)  << s
207 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
208 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
209 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
210 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
211 | 		<<  std::endl;
212 |       if(nf > 2) 
213 |       {
214 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
215 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
216 |       }
217 |       
218 |       ts0 = 0;
219 |       ts1 = 0;
220 |     }
221 |   
222 |   } //end of the time step loop
223 |   
224 |   const double t1 = time.stop();
225 |   _totTime  = (t1-t0);
226 |   _totFlops = gflops*get_nsteps();
227 |   
228 |   av/=(double)(nf-2);
229 |   dev=sqrt(dev/(double)(nf-2)-av*av);
230 |   
231 |   int nthreads=1;
232 |  #pragma omp parallel
233 |   nthreads=omp_get_num_threads();
234 | 
235 |   std::cout << std::endl;
236 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
237 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
238 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
239 |   std::cout << "===============================" << std::endl;
240 | 
241 | }
242 | 
243 | 
244 | void GSimulation :: print_header()
245 | {
246 | 	    
247 |   std::cout << " nPart = " << get_npart()  << "; " 
248 | 	    << "nSteps = " << get_nsteps() << "; " 
249 | 	    << "dt = "     << get_tstep()  << std::endl;
250 | 	    
251 |   std::cout << "------------------------------------------------" << std::endl;
252 |   std::cout << " " 
253 | 	    <<  std::left << std::setw(8)  << "s"
254 | 	    <<  std::left << std::setw(8)  << "dt"
255 | 	    <<  std::left << std::setw(12) << "kenergy"
256 | 	    <<  std::left << std::setw(12) << "time (s)"
257 | 	    <<  std::left << std::setw(12) << "GFlops"
258 | 	    <<  std::endl;
259 |   std::cout << "------------------------------------------------" << std::endl;
260 | 
261 | 
262 | }
263 | 
264 | GSimulation :: ~GSimulation()
265 | {
266 |   _mm_free(particles->pos_x);
267 |   _mm_free(particles->pos_y);
268 |   _mm_free(particles->pos_z);
269 |   _mm_free(particles->vel_x);
270 |   _mm_free(particles->vel_y);
271 |   _mm_free(particles->vel_z);
272 |   _mm_free(particles->acc_x);
273 |   _mm_free(particles->acc_y);
274 |   _mm_free(particles->acc_z);
275 |   _mm_free(particles->mass);
276 |   _mm_free(particles);
277 | 
278 | }
279 | 


--------------------------------------------------------------------------------
/ver6/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optmization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   const int alignment = 64;
103 |   particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment);
104 | 
105 |   particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
106 |   particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
107 |   particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
108 |   particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
109 |   particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
110 |   particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
111 |   particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
112 |   particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
113 |   particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
114 |   particles->mass  = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
115 |   
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1.e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const int tileSize = 8;
137 | 
138 |   const double t0 = time.start();
139 |   for (int s=1; s<=get_nsteps(); ++s)
140 |   {   
141 |    ts0 += time.start(); 
142 |    for (int ii = 0; ii < n; ii += tileSize )
143 |    {
144 |      real_type acc_xtile[tileSize];
145 |      real_type acc_ytile[tileSize] ;
146 |      real_type acc_ztile[tileSize];
147 | #pragma omp simd
148 |      for(int s=0; s<tileSize; s++)
149 |      {
150 |        acc_xtile[s] = 0.0f;
151 |        acc_ytile[s] = 0.0f;
152 |        acc_ztile[s] = 0.0f;
153 |      }
154 |      __assume_aligned(particles->pos_x, alignment);
155 |      __assume_aligned(particles->pos_y, alignment);
156 |      __assume_aligned(particles->pos_z, alignment);
157 |      __assume_aligned(particles->acc_x, alignment);
158 |      __assume_aligned(particles->acc_y, alignment);
159 |      __assume_aligned(particles->acc_z, alignment);
160 |      __assume_aligned(particles->mass, alignment);
161 |      
162 |      real_type ax_i = particles->acc_x[i];
163 |      real_type ay_i = particles->acc_y[i];
164 |      real_type az_i = particles->acc_z[i];
165 | #pragma omp simd
166 |      for (j = 0; j < n; j++)
167 |      {
168 |       for (int i = ii; i < ii + tileSize; i++)
169 |        {
170 |          real_type dx, dy, dz;
171 | 	 real_type distanceSqr = 0.0f;
172 | 	 real_type distanceInv = 0.0f;
173 | 		  
174 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
175 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
176 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
177 | 	
178 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
179 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
180 | 
181 | 	acc_xtile[i-ii] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
182 | 	acc_ytile[i-ii] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
183 | 	acc_ztile[i-ii] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
184 |         }
185 |      }
186 | #pragma omp simd
187 |      for(int s=0; s<tileSize; s++)
188 |      {
189 |        particles->acc_x[s+ii] = acc_xtile[s];
190 |        particles->acc_y[s+ii] = acc_ytile[s];
191 |        particles->acc_z[s+ii] = acc_ztile[s];
192 |      }
193 |    }
194 |    energy = 0;
195 | 
196 |    for (i = 0; i < n; ++i)// update position
197 |    {
198 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
199 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
200 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
201 | 	  
202 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
203 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
204 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
205 | 
206 |      particles->acc_x[i] = 0.;
207 |      particles->acc_y[i] = 0.;
208 |      particles->acc_z[i] = 0.;
209 | 	
210 |      energy += particles->mass[i] * (
211 | 	       particles->vel_x[i]*particles->vel_x[i] + 
212 |                particles->vel_y[i]*particles->vel_y[i] +
213 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
214 |    }
215 |   
216 |     _kenergy = 0.5 * energy; 
217 |     
218 |     ts1 += time.stop();
219 |     if(!(s%get_sfreq()) ) 
220 |     {
221 |       nf += 1;      
222 |       std::cout << " " 
223 | 		<<  std::left << std::setw(8)  << s
224 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
225 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
226 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
227 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
228 | 		<<  std::endl;
229 |       if(nf > 2) 
230 |       {
231 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
232 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
233 |       }
234 |       
235 |       ts0 = 0;
236 |       ts1 = 0;
237 |     }
238 |   
239 |   } //end of the time step loop
240 |   
241 |   const double t1 = time.stop();
242 |   _totTime  = (t1-t0);
243 |   _totFlops = gflops*get_nsteps();
244 |   
245 |   av/=(double)(nf-2);
246 |   dev=sqrt(dev/(double)(nf-2)-av*av);
247 |   
248 |   int nthreads=1;
249 | 
250 |   std::cout << std::endl;
251 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
252 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
253 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
254 |   std::cout << "===============================" << std::endl;
255 | 
256 | }
257 | 
258 | 
259 | void GSimulation :: print_header()
260 | {
261 | 	    
262 |   std::cout << " nPart = " << get_npart()  << "; " 
263 | 	    << "nSteps = " << get_nsteps() << "; " 
264 | 	    << "dt = "     << get_tstep()  << std::endl;
265 | 	    
266 |   std::cout << "------------------------------------------------" << std::endl;
267 |   std::cout << " " 
268 | 	    <<  std::left << std::setw(8)  << "s"
269 | 	    <<  std::left << std::setw(8)  << "dt"
270 | 	    <<  std::left << std::setw(12) << "kenergy"
271 | 	    <<  std::left << std::setw(12) << "time (s)"
272 | 	    <<  std::left << std::setw(12) << "GFlops"
273 | 	    <<  std::endl;
274 |   std::cout << "------------------------------------------------" << std::endl;
275 | 
276 | 
277 | }
278 | 
279 | GSimulation :: ~GSimulation()
280 | {
281 |   _mm_free(particles->pos_x);
282 |   _mm_free(particles->pos_y);
283 |   _mm_free(particles->pos_z);
284 |   _mm_free(particles->vel_x);
285 |   _mm_free(particles->vel_y);
286 |   _mm_free(particles->vel_z);
287 |   _mm_free(particles->acc_x);
288 |   _mm_free(particles->acc_y);
289 |   _mm_free(particles->acc_z);
290 |   _mm_free(particles->mass);
291 |   _mm_free(particles);
292 | }
293 | 


--------------------------------------------------------------------------------
/ver8/GSimulation.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This file is part of the example codes which have been used
  3 |     for the "Code Optmization Workshop".
  4 |     
  5 |     Copyright (C) 2016  Fabio Baruffa <fbaru-dev@gmail.com>
  6 | 
  7 |     This program is free software: you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation, either version 3 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License
 18 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | #include "GSimulation.hpp"
 22 | #include "cpu_time.hpp"
 23 | 
 24 | GSimulation :: GSimulation()
 25 | {
 26 |   std::cout << "===============================" << std::endl;
 27 |   std::cout << " Initialize Gravity Simulation" << std::endl;
 28 |   set_npart(16000); 
 29 |   set_nsteps(10);
 30 |   set_tstep(0.1); 
 31 |   set_sfreq(1);
 32 | }
 33 | 
 34 | void GSimulation :: set_number_of_particles(int N)  
 35 | {
 36 |   set_npart(N);
 37 | }
 38 | 
 39 | void GSimulation :: set_number_of_steps(int N)  
 40 | {
 41 |   set_nsteps(N);
 42 | }
 43 | 
 44 | void GSimulation :: init_pos()
 45 | {
 46 |   std::random_device rd;        //random number generator
 47 |   std::mt19937 gen(42);
 48 |   std::uniform_real_distribution<real_type> unif_d(0,1.0);
 49 | 
 50 |   for(int i=0; i<get_npart(); ++i)
 51 |   {
 52 |     particles->pos_x[i] = unif_d(gen);
 53 |     particles->pos_y[i] = unif_d(gen);
 54 |     particles->pos_z[i] = unif_d(gen);
 55 |   }
 56 | }
 57 | 
 58 | void GSimulation :: init_vel()
 59 | {
 60 |   std::random_device rd;        //random number generator
 61 |   std::mt19937 gen(42);
 62 |   std::uniform_real_distribution<real_type> unif_d(-1.0,1.0);
 63 | 
 64 |   for(int i=0; i<get_npart(); ++i)
 65 |   {
 66 |     particles->vel_x[i] = unif_d(gen) * 1.0e-3f;
 67 |     particles->vel_y[i] = unif_d(gen) * 1.0e-3f;
 68 |     particles->vel_z[i] = unif_d(gen) * 1.0e-3f;
 69 |   }
 70 | }
 71 | 
 72 | void GSimulation :: init_acc()
 73 | {
 74 |   for(int i=0; i<get_npart(); ++i)
 75 |   {
 76 |     particles->acc_x[i] = 0.f;
 77 |     particles->acc_y[i] = 0.f;
 78 |     particles->acc_z[i] = 0.f;
 79 |   }
 80 | }
 81 | 
 82 | void GSimulation :: init_mass()
 83 | {
 84 |   real_type n   = static_cast<real_type> (get_npart());
 85 |   std::random_device rd;        //random number generator
 86 |   std::mt19937 gen(42);
 87 |   std::uniform_real_distribution<real_type> unif_d(0.0,1.0);
 88 | 
 89 |   for(int i=0; i<get_npart(); ++i)
 90 |   {
 91 |     particles->mass[i] = n * unif_d(gen);
 92 |   }
 93 | }
 94 | 
 95 | void GSimulation :: start() 
 96 | {
 97 |   real_type energy;
 98 |   real_type dt = get_tstep();
 99 |   int n = get_npart();
100 |   int i,j;
101 |   
102 |   const int alignment = 64;
103 |   particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment);
104 | 
105 |   particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
106 |   particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
107 |   particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
108 |   particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
109 |   particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
110 |   particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
111 |   particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
112 |   particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
113 |   particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
114 |   particles->mass  = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
115 |   
116 |   init_pos();	
117 |   init_vel();
118 |   init_acc();
119 |   init_mass();
120 |   
121 |   print_header();
122 |   
123 |   _totTime = 0.; 
124 |   
125 |   const float softeningSquared = 1.e-3f;
126 |   const float G = 6.67259e-11f;
127 |   
128 |   CPUTime time;
129 |   double ts0 = 0;
130 |   double ts1 = 0;
131 |   double nd = double(n);
132 |   double gflops = 1e-9 * ( (11. + 18. ) * nd*nd  +  nd * 19. );
133 |   double av=0.0, dev=0.0;
134 |   int nf = 0;
135 |   
136 |   const int tileSize = 8;
137 | 
138 |   const double t0 = time.start();
139 |   for (int s=1; s<=get_nsteps(); ++s)
140 |   {   
141 |    ts0 += time.start();
142 | #pragma omp parallel for 
143 |    for (int ii = 0; ii < n; ii += tileSize )
144 |    {
145 |      real_type acc_xtile[tileSize];
146 |      real_type acc_ytile[tileSize] ;
147 |      real_type acc_ztile[tileSize];
148 | #pragma omp simd
149 |      for(int s=0; s<tileSize; s++)
150 |      {
151 |        acc_xtile[s] = 0.0f;
152 |        acc_ytile[s] = 0.0f;
153 |        acc_ztile[s] = 0.0f;
154 |      }
155 |      __assume_aligned(particles->pos_x, alignment);
156 |      __assume_aligned(particles->pos_y, alignment);
157 |      __assume_aligned(particles->pos_z, alignment);
158 |      __assume_aligned(particles->acc_x, alignment);
159 |      __assume_aligned(particles->acc_y, alignment);
160 |      __assume_aligned(particles->acc_z, alignment);
161 |      __assume_aligned(particles->mass, alignment);
162 |      
163 |      real_type ax_i = particles->acc_x[i];
164 |      real_type ay_i = particles->acc_y[i];
165 |      real_type az_i = particles->acc_z[i];
166 | #pragma omp simd
167 |      for (j = 0; j < n; j++)
168 |      {
169 |       for (int i = ii; i < ii + tileSize; i++)
170 |        {
171 |          real_type dx, dy, dz;
172 | 	 real_type distanceSqr = 0.0f;
173 | 	 real_type distanceInv = 0.0f;
174 | 		  
175 | 	 dx = particles->pos_x[j] - particles->pos_x[i];	//1flop
176 | 	 dy = particles->pos_y[j] - particles->pos_y[i];	//1flop	
177 | 	 dz = particles->pos_z[j] - particles->pos_z[i];	//1flop
178 | 	
179 |  	 distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared;	//6flops
180 |  	 distanceInv = 1.0f / sqrtf(distanceSqr);			//1div+1sqrt
181 | 
182 | 	acc_xtile[i-ii] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
183 | 	acc_ytile[i-ii] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
184 | 	acc_ztile[i-ii] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
185 |         }
186 |      }
187 | #pragma omp simd
188 |      for(int s=0; s<tileSize; s++)
189 |      {
190 |        particles->acc_x[s+ii] = acc_xtile[s];
191 |        particles->acc_y[s+ii] = acc_ytile[s];
192 |        particles->acc_z[s+ii] = acc_ztile[s];
193 |      }
194 |    }
195 |    energy = 0;
196 | #pragma omp parallel for reduction(+:energy)
197 |    for (i = 0; i < n; ++i)// update position
198 |    {
199 |      particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
200 |      particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
201 |      particles->vel_z[i] += particles->acc_z[i] * dt; //2flops
202 | 	  
203 |      particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
204 |      particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
205 |      particles->pos_z[i] += particles->vel_z[i] * dt; //2flops
206 | 
207 |      particles->acc_x[i] = 0.;
208 |      particles->acc_y[i] = 0.;
209 |      particles->acc_z[i] = 0.;
210 | 	
211 |      energy += particles->mass[i] * (
212 | 	       particles->vel_x[i]*particles->vel_x[i] + 
213 |                particles->vel_y[i]*particles->vel_y[i] +
214 |                particles->vel_z[i]*particles->vel_z[i]); //7flops
215 |    }
216 |   
217 |     _kenergy = 0.5 * energy; 
218 |     
219 |     ts1 += time.stop();
220 |     if(!(s%get_sfreq()) ) 
221 |     {
222 |       nf += 1;      
223 |       std::cout << " " 
224 | 		<<  std::left << std::setw(8)  << s
225 | 		<<  std::left << std::setprecision(5) << std::setw(8)  << s*get_tstep()
226 | 		<<  std::left << std::setprecision(5) << std::setw(12) << _kenergy
227 | 		<<  std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
228 | 		<<  std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
229 | 		<<  std::endl;
230 |       if(nf > 2) 
231 |       {
232 | 	av  += gflops*get_sfreq()/(ts1 - ts0);
233 | 	dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
234 |       }
235 |       
236 |       ts0 = 0;
237 |       ts1 = 0;
238 |     }
239 |   
240 |   } //end of the time step loop
241 |   
242 |   const double t1 = time.stop();
243 |   _totTime  = (t1-t0);
244 |   _totFlops = gflops*get_nsteps();
245 |   
246 |   av/=(double)(nf-2);
247 |   dev=sqrt(dev/(double)(nf-2)-av*av);
248 |   
249 |   int nthreads=1;
250 |   #pragma omp parallel
251 |   nthreads=omp_get_num_threads();
252 |   
253 |   std::cout << std::endl;
254 |   std::cout << "# Number Threads     : " << nthreads << std::endl;	   
255 |   std::cout << "# Total Time (s)     : " << _totTime << std::endl;
256 |   std::cout << "# Average Perfomance : " << av << " +- " <<  dev << std::endl;
257 |   std::cout << "===============================" << std::endl;
258 | 
259 | }
260 | 
261 | 
262 | void GSimulation :: print_header()
263 | {
264 | 	    
265 |   std::cout << " nPart = " << get_npart()  << "; " 
266 | 	    << "nSteps = " << get_nsteps() << "; " 
267 | 	    << "dt = "     << get_tstep()  << std::endl;
268 | 	    
269 |   std::cout << "------------------------------------------------" << std::endl;
270 |   std::cout << " " 
271 | 	    <<  std::left << std::setw(8)  << "s"
272 | 	    <<  std::left << std::setw(8)  << "dt"
273 | 	    <<  std::left << std::setw(12) << "kenergy"
274 | 	    <<  std::left << std::setw(12) << "time (s)"
275 | 	    <<  std::left << std::setw(12) << "GFlops"
276 | 	    <<  std::endl;
277 |   std::cout << "------------------------------------------------" << std::endl;
278 | 
279 | 
280 | }
281 | 
282 | GSimulation :: ~GSimulation()
283 | {
284 |   _mm_free(particles->pos_x);
285 |   _mm_free(particles->pos_y);
286 |   _mm_free(particles->pos_z);
287 |   _mm_free(particles->vel_x);
288 |   _mm_free(particles->vel_y);
289 |   _mm_free(particles->vel_z);
290 |   _mm_free(particles->acc_x);
291 |   _mm_free(particles->acc_y);
292 |   _mm_free(particles->acc_z);
293 |   _mm_free(particles->mass);
294 |   _mm_free(particles);
295 | }
296 | 


--------------------------------------------------------------------------------