├── .gitignore
├── app
│   ├── integrate
│   │   ├── run-integration
│   │   └── integrate.py
│   └── particle
│       ├── example_json
│       ├── params.py
│       ├── run_md_simulation
│       ├── particle_simulation_test.py
│       ├── util.py
│       ├── Particle.py
│       ├── Partition.py
│       └── particle_simulation.py
├── reference
│   ├── SimpleHTTPServer.md
│   ├── pdsh.md
│   ├── IOPerfTesting.md
│   └── GitReference.md
├── README.md
├── scripts
│   └── shutdown
├── documentation
│   └── TinyTitanSetup.md
└── setup
    ├── compact_cori_setup
    └── compact_cori_master_setup

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# VIM
*.swp

--------------------------------------------------------------------------------
/app/integrate/run-integration:
--------------------------------------------------------------------------------
#PBS -q regular
#PBS -l mppwidth=48
#PBS -l walltime=00:01:00

cd $PBS_O_WORKDIR
module load python
module swap python python/3.4
aprun -n 48 python3 integrate.py

--------------------------------------------------------------------------------
/reference/SimpleHTTPServer.md:
--------------------------------------------------------------------------------
### `SimpleHTTPServer`
#### What is `SimpleHTTPServer`
`SimpleHTTPServer` runs a server that serves files from the current working
directory. (In Python 3, the `SimpleHTTPServer` module was renamed to
`http.server`, which is why the command below differs from the title.)

#### Usage
`cd` to the directory you want to run the server from (e.g. the directory that
contains `index.html`) and run `python3 -m http.server 8080`.

--------------------------------------------------------------------------------
/reference/pdsh.md:
--------------------------------------------------------------------------------
### `pdsh`
#### What is `pdsh`
`pdsh` runs commands on remote systems in parallel, thereby allowing a sysadmin
to execute a command on multiple slaves without having to `ssh` to each one
individually. Note that `pdsh` only runs shell commands remotely; it is not a
tool for running parallel jobs, MPI programs, etc.

#### Usage
Run `pdsh [command]` to run `[command]` on all remote systems. If you press `^c`,
`pdsh` will print out the status of current threads. If you press `^c` again
within one second, a SIGINT will be sent and the job will be terminated.

If you don't specify a command, commands will be run interactively.
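
For example, assuming `WCOLL` points at a file listing the worker hostnames (as
the setup scripts in this repository arrange via `/etc/pdsh/machines`), checking
uptime across the cluster might look like:
```
pdsh uptime
```
`pdsh` also accepts an explicit host list, e.g. `pdsh -w compactcori[2-15] uptime`.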
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### Compact Cori

Currently, the installation of Compact Cori at CRT requires that two scripts be
run to start the simulation. One of the nodes (`CompactCori16`) is currently
being used as the visualization node (running Ubuntu instead of Debian).

To run the simulation:

1. Ensure the simulation is running on the master node (`CompactCori1`) by
verifying that the LEDs are flashing with each timestep. If not, use
`run_md_simulation`.
2. Start the visualization server on the visualization node. Run the script on
the desktop to start the visualization server.
3. Point Firefox to `localhost:8081` to view the simulation.

--------------------------------------------------------------------------------
/app/particle/example_json:
--------------------------------------------------------------------------------
{
    "params": {
        "num_particles": 1000,
        "num_active_workers": 15,
        "simulation_height": 1000,
        "simulation_width": 1000,
        "simulation_depth": 1000
    },
    "particles": [
        {
            "particle_id": 5,
            "thread_num": 3,
            "position": [13.1249, 53.289, 530.235],
            "velocity": [1.35, 4.234, 935.90],
            "mass": 13,
            "radius": 78,
            "neighbors": []
        },
        {
            "particle_id": 58,
            "thread_num": 8,
            "position": [56, 48.563, 985],
            "velocity": [-1.1034, 9.45, 0.1],
            "mass": 15,
            "radius": 6,
            "neighbors": []
        }
    ]
}

--------------------------------------------------------------------------------
/reference/IOPerfTesting.md:
--------------------------------------------------------------------------------
### Running IO Performance Tests
#### Using `dd`
`dd` is a utility that converts and copies a file according to the given
parameters. The syntax is:
```
dd if=[input file] of=[output file] [args]
```
For example, to write a 5 GiB file from `/dev/zero`, run
```
dd if=/dev/zero of=filename bs=1G count=5
```
(A single `bs=5G` block would be silently capped: Linux limits a single read to
just under 2 GiB, so use a block size of 1 GiB or less with a larger count.)

#### The `time` command
To see how long a command takes to run, use the `time` command. Run `time
[command you want to run]` and when the command exits, time stats will be
printed to STDOUT. For example:
```
❱ time dd if=/dev/zero of=large bs=2G count=1

0+1 records in
0+1 records out
2147479552 bytes (2.1 GB) copied, 11.6774 s, 184 MB/s

real    0m11.734s
user    0m0.000s
sys     0m2.660s

```
Note the `0+1 records` above: `dd` performed a single partial read because of
the per-read limit, so slightly less than the requested 2 GiB was written.
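
Writes like the ones above usually land in the page cache first, so the
reported rate can overstate what the disk sustains. A hedged variation (GNU
`dd` options) is to bypass the cache with direct I/O, or to force a flush to
disk before `dd` exits so the writeback is included in the timing:
```
time dd if=/dev/zero of=testfile bs=1M count=1024 oflag=direct
time dd if=/dev/zero of=testfile bs=1M count=1024 conv=fdatasync
```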
13 | """ 14 | 15 | num_particles = None 16 | simulation_height = None 17 | simulation_width = None 18 | simulation_depth = None 19 | dt = None 20 | num_active_workers = None 21 | new_num_active_workers = None 22 | partitions = {} 23 | max_radius = None 24 | timesteps_per_second = None 25 | init_total_energy = None 26 | curr_total_energy = None 27 | 28 | comm = None 29 | rank = None 30 | num_threads = None 31 | -------------------------------------------------------------------------------- /app/integrate/integrate.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI as mpi 2 | from scipy import integrate as sci_integrate 3 | import numpy as np 4 | 5 | comm = mpi.COMM_WORLD 6 | size = comm.Get_size() 7 | rank = comm.Get_rank() 8 | 9 | def parallel_integrate(start, end, function = np.sin, num_samples = 100000): 10 | width = (end - start)/(num_samples*size) 11 | 12 | # Where this thread starts integrating 13 | local_start = start + width*num_samples*rank 14 | local_end = local_start + width*num_samples 15 | 16 | area = np.zeros(1) 17 | 18 | if rank: 19 | totalArea = None 20 | else: 21 | totalArea = np.zeros(1) 22 | 23 | for i in range(num_samples): 24 | height = function(local_start + i*width) 25 | area += width * height 26 | 27 | comm.Reduce(area, totalArea, root=0) 28 | 29 | if not rank: 30 | print(totalArea) 31 | 32 | # execute main 33 | if __name__ == "__main__": 34 | parallel_integrate(0, np.pi) 35 | parallel_integrate(0, 2*np.pi) 36 | 37 | -------------------------------------------------------------------------------- /scripts/shutdown: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # shutdown - run shutdown -h now on all nodes of Tiny Titan 4 | # 5 | # Author: Nicholas Fong 6 | # Lawrence Berkeley National Laboratory 7 | # National Energy Research Scientific Computing Center 8 | # 9 | # Usage: shutdown [num_nodes] 10 | # 11 | # Acknowledgment: 12 | # This work was supported by the Director, Office of Science, 13 | # Division of Mathematical, Information, and Computational 14 | # Sciences of the U.S. Department of Energy under contract 15 | # DE-AC02-05CH11231. 16 | # 17 | # This research used resources of the National Energy Research 18 | # Scientific Computing Center, which is supported by the Office 19 | # of Science of the U.S. Department of Energy. 20 | # 21 | # Update History: 22 | # YYYY-MM-DD INITIALS - DESCRIPTION 23 | # ***************************************************************************** 24 | # 2015-06-23 NF - Wrote initial script 25 | 26 | PROG=$(basename $0) 27 | 28 | error() { 29 | echo -e "ERROR: $*" >&2 30 | exit 1 31 | } 32 | 33 | usage() { 34 | echo "USAGE: $PROG [num_nodes]" 35 | } 36 | 37 | test $# -ne 1 && error "too few arguments" && usage 38 | 39 | for (( i=2; i<="$1"; i++ )); do 40 | echo -n "Shutting down pi$i... " 41 | ssh pi$i sudo shutdown -h now 42 | echo "Done" 43 | done 44 | sleep 3 45 | 46 | echo "Master node going down... 
" 47 | sleep 3 48 | sudo shutdown -h now 49 | -------------------------------------------------------------------------------- /app/particle/run_md_simulation: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # run_md_simulation - run the MD simulation on Compact Cori 4 | # 5 | # Author: Nicholas Fong 6 | # Lawrence Berkeley National Laboratory 7 | # National Energy Research Scientific Computing Center 8 | # 9 | # Usage: run_md_simulation [num_tasks] [num_particlecs] 10 | # 11 | # Acknowledgment: 12 | # This work was supported by the Director, Office of Science, 13 | # Division of Mathematical, Information, and Computational 14 | # Sciences of the U.S. Department of Energy under contract 15 | # DE-AC02-05CH11231. 16 | # 17 | # This research used resources of the National Energy Research 18 | # Scientific Computing Center, which is supported by the Office 19 | # of Science of the U.S. Department of Energy. 20 | # 21 | # Update History: 22 | # YYYY-MM-DD INITIALS - DESCRIPTION 23 | # ***************************************************************************** 24 | # 2015-08-12 NF - Wrote initial script 25 | 26 | PROG=$(basename $0) 27 | 28 | error() { 29 | echo -e "ERROR: $*" >&2 30 | exit 1 31 | } 32 | 33 | usage() { 34 | echo "USAGE: $PROG [num_tasks] [num_particlecs]" 35 | 36 | } 37 | 38 | test $# -ne 2 && error "too few arguments" && usage 39 | 40 | for i in {2..15}; do 41 | scp -r ~/CompactCori/app/particle/* compactcori$i:~/CompactCori/app/particle/ 42 | done 43 | 44 | mpirun -n $1 --hostfile /home/ccori/mpihostsfile python3 ~/CompactCori/app/particle/particle_simulation.py -n $2 45 | #mpirun -n $1 python3 ~/CompactCori/app/particle/particle_simulation.py -n $2 46 | 47 | -------------------------------------------------------------------------------- /documentation/TinyTitanSetup.md: -------------------------------------------------------------------------------- 1 | #### Tiny Titan Setup 2 | 1. Install Rasbian on each node 3 | 1. On all nodes except the master, download the `pi_setup.sh` file from ORNL's 4 | GitHub repo: `https://raw.github.com/TinyTitan/TinySetup/master/pi_setup.sh` 5 | 1. In the `pi_setup.sh` script, change the `apt-get upgrade` to `apt-get 6 | dist-upgrade` 7 | 1. Make `pi_setup.sh` executable by running `chmod u+x pi_setup.sh` 8 | 1. Run the script with `./pi_setup.sh` 9 | 1. On the master node, clone the setup GitHub repo: `git clone 10 | https://github.com/TinyTitan/TinySetup.git` 11 | 1. `cd` to the `TinySetup` directory and make `pi_setup.sh` and 12 | `pi_post_setup.sh` executable: `chmod u+x pi_setup.sh; chmod u+x 13 | pi_post_setup.sh`. Then run both scripts 14 | 15 | ####Install SPH 16 | 1. Clone the SPH repository hosted on the [TinyTitan GitHub Project 17 | Page](https://github.com/TinyTitan/SPH). 18 | 1. `cd` into the SPH directory and run `make` followed by `make run` 19 | 1. Copy the `sph.out` file to your `$HOME` directory. This is necessary because 20 | running `make run` copies `sph.out` to the `$HOME` directory of each Pi, and 21 | running the simulation looks for the `sph.out` file in the same location as 22 | where it is on the master node, so it's easier to move the file on the master 23 | node rather than all the slave nodes. 24 | 25 | ####Install PiBrot 26 | 1. Clone the PiBrot repository hosted on the [TinyTitan GitHub Project 27 | Page](https://github.com/TinyTitan/PiBrot/). 28 | 1. 
--------------------------------------------------------------------------------
/app/particle/run_md_simulation:
--------------------------------------------------------------------------------
#!/bin/bash
#
# run_md_simulation - run the MD simulation on Compact Cori
#
# Author: Nicholas Fong
#     Lawrence Berkeley National Laboratory
#     National Energy Research Scientific Computing Center
#
# Usage: run_md_simulation [num_tasks] [num_particles]
#
# Acknowledgment:
#     This work was supported by the Director, Office of Science,
#     Division of Mathematical, Information, and Computational
#     Sciences of the U.S. Department of Energy under contract
#     DE-AC02-05CH11231.
#
#     This research used resources of the National Energy Research
#     Scientific Computing Center, which is supported by the Office
#     of Science of the U.S. Department of Energy.
#
# Update History:
# YYYY-MM-DD INITIALS - DESCRIPTION
# *****************************************************************************
# 2015-08-12 NF - Wrote initial script

PROG=$(basename $0)

error() {
    echo -e "ERROR: $*" >&2
    exit 1
}

usage() {
    echo "USAGE: $PROG [num_tasks] [num_particles]"
}

# Print usage before calling error, since error exits the script
test $# -ne 2 && usage && error "expected exactly two arguments"

# Copy the latest simulation sources to the worker nodes
for i in {2..15}; do
    scp -r ~/CompactCori/app/particle/* compactcori$i:~/CompactCori/app/particle/
done

mpirun -n $1 --hostfile /home/ccori/mpihostsfile python3 ~/CompactCori/app/particle/particle_simulation.py -n $2
#mpirun -n $1 python3 ~/CompactCori/app/particle/particle_simulation.py -n $2

--------------------------------------------------------------------------------
/documentation/TinyTitanSetup.md:
--------------------------------------------------------------------------------
#### Tiny Titan Setup
1. Install Raspbian on each node
1. On all nodes except the master, download the `pi_setup.sh` file from ORNL's
GitHub repo: `https://raw.github.com/TinyTitan/TinySetup/master/pi_setup.sh`
1. In the `pi_setup.sh` script, change the `apt-get upgrade` to `apt-get
dist-upgrade`
1. Make `pi_setup.sh` executable by running `chmod u+x pi_setup.sh`
1. Run the script with `./pi_setup.sh`
1. On the master node, clone the setup GitHub repo: `git clone
https://github.com/TinyTitan/TinySetup.git`
1. `cd` to the `TinySetup` directory and make `pi_setup.sh` and
`pi_post_setup.sh` executable: `chmod u+x pi_setup.sh; chmod u+x
pi_post_setup.sh`. Then run both scripts

#### Install SPH
1. Clone the SPH repository hosted on the [TinyTitan GitHub Project
Page](https://github.com/TinyTitan/SPH).
1. `cd` into the SPH directory and run `make` followed by `make run`
1. Copy the `sph.out` file to your `$HOME` directory. This is necessary because
running `make run` copies `sph.out` to the `$HOME` directory of each Pi, and
running the simulation looks for the `sph.out` file in the same location as
where it is on the master node, so it's easier to move the file on the master
node rather than on all the slave nodes.

#### Install PiBrot
1. Clone the PiBrot repository hosted on the [TinyTitan GitHub Project
Page](https://github.com/TinyTitan/PiBrot/).
1. `cd` into the PiBrot directory and edit the `Makefile` to include the `-lm`
argument in `CCFLAGS`
1. Edit the `Makefile` to use the `-f ~/.mpihostsfile` argument instead
of IP addresses for the mpirun command. For example, `mpirun -f
/home/pi/pi_mpihostsfile -n 9 /home/pi/pibrot`
1. Run `make` followed by `make run`

#### Run SPH
1. Make a directory called `scripts` in the master node's `$HOME` directory
1. Create a file called `xboxlaunch.sh` in the `scripts` directory. Edit it to
read:
```
sudo rmmod xpad
sudo xboxdrv --config ~/SPH/controller_1.cnf --silent &
```
1. Set up a cron job that runs at reboot. Run `crontab -e` and add `@reboot
/home/pi/scripts/xboxlaunch.sh`
1. Start SPH automatically at boot by editing `~/.config/autostart/xbox.desktop`
to include:
```
[Desktop Entry]

Type=Application

Exec=/home/pi/TinySetup/startsph
```
1. Run the `startsph` script in the `TinySetup` directory to run the simulation

--------------------------------------------------------------------------------
/app/particle/particle_simulation_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
"""
Unit test file for particle_simulation.py

NOTE: these tests were written against an earlier 2-D version of the Particle
API (scalar x/y positions, populate_neighbors, move_particle) and have not yet
been updated for the current 3-D Particle/Partition implementation.
"""
import unittest
from particle_simulation import Particle

class TestParticleMethods(unittest.TestCase):
    def test_euclidean_distance(self):
        a = Particle(1, 10, 10)
        b = Particle(1, 10, 20)
        self.assertEqual(a.euclidean_distance_to(b), (10, 0, 10))

        a = Particle(1, 0, 0)
        b = Particle(1, 0, 0)
        self.assertEqual(a.euclidean_distance_to(b), (0, 0, 0))

        a = Particle(1, -10, 20)
        b = Particle(1, 10, 20)
        self.assertEqual(a.euclidean_distance_to(b), (20, 20, 0))

        a = Particle(1, -18, 24)
        b = Particle(1, 7, 32)
        self.assertEqual(a.euclidean_distance_to(b), (26.248809496813376, 25, 8))

        a = Particle(1, -100, 0)
        b = Particle(1, -0.01, 0)
        self.assertEqual(a.euclidean_distance_to(b), (99.99, 99.99, 0))

    def test_populate_neighbors(self):
        """
        Assumes an interaction radius of 100
        """
        a = Particle(1, -100, 0)
        b = Particle(1, 100, 1)
        c = Particle(1, 250, 100)
        Particle.static_particles = [a, b, c]
        self.assertEqual(Particle.static_particles, [a, b, c])

        b.populate_neighbors()
        self.assertEqual(b.neighbors, [])

        b = Particle(1, -0.01, 0)
        Particle.static_particles = [a, b, c]
        self.assertEqual(Particle.static_particles, [a, b, c])

        a.populate_neighbors()
        b.populate_neighbors()
        dist = a.euclidean_distance_to(b)
        self.assertEqual(len(b.neighbors), 1)
        self.assertEqual(b.neighbors[0], (a, dist[1], dist[2]))

    def test_move_particle(self):
        """
        Make sure the particle always stays within the bounds of the height and
        width of the simulation. Assumes a 1000 by 1000 simulation size.
        """

        test_height = 1000
        test_width = 1000

        a = Particle(1, -100, 0)
        a.x_velocity = 900
        a.y_velocity = 480

        for _ in range(10000000):
            a.move_particle()
            self.assertGreater(test_width, a.x_position)
            self.assertLessEqual(0, a.x_position)
            self.assertGreaterEqual(test_height, a.y_position)
            self.assertLessEqual(0, a.y_position)

if __name__ == '__main__':
    unittest.main()
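
# Run the suite directly with: python3 particle_simulation_test.py
# or via unittest: python3 -m unittest particle_simulation_test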
57 | """ 58 | 59 | test_height = 1000 60 | test_width = 1000 61 | 62 | a = Particle(1, -100, 0) 63 | a.x_velocity = 900 64 | a.y_velocity = 480 65 | 66 | for _ in range(10000000): 67 | a.move_particle() 68 | self.assertGreater(test_width, a.x_position) 69 | self.assertLessEqual(0, a.x_position) 70 | self.assertGreaterEqual(test_height, a.y_position) 71 | self.assertLessEqual(0, a.y_position) 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /app/particle/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Utility functions 3 | Author: Nicholas Fong 4 | Lawrence Berkeley National Laboratory 5 | National Energy Research Scientific Computing Center 6 | 7 | Acknowledgment: 8 | This work was supported by the Director, Office of Science, 9 | Division of Mathematical, Information, and Computational 10 | Sciences of the U.S. Department of Energy under contract 11 | DE-AC02-05CH11231, using resources of the National Energy Research 12 | Scientific Computing Center. 13 | """ 14 | import params 15 | import math 16 | import traceback 17 | from Particle import Particle 18 | 19 | def info(string): 20 | """Print a message in blue to STDOUT""" 21 | CSI="\x1B[" 22 | print(CSI + "31;36m" + "[INFO] " + string + CSI + "31;0m") 23 | 24 | def debug(string): 25 | """Print a message in yellow to STDOUT""" 26 | CSI="\x1B[" 27 | print(CSI + "31;93m" + "[DEBUG] " + string + CSI + "31;0m") 28 | 29 | def error(string): 30 | """Print a message in red to STDOUT and print a stack trace""" 31 | CSI="\x1B[" 32 | print(CSI + "31;31m" + "[ERROR] " + string + CSI + "31;0m") 33 | print("Stack:") 34 | traceback.print_stack() 35 | exit(1) 36 | 37 | def validate_int(*args): 38 | for arg in args: 39 | if type(arg) is not int: 40 | error(ArgumentError, "incorrect type argument: " + type(arg) + 41 | "was passed instead of a int") 42 | 43 | def validate_list(*args): 44 | for arg in args: 45 | if type(arg) is not list: 46 | error(ArgumentError, "incorrect type argument: " + type(arg) + 47 | "was passed instead of a int") 48 | 49 | def validate_particle_set(*args): 50 | for arg in args: 51 | if type(arg) is not set: 52 | error(ArgumentError, "incorrect type argument: " + type(arg) + 53 | "was passed instead of a set") 54 | for obj in arg: 55 | if type(obj) is not Particle: 56 | error(ArgumentError, "Non-particle type in set; received a " + 57 | type(obj) + " instead of a Particle") 58 | 59 | def determine_particle_thread_num(x_position): 60 | result = math.ceil((x_position/params.simulation_width)*params.num_active_workers) 61 | #print("Got that the thread num should be "+str(result)+"given: x_position " + str(x_position) + " simulation_width of " + str(params.simulation_width) + " and with " + str(params.new_num_active_workers) + " active workers") 62 | if result == 0: 63 | debug("Got that the thread num should be 0 given: x_position " + str(x_position) + " simulation_width of " + str(params.simulation_width) + " and with " + str(params.num_active_workers) + " active workers") 64 | return result 65 | 66 | def determine_new_particle_thread_num(x_position): 67 | result = math.ceil((x_position/params.simulation_width)*params.new_num_active_workers) 68 | if result == 0: 69 | debug("Got that the thread num should be 0 given: x_position " + str(x_position) + " simulation_width of " + str(params.simulation_width) + " and with " + str(params.new_num_active_workers) + " active 
workers") 70 | return result 71 | 72 | -------------------------------------------------------------------------------- /setup/compact_cori_setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # compact_cori_setup: set up a node in the Compact Cori cluster. Assumes a 4 | # clean Debian install, that each node is named 5 | # CompactCori[i] for some integer i, and that this script is 6 | # run as root 7 | # 8 | # Author: Nicholas Fong 9 | # Lawrence Berkeley National Laboratory 10 | # National Energy Research Scientific Computing Center 11 | # 12 | # Usage: compact_cori_setup [-h ][-n node_number] 13 | # 14 | # Acknowledgment: 15 | # This work was supported by the Director, Office of Science, 16 | # Division of Mathematical, Information, and Computational 17 | # Sciences of the U.S. Department of Energy under contract 18 | # DE-AC02-05CH11231. 19 | # 20 | # This research used resources of the National Energy Research 21 | # Scientific Computing Center, which is supported by the Office 22 | # of Science of the U.S. Department of Energy. 23 | # 24 | # Update History: 25 | # YYYY-MM-DD INITIALS - DESCRIPTION 26 | # ***************************************************************************** 27 | # 2015-06-19 NF - Wrote initial script 28 | # 2015-06-23 NF - Added pdsh to installed packages 29 | # 2015-06-30 NF - Add chown for SSH keys 30 | # 2015-07-01 NF - Add apt proxy 31 | 32 | PROG=$(basename $0) 33 | 34 | error() { 35 | echo -e "ERROR: $*" >&2 36 | exit 1 37 | } 38 | 39 | usage() { 40 | echo "USAGE: $PROG -n node_number -u user_name" 41 | } 42 | 43 | while getopts "hn:u:" OPTION; do 44 | case $OPTION in 45 | h) 46 | usage 47 | exit 0 48 | ;; 49 | u) 50 | user_name="$OPTARG" 51 | ;; 52 | n) 53 | node_num="$OPTARG" 54 | ;; 55 | esac 56 | done 57 | 58 | # Argument Sanitation 59 | re='^[0-9]+$' 60 | if ! [[ $node_num =~ $re ]] ; then 61 | error "Invalid argument: please enter an integer after the -n flag" 62 | fi 63 | 64 | if [[ -z $user_name ]]; then 65 | error "Username is a required argument" && usage 66 | fi 67 | 68 | node_name="CompactCori$node_num" 69 | 70 | # Ensure this script is being run as root 71 | if [[ $EUID -ne 0 ]]; then 72 | error "This script must be run as root" 73 | fi 74 | 75 | echo "Running apt-get update and dist-upgrade" 76 | apt-get -y --force-yes update 77 | apt-get -y --force-yes dist-upgrade 78 | echo "Installing git pdsh vim mpich2 xboxdrv libglew-dev sshpass libav-tools tmux python3-mpi4py ntp" 79 | apt-get -y --force-yes install git pdsh vim mpich2 xboxdrv libglew-dev sshpass libav-tools tmux python3-mpi4py ntp libusb-1.0-0-dev 80 | 81 | echo -n "Creating backup copy of /etc/network/interfaces... " 82 | cp /etc/network/interfaces /etc/network/interfaces-backup 83 | echo "Done" 84 | 85 | echo -n "Setting network interface... " 86 | tee /etc/network/interfaces <<-EOF 87 | auto lo 88 | 89 | iface lo inet loopback 90 | 91 | # iface eth0 inet dhcp 92 | 93 | auto eth1 94 | iface eth1 inet static 95 | address 10.0.0.$(($node_num+100)) 96 | #gateway 10.0.0.1 97 | netmask 255.0.0.0 98 | network 10.0.0.0 99 | broadcast 10.255.255.255 100 | 101 | # allow-hotplug wlan0 102 | # iface wlan0 inet manual 103 | # wpa-roam /etc/wpa_supplicant/wpa_supplicant.conf 104 | # iface default inet dhcp 105 | EOF 106 | echo "Done" 107 | 108 | echo -n "Generating SSH keys... 
" 109 | ssh-keygen -N '' -b 4096 -f /home/$user_name/.ssh/id_rsa 110 | chown $user_name:$user_name /home/$user_name/.ssh/* 111 | echo "Done" 112 | 113 | echo -n "Setting up apt proxy... " 114 | echo 'Acquire::http { Proxy "http://10.0.0.101:3142"; }' > /etc/apt/apt.conf 115 | echo "Done" 116 | 117 | echo "Sleeping for 10 seconds and then shutting down: ^C to postpone shutdown... " 118 | sleep 10 119 | shutdown -h now 120 | -------------------------------------------------------------------------------- /setup/compact_cori_master_setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # compact_cori_master_setup: set up the master node in the Compact Cori cluster 4 | # 5 | # Author: Nicholas Fong 6 | # Lawrence Berkeley National Laboratory 7 | # National Energy Research Scientific Computing Center 8 | # 9 | # Usage: compact_cori_master_setup -u username -n num_nodes 10 | # 11 | # Acknowledgment: 12 | # This work was supported by the Director, Office of Science, 13 | # Division of Mathematical, Information, and Computational 14 | # Sciences of the U.S. Department of Energy under contract 15 | # DE-AC02-05CH11231. 16 | # 17 | # This research used resources of the National Energy Research 18 | # Scientific Computing Center, which is supported by the Office 19 | # of Science of the U.S. Department of Energy. 20 | # 21 | # Update History: 22 | # YYYY-MM-DD INITIALS - DESCRIPTION 23 | # ***************************************************************************** 24 | # 2015-06-19 NF - Wrote initial script 25 | # 2015-06-23 NF - Added pdsh and root SSH key transfer 26 | # 2015-06-24 NF - Minor bugfixes and greater verbosity 27 | # 2015-06-26 NF - Fix sed commands for sshd config 28 | # 2015-06-30 NF - Add chown 29 | # 2015-07-01 NF - Add apt proxying 30 | 31 | PROG=$(basename $0) 32 | 33 | error() { 34 | echo -e "ERROR: $*" >&2 35 | exit 1 36 | } 37 | 38 | usage() { 39 | echo "USAGE: $PROG -u username -n num_nodes" 40 | } 41 | 42 | while getopts "hn:u:p:" OPTION; do 43 | case $OPTION in 44 | h) 45 | usage 46 | exit 0 47 | ;; 48 | n) 49 | num_nodes="$OPTARG" 50 | ;; 51 | u) 52 | user_name="$OPTARG" 53 | ;; 54 | esac 55 | done 56 | 57 | # Argument Sanitation 58 | re='^[0-9]+$' 59 | if ! [[ $num_nodes =~ $re ]]; then 60 | error "Invalid argument: please enter an integer after the -n flag" && usage 61 | fi 62 | 63 | if [[ -z $user_name ]]; then 64 | error "Username is a required argument" && usage 65 | fi 66 | 67 | echo "Enter the passphrase for $user_name followed by [return]:" 68 | read pass_phrase 69 | 70 | if [[ -z $pass_phrase ]]; then 71 | error "Please enter the password for $user_name when prompted. Aborting." && usage 72 | fi 73 | 74 | echo -n "Setting up apt-cacher-ng..." 75 | apt-get -y --force-yes install apt-cacher-ng 76 | 77 | echo "Removing ~/mpihostsfile" 78 | rm -f /home/$user_name/mpihostsfile 79 | echo "Removing ~/.ssh/authorized_keys" 80 | rm -f /home/$user_name/.ssh/authorized_keys 81 | 82 | echo "Generating /home/$user_name/mpihostsfile" 83 | echo "Generating /home/$user_name/.ssh/authorized_keys" 84 | echo "Updating /etc/hosts" 85 | 86 | sed -i '/10.0.0.*/d' /etc/hosts 87 | 88 | echo -n "Setting up root SSH key... " 89 | ssh-keygen -N '' -b 8192 -f /root/.ssh/id_rsa 90 | echo "Done" 91 | 92 | for (( i=1; i<=$num_nodes; i++ )); do 93 | node_name="CompactCori$i" 94 | num=$(($i+100)) 95 | ip=10.0.0.$num 96 | echo -n "Adding $node_name to mpihostsfile and /etc/hosts... 
" 97 | echo "$ip" >> /home/$user_name/mpihostsfile 98 | echo "$ip CompactCori$i" >> /etc/hosts 99 | echo "Done" 100 | 101 | echo -n "Getting remote public key and adding to authorized keys... " 102 | sshpass -p "$pass_phrase" scp -o StrictHostKeyChecking=no $user_name@$node_name:~/.ssh/id_rsa.pub tmp_key 103 | cat tmp_key >> /home/$user_name/.ssh/authorized_keys 104 | chown $user_name:$user_name /home/$user_name/.ssh/authorized_keys 105 | echo "Done" 106 | done 107 | 108 | for (( i=2; i<=$num_nodes; i++ )); do 109 | node_name="CompactCori$i" 110 | echo -n "Copying authorized_keys to $node_name... " 111 | sshpass -p "$pass_phrase" scp -o StrictHostKeyChecking=no /home/$user_name/.ssh/authorized_keys $user_name@$node_name:~/.ssh/authorized_keys 112 | echo "Done" 113 | 114 | #echo "Enter the root passphrase followed by [return]:" 115 | #read root_pass_phrase 116 | # 117 | #if [[ -z $root_pass_phrase ]]; then 118 | # error "Please enter the root password when prompted. Aborting." 119 | #fi 120 | # echo "Copying root SSH key to $node_name..." 121 | # sshpass -p "$root_pass_phrase" scp -o StrictHostKeyChecking=no /root/.ssh/id_rsa.pub root@$node_name:/root/.ssh/authorized_keys 122 | done 123 | 124 | chown $user_name:$user_name /home/$user_name/mpihostsfile 125 | rm tmp_key 126 | 127 | echo "Done" 128 | 129 | echo -n "Setting up pdsh... " 130 | rm -f /etc/profile.d/pdsh.sh 131 | echo "export PDSH_RCMD_TYPE='ssh'" >> /etc/profile.d/pdsh.sh 132 | echo "export WCOLL='/etc/pdsh/machines'" >> /etc/profile.d/pdsh.sh 133 | 134 | rm -f /etc/pdsh/machines 135 | for (( i=2; i<=$num_nodes; i++ )); do 136 | echo "CompactCori$i" >> /etc/pdsh/machines 137 | done 138 | 139 | echo "Done" 140 | 141 | echo "Sleeping for 10 seconds and then rebooting: ^C to postpone reboot... " 142 | sleep 10 143 | shutdown -r now 144 | -------------------------------------------------------------------------------- /app/particle/Particle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | Author: Nicholas Fong 4 | Lawrence Berkeley National Laboratory 5 | National Energy Research Scientific Computing Center 6 | 7 | Acknowledgment: 8 | This work was supported by the Director, Office of Science, 9 | Division of Mathematical, Information, and Computational 10 | Sciences of the U.S. Department of Energy under contract 11 | DE-AC02-05CH11231, using resources of the National Energy Research 12 | Scientific Computing Center. 
13 | """ 14 | 15 | import util 16 | import params 17 | import math 18 | 19 | class Particle: 20 | """Particle class for MD simulation.""" 21 | 22 | def __init__(self, particle_id, thread_num, position, velocity, mass, 23 | radius): 24 | util.validate_list(position, velocity) 25 | util.validate_int(particle_id, thread_num, mass, radius) 26 | 27 | self.particle_id = particle_id 28 | self.thread_num = thread_num 29 | self.position = position 30 | self.velocity = velocity 31 | self.mass = mass 32 | self.radius = radius 33 | 34 | def jsonify(self, indent = 4): 35 | """Hacky conversion to JSON to avoid infinite loop with jsonify and 36 | nested neighbors 37 | """ 38 | ## TODO: Debug this to DRY out this method 39 | # particle_dict = self.__dict__ 40 | # util.info("Dict looks like: " + str(particle_dict)) 41 | # json = json.dumps(particle_dict, sort_keys = True, indent=4) 42 | ## json = json.dumps(particle_dict, default=lambda obj: obj.__dict__, sort_keys = True, indent=4) 43 | # util.info("And now it looks like: " + str(json)) 44 | # json = "\n".join((" " * indent) + i for i in json.splitlines()) 45 | 46 | json = " " * indent + "{\n" 47 | json += " " * 2 * indent + "\"particle_id\": " + str(self.particle_id) + ",\n" 48 | json += " " * 2 * indent + "\"thread_num\": " + str(self.thread_num) + ",\n" 49 | json += " " * 2 * indent + "\"position\": " + str(self.position) + ",\n" 50 | json += " " * 2 * indent + "\"velocity\": " + str(self.velocity) + ",\n" 51 | json += " " * 2 * indent + "\"mass\": " + str(self.mass) + ",\n" 52 | json += " " * 2 * indent + "\"radius\": " + str(self.radius) + "\n" 53 | json += " " * indent + "},\n" 54 | return json 55 | 56 | def euclidean_distance_to(self, particle): 57 | """Return the 3D euclidean distance between this Particle and another""" 58 | x = self.position[0] - particle.position[0] 59 | y = self.position[1] - particle.position[1] 60 | z = self.position[2] - particle.position[2] 61 | center_to_center = math.sqrt((x**2) + (y**2) + (z**2)) 62 | #return (center_to_center - self.radius - particle.radius, (x, y, z)) 63 | return (center_to_center, (x, y, z)) 64 | 65 | def update_velocity(self, particles): 66 | """Populate the list of neighbors with particles that are colliding this 67 | particle 68 | """ 69 | neighbors = [] 70 | for particle in particles: 71 | euclidean_distance, distances = self.euclidean_distance_to(particle) 72 | #if euclidean_distance <= 0 and particle is not self: 73 | if euclidean_distance <= 5*(self.radius + particle.radius) and particle is not self: 74 | neighbors.append((particle, distances, euclidean_distance)) 75 | 76 | #p_energy = 0 77 | max_force = 5000 78 | for neighbor, distances, euclidean_distance in neighbors: 79 | #p_energy = p_energy + 5000 *( ( 1 / euclidean_distance ) - (1 / (5*(self.radius + particle.radius) ) ) 80 | for i in range(3): 81 | #max_force = 10 82 | if euclidean_distance == 0: 83 | force = max_force 84 | else: 85 | force = params.force * (1 * (distances[i])/euclidean_distance**3)/self.mass 86 | if force > max_force: 87 | force = max_force 88 | elif force < -1*max_force: 89 | force = -1*max_force 90 | self.velocity[i] += force 91 | 92 | def update_position(self, time): 93 | """Update the position of this Particle based on the velocity of the 94 | Particle 95 | """ 96 | delta = [component*time for component in self.velocity] 97 | # util.info("Delta is " + str(delta)) 98 | self.position[0] += delta[0] 99 | self.position[1] += delta[1] 100 | self.position[2] += delta[2] 101 | 102 | if any(d > self.radius for d in 
    def update_velocity(self, particles):
        """Update this Particle's velocity from pairwise forces with every
        particle within five combined radii of this one
        """
        neighbors = []
        for particle in particles:
            euclidean_distance, distances = self.euclidean_distance_to(particle)
            #if euclidean_distance <= 0 and particle is not self:
            if euclidean_distance <= 5*(self.radius + particle.radius) and particle is not self:
                neighbors.append((particle, distances, euclidean_distance))

        #p_energy = 0
        max_force = 5000
        for neighbor, distances, euclidean_distance in neighbors:
            #p_energy = p_energy + 5000 *( ( 1 / euclidean_distance ) - (1 / (5*(self.radius + particle.radius) ) )
            for i in range(3):
                #max_force = 10
                if euclidean_distance == 0:
                    force = max_force
                else:
                    force = params.force * (distances[i]/euclidean_distance**3)/self.mass
                # Clamp the force to [-max_force, max_force]
                if force > max_force:
                    force = max_force
                elif force < -1*max_force:
                    force = -1*max_force
                self.velocity[i] += force

    def update_position(self, time):
        """Update the position of this Particle based on the velocity of the
        Particle
        """
        delta = [component*time for component in self.velocity]
        # util.info("Delta is " + str(delta))
        self.position[0] += delta[0]
        self.position[1] += delta[1]
        self.position[2] += delta[2]

        # Damp particles that move more than their own radius in one step;
        # compare absolute displacement so fast negative motion is caught too
        if any(abs(d) > self.radius for d in delta):
            util.debug(str(self.particle_id) + " is moving a distance of more than self.radius")
            self.velocity[0] = self.velocity[0]/2
            self.velocity[1] = self.velocity[1]/2
            self.velocity[2] = self.velocity[2]/2

        # Bounce particles off edge of simulation
        simulation = [params.simulation_width, params.simulation_height, params.simulation_depth]
        for i in range(3):
            while self.position[i] < 0 or self.position[i] > simulation[i]:
                # util.debug(str(self.particle_id) + " is out of bounds: " + str(self.position) + " and is going this fast:" + str(self.velocity))
                self.velocity[i] *= -1
                self.position[i] = self.position[i]*-1 if self.position[i] < 0\
                        else 2*simulation[i] - self.position[i]
                # util.debug("I am no longer out of bounds: " + str(self.position))
        # util.info("Particle " + str(self.particle_id) + " with mass " + str(self.mass) + " is at " + str(self.position))
        # util.info("Particle " + str(self.particle_id) + " is moving: " + str(self.velocity))

--------------------------------------------------------------------------------
/reference/GitReference.md:
--------------------------------------------------------------------------------
### Git Basics
#### Forking the Repo
1. To work on the repository, you'll have to create a GitHub account and fork
the repository. Navigate to https://github.com/NERSC/CompactCori and click
the `Fork` button in the upper right hand corner. This will create a public
copy of the CompactCori repository on your GitHub account
2. Clone the repository to Edison. SSH to Edison by running `ssh
[your_username]@edison.nersc.gov`. Run `git clone
git@github.com:[your_github_username]/CompactCori`
3. Create an upstream remote for your repository. `cd` into the `CompactCori`
directory and add the upstream remote: `git remote add upstream
https://github.com/NERSC/CompactCori.git`

#### Working with Branches
You should never make changes directly on the `master` branch. All changes
should be done on a separate branch from master (unless you have some
compelling reason to branch off of a branch that's not master). Remember that
the work on master is considered production -- that is, it is assumed to be
free of bugs and issues. This is called "branch per feature" in agile software
engineering. A short example flow appears below.

1. First, make sure you're on the master branch using `git status`.
   1. If you're on another branch, `checkout` to master: `git checkout master`
2. To create a new branch, run `git checkout -b [branchname]` where you replace
[branchname] with a hyphenated descriptive name for your new branch
3. To switch between branches, run `git checkout [branchname]` where
[branchname] is the name of the branch you want to switch to
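
For example, a hypothetical branch-per-feature flow might look like:
```
git checkout master
git checkout -b fix-force-calculation
# ...edit and commit on the new branch...
git checkout master
```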
#### Committing Files
1. Run `git add [filename(s)]` to **stage** the file. This is equivalent to
telling Git "When I commit, please include any changes made to this file"
2. Run `git commit` to commit all the files you added. In the resulting commit
message, write a short (<50 character) summary of what changes the commit
contains. You should also write your subject line in the imperative mood. A
great way to test to see if you're doing things right is to prepend "Applying
this commit will [your summary here]". Note that this works for the example
commit message below: "Applying this commit will fix particle list and force
calculation". If the commit summary read "Fixed particle list and force
calculation", the sentence would read "Applying this commit will fixed particle
list and force calculation", which is obviously grammatically incorrect. Then
write the body of the commit -- a more detailed (perhaps bulleted) list of
changes. Be sure that your text is wrapped at 72 characters and that you
leave a blank line between the summary and body. For example:
```
Fix particle list and force calculation

- Make particles a static class variable
- Check if particle is self
- Fix division by zero error in calculate_force

```
3. Run `git pull --rebase` to make sure your code is up-to-date with what's
already on GitHub. Fix conflicts as necessary (feel free to ask me for help
if you need help)
4. Run `git push origin [branchname]` to push your changes to GitHub so everyone
else can see your work

#### Reverting One File
At some point, you may find that you need to revert one file to a previous
version that you committed. This is trivial in Git, as the example after these
steps shows.

1. Look up the SHA1 sum of the commit you want to revert the file to. You can
use `git log [path/to/file]`, replacing `[path/to/file]` with the path to the
file you want to revert
1. Checkout the file to that version by running `git checkout [SHA1-sum]
[path/to/file]`, replacing `[SHA1-sum]` with the respective SHA1 sum you
found in the first step and replacing `[path/to/file]` with the path to the
file you want to revert
1. Commit the change, following the commit instructions above
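
For example, with a hypothetical abbreviated SHA1 sum `abc1234`:
```
git log app/particle/Particle.py
git checkout abc1234 app/particle/Particle.py
git commit
```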
#### Reverting All Files
If you make major changes to files tracked by Git that completely break what
you're working on, you can safely revert the repository back to a previously
committed state. Note that when you do this, you lose history, so you **cannot
undo this** and you **will lose any changes you've made since the commit you're
reverting to**. If you're sure you want to revert the repository:

1. Look up the SHA1 sum of the commit you want to revert the files to. You can
use `git log` to do this.
1. Run `git reset --hard [SHA1-sum]`, replacing `[SHA1-sum]` with the respective
SHA1 sum you found in the first step

#### Updating to Upstream and Opening a Pull Request
Before you create a Pull Request to merge your code back into the NERSC Git
repository, you'll want to make sure your branches are up-to-date. You can do
this by updating your master branch off of the upstream remote we set up
earlier, and then rebasing your code off of your updated master before pull
requesting.

1. Run `git fetch upstream` to fetch all the branches in the upstream repository
2. Run `git checkout master` to change to the master branch
3. Rebase your master branch on the upstream branch by running `git rebase
upstream/master`. If there are merge conflicts, resolve them (or ask me for
help if you're unsure of what's going on)
4. Rebase your working branches on your newly updated master branch: `git
checkout [branchname]; git rebase master`. Again, resolve merge conflicts if
necessary (or ask for help)
5. Push your working branch back to GitHub. Since you rebased, which changed
the commit history of the branch, a regular `git push` will probably fail with
a message about the push being rejected. Run `git push --force` to force push
your changes to your branch. Be careful when doing this; make sure you're on
the correct branch
6. Create a pull request by navigating to GitHub, clicking on the Pull Requests
icon, and clicking the green button to create a new PR. Choose the branch
you want to merge back into the NERSC repo, add a description, and submit the
PR for a code review. A combined example of steps 1-5 appears below.
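
Putting steps 1-5 together, with a hypothetical branch name:
```
git fetch upstream
git checkout master
git rebase upstream/master
git checkout fix-force-calculation
git rebase master
git push --force origin fix-force-calculation
```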
29 | """ 30 | def __init__(self, thread_num): 31 | """Delta calculation assumes that there will be num_threads - 1 32 | partitions since num_active_workers is incremented within the 33 | constructor 34 | """ 35 | util.validate_int(thread_num) 36 | 37 | self.thread_num = thread_num 38 | self.particles = set() 39 | self.neighbor_particles = set() 40 | self.delta_x = params.simulation_width//(params.num_threads - 1) 41 | self.start_x = self.delta_x*(self.thread_num-1) 42 | self.end_x = params.simulation_width if self.thread_num is params.num_active_workers else self.start_x + self.delta_x 43 | 44 | params.num_active_workers += 1 45 | params.new_num_active_workers += 1 46 | 47 | def update_start_end(self): 48 | """This method is used to pick volume that does not evenly divide into 49 | the number of active workers when the number of workers changes 50 | 51 | Note that the delta is calculated based on the number of active workers 52 | and not the number of threads, since the number of threads no longer 53 | necessarily correlates to the number of active workers 54 | """ 55 | self.delta_x = params.simulation_width//(params.num_active_workers) 56 | self.start_x = self.delta_x*(self.thread_num-1) 57 | self.end_x = params.simulation_width if self.thread_num is params.num_active_workers else self.start_x + self.delta_x 58 | 59 | def add_particles(self, particle_set): 60 | """Add multiple Particles to the set of Particles that this Partition is 61 | responsible for 62 | """ 63 | util.validate_particle_set(particle_set) 64 | for particle in particle_set: 65 | if particle.thread_num != self.thread_num: 66 | util.error("Thread numbers don't match: particle is " + 67 | str(particle.thread_num) + " and self is: " + 68 | str(self.thread_num)) 69 | self.particles = self.particles.union(particle_set) 70 | 71 | def add_particle(self, particle): 72 | """Add a single Particle to the set of Particles that this Partition is 73 | responsible for 74 | """ 75 | if particle.thread_num != self.thread_num: 76 | util.error("Thread numbers don't match: particle is " + 77 | str(particle.thread_num) + " and self is: " + 78 | str(self.thread_num)) 79 | self.particles.add(particle) 80 | 81 | def remove_particles(self, particle_set): 82 | """Remove a set of particles from this Partition""" 83 | util.validate_particle_set(particle_set) 84 | self.particles.difference_update(particle_set) 85 | 86 | def set_particles(self, particle_set): 87 | """Overwrite the set of Particles that tis Partition is responsible for. 88 | This is used when changing the number of Partitions 89 | """ 90 | util.validate_particle_set(particle_set) 91 | for particle in particle_set: 92 | if particle.thread_num != self.thread_num: 93 | util.error("Thread numbers don't match: particle is " + 94 | str(particle.thread_num) + " and self is: " + 95 | str(self.thread_num)) 96 | self.particles = particle_set 97 | 98 | def particle_is_not_in_range(self, particle): 99 | """Helps determine whether or not a Particle belongs to this Partition 100 | or another partition. This method assumes that the particle will never 101 | be travelling fast enough to jump more than one partition at a time. 
    def particle_is_not_in_range(self, particle):
        """Helps determine whether or not a Particle belongs to this Partition
        or another partition. This method assumes that the particle will never
        be travelling fast enough to jump more than one partition at a time.

        Returns -1 if the particle is in the previous partition
                 0 if the particle is still in this partition
                 1 if the particle is in the next partition
        """
        if particle.position[0] < self.start_x:
            return -1
        elif particle.position[0] > self.end_x:
            return 1
        else:
            return 0

    def handoff_neighboring_particles(self):
        """This method returns all particles that touch the border between
        neighboring partitions
        """
        right = set()
        left = set()
        for particle in self.particles:
            if particle.position[0] + particle.radius + params.max_radius > self.end_x:
                right.add(particle)
            elif particle.position[0] - particle.radius - params.max_radius < self.start_x:
                left.add(particle)
        return (right, left)

    def neighboring_sendrecv(self, sendobj, source_destination, tag):
        """This helper method performs a sendrecv for
        send_and_receive_neighboring_particles
        """
        neighbor_particles = params.comm.sendrecv(sendobj = sendobj,
                dest = source_destination, sendtag = tag,
                source = source_destination, recvtag = tag)
        self.neighbor_particles = self.neighbor_particles.union(neighbor_particles)

    def send_and_receive_neighboring_particles(self):
        """Call handoff_neighboring_particles to get all particles that touch
        the border between this partition and its one (or two) neighbors. Then
        send the neighbor the particle set and add the set that it sends to this
        Partition to self.neighbor_particles by calling neighboring_sendrecv

        There are two steps:
        1. Rank 1 and 2 exchange while
           Rank 3 and 4 exchange while
           ...
        2. Rank 2 and 3 exchange while
           Rank 4 and 5 exchange while
           ...
        """
        right, left = self.handoff_neighboring_particles()
        self.neighbor_particles = set()

        if params.rank == 1:
            if params.num_active_workers != 1:
                self.neighboring_sendrecv(right, self.thread_num + 1, 1)

        elif params.rank == params.num_active_workers and params.rank % 2 == 0:
            self.neighboring_sendrecv(left, self.thread_num - 1, 1)

        elif params.rank == params.num_active_workers and params.rank % 2 == 1:
            self.neighboring_sendrecv(left, self.thread_num - 1, 2)

        elif params.rank % 2 == 0:
            self.neighboring_sendrecv(left, self.thread_num - 1, 1)
            self.neighboring_sendrecv(right, self.thread_num + 1, 2)

        else:
            self.neighboring_sendrecv(right, self.thread_num + 1, 1)
            self.neighboring_sendrecv(left, self.thread_num - 1, 2)
    def interact_particles(self):
        """Do computation and interact particles within this Partition.
        Includes interactions between particles that are bordering this
        Partition. Update the velocity and the position of each particle
        """
        for particle in self.particles:
            particle.update_velocity(self.particles | self.neighbor_particles)
        # for particle in self.particles:
        #     particle.update_velocity()
        for particle in self.particles:
            particle.update_position(params.dt)

    def exchange_sendrecv(self, increment, sendobj, source_destination, tag):
        """This helper method removes the particles from this Partition,
        decrements or increments each particle's thread number, and then
        performs a sendrecv for exchange_particles
        """
        self.remove_particles(sendobj)
        if increment:
            for particle in sendobj:
                particle.thread_num += 1
        else:
            for particle in sendobj:
                particle.thread_num -= 1
        new_particles = params.comm.sendrecv(sendobj = sendobj,
                dest = source_destination, sendtag = tag,
                source = source_destination, recvtag = tag)
        self.add_particles(new_particles)

    def exchange_particles(self):
        """Send particles that should now belong to neighboring partitions to
        neighbors, and receive any particles that now belong to this partition

        Call particle_is_not_in_range for each particle to determine which
        particles should now belong to a different Partition.

        Then, remove the particles from this Partition's list of particles,
        decrement or increment each particle's thread number, and send the
        particle set to the respective neighbor using exchange_sendrecv. Also
        add the set that the neighbor sends to this Partition to self.particles

        There are two steps in the exchange:
        1. Rank 1 and 2 exchange while
           Rank 3 and 4 exchange while
           ...
        2. Rank 2 and 3 exchange while
           Rank 4 and 5 exchange while
           ...
220 | """ 221 | sys.stdout.flush() 222 | right, left = set(), set() 223 | for particle in self.particles: 224 | if particle.thread_num != params.rank: 225 | util.debug("Rank is " + str(params.rank) + " but particle has thread number " + str(particle.thread_num)) 226 | switch = self.particle_is_not_in_range(particle) 227 | if switch is -1: 228 | left.add(particle) 229 | elif switch is 1: 230 | right.add(particle) 231 | 232 | # Send neighbors their new particles 233 | if params.rank == 1: 234 | if params.num_active_workers != 1: 235 | self.exchange_sendrecv(True, right, self.thread_num + 1, 1) 236 | 237 | elif params.rank == params.num_active_workers and params.rank % 2 == 0: 238 | self.exchange_sendrecv(False, left, self.thread_num - 1, 1) 239 | 240 | elif params.rank == params.num_active_workers and params.rank % 2 == 1: 241 | self.exchange_sendrecv(False, left, self.thread_num - 1, 2) 242 | 243 | elif params.rank % 2 == 0: 244 | self.exchange_sendrecv(False, left, self.thread_num - 1, 1) 245 | self.exchange_sendrecv(True, right, self.thread_num + 1, 2) 246 | 247 | else: 248 | self.exchange_sendrecv(True, right, self.thread_num + 1, 1) 249 | self.exchange_sendrecv(False, left, self.thread_num - 1, 2) 250 | 251 | def update_master(self): 252 | """Update the master node with new particles""" 253 | # if len(self.particles) is not 0: 254 | # util.debug("Rank " + str(params.rank) + " is sending back " + str(len(self.particles)) + " particles") 255 | params.comm.send(self.particles, tag = 0) 256 | 257 | def receive_new_particles(self): 258 | """Receive new particle set after changing the number of threads""" 259 | new_particles = params.comm.recv(source = 0, tag = 11) 260 | self.set_particles(new_particles) 261 | #self.update_start_x() 262 | self.update_start_end() 263 | 264 | def __repr__(self): 265 | """Represent a Partition by the number of particles that the Partition 266 | is responsible for 267 | """ 268 | return str(len(self.particles)) 269 | -------------------------------------------------------------------------------- /app/particle/particle_simulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """A parallelized MD simulation in Python written for version 1 of the Compact Cori project at NERSC. 3 | 4 | Only the master node has an accurate params file. Slaves know the number of 5 | active workers, but other parameters are not guaranteed to be accurate. 6 | 7 | Threads are 0-indexed 8 | 9 | The master node does not do any computational work. 10 | 11 | Threads 1-n correspond to the n partitions that do computational work. 12 | 13 | Speedup could possibly occur by using numpy instead of arrays/lists 14 | 15 | Author: Nicholas Fong 16 | Lawrence Berkeley National Laboratory 17 | National Energy Research Scientific Computing Center 18 | 19 | Acknowledgment: 20 | This work was supported by the Director, Office of Science, 21 | Division of Mathematical, Information, and Computational 22 | Sciences of the U.S. Department of Energy under contract 23 | DE-AC02-05CH11231, using resources of the National Energy Research 24 | Scientific Computing Center. 
25 | """ 26 | from Partition import Partition 27 | from Particle import Particle 28 | import util 29 | import params 30 | 31 | import argparse 32 | import random 33 | import math 34 | import threading 35 | import json 36 | import time 37 | from urllib.parse import urlparse 38 | from urllib.parse import parse_qs 39 | from http.server import BaseHTTPRequestHandler 40 | from mpi4py import MPI as mpi 41 | import subprocess 42 | import os 43 | 44 | params.mpi = mpi 45 | params.comm = mpi.COMM_WORLD 46 | params.rank = params.comm.Get_rank() 47 | params.num_threads = params.comm.Get_size() 48 | params.mpi_status = mpi.Status() 49 | 50 | # Parse arguments 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("-n", "--numparticles", type=int, 53 | help = "number of particles in simulation") 54 | parser.add_argument("--height", type=int, 55 | help = "height of simulation ") 56 | parser.add_argument("--width", type=int, 57 | help = "width of simulation ") 58 | parser.add_argument("--depth", type=int, 59 | help = "depth of simulation ") 60 | parser.add_argument("-d", "--dt", type=float, 61 | help = "time constant") 62 | parser.add_argument("-f", "--force", type=float, 63 | help = "force constant") 64 | args = parser.parse_args() 65 | 66 | params.num_particles = args.numparticles if args.numparticles else 100 67 | params.simulation_height = args.height if args.height else 1000 68 | params.simulation_width = args.width if args.width else 1000 69 | params.simulation_depth = args.depth if args.depth else 1000 70 | params.dt = args.dt if args.dt else 0.0005 71 | #params.force = args.force if args.force else 100 72 | params.force = args.force if args.force else 100000 73 | params.num_active_workers = 0 74 | params.new_num_active_workers = 0 75 | params.partitions = {} 76 | params.max_radius = min(params.simulation_width, params.simulation_height, params.simulation_depth)//32 77 | params.timesteps_per_second = 0 78 | params.init_total_energy = 0.0 79 | params.curr_total_energy = 0.0 80 | 81 | if params.rank is 0: 82 | # Create partitions 1 through params.num_threads - 1 83 | for i in range(1, params.num_threads): 84 | params.partitions[i] = Partition(i) 85 | 86 | # Create Particles for Partitions 87 | for i in range(params.num_particles): 88 | radius = 30#random.randint(1, params.max_radius) 89 | position = [random.randint(radius, params.simulation_width - 1), 90 | random.randint(radius, params.simulation_height - 1), 91 | random.randint(radius, params.simulation_depth - 1)] 92 | velocity = [400*random.randint(0,radius//4), 93 | 400*random.randint(0,radius//4), 94 | 400*random.randint(0,radius//4)] 95 | mass = 3#random.randint(1,10) 96 | params.init_total_energy += 0.5 * mass * (velocity[0]**2 + velocity[1]**2 + velocity[2]**2) 97 | thread_num = util.determine_particle_thread_num(position[0]) 98 | new_particle = Particle(i, thread_num, position, velocity, mass, radius) 99 | params.partitions[thread_num].add_particle(new_particle) 100 | 101 | params.curr_total_energy = params.init_total_energy 102 | 103 | def update_params(): 104 | params.new_num_active_workers = params.comm.bcast(params.new_num_active_workers) 105 | 106 | # Broadcast setup information 107 | params.partitions = params.comm.bcast(params.partitions) 108 | params.num_active_workers = params.comm.bcast(params.num_active_workers) 109 | update_params() 110 | 111 | colors = { 112 | 0: "255,255,255", 113 | 114 | 1: "255,000,000", 115 | 2: "255,102,000", 116 | 3: "255,204,000", 117 | 4: "204,255,000", 118 | 5: "102,255,000", 119 | 120 | 6: 
"000,255,000", 121 | 7: "000,255,102", 122 | 8: "000,255,204", 123 | 9: "000,204,255", 124 | 10: "000,102,255", 125 | 126 | 11: "000,000,255", 127 | 12: "102,000,255", 128 | 13: "204,000,255", 129 | 14: "255,000,204", 130 | 15: "255,000,102", 131 | } 132 | num_colors = len(colors) 133 | 134 | FNULL = open(os.devnull, 'w') 135 | 136 | # One timestep 137 | def timestep(): 138 | """Only do something as a slave if an active worker""" 139 | if params.rank is 0: 140 | # threading.Thread(target=subprocess.call(["blink1-tool", "--rgb=" + str(colors[params.rank%4]), "--blink=1", "-m0", "-t20"],stdout=FNULL, stderr=subprocess.STDOUT)).start() 141 | subprocess.Popen(["blink1-tool --rgb=" + str(colors[0]) + " --blink=1, -m0, -t20 > /dev/null"], shell=True, stdin=None, stdout=None, stderr=None, close_fds=True) 142 | for i in range(1, params.num_active_workers+1): 143 | new_particles = params.comm.recv(source = mpi.ANY_SOURCE, status = params.mpi_status, tag = 0) 144 | params.partitions[params.mpi_status.Get_source()].particles = new_particles 145 | elif params.rank <= params.num_active_workers: 146 | subprocess.Popen(["blink1-tool --rgb=" + str(colors[params.rank % num_colors]) + " --blink=5, -m0, -t20 > /dev/null"], shell=True, stdin=None, stdout=None, stderr=None, close_fds=True) 147 | partition = params.partitions[params.rank] 148 | partition.send_and_receive_neighboring_particles() 149 | partition.interact_particles() 150 | partition.exchange_particles() 151 | partition.update_master() 152 | 153 | def change_num_active_workers(): 154 | params.num_active_workers = params.new_num_active_workers 155 | print("In change num_active_workers "+str(params.rank)+" "+str(params.num_active_workers)) 156 | if params.rank is 0: 157 | if params.new_num_active_workers < 1 or params.new_num_active_workers > params.num_threads - 1: 158 | util.debug("Invalid number of active workers requested: " + params.new_num_active_workers) 159 | 160 | new_distribution = {} 161 | for i in range(1, params.num_threads): 162 | new_distribution[i] = set() 163 | 164 | for partition_id, partition in params.partitions.items(): 165 | for particle in partition.particles: 166 | new_thread = util.determine_particle_thread_num(particle.position[0]) 167 | particle.thread_num = new_thread 168 | new_distribution[new_thread].add(particle) 169 | 170 | for i in range(1, params.num_threads): 171 | print("sending "+str(i)) 172 | params.partitions[i].particles = new_distribution[i] 173 | params.comm.send(new_distribution[i], dest = i, tag = 11) 174 | else: 175 | print("receiving "+str(params.rank)) 176 | params.partitions[params.rank].receive_new_particles() 177 | #print("received "+str(params.rank)) 178 | 179 | endpoint = "{\n}" 180 | class Server(BaseHTTPRequestHandler): 181 | def do_GET(self): 182 | """Handle GET requests to the API endpoint""" 183 | global endpoint 184 | parsed_path = urlparse(self.path) 185 | if "/api/v1/get_particles" in parsed_path: 186 | message = endpoint 187 | self.send_response(200) 188 | # TODO: Security? 189 | self.send_header("Access-Control-Allow-Origin", "*") 190 | self.end_headers() 191 | self.wfile.write(message.encode("utf-8")) 192 | else: 193 | util.info("GET sent to " + str(parsed_path[2])) 194 | 195 | def do_POST(self): 196 | """Handle POST requests to the API endpoint""" 197 | global endpoint 198 | parsed_path = urlparse(self.path) 199 | if "/api/v1/post_parameters" in parsed_path: 200 | self.send_response(200) 201 | # TODO: Security? 
class Server(BaseHTTPRequestHandler):
    def do_GET(self):
        """Handle GET requests to the API endpoint"""
        global endpoint
        parsed_path = urlparse(self.path)
        # Match on the path component; "in parsed_path" would test tuple
        # membership on the ParseResult rather than a substring of the path
        if "/api/v1/get_particles" in parsed_path.path:
            message = endpoint
            self.send_response(200)
            # TODO: Security?
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(message.encode("utf-8"))
        else:
            util.info("GET sent to " + str(parsed_path.path))

    def do_POST(self):
        """Handle POST requests to the API endpoint"""
        global endpoint
        parsed_path = urlparse(self.path)
        if "/api/v1/post_parameters" in parsed_path.path:
            self.send_response(200)
            # TODO: Security?
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            length = int(self.headers["Content-Length"])
            post_data = self.rfile.read(length).decode("utf-8")
            #post_data = self.rfile.read(length)
            # Parse data from POST
            print('Got a post ')
            print(type(post_data))
            print(post_data)
            new_data = parse_qs(post_data)
            print(type(new_data))
            for x in new_data:
                print(x)
                print(new_data[x])
            params.new_num_active_workers = int(new_data['num_workers'][0])
            #change_num_active_workers()
        else:
            util.info("POST sent to " + str(parsed_path.path))

    def log_message(self, format, *args):
        return
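
# Example interaction with the endpoint (hypothetical; assumes the server is
# reachable at the host and port configured in main() below):
#   curl http://10.0.0.101:8080/api/v1/get_particles
#   curl -d "num_workers=4" http://10.0.0.101:8080/api/v1/post_parameters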
def main():
    global endpoint

    if params.rank == 0:
        from http.server import HTTPServer
        port_number = 8080
        host = "10.0.0.101"
        server = HTTPServer((host, port_number), Server)
        util.info("Starting server on port " + str(port_number) + ", ^c to exit")
        threading.Thread(target=server.serve_forever).start()

    iterations = 0
    while True:
        # Timing
        samples = 100
        iterations += 1

        if (iterations % samples == 1) and params.rank == 0:
            start = time.time()

        # Any changes to the number of workers?
        update_params()
        if params.new_num_active_workers != params.num_active_workers:
            change_num_active_workers()

        timestep()

        # Timing
        if (iterations % samples == 0) and params.rank == 0:
            params.timesteps_per_second = samples/(time.time() - start)
            # util.info(str(params.partitions))
            # util.info("Average steps per second: " + str(params.timesteps_per_second))

        if params.rank == 0:
            # Use a copy of endpoint to prevent queries to endpoint from
            # receiving an in-progress timestep
            temp_endpoint = "{\n"
            param_endpoint = "    \"params\": {\n"
            param_endpoint += "        \"num_particles\": " + str(params.num_particles) + ",\n"
            param_endpoint += "        \"num_active_workers\": " + str(params.num_active_workers) + ",\n"
            param_endpoint += "        \"simulation_height\": " + str(params.simulation_height) + ",\n"
            param_endpoint += "        \"simulation_width\": " + str(params.simulation_width) + ",\n"
            param_endpoint += "        \"simulation_depth\": " + str(params.simulation_depth) + ",\n"
            param_endpoint += "        \"timesteps_per_second\": " + str(params.timesteps_per_second) + ",\n"
            param_endpoint += "        \"total_energy\": " + str(params.curr_total_energy) + "\n"
            param_endpoint += "    },\n"

            params.curr_total_energy = 0.0

            particles_endpoint = "    \"particles\": [\n"
            for key, partition in params.partitions.items():
                for particle in partition.particles:
                    # particle.neighbors = ""
                    particles_endpoint += particle.jsonify()  # json.dumps(particle, default=lambda obj: obj.__dict__, sort_keys = True, indent=4) + ",\n"
                    params.curr_total_energy += 0.5 * particle.mass * (particle.velocity[0]**2 + particle.velocity[1]**2 + particle.velocity[2]**2)
            particles_endpoint = particles_endpoint[:-2]  # trim extra comma

            util.debug('init_total_energy_______: ' + str(params.init_total_energy))

            if True:
            # if not iterations % 1000:
                # Rescale velocities so the total kinetic energy stays near
                # its initial value
                sqrt_ratio = math.sqrt(params.init_total_energy / params.curr_total_energy)
                # ratio = params.init_total_energy / params.curr_total_energy
                for key, partition in params.partitions.items():
                    for particle in partition.particles:
                        for i in range(3):
                            particle.velocity[i] *= sqrt_ratio
                    params.comm.send(partition.particles, dest=partition.thread_num, tag=99)
                util.debug('curr_total_energy_before: ' + str(params.curr_total_energy))

            # RM
            if True:
                params.curr_total_energy = 0.0
            # if not iterations % 1000:
                for key, partition in params.partitions.items():
                    for particle in partition.particles:
                        params.curr_total_energy += 0.5 * particle.mass * (particle.velocity[0]**2 + particle.velocity[1]**2 + particle.velocity[2]**2)
                util.debug('curr_total_energy_after_: ' + str(params.curr_total_energy))
            # RM

            endpoint = "{\n" + param_endpoint + particles_endpoint + "\n    ]\n}\n"
        # elif not iterations % 1000:
        else:
            new_particles = params.comm.recv(source=0, status=params.mpi_status, tag=99)
            params.partitions[params.rank].set_particles(new_particles)

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------