├── AUTHORS ├── COPYRIGHT ├── ChangeLog ├── Makefile.am ├── Makefile.top ├── README.md ├── THANKS ├── autogen ├── cell ├── Makefile.am ├── cell_check.c ├── cell_check.h ├── cell_check_internal.h ├── cell_check_unit_test.c ├── cw_util.c ├── cw_util.h └── ppu │ ├── Makefile.am │ ├── cell_check.h │ ├── cell_check_internal.h │ ├── cell_check_ppu.c │ ├── cw_util.c │ └── cw_util.h ├── configure.ac ├── m4 └── smgc-cell.m4 ├── supermagic.c ├── supermagic.h └── util ├── crunch-hang ├── rr-job-prep └── rr-run-job /AUTHORS: -------------------------------------------------------------------------------- 1 | Samuel K. Gutierrez - Los Alamos National Laboratory 2 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2021 Los Alamos National Security, LLC. All rights reserved. 2 | 3 | This program was prepared by Los Alamos National Security, LLC at Los Alamos 4 | National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 5 | Department of Energy (DOE). All rights in the program are reserved by the DOE 6 | and Los Alamos National Security, LLC. Permission is granted to the public to 7 | copy and use this software without charge, provided that this Notice and any 8 | statement of authorship are reproduced on all copies. Neither the U.S. 9 | Government nor LANS makes any warranty, express or implied, or assumes any 10 | liability or responsibility for the use of this software. 11 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | SUPERMAGIC CHANGE LOG 2 | 3 | 2011-05-19 Samuel K. Gutierrez samuelREMOVEME@lanl.gov 4 | * updated code - ready for public distribution. 5 | * on to version 0.7.0 6 | 7 | 2011-03-10 Samuel K. 
Gutierrez samuelREMOVEME@lanl.gov 8 | * fixed Makefile to allow for Intel compilation 9 | * on to version 0.6.0 10 | 11 | 2011-01-02 Samuel K. Gutierrez samuelREMOVEME@lanl.gov 12 | * incorporated Evan H. Samanas' I/O stats code. Thanks Evan! 13 | * on to version 0.5.1. 14 | * added a -M parameter to supermagic. the -M parameter changes the default 15 | target file size written by each rank process (see: mpi_io and n_to_n_io). 16 | thanks to Reese Baird for the suggestion. 17 | * on to version 0.5.2. 18 | 19 | 2010-04-05 Samuel K. Gutierrez samuelREMOVEME@lanl.gov 20 | * get_rhn now returns "???" if a remote host name exchange has not occurred 21 | * lower memory footprint - solves roadrunner scale issue? 22 | * by default, a stat of the user's home dir is now disabled 23 | * modified test order 24 | * removed ptp_sanity test 25 | * added verbose output option 26 | * thanks to Ben McClelland and Daryl Grunau for their great suggestions 27 | * on to version 0.1.5. 28 | 29 | 2010-03-01 Samuel K. Gutierrez samuelREMOVEME@lanl.gov 30 | * added a collective host name exchange. 31 | * updated output format to include rank (host name) information. 32 | * thanks to Timothy O. Harrington for the great idea. 33 | * removed fixed buffer sizes found in some tests. 34 | * on to version 0.1.3. 35 | 36 | 2010-02-26 Samuel K. Gutierrez samuelREMOVEME@lanl.gov 37 | * removed underscore from macro names. 38 | * message size specified at compile time can now be 39 | overwritten by a run-time parameter -m|-msg-size. 40 | * thanks to Cornell Wright for the great suggestions. 41 | * on to version 0.1.1. 42 | * minor updates. 43 | * on to version 0.1.2. 44 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2011-2021 Los Alamos National Security, LLC. 
3 | # All rights reserved. 4 | # 5 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | # and Los Alamos National Security, LLC. Permission is granted to the public to 9 | # copy and use this software without charge, provided that this Notice and any 10 | # statement of authorship are reproduced on all copies. Neither the U.S. 11 | # Government nor LANS makes any warranty, express or implied, or assumes any 12 | # liability or responsibility for the use of this software. 13 | ################################################################################ 14 | 15 | bin_PROGRAMS = supermagic 16 | 17 | SUBDIRS = cell 18 | 19 | supermagic_SOURCES = \ 20 | supermagic.h supermagic.c 21 | 22 | supermagic_CFLAGS = 23 | supermagic_LDFLAGS = 24 | supermagic_LDADD = 25 | 26 | if SMGC_BUILD_CELL 27 | supermagic_CFLAGS += -I$(top_srcdir)/cell 28 | supermagic_LDFLAGS += -L$(top_srcdir)/cell 29 | supermagic_LDADD += -lcell_check 30 | endif 31 | 32 | EXTRA_DIST = \ 33 | Makefile.top AUTHORS README.md \ 34 | THANKS COPYRIGHT autogen \ 35 | ChangeLog \ 36 | util/crunch-hang util/rr-job-prep util/rr-run-job 37 | 38 | ACLOCAL_AMFLAGS = -I m4 39 | 40 | dist-hook: 41 | ln -s Makefile.top $(distdir)/Makefile 42 | -------------------------------------------------------------------------------- /Makefile.top: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2012 Los Alamos National Security, LLC. 3 | # All rights reserved. 4 | # 5 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | # Department of Energy (DOE). 
All rights in the program are reserved by the DOE 8 | # and Los Alamos National Security, LLC. Permission is granted to the public to 9 | # copy and use this software without charge, provided that this Notice and any 10 | # statement of authorship are reproduced on all copies. Neither the U.S. 11 | # Government nor LANS makes any warranty, express or implied, or assumes any 12 | # liability or responsibility for the use of this software. 13 | ################################################################################ 14 | 15 | # Author: Samuel K. Gutierrez 16 | 17 | SHELL = /bin/sh 18 | 19 | .PHONY: all cell 20 | 21 | all: 22 | @echo "### running configure and make for you ###" 23 | @echo "" 24 | @./configure && make 25 | 26 | cell: 27 | @echo "### running configure and make for you ###" 28 | @echo "" 29 | @./configure --enable-cell && make 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # supermagic 2 | 3 | ## What is supermagic? 4 | supermagic is a very simple MPI sanity code. Nothing more, nothing less. 5 | 6 | ## Latest Distributions 7 | Distribution tarballs are found here: http://hpc.github.com/supermagic 8 | 9 | ## Getting and Configuring supermagic 10 | Clone this repository: 11 | ```shell 12 | git clone https://github.com/hpc/supermagic.git 13 | ``` 14 | 15 | Run autogen to generate the configure script: 16 | ```shell 17 | cd supermagic 18 | # Note that this step is skipped when building from a distribution tarball. 19 | ./autogen 20 | ``` 21 | 22 | Run configure with the required options. Some examples include: 23 | ```shell 24 | # Example 1: Using mpicc as the C wrapper compiler. 25 | ./configure CC=mpicc 26 | # Example 2: Adding an installation prefix. 
27 | ./configure CC=mpicc --prefix=$HOME/.local 28 | ``` 29 | 30 | ## Building supermagic: MPI-Only 31 | ```shell 32 | make 33 | ``` 34 | 35 | ### An example using modules and `mpicc`: 36 | ```shell 37 | module load openmpi-gcc 38 | make 39 | ``` 40 | 41 | ### An example with verbose build output 42 | ```shell 43 | make V=1 44 | ``` 45 | 46 | ## Building supermagic: MPI + Cell 47 | ```shell 48 | make cell 49 | ``` 50 | 51 | ### An example using modules, mpicc, and cellsdk 52 | ```shell 53 | module load openmpi-gcc cellsdk/3.1 54 | make cell 55 | ``` 56 | 57 | ## supermagic Usage 58 | ``` 59 | Usage: 60 | mpirun -np N ./supermagic [OPTIONS] 61 | 62 | options: 63 | [-a|--all] run all tests in suite 64 | [-h|--help] display this message 65 | [-m|--msg-size x[B,k,M,G]] change message size 66 | [-M|--file-size B[B,k,M,G]] change file size (per rank) 67 | [-n|--n-iters X] run X iterations of a test suite 68 | [-q|--quiet] run in quiet mode 69 | [-s|--stat /a/path] add /a/path to stat list 70 | [-t|--with-tests t1[,t2,tn]] run tests in requested order 71 | [-w|--write /a/path] add /a/path to IO tests 72 | [-V|--verbose] display verbose output 73 | 74 | Available tests: 75 | hostname_exchange 76 | stat_paths 77 | mpi_io 78 | n_to_n_io 79 | small_all_to_all_ptp 80 | small_allreduce_max 81 | alt_sendrecv_ring 82 | root_bcast 83 | rand_root_bcast 84 | large_sendrecv_ring 85 | rand_root_bcast 86 | large_all_to_root_ptp 87 | large_all_to_all_ptp 88 | hello_world 89 | # cell_sanity only available when cell support is requested via "make cell" 90 | cell_sanity 91 | ``` 92 | 93 | For example 94 | ```shell 95 | mpirun -np 4 ./supermagic -s /glob/usr/file -s /usr/proj -n 2 96 | ``` 97 | 98 | ### Example 1: Basic Usage 99 | ```shell 100 | mpirun ./supermagic 101 | ``` 102 | 103 | ### Example 2: Script that tests the system before application execution 104 | ```shell 105 | mpirun ./supermagic 106 | if [[ $? 
!= 0 ]] 107 | then 108 | exit 1; 109 | fi 110 | # supermagic didn't detect any errors. Run my real application now: 111 | mpirun ./my_real_app 112 | ``` 113 | 114 | ### Example 3: Running a custom set of tests 115 | ```shell 116 | # First runs mpi_io test then runs cell_sanity test 117 | mpirun ./supermagic -t mpi_io,cell_sanity -w /scratch1/jess/my_data_dir 118 | ``` 119 | 120 | ## supermagic Best Practices 121 | In general, it is best to run supermagic in a way that closely mimics the way in 122 | which you run your real target application. For example, if you provide a list 123 | of MPI parameters that change the way in which your MPI implementation behaves, 124 | please also include those parameters when running supermagic. 125 | 126 | ### Example 4: Open MPI MCA parameters 127 | ```shell 128 | mpirun -mca a_parameter -mca another ./supermagic 129 | 130 | if [[ $? != 0 ]] 131 | then 132 | exit 1; 133 | fi 134 | # supermagic didn't detect any errors, run my real application now: 135 | mpirun -mca a_parameter -mca another ./my_real_app 136 | ``` 137 | 138 | ## Frequently Asked Questions 139 | 140 | Q: "UNKNOWN" host names are not very useful. How can I get useful host names? 141 | 142 | A: Run hostname_exchange first. This will populate a host name lookup table and 143 | get rid of "UNKNOWN" host names. 144 | 145 | For example: 146 | ```shell 147 | mpirun ./supermagic -t hostname_exchange,rand_root_bcast 148 | ``` 149 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | THANKS! 2 | 3 | Reese Baird 4 | Cornell Wright 5 | Ben McClelland 6 | Daryl Grunau 7 | -------------------------------------------------------------------------------- /autogen: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | autoreconf --force --install 4 | exit $? 
5 | 6 | ############################################################################### 7 | # Copyright (c) 2011-2012 Los Alamos National Security, LLC. 8 | # All rights reserved. 9 | # 10 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 11 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 12 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 13 | # and Los Alamos National Security, LLC. Permission is granted to the public to 14 | # copy and use this software without charge, provided that this Notice and any 15 | # statement of authorship are reproduced on all copies. Neither the U.S. 16 | # Government nor LANS makes any warranty, express or implied, or assumes any 17 | # liability or responsibility for the use of this software. 18 | ################################################################################ 19 | -------------------------------------------------------------------------------- /cell/Makefile.am: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2012 Los Alamos National Security, LLC. 3 | # All rights reserved. 4 | # 5 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | # and Los Alamos National Security, LLC. Permission is granted to the public to 9 | # copy and use this software without charge, provided that this Notice and any 10 | # statement of authorship are reproduced on all copies. Neither the U.S. 11 | # Government nor LANS makes any warranty, express or implied, or assumes any 12 | # liability or responsibility for the use of this software. 
13 | ################################################################################ 14 | 15 | CC=gcc 16 | 17 | lib_LTLIBRARIES = 18 | 19 | if SMGC_BUILD_CELL 20 | lib_LTLIBRARIES += libcell_check.la 21 | endif 22 | 23 | libcell_check_la_SOURCES = \ 24 | cell_check.c cell_check.h cell_check_internal.h cw_util.h cw_util.c cw_util.h 25 | 26 | libcell_check_la_LDFLAGS = \ 27 | -L/usr/lib64/dacs/debug 28 | 29 | libcell_check_la_LIBADD = \ 30 | -ldacs_hybrid 31 | 32 | SUBDIRS = ppu 33 | -------------------------------------------------------------------------------- /cell/cell_check.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2012 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 13 | */ 14 | 15 | /***************************************************************************** 16 | * cell_check.c - Roadrunner Cell connectivity check out routine 17 | * 18 | * Perform a series of basic tests to ensure that Cells can be reserved, 19 | * PPU programs launched and basic message function. 
20 | * 21 | * Typical invocation from a higher level test program: 22 | * 23 | * rc = cell_check(CELL_CHECK_MSG_FAIL, CELL_CHECK_TEST_ALL); 24 | * if (rc != 0) printf("cell_check failure rc=%d\n", rc); 25 | * 26 | * Change History: 27 | * 28 | * 2010-09-16 cgw Initial version 29 | * 2010-09-20 cgw Clean up failure path, add some checks 30 | * 31 | *****************************************************************************/ 32 | #include "cell_check.h" 33 | #include "cell_check_internal.h" 34 | 35 | unsigned long cell_check(CELL_CHECK_MSG_T msg_level, CELL_CHECK_TEST_T test_level) { 36 | uint32_t num_accel = 1; 37 | de_id_t accel; 38 | dacs_process_id_t accel_pid; 39 | uint32_t msgS = 123, msgR = 0; 40 | int init_ok = 0, reserve_child_ok = 0, start_ok = 0, wid_reserve_ok = 0; 41 | dacs_wid_t wid; 42 | uint32_t accel_fail; 43 | int32_t exit_status; 44 | char * accel_proc = "cell/ppu/cell_check_ppu"; 45 | DACS_PROC_CREATION_FLAG_T creation_flags = DACS_PROC_LOCAL_FILE; 46 | 47 | g.version = 11; 48 | g.msg_level = msg_level; 49 | g.test_level = test_level; 50 | g.fail = 0; 51 | 52 | cwMsgSetTag(NULL, "cell_check@$H[$A] "); 53 | cwSighandInit(NULL); 54 | 55 | // Initialize the runtime and reserve an accelerator 56 | DACS( 1, 1, dacs_init, (DACS_INIT_FLAGS_NONE) ); 57 | init_ok = !g.fail; 58 | 59 | if (g.test_level >= CELL_CHECK_TEST_RESERVE) { 60 | if (!g.fail) { 61 | DACS( 2, 1, dacs_get_num_avail_children, (DACS_DE_CBE, &num_accel) ); 62 | } 63 | 64 | if (!g.fail) CHECK( 2, 2, (num_accel >= 1), num_accel); 65 | 66 | num_accel = 1; 67 | 68 | if (!g.fail) { 69 | DACS( 2, 3, dacs_reserve_children, (DACS_DE_CBE, &num_accel, &accel) ); 70 | reserve_child_ok = !g.fail; 71 | } 72 | 73 | if (!g.fail) CHECK( 2, 4, (num_accel == 1), num_accel); 74 | 75 | if (g.test_level >= CELL_CHECK_TEST_START) { 76 | // Start the accelerator process with msg & test level via argv 77 | char ml[8], tl[8]; 78 | char * argv[3] = {ml, tl, NULL}; 79 | snprintf(ml, sizeof(ml), "%d", g.msg_level); 
80 | snprintf(tl, sizeof(tl), "%d", g.test_level); 81 | 82 | if (!g.fail) { 83 | DACS( 3, 1, dacs_de_start, (accel,(void *)accel_proc, (char const **) argv, 84 | NULL, creation_flags, &accel_pid) ); 85 | start_ok = !g.fail; 86 | } 87 | if (g.test_level >= CELL_CHECK_TEST_SENDREC) { 88 | // Send message containing a single uint32_t 89 | if (!g.fail) { 90 | DACS( 4, 1, dacs_wid_reserve, (&wid) ); 91 | wid_reserve_ok = !g.fail; 92 | } 93 | if (!g.fail) { 94 | DACS( 4, 2, dacs_send, (&msgS, sizeof(msgS), accel, accel_pid, 95 | 0, wid, DACS_BYTE_SWAP_WORD) ); 96 | } 97 | if (!g.fail) { 98 | DACS( 4, 3, dacs_wait, (wid) ); 99 | } 100 | // Receive the value back, incremented 101 | if (!g.fail) { 102 | DACS( 4, 4, dacs_recv, (&msgR, sizeof(msgR), accel, accel_pid, 103 | DACS_STREAM_ALL, wid, DACS_BYTE_SWAP_WORD) ); 104 | } 105 | if (!g.fail) { 106 | DACS( 4, 5, dacs_wait, (wid) ); 107 | } 108 | 109 | if (!g.fail) CHECK( 4, 6, (msgR == msgS + 1), msgR); 110 | 111 | if (wid_reserve_ok) { 112 | DACS( 4, 6, dacs_wid_release, (&wid) ); 113 | } 114 | } // if (g.test_level >= CELL_CHECK_TEST_SENDREC) 115 | 116 | // Wait for accelerator to finish 117 | if (start_ok) { 118 | DACS( 5, 1, dacs_mailbox_read, (&accel_fail, accel, accel_pid) ); 119 | if (accel_fail != 0 && g.fail == 0) g.fail = accel_fail; 120 | CHECK(5, 2, (accel_fail == 0), accel_fail); 121 | 122 | DACS( 5, 3, dacs_de_wait, (accel, accel_pid, &exit_status) ); 123 | CHECK( 5, 4, (exit_status == 0), exit_status); 124 | } 125 | } // if (g.test_level >= CELL_CHECK_TEST_START) 126 | 127 | // Cleanup and exit 128 | if (reserve_child_ok) { 129 | DACS( 6, 1, dacs_release_de_list, (num_accel, &accel) ); 130 | } 131 | } // if (g.test_level >= CELL_CHECK_TEST_RESERVE) 132 | if (init_ok) { 133 | DACS( 6, 2, dacs_exit, () ); 134 | } 135 | 136 | cwSighandTerm(); 137 | return g.fail; 138 | }; 139 | 140 | /* --- end of cell_check.c --- */ 141 | -------------------------------------------------------------------------------- 
/cell/cell_check.h:
--------------------------------------------------------------------------------
/**
 * Copyright (c) 2010-2011 Los Alamos National Security, LLC.
 * All rights reserved.
 *
 * This program was prepared by Los Alamos National Security, LLC at Los Alamos
 * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S.
 * Department of Energy (DOE). All rights in the program are reserved by the DOE
 * and Los Alamos National Security, LLC. Permission is granted to the public to
 * copy and use this software without charge, provided that this Notice and any
 * statement of authorship are reproduced on all copies. Neither the U.S.
 * Government nor LANS makes any warranty, express or implied, or assumes any
 * liability or responsibility for the use of this software.
 */

/*****************************************************************************
 * cell_check.h - Roadrunner Cell connectivity check out routine
 *
 * Perform a series of basic tests to ensure that Cells can be reserved,
 * PPU programs launched and basic message function.
 *
 * Typical invocation from a higher level test program:
 *
 *   rc = cell_check(CELL_CHECK_MSG_FAIL, CELL_CHECK_TEST_ALL);
 *   if (rc != 0) printf("cell_check failure rc=%lu\n", rc);
 *
 * Change History:
 *
 * 2010-09-15 cgw Initial version
 *
 *****************************************************************************/

#ifndef _CELL_CHECK_H
#define _CELL_CHECK_H

// Message verbosity. The enumerators are ordered from quietest to noisiest
// and are compared with >=, so each level also enables every level above it
// in this list. Do not reorder.
typedef enum {
  CELL_CHECK_MSG_NONE,      // No messages ever, results via RC
  CELL_CHECK_MSG_FAIL,      // Failures only, to stderr
  CELL_CHECK_MSG_RESULT,    // Results to stdout
  CELL_CHECK_MSG_PROGRESS,  // Test start to stdout
  CELL_CHECK_MSG_DEBUG,     // Debug messages
} CELL_CHECK_MSG_T;

// How far the check proceeds. The enumerators are ordered and compared with
// >=, so each level includes all of the stages listed before it. Do not
// reorder.
typedef enum {
  CELL_CHECK_TEST_INIT,     // dacs_init/dacs_exit only
  CELL_CHECK_TEST_RESERVE,  // Reserve Cells
  CELL_CHECK_TEST_START,    // Launch PPU program
  CELL_CHECK_TEST_SENDREC,  // Send messages to/from PPU
  CELL_CHECK_TEST_ALL       // Perform all tests
} CELL_CHECK_TEST_T;

// Run the connectivity check up to test_level, emitting messages according
// to msg_level. Returns 0 when no failure was recorded; any non-zero value is
// a failure code identifying the first failure.
unsigned long cell_check(CELL_CHECK_MSG_T msg_level, CELL_CHECK_TEST_T test_level);

#endif
/* --- end of cell_check.h --- */
--------------------------------------------------------------------------------
/cell/cell_check_internal.h:
--------------------------------------------------------------------------------
/**
 * Copyright (c) 2010-2011 Los Alamos National Security, LLC.
 * All rights reserved.
 *
 * This program was prepared by Los Alamos National Security, LLC at Los Alamos
 * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S.
 * Department of Energy (DOE). All rights in the program are reserved by the DOE
 * and Los Alamos National Security, LLC. Permission is granted to the public to
 * copy and use this software without charge, provided that this Notice and any
 * statement of authorship are reproduced on all copies. Neither the U.S.
11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 13 | */ 14 | 15 | /***************************************************************************** 16 | * cell_check_internal.h - Roadrunner Cell connectivity check out routine 17 | * 18 | * Common macros and subroutines for x86 and PPU modules of cell_check. 19 | * 20 | * Change History: 21 | * 22 | * 2010-09-15 cgw Initial version 23 | * 2010-09-20 cgw Clean up failure path, add CHECK macro 24 | * 25 | *****************************************************************************/ 26 | 27 | #ifndef _CELL_CHECK_INTERNAL_H 28 | #define _CELL_CHECK_INTERNAL_H 29 | 30 | #include 31 | #include 32 | #include "cw_util.h" 33 | 34 | // Global data used by macros 35 | struct { 36 | uint32_t version; 37 | CELL_CHECK_MSG_T msg_level; 38 | CELL_CHECK_TEST_T test_level; 39 | uint32_t fail; 40 | } g; 41 | 42 | #define FAIL_CODE(MAJOR,MINOR,RC) ((RC)%1000 + (MINOR)*1000 + (MAJOR)*100000 + g.version*10000000) 43 | 44 | #define DACS(MAJOR,MINOR,API,PARAM) {\ 45 | DACS_ERR_T dacs_rc;\ 46 | uint32_t fail_code;\ 47 | if (g.msg_level >= CELL_CHECK_MSG_PROGRESS) cwMsg("Call %s", #API#PARAM);\ 48 | dacs_rc = (API PARAM);\ 49 | if (g.msg_level >= CELL_CHECK_MSG_RESULT)\ 50 | cwMsg("%s rc:%d [%s]", #API, dacs_rc, dacs_strerror(dacs_rc));\ 51 | if (dacs_rc < DACS_SUCCESS) {\ 52 | fail_code = FAIL_CODE(MAJOR, MINOR, -dacs_rc);\ 53 | if (g.fail == 0) g.fail = fail_code;\ 54 | if (g.msg_level >= CELL_CHECK_MSG_FAIL)\ 55 | cwMsgE("FAIL:%lu; %s rc:%d [%s]", fail_code, #API#PARAM, dacs_rc, dacs_strerror(dacs_rc));\ 56 | }\ 57 | } 58 | 59 | #define CHECK(MAJOR,MINOR,COND,RC) {\ 60 | if COND {\ 61 | if (g.msg_level >= CELL_CHECK_MSG_PROGRESS)\ 62 | cwMsg("Check %s OK", #COND);\ 63 | } else {\ 64 | uint32_t fail_code;\ 65 | fail_code = FAIL_CODE(MAJOR, MINOR, RC);\ 66 | if (g.fail == 0) g.fail = fail_code;\ 67 | if (g.msg_level >= CELL_CHECK_MSG_FAIL)\ 68 | 
cwMsgE("FAIL:%lu; check %s failed rc:%lu", fail_code, #COND, RC);\ 69 | }\ 70 | } 71 | 72 | #endif 73 | /* --- end of cell_check_internal.h --- */ 74 | -------------------------------------------------------------------------------- /cell/cell_check_unit_test.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2011 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 13 | */ 14 | 15 | /***************************************************************************** 16 | * cell_check_unit_test.c 17 | * 18 | * Invoke cell_check for debugging. 
19 | * 20 | * Change History: 21 | * 22 | * 2010-09-15 cgw Initial version 23 | * 24 | *****************************************************************************/ 25 | #include 26 | #include "cell_check.h" 27 | 28 | 29 | int main(int argc, char ** argv) { 30 | (void)argc; 31 | (void)argv; 32 | 33 | unsigned long rc = cell_check(CELL_CHECK_MSG_FAIL, CELL_CHECK_TEST_ALL); 34 | printf("cell_check() rc:%lu\n", rc); 35 | 36 | return 0; 37 | } 38 | 39 | /* --- end of cell_check_unit_test.h --- */ 40 | -------------------------------------------------------------------------------- /cell/cw_util.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2011 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 
13 | */ 14 | 15 | //**************************************************************************** 16 | // cw_util.c - convenience functions 17 | // 18 | // Change History: 19 | // --------------- 20 | // Date Who Description 21 | // 2009-10-14 cgw Initial version 22 | // 2010-09-16 cgw Extract from dacsx.c 23 | // 2010-09-20 cgw Add cwMsgE 24 | //**************************************************************************** 25 | #define _GNU_SOURCE 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #define __USE_GNU 1 38 | #include 39 | #include 40 | #undef __USE_GNU 41 | #ifdef CW_MPI 42 | #include 43 | #endif // ifdef CW_MPI 44 | #include "cw_util.h" 45 | 46 | //----------------------------------------------------------------------------- 47 | // Return CPU affinity in a form suitable for messages. Single CPU affinity 48 | // returns a non-negative integer CPU number. Multi CPU affinity returns the 49 | // negative of the bit mask of affine CPUs. Affinity to no CPUs returns -1. 50 | //---------------------------------------------------------------------------- 51 | int32_t cwGetCPUaffinity(void) { 52 | int numCPU = sysconf( _SC_NPROCESSORS_CONF ); 53 | cpu_set_t af; 54 | int32_t i, afmask = 0, afcount = 0, afCPU=-1; 55 | 56 | sched_getaffinity(0, sizeof(af), &af); 57 | 58 | for (i=0; i short hostname 130 | // $A --> CPU affinity 131 | // $R --> MPI rank (-DCW_MPI only) 132 | // 133 | // If tsFormat and/or tagFormat are null, the current value will not 134 | // be changed. If either is an empty string, it will not be prepended 135 | // to the message. 136 | // 137 | // If cwMsgSetTag has not been called, the default message tags are: 138 | // "%Y%m%d_%H:%M:%S" "$H.$A" 139 | //----------------------------------------------------------------------------- 140 | void cwMsgSetTag(char * tsFormat, char * tagFormat, ...) 
{ 141 | va_list ap; 142 | char tmp1[200], tmp2[200]; 143 | 144 | if (!cwMsgParam.initFlag) { 145 | cwMsgParam.initFlag = 1; 146 | cwMsgSetTag("%Y%m%d_%H:%M:%S ", "$H.$A "); 147 | } 148 | 149 | if (tsFormat) { 150 | if (strlen(tsFormat) == 0) { 151 | cwMsgParam.tsFormat = NULL; 152 | } else { 153 | cwDoSub(tsFormat, tmp2, sizeof(tmp2)); 154 | cwMsgParam.tsFormat = strdup(tmp2); 155 | } 156 | } 157 | 158 | if (tagFormat) { 159 | if (strlen(tagFormat) == 0) { 160 | cwMsgParam.tagStr = NULL; 161 | } else { 162 | va_start(ap, tagFormat); 163 | vsnprintf(tmp1, sizeof(tmp1), tagFormat, ap); 164 | va_end(ap); 165 | cwDoSub(tmp1, tmp2, sizeof(tmp2)); 166 | cwMsgParam.tagStr = strdup(tmp2); 167 | } 168 | } 169 | 170 | } 171 | 172 | #define CW_MSG_IMPL(NAME, FILE) \ 173 | void NAME(char * format, ...) {\ 174 | va_list ap;\ 175 | char tmp_str[512] = "";\ 176 | \ 177 | if (!cwMsgParam.initFlag) cwMsgSetTag(NULL, NULL);\ 178 | \ 179 | if (cwMsgParam.tsFormat) {\ 180 | time_t curtime = time(NULL);\ 181 | struct tm *loctime = localtime(&curtime);\ 182 | strftime(tmp_str, sizeof(tmp_str), cwMsgParam.tsFormat, loctime);\ 183 | }\ 184 | \ 185 | if (cwMsgParam.tagStr) {\ 186 | strncat(tmp_str, cwMsgParam.tagStr, sizeof(tmp_str)-strlen(tmp_str)-1);\ 187 | }\ 188 | \ 189 | va_start(ap, format);\ 190 | vsnprintf(tmp_str + strlen(tmp_str), sizeof(tmp_str) - strlen(tmp_str), format, ap);\ 191 | va_end(ap);\ 192 | strncat(tmp_str, "\n", sizeof(tmp_str)-strlen(tmp_str)-1);\ 193 | \ 194 | fputs(tmp_str, FILE);\ 195 | fflush(FILE);\ 196 | }\ 197 | 198 | CW_MSG_IMPL(cwMsg, stdout) 199 | CW_MSG_IMPL(cwMsgE, stderr) 200 | 201 | //----------------------------------------------------------------------------- 202 | // Signal handling 203 | //----------------------------------------------------------------------------- 204 | static int cwSighandSig[] = {SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGABRT, SIGTERM}; 205 | #define SIGCT DIM1(cwSighandSig) 206 | static struct sigaction cwSighandOldact[SIGCT]; 207 
| static int * cwSighandLastsigPtr = NULL; 208 | 209 | void cwSighandHandler(int signo, siginfo_t * siginfo, void * context) { 210 | void * array [32]; 211 | int i; 212 | size_t size; 213 | char * * strings; 214 | 215 | if (cwSighandLastsigPtr) *cwSighandLastsigPtr = signo; 216 | /* Print summary message */ 217 | if (signo == SIGSEGV || signo == SIGFPE || signo == SIGILL || signo == SIGBUS) { 218 | cwMsg("%s(%d) errno:%d code:%d address %p", 219 | strsignal(signo), signo, siginfo->si_errno, siginfo->si_code, 220 | siginfo->si_addr); 221 | } else { 222 | cwMsg("%s(%d) errno:%d code:%d", 223 | strsignal(signo), signo, siginfo->si_errno, siginfo->si_code); 224 | } 225 | 226 | if (signo == SIGTERM) { 227 | // For sigterm, just re-enable this handler and return 228 | struct sigaction handler; 229 | 230 | memset(&handler, 0, sizeof(handler)); 231 | handler.sa_sigaction = &cwSighandHandler; 232 | handler.sa_flags = SA_SIGINFO | SA_RESTART; 233 | sigaction(SIGTERM, &handler, NULL); 234 | } else { 235 | // For error signals, print stack trace, restore prior handler and re-raise signal 236 | 237 | /* Print stack trace */ 238 | size = backtrace(array, DIM1(array)); 239 | strings = backtrace_symbols(array, size); 240 | 241 | if (strings) { 242 | for (i=0; i<(int)size; i++) { 243 | if (strings[i]) { 244 | cwMsg("[%2d] %s", i, strings[i]); 245 | } 246 | } 247 | free(strings); 248 | } 249 | sleep(5); 250 | 251 | /* Restore original signal handlers and re-raise signal */ 252 | cwSighandTerm(); 253 | raise(signo); 254 | } 255 | } 256 | 257 | void cwSighandInit(int * lastsig_ptr) { 258 | int rc; 259 | unsigned i; 260 | struct sigaction handler; 261 | 262 | memset(&handler, 0, sizeof(handler)); 263 | handler.sa_sigaction = &cwSighandHandler; 264 | handler.sa_flags = SA_SIGINFO | SA_RESTART; 265 | 266 | for (i=0; i short hostname 52 | // $A --> CPU affinity 53 | // $R --> MPI rank (-DCW_MPI only) 54 | // 55 | // If tsFormat and/or tagFormat are null, the current value will not be 
changed. 56 | // If either is an empty string, it will not be prepended to the message. 57 | // 58 | // If cwMsgSetTag has not been called, the default message tags are: 59 | // "%Y%m%d_%H:%M:%S" and "$H.$A". 60 | // 61 | // The $R tag should not be used until after MPI_Init() has been called. 62 | //----------------------------------------------------------------------------- 63 | void cwMsgSetTag(char * tsFormat, char * tagFormat, ...); 64 | 65 | //----------------------------------------------------------------------------- 66 | // Enable signal handlers that will print a stack backtrace if an error 67 | // signal occurs. If lastsig_ptr is not NULL, whenever a signal occurs, 68 | // the indicated int will be set to the signal number. This can be used to 69 | // detect termination signals. Not available on SPU. 70 | //----------------------------------------------------------------------------- 71 | void cwSighandInit(int * lastsig_ptr); 72 | void cwSighandTerm(void); 73 | 74 | 75 | //---------------------------------------------------------------------------- 76 | // Convenience macros 77 | //---------------------------------------------------------------------------- 78 | // 1st, 2nd and 3rd dimensions of declared arrays 79 | #define DIM1(array) ( sizeof(array) / sizeof(array[0]) ) 80 | #define DIM2(array) ( sizeof(array[0]) / sizeof(array[0][0]) ) 81 | #define DIM3(array) ( sizeof(array[0][0]) / sizeof(array[0][0][0]) ) 82 | // Round up or down to a power of 2 boundry 83 | #define BDYUP(x, pwr2) ((unsigned)((x)+(pwr2)-1) & ~((unsigned)(pwr2)-1)) 84 | #define BDYDN(x, pwr2) ((unsigned)(x) & ~((unsigned)(pwr2)-1)) 85 | // Min and Max 86 | #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 87 | #define MAX(a,b) ( (a) > (b) ? 
(a) : (b) ) 88 | #endif 89 | 90 | // --- end of cw_util.h --- 91 | -------------------------------------------------------------------------------- /cell/ppu/Makefile.am: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2012 Los Alamos National Security, LLC. 3 | # All rights reserved. 4 | # 5 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | # and Los Alamos National Security, LLC. Permission is granted to the public to 9 | # copy and use this software without charge, provided that this Notice and any 10 | # statement of authorship are reproduced on all copies. Neither the U.S. 11 | # Government nor LANS makes any warranty, express or implied, or assumes any 12 | # liability or responsibility for the use of this software. 
13 | ################################################################################ 14 | 15 | my_ppubindir = $(bindir)/cell/ppu 16 | 17 | my_ppubin_PROGRAMS = 18 | 19 | if SMGC_BUILD_CELL 20 | my_ppubin_PROGRAMS += cell_check_ppu 21 | endif 22 | 23 | CC=ppu-gcc 24 | 25 | cell_check_ppu_SOURCES = \ 26 | cell_check_ppu.c cell_check.h cell_check_internal.h cw_util.h cw_util.c 27 | 28 | cell_check_ppu_LDFLAGS = \ 29 | -L/opt/cell/sysroot/usr/lib64/dacs/debug 30 | 31 | cell_check_ppu_LDADD = \ 32 | -ldacs_hybrid -lstdc++ 33 | -------------------------------------------------------------------------------- /cell/ppu/cell_check.h: -------------------------------------------------------------------------------- 1 | ../cell_check.h -------------------------------------------------------------------------------- /cell/ppu/cell_check_internal.h: -------------------------------------------------------------------------------- 1 | ../cell_check_internal.h -------------------------------------------------------------------------------- /cell/ppu/cell_check_ppu.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2011 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 
13 | */ 14 | 15 | /***************************************************************************** 16 | * cell_check_ppu.c - Roadrunner Cell connectivity check out routine 17 | * 18 | * Perform a series of basic tests to ensure that Cells can be reserved, 19 | * PPU programs launched and basic message function. 20 | * 21 | * Change History: 22 | * 23 | * 2010-09-16 cgw Initial version 24 | * 2010-09-20 cgw Clean up failure path 25 | * 26 | *****************************************************************************/ 27 | #include 28 | #include "cell_check.h" 29 | #include "cell_check_internal.h" 30 | 31 | int main(int argc, char * * argv) { 32 | dacs_wid_t wid; 33 | uint32_t msgR, msgS; 34 | int init_ok = 0, wid_reserve_ok = 0; 35 | 36 | cwMsgSetTag(NULL, "cell_check@$H[$A] "); 37 | cwSighandInit(NULL); 38 | g.version = 11; 39 | g.fail = 0; 40 | g.msg_level = CELL_CHECK_MSG_DEBUG; 41 | g.test_level = CELL_CHECK_TEST_INIT; 42 | 43 | if (argc >= 2) g.msg_level = strtol(argv[1], NULL, 10); 44 | if (argc >= 3) g.test_level = strtol(argv[2], NULL, 10); 45 | 46 | // Initialize the runtime 47 | DACS( 51, 1, dacs_init, (DACS_INIT_FLAGS_NONE) ); 48 | init_ok = !g.fail; 49 | 50 | if (g.test_level >= CELL_CHECK_TEST_SENDREC) { 51 | // Receive a message containing a single uint32_t 52 | if (! g.fail) { 53 | DACS( 52, 1, dacs_wid_reserve, (&wid) ); 54 | wid_reserve_ok = !g.fail; 55 | } 56 | if (! g.fail) { 57 | DACS( 52, 2, dacs_recv, (&msgR, sizeof(msgR), DACS_DE_PARENT, DACS_PID_PARENT, 58 | DACS_STREAM_ALL, wid, DACS_BYTE_SWAP_WORD) ); 59 | } 60 | if (! g.fail) { 61 | DACS( 52, 3, dacs_wait, (wid) ); 62 | } 63 | // Send the uint32_t back, incremented by 1 64 | if (! g.fail) { 65 | msgS = msgR + 1; 66 | DACS( 52, 4, dacs_send, (&msgS, sizeof(msgS), DACS_DE_PARENT, DACS_PID_PARENT, 67 | 0, wid, DACS_BYTE_SWAP_WORD) ); 68 | } 69 | if (! 
g.fail) { 70 | DACS( 52, 5, dacs_wait, (wid) ); 71 | } 72 | if (wid_reserve_ok) { 73 | DACS( 52, 6, dacs_wid_release, (&wid) ); 74 | } 75 | } // if (g.test_level >= CELL_CHECK_TEST_SENDREC) 76 | 77 | if (init_ok) { 78 | DACS( 53, 1, dacs_mailbox_write, (&g.fail, DACS_DE_PARENT, DACS_PID_PARENT) ); 79 | DACS( 53, 2, dacs_exit, () ); 80 | } 81 | 82 | cwSighandTerm(); 83 | return g.fail; 84 | }; 85 | 86 | /* --- end of cell_check_ppu.c --- */ 87 | -------------------------------------------------------------------------------- /cell/ppu/cw_util.c: -------------------------------------------------------------------------------- 1 | ../cw_util.c -------------------------------------------------------------------------------- /cell/ppu/cw_util.h: -------------------------------------------------------------------------------- 1 | ../cw_util.h -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2011-2012 Los Alamos National Security, LLC. 3 | # All rights reserved. 4 | # 5 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | # and Los Alamos National Security, LLC. Permission is granted to the public to 9 | # copy and use this software without charge, provided that this Notice and any 10 | # statement of authorship are reproduced on all copies. Neither the U.S. 11 | # Government nor LANS makes any warranty, express or implied, or assumes any 12 | # liability or responsibility for the use of this software. 
################################################################################

AC_PREREQ([2.60])

AC_INIT([supermagic], [1.3-dev], [samuel@lanl.gov])

AC_USE_SYSTEM_EXTENSIONS

dnl check if AM_SILENT_RULES are supported
dnl if so, build using AM_SILENT_RULES
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])

dnl note: -Wall -Werror are automake options. they are NOT compiler flags
AM_INIT_AUTOMAKE([-Wall -Werror foreign])

dnl AM_CONFIG_HEADER was removed in automake 1.13; AC_CONFIG_HEADERS is the
dnl supported spelling.
AC_CONFIG_HEADERS([config.h])

dnl checks for programs.
dnl note that we are checking for mpicc first, the next check will verify CC
AC_PROG_CC([mpicc cc])

AM_PROG_CC_C_O
AC_PROG_CC_C99

AC_PROG_LIBTOOL
AC_CONFIG_MACRO_DIR([m4])

dnl check for adequate mpi support: the test program must include mpi.h and
dnl link against MPI_Finalize using $CC alone.
AC_MSG_CHECKING([if $CC can compile MPI applications])
AC_LINK_IFELSE([
    AC_LANG_PROGRAM(
        [[#include <mpi.h>]],
        [[MPI_Finalize();]])],
    [AC_MSG_RESULT([yes])],
    [AC_MSG_RESULT([no])
     AC_MSG_ERROR([$CC cannot compile MPI applications. cannot continue.])]
)

dnl checks for libraries.
SMGC_CELL

dnl checks for header files.
AC_CHECK_HEADERS([\
    inttypes.h limits.h stdint.h stdlib.h string.h unistd.h \
    getopt.h time.h fcntl.h arpa/inet.h netdb.h \
    sys/time.h stdio.h errno.h stdbool.h signal.h])

dnl checks for typedefs, structures, and compiler characteristics.
AC_TYPE_PID_T
AC_TYPE_SIZE_T

dnl checks for library functions.
66 | AC_FUNC_MALLOC 67 | AC_CHECK_FUNCS([memset strerror strtoul]) 68 | 69 | AC_CONFIG_FILES([Makefile 70 | cell/Makefile 71 | cell/ppu/Makefile]) 72 | 73 | AC_OUTPUT 74 | 75 | dnl let the user know the configuration 76 | 77 | cat << EOF 78 | 79 | supermagic configuration 80 | ######################## 81 | 82 | CC : $CC 83 | CFLAGS : $CFLAGS 84 | LDFLAGS : $LDFLAGS 85 | LIBS : $LIBS 86 | CPPFLAGS: $CPPFLAGS 87 | CPP : $CPP 88 | 89 | EOF 90 | -------------------------------------------------------------------------------- /m4/smgc-cell.m4: -------------------------------------------------------------------------------- 1 | dnl 2 | dnl Copyright (c) 2012 Los Alamos National Security, LLC. 3 | dnl All rights reserved. 4 | dnl 5 | dnl This program was prepared by Los Alamos National Security, LLC at Los 6 | dnl Alamos National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with 7 | dnl the U.S. Department of Energy (DOE). All rights in the program are 8 | dnl reserved by the DOE and Los Alamos National Security, LLC. Permission is 9 | dnl granted to the public to copy and use this software without charge, 10 | dnl provided that this Notice and any statement of authorship are reproduced on 11 | dnl all copies. Neither the U.S. Government nor LANS makes any warranty, 12 | dnl express or implied, or assumes any liability or responsibility for the use 13 | dnl of this software. 
14 | dnl 15 | 16 | dnl cell support configury 17 | 18 | AC_DEFUN([SMGC_CELL], [ 19 | AC_ARG_ENABLE([cell], 20 | [AS_HELP_STRING([--enable-cell], 21 | [Enable Cell support (default:disabled)])]) 22 | 23 | AC_MSG_CHECKING([if want cell support]) 24 | 25 | if test "x$enable_cell" = "xyes"; then 26 | AC_MSG_RESULT([yes]) 27 | dnl now check if we have adequate support 28 | AC_PATH_PROG([PPUGCC], [ppu-gcc]) 29 | if test -z "$PPUGCC"; then 30 | enable_cell_support=0 31 | AC_MSG_ERROR([Cell support requested, but not available.]) 32 | else 33 | enable_cell_support=1 34 | fi 35 | else 36 | AC_MSG_RESULT([no]) 37 | enable_cell_support=0 38 | fi 39 | 40 | AM_CONDITIONAL([SMGC_BUILD_CELL], [test "x$enable_cell_support" = "x1"]) 41 | 42 | AC_DEFINE_UNQUOTED([SMGC_HAVE_CELL_SUPPORT], 43 | [$enable_cell_support], 44 | [Define to 1 if have Cell support.]) 45 | ])dnl 46 | -------------------------------------------------------------------------------- /supermagic.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2022 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 13 | */ 14 | 15 | /** 16 | * @author Samuel K. Gutierrez - samuelREMOVEME@lanl.gov 17 | * found a bug? have an idea? please let me know. 
18 | */ 19 | 20 | #include "supermagic.h" 21 | 22 | /* ////////////////////////////////////////////////////////////////////////// */ 23 | /* ////////////////////////////////////////////////////////////////////////// */ 24 | /* private utility functions */ 25 | /* ////////////////////////////////////////////////////////////////////////// */ 26 | /* ////////////////////////////////////////////////////////////////////////// */ 27 | 28 | #if 0 29 | /* ////////////////////////////////////////////////////////////////////////// */ 30 | static int 31 | qsort_cmp_uli(const void *p1, 32 | const void *p2) 33 | { 34 | return (*(unsigned long int *)p1 - *(unsigned long int *)p2); 35 | } 36 | 37 | /* ////////////////////////////////////////////////////////////////////////// */ 38 | static int 39 | get_my_global_color(bool used_cached, int *out_color) 40 | { 41 | unsigned long int *net_nums = NULL; 42 | int mpi_rc = MPI_ERR_OTHER, rc = SMGC_ERROR; 43 | int i = 0, node_i = 0; 44 | unsigned long int prev_num; 45 | 46 | if (NULL == out_color) { 47 | return SMGC_ERROR; 48 | } 49 | 50 | /* do we have to figure this out - or can we use a cached value? 
*/ 51 | if (SMGC_COLOR_INVALID != my_color && used_cached) { 52 | *out_color = my_color; 53 | return SMGC_SUCCESS; 54 | } 55 | 56 | /* if we are here, then we have to do some work :-( */ 57 | net_nums = (unsigned long int *)calloc(num_ranks, 58 | sizeof(unsigned long int)); 59 | if (NULL == net_nums) { 60 | SMGC_ERR_MSG("out of resources\n"); 61 | return SMGC_ERROR; 62 | } 63 | if (SMGC_SUCCESS != get_net_num(host_name_buff, &my_net_num)) { 64 | SMGC_ERR_MSG("get_net_num failure\n"); 65 | goto out; 66 | } 67 | 68 | SMGC_MPF(" mpi_comm_world: exchanging network infomation\n" 69 | " mpi_allgather buffer size: %lu B\n", 70 | (num_ranks * sizeof(unsigned long int))); 71 | 72 | mpi_rc = MPI_Allgather(&my_net_num, 1, MPI_UNSIGNED_LONG, net_nums, 1, 73 | MPI_UNSIGNED_LONG, MPI_COMM_WORLD); 74 | SMGC_MPICHK(mpi_rc, out); 75 | 76 | qsort(net_nums, (size_t)num_ranks, sizeof(unsigned long int), 77 | qsort_cmp_uli); 78 | 79 | prev_num = net_nums[i++]; 80 | 81 | while (i < num_ranks && prev_num != my_net_num) { 82 | while (net_nums[i] == prev_num) { 83 | ++i; 84 | } 85 | ++node_i; 86 | prev_num = net_nums[i]; 87 | } 88 | 89 | *out_color = node_i; 90 | rc = SMGC_SUCCESS; 91 | 92 | out: 93 | if (NULL != net_nums) free(net_nums); 94 | return rc; 95 | } 96 | 97 | /* ////////////////////////////////////////////////////////////////////////// */ 98 | static int 99 | get_net_num(const char *target_hostname, 100 | unsigned long int *out_net_num) 101 | { 102 | struct hostent *host = NULL; 103 | 104 | if (NULL == target_hostname || NULL == out_net_num) { 105 | return SMGC_ERROR; 106 | } 107 | 108 | if (NULL == (host = gethostbyname(target_hostname))) { 109 | SMGC_ERR_MSG("gethostbyname error\n"); 110 | /* epic fail! 
*/ 111 | return SMGC_ERROR; 112 | } 113 | 114 | /* htonl used here for good measure - probably not needed */ 115 | *out_net_num = (unsigned long int) 116 | htonl(inet_network(inet_ntoa(*(struct in_addr *)host->h_addr))); 117 | 118 | return SMGC_SUCCESS; 119 | } 120 | #endif 121 | 122 | /* ////////////////////////////////////////////////////////////////////////// */ 123 | static int 124 | get_mult(char symbol, int *resp) 125 | { 126 | *resp = -42; 127 | switch (symbol) { 128 | case 'B': 129 | *resp = 1; 130 | break; 131 | case 'k': 132 | *resp = 1024; 133 | break; 134 | case 'M': 135 | *resp = 1048576; 136 | break; 137 | case 'G': 138 | *resp = 1073741824; 139 | break; 140 | default: 141 | SMGC_ERR_MSG("\'%c\' is not a supported message size option.\n", 142 | symbol); 143 | /* fail */ 144 | return SMGC_ERROR; 145 | /* never reached */ 146 | break; 147 | } 148 | return SMGC_SUCCESS; 149 | } 150 | 151 | /* ////////////////////////////////////////////////////////////////////////// */ 152 | static int 153 | get_msg_size(const char *str, const char *label, int *real_msg_size) 154 | { 155 | long unit_size = -1; 156 | /* default multiplier is 1 (B) */ 157 | int mult = 1; 158 | char *end_ptr = NULL; 159 | 160 | errno = 0; 161 | unit_size = strtol(str, &end_ptr, 10); 162 | 163 | /* was there an error */ 164 | if ((EINVAL == errno && 0 == unit_size) || 165 | (ERANGE == errno && (LONG_MIN == unit_size || LONG_MAX == unit_size))) { 166 | int err = errno; 167 | SMGC_ERR_MSG("strtol error: %d (%s)\n", err, strerror(err)); 168 | return SMGC_ERROR; 169 | } 170 | /* catch negative message sizes */ 171 | else if (unit_size < 0) { 172 | SMGC_ERR_MSG("negative %s sizes are not supported.\n", label); 173 | return SMGC_ERROR; 174 | } 175 | /* all is well with the value returned by strtol */ 176 | else { 177 | /* big hammer */ 178 | uint64_t us = 0, m = 0, tmp = 0; 179 | /* a multiplier was provided by the user */ 180 | if ('\0' != *end_ptr) { 181 | /* get the multiplier */ 182 | if 
(SMGC_ERROR == get_mult(end_ptr[0], &mult)) { 183 | /* fail */ 184 | return SMGC_ERROR; 185 | } 186 | } 187 | us = (uint64_t)unit_size; 188 | m = (uint64_t)mult; 189 | /* what is the real message size (B) */ 190 | tmp = (us * m); 191 | if (tmp > INT_MAX || tmp < us || tmp < m) { 192 | SMGC_ERR_MSG("requested %s size is too large.\n", label); 193 | return SMGC_ERROR; 194 | } 195 | else { 196 | *real_msg_size = (int)tmp; 197 | } 198 | return SMGC_SUCCESS; 199 | } 200 | /* never reached */ 201 | return SMGC_ERROR; 202 | } 203 | 204 | /* ////////////////////////////////////////////////////////////////////////// */ 205 | /** 206 | * returns number of tests within test suite pointed to by test_suite_ptr 207 | */ 208 | static int 209 | get_num_tests(smgc_test_t *test_suite_ptr) 210 | { 211 | int i; 212 | for (i = 0; i < SMGC_MAX_TESTS; ++i) { 213 | if (NULL == test_suite_ptr[i].tname || NULL == test_suite_ptr[i].tfp) { 214 | break; 215 | } 216 | } 217 | return i; 218 | } 219 | 220 | /* ////////////////////////////////////////////////////////////////////////// */ 221 | /** 222 | * prints all available tests 223 | */ 224 | static void 225 | list_all_tests(void) 226 | { 227 | int i, num_tests = get_num_tests(smgc_all_tests); 228 | 229 | SMGC_MPF("available tests:\n"); 230 | for (i = 0; i < num_tests; ++i) { 231 | SMGC_MPF(" %s\n", smgc_all_tests[i].tname); 232 | } 233 | SMGC_MPF("\n"); 234 | } 235 | 236 | /* ////////////////////////////////////////////////////////////////////////// */ 237 | /** 238 | * prints usage information 239 | */ 240 | static void 241 | usage(void) 242 | { 243 | SMGC_MPF("\n%s\n", SMGC_USAGE); 244 | list_all_tests(); 245 | SMGC_MPF("%s\n", SMGC_EXAMPLE); 246 | } 247 | 248 | /* ////////////////////////////////////////////////////////////////////////// */ 249 | static bool 250 | is_valid_test(char *test_name, int *test_index) 251 | { 252 | int i; 253 | 254 | for (i = 0; NULL != smgc_all_tests[i].tname; ++i) { 255 | if (0 == strcmp(test_name, 
smgc_all_tests[i].tname)) { 256 | if (NULL != test_index) { 257 | *test_index = i; 258 | } 259 | return true; 260 | } 261 | } 262 | 263 | /* if test_name is not valid, *test_index is undefined */ 264 | return false; 265 | } 266 | 267 | /* ////////////////////////////////////////////////////////////////////////// */ 268 | static int 269 | create_test_list(const char *test_list_str, 270 | smgc_test_t **smgc_custom_jb_test_ptr) 271 | { 272 | char *test_string = NULL; 273 | char *last = NULL; 274 | char *tmp_list = NULL; 275 | int num_tests = 0; 276 | int test_index = 0; 277 | smgc_test_t *tmp_ptr = *smgc_custom_jb_test_ptr; 278 | 279 | if (tests_on_heap) { 280 | if (NULL != smgc_test_ptr) { 281 | free(smgc_test_ptr); 282 | smgc_test_ptr = NULL; 283 | } 284 | } 285 | 286 | if (NULL == (tmp_list = strdup(test_list_str))) { 287 | SMGC_ERR_MSG("out of resources\n"); 288 | return SMGC_ERROR; 289 | } 290 | else if (NULL == (tmp_ptr = (smgc_test_t *)malloc(sizeof(smgc_test_t) * 291 | SMGC_MAX_TESTS))) { 292 | free(tmp_list); 293 | SMGC_ERR_MSG("out of resources\n"); 294 | return SMGC_ERROR; 295 | } 296 | 297 | /* if we are here, then let the games begin */ 298 | 299 | for (test_string = strtok_r(tmp_list, ",", &last); 300 | NULL != test_string && SMGC_MAX_TESTS >= num_tests; 301 | test_string = strtok_r(NULL, ",", &last)) { 302 | if (is_valid_test(test_string, &test_index)) { 303 | tmp_ptr[num_tests++] = smgc_all_tests[test_index]; 304 | } 305 | } 306 | 307 | /* cap with NULLs */ 308 | tmp_ptr[num_tests].tname = NULL; 309 | tmp_ptr[num_tests].tfp = NULL; 310 | 311 | /* update the test suite */ 312 | upd_test_suite(tmp_ptr); 313 | tests_on_heap = true; 314 | return SMGC_SUCCESS; 315 | } 316 | 317 | /* ////////////////////////////////////////////////////////////////////////// */ 318 | static char * 319 | get_time_str(time_t *raw_time) 320 | { 321 | char tsb[SMGC_MAX_TIME_LEN]; 322 | struct tm *bd_time_ptr = NULL; 323 | 324 | time(raw_time); 325 | bd_time_ptr = 
localtime(raw_time); 326 | 327 | strftime(tsb, SMGC_MAX_TIME_LEN - 1, SMGC_DATE_FORMAT, bd_time_ptr); 328 | /* caller is responsible for freeing returned resources */ 329 | return strdup(tsb); 330 | } 331 | 332 | /* ////////////////////////////////////////////////////////////////////////// */ 333 | static char * 334 | get_rhn(int rank) 335 | { 336 | if (NULL != rhname_lut_ptr) { 337 | return &(rhname_lut_ptr[rank * SMGC_HOST_NAME_MAX]); 338 | } 339 | else { 340 | return rhn_unknown; 341 | } 342 | } 343 | 344 | /* ////////////////////////////////////////////////////////////////////////// */ 345 | static void 346 | upd_test_suite(smgc_test_t *new_test_suite_ptr) 347 | { 348 | smgc_test_ptr = new_test_suite_ptr; 349 | num_tests = get_num_tests(smgc_test_ptr); 350 | } 351 | 352 | /* ////////////////////////////////////////////////////////////////////////// */ 353 | static void 354 | set_jb_params() 355 | { 356 | if (num_ranks >= SMGC_LRG_JB) { 357 | msg_size = SMGC_LRG_JB_MSG_SIZE; 358 | be_verbose = false; 359 | be_quiet = true; 360 | upd_test_suite(smgc_lrg_jb_tests); 361 | } 362 | else { 363 | upd_test_suite(smgc_small_jb_tests); 364 | } 365 | } 366 | 367 | /* ////////////////////////////////////////////////////////////////////////// */ 368 | static int 369 | io_stats(double_int_t in_dint, char *label, int unit_type) 370 | { 371 | char *unit = NULL; 372 | double val = in_dint.val; 373 | double sum = 0.0; 374 | double_int_t max = {0.0, 0}, min = {0.0, 0}; 375 | int mpi_ret_code = MPI_ERR_OTHER; 376 | 377 | switch (unit_type) { 378 | case IO_STATS_TIME_S: 379 | unit = SMGC_TIME_S_UNIT_STR; 380 | break; 381 | case IO_STATS_MBS: 382 | unit = SMGC_MBS_UNIT_STR; 383 | break; 384 | default: 385 | SMGC_ERR_MSG("io_stats::unknow unit_type\n"); 386 | goto err; 387 | /* never reached */ 388 | break; 389 | } 390 | 391 | mpi_ret_code = MPI_Reduce(&in_dint, &max, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 392 | SMGC_MASTER_RANK, MPI_COMM_WORLD); 393 | SMGC_MPICHK(mpi_ret_code, err); 394 | 
395 | mpi_ret_code = MPI_Reduce(&in_dint, &min, 1, MPI_DOUBLE_INT, MPI_MINLOC, 396 | SMGC_MASTER_RANK, MPI_COMM_WORLD); 397 | SMGC_MPICHK(mpi_ret_code, err); 398 | 399 | mpi_ret_code = MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 400 | SMGC_MASTER_RANK, MPI_COMM_WORLD); 401 | SMGC_MPICHK(mpi_ret_code, err); 402 | 403 | SMGC_MPF(" --- %s:\n", label); 404 | SMGC_MPF(" max rank: %06d (%s)\n", max.rank, get_rhn(max.rank)); 405 | SMGC_MPF(" max %s: %.3f %s\n", label, max.val, unit); 406 | SMGC_MPF(" min rank: %06d (%s)\n", min.rank, get_rhn(min.rank)); 407 | SMGC_MPF(" min %s: %.3f %s\n", label, min.val, unit); 408 | SMGC_MPF(" ave %s: %.3f %s\n", label, sum / num_ranks, unit); 409 | SMGC_MPF(" aggregate %s: %.3f %s\n", label, sum, unit); 410 | 411 | return SMGC_SUCCESS; 412 | err: 413 | return SMGC_ERROR; 414 | } 415 | 416 | /* ////////////////////////////////////////////////////////////////////////// */ 417 | /* ////////////////////////////////////////////////////////////////////////// */ 418 | /* test functions */ 419 | /* ////////////////////////////////////////////////////////////////////////// */ 420 | /* ////////////////////////////////////////////////////////////////////////// */ 421 | 422 | /* ////////////////////////////////////////////////////////////////////////// */ 423 | /* test by request only */ 424 | static int 425 | n_to_n_io(void) 426 | { 427 | int i = 0, j = 0, fd = -1, rc = SMGC_ERROR, mpi_rc = MPI_ERR_OTHER; 428 | /* what we are going to write and what we should read, buff rest char */ 429 | char wr_char = 'j', clobber_char = 'x'; 430 | ssize_t bytes_written = -1, bytes_read = -1; 431 | /* the size of the file that i'll be writing (in B) */ 432 | size_t buff_size = file_size; 433 | char *my_file_name = NULL; 434 | /* points to buffer used for both reading and writing */ 435 | char *buff = NULL; 436 | /* variables for recording time */ 437 | double open_time = 0.0, close_time = 0.0, write_start = 0.0, 438 | write_fin = 0.0; 439 | double read_start 
= 0.0, read_fin = 0.0, lseek_start = 0.0, lseek_fin = 0.0, 440 | effe_bw_time_fix = 0.0, memset_start = 0.0, memset_fin = 0.0, 441 | tmp_dbl = 0.0; 442 | /* bandwidth variables */ 443 | double effe_bw = 0.0, read_bw = 0.0, write_bw = 0.0; 444 | /* for reduce operations that find max and min rank */ 445 | double_int_t in_effe = {0.0, 0}; 446 | double_int_t in_wr = {0.0, 0}; 447 | double_int_t in_rd = {0.0, 0}; 448 | 449 | /* no work to do, return success and move on */ 450 | if (0 == num_fs_test_paths) { 451 | SMGC_MPF(" zero paths requested via -w option. skipping test.\n"); 452 | return SMGC_SUCCESS; 453 | } 454 | 455 | /* if we are here, let the games begin! */ 456 | 457 | if (NULL == (buff = (char *)malloc(buff_size * sizeof(char)))) { 458 | SMGC_ERR_MSG("out of resources\n"); 459 | return SMGC_ERROR; 460 | } 461 | memset(buff, wr_char, buff_size); 462 | 463 | SMGC_MPF(" file size (per rank process): %lu B\n", buff_size); 464 | 465 | /* write to all requested paths */ 466 | for (i = 0; i < num_fs_test_paths; ++i) { 467 | if (-1 == asprintf(&my_file_name, "%s/%s_%d", fs_test_list[i], 468 | SMGC_MPI_FILE_NAME, my_rank)) { 469 | SMGC_ERR_MSG("out of resources\n"); 470 | goto out; 471 | } 472 | if (NULL == my_file_name) { 473 | SMGC_ERR_MSG("out of resources\n"); 474 | goto out; 475 | } 476 | 477 | /* let the user know we are working on it */ 478 | SMGC_MPF(" === mpi_comm_world: writing to %s\n", fs_test_list[i]); 479 | 480 | /* barrier before we start */ 481 | mpi_rc = MPI_Barrier(MPI_COMM_WORLD); 482 | SMGC_MPICHK(mpi_rc, out); 483 | 484 | open_time = MPI_Wtime(); 485 | if (-1 == (fd = open(my_file_name, O_CREAT | O_RDWR, 0600))) { 486 | int error = errno; 487 | SMGC_ERR_MSG("open failed with errno: %d (%s)\n", error, 488 | strerror(error)); 489 | goto out; 490 | } 491 | 492 | write_start = MPI_Wtime(); 493 | if (-1 == (bytes_written = write(fd, buff, buff_size))) { 494 | int error = errno; 495 | SMGC_ERR_MSG("write failed with errno: %d (%s)\n", error, 496 | 
strerror(error)); 497 | goto out; 498 | } 499 | write_fin = MPI_Wtime(); 500 | 501 | /* seek to beginning of file - time this step so we can subtract the 502 | * time spent here from our bw calculations 503 | */ 504 | lseek_start = MPI_Wtime(); 505 | if(-1 == lseek(fd, 0, SEEK_SET)) { 506 | int error = errno; 507 | SMGC_ERR_MSG("lseek failed with errno: %d (%s)\n", error, 508 | strerror(error)); 509 | goto out; 510 | } 511 | lseek_fin = MPI_Wtime(); 512 | 513 | memset_start = MPI_Wtime(); 514 | /* overwrite buff's contents before read */ 515 | memset(buff, clobber_char, buff_size); 516 | memset_fin = MPI_Wtime(); 517 | 518 | read_start = MPI_Wtime(); 519 | if (-1 == (bytes_read = read(fd, buff, buff_size))) { 520 | int error = errno; 521 | SMGC_ERR_MSG("read failed with errno: %d (%s)\n", error, 522 | strerror(error)); 523 | goto out; 524 | } 525 | read_fin = MPI_Wtime(); 526 | 527 | if (0 != close(fd)) { 528 | int error = errno; 529 | SMGC_ERR_MSG("close failed with errno: %d (%s)\n", error, 530 | strerror(error)); 531 | goto out; 532 | } 533 | close_time = MPI_Wtime(); 534 | 535 | fd = -1; 536 | 537 | if (0 != unlink(my_file_name)) { 538 | int error = errno; 539 | SMGC_ERR_MSG("unlink failed with errno: %d (%s)\n", error, 540 | strerror(error)); 541 | goto out; 542 | } 543 | /* check integrity of read */ 544 | if (bytes_written != bytes_read) { 545 | SMGC_ERR_MSG("write/read mismatch. 
wrote %lu read %lu\n", 546 | bytes_written, bytes_read); 547 | goto out; 548 | } 549 | /* iterate over char buff - making certain all is well */ 550 | for (j = 0; j < bytes_written; ++j) { 551 | if (wr_char != buff[j]) { 552 | SMGC_ERR_MSG( 553 | "characters read do not match characters written!\n" 554 | ); 555 | goto out; 556 | } 557 | } 558 | 559 | /* subtract time not spent in benchmarked routines */ 560 | effe_bw_time_fix = (lseek_fin - lseek_start) + 561 | (memset_fin - memset_start); 562 | 563 | /* calculate bandwidths */ 564 | 565 | /* negative and 0 length file size protection provided by lower level */ 566 | /* zero value fixup - good enough for our purposes */ 567 | 568 | /* effective bandwidth */ 569 | if (0.0 >= (tmp_dbl = ((close_time - open_time) - effe_bw_time_fix))) { 570 | effe_bw = 0.0; 571 | } 572 | else { 573 | effe_bw = ((double)buff_size / tmp_dbl / (double)SMGC_MB_SIZE); 574 | } 575 | /* write bandwidth */ 576 | if (0.0 >= (tmp_dbl = (write_fin - write_start))) { 577 | write_bw = 0.0; 578 | } 579 | else { 580 | write_bw = ((double)buff_size / tmp_dbl / (double)SMGC_MB_SIZE); 581 | } 582 | /* read bandwidth */ 583 | if (0.0 >= (tmp_dbl = (read_fin - read_start))) { 584 | read_bw = 0.0; 585 | } 586 | else { 587 | read_bw = ((double)buff_size / tmp_dbl / (double)SMGC_MB_SIZE); 588 | } 589 | 590 | /* prepare values for reduce */ 591 | in_effe.val = effe_bw; 592 | in_effe.rank = my_rank; 593 | in_wr.val = write_bw; 594 | in_wr.rank = my_rank; 595 | in_rd.val = read_bw; 596 | in_rd.rank = my_rank; 597 | 598 | /* calculate effective bandwidth stats */ 599 | if (SMGC_SUCCESS != (rc = io_stats(in_effe, "effective write bandwidth", 600 | IO_STATS_MBS))) { 601 | goto out; 602 | } 603 | /* calculate write bandwidth stats */ 604 | if (SMGC_SUCCESS != (rc = io_stats(in_wr, "pure write bandwidth", 605 | IO_STATS_MBS))) { 606 | goto out; 607 | } 608 | /* calculate read bandwidth stats */ 609 | if (SMGC_SUCCESS != (rc = io_stats(in_rd, "pure read bandwidth", 
610 | IO_STATS_MBS))) { 611 | goto out; 612 | } 613 | 614 | /* all is well for this iteration */ 615 | if (NULL != my_file_name) { 616 | free(my_file_name); 617 | my_file_name = NULL; 618 | } 619 | } 620 | 621 | /* all is well */ 622 | rc = SMGC_SUCCESS; 623 | out: 624 | if (-1 != fd) { 625 | close(fd); 626 | unlink(my_file_name); 627 | } 628 | if (NULL != buff) free(buff); 629 | if (NULL != my_file_name) free(my_file_name); 630 | return rc; 631 | } 632 | 633 | /* ////////////////////////////////////////////////////////////////////////// */ 634 | static int 635 | hello_world(void) 636 | { 637 | int res_len = 0; 638 | char name_buff[MPI_MAX_PROCESSOR_NAME + 1]; 639 | 640 | mpi_ret_code = MPI_Get_processor_name(name_buff, &res_len); 641 | SMGC_MPICHK(mpi_ret_code, err); 642 | 643 | SMGC_FPF(stdout, " hello from rank %06d (%s) of %06d\n", my_rank, 644 | name_buff, num_ranks); 645 | 646 | return SMGC_SUCCESS; 647 | err: 648 | return SMGC_ERROR; 649 | } 650 | 651 | /* ////////////////////////////////////////////////////////////////////////// */ 652 | static int 653 | stat_paths(void) 654 | { 655 | int i = 0; 656 | struct stat buff; 657 | /* nothing to do, so that's easy */ 658 | if (0 == num_stat_paths) { 659 | SMGC_MPF(" zero paths requested via -s option. skipping test.\n"); 660 | return SMGC_SUCCESS; 661 | } 662 | 663 | for (i = 0; i < num_stat_paths; ++i) { 664 | SMGC_MPF(" mpi_comm_world: stating %s\n", stat_list[i]); 665 | /* try to stat the file */ 666 | if (0 != stat(stat_list[i], &buff)) { 667 | SMGC_FPF(stderr, " !!! 
rank %d (%s) unable to stat %s\n", my_rank, 668 | host_name_buff, stat_list[i]); 669 | return SMGC_ERROR; 670 | } 671 | } 672 | return SMGC_SUCCESS; 673 | } 674 | 675 | /* ////////////////////////////////////////////////////////////////////////// */ 676 | static void 677 | kill_mpi_messaging(int sig) 678 | { 679 | (void)sig; 680 | /* no check in error path - hope for the best */ 681 | gethostname(host_name_buff, SMGC_HOST_NAME_MAX - 1); 682 | host_name_buff[SMGC_HOST_NAME_MAX - 1] = '\0'; 683 | 684 | fprintf(stderr, "\n########## HANG DETECTED " 685 | "[on loop iteration: %d] %d (%s) ==> %d (%s) ==> %d (%s) " 686 | "##########\n", 687 | glob_loop_iter, glob_l_neighbor, get_rhn(glob_l_neighbor), my_rank, 688 | host_name_buff, glob_r_neighbor, get_rhn(glob_r_neighbor)); 689 | 690 | exit(EXIT_FAILURE); 691 | } 692 | 693 | /* ////////////////////////////////////////////////////////////////////////// */ 694 | #if SMGC_HAVE_CELL_SUPPORT == 1 695 | 696 | /* in seconds */ 697 | #define CELL_TEST_TIMEOUT 480 698 | 699 | /* ////////////////////////////////////////////////////////////////////////// */ 700 | static void 701 | kill_cell_check(void) 702 | { 703 | /* no check in error path - hope for the best */ 704 | gethostname(host_name_buff, SMGC_HOST_NAME_MAX - 1); 705 | host_name_buff[SMGC_HOST_NAME_MAX - 1] = '\0'; 706 | 707 | SMGC_ERR_MSG( 708 | "rank %d (%s) unable to execute cell_sanity test within %d s.\n", 709 | my_rank, host_name_buff, CELL_TEST_TIMEOUT); 710 | 711 | exit(EXIT_FAILURE); 712 | } 713 | 714 | /* ////////////////////////////////////////////////////////////////////////// */ 715 | static int 716 | cell_sanity(void) 717 | { 718 | unsigned long rc = 0; 719 | struct itimerval itimer; 720 | 721 | itimer.it_value.tv_sec = CELL_TEST_TIMEOUT; 722 | itimer.it_value.tv_usec = 0; 723 | itimer.it_interval = itimer.it_value; 724 | 725 | SMGC_MPF(" mpi_comm_world: running cell diagnostics\n"); 726 | 727 | signal(SIGALRM, (void(*)(int))&kill_cell_check); 728 | 
setitimer(ITIMER_REAL, &itimer, NULL); 729 | 730 | if (0 != (rc = cell_check(CELL_CHECK_MSG_FAIL, CELL_CHECK_TEST_ALL))) { 731 | SMGC_ERR_MSG("cell_check failure - return code: %lu\n", rc); 732 | goto err; 733 | } 734 | else { 735 | /* all is well... disable the alarm */ 736 | itimer.it_value.tv_sec = 0; 737 | itimer.it_value.tv_usec = 0; 738 | itimer.it_interval = itimer.it_value; 739 | setitimer(ITIMER_REAL, &itimer, NULL); 740 | } 741 | 742 | return SMGC_SUCCESS; 743 | err: 744 | return SMGC_ERROR; 745 | } 746 | #endif /* SMGC_HAVE_CELL_SUPPORT */ 747 | 748 | /* ////////////////////////////////////////////////////////////////////////// */ 749 | static int 750 | mpi_io(void) 751 | { 752 | int mpi_ret_code = MPI_ERR_OTHER, num_elems = 0, i = 0, rc = SMGC_ERROR; 753 | /* access mode flags */ 754 | int amode = MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE; 755 | char *buff = NULL, path_buff[SMGC_PATH_MAX]; 756 | /* i/o time markers */ 757 | double effe_start = 0.0, effe_fin = 0.0; 758 | double write_start = 0.0, write_fin = 0.0, read_start = 0.0, read_fin = 0.0; 759 | /* bandwidth variables */ 760 | double effe_bw = 0.0, write_bw = 0.0, read_bw = 0.0, effe_bw_time_fix = 0.0, 761 | gete_start = 0.0, gete_fin = 0.0, gete_start2 = 0.0, gete_fin2 = 0.0; 762 | double tmp_dbl = 0.0; 763 | /* structs for min/max */ 764 | double_int_t in_wr = {0.0, 0}, in_rd = {0.0, 0}, in_effe = {0.0, 0}; 765 | /* file handle */ 766 | MPI_File mpi_fh; 767 | MPI_Status status; 768 | MPI_Offset offset = (MPI_Offset)(my_rank * file_size); 769 | 770 | /* no writing to do, so return SMGC_SUCCESS */ 771 | if (0 == num_fs_test_paths) { 772 | SMGC_MPF(" zero paths requested via -w option. 
skipping test.\n"); 773 | return SMGC_SUCCESS; 774 | } 775 | 776 | buff = (char *)malloc(file_size * sizeof(char)); 777 | SMGC_MEMCHK(buff, out); 778 | 779 | memset(buff, 'j', (size_t)(file_size * sizeof(char))); 780 | 781 | /* if we are here, then let the real work begin */ 782 | 783 | SMGC_MPF(" file size (per rank process): %lu B\n", file_size); 784 | 785 | for (i = 0; i < num_fs_test_paths; ++i) { 786 | int nw = snprintf(path_buff, SMGC_PATH_MAX, "%s/%s", fs_test_list[i], 787 | SMGC_MPI_FILE_NAME); 788 | if (nw >= SMGC_PATH_MAX) return SMGC_ERROR; 789 | 790 | SMGC_MPF(" === mpi_comm_world: writing to %s\n", fs_test_list[i]); 791 | 792 | /* barrier before we start each iteration */ 793 | mpi_ret_code = MPI_Barrier(MPI_COMM_WORLD); 794 | SMGC_MPICHK(mpi_ret_code, out); 795 | 796 | effe_start = MPI_Wtime(); 797 | mpi_ret_code = MPI_File_open(MPI_COMM_WORLD, path_buff, amode, 798 | MPI_INFO_NULL, &mpi_fh); 799 | SMGC_MPICHK(mpi_ret_code, out); 800 | 801 | mpi_ret_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_CHAR, 802 | MPI_CHAR, "native", MPI_INFO_NULL); 803 | SMGC_MPICHK(mpi_ret_code, out); 804 | 805 | write_start = MPI_Wtime(); 806 | mpi_ret_code = MPI_File_write_at(mpi_fh, offset, buff, 807 | file_size, MPI_CHAR, 808 | &status); 809 | SMGC_MPICHK(mpi_ret_code, out); 810 | write_fin = MPI_Wtime(); 811 | 812 | gete_start = MPI_Wtime(); 813 | mpi_ret_code = MPI_Get_elements(&status, MPI_CHAR, &num_elems); 814 | SMGC_MPICHK(mpi_ret_code, out); 815 | gete_fin = MPI_Wtime(); 816 | 817 | if ((sizeof(char) * file_size) != (size_t)num_elems) { 818 | SMGC_ERR_MSG("write size mismatch. 
wrote %lu requested %d\n", 819 | sizeof(char) * file_size, 820 | num_elems); 821 | goto out; 822 | } 823 | 824 | read_start = MPI_Wtime(); 825 | mpi_ret_code = MPI_File_read_at(mpi_fh, offset, buff, file_size, 826 | MPI_CHAR, &status); 827 | SMGC_MPICHK(mpi_ret_code, out); 828 | read_fin = MPI_Wtime(); 829 | 830 | gete_start2 = MPI_Wtime(); 831 | mpi_ret_code = MPI_Get_elements(&status, MPI_CHAR, &num_elems); 832 | SMGC_MPICHK(mpi_ret_code, out); 833 | gete_fin2 = MPI_Wtime(); 834 | 835 | mpi_ret_code = MPI_File_close(&mpi_fh); 836 | SMGC_MPICHK(mpi_ret_code, out); 837 | effe_fin = MPI_Wtime(); 838 | 839 | if ((sizeof(char) * file_size) != (size_t)num_elems) { 840 | SMGC_ERR_MSG("write/read mismatch. wrote %lu read %d\n", 841 | sizeof(char) * file_size, 842 | num_elems); 843 | goto out; 844 | } 845 | 846 | /* subtract time not spent in benchmarked routines */ 847 | effe_bw_time_fix = (gete_fin - gete_start) + 848 | (gete_fin2 - gete_start2); 849 | 850 | /* calculate bandwidths */ 851 | 852 | /* negative and 0 length file size protection provided by lower level */ 853 | /* zero value fixup - good enough for our purposes */ 854 | 855 | /* effective bandwidth */ 856 | if (0.0 >= (tmp_dbl = ((effe_fin - effe_start) - effe_bw_time_fix))) { 857 | effe_bw = 0.0; 858 | } 859 | else { 860 | effe_bw = ((double)file_size / tmp_dbl / (double)SMGC_MB_SIZE); 861 | } 862 | /* write bandwidth */ 863 | if (0.0 >= (tmp_dbl = (write_fin - write_start))) { 864 | write_bw = 0.0; 865 | } 866 | else { 867 | write_bw = ((double)file_size / tmp_dbl / (double)SMGC_MB_SIZE); 868 | } 869 | /* read bandwidth */ 870 | if (0.0 >= (tmp_dbl = (read_fin - read_start))) { 871 | read_bw = 0.0; 872 | } 873 | else { 874 | read_bw = ((double)file_size / tmp_dbl / (double)SMGC_MB_SIZE); 875 | } 876 | 877 | /* fill structs for calculating min/max */ 878 | in_effe.val = effe_bw; 879 | in_effe.rank = my_rank; 880 | in_wr.val = write_bw; 881 | in_wr.rank = my_rank; 882 | in_rd.val = read_bw; 883 | in_rd.rank 
= my_rank; 884 | 885 | /* calculate effective bandwidth stats */ 886 | if (SMGC_SUCCESS != io_stats(in_effe, "effective write bandwidth", 887 | IO_STATS_MBS)) { 888 | goto out; 889 | } 890 | /* calculate write bandwidth stats */ 891 | if (SMGC_SUCCESS != io_stats(in_wr, "pure write bandwidth", 892 | IO_STATS_MBS)) { 893 | goto out; 894 | } 895 | /* calculate read bandwidth stats */ 896 | if (SMGC_SUCCESS != io_stats(in_rd, "pure read bandwidth", 897 | IO_STATS_MBS)) { 898 | goto out; 899 | } 900 | } 901 | 902 | /* all is well, set rc accordingly */ 903 | rc = SMGC_SUCCESS; 904 | out: 905 | if (NULL != buff) free(buff); 906 | return rc; 907 | } 908 | 909 | #if 0 910 | /* ////////////////////////////////////////////////////////////////////////// */ 911 | static int 912 | host_info_exchange(void) 913 | { 914 | unsigned long int *net_nums = (unsigned long int *) 915 | calloc(num_ranks, sizeof(unsigned long int)); 916 | int rc = SMGC_ERROR; 917 | 918 | if (NULL == net_nums) { 919 | SMGC_ERR_MSG("out of resources\n"); 920 | return SMGC_ERROR; 921 | } 922 | if (SMGC_SUCCESS != get_my_global_color(true, &my_color)) { 923 | SMGC_ERR_MSG("get_my_global_color failure\n"); 924 | goto out; 925 | } 926 | 927 | out: 928 | if (NULL != net_nums) free(net_nums); 929 | return rc; 930 | } 931 | #endif 932 | 933 | /* ////////////////////////////////////////////////////////////////////////// */ 934 | static int 935 | hostname_exchange(void) 936 | { 937 | int buff_size = num_ranks * SMGC_HOST_NAME_MAX * sizeof(char); 938 | if (NULL == rhname_lut_ptr) { 939 | rhname_lut_ptr = (char *)calloc(buff_size, sizeof(char)); 940 | SMGC_MEMCHK(rhname_lut_ptr, error); 941 | } 942 | 943 | memset(rhname_lut_ptr, '\0', buff_size); 944 | 945 | SMGC_MPF(" mpi_comm_world: mpi_allgather buffer size: %d B\n", 946 | buff_size); 947 | SMGC_MPF(" mpi_comm_world: exchanging host name information\n"); 948 | 949 | /* populate the remote host name lookup table */ 950 | mpi_ret_code = MPI_Allgather(host_name_buff, 
SMGC_HOST_NAME_MAX, MPI_CHAR, 951 | rhname_lut_ptr, SMGC_HOST_NAME_MAX, MPI_CHAR, 952 | MPI_COMM_WORLD); 953 | SMGC_MPICHK(mpi_ret_code, error); 954 | 955 | return SMGC_SUCCESS; 956 | error: 957 | /* only free rhname_lut_ptr in error condition */ 958 | if (NULL != rhname_lut_ptr) { 959 | free(rhname_lut_ptr); 960 | rhname_lut_ptr = NULL; 961 | } 962 | return SMGC_ERROR; 963 | } 964 | 965 | /* ////////////////////////////////////////////////////////////////////////// */ 966 | static int 967 | small_allreduce_max(void) 968 | { 969 | double send_buff = (double)my_rank, recv_buff = 0.0; 970 | 971 | SMGC_MPF(" message size: %d B\n", (int)sizeof(double)); 972 | SMGC_MPF(" mpi_comm_world: mpi_allreducing\n"); 973 | 974 | mpi_ret_code = MPI_Allreduce(&send_buff, &recv_buff, 1, MPI_DOUBLE, 975 | MPI_MAX, MPI_COMM_WORLD); 976 | SMGC_MPICHK(mpi_ret_code, error); 977 | 978 | SMGC_MPF(" mpi_comm_world: verifying result\n"); 979 | 980 | /* yes, i do want to do it this way :-) */ 981 | if (recv_buff != (double)(num_ranks - 1)) { 982 | SMGC_ERR_MSG("invalid result detected\n"); 983 | goto error; 984 | } 985 | 986 | return SMGC_SUCCESS; 987 | error: 988 | return SMGC_ERROR; 989 | } 990 | 991 | /* ////////////////////////////////////////////////////////////////////////// */ 992 | /** 993 | * potentially a very synchronous all to root point-to-point implementation. 
994 | */ 995 | static int 996 | large_all_to_root_ptp(void) 997 | { 998 | int buff_size = msg_size; 999 | int src_rank = 0, tag = 0; 1000 | int rc = SMGC_ERROR; 1001 | char *char_buff = NULL; 1002 | char *del = "\b\b\b\b\b\b\b\b\b\b\b\b\b"; 1003 | MPI_Status status; 1004 | 1005 | if (NULL == (char_buff = (char *)calloc(buff_size, sizeof(char)))) { 1006 | SMGC_ERR_MSG("out of resources\n"); 1007 | return SMGC_ERROR; 1008 | } 1009 | 1010 | SMGC_MPF(" message size: %d B\n", buff_size); 1011 | 1012 | /* if we are here, let the games begin */ 1013 | 1014 | if (SMGC_MASTER_RANK != my_rank) { 1015 | mpi_ret_code = MPI_Send(char_buff, buff_size, MPI_CHAR, 1016 | SMGC_MASTER_RANK, tag, MPI_COMM_WORLD); 1017 | SMGC_MPICHK(mpi_ret_code, out); 1018 | } 1019 | /* i am the root */ 1020 | else { 1021 | SMGC_MPF(" mpi_comm_world: mpi_sending to rank %d - ", 1022 | SMGC_MASTER_RANK); 1023 | for (src_rank = 0; src_rank < num_ranks; ++src_rank) { 1024 | if (my_rank != src_rank) { 1025 | SMGC_MPF("%s%06d/%06d%s", 1 == src_rank ? "" : del, src_rank, 1026 | (num_ranks - 1), 1027 | (num_ranks -1) == src_rank ? 
"\n" : ""); 1028 | 1029 | mpi_ret_code = MPI_Recv(char_buff, buff_size, MPI_CHAR, 1030 | src_rank, tag, MPI_COMM_WORLD, &status); 1031 | SMGC_MPICHK(mpi_ret_code, out); 1032 | } 1033 | } 1034 | } 1035 | /* we made it - rainbows and butterflies */ 1036 | rc = SMGC_SUCCESS; 1037 | out: 1038 | if (NULL != char_buff) { 1039 | free(char_buff); 1040 | } 1041 | return rc; 1042 | } 1043 | 1044 | /* ////////////////////////////////////////////////////////////////////////// */ 1045 | static int 1046 | root_bcast(void) 1047 | { 1048 | int buff_size = msg_size, rc = SMGC_ERROR; 1049 | char *char_buff = NULL; 1050 | 1051 | if (NULL == (char_buff = (char *)calloc(buff_size, sizeof(char)))) { 1052 | SMGC_ERR_MSG("out of resources\n"); 1053 | return SMGC_ERROR; 1054 | } 1055 | 1056 | SMGC_MPF(" message size: %d B\n", buff_size); 1057 | SMGC_MPF(" rank %06d (%s): broadcasting to mpi_comm_world\n", my_rank, 1058 | host_name_buff); 1059 | 1060 | mpi_ret_code = MPI_Bcast(char_buff, buff_size, MPI_CHAR, SMGC_MASTER_RANK, 1061 | MPI_COMM_WORLD); 1062 | SMGC_MPICHK(mpi_ret_code, out); 1063 | 1064 | /* if we are here, then all is well - note that fact */ 1065 | rc = SMGC_SUCCESS; 1066 | out: 1067 | if (NULL != char_buff) { 1068 | free(char_buff); 1069 | char_buff = NULL; 1070 | } 1071 | return rc; 1072 | } 1073 | 1074 | /* ////////////////////////////////////////////////////////////////////////// */ 1075 | static int 1076 | rand_root_bcast(void) 1077 | { 1078 | int i = 0, buff_size = msg_size, num_itrs = 8, next_bc_root = 0, 1079 | rc = SMGC_ERROR; 1080 | char *char_buff = NULL; 1081 | 1082 | if (NULL == (char_buff = (char *)calloc(buff_size, sizeof(char)))) { 1083 | SMGC_ERR_MSG("out of resources\n"); 1084 | return SMGC_ERROR; 1085 | } 1086 | 1087 | srand((int)time(NULL)); 1088 | 1089 | SMGC_MPF(" message size: %d B\n", msg_size); 1090 | 1091 | for (i = 0; i < num_itrs; ++i) { 1092 | /* let the master rank figure out the next "random" root */ 1093 | if (SMGC_MASTER_RANK == my_rank) { 
1094 | next_bc_root = rand() % num_ranks; 1095 | SMGC_FPF(stdout, 1096 | " %06d (%s): broadcasting to mpi_comm_world\n", 1097 | next_bc_root, get_rhn(next_bc_root)); 1098 | } 1099 | /* let mpi_comm_world know about the next bcast root */ 1100 | mpi_ret_code = MPI_Bcast(&next_bc_root, 1, MPI_INT, SMGC_MASTER_RANK, 1101 | MPI_COMM_WORLD); 1102 | SMGC_MPICHK(mpi_ret_code, out); 1103 | 1104 | /* root broadcast! */ 1105 | mpi_ret_code = MPI_Bcast(char_buff, buff_size, MPI_CHAR, next_bc_root, 1106 | MPI_COMM_WORLD); 1107 | SMGC_MPICHK(mpi_ret_code, out); 1108 | } 1109 | /* success! */ 1110 | rc = SMGC_SUCCESS; 1111 | out: 1112 | if (NULL != char_buff) { 1113 | free(char_buff); 1114 | char_buff = NULL; 1115 | } 1116 | return rc; 1117 | } 1118 | 1119 | /* ////////////////////////////////////////////////////////////////////////// */ 1120 | static int 1121 | large_sendrecv_ring(void) 1122 | { 1123 | int i = 0, num_iters = 4, send_tag = 42, recv_tag = 42, 1124 | buff_size = msg_size, r_neighbor = 0, l_neighbor = 0; 1125 | char *send_char_buff = NULL, *recv_char_buff = NULL; 1126 | MPI_Status status; 1127 | 1128 | r_neighbor = (my_rank + 1) % num_ranks; 1129 | l_neighbor = my_rank - 1; 1130 | 1131 | if (l_neighbor < 0) { 1132 | l_neighbor = num_ranks - 1; 1133 | } 1134 | 1135 | SMGC_MPF(" message size: %d B\n", buff_size); 1136 | 1137 | for (i = 0; i < num_iters; ++i) { 1138 | send_char_buff = (char *)calloc(buff_size, sizeof(char)); 1139 | SMGC_MEMCHK(send_char_buff, error); 1140 | recv_char_buff = (char *)calloc(buff_size, sizeof(char)); 1141 | SMGC_MEMCHK(recv_char_buff, error); 1142 | 1143 | SMGC_MPF(" =====================================>\n"); 1144 | 1145 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, 1146 | MPI_CHAR, r_neighbor, send_tag, 1147 | recv_char_buff, buff_size, MPI_CHAR, 1148 | l_neighbor, recv_tag, MPI_COMM_WORLD, 1149 | &status); 1150 | SMGC_MPICHK(mpi_ret_code, error); 1151 | 1152 | SMGC_MPF(" <=====================================\n"); 1153 | 
1154 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, 1155 | MPI_CHAR, l_neighbor, send_tag, 1156 | recv_char_buff, buff_size, MPI_CHAR, 1157 | r_neighbor, recv_tag, MPI_COMM_WORLD, 1158 | &status); 1159 | SMGC_MPICHK(mpi_ret_code, error); 1160 | 1161 | free(send_char_buff); 1162 | send_char_buff = NULL; 1163 | free(recv_char_buff); 1164 | recv_char_buff = NULL; 1165 | } 1166 | 1167 | return SMGC_SUCCESS; 1168 | error: 1169 | if (NULL != send_char_buff) { 1170 | free(send_char_buff); 1171 | } 1172 | if (NULL != recv_char_buff) { 1173 | free(recv_char_buff); 1174 | } 1175 | return SMGC_ERROR; 1176 | } 1177 | 1178 | /* ////////////////////////////////////////////////////////////////////////// */ 1179 | static int 1180 | alt_sendrecv_ring(void) 1181 | { 1182 | int i = 0, num_iters = 4, send_tag = 42, recv_tag = 42, buff_size = 0, 1183 | large_buff_size = msg_size, small_buff_size = 1, r_neighbor = 0, 1184 | l_neighbor = 0; 1185 | char *send_char_buff = NULL, *recv_char_buff = NULL; 1186 | char *large_msg_size_str = "====================================="; 1187 | char *small_msg_size_str = "-------------------------------------"; 1188 | char *cur_size_str_ptr = large_msg_size_str; 1189 | MPI_Status status; 1190 | 1191 | r_neighbor = (my_rank + 1) % num_ranks; 1192 | l_neighbor = my_rank - 1; 1193 | 1194 | if (l_neighbor < 0) { 1195 | l_neighbor = num_ranks - 1; 1196 | } 1197 | 1198 | SMGC_MPF(" message size key: === %d B, --- %d B\n", large_buff_size, 1199 | small_buff_size); 1200 | 1201 | for (i = 0; i < num_iters; ++i) { 1202 | /* what size buffs are we using this time around? 
*/ 1203 | if (0 != i % 2) { 1204 | buff_size = large_buff_size; 1205 | cur_size_str_ptr = large_msg_size_str; 1206 | } 1207 | else { 1208 | buff_size = small_buff_size; 1209 | cur_size_str_ptr = small_msg_size_str; 1210 | } 1211 | 1212 | send_char_buff = (char *)calloc(buff_size, sizeof(char)); 1213 | SMGC_MEMCHK(send_char_buff, error); 1214 | recv_char_buff = (char *)calloc(buff_size, sizeof(char)); 1215 | SMGC_MEMCHK(recv_char_buff, error); 1216 | 1217 | SMGC_MPF(" %s>\n", cur_size_str_ptr); 1218 | 1219 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, MPI_CHAR, 1220 | r_neighbor, send_tag, recv_char_buff, 1221 | buff_size, MPI_CHAR, l_neighbor, recv_tag, 1222 | MPI_COMM_WORLD, &status); 1223 | SMGC_MPICHK(mpi_ret_code, error); 1224 | 1225 | SMGC_MPF(" <%s\n", cur_size_str_ptr); 1226 | 1227 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, MPI_CHAR, 1228 | l_neighbor, send_tag, recv_char_buff, 1229 | buff_size, MPI_CHAR, r_neighbor, recv_tag, 1230 | MPI_COMM_WORLD, &status); 1231 | SMGC_MPICHK(mpi_ret_code, error); 1232 | 1233 | free(send_char_buff); 1234 | send_char_buff = NULL; 1235 | free(recv_char_buff); 1236 | recv_char_buff = NULL; 1237 | } 1238 | 1239 | return SMGC_SUCCESS; 1240 | error: 1241 | if (NULL != send_char_buff) { 1242 | free(send_char_buff); 1243 | } 1244 | if (NULL != recv_char_buff) { 1245 | free(recv_char_buff); 1246 | } 1247 | return SMGC_ERROR; 1248 | } 1249 | 1250 | /* ////////////////////////////////////////////////////////////////////////// */ 1251 | static void 1252 | reset_globs(void) 1253 | { 1254 | glob_loop_iter = 0; 1255 | glob_l_neighbor = 0; 1256 | glob_r_neighbor = 0; 1257 | } 1258 | 1259 | /* ////////////////////////////////////////////////////////////////////////// */ 1260 | static int 1261 | small_all_to_all_ptp(void) 1262 | { 1263 | int i = 0; 1264 | int j = 0; 1265 | int rc = SMGC_ERROR; 1266 | int l_neighbor = 0; 1267 | int r_neighbor = 0; 1268 | int buff_size = 1; 1269 | char *send_char_buff = NULL; 1270 | 
char *recv_char_buff = NULL; 1271 | char *del = "\b\b\b\b\b\b\b\b\b\b\b\b\b"; 1272 | struct itimerval itimer; 1273 | MPI_Status status; 1274 | 1275 | reset_globs(); 1276 | 1277 | send_char_buff = (char *)calloc(buff_size, sizeof(char)); 1278 | SMGC_MEMCHK(send_char_buff, out); 1279 | recv_char_buff = (char *)calloc(buff_size, sizeof(char)); 1280 | SMGC_MEMCHK(recv_char_buff, out); 1281 | 1282 | SMGC_MPF(" message size: %d B\n", buff_size); 1283 | SMGC_MPF(" mpi_comm_world: all to all - "); 1284 | 1285 | for (i = 1; i <= num_ranks; ++i, glob_loop_iter = i) { 1286 | SMGC_MPF("%s%06d/%06d%s", 1 == i ? "" : del, i, num_ranks, 1287 | num_ranks == i ? "\n" : ""); 1288 | 1289 | r_neighbor = (my_rank + i) % num_ranks; 1290 | l_neighbor = my_rank; 1291 | 1292 | for (j = 0; j < i; ++j) { 1293 | --l_neighbor; 1294 | if (l_neighbor < 0) { 1295 | l_neighbor = num_ranks - 1; 1296 | } 1297 | } 1298 | 1299 | glob_l_neighbor = l_neighbor; 1300 | glob_r_neighbor = r_neighbor; 1301 | 1302 | TIMER_ENABLE(itimer); 1303 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, MPI_CHAR, 1304 | r_neighbor, i, recv_char_buff, buff_size, 1305 | MPI_CHAR, l_neighbor, i, MPI_COMM_WORLD, 1306 | &status); 1307 | SMGC_MPICHK(mpi_ret_code, out); 1308 | TIMER_DISABLE(itimer); 1309 | } 1310 | 1311 | /* all is well */ 1312 | rc = SMGC_SUCCESS; 1313 | 1314 | out: 1315 | if (NULL != send_char_buff) free(send_char_buff); 1316 | if (NULL != recv_char_buff) free(recv_char_buff); 1317 | return rc; 1318 | } 1319 | 1320 | /* ////////////////////////////////////////////////////////////////////////// */ 1321 | static int 1322 | large_all_to_all_ptp(void) 1323 | { 1324 | int i = 0; 1325 | int j = 0; 1326 | int rc = SMGC_ERROR; 1327 | int l_neighbor = 0; 1328 | int r_neighbor = 0; 1329 | int buff_size = msg_size; 1330 | char *send_char_buff = NULL; 1331 | char *recv_char_buff = NULL; 1332 | char *del = "\b\b\b\b\b\b\b\b\b\b\b\b\b"; 1333 | MPI_Status status; 1334 | 1335 | send_char_buff = (char 
*)calloc(buff_size, sizeof(char)); 1336 | SMGC_MEMCHK(send_char_buff, out); 1337 | recv_char_buff = (char *)calloc(buff_size, sizeof(char)); 1338 | SMGC_MEMCHK(recv_char_buff, out); 1339 | 1340 | SMGC_MPF(" message size: %d B\n", buff_size); 1341 | SMGC_MPF(" mpi_comm_world: all to all - "); 1342 | 1343 | for (i = 1; i <= num_ranks; ++i) { 1344 | SMGC_MPF("%s%06d/%06d%s", 1 == i ? "" : del, i, num_ranks, 1345 | num_ranks == i ? "\n" : ""); 1346 | 1347 | r_neighbor = (my_rank + i) % num_ranks; 1348 | l_neighbor = my_rank; 1349 | 1350 | for (j = 0; j < i; ++j) { 1351 | --l_neighbor; 1352 | if (l_neighbor < 0) { 1353 | l_neighbor = num_ranks - 1; 1354 | } 1355 | } 1356 | 1357 | mpi_ret_code = MPI_Sendrecv(send_char_buff, buff_size, MPI_CHAR, 1358 | r_neighbor, i, recv_char_buff, buff_size, 1359 | MPI_CHAR, l_neighbor, i, MPI_COMM_WORLD, 1360 | &status); 1361 | SMGC_MPICHK(mpi_ret_code, out); 1362 | } 1363 | 1364 | /* all is well */ 1365 | rc = SMGC_SUCCESS; 1366 | 1367 | out: 1368 | if (NULL != send_char_buff) free(send_char_buff); 1369 | if (NULL != recv_char_buff) free(recv_char_buff); 1370 | return rc; 1371 | } 1372 | 1373 | 1374 | /* ////////////////////////////////////////////////////////////////////////// */ 1375 | /* ////////////////////////////////////////////////////////////////////////// */ 1376 | /* main */ 1377 | /* ////////////////////////////////////////////////////////////////////////// */ 1378 | /* ////////////////////////////////////////////////////////////////////////// */ 1379 | int 1380 | main(int argc, 1381 | char **argv) 1382 | { 1383 | int i = 0, ii = 0, c = 0, opt_indx = 0, num_iters = SMGC_DEF_NUM_ITRS; 1384 | smgc_test_t *smgc_custom_jb_test_ptr = NULL; 1385 | 1386 | /* can't hurt */ 1387 | memset(bin_bloat, 'x', sizeof(char) * SMGC_BIN_SIZE); 1388 | /* do this here because an error may occur before gethostname. 1389 | * we use the contents of host_name_buff in error messages. 
1390 | */ 1391 | snprintf(host_name_buff, SMGC_HOST_NAME_MAX - 1, "%s", rhn_unknown); 1392 | 1393 | /* init MPI */ 1394 | mpi_ret_code = MPI_Init(&argc, &argv); 1395 | SMGC_MPICHK(mpi_ret_code, error); 1396 | mpi_ret_code = MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); 1397 | SMGC_MPICHK(mpi_ret_code, error); 1398 | mpi_ret_code = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 1399 | SMGC_MPICHK(mpi_ret_code, error); 1400 | 1401 | /* set job parameters based on the size of mpi_comm_world 1402 | * do this here, so it can be overwritten by the user 1403 | */ 1404 | set_jb_params(); 1405 | 1406 | /* start the timer */ 1407 | if (SMGC_MASTER_RANK == my_rank) { 1408 | start_time = MPI_Wtime(); 1409 | if (NULL == (start_time_str = get_time_str(&start_clock))) { 1410 | SMGC_ERR_MSG("out of resources\n"); 1411 | goto error; 1412 | } 1413 | } 1414 | 1415 | while (1) { 1416 | opt_indx = 0; 1417 | 1418 | static struct option long_options[] = 1419 | { 1420 | {"all" , no_argument, 0, 'a'}, 1421 | {"version" , no_argument, 0, 'v'}, 1422 | {"verbose" , no_argument, 0, 'V'}, 1423 | {"help" , no_argument, 0, 'h'}, 1424 | {"stat" , required_argument, 0, 's'}, 1425 | {"write" , required_argument, 0, 'w'}, 1426 | {"n-iters" , required_argument, 0, 'n'}, 1427 | {"msg-size" , required_argument, 0, 'm'}, 1428 | {"file-size" , required_argument, 0, 'M'}, 1429 | {"quiet" , no_argument, 0, 'q'}, 1430 | {"with-tests" , required_argument, 0, 't'}, 1431 | {"msg-timeout", required_argument, 0, 'T'}, 1432 | {0 , 0 , 0, 0 } 1433 | }; 1434 | 1435 | c = getopt_long_only(argc, argv, "avVhs:w:n:m:M:qt:T:", long_options, 1436 | &opt_indx); 1437 | 1438 | if (c == -1) { 1439 | break; 1440 | } 1441 | 1442 | switch (c) { 1443 | case 'a': /* run ALL tests */ 1444 | if (tests_on_heap) { 1445 | free(smgc_test_ptr); 1446 | } 1447 | tests_on_heap = false; 1448 | upd_test_suite(smgc_small_jb_tests); 1449 | break; 1450 | 1451 | case 'v': /* version */ 1452 | SMGC_MPF("%s %s\n", PACKAGE_NAME, PACKAGE_VERSION); 1453 | goto 
fin; 1454 | 1455 | case 'V': /* don't shh! */ 1456 | ; 1457 | break; 1458 | 1459 | case 'q': /* shh! */ 1460 | ; 1461 | break; 1462 | 1463 | case 'h': /* socorro! */ 1464 | usage(); 1465 | goto fin; 1466 | 1467 | case 's': /* stat something for me please */ 1468 | if (num_stat_paths < SMGC_MAX_STAT_PATHS) { 1469 | strncpy(stat_list[num_stat_paths++], optarg, 1470 | (SMGC_PATH_MAX - 1)); 1471 | } 1472 | break; 1473 | 1474 | case 'w': /* write some stuff to this path */ 1475 | if (num_fs_test_paths < SMGC_MAX_FS_TEST_PATHS) { 1476 | strncpy(fs_test_list[num_fs_test_paths++], optarg, 1477 | (SMGC_PATH_MAX - 1)); 1478 | } 1479 | break; 1480 | 1481 | case 'n': /* update number of test iterations */ 1482 | i = atoi(optarg); 1483 | if (i >= 0) { 1484 | num_iters = i; 1485 | } 1486 | break; 1487 | 1488 | case 'm': /* change the default message size */ 1489 | i = atoi(optarg); 1490 | if (i > 0) { 1491 | if (SMGC_SUCCESS != get_msg_size(optarg, "message", &i)) { 1492 | goto fin; 1493 | } 1494 | else { 1495 | msg_size = i; 1496 | } 1497 | } 1498 | /* else we don't change the message size */ 1499 | break; 1500 | 1501 | case 'M': /* change the default file size */ 1502 | i = atoi(optarg); 1503 | if (i > 0) { 1504 | if (SMGC_SUCCESS != get_msg_size(optarg, "file", &i)) { 1505 | goto fin; 1506 | } 1507 | else { 1508 | file_size = i; 1509 | } 1510 | } 1511 | /* else we don't change the file size */ 1512 | break; 1513 | 1514 | case 't': /* construct custom list of tests */ 1515 | if (SMGC_SUCCESS != 1516 | create_test_list(optarg, &smgc_custom_jb_test_ptr)) { 1517 | SMGC_ERR_MSG("error constructing test list\n"); 1518 | goto fin; 1519 | } 1520 | break; 1521 | case 'T': /* set message timeout */ 1522 | msg_timeout = strtol(optarg, (char **)NULL, 10); 1523 | break; 1524 | 1525 | default: 1526 | usage(); 1527 | goto fin; 1528 | } 1529 | } 1530 | 1531 | if (optind < argc) { /* non-option argv elements */ 1532 | usage(); 1533 | goto fin; 1534 | } 1535 | 1536 | /* get your host's name */ 
1537 | if (0 != gethostname(host_name_buff, SMGC_HOST_NAME_MAX - 1)) { 1538 | SMGC_ERR_MSG("unable to get hostname...\n"); 1539 | goto error; 1540 | } 1541 | host_name_buff[SMGC_HOST_NAME_MAX - 1] = '\0'; 1542 | 1543 | /* display info header */ 1544 | SMGC_MPF("\n $$$ %s %s $$$\n\n", PACKAGE_NAME, PACKAGE_VERSION); 1545 | SMGC_MPF(" start yyyymmdd-hhmmss : %s\n", start_time_str); 1546 | SMGC_MPF(" hostname : %s\n", host_name_buff); 1547 | SMGC_MPF(" numpe : %d\n", num_ranks); 1548 | SMGC_MPF(" bin bloat : %d B\n", SMGC_BIN_SIZE); 1549 | SMGC_MPF(" default msg size : %d B\n", SMGC_MSG_SIZE); 1550 | SMGC_MPF(" actual msg size : %d B\n", msg_size); 1551 | SMGC_MPF(" message timeout : %d %s\n", 1552 | msg_timeout, msg_timeout < 0 ? "" : "s"); 1553 | SMGC_MPF(" default file size/rank : %d B\n", SMGC_MPI_IO_BUFF_SIZE); 1554 | SMGC_MPF(" actual file size/rank : %lu B\n", file_size); 1555 | SMGC_MPF(" num iters : %d\n", num_iters); 1556 | SMGC_MPF(" num tests : %d\n", num_tests); 1557 | SMGC_MPF("\n"); 1558 | 1559 | for (ii = 0; ii < num_iters; ++ii) { 1560 | SMGC_MPF(" === starting pass %d of %d\n\n", (ii + 1), num_iters); 1561 | 1562 | /* run each test */ 1563 | for (i = 0; i < num_tests; ++i) { 1564 | SMGC_MPF(" === starting : %s test\n", smgc_test_ptr[i].tname); 1565 | /* run and check */ 1566 | SMGC_TSTCHK(smgc_test_ptr[i].tfp(), error); 1567 | SMGC_MPF(" === done : %s test\n", smgc_test_ptr[i].tname); 1568 | SMGC_MPF("\n"); 1569 | mpi_ret_code = MPI_Barrier(MPI_COMM_WORLD); 1570 | SMGC_MPICHK(mpi_ret_code, error); 1571 | } 1572 | } 1573 | 1574 | mpi_ret_code = MPI_Barrier(MPI_COMM_WORLD); 1575 | SMGC_MPICHK(mpi_ret_code, error); 1576 | 1577 | /* stop the timer */ 1578 | if (my_rank == SMGC_MASTER_RANK) { 1579 | end_time = MPI_Wtime(); 1580 | exec_time = end_time - start_time; 1581 | } 1582 | 1583 | SMGC_MPF(" exec time : %lf (s)\n\n", exec_time); 1584 | SMGC_MPF(" $$$ %s $$$\n\n", "carpe manana"); 1585 | SMGC_MPF(" PASSED\n"); 1586 | 1587 | mpi_ret_code = 
MPI_Finalize(); 1588 | SMGC_MPICHK(mpi_ret_code, error); 1589 | 1590 | if (my_rank == SMGC_MASTER_RANK && NULL != start_time_str) { 1591 | free(start_time_str); 1592 | start_time_str = NULL; 1593 | } 1594 | 1595 | if (NULL != rhname_lut_ptr) { 1596 | free(rhname_lut_ptr); 1597 | } 1598 | 1599 | return EXIT_SUCCESS; 1600 | 1601 | fin: 1602 | mpi_ret_code = MPI_Finalize(); 1603 | /* jumping to error here is a weird thing, but if MPI_Finalize isn't 1604 | * successful, then at least we'll know. 1605 | */ 1606 | SMGC_MPICHK(mpi_ret_code, error); 1607 | return EXIT_SUCCESS; 1608 | 1609 | error: 1610 | MPI_Abort(MPI_COMM_WORLD, mpi_ret_code); 1611 | return EXIT_FAILURE; 1612 | } 1613 | -------------------------------------------------------------------------------- /supermagic.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2010-2022 Los Alamos National Security, LLC. 3 | * All rights reserved. 4 | * 5 | * This program was prepared by Los Alamos National Security, LLC at Los Alamos 6 | * National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 7 | * Department of Energy (DOE). All rights in the program are reserved by the DOE 8 | * and Los Alamos National Security, LLC. Permission is granted to the public to 9 | * copy and use this software without charge, provided that this Notice and any 10 | * statement of authorship are reproduced on all copies. Neither the U.S. 11 | * Government nor LANS makes any warranty, express or implied, or assumes any 12 | * liability or responsibility for the use of this software. 13 | */ 14 | 15 | /** 16 | * @author Samuel K. Gutierrez - samuelREMOVEME@lanl.gov 17 | * found a bug? have an idea? please let me know. 
18 | */ 19 | 20 | #ifndef SUPERMAGIC_H 21 | #define SUPERMAGIC_H 22 | 23 | #ifdef HAVE_CONFIG_H 24 | #include "config.h" 25 | #endif 26 | #ifdef HAVE_STDLIB_H 27 | #include <stdlib.h> 28 | #endif 29 | #ifdef HAVE_STDIO_H 30 | #include <stdio.h> 31 | #endif 32 | #ifdef HAVE_ERRNO_H 33 | #include <errno.h> 34 | #endif 35 | #ifdef HAVE_UNISTD_H 36 | #include <unistd.h> 37 | #endif 38 | #ifdef HAVE_GETOPT_H 39 | #include <getopt.h> 40 | #endif 41 | #ifdef HAVE_TIME_H 42 | #include <time.h> 43 | #endif 44 | #ifdef HAVE_STRING_H 45 | #include <string.h> 46 | #endif 47 | #ifdef HAVE_FCNTL_H 48 | #include <fcntl.h> 49 | #endif 50 | #ifdef HAVE_LIMITS_H 51 | #include <limits.h> 52 | #endif 53 | #ifdef HAVE_STDINT_H 54 | #include <stdint.h> 55 | #endif 56 | #ifdef HAVE_STDBOOL_H 57 | #include <stdbool.h> 58 | #endif 59 | #ifdef HAVE_SYS_TYPES_H 60 | #include <sys/types.h> 61 | #endif 62 | #ifdef HAVE_SYS_STAT_H 63 | #include <sys/stat.h> 64 | #endif 65 | #ifdef HAVE_NETDB_H 66 | #include <netdb.h> 67 | #endif 68 | #ifdef HAVE_ARPA_INET_H 69 | #include <arpa/inet.h> 70 | #endif 71 | #ifdef HAVE_SIGNAL_H 72 | #include <signal.h> 73 | #endif 74 | #ifdef HAVE_SYS_TIME_H 75 | #include <sys/time.h> 76 | #endif 77 | 78 | #include "mpi.h" 79 | 80 | /* only include the following if building with cell support */ 81 | #if SMGC_HAVE_CELL_SUPPORT == 1 82 | #include "cell_check.h" 83 | #endif 84 | 85 | /* binary bloat size (B) - default is 1 MB */ 86 | #ifndef SMGC_BIN_SIZE 87 | #define SMGC_BIN_SIZE (1 << 20) 88 | #elif defined SMGC_BIN_SIZE && (SMGC_BIN_SIZE) <= 0 89 | #undef SMGC_BIN_SIZE 90 | #define SMGC_BIN_SIZE 1 91 | #endif 92 | 93 | /* message size default */ 94 | #define SMGC_MSG_SIZE (512 * 1024) 95 | 96 | /* messaging timeout macros */ 97 | #define TIMER_ENABLE(itimer) \ 98 | do { \ 99 | if (msg_timeout > 0) { \ 100 | (itimer).it_value.tv_sec = msg_timeout; \ 101 | (itimer).it_value.tv_usec = 0; \ 102 | (itimer).it_interval = (itimer).it_value; \ 103 | signal(SIGALRM, &kill_mpi_messaging); \ 104 | setitimer(ITIMER_REAL, &(itimer), NULL); \ 105 | } \ 106 | } while (0) 107 | 108 | #define TIMER_DISABLE(itimer) \ 109 | do { \ 110 | if (msg_timeout > 0) { \ 111 | 
(itimer).it_value.tv_sec = 0; \ 112 | (itimer).it_value.tv_usec = 0; \ 113 | (itimer).it_interval = (itimer).it_value; \ 114 | setitimer(ITIMER_REAL, &(itimer), NULL); \ 115 | } \ 116 | } while (0) 117 | 118 | /* invalid color - all valid colors are expected to be positive values */ 119 | #define SMGC_COLOR_INVALID -1 120 | 121 | /* return codes used for internal purposes */ 122 | enum { 123 | SMGC_SUCCESS = 0, 124 | SMGC_ERROR 125 | }; 126 | 127 | enum { 128 | IO_STATS_MBS = 0, 129 | IO_STATS_TIME_S 130 | }; 131 | 132 | #define SMGC_USAGE \ 133 | "usage:\n" \ 134 | " mpirun -np N ./"PACKAGE_NAME" [OPTION] ... \n\n" \ 135 | "options:\n" \ 136 | " [-a|--all] run all tests in suite\n" \ 137 | " [-h|--help] display this message\n" \ 138 | " [-m|--msg-size x[B,k,M,G]] change message size\n" \ 139 | " [-M|--file-size B[B,k,M,G]] change file size (per rank)\n" \ 140 | " [-n|--n-iters X] run X iterations of a test suite\n" \ 141 | " [-q|--quiet] run in quiet mode\n" \ 142 | " [-s|--stat /a/path] add /a/path to stat list\n" \ 143 | " [-t|--with-tests t1[,t2,tn]] run tests in requested order\n" \ 144 | " [-w|--write /a/path] add /a/path to IO tests\n" \ 145 | " [-V|--verbose] display verbose output\n" \ 146 | 147 | #define SMGC_EXAMPLE \ 148 | "example:\n" \ 149 | " mpirun -np 4 ./"PACKAGE_NAME" -s /glob/usr/file -s /usr/proj -n 2\n" 150 | 151 | /* "master" rank */ 152 | #define SMGC_MASTER_RANK 0 153 | #define SMGC_MAX_TIME_LEN 32 154 | #define SMGC_HOST_NAME_MAX MPI_MAX_PROCESSOR_NAME 155 | #define SMGC_PATH_MAX PATH_MAX 156 | #define SMGC_MAX_TESTS 64 157 | 158 | #define SMGC_MBS_UNIT_STR "MB/s" 159 | #define SMGC_TIME_S_UNIT_STR "s" 160 | #define SMGC_DATE_FORMAT "%Y%m%d-%H%M%S" 161 | #define SMGC_MPI_FILE_NAME "FS_TEST_FILE-YOU_CAN_DELETE_ME" 162 | 163 | /* dictates what a "large" job is (in rank processes) */ 164 | #define SMGC_LRG_JB 256 165 | /* maximum amount of stat paths */ 166 | #define SMGC_MAX_STAT_PATHS 256 167 | /* maximum amount of fs test paths */ 168 | 
#define SMGC_MAX_FS_TEST_PATHS 64 169 | /* default number of test iterations */ 170 | #define SMGC_DEF_NUM_ITRS 1 171 | /* large job message size */ 172 | #define SMGC_LRG_JB_MSG_SIZE 1024 173 | /* for converting bandwidth to MB/s */ 174 | #define SMGC_MB_SIZE (1 << 20) 175 | /* i/o file size: 4 KB per rank process is the default */ 176 | #define SMGC_MPI_IO_BUFF_SIZE (1 << 12) 177 | 178 | /* stringification stuff */ 179 | #define SMGC_STRINGIFY(x) #x 180 | #define SMGC_TOSTRING(x) SMGC_STRINGIFY(x) 181 | 182 | /* error reporting macros */ 183 | #define SMGC_ERR_AT __FILE__ " ("SMGC_TOSTRING(__LINE__)")" 184 | #define SMGC_ERR_PREFIX "-[SMGC ERROR: "SMGC_ERR_AT" FAILED ]- " 185 | #define SMGC_MSG_PREFIX "-[supermagic]- " 186 | 187 | /* error message */ 188 | #define SMGC_ERR_MSG(pfargs...) \ 189 | do { \ 190 | fprintf(stderr, SMGC_ERR_PREFIX); \ 191 | fprintf(stderr, pfargs); \ 192 | } while (0) 193 | 194 | /* memory alloc check */ 195 | #define SMGC_MEMCHK(_ptr_,_gt_) \ 196 | do { \ 197 | if (NULL == (_ptr_)) { \ 198 | SMGC_ERR_MSG("memory allocation error on %s\n", host_name_buff); \ 199 | goto _gt_; \ 200 | } \ 201 | } while (0) 202 | 203 | /* mpi check */ 204 | #define SMGC_MPICHK(_ret_,_gt_) \ 205 | do { \ 206 | if (MPI_SUCCESS != (_ret_)) { \ 207 | MPI_Error_string((_ret_), err_str, &err_str_len); \ 208 | SMGC_ERR_MSG("mpi success not returned on %s... %s (errno: %d)\n", \ 209 | host_name_buff, err_str, (_ret_)); \ 210 | goto _gt_; \ 211 | } \ 212 | } while (0) 213 | 214 | /* test check */ 215 | #define SMGC_TSTCHK(_ret_,_gt_) \ 216 | do { \ 217 | if (SMGC_SUCCESS != (_ret_)) { \ 218 | SMGC_ERR_MSG("test failure detected on %s ...\n", host_name_buff); \ 219 | goto _gt_; \ 220 | } \ 221 | } while (0) 222 | 223 | /* master rank printf */ 224 | #define SMGC_MPF(pfargs...) 
\ 225 | do { \ 226 | if ((SMGC_MASTER_RANK) == my_rank) { \ 227 | fprintf(stdout, pfargs); \ 228 | fflush(stdout); \ 229 | } \ 230 | } while (0) 231 | 232 | /* fprintf with flush */ 233 | #define SMGC_FPF(stream,pfargs...) \ 234 | do { \ 235 | fprintf((stream), pfargs); \ 236 | fflush((stream)); \ 237 | } while (0) 238 | 239 | /* ////////////////////////////////////////////////////////////////////////// */ 240 | /* globals */ 241 | /* ////////////////////////////////////////////////////////////////////////// */ 242 | static int glob_loop_iter = 0; 243 | static int glob_l_neighbor = 0; 244 | static int glob_r_neighbor = 0; 245 | /* no timeout by default */ 246 | static int msg_timeout = -1; 247 | 248 | /* ////////////////////////////////////////////////////////////////////////// */ 249 | /* static forward declarations - typedefs - etc. */ 250 | /* ////////////////////////////////////////////////////////////////////////// */ 251 | /* test function pointer */ 252 | typedef int (*func_ptr)(void); 253 | 254 | typedef struct smgc_test_t { 255 | /* test name */ 256 | char *tname; 257 | /* test function pointer */ 258 | func_ptr tfp; 259 | } smgc_test_t; 260 | 261 | typedef struct double_int_t { 262 | double val; 263 | int rank; 264 | } double_int_t; 265 | 266 | #if 0 267 | static int 268 | get_net_num(const char *target_hostname, 269 | unsigned long int *out_net_num); 270 | #endif 271 | 272 | static int 273 | get_mult(char symbol, int *resp); 274 | 275 | static int 276 | create_test_list(const char *, smgc_test_t **); 277 | 278 | static int 279 | get_msg_size(const char *, const char *, int *); 280 | 281 | static int 282 | small_allreduce_max(void); 283 | 284 | static int 285 | hostname_exchange(void); 286 | 287 | #if 0 288 | static int 289 | host_info_exchange(void); 290 | #endif 291 | 292 | static int 293 | large_all_to_root_ptp(void); 294 | 295 | static int 296 | alt_sendrecv_ring(void); 297 | 298 | static int 299 | large_sendrecv_ring(void); 300 | 301 | static int 302 
| small_all_to_all_ptp(void); 303 | 304 | static int 305 | large_all_to_all_ptp(void); 306 | 307 | static int 308 | stat_paths(void); 309 | 310 | static int 311 | root_bcast(void); 312 | 313 | static int 314 | rand_root_bcast(void); 315 | 316 | static void 317 | usage(void); 318 | 319 | static char * 320 | get_time_str(time_t *); 321 | 322 | static char * 323 | get_rhn(int rank); 324 | 325 | static void 326 | set_jb_params(void); 327 | 328 | static int 329 | get_num_tests(smgc_test_t *); 330 | 331 | static void 332 | upd_test_suite(smgc_test_t *); 333 | 334 | static int 335 | mpi_io(void); 336 | 337 | static int 338 | n_to_n_io(void); 339 | 340 | static int 341 | io_stats(double_int_t, char *, int); 342 | 343 | static void 344 | kill_mpi_messaging(int sig); 345 | 346 | #if SMGC_HAVE_CELL_SUPPORT == 1 347 | static int 348 | cell_sanity(void); 349 | #endif 350 | 351 | static int 352 | hello_world(void); 353 | 354 | /* ////////////////////////////////////////////////////////////////////////// */ 355 | /* global variables */ 356 | /* ////////////////////////////////////////////////////////////////////////// */ 357 | static char *rhn_unknown = "UNKNOWN"; 358 | /* error string length */ 359 | static int err_str_len; 360 | /* file size for both n-n and mpi_io */ 361 | static size_t file_size = SMGC_MPI_IO_BUFF_SIZE; 362 | /* error string buffer */ 363 | static char err_str[MPI_MAX_ERROR_STRING]; 364 | /* stat list */ 365 | static char stat_list[SMGC_MAX_STAT_PATHS][SMGC_PATH_MAX]; 366 | /* filesystem test list */ 367 | static char fs_test_list[SMGC_MAX_FS_TEST_PATHS][SMGC_PATH_MAX]; 368 | /* bloat array */ 369 | static char bin_bloat[SMGC_BIN_SIZE] = {'x'}; 370 | /* host name buffer */ 371 | static char host_name_buff[SMGC_HOST_NAME_MAX]; 372 | /* remote hostname lookup table pointer */ 373 | static char *rhname_lut_ptr = NULL; 374 | /* start time string */ 375 | static char *start_time_str = NULL; 376 | /* start time */ 377 | static double start_time = 0.0; 378 | /* 
start time struct */ 379 | static time_t start_clock; 380 | /* end time */ 381 | static double end_time; 382 | /* my rank */ 383 | static int my_rank = 0; 384 | /* size of mpi_comm_world */ 385 | static int num_ranks = 0; 386 | #if 0 387 | /* my network number */ 388 | static unsigned long int my_net_num; 389 | #endif 390 | /* my "color" */ 391 | int my_color = SMGC_COLOR_INVALID; 392 | /* holds mpi return codes */ 393 | static int mpi_ret_code = MPI_ERR_OTHER; 394 | /* number of paths to stat */ 395 | static int num_stat_paths = 0; 396 | /* number of paths to run IO tests on */ 397 | static int num_fs_test_paths = 0; 398 | /* message size */ 399 | static int msg_size = SMGC_MSG_SIZE; 400 | /* flag that dictates whether or not verbose output will be displayed */ 401 | static bool be_verbose = false; 402 | /* flag that dictates whether or not we are in quiet mode */ 403 | static bool be_quiet = false; 404 | /* execution time */ 405 | static double exec_time = 0.0; 406 | /* points to the selected test suite */ 407 | static smgc_test_t *smgc_test_ptr = NULL; 408 | /* number of tests that will be executed */ 409 | static int num_tests = 0; 410 | /* were the tests allocated on the heap? 
*/ 411 | static bool tests_on_heap = false; 412 | 413 | /* ////////////////////////////////// */ 414 | /* o add new tests below */ 415 | /* ////////////////////////////////// */ 416 | 417 | /* all tests */ 418 | static smgc_test_t smgc_all_tests[] = 419 | { 420 | {"hostname_exchange" , &hostname_exchange }, 421 | {"stat_paths" , &stat_paths }, 422 | {"mpi_io" , &mpi_io }, 423 | {"n_to_n_io" , &n_to_n_io }, 424 | #if SMGC_HAVE_CELL_SUPPORT == 1 425 | {"cell_sanity" , &cell_sanity }, 426 | #endif 427 | {"small_all_to_all_ptp" , &small_all_to_all_ptp }, 428 | {"small_allreduce_max" , &small_allreduce_max }, 429 | {"alt_sendrecv_ring" , &alt_sendrecv_ring }, 430 | {"root_bcast" , &root_bcast }, 431 | {"large_sendrecv_ring" , &large_sendrecv_ring }, 432 | {"rand_root_bcast" , &rand_root_bcast }, 433 | {"large_all_to_root_ptp", &large_all_to_root_ptp}, 434 | {"large_all_to_all_ptp" , &large_all_to_all_ptp }, 435 | {"hello_world" , &hello_world }, 436 | {NULL , NULL } /* MUST BE LAST ELEMENT */ 437 | }; 438 | 439 | /* subset of tests - used for "smaller" jobs */ 440 | static smgc_test_t smgc_small_jb_tests[] = 441 | { 442 | {"hostname_exchange" , &hostname_exchange }, 443 | {"stat_paths" , &stat_paths }, 444 | {"mpi_io" , &mpi_io }, 445 | #if SMGC_HAVE_CELL_SUPPORT == 1 446 | {"cell_sanity" , &cell_sanity }, 447 | #endif 448 | {"small_all_to_all_ptp" , &small_all_to_all_ptp }, 449 | {"small_allreduce_max" , &small_allreduce_max }, 450 | {"alt_sendrecv_ring" , &alt_sendrecv_ring }, 451 | {"root_bcast" , &root_bcast }, 452 | {"large_sendrecv_ring" , &large_sendrecv_ring }, 453 | {"rand_root_bcast" , &rand_root_bcast }, 454 | {"large_all_to_root_ptp", &large_all_to_root_ptp}, 455 | {"large_all_to_all_ptp" , &large_all_to_all_ptp }, 456 | {NULL , NULL } /* MUST BE LAST ELEMENT */ 457 | }; 458 | 459 | /* subset of tests - used for "large" jobs */ 460 | static smgc_test_t smgc_lrg_jb_tests[] = 461 | { 462 | {"hostname_exchange" , &hostname_exchange }, 463 | {"stat_paths" , 
&stat_paths }, 464 | {"mpi_io" , &mpi_io }, 465 | #if SMGC_HAVE_CELL_SUPPORT == 1 466 | {"cell_sanity" , &cell_sanity }, 467 | #endif 468 | {"small_allreduce_max" , &small_allreduce_max}, 469 | {"alt_sendrecv_ring" , &alt_sendrecv_ring }, 470 | {"root_bcast" , &root_bcast }, 471 | {"rand_root_bcast" , &rand_root_bcast }, 472 | {"large_sendrecv_ring" , &large_sendrecv_ring}, 473 | {NULL , NULL } /* MUST BE LAST ELEMENT */ 474 | }; 475 | 476 | #endif /* SUPERMAGIC_H */ 477 | -------------------------------------------------------------------------------- /util/crunch-hang: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ################################################################################ 4 | # Copyright (c) 2012 Los Alamos National Security, LLC. 5 | # All rights reserved. 6 | # 7 | # This program was prepared by Los Alamos National Security, LLC at Los Alamos 8 | # National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S. 9 | # Department of Energy (DOE). All rights in the program are reserved by the DOE 10 | # and Los Alamos National Security, LLC. Permission is granted to the public to 11 | # copy and use this software without charge, provided that this Notice and any 12 | # statement of authorship are reproduced on all copies. Neither the U.S. 13 | # Government nor LANS makes any warranty, express or implied, or assumes any 14 | # liability or responsibility for the use of this software. 15 | ################################################################################ 16 | 17 | # author: samuel k. gutierrez 18 | # last updated: Thu Jul 26 15:02:10 MDT 2012 19 | 20 | # 21 | # crunch-hang processes supermagic hang detection output and attempts to 22 | # identify the nodes responsible for the hang. 
#

declare -r PROG_NAME="crunch-hang"
declare -r PROG_VER="0.4"
declare -r MAGIC_STR="########## HANG DETECTED"
# matches one "rank (name) ==> rank (name) ==> rank (name)" triplet.
# note: (name) may contain at most one run of dots and no '-' characters.
declare -r BAD_TRIP_REGEXP=\
'[0-9]+ \(\w+[.]*\w*\) ==> [0-9]+ \(\w+[.]*\w*\) ==> [0-9]+ \(\w+[.]*\w*\)'

# example output string from supermagic hang detection, with the
# space-delimited field numbers used by sort/cut below:
# 1          2    3        4   5    6          7   8 9   10  11 12  13  14 15
# ########## HANG DETECTED [on loop iteration: 14] 4 (?) ==> 18 (a) ==> 32 (?)

################################################################################
# prints the help text.
usage()
{
cat << EOF
Usage:
    $PROG_NAME [OPTION] [/PATH/TO/HANG-OUTPUT-FILE]
Options:
    -v|--version
    -h|--help
About:
    $PROG_NAME processes supermagic hang detection output and attempts to
    identify the nodes responsible for the hang.
EOF
}

################################################################################
# prints the lowest loop iteration that reported a hang; empty if none did.
# output will be something like: "on loop iteration: 10"
# $1: file name
lowest_iter_string()
{
    local fname="$1"
    # field 7 is "<iteration>]", so a numeric sort on it orders by iteration
    grep "$MAGIC_STR" "$fname" | sort -n -k 7 | head -n 1 | \
        grep -E -o "on loop iteration: [0-9]+"
}

################################################################################
# prints the number of lines that report a hang on the given iteration.
# $1: file name
# $2: iteration string as printed by lowest_iter_string
get_num_instances()
{
    # the iteration is always followed by ']' in supermagic's output. including
    # it keeps "iteration: 1" from also counting iterations 10, 11, 100, ...
    grep -c -F -- "$2]" "$1"
}

################################################################################
# prints the path triplets (fields 8-15) of the first N hang lines, ordered by
# loop iteration.
# $1: file name
# $2: number of hang lines to report (N)
get_bad_paths()
{
    grep "$MAGIC_STR" "$1" | sort -n -k 7 | head -n "$2" | \
        cut -d ' ' -f 8-15
}

################################################################################
# prints the report: the candidate broken paths and the most likely culprit.
# the culprit is wrapped in '@@' so that callers can locate it in the output.
# $1: file name
# $2: number of hang lines to report
disp_bad_paths()
{
    local fname="$1"
    local num_lines="$2"
    local bad_paths
    local the_bad_path

    bad_paths=$(get_bad_paths "$fname" "$num_lines")
    echo "### $PROG_NAME $PROG_VER ###"
    echo "### key: rank (name) ==> rank (name) ==> rank (name) ###"
    echo "one or more of the following paths appear to be broken:"
    echo -e "$bad_paths"
    the_bad_path=$(calc_bad_path "$fname" "$bad_paths")
    echo -e \
    "one or more of the the following ranks caused the hang:\n@@$the_bad_path@@"
}

################################################################################
# prints the full triplet that contains the rank pair appearing most often in
# the reported paths.
# $1: file name
# $2: bad paths
# will be something like:
# 1    2         3   4   5                     6   7    8
# 1810 (UNKNOWN) ==> 239 (ceb180a.localdomain) ==> 692 (UNKNOWN)
# 239 (UNKNOWN) ==> 692 (ceb160a.localdomain) ==> 1145 (UNKNOWN)
calc_bad_path()
{
    local hang_file="$1"
    local -a rank_trips=()
    local -a rank_pairs=()
    local trip_line trip

    # work on each line and store the three ranks of the path
    while read -r trip_line; do
        rank_trips+=("$(echo "$trip_line" | cut -d ' ' -f 1,4,7)")
    done < <(echo -e "$2")
    # store the path pairs: (first, second) and (second, third)
    for trip in "${rank_trips[@]}"; do
        rank_pairs+=("$(echo "$trip" | cut -d ' ' -f 1-2)")
        rank_pairs+=("$(echo "$trip" | cut -d ' ' -f 2-3)")
    done
    # find the bad one
    # output will be something like (before processing):
    # 2 239 692
    # 1 692 1145
    # 1 1810 239
    # in this example, the bad path is between ranks 239 and 692
    local bad_path
    bad_path=$(printf "%s\n" "${rank_pairs[@]}" | sort -n | uniq -c | \
               sed 's/^\s*//g' | sort -nr | head -n 1 | cut -d ' ' -f 2-3)
    local bone btwo
    bone=$(echo $bad_path | cut -d ' ' -f 1)
    btwo=$(echo $bad_path | cut -d ' ' -f 2)
    # at this point $bad_path has something like:
    # 239 692
    # now we have to find the full path within the file using the two ranks
    # that were called out in $bad_path ($bone and $btwo).
    # \b keeps rank 39 from matching inside rank 239.
    local full_path
    full_path=$(grep -E \
        "\b$bone \(\w+[.]*\w*\) ==> $btwo \(\w+[.]*\w*\)" "$hang_file")
    # at this point $full_path has the entire path string. we need to prep it
    # some more. so, do that...
    # use head -n 1 to just choose one if there are multiples
    full_path=$(echo $full_path | grep -E -o "$BAD_TRIP_REGEXP" | head -n 1)
    echo $full_path
}

################################################################################
# processes one hang output file.
# $1: file name
# prints "### NO HANG ###" and exits 0 when the file has no hang lines.
# returns 0 after printing a report and 1 if the file cannot be read.
crunch()
{
    local lis ni

    if [[ -r $1 ]]; then
        lis=$(lowest_iter_string "$1")
        # no hang output, no need to continue
        if [[ "x$lis" == "x" ]]; then
            echo "### NO HANG ###"
            exit 0;
        fi
        ni=$(get_num_instances "$1" "$lis")
        disp_bad_paths "$1" "$ni"
        return 0
    else
        echo "\"$1\" does not exist or is not readable."
        usage
        return 1
    fi
}

################################################################################
# leading options are consumed; parsing stops at the first non-option argument
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage
            exit 0;
            ;;
        -v|--version)
            echo "$PROG_NAME $PROG_VER"
            exit 0;
            ;;
        --)
            shift
            break
            ;;
        -*)
            echo "unknown option: \"$1\""
            exit 1;
            ;;
        *)
            break
            ;;
    esac
done

if [[ $# == 0 ]]; then
    usage
    exit 1;
else
    crunch "$1"
    exit $?;
fi

--------------------------------------------------------------------------------
/util/rr-job-prep:
--------------------------------------------------------------------------------
#!/bin/bash

################################################################################
# Copyright (c) 2012 Los Alamos National Security, LLC.
# All rights reserved.
#
# This program was prepared by Los Alamos National Security, LLC at Los Alamos
# National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S.
# Department of Energy (DOE). All rights in the program are reserved by the DOE
# and Los Alamos National Security, LLC. Permission is granted to the public to
# copy and use this software without charge, provided that this Notice and any
# statement of authorship are reproduced on all copies. Neither the U.S.
# Government nor LANS makes any warranty, express or implied, or assumes any
# liability or responsibility for the use of this software.
################################################################################

# author: samuel k. gutierrez
# last updated: Wed Jul 11 14:24:27 MDT 2012

################################################################################
################################################################################
# ONLY TESTED ON RR-LIKE SYSTEMS WITH PBS AND OPEN MPI 1.4.3!                  #
################################################################################
################################################################################

# program name
declare -r PROG_NAME="rr-job-prep"
# program version
declare -r PROG_VER="0.3"
# name of the crunch-hang program
declare -r CRUNCH_NAME="crunch-hang"
# name of the full node set
declare -r ALL_NODES_FN="ALL"
# name of the subset node file
declare -r SUBSET_NODES_FN="SUB"
# the name of the supermagic output file
declare -r SMGC_OUT_FN="OUT"
# the name of the file containing all the bad nodes in a grep friendly way
declare -r BAD_NODE_LIST_FN="BAD_NODES"
# the name of the file containing all the bad nodes in a syslog friendly way
# that show the path triplets
declare -r BAD_NODE_TRIPLETS="BAD_NODE_TRIPLETS"
# bad node set egrep string
declare -r BAD_SET_REGEXP=\
"@@[0-9]+ \(\w+[.]*\w*\) ==> [0-9]+ \(\w+[.]*\w*\) ==> [0-9]+ \(\w+[.]*\w*\)@@"
# time between mpiruns (in seconds)
declare -r TIME_BETWEEN_RUNS=10
# items to be run on exit
declare -a on_exit_items
# this will eventually be set to a real value, but set to a safe default
tmp_base="/tmp"
# job id
jobid="unknown"
# timeout value (in seconds)
kill_timeout=600
# messaging timeout (s) - default is no timeout
smgc_msg_timeout=-1
# start date and time string
start_date_and_time="time unknown"

################################################################################
################################################################################
# exit/cleanup utility code
################################################################################
################################################################################
# runs every registered cleanup command, discarding all of its output.
function on_exit()
{
    local item
    for item in "${on_exit_items[@]}"; do
        #echo "on_exit: $item"
        # stdout first, then stderr, so that both end up in /dev/null
        eval "$item" > /dev/null 2>&1
    done
}

################################################################################
# registers a command line to run at exit; the first registration installs
# the EXIT trap.
function add_on_exit()
{
    local n=${#on_exit_items[*]}
    on_exit_items[$n]="$*"
    if [[ $n -eq 0 ]]; then
        #echo "setting trap"
        trap on_exit EXIT
    fi
}

################################################################################
# prints the help text. the argument order matches the positional parameters
# read at the bottom of this script.
function usage()
{
cat << EOF
Usage:
    $PROG_NAME \
    [OPTION] [/PATH/TO/SUPERMAGIC] [MSG TIMEOUT (s)] [/PATH/TO/CRUNCH-HANG] \
    [MIN PES] [HANG TIMEOUT (s)] [JOB PES] [YOUR RUN CMD]
Options:
    -v|--version
    -h|--help
About:
    $PROG_NAME attempts to find a good set of nodes within an allocation.
EOF
}

################################################################################
# runs the messaging test on the current node subset.
# $1: path to supermagic
# $2: path to crunch-hang
# $3: minimum number of pes
# returns 0 when no hang was reported and 1 when one was. exits 1 when the
# subset is smaller than $3 or when the run did not finish cleanly.
function run_and_pray()
{
    if [[ `have_enough_pes "$3"` -eq 0 ]]; then
        echo
        echo "##############################################################"
        echo "# cannot continue. not enough pes to meet minpe requirement. #"
        echo "##############################################################"
        echo
        note_bad_nodes
        exit 1;
    fi
    # if we are here, then try to run the messaging test
    ( mpirun --hostfile "$tmp_base/$SUBSET_NODES_FN" \
      "$1" -t small_all_to_all_ptp -T $smgc_msg_timeout 2>&1 | \
      tee "$tmp_base/$SMGC_OUT_FN" > /dev/null 2>&1 )&
    local cmd_pid=$!

    # setup the killer
    ( sleep $kill_timeout && \
      kill -9 $cmd_pid )&
    local killer_pid=$!

    wait $cmd_pid &> /dev/null
    local wait_status=$?

    if [[ $wait_status -ne 0 ]]; then
        echo "### RUN TIMEOUT EXCEEDED ###"
        echo "### JOBID: $jobid"
        # not much else we can do here... well, maybe that's not true, but it
        # is a little harder to deal with
        exit 1;
    else
        # "normal" exit, so clean up - we still don't know if things worked,
        # but we need to terminate the killer process
        #echo "### killing killer"
        disown $killer_pid
        kill $killer_pid
        # XXX big hammer ... fix this later
        killall -q sleep
    fi
    # now crunch the output from supermagic
    local crunch_out=`"$2" "$tmp_base/$SMGC_OUT_FN"`
    # success!
    if [[ "x$crunch_out" == "x### NO HANG ###" ]]; then
        echo
        echo "### successful run - we found a good set!"
        echo
        return 0;
    else
        # no love. we launched, but the job hung in messaging :-(. update the
        # hostfile and return 1 - indicating that we failed to find a good set.
        return 1;
    fi
}

################################################################################
# kills whatever is left over from a hung run.
function cleanup_failed_run()
{
    killall -q -9 mpirun
    killall -q -9 supermagic
}

################################################################################
# $1 path to crunch-hang
# looking for something like:
# 1   2         3   4 5                     6   7 8
# @@4 (UNKNOWN) ==> 6 (ceb179a.localdomain) ==> 8 (UNKNOWN)@@
# records the three nodes as bad and rewrites the subset node file without
# them. exits 1 on any failure.
# XXX update later - really ugly
function remove_bad_nodes_from_set()
{
    local crunch_out=`"$1" "$tmp_base/$SMGC_OUT_FN"`
    local bad_set=`echo -e "$crunch_out" | grep -E "$BAD_SET_REGEXP"`
    # without a triplet every rank below would evaluate to 0 and the first
    # node in the list would be blamed - stop instead
    if [[ -z "$bad_set" ]]; then
        echo "could not identify a bad node set from $CRUNCH_NAME output."
        exit 1;
    fi
    # at this point we have the entire string, but we really only want the ranks
    local bad_ranks=`echo $bad_set | cut -d ' ' -f 1,4,7`
    # some more string prep - remove the '@@'
    bad_ranks=`echo $bad_ranks | sed 's/@@//g'`
    # now we have something like: 4 6 8
    local rone=`echo $bad_ranks | cut -d ' ' -f 1`
    local rtwo=`echo $bad_ranks | cut -d ' ' -f 2`
    local rthree=`echo $bad_ranks | cut -d ' ' -f 3`
    # ranks are base zero and file lines are base one, so adjust for that when
    # getting the node names - the rank will correspond to the line number
    local rone_name=`sed -n "$(( rone + 1 ))p" "$tmp_base/$SUBSET_NODES_FN"`
    local rtwo_name=`sed -n "$(( rtwo + 1 ))p" "$tmp_base/$SUBSET_NODES_FN"`
    local rthree_name=`sed -n "$(( rthree + 1 ))p" "$tmp_base/$SUBSET_NODES_FN"`

    echo "### removing the following \"bad\" ranks:"
    echo "# rank $rone on $rone_name"
    echo "# rank $rtwo on $rtwo_name"
    echo "# rank $rthree on $rthree_name"

    # update the bad nodes list
    echo -e "$rone_name\n$rtwo_name\n$rthree_name" | \
    sort -n | uniq >> "$tmp_base/$BAD_NODE_LIST_FN"
    if [[ $? != 0 ]]; then
        echo "failure writing bad node list."
        exit 1;
    fi
    local log1="$rone_name could not talk to $rtwo_name in $smgc_msg_timeout seconds."
    local log2="$rtwo_name could not talk to $rthree_name in $smgc_msg_timeout seconds."
    log_funk "$log1" "$log2"
    echo "$log1" >> "$tmp_base/$BAD_NODE_TRIPLETS"
    echo "$log2" >> "$tmp_base/$BAD_NODE_TRIPLETS"
    # now update the subset of nodes to run on. the bad node entries are whole
    # lines taken from the node file, so match them as fixed strings against
    # whole lines; otherwise "n1" would also remove "n10".
    grep -v -x -F -f "$tmp_base/$BAD_NODE_LIST_FN" "$tmp_base/$ALL_NODES_FN" > \
    "$tmp_base/$SUBSET_NODES_FN"
    if [[ $? != 0 ]]; then
        echo "failure writing new node set"
        exit 1;
    fi
    # sleep a little to make sure the new file is written
    sleep 1
}

################################################################################
# $1: minimum number of pes
# prints 1 if the subset node file has at least $1 lines, 0 otherwise.
function have_enough_pes()
{
    local numpes=`wc -l "$tmp_base/$SUBSET_NODES_FN" | cut -d ' ' -f 1`
    if [[ "$numpes" -ge "$1" ]]; then
        echo 1
    else
        echo 0
    fi
}

################################################################################
# sends two messages to the system log (and stderr) through logger.
function log_funk()
{
    logger $start_date_and_time $jobid -t $PROG_NAME -s -- "$1"
    logger $start_date_and_time $jobid -t $PROG_NAME -s -- "$2"
}

################################################################################
# prints the recorded "could not talk to" messages, if there are any.
function note_bad_nodes()
{
    if [[ -f "$tmp_base/$BAD_NODE_TRIPLETS" ]]; then
        local trips=`cat "$tmp_base/$BAD_NODE_TRIPLETS"`
        echo "##############################################################"
        echo "### \"bad node\" list ###"
        echo "##############################################################"
        echo -e "$trips"
        echo "##############################################################"
    fi
}

################################################################################
# $1: path to supermagic
# $2: path to crunch-hang
# $3: minimum number of pes
# repeats the messaging test, dropping the blamed nodes after every hang,
# until a run completes without one. run_and_pray exits the script when the
# subset falls below $3.
function find_good_node_set()
{
    local run_status=0
    run_and_pray "$1" "$2" "$3"
    run_status=$?
    # iterate until we find a good set or run into the minpe limit
    while [[ $run_status == 1 ]]; do
        # cleanup a little before we try again...
        cleanup_failed_run
        # we don't want to overwhelm the system, so take a little break
        echo "### waiting $TIME_BETWEEN_RUNS s before next test..."
        sleep $TIME_BETWEEN_RUNS
        remove_bad_nodes_from_set "$2"
        echo "### trying another set..."
        run_and_pray "$1" "$2" "$3"
        run_status=$?
    done
    note_bad_nodes
}

################################################################################
# prints 1 if PBS_JOBID is set to a non-empty value, 0 otherwise.
function have_jobid()
{
    if [[ "x$PBS_JOBID" == "x" ]]; then
        echo 0
    else
        echo 1
    fi
}

################################################################################
# prints the number of lines in $PBS_NODEFILE.
# assuming sanity has already passed
function get_total_numpe()
{
    wc -l < "$PBS_NODEFILE"
}

################################################################################
# $1: path to supermagic      $2: path to crunch-hang    $3: min pes
# $4: single test timeout (s) $5: run pes                $6: run command
# $7: messaging timeout (s)
# validates the environment, creates the temp directory, seeds the node files
# and registers the exit-time cleanup. exits 1 on any failure.
function init()
{
    start_date_and_time=`date`
    # set some app params
    kill_timeout="$4"
    smgc_msg_timeout="$7"

    if [[ `have_jobid` -eq 1 ]]; then
        jobid="$PBS_JOBID"
    else
        echo "cannot determine job id."
        exit 1;
    fi

    # validate before anything touches $PBS_NODEFILE or creates files: with
    # the variable unset "wc -l" would wait on stdin, and a failed check would
    # leave the temp directory behind
    sanity "$1" "$2" "$3" "$5"

    tmp_base=`mktemp -d -t SMGCXXXX`
    if [[ $? != 0 || ! -d "$tmp_base" ]]; then
        echo "cannot create temp directory."
        exit 1;
    fi
    local total_numpe=$(get_total_numpe)

    echo
    echo "##############################################################"
    echo "### cross your fingers and grab a cup of coffee ###"
    echo "### this may take a while ###"
    echo "### ###"
    echo "### please ignore random output ###"
    echo "##############################################################"
    echo
    echo "### $PROG_NAME $PROG_VER"
    echo "### start: $start_date_and_time"
    echo "### job id: $jobid"
    echo "### communication timeout: $smgc_msg_timeout s"
    echo "### single test timeout: $kill_timeout s"
    echo "### temp directory: $tmp_base"
    echo "### starting numpe: $total_numpe"
    echo "### minpes: $3"
    echo "### runpes: $5"
    echo

    # save a copy of the full list of nodes and start with a full set
    cp "$PBS_NODEFILE" "$tmp_base/$ALL_NODES_FN" && \
    cp "$tmp_base/$ALL_NODES_FN" "$tmp_base/$SUBSET_NODES_FN"
    if [[ $? != 0 ]]; then
        echo "cp failure!"
        exit 1;
    else
        # add cleanup code below
        add_on_exit rm -f "$tmp_base/$ALL_NODES_FN"
        add_on_exit rm -f "$tmp_base/$SUBSET_NODES_FN"
        add_on_exit rm -f "$tmp_base/$SMGC_OUT_FN"
        add_on_exit rm -f "$tmp_base/$BAD_NODE_LIST_FN"
        add_on_exit rm -f "$tmp_base/$BAD_NODE_TRIPLETS"
        add_on_exit rmdir "$tmp_base"
        add_on_exit killall -q -9 mpirun
        add_on_exit killall -q -9 supermagic
        # XXX big hammer - fix later
        add_on_exit killall -q sleep
    fi
}

################################################################################
# $1: path to supermagic
# $2: path to crunch-hang
# $3: min pes
# $4: job (run) pes
# will exit with status 1 if app/env prerequisites are not met.
function sanity()
{
    local the_base=$(basename "$1")
    # check for mpirun
    type -p "mpirun" > /dev/null 2>&1
    local tstat=$?
    if [[ $tstat -ne 0 ]]; then
        echo "mpirun not found. cannot continue."
        exit 1;
    fi
    # check for supermagic - at some point provide better detection
    if [[ "$the_base" == "supermagic" ]]; then
        # okay, the name is fine, but can we exec the thing?
        if [[ ! -x "$1" ]]; then
            echo "cannot continue: cannot execute $1"
            exit 1;
        fi
    else
        echo -e "could not find supermagic:\n$1 is not supermagic."
        exit 1;
    fi
    # check crunch-hang: its --version output must start with the program name
    if [[ -x "$2" ]]; then
        local out_str=`"$2" --version`
        local cname=`echo "$out_str" | cut -f 1 -d ' '`
        if [[ "x$cname" != "x$CRUNCH_NAME" ]]; then
            echo -e "valid $CRUNCH_NAME not found:\n$2 cannot be used."
            exit 1;
        fi
    else
        echo -e "could not execute crunch-hang:\n$2."
        exit 1;
    fi
    # is PBS_NODEFILE set? we need this to construct a node list.
    if [[ "x$PBS_NODEFILE" == "x" ]]; then
        echo "PBS_NODEFILE not set. this environment used in $PROG_NAME."
        exit 1;
    fi
    # do we have sane pe values
    if [[ ! $3 -ge $4 ]]; then
        echo "MIN PES must be greater than or equal to JOB PES."
        exit 1;
    fi
}

################################################################################
# leading options are consumed; parsing stops at the first non-option argument
# so that positional values such as a timeout of -1 are never taken for
# options.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage
            exit 0;
            ;;
        -v|--version)
            echo "$PROG_VER"
            exit 0;
            ;;
        --)
            shift
            break
            ;;
        -*)
            echo "unknown option: \"$1\""
            exit 1;
            ;;
        *)
            break
            ;;
    esac
done

if [[ $# != 7 ]]; then
    usage
    exit 1;
else
    smgc_path="$1"
    msg_timeout="$2"
    crunch_path="$3"
    min_pes="$4"
    timeout="$5"
    user_pes="$6"
    user_app="$7"

    init "$smgc_path" "$crunch_path" "$min_pes" "$timeout" "$user_pes" "$user_app" "$msg_timeout"
    # if we are here, let the games begin...
    find_good_node_set "$smgc_path" "$crunch_path" "$min_pes"
    # now run your command. user_app is a whole command line, so it is
    # evaluated rather than executed as a single word.
    zcmd="mpirun -n $user_pes --hostfile $tmp_base/$SUBSET_NODES_FN $user_app"
    echo
    echo "##############################################################"
    echo "### about to start your application ###"
    echo "##############################################################"
    echo
    echo "$zcmd"
    echo
    eval "$zcmd"
    exit $?;
fi

--------------------------------------------------------------------------------
/util/rr-run-job:
--------------------------------------------------------------------------------
#!/bin/bash

################################################################################
# Copyright (c) 2012 Los Alamos National Security, LLC.
# All rights reserved.
#
# This program was prepared by Los Alamos National Security, LLC at Los Alamos
# National Laboratory (LANL) under contract No. DE-AC52-06NA25396 with the U.S.
# Department of Energy (DOE). All rights in the program are reserved by the DOE
# and Los Alamos National Security, LLC. Permission is granted to the public to
# copy and use this software without charge, provided that this Notice and any
# statement of authorship are reproduced on all copies. Neither the U.S.
# Government nor LANS makes any warranty, express or implied, or assumes any
# liability or responsibility for the use of this software.
################################################################################

# run your job from here

################################################################################
################################################################################
# please update the following to meet your needs #
################################################################################
################################################################################

# the minimum number of pes that your job requires
declare -r MINNUMPE=60
# timeout for a single mpi launch and test run (in seconds).
# increase this value as needed for larger jobs
declare -r TEST_TIMEOUT=600
# mpi messaging timeout (s)
declare -r MSG_TIMEOUT=2
# full path to base directory of supermagic utilities
declare -r SMGC_BASE="/users/samuel/supermagic"
# full path to supermagic binary
declare -r SMGC="$SMGC_BASE/src/supermagic"
# full path to crunch-hang script
declare -r CRUNCH="$SMGC_BASE/crunch-hang"
# full path to rr-job-prep
declare -r JOB_PREP="$SMGC_BASE/rr-job-prep"
# number of pes to use when running your app
# NOTE(review): rr-job-prep appears to require MINNUMPE >= MY_RUN_PES - confirm
declare -r MY_RUN_PES=4
# update with your run command (a complete command line, passed as ONE argument)
declare -r MY_RUN_CMD="$SMGC -a"

# rr-job-prep takes exactly seven positional arguments, in this order:
#   supermagic path, msg timeout, crunch-hang path, min pes, test timeout,
#   run pes, run command
# every argument is quoted so that paths containing whitespace stay intact.
"$JOB_PREP" "$SMGC" "$MSG_TIMEOUT" "$CRUNCH" "$MINNUMPE" "$TEST_TIMEOUT" \
    "$MY_RUN_PES" "$MY_RUN_CMD"

# propagate rr-job-prep's exit status
exit $?
48 | --------------------------------------------------------------------------------