├── .github ├── CONTRIBUTING.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── conf └── travis-install-mpi.sh ├── core ├── CMakeLists.txt ├── ROSSConfig.cmake ├── avl_tree.c ├── avl_tree.h ├── buddy.c ├── buddy.h ├── clock │ ├── aarch64.c │ ├── aarch64.h │ ├── amd64.c │ ├── amd64.h │ ├── armv7l.c │ ├── armv7l.h │ ├── bgl.c │ ├── bgl.h │ ├── bgq.c │ ├── bgq.h │ ├── gtod.c │ ├── gtod.h │ ├── i386.c │ ├── i386.h │ ├── ia64.c │ ├── ia64.h │ ├── ppc.c │ ├── ppc.h │ ├── ppc64le.c │ └── ppc64le.h ├── cmake │ ├── Coveralls.cmake │ ├── CoverallsClear.cmake │ ├── CoverallsGenerateGcov.cmake │ ├── GetGitRevisionDescription.LICENSE_1_0.txt │ ├── GetGitRevisionDescription.cmake │ ├── GetGitRevisionDescription.cmake.in │ └── SetupMPI.cmake ├── config.h.in ├── gvt │ ├── 7oclock.c.old │ ├── 7oclock.h.old │ ├── mpi_allreduce.c │ └── mpi_allreduce.h ├── hash-quadratic.c ├── hash-quadratic.h ├── instrumentation │ ├── ross-lps │ │ ├── analysis-lp.c │ │ ├── analysis-lp.h │ │ └── specialized-lps.c │ ├── st-event-trace.c │ ├── st-instrumentation.c │ ├── st-instrumentation.h │ ├── st-model-data.c │ ├── st-sim-engine.c │ └── st-stats-buffer.c ├── lz4.c ├── lz4.h ├── network-mpi.c ├── network-mpi.h ├── queue │ ├── calendar.c.old │ ├── heap.c.old │ ├── kp_splay.c.old │ ├── splay.c │ └── tw-queue.h ├── rand-clcg4.c ├── rand-clcg4.h ├── rio │ ├── README.md │ ├── io-mpi.c │ ├── io-serialize.c │ └── io.h ├── ross-config.in ├── ross-extern.h ├── ross-global.c ├── ross-gvt.h ├── ross-inline.h ├── ross-kernel-inline.h ├── ross-random.c ├── ross-random.h ├── ross-types.h ├── ross.h ├── ross.pc.in ├── tw-event.c ├── tw-eventq.h ├── tw-kp.c ├── tw-lp.c ├── tw-opts.c ├── tw-opts.h ├── tw-pe.c ├── tw-sched.c ├── tw-setup.c ├── tw-state.c ├── tw-stats.c ├── tw-timing.c ├── tw-timing.h └── tw-util.c ├── docs ├── CMakeLists.txt ├── Doxyfile.user.in └── header.html └── models ├── CMakeLists.txt ├── README.md └── phold ├── 
CMakeLists.txt ├── phold.c └── phold.h /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | There are many ways to contribute to ROSS: 4 | 5 | - Create and release a model. 6 | Like any simulation engine, ROSS is always looking for new models and new model developers. 7 | This is also the best way to learn about ROSS and its API. 8 | - File a bug or request a feature through [GitHub Issues](http://github.com/ROSS-org/ROSS/issues). 9 | We are always looking to improve ROSS to make it more stable for our users. 10 | Feature requests and related discussions are located here as well. 11 | - The best way to ensure a bug or feature request is addressed is to do it yourself! 12 | Spelunking through the ROSS core can be an enlightening journey. 13 | Once you've made the change, feel free to create a [pull request](https://github.com/ROSS-org/ROSS/pulls). 14 | Between our continuous integration testing and our experienced ROSS core team, we will ensure your change is safe before deploying it to the master branch. 15 | 16 | ## Small Changes 17 | 18 | Development on the ROSS core is done through [GitHub Pull Requests](https://help.github.com/articles/using-pull-requests/). 19 | We always welcome small-change contributions to ROSS, including: 20 | 21 | - clarification of error/warning messages 22 | - bug fixes (hopefully there aren't any bugs to begin with!) 23 | - whitespace or code-style changes 24 | - other straight-forward changes that do not have wide-reaching consequences 25 | 26 | ## Major Changes and Features 27 | 28 | ROSS is being continually developed and we are frequently adding new features. 29 | For these larger changes to ROSS, there are a few boxes that must be checked before any pull request is merged into the master branch. 30 | 31 | 1. Ensure current tests pass 32 | 2. Ensure coverage increases 33 | 3. Ensure dependent projects are updated (needed for API changes) 34 | 4. 
Document the change through a blog post 35 | 36 | ### Continuous Integration Testing and Coverage 37 | 38 | First, the new feature or major change must pass all of the existing TravisCI tests. 39 | 40 | Next, the test coverage must increase (or at least stay the same). 41 | For new features, this usually means that a new test must be written. 42 | There are typically two options for a test: 43 | - Add a new test to the PHOLD model (see [models/phold/CMakeLists.txt](https://github.com/ROSS-org/ROSS/blob/master/models/phold/CMakeLists.txt)). 44 | - Create a new model which tests your feature and add this model to the ROSS-Models repository. 45 | 46 | ### ROSS Model Changes 47 | 48 | The [ROSS-Models repository](http://github.com/ROSS-org/ROSS-Models) contains models which are no longer under development. 49 | If your new feature is a major API change to ROSS, the models in this repository must be updated. 50 | The workflow to update the ROSS-Models submodule is as follows: 51 | 52 | 1. In your feature branch of ROSS, load the submodules 53 | ``` 54 | git submodule init 55 | git submodule update 56 | ``` 57 | 2. Move into the `models/ROSS-Models/` directory. 58 | Make the appropriate API changes and commit them using ROSS. 59 | 3. While within this directory, upload these changes to GitHub using the typical `git push origin master` command. 60 | 4. Move back up to the base ROSS directory. 61 | You should see the changed commit hash for the ROSS-Models submodule when you run a `git status`. 62 | Commit this change in hash number using `git commit -am "updated ROSS-Models"`. 63 | 64 | ### CODES 65 | 66 | The [CODES Project](http://press3.mcs.anl.gov/codes/) is actively developed and depends on ROSS as its underlying simulation engine. 67 | The CODES repository can be found [here](https://xgitlab.cels.anl.gov). 68 | You should be able to log in to ANL's GitLab service. 69 | Here you can fork the CODES repository and create a pull request with any required changes. 
70 | 71 | ### Documentation 72 | 73 | In order to keep our documentation up-to-date, any new feature or major change must be documented before it is merged into the master branch. 74 | The easiest way to document the change is to create a new blog post for our website. 75 | The [website contributing guide](https://github.com/ROSS-org/ross-org.github.io/blob/master/CONTRIBUTING.md) documents this process. 76 | 77 | ## Versioning and New Releases 78 | 79 | ROSS does not utilize a numbered-version system. 80 | Instead, each commit on the master branch represents a change in ROSS. 81 | Thus, each commit hash can be used as a version number that we guarantee will never change. 82 | 83 | To achieve the eternal validity of a commit hash, we utilize squash commits to merge any changes. 84 | All merges into the master branch should be made through the GitHub pull request interface. 85 | Through this interface, the merge can be squashed. 86 | Squash commits have several implications: 87 | 88 | 1. *The squash-on-merge option must be selected within the GitHub interface by the person doing the merge.* 89 | 1. The individual commits are not placed in the history of the master branch. 90 | However, they do remain available through the pull request page. 91 | 2. One positive outcome is that the blame on any file will be simplified since there is now only one commit associated with the entire change. 92 | 3. Once a feature branch is merged into master, it should be **deleted from any local repositories**. 93 | There are possible issues if someone attempts to re-merge the branch (including commits previously added in a squash). 
94 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **add your comments here** 2 | 3 | --- 4 | 5 | If this merge represents a feature addition to ROSS, the following items must be completed before the branch will be merged: 6 | 7 | - [ ] Document the feature on the blog (See the [website Contributing guide](https://github.com/ROSS-org/ross-org.github.io/blob/master/CONTRIBUTING.md)). 8 | Include a link to your blog post in the Pull Request. 9 | - [ ] Builds should cleanly compile with -Wall and -Wextra. 10 | - [ ] One or more TravisCI tests should be created (and they should pass) 11 | - [ ] Through the TravisCI tests, coverage should increase 12 | - [ ] Test with CODES to ensure everything continues to work 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore models that aren't already in tree 2 | # (must manually add models to override this) 3 | models/* 4 | *.swp 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "risa"] 2 | path = core/risa 3 | url = https://github.com/ROSS-org/RISA 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | language: cpp 4 | compiler: 5 | - clang 6 | # Change this to your needs 7 | notifications: 8 | email: 9 | on_success: change 10 | on_failure: always 11 | env: 12 | - secure: 
"BbB1KVY0Yb6DJwxdfFDF1PJwSx9euNfNX94oDKftiH8LE0nEzfS6xZc2sBkWTWOThHml9ttBkDIx/NhxEThOjyVcX6uv4kibP6moV5EqxqC+kLoZSEZnVuAdTJfGRKBdzmRp66R5a/GiMzzz/F3+smdVFMb6XR06sPQa5TQZjEc=" 13 | git: 14 | submodules: false 15 | before_install: 16 | - sudo apt-add-repository -y ppa:libreoffice/libreoffice-4-2 17 | - sudo apt-get update -q 18 | - sudo apt-get install -y -qq lcov curl doxygen graphviz 19 | - lscpu 20 | - CLOCK_SPEED=`lscpu | grep "MHz" | awk '{print $3*1000*1000}'` 21 | - echo $CLOCK_SPEED 22 | - sh ./conf/travis-install-mpi.sh mpich2 23 | - mpirun --version 24 | script: 25 | - mkdir cxx-build && cd cxx-build 26 | - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-Wall -Wextra" .. 27 | - export MPICH_CC=clang++ 28 | - make 29 | - unset MPICH_CC 30 | - cd .. 31 | - mkdir release && cd release 32 | - MPICH_CC=clang cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON .. 33 | - make 34 | - CTEST_OUTPUT_ON_FAILURE=1 make test 35 | - cd .. 36 | - mkdir build-gtod && cd build-gtod 37 | - MPICH_CC=clang cmake -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON -DROSS_CLOCK_OVERRIDE=ON .. 38 | - make 39 | - CTEST_OUTPUT_ON_FAILURE=1 ctest -R SCHED 40 | - make coveralls 41 | - cd .. 42 | - mkdir build && cd build 43 | - MPICH_CC=clang cmake -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON .. 44 | - make 45 | - CTEST_OUTPUT_ON_FAILURE=1 make test 46 | - make coveralls 47 | - cd .. 48 | - mkdir build2 && cd build2 49 | - MPICH_CC=clang cmake -DAVL_TREE=OFF -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON .. 
50 | - make 51 | - CTEST_OUTPUT_ON_FAILURE=1 make test 52 | - make coveralls 53 | branches: 54 | only: 55 | - master 56 | - develop 57 | after_success: 58 | - bash <(curl -s https://codecov.io/bash) 59 | - ## the following automatically builds the doxygen 60 | - ## documentation and pushes it to the gh_pages branch 61 | - ## Shamelessly stolen from http://bit.ly/1H1sawW 62 | - 63 | - # First, set up credentials using the environment variables 64 | - # GIT_NAME, GIT_EMAIL and GH_TOKEN. These were passed 65 | - # encrypted to travis and should have been decrypted 66 | - # using travis' private key before this script was run. 67 | - git config --global user.name "ROSS bot" 68 | - git config --global user.email ross.gh.robot@gmail.com 69 | - 70 | - # clone the whole repo again, but switch to gh_pages branch 71 | - git clone -b master --single-branch https://github.com/ross-org/ross-org.github.io 72 | - cd ross-org.github.io 73 | - git clone -b master --single-branch https://github.com/ross-org/ROSS 74 | - cd ROSS 75 | - mkdir build && cd build 76 | - cmake -DROSS_BUILD_DOXYGEN=ON -DDOXYGEN_CALLER_GRAPHS=ON -DDOXYGEN_CALL_GRAPHS=ON .. 77 | - make apidoc 78 | - cd ../.. 79 | - git rm -r ROSS-docs 80 | - mkdir -p ROSS-docs/docs 81 | - mv ROSS/build/docs/html ROSS-docs/docs 82 | - git add ROSS-docs 83 | - git commit -m "Automatic doxygen build." 
84 | - git push https://${GH_TOKEN}@github.com/ROSS-org/ross-org.github.io master 85 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(ROSS_TOP C) 2 | CMAKE_MINIMUM_REQUIRED(VERSION 3.5) 3 | 4 | SET(CMAKE_POSITION_INDEPENDENT_CODE ON) 5 | 6 | # ROSS Configuration Options 7 | 8 | ENABLE_TESTING() 9 | INCLUDE(CTest) 10 | 11 | LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/core/cmake/") 12 | 13 | # Follow section based on Spack doc: 14 | # https://spack.readthedocs.io/en/latest/workflows.html#write-the-cmake-build 15 | # enable @rpath in the install name for any shared library being built 16 | # note: it is planned that a future version of CMake will enable this by default 17 | set(CMAKE_MACOSX_RPATH 1) 18 | 19 | # Always use full RPATH 20 | # http://www.cmake.org/Wiki/CMake_RPATH_handling 21 | # http://www.kitware.com/blog/home/post/510 22 | 23 | # use, i.e. don't skip the full RPATH for the build tree 24 | SET(CMAKE_SKIP_BUILD_RPATH FALSE) 25 | 26 | # when building, don't use the install RPATH already 27 | # (but later on when installing) 28 | SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) 29 | 30 | # add the automatically determined parts of the RPATH 31 | # which point to directories outside the build tree to the install RPATH 32 | SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 33 | 34 | # the RPATH to be used when installing, but only if it's not a system directory 35 | LIST(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) 36 | IF("${isSystemDir}" STREQUAL "-1") 37 | SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") 38 | ENDIF("${isSystemDir}" STREQUAL "-1") 39 | 40 | # end of spack-related addition 41 | 42 | # We probably don't want this to run on every build. 
43 | option(COVERALLS "Generate coveralls data" OFF) 44 | 45 | if (COVERALLS) 46 | include(Coveralls) 47 | coveralls_turn_on_coverage() 48 | endif() 49 | 50 | # Priority Queue Implementation 51 | SET(QUEUE splay) 52 | # Other queue implementations are no longer supported. 53 | # SET(QUEUE splay CACHE STRING "Queue type chosen by the user at configure time") 54 | # SET_PROPERTY(CACHE QUEUE PROPERTY STRINGS splay calendar heap kp_splay) 55 | 56 | # Random Library 57 | SET(RAND clcg4) 58 | 59 | # Network option 60 | # tcp option removed 61 | SET(NETWORK mpi) 62 | 63 | # GVT algorithm option 64 | #SET(GVT 7oclock) 65 | SET(GVT mpi_allreduce) 66 | 67 | # Architecture setting and management 68 | SET(VALID_ARCH NO) 69 | OPTION(ROSS_CLOCK_OVERRIDE "override platform detection to use gtod clock" NO) 70 | 71 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386) 72 | SET(VALID_ARCH YES) 73 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall") 74 | ADD_DEFINITIONS(-D_GNU_SOURCE) 75 | SET(CLOCK i386) 76 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386) 77 | 78 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgl) 79 | SET(VALID_ARCH YES) 80 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qflag=i:i -qattr=full -O5") 81 | SET(OPTIONS "-qtune=440 -qarch=440d") 82 | SET(CLOCK bgl) 83 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgl) 84 | 85 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgp) 86 | SET(VALID_ARCH YES) 87 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qflag=i:i -qattr=full -O5") 88 | SET(OPTIONS "-qtune=450 -qarch=450d") 89 | SET(CLOCK bgl) 90 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgp) 91 | 92 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgq) 93 | SET(VALID_ARCH YES) 94 | OPTION(USE_BGPM "Blue Gene/Q specific Performance Counters" OFF) 95 | SET(BGPM_INCLUDE /bgsys/drivers/ppcfloor/bgpm/include/) 96 | ADD_LIBRARY(imp_bgpm STATIC IMPORTED) 97 | SET_PROPERTY(TARGET imp_bgpm PROPERTY IMPORTED_LOCATION /bgsys/drivers/ppcfloor/bgpm/lib/libbgpm.a) 98 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${BGPM_INCLUDE} -O5 -qstrict 
-qprefetch=aggressive -qarch=qp -qtune=qp -qmaxmem=-1 -qsimd=noauto -qreport -qhot") 99 | SET(CLOCK bgq) 100 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL bgq) 101 | 102 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL ppc64) 103 | SET(VALID_ARCH YES) 104 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall") 105 | ADD_DEFINITIONS(-D_GNU_SOURCE) 106 | SET(CLOCK ppc) 107 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL ppc64) 108 | 109 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL ppc64le) 110 | SET(VALID_ARCH YES) 111 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O5 -qprefetch=aggressive -qarch=pwr9 -qtune=auto -qmaxmem=-1 -qsimd=noauto -qhot") 112 | SET(CLOCK ppc64le) 113 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL ppc64le) 114 | 115 | IF((${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) OR (${CMAKE_SYSTEM_PROCESSOR} STREQUAL amd64)) 116 | SET(VALID_ARCH YES) 117 | SET(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}") 118 | ADD_DEFINITIONS(-D_GNU_SOURCE) 119 | SET(CLOCK amd64) 120 | ENDIF((${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) OR (${CMAKE_SYSTEM_PROCESSOR} STREQUAL amd64)) 121 | 122 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64) 123 | SET(VALID_ARCH YES) 124 | SET(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}") 125 | ADD_DEFINITIONS(-D_GNU_SOURCE) 126 | SET(CLOCK aarch64) 127 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64) 128 | 129 | IF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l) 130 | SET(VALID_ARCH YES) 131 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") 132 | ADD_DEFINITIONS(-D_GNU_SOURCE) 133 | SET(CLOCK armv7l) 134 | ENDIF(${CMAKE_SYSTEM_PROCESSOR} STREQUAL armv7l) 135 | 136 | IF(VALID_ARCH AND NOT ${ROSS_CLOCK_OVERRIDE}) 137 | MESSAGE(STATUS "System architecture detected: ${CMAKE_SYSTEM_PROCESSOR}") 138 | MESSAGE(STATUS "Using C_FLAGS: ${CMAKE_C_FLAGS}") 139 | MESSAGE(STATUS "Using CLOCK: ${CLOCK}") 140 | ELSE(VALID_ARCH AND NOT ${ROSS_CLOCK_OVERRIDE}) 141 | MESSAGE(WARNING "System architecture not recognized!\n" 142 | "Found: ${CMAKE_SYSTEM_PROCESSOR}\n" 143 | "Falling back to get-time-of-day clock implementation.") 144 | 
SET(CLOCK gtod) 145 | ENDIF(VALID_ARCH AND NOT ${ROSS_CLOCK_OVERRIDE}) 146 | 147 | 148 | ## MPI 149 | INCLUDE(SetupMPI) 150 | IF(MPI_C_FOUND) 151 | INCLUDE_DIRECTORIES(${MPI_C_INCLUDE_PATH}) 152 | LIST(APPEND ROSS_EXTERNAL_LIBS ${MPI_C_LIBRARIES}) 153 | ELSE(MPI_C_FOUND) 154 | MESSAGE("WARNING: Could not find MPI!") 155 | MESSAGE(" Either add an MPI compiler to your path (using modules)") 156 | MESSAGE(" Or force CMake to build using the correct compiler (`export CC=mpicc`)") 157 | ENDIF(MPI_C_FOUND) 158 | 159 | # ROSS Core code 160 | ADD_SUBDIRECTORY(core) 161 | 162 | # ROSS Models directory 163 | OPTION(ROSS_BUILD_MODELS "Build ROSS models?" OFF) 164 | IF(ROSS_BUILD_MODELS) 165 | ADD_SUBDIRECTORY(models) 166 | ENDIF(ROSS_BUILD_MODELS) 167 | 168 | # ROSS Documentation 169 | FIND_PACKAGE(Doxygen) 170 | IF(DOXYGEN_FOUND) 171 | OPTION(ROSS_BUILD_DOXYGEN "Build Doxygen documentation?" OFF) 172 | IF(ROSS_BUILD_DOXYGEN) 173 | ADD_SUBDIRECTORY(docs) 174 | ENDIF(ROSS_BUILD_DOXYGEN) 175 | ENDIF(DOXYGEN_FOUND) 176 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Rensselaer Polytechnic Institute 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 
14 | 15 | Neither the name of Rensselaer Polytechnic Institute nor the names 16 | of its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to Simplified ROSS! 2 | 3 | Welcome to a leaner, meaner, *faster* version of ROSS. 4 | While the entire history of ROSS has been preserved in this repository, a major change in the directory structure has made getting the full history of a file somewhat of a pain. 5 | You may find the now-deprecated version at the [ROSS-Legacy tag](https://github.com/ROSS-org/ROSS/releases/tag/Legacy) in this repository. 6 | Using this repository you can compare files from the new `ROSS/core` to `ROSS/ross`. 7 | For a detailed list of changes between old ROSS and SR please visit [the wiki](https://github.com/ROSS-org/ROSS/wiki/Differences-between-Simplified-ROSS-and-ROSS-Legacy). 
8 | 9 | For the most recent docs and other important posts about ROSS, see the [ROSS webpage](http://ross-org.github.io). 10 | 11 | [![Build Status](https://travis-ci.com/ROSS-org/ROSS.svg?branch=master)](https://travis-ci.com/ROSS-org/ROSS) 12 | [![codecov.io](http://codecov.io/github/ROSS-org/ROSS/coverage.svg?branch=master)](http://codecov.io/github/ROSS-org/ROSS?branch=master) 13 | [![Doxygen](https://img.shields.io/badge/doxygen-reference-blue.svg)](http://ross-org.github.io/ROSS-docs/docs/html) 14 | 15 | ## History 16 | 17 | ROSS's history starts with a one-week re-implementation of [Georgia Tech Time Warp (GTW)](http://www.cc.gatech.edu/computing/pads/tech-parallel-gtw.html) by Shawn Pearce and Dave Bauer in 1999. 18 | After 10 years of in-house development, version 5.0 of [Rensselaer's Optimistic Simulation System](http://sourceforge.net/projects/pdes/) went live at SourceForge.net. 19 | Thus the official version history began! 20 | 21 | Through the years ROSS has migrated from CVS, to SVN, to Git and GitHub.com. 22 | The code was maintained by Chris Carothers and his graduate students at RPI ([publications](http://cs.rpi.edu//~chrisc/#publications)). 23 | Over the years, several features (including a shared-memory version) were implemented within ROSS. 24 | Some of these features have since been optimized out, leaving behind cruft. 25 | 26 | In early 2015 a sleeker version of ROSS was released. 27 | Developed as Simplified ROSS ([gonsie/SR](http://github.com/gonsie/SR)), this version removed many files, functions, and variables that had become deprecated over time. 28 | 29 | ## Requirements 30 | 31 | 1. ROSS is written in standard C and thus requires a C compiler (C11 is preferred, but not required). 32 | 2. The build system is [CMake](http://cmake.org), and we require version 3.5 or higher. 33 | 3. ROSS relies on MPI. 34 | We recommend the [MPICH](http://www.mpich.org) implementation. 35 | 36 | ## Startup Instructions 37 | 38 | 1. 
Clone the repository to your local machine: 39 | ``` 40 | git clone -b master --single-branch git@github.com:ROSS-org/ROSS.git 41 | cd ROSS 42 | ``` 43 | Since the ROSS repository is quite large, it is recommended that you only clone the master branch. 44 | To speed up the clone command even more, use the `--depth=1` argument. 45 | 46 | 2. *Optional* Install the submodules: 47 | ``` 48 | git submodule init 49 | git submodule update 50 | ``` 51 | Currently, ROSS includes one submodule: 52 | - [RISA](https://github.com/ROSS-org/RISA) ROSS In Situ Analysis 53 | 54 | 3. *Optional* Symlink your model to ROSS. 55 | Please see [this blog post](https://ross-org.github.io/setup/build-model-with-ross.html) for details about creating and integrating a model with ROSS. 56 | ``` 57 | ln -s ~/path-to/your-existing-model models/your-model-name 58 | ``` 59 | 60 | 4. Create a build directory. 61 | ROSS developers typically do out-of-tree builds. See the [Installation page](https://ross-org.github.io/setup/installation.html) for more details. 62 | ``` 63 | cd ~/directory-of-builds/ 64 | mkdir ROSS-build 65 | cd ROSS-build 66 | ccmake ~/path-to/ROSS 67 | ``` 68 | 69 | 5. Make your model(s) with one of the following commands 70 | ``` 71 | make -k // ignore errors from other models 72 | make -j 12 // parallel build 73 | make model-name // build only one model 74 | ``` 75 | 76 | 6. Run your model. 77 | See [this blog post](https://ross-org.github.io/setup/running-sim.html) for details about the ROSS command line options. 78 | ``` 79 | cd ~/directory-of-builds/ROSS-build/models/your-model 80 | ./your-model --synch=1 // sequential mode 81 | mpirun -np 2 ./your-model --synch=2 // conservative mode 82 | mpirun -np 2 ./your-model --synch=3 // optimistic mode 83 | ./your-model --synch=4 // optimistic debug mode (note: not a parallel execution!) 
84 | ``` 85 | -------------------------------------------------------------------------------- /conf/travis-install-mpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # this conf file is taken from the mpi4py project 3 | # http://mpi4py.scipy.org/ 4 | set -e 5 | case $1 in 6 | mpich1) set -x; 7 | sudo apt-get install -q gfortran mpich-shmem-bin libmpich-shmem1.0-dev;; 8 | mpich2) set -x; 9 | sudo apt-get install -q gfortran mpich2 libmpich2-dev;; 10 | mpich3) set -x; 11 | sudo apt-get install -q gfortran libcr0 default-jdk; 12 | wget -q http://www.cebacad.net/files/mpich/ubuntu/mpich-3.1/mpich_3.1-1ubuntu_amd64.deb; 13 | sudo dpkg -i ./mpich_3.1-1ubuntu_amd64.deb; 14 | rm -f ./mpich_3.1-1ubuntu_amd64.deb;; 15 | openmpi) set -x; 16 | sudo apt-get install -q gfortran openmpi-bin openmpi-common libopenmpi-dev;; 17 | *) 18 | echo "Unknown MPI implementation:" $1; exit 1;; 19 | esac 20 | -------------------------------------------------------------------------------- /core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(ROSS C) 2 | INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR} ${ROSS_BINARY_DIR}) 3 | 4 | SET(ross_srcs 5 | 6 | queue/tw-queue.h 7 | queue/${QUEUE}.c 8 | 9 | ross-random.h 10 | ross-random.c 11 | rand-${RAND}.h 12 | rand-${RAND}.c 13 | 14 | clock/${CLOCK}.h 15 | clock/${CLOCK}.c 16 | 17 | ross-gvt.h 18 | gvt/${GVT}.h 19 | gvt/${GVT}.c 20 | 21 | network-${NETWORK}.h 22 | network-${NETWORK}.c 23 | 24 | hash-quadratic.h 25 | hash-quadratic.c 26 | 27 | buddy.h 28 | buddy.c 29 | lz4.h 30 | lz4.c 31 | tw-state.c 32 | 33 | ross-extern.h 34 | ross-global.c 35 | 36 | ross-inline.h 37 | ross-kernel-inline.h 38 | ross.h 39 | ross-types.h 40 | 41 | tw-eventq.h 42 | tw-event.c 43 | 44 | tw-opts.h 45 | tw-opts.c 46 | 47 | tw-pe.c 48 | tw-kp.c 49 | tw-lp.c 50 | 51 | tw-timing.h 52 | tw-timing.c 53 | 54 | tw-sched.c 55 | tw-setup.c 56 | tw-stats.c 57 | tw-util.c 58 
| 59 | instrumentation/st-instrumentation.h 60 | instrumentation/st-instrumentation.c 61 | instrumentation/st-stats-buffer.c 62 | instrumentation/st-sim-engine.c 63 | instrumentation/st-event-trace.c 64 | instrumentation/st-model-data.c 65 | instrumentation/ross-lps/analysis-lp.h 66 | instrumentation/ross-lps/analysis-lp.c 67 | instrumentation/ross-lps/specialized-lps.c) 68 | 69 | # ROSS VERSION INFORMATION 70 | ## Print Date and Time at top of ROSS output 71 | INCLUDE (CheckFunctionExists) 72 | CHECK_FUNCTION_EXISTS(ctime HAVE_CTIME) 73 | 74 | ## Print ROSS Git Hash 75 | # From http://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake 76 | # Now following this approach (which is based on the previous): 77 | # http://ipenguin.ws/2012/11/cmake-automatically-use-git-tags-as.html 78 | # This way lets us use the actual version numbers of ROSS, instead of the git commit 79 | LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") 80 | INCLUDE(GetGitRevisionDescription) 81 | #GET_GIT_HEAD_REVISION(GIT_REFSPEC GIT_SHA1) 82 | 83 | # changed to look at the working tree and not the latest commit hash -- allows for use of --dirty. 84 | git_describe_working_tree(VERSION --tags --dirty) 85 | message(STATUS "ROSS VERSION=${VERSION}") 86 | 87 | #parse the version information into pieces. 88 | string(REGEX REPLACE "^v([0-9]+)\\..*" "\\1" VERSION_MAJOR "${VERSION}") 89 | string(REGEX REPLACE "^v[0-9]+\\.([0-9]+).*" "\\1" VERSION_MINOR "${VERSION}") 90 | string(REGEX REPLACE "^v[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}") 91 | string(REGEX REPLACE "^v[0-9]+\\.[0-9]+\\.[0-9]+(.*)" "\\1" VERSION_SHA1 "${VERSION}") 92 | # VERSION_SHORT used in the ross.pc file 93 | set(VERSION_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}") 94 | 95 | 96 | # Data Structure for remote Events 97 | # If AVL_TREE is OFF, ROSS reverts to hashing 98 | OPTION(AVL_TREE "Use AVL trees for optimistic mode events? 
(hash tables otherwise)" ON) 99 | IF(AVL_TREE) 100 | SET(ross_srcs ${ross_srcs} avl_tree.h avl_tree.c) 101 | ENDIF(AVL_TREE) 102 | 103 | # RIO: Restart IO 104 | OPTION(USE_RIO "Enable RIO checkpointing library?" OFF) 105 | IF(USE_RIO) 106 | SET(ross_srcs ${ross_srcs} rio/io-mpi.c rio/io-serialize.c) 107 | INCLUDE_DIRECTORIES(rio) 108 | ENDIF(USE_RIO) 109 | 110 | # Damaris I/O and data management 111 | OPTION(USE_DAMARIS "Build with Damaris library (for in situ vis/analysis)?" OFF) 112 | if (USE_DAMARIS) 113 | ADD_SUBDIRECTORY(risa) 114 | INCLUDE_DIRECTORIES(${DAMARIS_INCLUDE}) 115 | SET(ross_srcs ${ross_srcs} ${ROSS_Damaris_SOURCE_DIR}/core/damaris.h) 116 | ENDIF(USE_DAMARIS) 117 | 118 | # Use deterministic unbiased RNG tiebreaker for event ties 119 | OPTION(USE_RAND_TIEBREAKER "Build with deterministic unbiased tiebreaker for event ties" ON) 120 | 121 | # Use debugging-friendly memory allocation 122 | OPTION(ROSS_ALLOC_DEBUG "Use naive allocator to be more friendly to memory debugging tools" OFF) 123 | 124 | OPTION(RAND_NORMAL "Turn on state for normal distribution" ON) 125 | 126 | # Show timing data at end of run 127 | OPTION(ROSS_timing "Perform ROSS timings" ON) 128 | 129 | # Used by ross-kernel-inline.h 130 | OPTION(ROSS_runtime_checks "Perform ID checks" OFF) 131 | IF(CMAKE_BUILD_TYPE MATCHES Debug) 132 | SET(ROSS_runtime_checks ON CACHE BOOL "Perform ID checks" FORCE) 133 | ENDIF(CMAKE_BUILD_TYPE MATCHES Debug) 134 | 135 | # Set all options 136 | #SET(OPTIONS "${OPTIONS} -DROSS_QUEUE_${QUEUE} -DROSS_RAND_${RAND} -DROSS_NETWORK_mpi -DROSS_CLOCK_${CLOCK} -DROSS_GVT_${GVT} -DARCH_${ARCH}") 137 | #SET(OPTIONS "${OPTIONS} -DROSS_QUEUE_${QUEUE} -DROSS_RAND_${RAND} -DROSS_NETWORK_mpi -DROSS_CLOCK_${CLOCK} -DROSS_GVT_${GVT} -DARCH_${ARCH}" PARENT_SCOPE) 138 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DROSS_OPTION_LIST='\"${OPTIONS}\"'") 139 | 140 | # Generate Library 141 | OPTION(ROSS_BUILD_SHARED_LIBS "Build shared libraries instead of static" OFF) 142 | 
SET(BUILD_SHARED_LIBS ${ROSS_BUILD_SHARED_LIBS}) 143 | ADD_LIBRARY(ROSS ${ross_srcs}) 144 | SET_TARGET_PROPERTIES(ROSS PROPERTIES OUTPUT_NAME ROSS) 145 | TARGET_LINK_LIBRARIES(ROSS ${ROSS_EXTERNAL_LIBS}) 146 | TARGET_INCLUDE_DIRECTORIES(ROSS INTERFACE ${MPI_C_INCLUDE_PATH}) 147 | 148 | if (COVERALLS) 149 | set(COVERAGE_SRCS ${ross_srcs}) 150 | # Don't check coverage on lz4 151 | list(REMOVE_ITEM COVERAGE_SRCS lz4.h lz4.c) 152 | 153 | # Create the coveralls target. 154 | coveralls_setup( 155 | "${COVERAGE_SRCS}" # The source files. 156 | OFF) # If we should upload. 157 | 158 | endif() 159 | 160 | # Build Specific Config Header 161 | CONFIGURE_FILE(config.h.in config.h) 162 | SET(ross_srcs ${ross_srcs} config.h) 163 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in PROPERTIES GENERATED FALSE) 164 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/config.h PROPERTIES GENERATED TRUE) 165 | 166 | 167 | # CODES config bin 168 | SET(ROSS_CC $ENV{CC}) 169 | SET(ROSS_CXX $ENV{CXX}) 170 | CONFIGURE_FILE(ross-config.in ross-config @ONLY) 171 | CONFIGURE_FILE(ross.pc.in ross.pc @ONLY) 172 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/ross.pc.in PROPERTIES GENERATED FALSE) 173 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/ross.pc PROPERTIES GENERATED TRUE) 174 | 175 | 176 | # Make Install 177 | INSTALL(FILES ${ROSS_BINARY_DIR}/ross-config DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) 178 | INSTALL(FILES ${ROSS_BINARY_DIR}/config.h DESTINATION include) 179 | INSTALL(DIRECTORY ${ROSS_SOURCE_DIR}/ DESTINATION include FILES_MATCHING PATTERN "*.h") 180 | INSTALL(TARGETS ROSS EXPORT ROSS-targets DESTINATION lib) 181 | INSTALL(EXPORT ROSS-targets DESTINATION lib) 182 | INSTALL(FILES ROSSConfig.cmake DESTINATION lib) 183 | INSTALL(FILES ${ROSS_BINARY_DIR}/ross.pc DESTINATION lib/pkgconfig) 184 | 
-------------------------------------------------------------------------------- /core/ROSSConfig.cmake: -------------------------------------------------------------------------------- 1 | GET_FILENAME_COMPONENT(SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 2 | INCLUDE(${SELF_DIR}/ROSS-targets.cmake) 3 | GET_FILENAME_COMPONENT(ROSS_INCLUDE_DIRS "${SELF_DIR}/../include" ABSOLUTE) 4 | -------------------------------------------------------------------------------- /core/avl_tree.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Copied and modified from http://pine.cs.yale.edu/pinewiki/C/AvlTree google cache */ 4 | 5 | /* implementation of an AVL tree with explicit heights */ 6 | 7 | struct avlNode { 8 | struct avlNode *child[2]; /* left and right */ 9 | tw_event *key; 10 | int height; 11 | struct avlNode *next; /* for ROSS weird linked-list memory */ 12 | }; 13 | 14 | /* empty avl tree is just a null pointer */ 15 | 16 | #define AVL_EMPTY (0) 17 | 18 | /* free a tree */ 19 | void avlDestroy(AvlTree t); 20 | 21 | /* return the height of a tree */ 22 | int avlGetHeight(AvlTree t); 23 | 24 | /* return nonzero if key is present in tree */ 25 | int avlSearch(AvlTree t, tw_event *key); 26 | 27 | /* insert a new element into a tree */ 28 | /* note *t is actual tree */ 29 | void avlInsert(AvlTree *t, tw_event *key); 30 | 31 | /* run sanity checks on tree (for debugging) */ 32 | /* assert will fail if heights are wrong */ 33 | void avlSanityCheck(AvlTree t); 34 | 35 | /* print all keys of the tree in order */ 36 | void avlPrintKeys(AvlTree t); 37 | 38 | /* delete and return minimum value in a tree */ 39 | tw_event * avlDeleteMin(AvlTree *t); 40 | 41 | tw_event * avlDelete(AvlTree *t, tw_event *key); 42 | 43 | AvlTree avl_alloc(void); 44 | 45 | void avl_free(AvlTree t); 46 | -------------------------------------------------------------------------------- /core/buddy.h: 
-------------------------------------------------------------------------------- 1 | #ifndef BUDDY_H 2 | #define BUDDY_H 3 | 4 | #include 5 | 6 | /** 7 | * @file buddy.h 8 | * @brief Buddy-system memory allocator 9 | */ 10 | 11 | typedef enum purpose { FREE, USED } purpose_t; 12 | 13 | #define BUDDY_ALIGN_PREF (32 - 2 * sizeof(void*) - sizeof(uint32_t) - sizeof(purpose_t)) 14 | 15 | /** 16 | * Metadata about this particular block 17 | * (and stored at the beginning of this block). 18 | * One per allocated block of memory. 19 | * Should be 32 bytes to not screw up alignment. 20 | */ 21 | typedef struct buddy_list 22 | { 23 | // Should be two pointers 24 | LIST_ENTRY(buddy_list) next_freelist; 25 | uint32_t size; 26 | purpose_t use; 27 | char padding[BUDDY_ALIGN_PREF]; 28 | } buddy_list_t; 29 | 30 | typedef enum valid { VALID, INVALID } valid_t; 31 | 32 | /** 33 | * Bucket of 2^order sized free memory blocks. 34 | */ 35 | typedef struct buddy_list_bucket 36 | { 37 | LIST_HEAD(buddy_list_head, buddy_list) ptr; 38 | unsigned int count; 39 | unsigned int order; 40 | valid_t is_valid; 41 | } buddy_list_bucket_t; 42 | 43 | buddy_list_bucket_t * create_buddy_table(unsigned int power_of_two); 44 | void *buddy_alloc(unsigned size); 45 | void buddy_free(void *ptr); 46 | 47 | #endif /* BUDDY_H */ 48 | -------------------------------------------------------------------------------- /core/clock/aarch64.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef __GNUC__ 4 | # error gcc asm extensions required 5 | #endif 6 | #if ! 
(defined(__aarch64__)) 7 | # error only aarch64 platform supported 8 | #endif 9 | 10 | /* 11 | * Does same stuff as the amd64, but uses cntvct_el0 12 | */ 13 | static const tw_optdef clock_opts [] = 14 | { 15 | TWOPT_GROUP("ROSS Timing"), 16 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 17 | TWOPT_END() 18 | }; 19 | 20 | const tw_optdef *tw_clock_setup(void) 21 | { 22 | return clock_opts; 23 | } 24 | 25 | 26 | 27 | void 28 | tw_clock_init(tw_pe * me) 29 | { 30 | me->clock_time = 0; 31 | me->clock_offset = tw_clock_read(); 32 | } 33 | 34 | tw_clock 35 | tw_clock_now(tw_pe * me) 36 | { 37 | me->clock_time = tw_clock_read() - me->clock_offset; 38 | return me->clock_time; 39 | } 40 | -------------------------------------------------------------------------------- /core/clock/aarch64.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_aarch64 2 | #define INC_clock_aarch64 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | static inline tw_clock tw_clock_read(void) 7 | { 8 | tw_clock result=0; 9 | #ifdef ROSS_timing 10 | asm volatile ("mrs %0, cntvct_el0" : "=r" (result)); 11 | #endif 12 | return result; 13 | } 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /core/clock/amd64.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef __GNUC__ 4 | # error gcc asm extensions required 5 | #endif 6 | #if ! (defined(__amd64__) || defined(__x86_64__)) 7 | # error only amd64 platform supported 8 | #endif 9 | 10 | /* 11 | * Our function below calls the "rdtsc" x86 assembly language function 12 | * to obtain the current clock cycle value. 
13 | */ 14 | static const tw_optdef clock_opts [] = 15 | { 16 | TWOPT_GROUP("ROSS Timing"), 17 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 18 | TWOPT_END() 19 | }; 20 | 21 | const tw_optdef *tw_clock_setup(void) 22 | { 23 | return clock_opts; 24 | } 25 | 26 | 27 | 28 | void 29 | tw_clock_init(tw_pe * me) 30 | { 31 | me->clock_time = 0; 32 | me->clock_offset = tw_clock_read(); 33 | } 34 | 35 | tw_clock 36 | tw_clock_now(tw_pe * me) 37 | { 38 | me->clock_time = tw_clock_read() - me->clock_offset; 39 | return me->clock_time; 40 | } 41 | -------------------------------------------------------------------------------- /core/clock/amd64.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_amd64 2 | #define INC_clock_amd64 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | static inline tw_clock tw_clock_read(void) 7 | { 8 | tw_clock result=0; 9 | #ifdef ROSS_timing 10 | unsigned a, d; 11 | 12 | do { 13 | __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d)); 14 | result = ((uint64_t)a) | (((uint64_t)d) << 32); 15 | } while (__builtin_expect ((int) result == -1, 0)); 16 | #endif 17 | return result; 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /core/clock/armv7l.c: -------------------------------------------------------------------------------- 1 | /* 2 | This implementation of an ARM v7 clock reader utilizes the 3 | Performance Monitoring Unit (PMU) on Cortex-A7 chips. 4 | Unfortunately, access to the cycle counter from userspace 5 | is disabled by default. A kernel module that enables access 6 | from userspace is required or the system will fault. 
7 | 8 | An example kernel module that does just that can be found: 9 | https://github.com/nmcglohon/armv7l-userspace-counter.git 10 | 11 | More information can be found: 12 | http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html 13 | */ 14 | 15 | #include 16 | 17 | #ifndef __GNUC__ 18 | # error gcc asm extensions required 19 | #endif 20 | #if ! (defined(__arm__)) 21 | # error only 32 bit arm platform supported 22 | #endif 23 | 24 | static const tw_optdef clock_opts [] = 25 | { 26 | TWOPT_GROUP("ROSS Timing"), 27 | TWOPT_STIME("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 28 | TWOPT_END() 29 | }; 30 | 31 | const tw_optdef *tw_clock_setup(void) 32 | { 33 | return clock_opts; 34 | } 35 | 36 | 37 | void tw_clock_init(tw_pe * me) 38 | { 39 | me->clock_time = 0; 40 | me->clock_offset = tw_clock_read(); 41 | } 42 | 43 | 44 | tw_clock tw_clock_now(tw_pe * me) 45 | { 46 | me->clock_time = tw_clock_read() - me->clock_offset; 47 | return me->clock_time; 48 | } 49 | -------------------------------------------------------------------------------- /core/clock/armv7l.h: -------------------------------------------------------------------------------- 1 | /* 2 | This implementation of an ARM v7 clock reader utilizes the 3 | Performance Monitoring Unit (PMU) on Cortex-A7 chips. 4 | Unfortunately, access to the cycle counter from userspace 5 | is disabled by default. A kernel module that enables access 6 | from userspace is required or the system will fault. 
7 | 8 | An example kernel module that does just that can be found: 9 | https://github.com/nmcglohon/armv7l-userspace-counter.git 10 | 11 | More information can be found: 12 | http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html 13 | */ 14 | 15 | #ifndef INC_clock_armv7l 16 | #define INC_clock_armv7l 17 | 18 | typedef unsigned int tw_clock; 19 | 20 | static inline tw_clock tw_clock_read(void) 21 | { 22 | unsigned int result = 0; /* default 0 so the returned value is defined when ROSS_timing is off */ 23 | #ifdef ROSS_timing 24 | do { 25 | __asm__ __volatile__ ("MRC p15, 0, %0, c9, c13, 0" : "=r"(result)); 26 | } while (__builtin_expect ((int) result == -1, 0)); 27 | #endif 28 | 29 | return result; 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /core/clock/bgl.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static const tw_optdef clock_opts [] = 4 | { 5 | TWOPT_GROUP("ROSS Timing"), 6 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 7 | TWOPT_END() 8 | }; 9 | 10 | const tw_optdef *tw_clock_setup(void) 11 | { 12 | return clock_opts; 13 | } 14 | 15 | tw_clock 16 | tw_clock_read(void) 17 | { 18 | tw_clock result = 0; 19 | #ifdef ROSS_timing 20 | unsigned long int upper, lower,tmp; 21 | 22 | __asm__ volatile( 23 | "0: \n" 24 | "\tmftbu %0 \n" 25 | "\tmftb %1 \n" 26 | "\tmftbu %2 \n" 27 | "\tcmpw %2,%0 \n" 28 | "\tbne 0b \n" 29 | : "=r"(upper),"=r"(lower),"=r"(tmp) 30 | ); 31 | 32 | result = upper; 33 | result = result<<32; 34 | result = result|lower; 35 | #endif 36 | return(result); 37 | } 38 | 39 | void 40 | tw_clock_init(tw_pe * me) 41 | { 42 | me->clock_time = 0; 43 | me->clock_offset = tw_clock_read(); 44 | } 45 | 46 | tw_clock 47 | tw_clock_now(tw_pe * me) 48 | { 49 | me->clock_time = tw_clock_read() - me->clock_offset; 50 | return me->clock_time; 51 | } 52 | -------------------------------------------------------------------------------- /core/clock/bgl.h:
-------------------------------------------------------------------------------- 1 | #ifndef INC_clock_bgl 2 | #define INC_clock_bgl 3 | 4 | typedef unsigned long long tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/clock/bgq.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static const tw_optdef clock_opts [] = 4 | { 5 | TWOPT_GROUP("ROSS Timing"), 6 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 7 | TWOPT_END() 8 | }; 9 | 10 | const tw_optdef *tw_clock_setup(void) 11 | { 12 | return clock_opts; 13 | } 14 | 15 | tw_clock 16 | tw_clock_read(void) 17 | { 18 | tw_clock result = 0; 19 | #ifdef ROSS_timing 20 | result = GetTimeBase(); 21 | #endif 22 | return(result); 23 | } 24 | 25 | void 26 | tw_clock_init(tw_pe * me) 27 | { 28 | me->clock_time = 0; 29 | me->clock_offset = tw_clock_read(); 30 | } 31 | 32 | tw_clock 33 | tw_clock_now(tw_pe * me) 34 | { 35 | me->clock_time = tw_clock_read() - me->clock_offset; 36 | return me->clock_time; 37 | } 38 | -------------------------------------------------------------------------------- /core/clock/bgq.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_bgq 2 | #define INC_clock_bgq 3 | 4 | #include 5 | 6 | typedef unsigned long long tw_clock; 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /core/clock/gtod.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern unsigned long long g_tw_clock_rate; 4 | 5 | static const tw_optdef clock_opts [] = 6 | { 7 | TWOPT_GROUP("ROSS Timing"), 8 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 9 | TWOPT_END() 10 | }; 11 | 12 | const tw_optdef *tw_clock_setup(void) 13 | { 14 | return clock_opts; 15 | } 16 | 17 | tw_clock tw_clock_read(void) 18 | { 19 | #ifdef ZERO_BASED 
20 | static volatile int inited = 0; 21 | static volatile tw_clock base = 0; 22 | #else 23 | const tw_clock base = 0; 24 | #endif 25 | 26 | const tw_clock scale = 1000000; 27 | struct timeval tv; 28 | gettimeofday(&tv,NULL); 29 | 30 | #ifdef ZERO_BASED 31 | if(inited == 0) { 32 | base = ((tw_clock) tv.tv_sec)*scale + (tw_clock) tv.tv_usec; 33 | inited = 1; 34 | } 35 | #endif 36 | 37 | return 38 | (((tw_clock) tv.tv_sec)*scale + (tw_clock) tv.tv_usec) - base; 39 | } 40 | 41 | void 42 | tw_clock_init(tw_pe * me) 43 | { 44 | me->clock_time = 0; 45 | me->clock_offset = tw_clock_read(); 46 | } 47 | 48 | tw_clock 49 | tw_clock_now(tw_pe * me) 50 | { 51 | me->clock_time = tw_clock_read() - me->clock_offset; 52 | return me->clock_time; 53 | } 54 | -------------------------------------------------------------------------------- /core/clock/gtod.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_gtod 2 | #define INC_clock_gtod 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/clock/i386.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef __GNUC__ 4 | # error gcc asm extensions required 5 | #endif 6 | #ifndef __i386__ 7 | # error only i386 platform supported 8 | #endif 9 | 10 | static const tw_optdef clock_opts [] = 11 | { 12 | TWOPT_GROUP("ROSS Timing"), 13 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 14 | TWOPT_END() 15 | }; 16 | 17 | const tw_optdef *tw_clock_setup(void) 18 | { 19 | return clock_opts; 20 | } 21 | 22 | tw_clock tw_clock_read(void) 23 | { 24 | tw_clock result; 25 | do { 26 | __asm__ __volatile__("rdtsc" : "=A" (result)); 27 | } while (__builtin_expect ((int) result == -1, 0)); 28 | return result; 29 | } 30 | 31 | void 32 | tw_clock_init(tw_pe * me) 33 | { 34 | me->clock_time = 0; 35 | me->clock_offset = tw_clock_read(); 36 | } 37 | 
38 | tw_clock 39 | tw_clock_now(tw_pe * me) 40 | { 41 | me->clock_time = tw_clock_read() - me->clock_offset; 42 | return me->clock_time; 43 | } 44 | -------------------------------------------------------------------------------- /core/clock/i386.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_i386 2 | #define INC_clock_i386 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/clock/ia64.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef __GNUC__ 4 | # error gcc asm extensions required 5 | #endif 6 | #ifndef __ia64__ 7 | # error only ia64 platform supported 8 | #endif 9 | 10 | static tw_clock tw_clock_read(void) 11 | { 12 | tw_clock result; 13 | do { 14 | __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); 15 | } while (__builtin_expect ((int) result == -1, 0)); 16 | return result; 17 | } 18 | 19 | void 20 | tw_clock_init(tw_pe * me) 21 | { 22 | me->clock_time = 0; 23 | me->clock_offset = tw_clock_read(); 24 | } 25 | 26 | tw_clock 27 | tw_clock_now(tw_pe * me) 28 | { 29 | me->clock_time = tw_clock_read() - me->clock_offset; 30 | return me->clock_time; 31 | } 32 | -------------------------------------------------------------------------------- /core/clock/ia64.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_ia64 2 | #define INC_clock_ia64 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/clock/ppc.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef __GNUC__ 4 | # error gcc asm extensions required 5 | #endif 6 | #if !(defined __ppc__ || defined __PPC__) 7 | # error only ppc platform supported 8 | #endif 9 | 10 | tw_clock tw_clock_read(void) 11 
| { 12 | unsigned long tbu; 13 | unsigned long tb1; 14 | unsigned long tbu1; 15 | 16 | do { 17 | asm volatile( 18 | "mftbu %2\n\t" 19 | "mftb %0\n\t" 20 | "mftbu %1\n\t" 21 | : "=r"(tb1), "=r"(tbu), "=r"(tbu1) ); 22 | } while (tbu != tbu1); 23 | 24 | return ( ((tw_clock)tbu) << 32 ) | tb1; 25 | } 26 | 27 | void 28 | tw_clock_init(tw_pe * me) 29 | { 30 | me->clock_time = 0; 31 | me->clock_offset = tw_clock_read(); 32 | } 33 | 34 | tw_clock 35 | tw_clock_now(tw_pe * me) 36 | { 37 | me->clock_time = tw_clock_read() - me->clock_offset; 38 | return me->clock_time; 39 | } 40 | -------------------------------------------------------------------------------- /core/clock/ppc.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_ppc 2 | #define INC_clock_ppc 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/clock/ppc64le.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern unsigned long long g_tw_clock_rate; 4 | 5 | static const tw_optdef clock_opts [] = 6 | { 7 | TWOPT_GROUP("ROSS Timing"), 8 | TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"), 9 | TWOPT_END() 10 | }; 11 | 12 | const tw_optdef *tw_clock_setup(void) 13 | { 14 | 15 | // reset from default to 512MHz as that's the timebase for the POWER9 system. 
16 | g_tw_clock_rate = 512000000.0; 17 | return clock_opts; 18 | } 19 | 20 | tw_clock tw_clock_read(void) 21 | { 22 | unsigned int tbl, tbu0, tbu1; 23 | 24 | do { 25 | __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); 26 | __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); 27 | __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); 28 | } while (tbu0 != tbu1); 29 | 30 | return (((unsigned long long)tbu0) << 32) | tbl; 31 | } 32 | 33 | void 34 | tw_clock_init(tw_pe * me) 35 | { 36 | me->clock_time = 0; 37 | me->clock_offset = tw_clock_read(); 38 | } 39 | 40 | tw_clock 41 | tw_clock_now(tw_pe * me) 42 | { 43 | me->clock_time = tw_clock_read() - me->clock_offset; 44 | return me->clock_time; 45 | } 46 | -------------------------------------------------------------------------------- /core/clock/ppc64le.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clock_ppc64le 2 | #define INC_clock_ppc64le 3 | 4 | typedef uint64_t tw_clock; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/cmake/Coveralls.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy 3 | # of this software and associated documentation files (the "Software"), to deal 4 | # in the Software without restriction, including without limitation the rights 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | # copies of the Software, and to permit persons to whom the Software is 7 | # furnished to do so, subject to the following conditions: 8 | # 9 | # The above copyright notice and this permission notice shall be included in all 10 | # copies or substantial portions of the Software. 
11 | # 12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | # SOFTWARE. 19 | # 20 | # Copyright (C) 2014 Joakim Söderberg 21 | # 22 | 23 | 24 | # 25 | # Param _COVERAGE_SRCS A list of source files that coverage should be collected for. 26 | # Param _COVERALLS_UPLOAD Upload the result to coveralls? 27 | # 28 | function(coveralls_setup _COVERAGE_SRCS _COVERALLS_UPLOAD) 29 | 30 | if (ARGC GREATER 2) 31 | set(_CMAKE_SCRIPT_PATH ${ARGN}) 32 | message("Coveralls: Using alternate CMake script dir: ${_CMAKE_SCRIPT_PATH}") 33 | else() 34 | set(_CMAKE_SCRIPT_PATH ${PROJECT_SOURCE_DIR}/cmake) 35 | endif() 36 | 37 | if (NOT EXISTS "${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake") 38 | message(FATAL_ERROR "Coveralls: Missing ${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake") 39 | endif() 40 | 41 | if (NOT EXISTS "${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake") 42 | message(FATAL_ERROR "Coveralls: Missing ${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake") 43 | endif() 44 | 45 | # When passing a CMake list to an external process, the list 46 | # will be converted from the format "1;2;3" to "1 2 3". 47 | # This means the script we're calling won't see it as a list 48 | # of sources, but rather just one long path. We remedy this 49 | # by replacing ";" with "*" and then reversing that in the script 50 | # that we're calling. 
51 | # http://cmake.3232098.n2.nabble.com/Passing-a-CMake-list-quot-as-is-quot-to-a-custom-target-td6505681.html 52 | set(COVERAGE_SRCS_TMP ${_COVERAGE_SRCS}) 53 | set(COVERAGE_SRCS "") 54 | foreach (COVERAGE_SRC ${COVERAGE_SRCS_TMP}) 55 | set(COVERAGE_SRCS "${COVERAGE_SRCS}*${COVERAGE_SRC}") 56 | endforeach() 57 | 58 | #message("Coverage sources: ${COVERAGE_SRCS}") 59 | set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json) 60 | 61 | add_custom_target(coveralls_generate 62 | 63 | # Zero the coverage counters. 64 | COMMAND ${CMAKE_COMMAND} 65 | -P "${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake" 66 | 67 | # Run regress tests. 68 | COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure 69 | 70 | # Generate Gcov and translate it into coveralls JSON. 71 | # We do this by executing an external CMake script. 72 | # (We don't want this to run at CMake generation time, but after compilation and everything has run). 73 | COMMAND ${CMAKE_COMMAND} 74 | -DCOVERAGE_SRCS="${COVERAGE_SRCS}" # TODO: This is passed like: "a b c", not "a;b;c" 75 | -DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}" 76 | -DCOV_PATH="${PROJECT_BINARY_DIR}" 77 | -DPROJECT_ROOT="${PROJECT_SOURCE_DIR}" 78 | -P "${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake" 79 | 80 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 81 | COMMENT "Generating coveralls output..." 82 | ) 83 | 84 | if (_COVERALLS_UPLOAD) 85 | message("COVERALLS UPLOAD: ON") 86 | 87 | find_program(CURL_EXECUTABLE curl) 88 | 89 | if (NOT CURL_EXECUTABLE) 90 | message(FATAL_ERROR "Coveralls: curl not found! Aborting") 91 | endif() 92 | 93 | add_custom_target(coveralls_upload 94 | # Upload the JSON to coveralls. 
95 | COMMAND ${CURL_EXECUTABLE} 96 | -S -F json_file=@${COVERALLS_FILE} 97 | https://coveralls.io/api/v1/jobs 98 | 99 | DEPENDS coveralls_generate 100 | 101 | WORKING_DIRECTORY ${PROJECT_BINARY_DIR} 102 | COMMENT "Uploading coveralls output...") 103 | 104 | add_custom_target(coveralls DEPENDS coveralls_upload) 105 | else() 106 | message("COVERALLS UPLOAD: OFF") 107 | add_custom_target(coveralls DEPENDS coveralls_generate) 108 | endif() 109 | 110 | endfunction() 111 | 112 | macro(coveralls_turn_on_coverage) 113 | if(NOT (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) 114 | AND (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")) 115 | message(FATAL_ERROR "Coveralls: Compiler ${CMAKE_C_COMPILER_ID} is not GNU gcc! Aborting... You can set this on the command line using CC=/usr/bin/gcc CXX=/usr/bin/g++ cmake ..") 116 | endif() 117 | 118 | if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") 119 | message(FATAL_ERROR "Coveralls: Code coverage results with an optimised (non-Debug) build may be misleading! 
Add -DCMAKE_BUILD_TYPE=Debug") 120 | endif() 121 | 122 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") 123 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") 124 | endmacro() 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /core/cmake/CoverallsClear.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy 3 | # of this software and associated documentation files (the "Software"), to deal 4 | # in the Software without restriction, including without limitation the rights 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | # copies of the Software, and to permit persons to whom the Software is 7 | # furnished to do so, subject to the following conditions: 8 | # 9 | # The above copyright notice and this permission notice shall be included in all 10 | # copies or substantial portions of the Software. 11 | # 12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | # SOFTWARE. 
19 | # 20 | # Copyright (C) 2014 Joakim Söderberg 21 | # 22 | 23 | file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda) 24 | 25 | -------------------------------------------------------------------------------- /core/cmake/GetGitRevisionDescription.LICENSE_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /core/cmake/GetGitRevisionDescription.cmake.in: -------------------------------------------------------------------------------- 1 | # 2 | # Internal file for GetGitRevisionDescription.cmake 3 | # 4 | # Requires CMake 2.6 or newer (uses the 'function' command) 5 | # 6 | # Original Author: 7 | # 2009-2010 Ryan Pavlik 8 | # http://academic.cleardefinition.com 9 | # Iowa State University HCI Graduate Program/VRAC 10 | # 11 | # Copyright Iowa State University 2009-2010. 12 | # Distributed under the Boost Software License, Version 1.0. 13 | # (See accompanying file LICENSE_1_0.txt or copy at 14 | # http://www.boost.org/LICENSE_1_0.txt) 15 | 16 | set(HEAD_HASH) 17 | 18 | file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) 19 | 20 | string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) 21 | if(HEAD_CONTENTS MATCHES "ref") 22 | # named branch 23 | string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") 24 | if(EXISTS "@GIT_DIR@/${HEAD_REF}") 25 | configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) 26 | elseif(EXISTS "@GIT_DIR@/logs/${HEAD_REF}") 27 | configure_file("@GIT_DIR@/logs/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) 28 | set(HEAD_HASH "${HEAD_REF}") 29 | endif() 30 | else() 31 | # detached HEAD 32 | configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) 33 | endif() 34 | 35 | if(NOT HEAD_HASH) 36 | file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) 37 | string(STRIP "${HEAD_HASH}" HEAD_HASH) 38 | endif() 39 | -------------------------------------------------------------------------------- /core/cmake/SetupMPI.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
3 | # 4 | # Produced at the Lawrence Livermore National Laboratory 5 | # 6 | # LLNL-CODE-725085 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is part of BLT. 11 | # 12 | # For additional details, please also read BLT/LICENSE. 13 | # 14 | # Redistribution and use in source and binary forms, with or without 15 | # modification, are permitted provided that the following conditions are met: 16 | # 17 | # * Redistributions of source code must retain the above copyright notice, 18 | # this list of conditions and the disclaimer below. 19 | # 20 | # * Redistributions in binary form must reproduce the above copyright notice, 21 | # this list of conditions and the disclaimer (as noted below) in the 22 | # documentation and/or other materials provided with the distribution. 23 | # 24 | # * Neither the name of the LLNS/LLNL nor the names of its contributors may 25 | # be used to endorse or promote products derived from this software without 26 | # specific prior written permission. 27 | # 28 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 29 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 | # ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, 32 | # LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY 33 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 37 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 38 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | # POSSIBILITY OF SUCH DAMAGE. 
40 | # 41 | ############################################################################### 42 | 43 | ################################ 44 | # MPI 45 | ################################ 46 | 47 | find_package(MPI) 48 | message(STATUS "MPI C Compile Flags: ${MPI_C_COMPILE_FLAGS}") 49 | message(STATUS "MPI C Include Path: ${MPI_C_INCLUDE_PATH}") 50 | message(STATUS "MPI C Link Flags: ${MPI_C_LINK_FLAGS}") 51 | message(STATUS "MPI C Libraries: ${MPI_C_LIBRARIES}") 52 | 53 | message(STATUS "MPI CXX Compile Flags: ${MPI_CXX_COMPILE_FLAGS}") 54 | message(STATUS "MPI CXX Include Path: ${MPI_CXX_INCLUDE_PATH}") 55 | message(STATUS "MPI CXX Link Flags: ${MPI_CXX_LINK_FLAGS}") 56 | message(STATUS "MPI CXX Libraries: ${MPI_CXX_LIBRARIES}") 57 | 58 | message(STATUS "MPI Executable: ${MPIEXEC}") 59 | message(STATUS "MPI Num Proc Flag: ${MPIEXEC_NUMPROC_FLAG}") 60 | 61 | 62 | if (ENABLE_FORTRAN) 63 | # Determine if we should use fortran mpif.h header or fortran mpi module 64 | find_path(mpif_path 65 | NAMES "mpif.h" 66 | PATHS ${MPI_Fortran_INCLUDE_PATH} 67 | NO_DEFAULT_PATH 68 | ) 69 | 70 | if(mpif_path) 71 | set(MPI_Fortran_USE_MPIF ON CACHE PATH "") 72 | message(STATUS "Using MPI Fortran header: mpif.h") 73 | else() 74 | set(MPI_Fortran_USE_MPIF OFF CACHE PATH "") 75 | message(STATUS "Using MPI Fortran module: mpi.mod") 76 | endif() 77 | endif() 78 | -------------------------------------------------------------------------------- /core/config.h.in: -------------------------------------------------------------------------------- 1 | // ROSS Configuration Options 2 | #define ROSS_QUEUE_${QUEUE} 3 | #define ROSS_RAND_${RAND} 4 | #define ROSS_NETWORK_${NETWORK} 5 | #define ROSS_GVT_${GVT} 6 | #define ROSS_CLOCK_${CLOCK} 7 | #define ARCH_${ARCH} 8 | 9 | // ROSS Core 10 | #cmakedefine HAVE_CTIME 1 11 | #define ROSS_VERSION "${VERSION}" 12 | #cmakedefine AVL_TREE 1 13 | #cmakedefine USE_BGPM 14 | #cmakedefine RAND_NORMAL 15 | #cmakedefine ROSS_timing 16 | #cmakedefine 
ROSS_runtime_checks 17 | #cmakedefine ROSS_ALLOC_DEBUG 18 | #cmakedefine USE_RIO 19 | #cmakedefine USE_DAMARIS 20 | #cmakedefine USE_RAND_TIEBREAKER 21 | -------------------------------------------------------------------------------- /core/gvt/7oclock.h.old: -------------------------------------------------------------------------------- 1 | #ifndef INC_gvt_7oclock_h 2 | #define INC_gvt_7oclock_h 3 | 4 | /* Clock Computation Variables: 5 | * 6 | * The clock is used to implement the 7 O'Clock Algorithm, but 7 | * is useful in other areas, such as determining how long it takes to 8 | * actually complete tasks, such as enq's and deq's. 9 | */ 10 | static tw_volatile int g_tw_7oclock_node_flag; 11 | static tw_volatile tw_clock g_tw_clock_max_send_delta_t; 12 | static tw_volatile tw_clock g_tw_clock_gvt_interval; 13 | static tw_volatile tw_clock g_tw_clock_gvt_window_size; 14 | 15 | static tw_stime gvt_print_interval = 0.1; 16 | static tw_stime percent_complete = 0.0; 17 | 18 | static inline int 19 | tw_gvt_inprogress(tw_pe * pe) 20 | { 21 | #if 0 22 | return (g_tw_7oclock_node_flag == -g_tw_npe && 23 | tw_clock_now(pe) < g_tw_clock_gvt_interval ? 
0 : 1); 24 | #endif 25 | return (g_tw_7oclock_node_flag >= 0 || 26 | tw_clock_now(pe) + g_tw_clock_max_send_delta_t >= g_tw_clock_gvt_interval); 27 | } 28 | 29 | static inline void 30 | gvt_print(tw_stime gvt) 31 | { 32 | if(gvt_print_interval > 1.0) 33 | return; 34 | 35 | if(percent_complete == 0.0) 36 | { 37 | percent_complete = gvt_print_interval; 38 | return; 39 | } 40 | 41 | printf("GVT #%d: simulation %d%% complete (", 42 | g_tw_gvt_done, 43 | (int) min(100, floor(100 * (gvt/g_tw_ts_end)))); 44 | 45 | if (gvt == DBL_MAX) 46 | printf("GVT = %s", "MAX"); 47 | else 48 | printf("GVT = %.4f", gvt); 49 | 50 | printf(").\n"); 51 | percent_complete += gvt_print_interval; 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /core/gvt/mpi_allreduce.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_gvt_mpi_allreduce_h 2 | #define INC_gvt_mpi_allreduce_h 3 | 4 | static double gvt_print_interval = 0.01; 5 | static double percent_complete = 0.0; 6 | 7 | static inline int 8 | tw_gvt_inprogress(tw_pe * pe) 9 | { 10 | return pe->gvt_status; 11 | } 12 | 13 | static inline void 14 | gvt_print(tw_stime gvt) 15 | { 16 | if(gvt_print_interval == 1.0) 17 | return; 18 | 19 | if(percent_complete == 0.0) 20 | { 21 | percent_complete = gvt_print_interval; 22 | return; 23 | } 24 | 25 | double ts = TW_STIME_DBL(gvt); 26 | 27 | printf("GVT #%d: simulation %d%% complete, max event queue size %u (", 28 | g_tw_gvt_done, 29 | (int) ROSS_MIN(100, floor(100 * (ts/g_tw_ts_end))), 30 | tw_pq_max_size(g_tw_pe->pq)); 31 | 32 | if (ts == DBL_MAX) 33 | printf("GVT = %s", "MAX"); 34 | else 35 | printf("GVT = %.4f", ts); 36 | 37 | printf(").\n"); 38 | 39 | #ifdef AVL_TREE 40 | printf("AVL tree size: %d\n", g_tw_pe->avl_tree_size); 41 | #endif 42 | 43 | percent_complete += gvt_print_interval; 44 | } 45 | 46 | extern tw_stat st_get_allreduce_count(); 47 | 48 | #endif 49 | 
-------------------------------------------------------------------------------- /core/hash-quadratic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #ifdef AVL_TREE 3 | #include "avl_tree.h" 4 | #endif /* AVL_TREE */ 5 | 6 | #ifndef AVL_TREE 7 | static void rehash(tw_hash * hash_t, int pe); 8 | static int find_entry(tw_event ** hash_t, tw_event * event, int hash_size, int pe); 9 | static void insert(tw_event ** hash_t, tw_event * event, int hash_size); 10 | static int find_empty(tw_event ** hash_t, tw_event * event, int hash_size); 11 | static int next_prime(int ptst); 12 | static tw_event **allocate_table(int hash_size); 13 | static int hash_(tw_eventid event_id, int hash_size); 14 | #endif 15 | static int is_prime(int ptst); 16 | tw_event *hash_search(tw_event ** hash_t, tw_event *evt, int size); 17 | 18 | void hash_print(tw_hash * h); 19 | 20 | static unsigned int ncpu = 1; 21 | unsigned int g_tw_hash_size = 31; 22 | 23 | #ifndef AVL_TREE 24 | int 25 | hash_(tw_eventid event_id, int hash_size) 26 | { 27 | return event_id % hash_size; 28 | } 29 | #endif 30 | 31 | void * 32 | tw_hash_create() 33 | { 34 | #ifdef AVL_TREE 35 | unsigned int i; 36 | AvlTree avl_list; 37 | 38 | g_tw_pe->avl_tree_size = 0; 39 | 40 | g_tw_avl_node_count = 1 << g_tw_avl_node_count; 41 | avl_list = (AvlTree) tw_calloc(TW_LOC, "avl tree", sizeof(struct avlNode), g_tw_avl_node_count); 42 | 43 | for (i = 0; i < g_tw_avl_node_count - 1; i++) { 44 | avl_list[i].next = &avl_list[i + 1]; 45 | } 46 | avl_list[i].next = NULL; 47 | 48 | g_tw_pe->avl_list_head = &avl_list[0]; 49 | 50 | return NULL; 51 | #else 52 | tw_hash *h; 53 | unsigned int pi; 54 | 55 | ncpu = tw_nnodes(); 56 | h = (tw_hash *) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_hash), 1); 57 | 58 | if (!h) 59 | tw_error(TW_LOC, "Cannot allocate tw_hash."); 60 | 61 | h->num_stored = (int *) tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1); 62 | h->hash_sizes = (unsigned int *) 
tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1); 63 | h->incoming = (tw_event ***) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_event *)* ncpu, 1); 64 | 65 | if(!is_prime(g_tw_hash_size)) 66 | g_tw_hash_size = next_prime(g_tw_hash_size); 67 | 68 | for (pi = 0; pi < ncpu; pi++) 69 | { 70 | h->num_stored[pi] = 0; 71 | h->hash_sizes[pi] = g_tw_hash_size; 72 | h->incoming[pi] = allocate_table(h->hash_sizes[pi]); 73 | } 74 | 75 | return (void *) h; 76 | #endif 77 | } 78 | 79 | void 80 | tw_hash_insert(void *h, tw_event * event, long pe) 81 | { 82 | #ifdef AVL_TREE 83 | (void) h; 84 | (void) pe; 85 | tw_clock start; 86 | 87 | g_tw_pe->avl_tree_size++; 88 | 89 | start = tw_clock_read(); 90 | avlInsert(&event->dest_lp->kp->avl_tree, event); 91 | g_tw_pe->stats.s_avl += tw_clock_read() - start; 92 | #else 93 | tw_hash *hash_t; 94 | 95 | hash_t = (tw_hash *) h; 96 | 97 | insert(hash_t->incoming[pe], event, hash_t->hash_sizes[pe]); 98 | 99 | (hash_t->num_stored[pe])++; 100 | if (hash_t->num_stored[pe] > floor(hash_t->hash_sizes[pe] * MAX_FRACTION)) 101 | { 102 | rehash(hash_t, pe); 103 | } 104 | #endif 105 | } 106 | 107 | #ifndef AVL_TREE 108 | void 109 | insert(tw_event ** hash_t, tw_event * event, int hash_size) 110 | { 111 | int key = 0; 112 | 113 | key = find_empty(hash_t, event, hash_size); 114 | hash_t[key] = event; 115 | } 116 | 117 | void 118 | rehash(tw_hash * hash_t, int pe) 119 | { 120 | int old_size; 121 | int old_stored; 122 | int i; 123 | tw_event **old_list; 124 | 125 | old_stored = hash_t->num_stored[pe]; 126 | old_list = hash_t->incoming[pe]; 127 | old_size = hash_t->hash_sizes[pe]; 128 | 129 | hash_t->num_stored[pe] = 0; 130 | hash_t->hash_sizes[pe] = next_prime(hash_t->hash_sizes[pe]); 131 | hash_t->incoming[pe] = allocate_table(hash_t->hash_sizes[pe]); 132 | 133 | for (i = 0; i < old_size; i++) 134 | { 135 | if (old_list[i] != NULL) 136 | { 137 | insert(hash_t->incoming[pe], old_list[i], hash_t->hash_sizes[pe]); 138 | (hash_t->num_stored[pe])++; 139 | } 140 
| } 141 | 142 | if(old_stored != hash_t->num_stored[pe]) 143 | tw_error(TW_LOC, "Did not rehash properly!"); 144 | 145 | #if VERIFY_HASH_QUAD 146 | printf("\nHASH TABLE RESIZED: old size = %d, new size = %d \n\n", old_size, 147 | hash_t->hash_sizes[pe]); 148 | #endif 149 | } 150 | 151 | int 152 | find_empty(tw_event ** hash_t, tw_event * event, int hash_size) 153 | { 154 | unsigned int i; 155 | int key; 156 | 157 | i = 0; 158 | key = hash_(event->event_id, hash_size); 159 | 160 | if(0 > key) 161 | tw_error(TW_LOC, "here!"); 162 | 163 | while (hash_t[key]) 164 | { 165 | key += 2 * (++i) - 1; 166 | if (key >= hash_size) 167 | key -= hash_size; 168 | } 169 | 170 | return key; 171 | } 172 | 173 | int 174 | find_entry(tw_event ** hash_t, tw_event * event, int hash_size, int pe) 175 | { 176 | unsigned int i; 177 | int key; 178 | 179 | i = 0; 180 | key = hash_(event->event_id, hash_size); 181 | 182 | while (hash_t[key] == NULL || event->event_id != hash_t[key]->event_id) 183 | { 184 | key += 2 * (++i) - 1; 185 | if (key >= hash_size) 186 | key -= hash_size; 187 | 188 | if (key > hash_size) 189 | { 190 | tw_error(TW_LOC, "Cannot find event in hash table: PE %d, key %d, size %d\n", 191 | pe, key, hash_size); 192 | } 193 | } 194 | 195 | return key; 196 | } 197 | 198 | tw_event ** 199 | allocate_table(int hash_size) 200 | { 201 | return (tw_event **) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_event *) * hash_size, 1); 202 | } 203 | #endif 204 | 205 | tw_event * 206 | tw_hash_remove(void *h, tw_event * event, long pe) 207 | { 208 | #if AVL_TREE 209 | (void) h; 210 | (void) pe; 211 | tw_event *ret; 212 | tw_clock start; 213 | 214 | g_tw_pe->avl_tree_size--; 215 | 216 | start = tw_clock_read(); 217 | ret = avlDelete(&event->dest_lp->kp->avl_tree, event); 218 | g_tw_pe->stats.s_avl += tw_clock_read() - start; 219 | return ret; 220 | #else 221 | tw_hash *hash_t = (tw_hash *) h; 222 | tw_event *ret_event; 223 | int key; 224 | 225 | if(pe > tw_nnodes() - 1) 226 | tw_error(TW_LOC, "bad 
/*
 * Primality test by trial division with odd divisors.
 *
 * ptst: candidate value
 *
 * Returns 1 when ptst is prime, 0 otherwise.  Values below 2 (including
 * negative numbers) are not prime; 2 is prime.
 *
 * The loop bound is written as a <= ptst / a, which is the integer form of
 * a * a <= ptst, so no floating-point square root is needed and the bound
 * cannot overflow.
 */
int
is_prime(int ptst)
{
	long a;

	if (ptst < 2)
		return 0;

	if (ptst % 2 == 0)
		return ptst == 2;

	for (a = 3; a <= ptst / a; a += 2)
	{
		if (ptst % a == 0)
			return 0;
	}

	return 1;
}
\n", g_tw_mynode, empty); 301 | 302 | return NULL; 303 | } 304 | 305 | void 306 | hash_print(tw_hash * h) 307 | { 308 | unsigned int i, j, empty; 309 | unsigned int *sizes = h->hash_sizes; 310 | int *stored = h->num_stored; 311 | tw_event **hash_t; 312 | tw_event *e; 313 | 314 | for (i = 0; i < ncpu; i++) 315 | { 316 | printf("PE %d: \n", i); 317 | printf("table size: %d \n", sizes[i]); 318 | printf("num_stored: %d \n\n", stored[i]); 319 | 320 | hash_t = h->incoming[i]; 321 | 322 | for (empty = 0, j = 0; j < sizes[i]; j++) 323 | { 324 | e = hash_t[j]; 325 | 326 | if (e) 327 | { 328 | //printf("recv_ts = %f \n", e->recv_ts); 329 | //printf("%d: %ld \n\n", j, e->event_id); 330 | } else 331 | empty++; 332 | } 333 | printf("PE %d has %d empty cells. \n", i, empty); 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /core/hash-quadratic.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_hash_quadratic_h 2 | #define INC_hash_quadratic_h 3 | 4 | #define MAX_FRACTION 0.50 5 | 6 | extern unsigned int g_tw_hash_size; 7 | 8 | typedef struct tw_hash tw_hash; 9 | 10 | struct tw_hash 11 | { 12 | tw_event ***incoming; 13 | int *num_stored; 14 | unsigned int *hash_sizes; 15 | }; 16 | 17 | 18 | /* 19 | * hash-quadratic.c 20 | */ 21 | extern void *tw_hash_create(); 22 | extern void tw_hash_insert(void *h, tw_event * event, long pe); 23 | extern tw_event *tw_hash_remove(void *h, tw_event * event, long pe); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /core/instrumentation/ross-lps/analysis-lp.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_analysis_lp_h 2 | #define INC_analysis_lp_h 3 | 4 | #include 5 | 6 | typedef struct analysis_state analysis_state; 7 | typedef struct analysis_msg analysis_msg; 8 | typedef struct model_sample_data model_sample_data; 9 | typedef struct 
lp_metadata lp_metadata; 10 | 11 | typedef enum{ 12 | ALP_NONE, 13 | ALP_FULL, // LP, KP, and PE 14 | ALP_KP, // KP and PE 15 | ALP_PE, //PE only 16 | ALP_MODEL 17 | } analysis_type_flag; 18 | 19 | struct analysis_msg 20 | { 21 | tw_lpid src; 22 | tw_stime timestamp; 23 | 24 | }; 25 | 26 | struct lp_metadata 27 | { 28 | tw_lpid lpid; 29 | tw_kpid kpid; 30 | tw_peid peid; 31 | tw_stime ts; 32 | tw_stime real_time; 33 | int sample_sz; 34 | int flag; // 0 == PE, 1 == KP, 2 == LP, 3 == model 35 | }; 36 | 37 | struct model_sample_data 38 | { 39 | model_sample_data *prev; 40 | model_sample_data *next; 41 | tw_stime timestamp; 42 | void **lp_data; /* data for each LP on the associated KP at this sampling point */ 43 | }; 44 | 45 | struct analysis_state 46 | { 47 | tw_lpid analysis_id; // id among analysis LPs only 48 | int num_lps; 49 | int num_lps_sim; 50 | tw_lpid *lp_list; // list of LPs that the analysis LP is responsible for 51 | tw_lpid *lp_list_sim; 52 | model_sample_data *model_samples_head; 53 | model_sample_data *model_samples_current; 54 | model_sample_data *model_samples_tail; 55 | }; 56 | 57 | void analysis_init(analysis_state *s, tw_lp *lp); 58 | void analysis_event(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp); 59 | void analysis_event_rc(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp); 60 | void analysis_commit(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp); 61 | void analysis_finish(analysis_state *s, tw_lp *lp); 62 | void collect_sim_engine_data(tw_pe *pe, tw_lp *lp, analysis_state *s, tw_stime current_rt); 63 | tw_peid analysis_map(tw_lpid gid); 64 | 65 | extern tw_lpid analysis_start_gid; 66 | void st_analysis_lp_settype(tw_lpid lpid); 67 | #endif 68 | -------------------------------------------------------------------------------- /core/instrumentation/ross-lps/specialized-lps.c: -------------------------------------------------------------------------------- 1 | #include "ross.h" 2 | #include "analysis-lp.h" 3 | 4 | 
/* 5 | * This file is for general set up functions related to setting up any 6 | * ROSS specialized LPs. 7 | * 8 | * TODO add some output on these LPs, add some counters that can be subtracted from other counters, so we're not including this in the model info 9 | */ 10 | 11 | int g_st_use_analysis_lps = 0; 12 | tw_lpid g_st_analysis_nlp = 0; 13 | int g_st_sample_count = 65536; 14 | 15 | tw_lpid analysis_start_gid = 0; 16 | tw_lpid g_st_total_model_lps = 0; 17 | 18 | void specialized_lp_setup() 19 | { 20 | if (g_st_engine_stats == VT_STATS || g_st_engine_stats == ALL_STATS || 21 | g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS) 22 | { 23 | g_st_use_analysis_lps = 1; 24 | st_buffer_init(ANALYSIS_LP); 25 | } 26 | else 27 | return; 28 | 29 | // determine total LPs used by model and assign value to analysis_start_gid 30 | if (g_tw_synchronization_protocol != SEQUENTIAL) 31 | MPI_Allreduce(&g_tw_nlp, &g_st_total_model_lps, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_ROSS); 32 | else 33 | g_st_total_model_lps = g_tw_nlp; 34 | 35 | analysis_start_gid = g_st_total_model_lps; 36 | g_st_analysis_nlp = g_tw_nkp; // # of analysis LPs per PE 37 | 38 | 39 | } 40 | 41 | void specialized_lp_init_mapping() 42 | { 43 | tw_lpid lpid; 44 | for(lpid = 0; lpid < g_st_analysis_nlp; lpid++) 45 | { 46 | tw_lp_onpe(g_tw_nlp + lpid, g_tw_pe, analysis_start_gid + g_tw_mynode * g_st_analysis_nlp + lpid); 47 | tw_lp_onkp(g_tw_lp[g_tw_nlp + lpid], g_tw_kp[lpid]); // analysis lpid == kpid 48 | st_analysis_lp_settype(g_tw_nlp + lpid); 49 | } 50 | } 51 | 52 | void specialized_lp_run() 53 | { 54 | // has to be set at beginning of tw_run, in case model changes g_tw_ts_end between calling tw_init and tw_run 55 | if (g_st_sampling_end == 0) 56 | g_st_sampling_end = g_tw_ts_end; 57 | } 58 | 59 | const tw_optdef special_lp_opt[] = 60 | { 61 | TWOPT_GROUP("Specialized ROSS LPs"), 62 | //TWOPT_UINT("analysis-lps", g_st_use_analysis_lps, "Set to 1 to turn on analysis LPs (1 per KP) for 
virtual time sampling"), 63 | TWOPT_UINT("sample-count", g_st_sample_count, "Number of samples to allocate in memory"), 64 | TWOPT_END() 65 | }; 66 | 67 | const tw_optdef *st_special_lp_opts(void) 68 | { 69 | return special_lp_opt; 70 | } 71 | -------------------------------------------------------------------------------- /core/instrumentation/st-event-trace.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int g_st_ev_trace = 0; 4 | 5 | static short evtype_warned = 0; 6 | 7 | // collect src LP, dest LP, virtual time stamp, real time start 8 | // model can implement callback function to collect model level data, e.g. event type 9 | void st_collect_event_data(tw_event *cev, double recv_rt) 10 | { 11 | tw_clock start_cycle_time = tw_clock_read(); 12 | int collect_flag = 1; 13 | st_event_data ev_data; 14 | ev_data.src_lp = (unsigned int) cev->send_lp; 15 | ev_data.dest_lp = (unsigned int) cev->dest_lp->gid; 16 | ev_data.send_vts = (float) TW_STIME_DBL(cev->send_ts); 17 | ev_data.recv_vts = (float) TW_STIME_DBL(cev->recv_ts); 18 | ev_data.real_ts = (float) recv_rt; 19 | int total_sz = sizeof(ev_data); 20 | 21 | if (!cev->dest_lp->model_types && !evtype_warned && g_tw_mynode == g_tw_masternode) 22 | { 23 | fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__); 24 | fprintf(stderr, "The struct st_model_types has not been defined! 
No model level data will be collected\n"); 25 | evtype_warned = 1; 26 | } 27 | 28 | if (cev->dest_lp->model_types && cev->dest_lp->model_types->ev_trace) 29 | ev_data.model_data_sz = cev->dest_lp->model_types->ev_sz; 30 | else 31 | ev_data.model_data_sz = 0; 32 | 33 | 34 | total_sz += ev_data.model_data_sz; 35 | char buffer[total_sz]; 36 | 37 | if (ev_data.model_data_sz > 0) 38 | (*cev->dest_lp->model_types->ev_trace)(tw_event_data(cev), cev->dest_lp, &buffer[sizeof(ev_data)], &collect_flag); 39 | 40 | if (collect_flag) 41 | { 42 | memcpy(&buffer[0], &ev_data, sizeof(ev_data)); 43 | if (g_tw_synchronization_protocol != SEQUENTIAL) 44 | st_buffer_push(EV_TRACE, &buffer[0], total_sz); 45 | else if (g_tw_synchronization_protocol == SEQUENTIAL && !g_st_disable_out) 46 | fwrite(buffer, total_sz, 1, seq_ev_trace); 47 | 48 | } 49 | g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time; 50 | } 51 | -------------------------------------------------------------------------------- /core/instrumentation/st-instrumentation.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char g_st_stats_out[INST_MAX_LENGTH] = {0}; 5 | char g_st_stats_path[4096] = {0}; 6 | int g_st_pe_data = 1; 7 | int g_st_kp_data = 0; 8 | int g_st_lp_data = 0; 9 | int g_st_disable_out = 0; 10 | 11 | int g_st_model_stats = 0; 12 | int g_st_engine_stats = 0; 13 | 14 | int g_st_gvt_sampling = 0; 15 | int g_st_num_gvt = 10; 16 | 17 | int g_st_rt_sampling = 0; 18 | tw_clock g_st_rt_interval = 1000; 19 | tw_clock g_st_rt_samp_start_cycles = 0; 20 | 21 | double g_st_vt_interval = 1000000; 22 | double g_st_sampling_end = 0; 23 | 24 | 25 | 26 | static const tw_optdef inst_options[] = { 27 | TWOPT_GROUP("ROSS Instrumentation"), 28 | TWOPT_UINT("engine-stats", g_st_engine_stats, "Collect sim engine level stats; 0 don't collect, 1 GVT-sampling, 2 RT sampling, 3 VT sampling, 4 All sampling modes"), 29 | TWOPT_UINT("model-stats", 
g_st_model_stats, "Collect model level stats (requires model-level implementation); 0 don't collect, 1 GVT-sampling, 2 RT sampling, 3 VT sampling, 4 all sampling modes"), 30 | TWOPT_UINT("num-gvt", g_st_num_gvt, "number of GVT computations between GVT-based sampling points"), 31 | TWOPT_ULONGLONG("rt-interval", g_st_rt_interval, "real time sampling interval in ms"), 32 | TWOPT_DOUBLE("vt-interval", g_st_vt_interval, "Virtual time sampling interval"), 33 | TWOPT_DOUBLE("vt-samp-end", g_st_sampling_end, "End time for virtual time sampling (if different from g_tw_ts_end)"), 34 | TWOPT_UINT("pe-data", g_st_pe_data, "Turn on/off collection of sim engine data at PE level"), 35 | TWOPT_UINT("kp-data", g_st_kp_data, "Turn on/off collection of sim engine data at KP level"), 36 | TWOPT_UINT("lp-data", g_st_lp_data, "Turn on/off collection of sim engine data at LP level"), 37 | TWOPT_UINT("event-trace", g_st_ev_trace, "collect detailed data on all events for specified LPs; 0, no trace, 1 full trace, 2 only events causing rollbacks, 3 only committed events"), 38 | TWOPT_CHAR("stats-prefix", g_st_stats_out, "prefix for filename(s) for stats output"), 39 | TWOPT_CHAR("stats-path", g_st_stats_path, "path to directory to save instrumentation output"), 40 | TWOPT_UINT("buffer-size", g_st_buffer_size, "size of buffer in bytes for stats collection"), 41 | TWOPT_UINT("buffer-free", g_st_buffer_free_percent, "percentage of free space left in buffer before writing out at GVT"), 42 | TWOPT_UINT("disable-output", g_st_disable_out, "used for perturbation analysis; buffer never dumped to file when 1"), 43 | TWOPT_END() 44 | }; 45 | 46 | const tw_optdef *st_inst_opts(void) 47 | { 48 | return inst_options; 49 | } 50 | 51 | void st_inst_init(void) 52 | { 53 | specialized_lp_run(); 54 | 55 | if (!(g_st_engine_stats || g_st_model_stats || g_st_ev_trace)) 56 | return; 57 | 58 | // setup appropriate flags for various instrumentation modes 59 | // set up files and buffers for necessary 
instrumentation modes 60 | if (g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS) 61 | { 62 | g_st_gvt_sampling = 1; 63 | st_buffer_init(GVT_COL); 64 | } 65 | if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS) 66 | { 67 | g_st_rt_sampling = 1; 68 | st_buffer_init(RT_COL); 69 | } 70 | 71 | if (g_st_model_stats == GVT_STATS || g_st_model_stats == ALL_STATS) 72 | g_st_gvt_sampling = 1; 73 | if (g_st_model_stats == RT_STATS || g_st_model_stats == ALL_STATS) 74 | g_st_rt_sampling = 1; 75 | 76 | if (g_st_rt_sampling) 77 | { 78 | g_st_rt_interval = g_st_rt_interval * g_tw_clock_rate / 1000; 79 | g_st_rt_samp_start_cycles = tw_clock_read(); 80 | } 81 | 82 | if (g_st_ev_trace) 83 | st_buffer_init(EV_TRACE); 84 | if (g_st_model_stats) 85 | st_buffer_init(MODEL_COL); 86 | } 87 | 88 | void st_inst_dump() 89 | { 90 | if (g_st_disable_out) 91 | return; 92 | 93 | if (g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS) 94 | st_buffer_write(0, GVT_COL); 95 | if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS) 96 | st_buffer_write(0, RT_COL); 97 | if (g_st_ev_trace) 98 | st_buffer_write(0, EV_TRACE); 99 | if (g_st_model_stats) 100 | st_buffer_write(0, MODEL_COL); 101 | if (g_st_use_analysis_lps) 102 | st_buffer_write(0, ANALYSIS_LP); 103 | } 104 | 105 | void st_inst_finalize(tw_pe *me) 106 | { 107 | if (g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS) 108 | st_buffer_finalize(GVT_COL); 109 | if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS) 110 | { 111 | // collect data one final time to account for time between last sample and sim end time 112 | st_collect_engine_data(me, RT_COL); 113 | st_buffer_finalize(RT_COL); 114 | } 115 | if (g_st_ev_trace) 116 | st_buffer_finalize(EV_TRACE); 117 | if (g_st_model_stats) 118 | st_buffer_finalize(MODEL_COL); 119 | if (g_st_use_analysis_lps) 120 | st_buffer_finalize(ANALYSIS_LP); 121 | 122 | } 123 | 
-------------------------------------------------------------------------------- /core/instrumentation/st-instrumentation.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_st_instrumentation_h 2 | #define INC_st_instrumentation_h 3 | 4 | /* 5 | * Header file for all of the ROSS instrumentation 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #define INST_MAX_LENGTH 4096 12 | 13 | /* st-stats-buffer.c */ 14 | #define st_buffer_free_space(buf) (buf->size - buf->count) 15 | #define st_buffer_write_ptr(buf) (buf->buffer + buf->write_pos) 16 | #define st_buffer_read_ptr(buf) (buf->buffer + buf->read_pos) 17 | 18 | typedef struct{ 19 | char *buffer; 20 | int size; 21 | int write_pos; 22 | int read_pos; 23 | int count; 24 | } st_stats_buffer; 25 | 26 | extern char stats_directory[INST_MAX_LENGTH]; 27 | extern int g_st_buffer_size; 28 | extern int g_st_buffer_free_percent; 29 | extern FILE *seq_ev_trace, *seq_model, *seq_analysis; 30 | 31 | void st_buffer_allocate(); 32 | void st_buffer_init(int type); 33 | void st_buffer_push(int type, char *data, int size); 34 | void st_buffer_write(int end_of_sim, int type); 35 | void st_buffer_finalize(int type); 36 | 37 | /* st-instrumentation.c */ 38 | typedef struct sample_metadata sample_metadata; 39 | 40 | typedef enum{ 41 | GVT_COL, 42 | RT_COL, 43 | ANALYSIS_LP, 44 | EV_TRACE, 45 | MODEL_COL, 46 | NUM_COL_TYPES 47 | } collection_types; 48 | 49 | typedef enum{ 50 | PE_TYPE, 51 | KP_TYPE, 52 | LP_TYPE, 53 | MODEL_TYPE 54 | } inst_data_types; 55 | 56 | typedef enum { 57 | GRAN_PE, 58 | GRAN_KP, 59 | GRAN_LP, 60 | GRAN_ALL 61 | } granularity_types; 62 | 63 | struct sample_metadata 64 | { 65 | int flag; 66 | int sample_sz; 67 | tw_stime ts; 68 | double real_time; 69 | }; 70 | 71 | extern char g_st_stats_out[INST_MAX_LENGTH]; 72 | extern char g_st_stats_path[INST_MAX_LENGTH]; 73 | extern int g_st_pe_data; 74 | extern int g_st_kp_data; 75 | extern int g_st_lp_data; 76 | extern int 
g_st_disable_out; 77 | 78 | extern int g_st_model_stats; 79 | extern int g_st_engine_stats; 80 | 81 | extern int g_st_gvt_sampling; 82 | extern int g_st_num_gvt; 83 | 84 | extern int g_st_rt_sampling; 85 | extern tw_clock g_st_rt_interval; 86 | extern tw_clock g_st_rt_samp_start_cycles; 87 | 88 | extern const tw_optdef *st_inst_opts(); 89 | extern void st_inst_init(void); 90 | extern void st_inst_dump(); 91 | extern void st_inst_finalize(tw_pe *me); 92 | 93 | /* 94 | * st-sim-engine.c 95 | * Simulation Engine related instrumentation 96 | */ 97 | typedef struct st_pe_stats st_pe_stats; 98 | typedef struct st_kp_stats st_kp_stats; 99 | typedef struct st_lp_stats st_lp_stats; 100 | 101 | struct st_pe_stats{ 102 | unsigned int peid; 103 | 104 | unsigned int s_nevent_processed; 105 | unsigned int s_nevent_abort; 106 | unsigned int s_e_rbs; 107 | unsigned int s_rb_total; 108 | unsigned int s_rb_secondary; 109 | unsigned int s_fc_attempts; 110 | unsigned int s_pq_qsize; 111 | unsigned int s_nsend_network; 112 | unsigned int s_nread_network; 113 | //unsigned int s_nsend_remote_rb; 114 | //unsigned int s_nsend_loc_remote; 115 | //unsigned int s_nsend_net_remote; 116 | unsigned int s_ngvts; 117 | unsigned int s_pe_event_ties; 118 | unsigned int all_reduce_count; 119 | float efficiency; 120 | 121 | float s_net_read; 122 | float s_net_other; 123 | float s_gvt; 124 | float s_fossil_collect; 125 | float s_event_abort; 126 | float s_event_process; 127 | float s_pq; 128 | float s_rollback; 129 | float s_cancel_q; 130 | float s_avl; 131 | float s_buddy; 132 | float s_lz4; 133 | }; 134 | 135 | struct st_kp_stats{ 136 | unsigned int peid; 137 | unsigned int kpid; 138 | 139 | unsigned int s_nevent_processed; 140 | unsigned int s_nevent_abort; 141 | unsigned int s_e_rbs; 142 | unsigned int s_rb_total; 143 | unsigned int s_rb_secondary; 144 | unsigned int s_nsend_network; 145 | unsigned int s_nread_network; 146 | float time_ahead_gvt; 147 | float efficiency; 148 | }; 149 | 150 | struct 
st_lp_stats{ 151 | unsigned int peid; 152 | unsigned int kpid; 153 | unsigned int lpid; 154 | 155 | unsigned int s_nevent_processed; 156 | unsigned int s_nevent_abort; 157 | unsigned int s_e_rbs; 158 | unsigned int s_nsend_network; 159 | unsigned int s_nread_network; 160 | float efficiency; 161 | }; 162 | 163 | void st_collect_engine_data(tw_pe *me, int col_type); 164 | void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type); 165 | void st_collect_engine_data_kps(tw_pe *me, tw_kp *kp, sample_metadata *sample_md, int col_type); 166 | void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type); 167 | 168 | /* 169 | * st-event-trace.c 170 | */ 171 | typedef enum{ 172 | NO_TRACE, 173 | FULL_TRACE, 174 | RB_TRACE, 175 | COMMIT_TRACE 176 | } traces_enum; 177 | 178 | typedef struct { 179 | unsigned int src_lp; 180 | unsigned int dest_lp; 181 | float send_vts; 182 | float recv_vts; 183 | float real_ts; 184 | unsigned int model_data_sz; 185 | } st_event_data; 186 | 187 | // collect_flag allows for specific events to be turned on/off in tracing 188 | typedef void (*ev_trace_f) (void *msg, tw_lp *lp, char *buffer, int *collect_flag); 189 | 190 | extern int g_st_ev_trace; 191 | 192 | void st_collect_event_data(tw_event *cev, double recv_rt); 193 | 194 | /* 195 | * ross-lps/analysis-lp.c 196 | */ 197 | typedef void (*sample_event_f)(void *state, tw_bf *b, tw_lp *lp, void *sample); 198 | typedef void (*sample_revent_f)(void *state, tw_bf *b, tw_lp *lp, void *sample); 199 | extern void specialized_lp_setup(); 200 | extern void specialized_lp_init_mapping(); 201 | extern void specialized_lp_run(); 202 | extern const tw_optdef *st_special_lp_opts(void); 203 | extern int g_st_use_analysis_lps; 204 | extern tw_lpid g_st_analysis_nlp; 205 | extern double g_st_vt_interval; 206 | extern double g_st_sampling_end; 207 | extern tw_lpid g_st_total_model_lps; 208 | extern int g_st_sample_count; 209 | 210 | /* 211 | * 
st-model-data.c 212 | */ 213 | // function to be implemented in LP for collection of model level stats 214 | typedef void (*model_stat_f) (void *sv, tw_lp *lp, char *buffer); 215 | typedef struct st_model_types st_model_types; 216 | 217 | /* 218 | * Struct to help ROSS collect model-level data 219 | * */ 220 | struct st_model_types { 221 | ev_trace_f ev_trace; /**< @brief function pointer to collect data about all events for given LP */ 222 | size_t ev_sz; /**< @brief size of data collected from model for each event */ 223 | model_stat_f model_stat_fn; /**< @brief function pointer to collect model level data for RT and GVT-based instrumentation */ 224 | size_t mstat_sz; /**< @brief size of data collected from model at sampling points */ 225 | sample_event_f sample_event_fn; 226 | sample_revent_f sample_revent_fn; 227 | size_t sample_struct_sz; 228 | }; 229 | 230 | typedef enum{ 231 | NO_STATS, 232 | GVT_STATS, 233 | RT_STATS, 234 | VT_STATS, 235 | ALL_STATS 236 | } stats_types_enum; 237 | 238 | typedef struct { 239 | unsigned int peid; 240 | unsigned int kpid; 241 | unsigned int lpid; 242 | float gvt; 243 | int stats_type; 244 | unsigned int model_sz; 245 | } model_metadata; 246 | 247 | extern st_model_types *g_st_model_types; 248 | 249 | void st_model_setup_types(tw_lp *lp); 250 | void st_model_settype(tw_lpid i, st_model_types *model_types); 251 | void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type); 252 | 253 | #endif 254 | -------------------------------------------------------------------------------- /core/instrumentation/st-model-data.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | st_model_types *g_st_model_types = NULL; 4 | static int model_type_warned = 0; 5 | 6 | 7 | // if model uses tw_lp_setup_types() to set lp->type, it will also call 8 | // this function to set up the functions types for model-level data collection 9 | // because this can make use of the already defined type 
mapping 10 | void st_model_setup_types(tw_lp *lp) 11 | { 12 | if (g_st_model_types) 13 | lp->model_types = &g_st_model_types[g_tw_lp_typemap(lp->gid)]; 14 | else if (!model_type_warned && g_tw_mynode == g_tw_masternode) 15 | { 16 | fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__); 17 | fprintf(stderr, "The g_st_model_types has not been defined! No model level data will be collected\n"); 18 | model_type_warned = 1; 19 | } 20 | 21 | } 22 | 23 | // if model uses tw_lp_settypes(), model will also need to call 24 | // this function to set up function types for model-level data collection 25 | void st_model_settype(tw_lpid i, st_model_types *model_types) 26 | { 27 | if (model_types) 28 | { 29 | tw_lp *lp = g_tw_lp[i]; 30 | lp->model_types = model_types; 31 | } 32 | else if (!model_type_warned && g_tw_mynode == g_tw_masternode) 33 | { 34 | fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__); 35 | fprintf(stderr, "The struct st_model_types has not been defined for at least 1 LP type! No model level data will be collected for LP types without a valid st_model_types struct defined.\n"); 36 | model_type_warned = 1; 37 | } 38 | } 39 | 40 | /* 41 | * This function allows for ROSS to collect model level data, when not using Analysis LPs. 42 | * Call this function when collecting simulation level data (GVT-based and/or real time-based). 
43 | * Loop through all LPs on this PE and collect stats 44 | */ 45 | void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type) 46 | { 47 | tw_clock start_cycle_time = tw_clock_read(); 48 | int index; 49 | tw_lpid lpid = 0; 50 | int total_sz = 0; 51 | tw_lp *clp; 52 | sample_metadata sample_md; 53 | model_metadata model_md; 54 | sample_md.flag = MODEL_TYPE; 55 | sample_md.sample_sz = sizeof(model_md); 56 | sample_md.real_time = current_rt; 57 | model_md.peid = (unsigned int) g_tw_mynode; 58 | #ifdef USE_RAND_TIEBREAKER 59 | model_md.gvt = (float) TW_STIME_DBL(pe->GVT_sig.recv_ts); 60 | #else 61 | model_md.gvt = (float) TW_STIME_DBL(pe->GVT); 62 | #endif 63 | model_md.stats_type = stats_type; 64 | 65 | for (lpid = 0; lpid < g_tw_nlp; lpid++) 66 | { 67 | index = 0; 68 | clp = g_tw_lp[lpid]; 69 | if (!clp->model_types || !clp->model_types->model_stat_fn) 70 | { 71 | // may not want to collect model stats on every LP type, so if not defined, just continue 72 | continue; 73 | } 74 | 75 | sample_md.ts = tw_now(clp); 76 | model_md.kpid = (unsigned int) clp->kp->id; 77 | model_md.lpid = (unsigned int) clp->gid; 78 | model_md.model_sz = (unsigned int) clp->model_types->mstat_sz; 79 | total_sz = sizeof(sample_md) + sizeof(model_md) + model_md.model_sz; 80 | char buffer[total_sz]; 81 | memcpy(&buffer[0], &sample_md, sizeof(sample_md)); 82 | index += sizeof(sample_md); 83 | memcpy(&buffer[index], &model_md, sizeof(model_md)); 84 | index += sizeof(model_md); 85 | 86 | if (model_md.model_sz > 0) 87 | { 88 | (*clp->model_types->model_stat_fn)(clp->cur_state, clp, &buffer[index]); 89 | 90 | if (g_tw_synchronization_protocol != SEQUENTIAL) 91 | st_buffer_push(MODEL_COL, &buffer[0], total_sz); 92 | else if (g_tw_synchronization_protocol == SEQUENTIAL && !g_st_disable_out) 93 | fwrite(buffer, total_sz, 1, seq_model); 94 | } 95 | } 96 | pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time; 97 | } 98 | 
-------------------------------------------------------------------------------- /core/instrumentation/st-sim-engine.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #define __STDC_FORMAT_MACROS 1 4 | 5 | long g_st_current_interval = 0; 6 | static tw_statistics last_pe_stats[3]; 7 | static tw_stat last_all_reduce_cnt = 0; 8 | 9 | /* wrapper to call gvt instrumentation functions depending on which granularity to use */ 10 | void st_collect_engine_data(tw_pe *pe, int col_type) 11 | { 12 | tw_clock start_time = tw_clock_read(); 13 | tw_kp *kp; 14 | tw_lp *lp; 15 | unsigned int i; 16 | tw_statistics s; 17 | bzero(&s, sizeof(s)); 18 | tw_get_stats(pe, &s); 19 | 20 | sample_metadata sample_md; 21 | #ifdef USE_RAND_TIEBREAKER 22 | sample_md.ts = pe->GVT_sig.recv_ts; 23 | #else 24 | sample_md.ts = pe->GVT; 25 | #endif 26 | sample_md.real_time = (double)tw_clock_read() / g_tw_clock_rate; 27 | 28 | if (g_st_pe_data) 29 | st_collect_engine_data_pes(pe, &sample_md, &s, col_type); 30 | if (g_st_kp_data) 31 | { 32 | for (i = 0; i < g_tw_nkp; i++) 33 | { 34 | kp = tw_getkp(i); 35 | st_collect_engine_data_kps(pe, kp, &sample_md, col_type); 36 | } 37 | } 38 | if (g_st_lp_data) 39 | { 40 | for (i = 0; i < g_tw_nlp; i++) 41 | { 42 | lp = tw_getlp(i); 43 | st_collect_engine_data_lps(lp, &sample_md, col_type); 44 | } 45 | } 46 | pe->stats.s_stat_comp += tw_clock_read() - start_time; 47 | } 48 | 49 | void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type) 50 | { 51 | st_pe_stats pe_stats; 52 | int buf_size = sizeof(*sample_md) + sizeof(pe_stats); 53 | char buffer[buf_size]; 54 | tw_stat all_reduce_cnt = st_get_allreduce_count(); 55 | 56 | // sample_md time stamps were set in the calling function 57 | sample_md->flag = PE_TYPE; 58 | sample_md->sample_sz = sizeof(pe_stats); 59 | 60 | pe_stats.peid = (unsigned int) g_tw_mynode; 61 | pe_stats.s_nevent_processed = (unsigned int)( 
s->s_nevent_processed-last_pe_stats[col_type].s_nevent_processed); 62 | pe_stats.s_nevent_abort = (unsigned int)(s->s_nevent_abort-last_pe_stats[col_type].s_nevent_abort); 63 | pe_stats.s_e_rbs = (unsigned int)(s->s_e_rbs-last_pe_stats[col_type].s_e_rbs); 64 | pe_stats.s_rb_total = (unsigned int)( s->s_rb_total-last_pe_stats[col_type].s_rb_total); 65 | pe_stats.s_rb_secondary = (unsigned int)(s->s_rb_secondary-last_pe_stats[col_type].s_rb_secondary); 66 | pe_stats.s_fc_attempts = (unsigned int)(s->s_fc_attempts-last_pe_stats[col_type].s_fc_attempts); 67 | pe_stats.s_pq_qsize = tw_pq_get_size(pe->pq); 68 | pe_stats.s_nsend_network = (unsigned int)(s->s_nsend_network-last_pe_stats[col_type].s_nsend_network); 69 | pe_stats.s_nread_network = (unsigned int)(s->s_nread_network-last_pe_stats[col_type].s_nread_network); 70 | pe_stats.s_pe_event_ties = (unsigned int)(s->s_pe_event_ties-last_pe_stats[col_type].s_pe_event_ties); 71 | pe_stats.s_ngvts = (unsigned int)(g_tw_gvt_done - last_pe_stats[col_type].s_ngvts); 72 | pe_stats.all_reduce_count = (unsigned int)(all_reduce_cnt-last_all_reduce_cnt); 73 | 74 | // I think it's possible for net_events to be negative over some interval of simulation time 75 | // e.g., if in the current interval we've happened to process more rollback events than forward events 76 | // for now, just report efficiency as 0 in this case? 77 | int net_events = pe_stats.s_nevent_processed - pe_stats.s_e_rbs; 78 | if (net_events > 0) 79 | pe_stats.efficiency = (float) 100.0 * (1.0 - ((float) pe_stats.s_e_rbs / (float) net_events)); 80 | else 81 | pe_stats.efficiency = 0; 82 | 83 | // TODO set a starting clock rate and subtract that from the counters? 
84 | // because PEs on different nodes will probably have different starting points for cycle counters 85 | pe_stats.s_net_read = (float)(pe->stats.s_net_read - last_pe_stats[col_type].s_net_read) / g_tw_clock_rate; 86 | pe_stats.s_net_other = (float)(pe->stats.s_net_other - last_pe_stats[col_type].s_net_other) / g_tw_clock_rate; 87 | pe_stats.s_gvt = (float)(pe->stats.s_gvt - last_pe_stats[col_type].s_gvt) / g_tw_clock_rate; 88 | pe_stats.s_fossil_collect = (float)(pe->stats.s_fossil_collect - last_pe_stats[col_type].s_fossil_collect) / g_tw_clock_rate; 89 | pe_stats.s_event_abort = (float)(pe->stats.s_event_abort - last_pe_stats[col_type].s_event_abort) / g_tw_clock_rate; 90 | pe_stats.s_event_process = (float)(pe->stats.s_event_process - last_pe_stats[col_type].s_event_process) / g_tw_clock_rate; 91 | pe_stats.s_pq = (float)(pe->stats.s_pq - last_pe_stats[col_type].s_pq) / g_tw_clock_rate; 92 | pe_stats.s_rollback = (float)(pe->stats.s_rollback - last_pe_stats[col_type].s_rollback) / g_tw_clock_rate; 93 | pe_stats.s_cancel_q = (float)(pe->stats.s_cancel_q - last_pe_stats[col_type].s_cancel_q) / g_tw_clock_rate; 94 | pe_stats.s_avl = (float)(pe->stats.s_avl - last_pe_stats[col_type].s_avl) / g_tw_clock_rate; 95 | pe_stats.s_buddy = (float)(pe->stats.s_buddy - last_pe_stats[col_type].s_buddy) / g_tw_clock_rate; 96 | pe_stats.s_lz4 = (float)(pe->stats.s_lz4 - last_pe_stats[col_type].s_lz4) / g_tw_clock_rate; 97 | 98 | memcpy(&buffer[0], sample_md, sizeof(*sample_md)); 99 | memcpy(&buffer[sizeof(*sample_md)], &pe_stats, sizeof(pe_stats)); 100 | st_buffer_push(col_type, &buffer[0], buf_size); 101 | 102 | memcpy(&last_pe_stats[col_type], s, sizeof(tw_statistics)); 103 | last_all_reduce_cnt = all_reduce_cnt; 104 | } 105 | 106 | void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type) 107 | { 108 | st_kp_stats kp_stats; 109 | int buf_size = sizeof(*sample_md) + sizeof(kp_stats); 110 | char buffer[buf_size]; 111 | int index = 0; 112 
| 113 | // sample_md time stamps were set in the calling function 114 | sample_md->flag = KP_TYPE; 115 | sample_md->sample_sz = sizeof(kp_stats); 116 | 117 | kp_stats.peid = (unsigned int) g_tw_mynode; 118 | 119 | kp_stats.kpid = kp->id; 120 | 121 | kp_stats.s_nevent_processed = (unsigned int)(kp->kp_stats->s_nevent_processed - kp->last_stats[col_type]->s_nevent_processed); 122 | kp_stats.s_nevent_abort = (unsigned int)(kp->kp_stats->s_nevent_abort - kp->last_stats[col_type]->s_nevent_abort); 123 | kp_stats.s_e_rbs = (unsigned int)(kp->kp_stats->s_e_rbs - kp->last_stats[col_type]->s_e_rbs); 124 | kp_stats.s_rb_total = (unsigned int)(kp->kp_stats->s_rb_total - kp->last_stats[col_type]->s_rb_total); 125 | kp_stats.s_rb_secondary = (unsigned int)(kp->kp_stats->s_rb_secondary - kp->last_stats[col_type]->s_rb_secondary); 126 | kp_stats.s_nsend_network = (unsigned int)(kp->kp_stats->s_nsend_network - kp->last_stats[col_type]->s_nsend_network); 127 | kp_stats.s_nread_network = (unsigned int)(kp->kp_stats->s_nread_network - kp->last_stats[col_type]->s_nread_network); 128 | #ifdef USE_RAND_TIEBREAKER 129 | kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_sig.recv_ts) - TW_STIME_DBL(pe->GVT_sig.recv_ts)); 130 | #else 131 | kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_time) - TW_STIME_DBL(pe->GVT)); 132 | #endif 133 | 134 | int net_events = kp_stats.s_nevent_processed - kp_stats.s_e_rbs; 135 | if (net_events > 0) 136 | kp_stats.efficiency = (float) 100.0 * (1.0 - ((float) kp_stats.s_e_rbs / (float) net_events)); 137 | else 138 | kp_stats.efficiency = 0; 139 | 140 | memcpy(kp->last_stats[col_type], kp->kp_stats, sizeof(st_kp_stats)); 141 | 142 | memcpy(&buffer[index], sample_md, sizeof(*sample_md)); 143 | index += sizeof(*sample_md); 144 | memcpy(&buffer[index], &kp_stats, sizeof(kp_stats)); 145 | index += sizeof(kp_stats); 146 | 147 | if (index != buf_size) 148 | tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n"); 149 | 150 | 
st_buffer_push(col_type, &buffer[0], buf_size); 151 | } 152 | 153 | void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type) 154 | { 155 | st_lp_stats lp_stats; 156 | int buf_size = sizeof(*sample_md) + sizeof(lp_stats); 157 | char buffer[buf_size]; 158 | int index = 0; 159 | 160 | // sample_md time stamps were set in the calling function 161 | sample_md->flag = LP_TYPE; 162 | sample_md->sample_sz = sizeof(lp_stats); 163 | 164 | lp_stats.peid = (unsigned int) g_tw_mynode; 165 | 166 | lp_stats.kpid = lp->kp->id; 167 | lp_stats.lpid = lp->gid; 168 | 169 | lp_stats.s_nevent_processed = (unsigned int)(lp->lp_stats->s_nevent_processed - lp->last_stats[col_type]->s_nevent_processed); 170 | lp_stats.s_nevent_abort = (unsigned int)(lp->lp_stats->s_nevent_abort - lp->last_stats[col_type]->s_nevent_abort); 171 | lp_stats.s_e_rbs = (unsigned int)(lp->lp_stats->s_e_rbs - lp->last_stats[col_type]->s_e_rbs); 172 | lp_stats.s_nsend_network = (unsigned int)(lp->lp_stats->s_nsend_network - lp->last_stats[col_type]->s_nsend_network); 173 | lp_stats.s_nread_network = (unsigned int)(lp->lp_stats->s_nread_network - lp->last_stats[col_type]->s_nread_network); 174 | 175 | int net_events = lp_stats.s_nevent_processed - lp_stats.s_e_rbs; 176 | if (net_events > 0) 177 | lp_stats.efficiency = (float) 100.0 * (1.0 - ((float) lp_stats.s_e_rbs / (float) net_events)); 178 | else 179 | lp_stats.efficiency = 0; 180 | 181 | memcpy(lp->last_stats[col_type], lp->lp_stats, sizeof(st_lp_stats)); 182 | 183 | memcpy(&buffer[index], sample_md, sizeof(*sample_md)); 184 | index += sizeof(*sample_md); 185 | memcpy(&buffer[index], &lp_stats, sizeof(lp_stats)); 186 | index += sizeof(lp_stats); 187 | 188 | if (index != buf_size) 189 | tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n"); 190 | 191 | st_buffer_push(col_type, &buffer[0], buf_size); 192 | } 193 | -------------------------------------------------------------------------------- 
/core/instrumentation/st-stats-buffer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static long missed_bytes = 0; 6 | static MPI_Offset *prev_offsets = NULL; 7 | static MPI_File *buffer_fh = NULL; 8 | char stats_directory[INST_MAX_LENGTH]; 9 | int g_st_buffer_size = 8000000; 10 | int g_st_buffer_free_percent = 15; 11 | static int buffer_overflow_warned = 0; 12 | static const char *file_suffix[NUM_COL_TYPES]; 13 | FILE *seq_ev_trace, *seq_model, *seq_analysis; 14 | static st_stats_buffer **g_st_buffer; 15 | 16 | void st_buffer_allocate() 17 | { 18 | if (!(g_st_engine_stats || g_st_model_stats || g_st_ev_trace || g_st_use_analysis_lps)) 19 | return; 20 | 21 | int i, rc; 22 | 23 | // setup directory for instrumentation output 24 | if (g_tw_mynode == g_tw_masternode) 25 | { 26 | if (!g_st_stats_path[0]) 27 | sprintf(g_st_stats_path, "stats-output"); 28 | rc = mkdir(g_st_stats_path, S_IRUSR | S_IWUSR | S_IXUSR); 29 | if (rc == -1) 30 | { 31 | // this check gets rid of the GCC warning about trunciated string inputs 32 | if( snprintf(stats_directory, sizeof(stats_directory), "%s-%ld-%ld", g_st_stats_path, (long)getpid(), (long)time(NULL)) == 33 | sizeof(stats_directory) ) 34 | { 35 | printf("Error in st_buffer_allocate: stats_directory name lacked sufficient space and was truncaited\n"); 36 | exit(-1); 37 | } 38 | mkdir(stats_directory, S_IRUSR | S_IWUSR | S_IXUSR); 39 | } 40 | else 41 | sprintf(stats_directory, "%s", g_st_stats_path); 42 | } 43 | 44 | // make sure everyone has the directory name 45 | MPI_Bcast(stats_directory, INST_MAX_LENGTH, MPI_CHAR, g_tw_masternode, MPI_COMM_ROSS); 46 | 47 | // allocate buffer pointers 48 | g_st_buffer = (st_stats_buffer**) tw_calloc(TW_LOC, "instrumentation (buffer)", sizeof(st_stats_buffer*), NUM_COL_TYPES); 49 | 50 | // setup MPI file offsets 51 | if (!prev_offsets) 52 | { 53 | prev_offsets = (MPI_Offset*) tw_calloc(TW_LOC, "statistics collection 
(buffer)", sizeof(MPI_Offset), NUM_COL_TYPES); 54 | for (i = 0; i < NUM_COL_TYPES; i++) 55 | prev_offsets[i] = 0; 56 | } 57 | 58 | // set up file handlers 59 | if (!buffer_fh) 60 | buffer_fh = (MPI_File*) tw_calloc(TW_LOC, "statistics collection (buffer)", sizeof(MPI_File), NUM_COL_TYPES); 61 | 62 | } 63 | 64 | /* initialize circular buffer for stats collection 65 | * basically the read position marks the beginning of used space in the buffer 66 | * while the write postion marks the end of used space in the buffer 67 | */ 68 | void st_buffer_init(int type) 69 | { 70 | char filename[INST_MAX_LENGTH]; 71 | file_suffix[0] = "gvt"; 72 | file_suffix[1] = "rt"; 73 | file_suffix[2] = "analysis-lps"; 74 | file_suffix[3] = "evtrace"; 75 | file_suffix[4] = "model"; 76 | 77 | g_st_buffer[type] = (st_stats_buffer*) tw_calloc(TW_LOC, "statistics collection (buffer)", sizeof(st_stats_buffer), 1); 78 | g_st_buffer[type]->size = g_st_buffer_size; 79 | g_st_buffer[type]->write_pos = 0; 80 | g_st_buffer[type]->read_pos = 0; 81 | g_st_buffer[type]->count = 0; 82 | g_st_buffer[type]->buffer = (char*) tw_calloc(TW_LOC, "statistics collection (buffer)", 1, g_st_buffer[type]->size); 83 | 84 | // set up MPI File 85 | if (!g_st_disable_out) 86 | { 87 | if (!g_st_stats_out[0]) 88 | sprintf(g_st_stats_out, "ross-stats"); 89 | // this check gets rid of the GCC warning about trunciated string inputs 90 | if( snprintf(filename, sizeof(filename), "%s/%s-%s.bin", stats_directory, g_st_stats_out, file_suffix[type]) == 91 | sizeof(filename)) 92 | { 93 | printf("Error in st_buffer_init: filename lacked sufficient space and was truncaited\n"); 94 | exit(-1); 95 | } 96 | if (g_tw_synchronization_protocol != SEQUENTIAL) 97 | MPI_File_open(MPI_COMM_ROSS, filename, MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_WRONLY, MPI_INFO_NULL, &buffer_fh[type]); 98 | else if (strcmp(file_suffix[type], "evtrace") == 0 && g_tw_synchronization_protocol == SEQUENTIAL) 99 | seq_ev_trace = fopen(filename, "w"); 100 | else 
if (strcmp(file_suffix[type], "model") == 0 && g_tw_synchronization_protocol == SEQUENTIAL) 101 | seq_model = fopen(filename, "w"); 102 | else if (type == ANALYSIS_LP && g_tw_synchronization_protocol == SEQUENTIAL) 103 | seq_analysis = fopen(filename, "w"); 104 | 105 | } 106 | } 107 | 108 | /* write stats to buffer 109 | * currently does not overwrite in cases of overflow, just records the amount of overflow in bytes 110 | * for later reporting 111 | */ 112 | void st_buffer_push(int type, char *data, int size) 113 | { 114 | int size1, size2; 115 | if (!g_st_disable_out && st_buffer_free_space(g_st_buffer[type]) < size) 116 | { 117 | if (!buffer_overflow_warned) 118 | { 119 | printf("WARNING: Stats buffer overflow on rank %lu\n", g_tw_mynode); 120 | buffer_overflow_warned = 1; 121 | printf("tw_now() = %f\n", TW_STIME_DBL(tw_now(g_tw_lp[0]))); 122 | } 123 | missed_bytes += size; 124 | size = 0; // if we can't push it all, don't push anything to buffer 125 | } 126 | 127 | if (size) 128 | { 129 | if ((size1 = g_st_buffer[type]->size - g_st_buffer[type]->write_pos) >= size) 130 | { 131 | // can use only one memcpy here 132 | memcpy(st_buffer_write_ptr(g_st_buffer[type]), data, size); 133 | g_st_buffer[type]->write_pos += size; 134 | } 135 | else // data to be stored wraps around end of physical array 136 | { 137 | size2 = size - size1; 138 | memcpy(st_buffer_write_ptr(g_st_buffer[type]), data, size1); 139 | memcpy(g_st_buffer[type]->buffer, data + size1, size2); 140 | g_st_buffer[type]->write_pos = size2; 141 | } 142 | } 143 | g_st_buffer[type]->count += size; 144 | //printf("PE %ld wrote %d bytes to buffer; %d bytes of free space left\n", g_tw_mynode, size, st_buffer_free_space(g_st_buffer[type])); 145 | } 146 | 147 | /* determine whether to dump buffer to file 148 | * should only be called at GVT! 
*/ 149 | void st_buffer_write(int end_of_sim, int type) 150 | { 151 | MPI_Offset offset = prev_offsets[type]; 152 | MPI_File *fh = &buffer_fh[type]; 153 | int write_to_file = 0; 154 | int my_write_size = 0; 155 | unsigned int i; 156 | int write_sizes[tw_nnodes()]; 157 | tw_clock start_cycle_time = tw_clock_read(); 158 | 159 | my_write_size = g_st_buffer[type]->count; 160 | 161 | MPI_Allgather(&my_write_size, 1, MPI_INT, &write_sizes[0], 1, MPI_INT, MPI_COMM_ROSS); 162 | if (end_of_sim) 163 | write_to_file = 1; 164 | else 165 | { 166 | for (i = 0; i < tw_nnodes(); i++) 167 | { 168 | if ((double) write_sizes[i] / g_st_buffer_size >= g_st_buffer_free_percent / 100.0) 169 | write_to_file = 1; 170 | } 171 | } 172 | 173 | if (write_to_file) 174 | { 175 | for (i = 0; i < tw_nnodes(); i++) 176 | { 177 | if (i < g_tw_mynode) 178 | offset += write_sizes[i]; 179 | prev_offsets[type] += write_sizes[i]; 180 | } 181 | //printf("rank %ld writing %d bytes at offset %lld (prev_offsets[ANALYSIS_LP] = %lld)\n", g_tw_mynode, my_write_size, offset, prev_offsets[type]); 182 | // dump buffer to file 183 | MPI_Status status; 184 | g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time; 185 | start_cycle_time = tw_clock_read(); 186 | MPI_File_write_at_all(*fh, offset, st_buffer_read_ptr(g_st_buffer[type]), my_write_size, MPI_BYTE, &status); 187 | g_tw_pe->stats.s_stat_write += tw_clock_read() - start_cycle_time; 188 | 189 | // reset the buffer 190 | g_st_buffer[type]->write_pos = 0; 191 | g_st_buffer[type]->read_pos = 0; 192 | g_st_buffer[type]->count = 0; 193 | buffer_overflow_warned = 0; 194 | } 195 | else 196 | g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time; 197 | } 198 | 199 | /* make sure we write out any remaining buffer data */ 200 | void st_buffer_finalize(int type) 201 | { 202 | // check if any data needs to be written out 203 | if (!g_st_disable_out) 204 | st_buffer_write(1, type); 205 | 206 | printf("PE %ld: There were %ld bytes of data missed because 
of buffer overflow\n", g_tw_mynode, missed_bytes); 207 | 208 | MPI_File_close(&buffer_fh[type]); 209 | 210 | } 211 | -------------------------------------------------------------------------------- /core/network-mpi.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_network_mpi_h 2 | #define INC_network_mpi_h 3 | 4 | typedef long tw_node; 5 | 6 | extern MPI_Comm MPI_COMM_ROSS; 7 | 8 | /** 9 | * @brief Initalize the network library and parse options. 10 | * 11 | * argc and argv are pointers to the original command line; the 12 | * network library may edit these before the option parser sees 13 | * them allowing for network implementation specific argument 14 | * handling to occur. 15 | * 16 | * It's possible for a model to init MPI itself, as this 17 | * function will first check if MPI is already initialized before 18 | * attempting to call MPI_Init(). 19 | * 20 | * This function also sets the global variables 21 | * g_tw_masternode and g_tw_mynode. 22 | * 23 | * @param[in] argc Pointer to command line arg count 24 | * @param[in] argv Pointer to command line args 25 | * @return tw_optdef array to be included in overall process 26 | * command line argument display and parsing; NULL may be returned 27 | * to indicate the implementation has no options it wants included. 28 | */ 29 | const tw_optdef *tw_net_init(int *argc, char ***argv); 30 | 31 | /** 32 | * @brief Setup the MPI_COMM_ROSS communicator to use instead of MPI_COMM_WORLD. 33 | * 34 | * This function should be called before tw_net_init. 35 | * @param[in] comm Custom MPI communicator for setting MPI_COMM_ROSS 36 | */ 37 | void tw_comm_set(MPI_Comm comm); 38 | 39 | /** 40 | * @brief Starts the network library after option parsing. 41 | * 42 | * Makes calls to initialize the PE (g_tw_pe), create the hash/AVL tree 43 | * (for optimistic modes), and queues for posted sends/recvs. 44 | * Also pre-posts MPI Irecvs operations. 
45 | */ 46 | void tw_net_start(void); 47 | 48 | /** 49 | * @brief Stops the network library after simulation end. 50 | * 51 | * Checks to see if custom communicator was used. If not, finalizes MPI. 52 | * Otherwise, the application is expected to finalize MPI itself. 53 | */ 54 | void tw_net_stop(void); 55 | 56 | /** Aborts the entire simulation when a grave error is found. */ 57 | void tw_net_abort(void) NORETURN; 58 | 59 | /** 60 | * @brief starts service_queues() to poll network 61 | * 62 | * @param[in] me pointer to the PE 63 | */ 64 | extern void tw_net_read(tw_pe *); 65 | 66 | /** 67 | * @brief Adds the event to the outgoing queue of events to be sent, 68 | * polls for finished sends, and attempts to start sends from outq. 69 | * 70 | * @param[in] e remote event to be sent 71 | */ 72 | extern void tw_net_send(tw_event *); 73 | 74 | /** 75 | * @brief Cancel the given remote event by either removing from the outq 76 | * or sending an antimessage, depending on the status of the original positive send. 77 | * 78 | * @param[in] e remote event to be canceled 79 | */ 80 | extern void tw_net_cancel(tw_event *); 81 | 82 | /** Obtain the total number of PEs executing the simulation. 83 | * 84 | * @return number of ROSS PEs/MPI world size 85 | */ 86 | extern unsigned tw_nnodes(void); 87 | 88 | /** Block until all nodes call the barrier. */ 89 | extern void tw_net_barrier(void); 90 | 91 | /** 92 | * @brief Obtain the lowest timestamp inside the network buffers. 93 | * 94 | * @return minimum timestamp for this PE's network buffers 95 | */ 96 | extern tw_stime tw_net_minimum(void); 97 | 98 | #ifdef USE_RAND_TIEBREAKER 99 | /** 100 | * @brief Obtain the event signature for the lowest ordered event inside the network buffers. 101 | * 102 | * @return minimum event signature for this PE's network buffers 103 | */ 104 | extern tw_event_sig tw_net_minimum_sig(void); 105 | #endif 106 | 107 | /** 108 | * @brief Function to reduce all the statistics for output. 
109 | * @attention Notice that the MPI_Reduce "count" parameter is greater than one. 110 | * We are reducing on multiple variables *simultaneously* so if you change 111 | * this function or the struct tw_statistics, you must update the other. 112 | **/ 113 | extern tw_statistics *tw_net_statistics(tw_pe *, tw_statistics *); 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /core/queue/heap.c.old: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | * Additional Contributions and Acknowledgements 3 | * Kalyan Perumalla - Ga Tech 4 | * 5 | * This implementation is an adaption of the implementation done 6 | * by Kalyan for Ga Tech Time Warp 7 | **********************************************************************/ 8 | 9 | #include 10 | 11 | typedef tw_event *ELEMENT_TYPE; 12 | typedef double KEY_TYPE; 13 | #define KEY(e) (e->recv_ts) 14 | 15 | struct tw_pq 16 | { 17 | unsigned long nelems; 18 | unsigned long curr_max; 19 | ELEMENT_TYPE *elems; /* Array [0..curr_max] of ELEMENT_TYPE */ 20 | }; 21 | 22 | #define SWAP(heap,x,y,t) { \ 23 | t = heap->elems[x]; \ 24 | heap->elems[x] = heap->elems[y]; \ 25 | heap->elems[y] = t; \ 26 | heap->elems[x]->heap_index = x; \ 27 | heap->elems[y]->heap_index = y; \ 28 | } 29 | 30 | /*---------------------------------------------------------------------------*/ 31 | void* 32 | tw_unsafe_realloc( 33 | const char *file, 34 | int line, 35 | const char *for_who, 36 | void *addr, 37 | size_t len) 38 | { 39 | malloc_calls++; 40 | total_allocated += len; 41 | addr = realloc(addr, len); 42 | if (!addr) 43 | tw_error( 44 | file, line, 45 | "Cannot allocate %lu bytes for %s", 46 | (unsigned long)len, 47 | for_who); 48 | return addr; 49 | } 50 | 51 | /*---------------------------------------------------------------------------*/ 52 | static inline ELEMENT_TYPE HeapPeekTop( tw_pq *h ) 
53 | { 54 | return (h->nelems <= 0) ? 0 : h->elems[0]; 55 | } 56 | 57 | /*---------------------------------------------------------------------------*/ 58 | static void sift_down( tw_pq *h, int i ) 59 | { 60 | int n = h->nelems, k = i, j, c1, c2; 61 | ELEMENT_TYPE temp; 62 | 63 | if( n <= 1 ) return; 64 | 65 | /* Stops when neither child is "strictly less than" parent */ 66 | do{ 67 | j = k; 68 | c1 = c2 = 2*k+1; 69 | c2++; 70 | if( c1 < n && KEY(h->elems[c1]) < KEY(h->elems[k]) ) k = c1; 71 | if( c2 < n && KEY(h->elems[c2]) < KEY(h->elems[k]) ) k = c2; 72 | SWAP( h, j, k, temp ); 73 | }while( j != k ); 74 | } 75 | 76 | /*---------------------------------------------------------------------------*/ 77 | static void percolate_up( tw_pq *h, int i ) 78 | { 79 | int n = h->nelems, k = i, j, p; 80 | ELEMENT_TYPE temp; 81 | 82 | if( n <= 1 ) return; 83 | 84 | /* Stops when parent is "less than or equal to" child */ 85 | do 86 | { 87 | j = k; 88 | if( (p = (k+1)/2) ) 89 | { 90 | --p; 91 | if( KEY(h->elems[k]) < KEY(h->elems[p]) ) k = p; 92 | } 93 | SWAP( h, j, k, temp ); 94 | }while( j != k ); 95 | } 96 | 97 | /*---------------------------------------------------------------------------*/ 98 | void tw_pq_enqueue(tw_pq *h, ELEMENT_TYPE e ) 99 | { 100 | if( h->nelems >= h->curr_max ) 101 | { 102 | const unsigned int i = 50000; 103 | const unsigned int u = h->curr_max; 104 | h->curr_max += i; 105 | h->elems = tw_unsafe_realloc( 106 | TW_LOC, 107 | "heap queue elements", 108 | h->elems, 109 | sizeof(*h->elems) * h->curr_max); 110 | memset(&h->elems[u], 0, sizeof(*h->elems) * i); 111 | } 112 | 113 | e->heap_index = h->nelems; 114 | h->elems[h->nelems++] = e; 115 | percolate_up( h, h->nelems-1 ); 116 | 117 | e->state.owner = TW_pe_pq; 118 | e->next = NULL; 119 | e->prev = NULL; 120 | } 121 | 122 | /*---------------------------------------------------------------------------*/ 123 | ELEMENT_TYPE tw_pq_dequeue(tw_pq *h) 124 | { 125 | if( h->nelems <= 0 ) 126 | return 0; 127 | 
else 128 | { 129 | ELEMENT_TYPE e = h->elems[0]; 130 | h->elems[0] = h->elems[--h->nelems]; 131 | h->elems[0]->heap_index = 0; 132 | sift_down( h, 0 ); 133 | e->state.owner = 0; 134 | return e; 135 | } 136 | } 137 | 138 | /*---------------------------------------------------------------------------*/ 139 | #if 0 140 | static void DumpBucket( void *pq, FILE *fp ) 141 | { 142 | int i; 143 | tw_pq *h = (tw_pq *)(pq); 144 | fprintf( fp, "[ " ); 145 | for( i = 0; i < h->nelems; i++ ) 146 | { 147 | fprintf( fp, "%s", ( i && i % 10 == 0 ) ? "\n\t" : "" ); 148 | fprintf( fp, "%s%lf", (i ? ", ":""), KEY(h->elems[i]) ); 149 | } 150 | fprintf( fp, " ]\n" ); 151 | fflush( fp ); 152 | } 153 | #endif 154 | 155 | /*---------------------------------------------------------------------------*/ 156 | void tw_pq_delete_any(tw_pq *h, tw_event * victim) 157 | { 158 | int i = victim->heap_index; 159 | 160 | if( !(0 <= i && i < h->nelems) || (h->elems[i]->heap_index != i) ) 161 | { 162 | fprintf( stderr, "Fatal: Bad node in FEL!\n" ); exit(2); 163 | } 164 | else 165 | { 166 | h->nelems--; 167 | victim->state.owner = 0; 168 | 169 | if( h->nelems > 0 ) 170 | { 171 | ELEMENT_TYPE successor = h->elems[h->nelems]; 172 | h->elems[i] = successor; 173 | successor->heap_index = i; 174 | if( KEY(successor) <= KEY(victim) ) percolate_up( h, i ); 175 | else sift_down( h, i ); 176 | } 177 | } 178 | } 179 | 180 | /*---------------------------------------------------------------------------*/ 181 | tw_pq * tw_pq_create(void) 182 | { 183 | tw_pq *h = tw_calloc(TW_LOC, "heap queue", sizeof(tw_pq), 1); 184 | h->nelems = 0; 185 | h->curr_max = (2*g_tw_events_per_pe); 186 | h->elems = tw_unsafe_realloc( 187 | TW_LOC, 188 | "heap queue elements", 189 | NULL, 190 | sizeof(*h->elems) * h->curr_max); 191 | memset(h->elems, 0, sizeof(*h->elems) * h->curr_max); 192 | 193 | return h; 194 | } 195 | 196 | /*---------------------------------------------------------------------------*/ 197 | tw_stime 
tw_pq_minimum(tw_pq *pq) 198 | { 199 | ELEMENT_TYPE e = HeapPeekTop(pq); 200 | double retval = e ? KEY(e) : HUGE_VAL; 201 | return retval; 202 | } 203 | 204 | /*---------------------------------------------------------------------------*/ 205 | unsigned int tw_pq_get_size( tw_pq *pq ) 206 | { 207 | return ( pq->nelems ); 208 | } 209 | -------------------------------------------------------------------------------- /core/queue/tw-queue.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // This is the API for the tw queue system 4 | // There are several queue implementations 5 | 6 | tw_pq *tw_pq_create(void); 7 | void tw_pq_enqueue(tw_pq *, tw_event *); 8 | tw_event *tw_pq_dequeue(tw_pq *); 9 | tw_stime tw_pq_minimum(tw_pq *); 10 | #ifdef USE_RAND_TIEBREAKER 11 | tw_event_sig tw_pq_minimum_sig(tw_pq *); 12 | #endif 13 | void tw_pq_delete_any(tw_pq *, tw_event *); 14 | unsigned int tw_pq_get_size(tw_pq *); 15 | unsigned int tw_pq_max_size(tw_pq *); 16 | #ifdef ROSS_QUEUE_kp_splay 17 | tw_eventpq * tw_eventpq_create(void); 18 | #endif 19 | -------------------------------------------------------------------------------- /core/rand-clcg4.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_clcg4_h 2 | #define INC_clcg4_h 3 | 4 | typedef int32_t * tw_seed; 5 | 6 | struct tw_rng 7 | { 8 | /* 9 | * equals a[i]^{m[i]-2} mod m[i] 10 | */ 11 | long long b[4]; 12 | 13 | /* 14 | * a[j]^{2^w} et a[j]^{2^{v+w}}. 15 | */ 16 | int32_t m[4]; 17 | int32_t a[4]; 18 | int32_t aw[4]; 19 | int32_t avw[4]; 20 | 21 | // the seed.. 
22 | int32_t seed[4]; 23 | }; 24 | 25 | enum SeedType 26 | { 27 | InitialSeed, LastSeed, NewSeed 28 | }; 29 | 30 | typedef enum SeedType SeedType; 31 | 32 | struct tw_rng_stream 33 | { 34 | unsigned long count; 35 | int32_t Ig[4]; 36 | int32_t Lg[4]; 37 | int32_t Cg[4]; 38 | 39 | //tw_rng *rng; 40 | 41 | #ifdef RAND_NORMAL 42 | double tw_normal_u1; 43 | double tw_normal_u2; 44 | int tw_normal_flipflop; 45 | #endif 46 | }; 47 | 48 | extern tw_rng *rng_init(int v, int w); 49 | extern tw_rng *rng_core_init(int v, int w); 50 | extern void rng_set_initial_seed(); 51 | extern void rng_init_generator(tw_rng_stream * g, SeedType Where, tw_rng * the_rng); 52 | extern void rng_set_seed(tw_rng_stream * g, uint32_t * s, tw_rng * the_rng); 53 | extern void rng_get_state(tw_rng_stream * g, uint32_t * s); 54 | extern void rng_write_state(tw_rng_stream * g, FILE *f); 55 | extern double rng_gen_val(tw_rng_stream * g); 56 | extern double rng_gen_reverse_val(tw_rng_stream * g); 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /core/rio/README.md: -------------------------------------------------------------------------------- 1 | # RIO: A Checkpoint/Restart API for ROSS 2 | 3 | RIO (ROSS Restart IO) is a checkpointing API for [Rensselaer's Optimistic Simulation System](https://github.com/ROSS-org/ROSS). 4 | RIO is for checkpoint-restart operations and in its current state it cannot be used to created incremental checkpoints for fault tolerance. 5 | 6 | ## Limitations 7 | 8 | As ROSS is developed, full RIO functionality may be lacking and certain LP and event information may not be saved in a checkpoint. 9 | At this time, the following features are not compatible with RIO: 10 | - delta encoding 11 | - LP suspend 12 | - instrumentation 13 | 14 | ## Documentation 15 | 16 | The documentation for RIO can be found on the ROSS website (Look for the RIO section on the [archive page](https://ROSS-org.github.io/archive.html)). 
17 | The documentation includes: 18 | 19 | - [Overview](https://ROSS-org.github.io/rio/rio-overview.html) 20 | - [API Description](https://ROSS-org.github.io/rio/rio-api.html) 21 | - [Checkpoint Description](https://ROSS-org.github.io/rio/rio-files.html) 22 | - [Adding RIO to a Model](https://ROSS-org.github.io/rio/rio-cmake.html) 23 | 24 | ## Example Usage 25 | 26 | The full RIO API has been implemented in the [PHOLD-IO model](https://github.com/ROSS-org/pholdio). 27 | 28 | ## Coding Conventions 29 | 30 | RIO API functionality is prefixed with `io`. 31 | -------------------------------------------------------------------------------- /core/rio/io-serialize.c: -------------------------------------------------------------------------------- 1 | #include "ross.h" 2 | 3 | size_t io_lp_serialize (tw_lp *lp, void *buffer) { 4 | int i, j; 5 | 6 | io_lp_store tmp; 7 | 8 | tmp.gid = lp->gid; 9 | for (i = 0; i < g_tw_nRNG_per_lp; i++) { 10 | for (j = 0; j < 4; j++) { 11 | tmp.rng[j] = lp->rng->Ig[j]; 12 | tmp.rng[j+4] = lp->rng->Lg[j]; 13 | tmp.rng[j+8] = lp->rng->Cg[j]; 14 | } 15 | #ifdef RAND_NORMAL 16 | tmp.tw_normal_u1 = lp->rng->tw_normal_u1; 17 | tmp.tw_normal_u2 = lp->rng->tw_normal_u2; 18 | tmp.tw_normal_flipflop = lp->rng->tw_normal_flipflop; 19 | #endif 20 | } 21 | tmp.critical_path = lp->critical_path; 22 | 23 | memcpy(buffer, &tmp, sizeof(io_lp_store)); 24 | return sizeof(io_lp_store); 25 | } 26 | 27 | size_t io_lp_deserialize (tw_lp *lp, void *buffer) { 28 | int i, j; 29 | 30 | io_lp_store tmp; 31 | memcpy(&tmp, buffer, sizeof(io_lp_store)); 32 | 33 | lp->gid = tmp.gid; 34 | 35 | for (i = 0; i < g_tw_nRNG_per_lp; i++) { 36 | for (j = 0; j < 4; j++) { 37 | lp->rng->Ig[j] = tmp.rng[j]; 38 | lp->rng->Lg[j] = tmp.rng[j+4]; 39 | lp->rng->Cg[j] = tmp.rng[j+8]; 40 | } 41 | #ifdef RAND_NORMAL 42 | lp->rng->tw_normal_u1 = tmp.tw_normal_u1; 43 | lp->rng->tw_normal_u2 = tmp.tw_normal_u2; 44 | lp->rng->tw_normal_flipflop = tmp.tw_normal_flipflop; 45 | #endif 46 | } 47 | 
lp->critical_path = tmp.critical_path; 48 | 49 | return sizeof(io_lp_store); 50 | } 51 | 52 | size_t io_event_serialize (tw_event *e, void *buffer) { 53 | int i; 54 | 55 | io_event_store tmp; 56 | 57 | memcpy(&(tmp.cv), &(e->cv), sizeof(tw_bf)); 58 | tmp.critical_path = e->critical_path; 59 | tmp.dest_lp = (tw_lpid)e->dest_lp; // ROSS HACK: dest_lp is gid 60 | tmp.src_lp = e->src_lp->gid; 61 | tmp.recv_ts = e->recv_ts - g_tw_ts_end; 62 | 63 | memcpy(buffer, &tmp, sizeof(io_event_store)); 64 | // printf("Storing event going to %lu at %f\n", tmp.dest_lp, tmp.recv_ts); 65 | return sizeof(io_event_store); 66 | } 67 | 68 | size_t io_event_deserialize (tw_event *e, void *buffer) { 69 | int i; 70 | 71 | io_event_store tmp; 72 | memcpy(&tmp, buffer, sizeof(io_event_store)); 73 | e->critical_path = tmp.critical_path; 74 | 75 | memcpy(&(e->cv), &(tmp.cv), sizeof(tw_bf)); 76 | e->dest_lp = (tw_lp *) tmp.dest_lp; // ROSS HACK: e->dest_lp is GID for a bit 77 | //undo pointer to GID conversion 78 | if (g_tw_mapping == LINEAR) { 79 | e->src_lp = g_tw_lp[((tw_lpid)tmp.src_lp) - g_tw_lp_offset]; 80 | } else if (g_tw_mapping == CUSTOM) { 81 | e->src_lp = g_tw_custom_lp_global_to_local_map((tw_lpid)tmp.src_lp); 82 | } else { 83 | tw_error(TW_LOC, "RIO ERROR: Unsupported mapping"); 84 | } 85 | e->recv_ts = tmp.recv_ts; 86 | // printf("Loading event going to %lu at %f\n", tmp.dest_lp, tmp.recv_ts); 87 | return sizeof(io_event_store); 88 | } 89 | -------------------------------------------------------------------------------- /core/rio/io.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_io_h 2 | #define INC_io_h 3 | 4 | //Elsa Gonsiorowski 5 | //Rensselaer Polytechnic Institute 6 | //Decemeber 13, 2013 7 | 8 | // ** Global IO System variables ** // 9 | 10 | // Set with command line --io-files 11 | // should be consistent across the system 12 | extern int g_io_number_of_files; 13 | 14 | // Register opts with ROSS 15 | extern const 
// On-disk/in-buffer record for one LP, as produced by io_lp_serialize
// and consumed by io_lp_deserialize (io-serialize.c).
typedef struct {
    tw_lpid gid;                // global id of the LP
    // Seed vectors of the LP's RNG stream, four values each:
    //   rng[0..3] = Ig, rng[4..7] = Lg, rng[8..11] = Cg
    int32_t rng[12];
#ifdef RAND_NORMAL
    // Copies of the stream's cached values used by tw_rand_normal01
    double tw_normal_u1;
    double tw_normal_u2;
    int tw_normal_flipflop;
#endif
    unsigned int critical_path; // copy of lp->critical_path
} io_lp_store;
#!/bin/sh
#
# ross-config: print the compiler, linker and flag settings for building
# against this ROSS installation.  The @...@ placeholders are presumably
# filled in by the build system when this template is configured.
#
# Usage: ross-config [--cflags] [--ldflags] [--libs] [--cc] [--cxx] [--ld]
# Each recognized option prints one line; an unknown option or no option
# prints the usage text to stderr and exits with status 1.

ROSS_CC="@ROSS_CC@"
ROSS_CXX="@ROSS_CXX@"
# Linking is driven by the C compiler.
ROSS_LD="@ROSS_CC@"

# Installation prefix = directory holding this script with a trailing
# "/bin" removed.  The pattern is anchored to the end of the path so an
# earlier "/bin" (e.g. /opt/binaries/ross/bin) is left intact, and $0 is
# quoted so directories containing spaces work.
prefix=$( (cd "$(dirname "$0")"; pwd) | sed -e 's/\/bin$//' )
CFLAGS_OPTS=''
usage="\
Usage: ross-config [--cflags] [--ldflags] [--libs] [--cc] [--cxx] [--ld]"

if test $# -eq 0; then
    echo "${usage}" 1>&2
    exit 1
fi

while test $# -gt 0; do
    case $1 in
    --cc)
        echo "${ROSS_CC}"
        ;;
    --cxx)
        echo "${ROSS_CXX}"
        ;;
    --ld)
        echo "${ROSS_LD}"
        ;;
    --cflags)
        echo "-I${prefix}/include ${CFLAGS_OPTS}"
        ;;
    --ldflags)
        echo "-L${prefix}/lib"
        ;;
    --libs)
        libflags="-lROSS -lm"
        echo $libflags
        ;;
    *)
        echo "${usage}" 1>&2
        exit 1
        ;;
    esac
    shift
done
-------------------------------------------------------------------------------- 1 | #ifndef INC_ross_extern_h 2 | #define INC_ross_extern_h 3 | 4 | extern void tw_rand_init_streams(tw_lp * lp, unsigned int nstreams, unsigned int n_core_streams); 5 | 6 | /* 7 | * tw-stats.c 8 | */ 9 | extern void tw_get_stats(tw_pe * me, tw_statistics *s); 10 | extern void tw_stats(tw_pe *me); 11 | 12 | /* 13 | * ross-global.c 14 | */ 15 | extern tw_synch g_tw_synchronization_protocol; 16 | extern map_local_f g_tw_custom_lp_global_to_local_map; 17 | extern map_custom_f g_tw_custom_initial_mapping; 18 | extern tw_lp_map g_tw_mapping; 19 | extern tw_lpid g_tw_nlp; 20 | extern tw_lpid g_tw_total_lps; //Total LPs in the simulation 21 | extern tw_lpid g_tw_lp_offset; 22 | extern tw_kpid g_tw_nkp; 23 | extern tw_lp **g_tw_lp; 24 | extern tw_kp **g_tw_kp; 25 | extern int g_tw_fossil_attempts; 26 | extern unsigned int g_tw_nRNG_per_lp; 27 | extern unsigned int g_tw_nRNG_core_per_lp; //Separate ROSS engine-only use RNG streams 28 | extern tw_lpid g_tw_rng_default; 29 | extern tw_seed g_tw_rng_seed; 30 | extern tw_seed g_tw_core_rng_seed; 31 | extern unsigned int g_tw_mblock; 32 | extern unsigned int g_tw_gvt_interval; 33 | extern unsigned long long g_tw_max_opt_lookahead; 34 | extern unsigned long long g_tw_gvt_realtime_interval; 35 | extern unsigned long long g_tw_gvt_interval_start_cycles; 36 | extern double g_tw_ts_end; 37 | extern unsigned int g_tw_sim_started; 38 | extern size_t g_tw_msg_sz; 39 | extern size_t g_tw_event_msg_sz; 40 | extern size_t g_tw_delta_sz; 41 | extern uint32_t g_tw_buddy_alloc; 42 | extern buddy_list_bucket_t *g_tw_buddy_master; 43 | extern uint32_t g_tw_avl_node_count; 44 | 45 | extern unsigned int g_tw_lz4_knob; 46 | 47 | extern double g_tw_lookahead; 48 | extern double g_tw_min_detected_offset; 49 | 50 | extern tw_pe *g_tw_pe; 51 | extern unsigned int g_tw_events_per_pe; 52 | extern unsigned int g_tw_events_per_pe_extra; 53 | 54 | extern unsigned int 
g_tw_gvt_threshold; 55 | extern unsigned int g_tw_gvt_done; 56 | 57 | extern unsigned int g_tw_net_device_size; 58 | extern tw_peid g_tw_mynode; 59 | extern tw_peid g_tw_masternode; 60 | 61 | extern FILE *g_tw_csv; 62 | 63 | extern tw_lptype * g_tw_lp_types; 64 | extern tw_typemap_f g_tw_lp_typemap; 65 | 66 | /* 67 | * Cycle Counter variables 68 | */ 69 | extern tw_clock g_tw_cycles_gvt; 70 | extern tw_clock g_tw_cycles_ev_abort; 71 | extern tw_clock g_tw_cycles_ev_proc; 72 | extern tw_clock g_tw_cycles_ev_queue; 73 | extern tw_clock g_tw_cycles_rbs; 74 | extern tw_clock g_tw_cycles_cancel; 75 | 76 | /* 77 | * clock-* 78 | */ 79 | extern const tw_optdef *tw_clock_setup(); 80 | extern void tw_clock_init(tw_pe * me); 81 | extern tw_clock tw_clock_now(tw_pe * me); 82 | extern tw_clock tw_clock_read(); 83 | extern unsigned long long g_tw_clock_rate; 84 | 85 | /* 86 | * tw-event.c 87 | */ 88 | extern void tw_event_send(tw_event * event); 89 | extern void tw_event_rollback(tw_event * event); 90 | #ifdef USE_RAND_TIEBREAKER 91 | extern int tw_event_sig_compare(tw_event_sig e_sig, tw_event_sig n_sig); 92 | #endif 93 | 94 | /* 95 | * ross-inline.h 96 | */ 97 | static inline void tw_event_free(tw_pe *, tw_event *); 98 | static inline void tw_free_output_messages(tw_event *e, int print_message); 99 | static inline void * tw_event_data(tw_event * event); 100 | 101 | /* 102 | * tw-lp.c 103 | */ 104 | extern tw_lp *tw_lp_next_onpe(tw_lp * last, tw_pe * pe); 105 | extern void tw_lp_settype(tw_lpid lp, tw_lptype * type); 106 | extern void tw_lp_onpe(tw_lpid index, tw_pe * pe, tw_lpid id); 107 | extern void tw_lp_onkp(tw_lp * lp, tw_kp * kp); 108 | extern void tw_init_lps(tw_pe * me); 109 | extern void tw_pre_run_lps(tw_pe * me); 110 | extern void tw_lp_setup_types(); 111 | extern void tw_lp_suspend(tw_lp * lp, int do_orig_event_rc, int error_num ); 112 | 113 | /* 114 | * tw-kp.c 115 | */ 116 | extern void tw_kp_onpe(tw_kpid id, tw_pe * pe); 117 | extern void kp_fossil_remote(tw_kp 
* kp); 118 | extern tw_kp* tw_kp_next_onpe(tw_kp * last, tw_pe * pe); 119 | extern void tw_init_kps(tw_pe * me); 120 | extern tw_out* tw_kp_grab_output_buffer(tw_kp *kp); 121 | extern void tw_kp_put_back_output_buffer(tw_out *out); 122 | 123 | extern void tw_kp_rollback_event(tw_event *event); 124 | extern void tw_kp_rollback_to(tw_kp * kp, tw_stime to); 125 | #ifdef USE_RAND_TIEBREAKER 126 | extern void tw_kp_rollback_to_sig(tw_kp * kp, tw_event_sig to_sig); 127 | #endif 128 | 129 | /* 130 | * tw-pe.c 131 | */ 132 | extern void tw_pe_settype(const tw_petype * type); 133 | extern void tw_pe_init(void); 134 | extern void tw_pe_fossil_collect(void); 135 | 136 | /* 137 | * tw-setup.c 138 | */ 139 | extern void tw_init(int *argc, char ***argv); 140 | extern void tw_define_lps(tw_lpid nlp, size_t msg_sz); 141 | extern void tw_run(void); 142 | extern void tw_end(void); 143 | extern tw_lpid map_onetype (tw_lpid gid); 144 | 145 | /* 146 | * tw-sched.c 147 | */ 148 | extern void tw_sched_init(tw_pe * me); 149 | extern void tw_scheduler_sequential(tw_pe * me); 150 | extern void tw_scheduler_conservative(tw_pe * me); 151 | extern void tw_scheduler_optimistic(tw_pe * me); 152 | extern void tw_scheduler_optimistic_debug(tw_pe * me); 153 | extern void tw_scheduler_optimistic_realtime(tw_pe * me); 154 | 155 | /* 156 | * tw-state.c 157 | */ 158 | extern void tw_snapshot(tw_lp *lp, size_t state_sz); 159 | extern long tw_snapshot_delta(tw_lp *lp, size_t state_sz); 160 | extern void tw_snapshot_restore(tw_lp *lp, size_t state_sz); 161 | 162 | /* 163 | * tw-timing.c 164 | */ 165 | extern void tw_wall_now(tw_wtime * t); 166 | extern void tw_wall_sub(tw_wtime * r, tw_wtime * a, tw_wtime * b); 167 | extern double tw_wall_to_double(tw_wtime * t); 168 | 169 | /* 170 | * tw-util.c 171 | */ 172 | 173 | #define TW_LOC __FILE__,__LINE__ 174 | extern int tw_output(tw_lp *lp, const char *fmt, ...); 175 | extern void tw_error(const char *file, int line, const char *fmt, ...) 
NORETURN; 176 | extern void tw_warning(const char *file, int line, const char *fmt, ...); 177 | extern void tw_printf(const char *file, int line, const char *fmt, ...); 178 | extern void tw_calloc_stats(size_t *alloc, size_t *waste); 179 | extern void* tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n); 180 | 181 | #endif 182 | -------------------------------------------------------------------------------- /core/ross-global.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * LP data structures are allocated dynamically when the 5 | * process starts up based on the number it requires. 6 | * 7 | * g_tw_nlp -- Number of LPs on this processor 8 | * g_tw_lp_offset -- global id of g_tw_lp[0] (on this processor) 9 | * g_tw_nkp -- Number of KPs on this processor 10 | IF this is 1, then it gets over written as nkp_per_pe * g_tw_npe 11 | thus it is total KPs in simulation, not on this processor 12 | * g_tw_lp -- Public LP object array (on this processor) 13 | * g_tw_kp -- Public KP object array (on this processor) 14 | * g_tw_fossil_attempts -- Number of times fossil_collect is called 15 | * g_tw_nRNG_per_lp -- Number of RNG per LP 16 | * g_tw_nRNG_core_per_lp -- Number of ROSS core RNG per LP for use by ROSS engine exclusively 17 | */ 18 | 19 | tw_synch g_tw_synchronization_protocol=NO_SYNCH; 20 | map_local_f g_tw_custom_lp_global_to_local_map=NULL; 21 | map_custom_f g_tw_custom_initial_mapping=NULL; 22 | tw_lp_map g_tw_mapping=LINEAR; 23 | 24 | tw_lpid g_tw_nlp = 0; 25 | tw_lpid g_tw_total_lps = 0; //Total LPs in the simulation 26 | tw_lpid g_tw_lp_offset = 0; 27 | tw_kpid g_tw_nkp = 1; 28 | tw_lp **g_tw_lp = NULL; 29 | tw_kp **g_tw_kp = NULL; 30 | int g_tw_fossil_attempts = 0; 31 | unsigned int g_tw_nRNG_per_lp = 1; 32 | unsigned int g_tw_nRNG_core_per_lp = 1; 33 | tw_lpid g_tw_rng_default = 1; 34 | tw_seed g_tw_rng_seed = NULL; 35 | tw_seed g_tw_core_rng_seed = NULL; 36 | 
unsigned int g_tw_sim_started = 0; 37 | size_t g_tw_msg_sz; 38 | size_t g_tw_delta_sz = 0; 39 | uint32_t g_tw_buddy_alloc = 0; /**< Allocation for buddy system */ 40 | buddy_list_bucket_t *g_tw_buddy_master = 0; 41 | uint32_t g_tw_avl_node_count = 18; 42 | 43 | /** Tunable LZ4 param. Higher = faster with less compression. 44 | 17 is a good value for speed with 1 being the default value. */ 45 | unsigned int g_tw_lz4_knob = 17; 46 | 47 | size_t g_tw_event_msg_sz = 0; 48 | 49 | /* 50 | * Minimum lookahead for a model -- model defined when 51 | * using the Simple Synchronization Protocol (conservative) 52 | */ 53 | double g_tw_lookahead=0.005; 54 | 55 | /* 56 | * Minimum detected timestamp offset used by the simulation at 57 | * runtime, can be used to help tune conservative protocol runs. 58 | */ 59 | double g_tw_min_detected_offset=DBL_MAX; 60 | 61 | /** 62 | * Number of messages to process at once out of the PQ before 63 | * returning back to handling things like GVT, message recption, 64 | * etc. AKA the "batch" parameter to ROSS. 65 | */ 66 | unsigned int g_tw_mblock = 16; 67 | unsigned int g_tw_gvt_interval = 16; 68 | unsigned long long g_tw_max_opt_lookahead = ULLONG_MAX; 69 | unsigned long long g_tw_gvt_realtime_interval; // calculated at runtime 70 | unsigned long long g_tw_gvt_interval_start_cycles = 0; 71 | 72 | double g_tw_ts_end = 100000.0; 73 | 74 | /* 75 | * g_tw_pe -- Public PE pointer 76 | * g_tw_events_per_pe -- Number of events to place in for each PE. 77 | * MUST be > 1 because of abort buffer. 78 | */ 79 | tw_pe *g_tw_pe = NULL; 80 | unsigned int g_tw_events_per_pe = 2048; 81 | /** Number of extra events allocated per PE. Command-line customizable. 
/* Clock rate in Hz; defaults to 1 GHz. */
unsigned long long g_tw_clock_rate = 1000000000ULL;
/**
 * Creates an event with a given priority in range [0,1], lower value is higher priority
 *
 * The event is taken from the sending PE's free queue and stamped with
 * recv_ts = tw_now(sender) + offset_ts.  Two situations hand back a
 * substitute buffer instead of a fresh one:
 *  - recv_ts >= g_tw_ts_end: the PE's abort_event is returned (or, when
 *    built with USE_RIO, a buffer from io_event_grab) and
 *    stats.s_events_past_end is incremented;
 *  - the free queue is empty: fatal under CONSERVATIVE or SEQUENTIAL
 *    synchronization, otherwise the PE's abort_event is returned.
 *
 * @param dest_gid   global id of the destination LP
 * @param offset_ts  non-negative offset from the sender's current time
 * @param sender     LP creating the event
 * @param prio       priority in [0.0, 1.0]; tw_event_new passes 1
 * @return           event buffer with routing and timestamp fields filled in
 *
 * @bug There's a bug in this function. We put dest_gid, which is
 * a 64-bit value, into dest_lp which may be a 32-bit pointer.
 */
static inline tw_event *
tw_event_new_user_prio(tw_lpid dest_gid, tw_stime offset_ts, tw_lp * sender, tw_stime prio)
{
    tw_pe *send_pe;
    tw_event *e;
    tw_stime recv_ts;

    /* Argument validation; tw_error is declared NORETURN. */
    if (TW_STIME_DBL(offset_ts) < 0.0) {
        /* NOTE(review): %lu assumes tw_lpid is unsigned long -- confirm against ross-types.h */
        tw_error(TW_LOC, "Cannot send events into the past! Sending LP: %lu\n", sender->gid);
    }

    if (TW_STIME_DBL(prio) < 0.0 || TW_STIME_DBL(prio) > 1.0) {
        tw_error(TW_LOC, "Cannot specify an event priority outside of range [0.0,1.0]");
    }

    send_pe = sender->pe;
    recv_ts = TW_STIME_ADD(tw_now(sender), offset_ts);

    if(g_tw_synchronization_protocol == CONSERVATIVE)
    {
        /* keep track of the smallest timestamp offset we have seen */
        if(TW_STIME_DBL(offset_ts) < g_tw_min_detected_offset)
            g_tw_min_detected_offset = TW_STIME_DBL(offset_ts);
    }

    /* If this event will be past the end time, or there
     * are no more free events available, use abort event.
     */
    if (TW_STIME_DBL(recv_ts) >= g_tw_ts_end) {
#ifdef USE_RIO
        e = io_event_grab(send_pe);
#else
        e = send_pe->abort_event;
#endif
        send_pe->stats.s_events_past_end++;
    } else {
        e = tw_event_grab(send_pe);
        if (!e) {
            /* Out of buffers: only the optimistic modes fall back to the abort event. */
            if (g_tw_synchronization_protocol == CONSERVATIVE
                || g_tw_synchronization_protocol == SEQUENTIAL) {
                tw_error(TW_LOC,
                        "No free event buffers. Try increasing via g_tw_events_per_pe"
                        " or --extramem");
            }
            else
                e = send_pe->abort_event;
        }
    }

    e->send_pe = sender->pe->id;
    /* dest_lp temporarily carries the gid itself (see @bug above); the gid is also kept in dest_lpid. */
    e->dest_lp = (tw_lp *) dest_gid;
    e->dest_lpid = dest_gid;
    e->src_lp = sender;
    e->recv_ts = recv_ts;
    e->send_ts = tw_now(sender);
    e->critical_path = sender->critical_path + 1;


#ifdef USE_RAND_TIEBREAKER
    e->sig.priority = prio;
    tw_event *now_event = sender->kp->pe->cur_event;
    tw_stime u_rand_val = tw_rand_unif(sender->core_rng); //create a random number used to deterministically break event ties, this is rolled back in tw_event_rollback() during the sender LP cancel loop
    e->sig.recv_ts = recv_ts;
    /* Zero-offset events inherit the current event's tiebreaker lineage and append one new value. */
    /* NOTE(review): offset_ts is compared directly here but via TW_STIME_DBL above -- confirm this is intended. */
    if (offset_ts == 0) {
        /* NOTE(review): with '>' a lineage length equal to MAX_TIE_CHAIN passes the check and the write
         * below uses index MAX_TIE_CHAIN -- confirm event_tiebreaker has more than MAX_TIE_CHAIN slots. */
        if (now_event->sig.tie_lineage_length > MAX_TIE_CHAIN)
            tw_error(TW_LOC, "Maximum zero-offset tie chain reached (%d), increase #define in ross-types.h",MAX_TIE_CHAIN);
        memcpy(e->sig.event_tiebreaker, now_event->sig.event_tiebreaker, sizeof(tw_stime)*(now_event->sig.tie_lineage_length));
        e->sig.event_tiebreaker[now_event->sig.tie_lineage_length] = u_rand_val;
        e->sig.tie_lineage_length = now_event->sig.tie_lineage_length + 1;
    }
    else {
        e->sig.event_tiebreaker[0] = u_rand_val;
        e->sig.tie_lineage_length = 1;
    }
#endif

    /* Return any output buffers still attached to the reused event, without printing them. */
    tw_free_output_messages(e, 0);

    return e;
}
The event is then cancelled 139 | * and freed -- which is how a membuf could end up on a freed 140 | * event. 141 | */ 142 | tw_free_output_messages(e, 0); 143 | 144 | if (e->delta_buddy) { 145 | tw_clock start = tw_clock_read(); 146 | buddy_free(e->delta_buddy); 147 | g_tw_pe->stats.s_buddy += (tw_clock_read() - start); 148 | e->delta_buddy = 0; 149 | } 150 | 151 | e->state.owner = TW_pe_free_q; 152 | 153 | tw_eventq_unshift(&pe->free_q, e); 154 | } 155 | 156 | static inline void * 157 | tw_event_data(tw_event * event) 158 | { 159 | return event + 1; 160 | } 161 | 162 | #endif 163 | -------------------------------------------------------------------------------- /core/ross-kernel-inline.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_ross_kernel_inline_h 2 | #define INC_ross_kernel_inline_h 3 | #include "instrumentation/st-instrumentation.h" 4 | 5 | #define ROSS_MAX(a,b) ((a) > (b) ? (a) : (b)) 6 | #define ROSS_MIN(a,b) ((a) < (b) ? (a) : (b)) 7 | 8 | static inline tw_lp * 9 | tw_getlocal_lp(tw_lpid gid) 10 | { 11 | tw_lpid id = gid; 12 | 13 | // finding analysis LPs doesn't depend on model's choice of mapping 14 | if (g_st_use_analysis_lps && gid >= g_st_total_model_lps) 15 | { 16 | return g_tw_lp[(gid - g_st_total_model_lps) % g_tw_nkp + g_tw_nlp]; 17 | } 18 | 19 | switch (g_tw_mapping) { 20 | case CUSTOM: 21 | return( g_tw_custom_lp_global_to_local_map( gid ) ); 22 | case ROUND_ROBIN: 23 | id = gid / tw_nnodes(); 24 | break; 25 | case LINEAR: 26 | id = gid - g_tw_lp_offset; 27 | break; 28 | } 29 | 30 | #ifdef ROSS_runtime_checks 31 | if (id >= g_tw_nlp) 32 | tw_error(TW_LOC, "ID %d exceeded MAX LPs", id); 33 | if (gid != g_tw_lp[id]->gid) 34 | tw_error(TW_LOC, "Inconsistent LP Mapping"); 35 | #endif /* ROSS_runtime_checks */ 36 | 37 | return g_tw_lp[id]; 38 | } 39 | 40 | static inline tw_lp * 41 | tw_getlp(tw_lpid id) 42 | { 43 | #ifdef ROSS_runtime_checks 44 | if (id >= g_tw_nlp + g_st_analysis_nlp) 45 | 
tw_error(TW_LOC, "ID %d exceeded MAX LPs", id); 46 | if (id != g_tw_lp[id]->id) 47 | tw_error(TW_LOC, "Inconsistent LP Mapping"); 48 | #endif /* ROSS_runtime_checks */ 49 | return g_tw_lp[id]; 50 | } 51 | 52 | static inline tw_kp * 53 | tw_getkp(tw_kpid id) 54 | { 55 | #ifdef ROSS_runtime_checks 56 | if (id >= g_tw_nkp) 57 | tw_error(TW_LOC, "ID %u exceeded MAX KPs", id); 58 | if( g_tw_kp[id] == NULL ) 59 | tw_error(TW_LOC, "Local KP %u found NULL \n", id ); 60 | if (id != g_tw_kp[id]->id) 61 | tw_error(TW_LOC, "Inconsistent KP Mapping"); 62 | #endif /* ROSS_runtime_checks */ 63 | 64 | return g_tw_kp[id]; 65 | } 66 | 67 | static inline int 68 | tw_ismaster(void) 69 | { 70 | return (g_tw_mynode == g_tw_masternode); 71 | } 72 | 73 | static inline void * 74 | tw_getstate(tw_lp * lp) 75 | { 76 | return lp->cur_state; 77 | } 78 | 79 | #ifdef USE_RAND_TIEBREAKER 80 | static inline tw_stime 81 | tw_now(tw_lp const * lp) 82 | { 83 | return (lp->kp->last_sig.recv_ts); 84 | } 85 | 86 | static inline tw_event_sig 87 | tw_now_sig(tw_lp const *lp) 88 | { 89 | return (lp->kp->last_sig); 90 | } 91 | #else 92 | static inline tw_stime 93 | tw_now(tw_lp const * lp) 94 | { 95 | return (lp->kp->last_time); 96 | } 97 | #endif 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /core/ross-random.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * tw_rand_init 5 | */ 6 | tw_rng * 7 | tw_rand_init(uint32_t v, uint32_t w) 8 | { 9 | return rng_init(v, w); 10 | } 11 | 12 | /* 13 | * tw_rand_core_init 14 | */ 15 | tw_rng * 16 | tw_rand_core_init(uint32_t v, uint32_t w) 17 | { 18 | return rng_core_init(v, w); 19 | } 20 | 21 | /* 22 | * tw_rand_integer 23 | * 24 | * For LP # gen, return a uniform rn from low to high 25 | */ 26 | /** 27 | * NOTE: Don't pass negative values to low! 
28 | */ 29 | long 30 | tw_rand_integer(tw_rng_stream * g, long low, long high) 31 | { 32 | long safe_high = high; 33 | 34 | if (safe_high != LONG_MAX) { 35 | safe_high += 1; 36 | } 37 | 38 | if (safe_high <= low) { 39 | return (0); 40 | } else { 41 | return (low + (long)(tw_rand_unif(g) * (safe_high - low))); 42 | } 43 | } 44 | 45 | unsigned long 46 | tw_rand_ulong(tw_rng_stream * g, unsigned long low, unsigned long high) 47 | { 48 | unsigned long safe_high = high; 49 | 50 | if (safe_high != ULONG_MAX) { 51 | safe_high += 1; 52 | } 53 | 54 | if (safe_high < low) { 55 | return (0); 56 | } else { 57 | return (low + (unsigned long)(tw_rand_unif(g) * (safe_high - low))); 58 | } 59 | } 60 | 61 | long 62 | tw_rand_binomial(tw_rng_stream * g, long N, double P) 63 | { 64 | long sucesses, trials; 65 | 66 | sucesses = 0; 67 | 68 | for (trials = 0; trials < N; trials++) 69 | { 70 | if (tw_rand_unif(g) <= P) 71 | sucesses++; 72 | } 73 | 74 | return (sucesses); 75 | } 76 | 77 | double 78 | tw_rand_exponential(tw_rng_stream * g, double Lambda) 79 | { 80 | return (-Lambda * log(tw_rand_unif(g))); 81 | } 82 | 83 | double 84 | tw_rand_pareto(tw_rng_stream * g, double shape, double scale) 85 | { 86 | return( scale * 1.0/pow(tw_rand_unif(g), 1/shape) ); 87 | } 88 | 89 | double 90 | tw_rand_gamma(tw_rng_stream * g, double shape, double scale) 91 | { 92 | double a, b, q, phi, d; 93 | 94 | if (shape > 1) 95 | { 96 | a = 1 / sqrt(2 * shape - 1); 97 | b = shape - log(4); 98 | q = shape + 1 / a; 99 | phi = 4.5; 100 | d = 1 + log(phi); 101 | 102 | while (1) 103 | { 104 | double U_One = tw_rand_unif(g); 105 | double U_Two = tw_rand_unif(g); 106 | double V = a * log(U_One / (1 - U_One)); 107 | double Y = shape * exp(V); 108 | double Z = U_One * U_One * U_Two; 109 | double W = b + q * V - Y; 110 | 111 | double temp1 = W + d - phi * Z; 112 | double temp2 = log(Z); 113 | 114 | if (temp1 >= 0 || W >= temp2) 115 | return (scale * Y); 116 | 117 | } 118 | } else if (shape == 1) 119 | { 120 | return 
(tw_rand_exponential(g, scale)); 121 | } else 122 | { 123 | b = (exp(1) + shape) / exp(1); 124 | 125 | while (1) 126 | { 127 | double U_One = tw_rand_unif(g); 128 | double P = b * U_One; 129 | 130 | if (P <= 1) 131 | { 132 | double Y = pow(P, (1 / shape)); 133 | double U_Two = tw_rand_unif(g); 134 | 135 | if (U_Two <= exp(-Y)) 136 | return (scale * Y); 137 | } else 138 | { 139 | double Y = -log((b - P) / shape); 140 | double U_Two = tw_rand_unif(g); 141 | 142 | if (U_Two <= pow(Y, (shape - 1))) 143 | return (scale * Y); 144 | } 145 | } 146 | } 147 | } 148 | 149 | long 150 | tw_rand_geometric(tw_rng_stream * g, double P) 151 | { 152 | int count = 1; 153 | 154 | while (tw_rand_unif(g) > P) 155 | count++; 156 | 157 | return (count); 158 | } 159 | 160 | double 161 | tw_rand_normal01(tw_rng_stream * g, unsigned int *rng_calls) 162 | { 163 | #ifndef RAND_NORMAL 164 | tw_error(TW_LOC, "Please compile using -DRAND_NORMAL!"); 165 | #endif 166 | 167 | #ifdef RAND_NORMAL 168 | *rng_calls = 0; 169 | g->tw_normal_flipflop = !g->tw_normal_flipflop; 170 | 171 | if ((g->tw_normal_flipflop) || 172 | (g->tw_normal_u1< 0.0) || 173 | (g->tw_normal_u1 >= 1.0) || 174 | (g->tw_normal_u2 < 0.0) || 175 | (g->tw_normal_u2 > 1.0)) 176 | { 177 | g->tw_normal_u1 = tw_rand_unif(g); 178 | g->tw_normal_u2 = tw_rand_unif(g); 179 | *rng_calls = 2; 180 | 181 | return (sqrt(-2.0 * log(g->tw_normal_u1)) * sin(tw_opi * g->tw_normal_u2)); 182 | } 183 | else 184 | { 185 | return (sqrt(-2.0 * log(g->tw_normal_u1)) * cos(tw_opi * g->tw_normal_u2)); 186 | } 187 | #endif 188 | } 189 | 190 | double 191 | tw_rand_normal_sd(tw_rng_stream * g, double Mu, double Sd, unsigned int *rng_calls) 192 | { 193 | return ( Mu + (tw_rand_normal01(g, rng_calls) * Sd)); 194 | } 195 | 196 | long 197 | tw_rand_poisson(tw_rng_stream * g, double Lambda) 198 | { 199 | double a, b; 200 | long count; 201 | 202 | a = exp(-Lambda); 203 | b = 1; 204 | count = 0; 205 | 206 | b = b * tw_rand_unif(g); 207 | 208 | while (b >= a) 209 | { 
210 | count++; 211 | b = b * tw_rand_unif(g); 212 | } 213 | 214 | return (count); 215 | } 216 | 217 | double 218 | tw_rand_lognormal(tw_rng_stream * g, double mean, double sd, unsigned int *rng_calls) 219 | { 220 | return (exp( mean + sd * tw_rand_normal01(g, rng_calls))); 221 | } 222 | 223 | double 224 | tw_rand_weibull(tw_rng_stream * g, double mean, double shape) 225 | { 226 | double scale = mean / tgamma( ((double)1.0 + (double)1.0/shape)); 227 | return(scale * pow(-log( tw_rand_unif(g)), (double)1.0/shape)); 228 | } 229 | -------------------------------------------------------------------------------- /core/ross-random.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_tw_rand_h 2 | #define INC_tw_rand_h 3 | 4 | #define tw_opi 6.28318530718 5 | #define tw_rand_unif(G) rng_gen_val(G) 6 | #define tw_rand_reverse_unif(G) rng_gen_reverse_val(G) 7 | 8 | typedef struct tw_rng tw_rng; 9 | typedef struct tw_rng_stream tw_rng_stream; 10 | 11 | /* 12 | * Public Function Prototypes 13 | */ 14 | extern tw_rng *tw_rand_init(uint32_t v, uint32_t w); 15 | extern tw_rng *tw_rand_core_init(uint32_t v, uint32_t w); 16 | extern void tw_rand_initial_seed(tw_rng_stream * g, tw_lpid id, tw_rng * the_rng); 17 | extern long tw_rand_integer(tw_rng_stream * g, long low, long high); 18 | extern unsigned long tw_rand_ulong(tw_rng_stream * g, unsigned long low, unsigned long high); 19 | extern long tw_rand_binomial(tw_rng_stream * g, long N, double P); 20 | extern double tw_rand_exponential(tw_rng_stream * g, double Lambda); 21 | extern double tw_rand_pareto(tw_rng_stream * g, double scale, double shape); 22 | extern double tw_rand_gamma(tw_rng_stream * g, double shape, double scale); 23 | extern long tw_rand_geometric(tw_rng_stream * g, double P); 24 | extern double tw_rand_normal01(tw_rng_stream * g, unsigned int *rng_calls); 25 | extern double tw_rand_normal_sd(tw_rng_stream * g, double Mu, double Sd, unsigned int *rng_calls); 26 | 
extern long tw_rand_poisson(tw_rng_stream * g, double Lambda); 27 | extern double tw_rand_weibull(tw_rng_stream * g, double mean, double shape); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /core/ross.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_ross_h 2 | #define INC_ross_h 3 | 4 | /** @mainpage Rensselaer's Optimistic Simulation System (ROSS) 5 | @section intro_sec Introduction 6 | 7 | ROSS is an acronym for Rensselaer's Optimistic Simulation System. It is a 8 | parallel discrete-event simulator that executes on shared-memory 9 | multiprocessor systems. ROSS is geared for running large-scale simulation 10 | models (i.e., 100K to even 1 million object models). The synchronization 11 | mechanism is based on Time Warp. Time Warp is an optimistic 12 | synchronization mechanism develop by Jefferson and Sowizral [10, 11] used 13 | in the parallelization of discrete-event simulation. The distributed 14 | simulator consists of a collection of logical processes or LPs, each 15 | modeling a distinct component of the system being modeled, e.g., a server 16 | in a queuing network. LPs communicate by exchanging timestamped event 17 | messages, e.g., denoting the arrival of a new job at that server. 18 | 19 | The Time Warp mechanism uses a detection-and-recovery protocol to 20 | synchronize the computation. Any time an LP determines that it has 21 | processed events out of timestamp order, it "rolls back" those events, and 22 | re-executes them. For a detailed discussion of Time Warp as well as other 23 | parallel simulation protocols we refer the reader to [8] 24 | 25 | ROSS was modeled after a Time Warp simulator called GTW or Georgia Tech 26 | Time Warp[7]. ROSS helped to demonstrate that Time Warp simulators can be 27 | run efficiently both in terms of speed and memory usage relative to a 28 | high-performance sequential simulator. 
29 | 30 | To achieve high parallel performance, ROSS uses a technique called Reverse 31 | Computation. Here, the roll back mechanism in the optimistic simulator is 32 | realized not by classic state-saving, but by literally allowing to the 33 | greatest possible extent events to be reversed. Thus, as models are 34 | developed for parallel execution, both the forward and reverse execution 35 | code must be written. Currently, both are done by hand. We are 36 | investigating automatic methods that are able to generate the reverse 37 | execution code using only the forward execution code as input. For more 38 | information on ROSS and Reverse Computation we refer the interested reader 39 | to [4, 5]. Both of these texts are provided as additional reading in the 40 | ROSS distribution. 41 | 42 | @section license_sec License 43 | Copyright (c) 2013, Rensselaer Polytechnic Institute 44 | All rights reserved. 45 | 46 | Redistribution and use in source and binary forms, with or without 47 | modification, are permitted provided that the following conditions are 48 | met: 49 | 50 | Redistributions of source code must retain the above copyright 51 | notice, this list of conditions and the following disclaimer. 52 | 53 | Redistributions in binary form must reproduce the above copyright 54 | notice, this list of conditions and the following disclaimer in the 55 | documentation and/or other materials provided with the distribution. 56 | 57 | Neither the name of Rensselaer Polytechnic Institute nor the names 58 | of its contributors may be used to endorse or promote products 59 | derived from this software without specific prior written 60 | permission. 61 | 62 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 63 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 64 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 65 | A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 66 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 67 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 68 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 69 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 70 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 71 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 72 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 73 | 74 | */ 75 | 76 | /******************************************************************* 77 | * The location of this include is important, as it is outside of * 78 | * the __cplusplus check. This is required as the mpi header will * 79 | * mess up and complain if we force it into an extern "C" context. * 80 | *******************************************************************/ 81 | #include 82 | 83 | #ifdef __cplusplus 84 | extern "C" { 85 | #endif 86 | 87 | #ifndef ARRAY_SIZE 88 | #define ARRAY_SIZE(a) ( sizeof((a)) / sizeof((a)[0]) ) 89 | #endif 90 | 91 | #ifdef __GNUC__ 92 | # define NORETURN __attribute__((__noreturn__)) 93 | #else 94 | # define NORETURN 95 | # ifndef __attribute__ 96 | # define __attribute__(x) 97 | # endif 98 | #endif 99 | 100 | /********************************************************************* 101 | * 102 | * Include ``standard'' headers that most of ROSS will require. 
103 | * 104 | ********************************************************************/ 105 | 106 | #include "config.h" 107 | 108 | #include 109 | #include 110 | #include 111 | #include 112 | #include 113 | #include 114 | #include 115 | #include 116 | #include 117 | #include 118 | 119 | #if !defined(DBL_MAX) 120 | #include 121 | #endif 122 | 123 | #include 124 | #include 125 | 126 | #ifdef USE_BGPM 127 | #include 128 | #endif 129 | 130 | #ifdef ROSS_INTERNAL 131 | #undef malloc 132 | #undef calloc 133 | #undef realloc 134 | #undef strdup 135 | #undef free 136 | 137 | # define malloc(a) must_use_tw_calloc_not_malloc 138 | # define calloc(a,b) must_use_tw_calloc_not_calloc 139 | # define realloc(a,b) must_use_tw_calloc_not_realloc 140 | # define strdup(b) must_use_tw_calloc_not_strdup 141 | # define free(b) must_not_use_free 142 | #endif 143 | 144 | // #include "config.h" -- moved to individual files that need them -- e.g., tw-setup.c 145 | 146 | /* tw_peid -- Processing Element "PE" id */ 147 | typedef unsigned long tw_peid; 148 | 149 | /* tw_stime -- Simulation time value for sim clock (NOT wall!) */ 150 | typedef double tw_stime; 151 | #define MPI_TYPE_TW_STIME MPI_DOUBLE 152 | #define TW_STIME_CRT(x) (x) 153 | #define TW_STIME_DBL(x) (x) 154 | #define TW_STIME_CMP(x, y) (((x) < (y)) ? 
-1 : ((x) > (y))) 155 | #define TW_STIME_ADD(x, y) ((x) + (y)) 156 | #define TW_STIME_MAX DBL_MAX 157 | 158 | /* tw_lpid -- Logical Process "LP" id */ 159 | //typedef unsigned long long tw_lpid; 160 | typedef uint64_t tw_lpid; 161 | 162 | 163 | #include "buddy.h" 164 | #include "ross-random.h" 165 | 166 | #ifdef ROSS_RAND_clcg4 167 | # include "rand-clcg4.h" 168 | #endif 169 | 170 | #ifdef ROSS_CLOCK_i386 171 | # include "clock/i386.h" 172 | #endif 173 | #ifdef ROSS_CLOCK_amd64 174 | # include "clock/amd64.h" 175 | #endif 176 | #ifdef ROSS_CLOCK_ia64 177 | # include "clock/ia64.h" 178 | #endif 179 | #ifdef ROSS_CLOCK_ppc 180 | # include "clock/ppc.h" 181 | #endif 182 | #ifdef ROSS_CLOCK_ppc64le 183 | # include "clock/ppc64le.h" 184 | #endif 185 | #ifdef ROSS_CLOCK_bgl 186 | # include "clock/bgl.h" 187 | #endif 188 | #ifdef ROSS_CLOCK_bgq 189 | # include "clock/bgq.h" 190 | #endif 191 | #ifdef ROSS_CLOCK_aarch64 192 | # include "clock/aarch64.h" 193 | #endif 194 | #ifdef ROSS_CLOCK_armv7l 195 | # include "clock/armv7l.h" 196 | #endif 197 | #ifdef ROSS_CLOCK_gtod 198 | # include "clock/gtod.h" 199 | #endif 200 | 201 | #include "tw-timing.h" 202 | #include "ross-types.h" 203 | #include "tw-opts.h" 204 | 205 | #ifdef ROSS_NETWORK_mpi 206 | # include "network-mpi.h" 207 | #endif 208 | 209 | #include "ross-gvt.h" 210 | #include "ross-extern.h" 211 | #include "ross-kernel-inline.h" 212 | #include "hash-quadratic.h" 213 | 214 | #include "queue/tw-queue.h" 215 | 216 | #ifdef ROSS_GVT_7oclock 217 | # include "gvt/7oclock.h" 218 | #endif 219 | #ifdef ROSS_GVT_mpi_allreduce 220 | # include "mpi.h" 221 | # include "gvt/mpi_allreduce.h" 222 | #endif 223 | 224 | #include "instrumentation/st-instrumentation.h" 225 | 226 | #ifdef USE_DAMARIS 227 | #include "damaris/core/damaris.h" 228 | #endif 229 | 230 | #include "tw-eventq.h" 231 | 232 | #ifdef USE_RIO 233 | #include "rio/io.h" 234 | #endif 235 | 236 | #include "ross-inline.h" 237 | 238 | 239 | #ifdef __cplusplus 240 | } 241 | 
#endif 242 | 243 | #endif 244 | -------------------------------------------------------------------------------- /core/ross.pc.in: -------------------------------------------------------------------------------- 1 | prefix = @CMAKE_INSTALL_PREFIX@ 2 | 3 | ross_cflags=-I${prefix}/include 4 | ross_ldflags=-L${prefix}/lib -Wl,-rpath,${prefix}/lib 5 | ross_libs=-lROSS -lm 6 | 7 | Name: ROSS 8 | Description: Rensselaer's Optimistic Simulation System 9 | Version: @VERSION_SHORT@ 10 | URL: https://github.com/ROSS-org/ROSS 11 | Requires: 12 | Libs: ${ross_ldflags} ${ross_libs} 13 | Cflags: ${ross_cflags} 14 | -------------------------------------------------------------------------------- /core/tw-kp.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void 4 | tw_kp_onpe(tw_kpid id, tw_pe * pe) 5 | { 6 | if(id >= g_tw_nkp) 7 | tw_error(TW_LOC, "ID %d exceeded MAX KPs", id); 8 | 9 | if(g_tw_kp[id]) 10 | tw_error(TW_LOC, "KP already allocated: %lld\n", id); 11 | 12 | g_tw_kp[id] = (tw_kp *) tw_calloc(TW_LOC, "Local KP", sizeof(tw_kp), 1); 13 | 14 | g_tw_kp[id]->id = id; 15 | g_tw_kp[id]->pe = pe; 16 | 17 | #ifdef ROSS_QUEUE_kp_splay 18 | g_tw_kp[id]->pq = tw_eventpq_create(); 19 | #endif 20 | } 21 | 22 | 23 | #ifdef USE_RAND_TIEBREAKER 24 | void 25 | tw_kp_rollback_to_sig(tw_kp * kp, tw_event_sig to_sig) 26 | { 27 | tw_event *e; 28 | tw_clock pq_start; 29 | 30 | kp->s_rb_total++; 31 | kp->kp_stats->s_rb_total++; 32 | 33 | while (kp->pevent_q.size && tw_event_sig_compare(kp->pevent_q.head->sig, to_sig) >= 0) 34 | { 35 | e = tw_eventq_shift(&kp->pevent_q); 36 | 37 | // rollback first 38 | tw_event_rollback(e); 39 | 40 | // reset kp pointers 41 | if (kp->pevent_q.size == 0) 42 | { 43 | // kp->last_time = kp->pe->GVT; 44 | kp->last_sig = kp->pe->GVT_sig; 45 | } else 46 | { 47 | // kp->last_time = kp->pevent_q.head->recv_ts; 48 | kp->last_sig = kp->pevent_q.head->sig; 49 | } 50 | 51 | // place event back into priority queue 52 
| pq_start = tw_clock_read(); 53 | tw_pq_enqueue(kp->pe->pq, e); 54 | kp->pe->stats.s_pq += tw_clock_read() - pq_start; 55 | } 56 | } 57 | #else 58 | void 59 | tw_kp_rollback_to(tw_kp * kp, tw_stime to) 60 | { 61 | tw_event *e; 62 | tw_clock pq_start; 63 | 64 | kp->s_rb_total++; 65 | // instrumentation 66 | kp->kp_stats->s_rb_total++; 67 | 68 | #if VERIFY_ROLLBACK 69 | printf("%d %d: rb_to %f, now = %f \n", 70 | kp->pe->id, kp->id, TW_STIME_DBL(to), TW_STIME_DBL(kp->last_time)); 71 | #endif 72 | 73 | while(kp->pevent_q.size && TW_STIME_CMP(kp->pevent_q.head->recv_ts, to) >= 0) 74 | { 75 | e = tw_eventq_shift(&kp->pevent_q); 76 | 77 | /* 78 | * rollback first 79 | */ 80 | tw_event_rollback(e); 81 | 82 | /* 83 | * reset kp pointers 84 | */ 85 | if (kp->pevent_q.size == 0) 86 | { 87 | kp->last_time = kp->pe->GVT; 88 | } else 89 | { 90 | kp->last_time = kp->pevent_q.head->recv_ts; 91 | } 92 | 93 | /* 94 | * place event back into priority queue 95 | */ 96 | pq_start = tw_clock_read(); 97 | tw_pq_enqueue(kp->pe->pq, e); 98 | kp->pe->stats.s_pq += tw_clock_read() - pq_start; 99 | } 100 | } 101 | #endif 102 | 103 | void 104 | tw_kp_rollback_event(tw_event * event) 105 | { 106 | tw_event *e = NULL; 107 | tw_kp *kp; 108 | tw_pe *pe; 109 | tw_clock pq_start; 110 | 111 | kp = event->dest_lp->kp; 112 | pe = kp->pe; 113 | 114 | kp->s_rb_total++; 115 | kp->s_rb_secondary++; 116 | // instrumentation 117 | kp->kp_stats->s_rb_total++; 118 | kp->kp_stats->s_rb_secondary++; 119 | 120 | #if VERIFY_ROLLBACK 121 | printf("%d %d: rb_event: %f \n", pe->id, kp->id, event->recv_ts); 122 | 123 | if(!kp->pevent_q.size) 124 | tw_error(TW_LOC, "Attempting to rollback empty pevent_q!"); 125 | #endif 126 | 127 | e = tw_eventq_shift(&kp->pevent_q); 128 | while(e != event) 129 | { 130 | #ifdef USE_RAND_TIEBREAKER 131 | kp->last_sig = kp->pevent_q.head->sig; 132 | #else 133 | kp->last_time = kp->pevent_q.head->recv_ts; 134 | #endif 135 | tw_event_rollback(e); 136 | pq_start = tw_clock_read(); 137 | 
tw_pq_enqueue(pe->pq, e); 138 | pe->stats.s_pq += tw_clock_read() - pq_start; 139 | 140 | e = tw_eventq_shift(&kp->pevent_q); 141 | } 142 | 143 | tw_event_rollback(e); 144 | 145 | #ifdef USE_RAND_TIEBREAKER 146 | if (0 == kp->pevent_q.size) 147 | kp->last_sig = kp->pe->GVT_sig; 148 | else 149 | kp->last_sig = kp->pevent_q.head->sig; 150 | #else 151 | if (0 == kp->pevent_q.size) 152 | kp->last_time = kp->pe->GVT; 153 | else 154 | kp->last_time = kp->pevent_q.head->recv_ts; 155 | #endif 156 | } 157 | 158 | #ifndef NUM_OUT_MESG 159 | #define NUM_OUT_MESG 2000 160 | #endif 161 | static tw_out* 162 | init_output_messages(tw_kp *kp) 163 | { 164 | int i; 165 | 166 | tw_out *ret = (tw_out *) tw_calloc(TW_LOC, "tw_out", sizeof(struct tw_out), NUM_OUT_MESG); 167 | 168 | for (i = 0; i < NUM_OUT_MESG - 1; i++) { 169 | ret[i].next = &ret[i + 1]; 170 | ret[i].owner = kp; 171 | } 172 | ret[i].next = NULL; 173 | ret[i].owner = kp; 174 | 175 | return ret; 176 | } 177 | 178 | void 179 | tw_init_kps(tw_pe * me) 180 | { 181 | tw_kpid i; 182 | int j; 183 | 184 | for (i = 0; i < g_tw_nkp; i++) 185 | { 186 | tw_kp *kp = tw_getkp(i); 187 | 188 | if (kp->pe != me) 189 | continue; 190 | 191 | kp->id = i; 192 | kp->s_nevent_processed = 0; 193 | kp->s_e_rbs = 0; 194 | kp->s_rb_total = 0; 195 | kp->s_rb_secondary = 0; 196 | if (g_tw_synchronization_protocol == OPTIMISTIC || 197 | g_tw_synchronization_protocol == OPTIMISTIC_DEBUG || 198 | g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) { 199 | kp->output = init_output_messages(kp); 200 | } 201 | 202 | // instrumentation setup 203 | kp->kp_stats = (st_kp_stats*) tw_calloc(TW_LOC, "KP instrumentation", sizeof(st_kp_stats), 1); 204 | for (j = 0; j < 3; j++) 205 | kp->last_stats[j] = (st_kp_stats*) tw_calloc(TW_LOC, "KP instrumentation", sizeof(st_kp_stats), 1); 206 | } 207 | } 208 | 209 | tw_out * 210 | tw_kp_grab_output_buffer(tw_kp *kp) 211 | { 212 | if (kp->output) { 213 | tw_out *ret = kp->output; 214 | kp->output = kp->output->next; 215 
| ret->next = 0; 216 | return ret; 217 | } 218 | 219 | return NULL; 220 | } 221 | 222 | void 223 | tw_kp_put_back_output_buffer(tw_out *out) 224 | { 225 | tw_kp *kp = out->owner; 226 | 227 | if (kp->output) { 228 | out->next = kp->output; 229 | kp->output = out; 230 | } 231 | else { 232 | kp->output = out; 233 | kp->output->next = NULL; 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /core/tw-lp.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /** 4 | * @file tw-lp.c 5 | * @brief tw_lp_settype is defined here! 6 | */ 7 | 8 | // IMPORTANT: this function replaces tw_lp_settype 9 | // g_tw_lp_types must be defined 10 | // g_tw_lp_typemap must be defined 11 | void tw_lp_setup_types () { 12 | if ( !g_tw_lp_types ) { 13 | tw_error(TW_LOC, "No LP types are defined"); 14 | } 15 | 16 | if ( !g_tw_lp_typemap ) { 17 | tw_error(TW_LOC, "No LP type mapping is defined"); 18 | } 19 | 20 | unsigned int i; 21 | for (i = 0; i < g_tw_nlp; i++) { 22 | tw_lp *lp = g_tw_lp[i]; 23 | lp->type = &g_tw_lp_types[g_tw_lp_typemap(lp->gid)]; 24 | 25 | if (g_st_ev_trace) 26 | st_model_setup_types(lp); 27 | } 28 | 29 | } 30 | 31 | /** 32 | * IMPORTANT: This function should be called after tw_define_lps. It 33 | * copies the function pointers which define the LP to the appropriate 34 | * location for *each* LP, i.e. you probably want to call this more than 35 | * once. 
36 | */ 37 | void 38 | tw_lp_settype(tw_lpid id, tw_lptype * type) 39 | { 40 | tw_lp *lp = g_tw_lp[id]; 41 | 42 | if(id >= g_tw_nlp + g_st_analysis_nlp) 43 | tw_error(TW_LOC, "ID %ld exceeded MAX LPs (%ld)", id, g_tw_nlp + g_st_analysis_nlp); 44 | 45 | if(!lp || !lp->pe) 46 | tw_error(TW_LOC, "LP %u has no PE assigned.", lp->gid); 47 | 48 | // memcpy(&lp->type, type, sizeof(*type)); 49 | lp->type = type; 50 | 51 | if (type->state_sz > g_tw_delta_sz) { 52 | g_tw_delta_sz = type->state_sz; 53 | } 54 | } 55 | 56 | void 57 | tw_lp_onpe(tw_lpid id, tw_pe * pe, tw_lpid gid) 58 | { 59 | if(id >= g_tw_nlp + g_st_analysis_nlp) 60 | tw_error(TW_LOC, "ID %d exceeded MAX LPs", id); 61 | 62 | if(g_tw_lp[id]) 63 | tw_error(TW_LOC, "LP already allocated: %lld\n", id); 64 | 65 | g_tw_lp[id] = (tw_lp *) tw_calloc(TW_LOC, "Local LP", sizeof(tw_lp), 1); 66 | 67 | g_tw_lp[id]->gid = gid; 68 | g_tw_lp[id]->id = id; 69 | g_tw_lp[id]->pe = pe; 70 | } 71 | 72 | void 73 | tw_lp_onkp(tw_lp * lp, tw_kp * kp) 74 | { 75 | if(!lp) 76 | tw_error(TW_LOC, "Bad LP pointer!"); 77 | 78 | lp->kp = kp; 79 | kp->lp_count++; 80 | } 81 | 82 | void 83 | tw_init_lps(tw_pe * me) 84 | { 85 | tw_lpid i; 86 | int j; 87 | 88 | for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++) 89 | { 90 | tw_lp * lp = g_tw_lp[i]; 91 | 92 | if (lp->pe != me) 93 | continue; 94 | 95 | // Allocate initial state vector for this LP 96 | if(!lp->cur_state) { 97 | lp->cur_state = tw_calloc(TW_LOC, "state vector", lp->type->state_sz, 1); 98 | } 99 | 100 | lp->lp_stats = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1); 101 | for (j = 0; j < 3; j++) 102 | lp->last_stats[j] = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1); 103 | 104 | #ifndef USE_RIO 105 | if (lp->type->init) 106 | { 107 | me->cur_event = me->abort_event; 108 | me->cur_event->caused_by_me = NULL; 109 | 110 | (*(init_f)lp->type->init) (lp->cur_state, lp); 111 | 112 | if (me->cev_abort) 113 | tw_error(TW_LOC, "ran 
out of events during init"); 114 | } 115 | #endif 116 | } 117 | #ifdef USE_RIO 118 | // RIO requires that all LPs have been allocated 119 | if (g_io_load_at == PRE_INIT || g_io_load_at == INIT) { 120 | tw_clock start = tw_clock_read(); 121 | io_read_checkpoint(); 122 | me->stats.s_rio_load += (tw_clock_read() - start); 123 | } 124 | if (g_io_load_at != INIT) { 125 | tw_clock start = tw_clock_read(); 126 | for (i = 0; i < g_tw_nlp; i++) { 127 | tw_lp * lp = g_tw_lp[i]; 128 | me->cur_event = me->abort_event; 129 | me->cur_event->caused_by_me = NULL; 130 | 131 | (*(init_f)lp->type->init) (lp->cur_state, lp); 132 | 133 | if (me->cev_abort) { 134 | tw_error(TW_LOC, "ran out of events during init"); 135 | } 136 | } 137 | me->stats.s_rio_lp_init += (tw_clock_read() - start); 138 | } 139 | if (g_io_load_at == POST_INIT) { 140 | tw_clock start = tw_clock_read(); 141 | io_read_checkpoint(); 142 | me->stats.s_rio_load += (tw_clock_read() - start); 143 | } 144 | #endif 145 | } 146 | 147 | void tw_pre_run_lps (tw_pe * me) { 148 | tw_lpid i; 149 | 150 | for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++) { 151 | tw_lp * lp = g_tw_lp[i]; 152 | 153 | if (lp->pe != me) 154 | continue; 155 | 156 | if (lp->type->pre_run) { 157 | me->cur_event = me->abort_event; 158 | me->cur_event->caused_by_me = NULL; 159 | 160 | (*(pre_run_f)lp->type->pre_run) (lp->cur_state, lp); 161 | 162 | if (me->cev_abort) 163 | tw_error(TW_LOC, "ran out of events during pre_run"); 164 | } 165 | } 166 | } 167 | 168 | /********************************************************************//** 169 | LP Suspension Design Notes! (John Jenkins, ANL) 170 | 171 | Many times, when developing optimistic models, we are able to 172 | determine < LP state, event > pairs which represent infeasible model 173 | behavior. These types of simulation states typically arise when time 174 | warp causes us to receive and potentially process messages in an order 175 | we don't expect. 
176 | 177 | For example, consider a client/server protocol in which a server sends 178 | an ACK to a client upon completion of some event. In optimistic mode, 179 | the client can see what amounts to duplicate ACKs from the server due 180 | to the server LP rolling back and re-sending an ACK. 181 | 182 | While some models can gracefully cope with such issues, more complex 183 | models can have troubles (the client in the example could for instance 184 | destroy the request metadata after receiving an ACK). 185 | 186 | A solution, as noted in the "Dark Side of Risk" paper, is to introduce 187 | LP "self-suspend" functionality. If an LP is able to detect a < state, 188 | message > pair which is incorrect / unexpected in a well-behaved 189 | simulation, the LP should be able to put itself into suspend mode, 190 | refusing to process messages until rolled back to a pre < state, 191 | message > state. There are two benefits: 1) it greatly reduces the 192 | difficulty in tracking down and distinguishing proper model bugs from 193 | bugs arising from time-warp related issues such as out-of-order event 194 | receipt and 2) it improves simulation performance by pruning the 195 | number of processed events that we know are invalid and will be rolled 196 | back anyways. 197 | 198 | I suggest the function signature tw_suspend(tw_lp *lp, int 199 | do_suspend_event_rc, const char * format, ...), with the following 200 | semantics: 201 | 202 | After a call to tw_suspend, all subsequent events (both forward and 203 | reverse) that arrive at the suspended LP shall be processed as if they 204 | were no-ops. The reverse event handler of the event that caused the 205 | suspend will be run if do_orig_event_rc is nonzero; otherwise, the 206 | reverse event handler shall additionally be a no-op. 
Typically, 207 | do_orig_event_rc == 0 is desired, as good coding practices for 208 | moderate-or-greater complexity simulations dictate state/event 209 | validation prior to modifying LP state (partial rollbacks are very 210 | undesirable), but there may be messy logic in the user code for which 211 | a partial rollback is warranted (operations that free memory as a side 212 | effect of operations, for example). An LP exits suspend state upon 213 | rolling back the event that caused the suspend (whether or not that 214 | event is processed as a no-op). Upon GVT, if an LP is in self-suspend 215 | mode and the event that caused the suspend has a timestamp less than 216 | that of GVT, then the simulator shall report the format string of 217 | suspended LP(s) and exit. A NULL format string is acceptable for 218 | performance purposes, e.g. when doing "production" simulation runs. 219 | 220 | @param lp Pointer to the LP we're suspending 221 | @param do_orig_event_rc A bool indicating whether or not to skip the RC function 222 | @param error_num User-specified value for tracking purposes; ROSS ignores this 223 | 224 | *************************************************************************/ 225 | 226 | void 227 | tw_lp_suspend(tw_lp * lp, int do_orig_event_rc, int error_num ) 228 | { 229 | if(!lp) 230 | tw_error(TW_LOC, "Bad LP pointer!"); 231 | 232 | lp->suspend_flag=1; 233 | lp->suspend_event = lp->pe->cur_event; // only valid prior to GVT 234 | #ifdef USE_RAND_TIEBREAKER 235 | lp->suspend_sig = tw_now_sig(lp); 236 | #else 237 | lp->suspend_time = tw_now(lp); 238 | #endif 239 | lp->suspend_error_number = error_num; 240 | lp->suspend_do_orig_event_rc = do_orig_event_rc; 241 | 242 | } 243 | -------------------------------------------------------------------------------- /core/tw-opts.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_tw_opts_h 2 | #define INC_tw_opts_h 3 | 4 | enum tw_opttype 5 | { 6 | TWOPTTYPE_GROUP = 
1, 7 | TWOPTTYPE_ULONG, /**< value must be an "unsigned long*" */ 8 | TWOPTTYPE_ULONGLONG, /**< value must be an "unsigned long long*" */ 9 | TWOPTTYPE_UINT, /**< value must be an "unsigned int*" */ 10 | TWOPTTYPE_STIME, /**< value must be a "tw_stime*" */ 11 | TWOPTTYPE_DOUBLE, /**< value must be a "double *" */ 12 | TWOPTTYPE_CHAR, /**< value must be a "char *" */ 13 | TWOPTTYPE_FLAG, /**< value must be an "unsigned int*" */ 14 | TWOPTTYPE_SHOWHELP 15 | }; 16 | typedef enum tw_opttype tw_opttype; 17 | 18 | typedef struct tw_optdef tw_optdef; 19 | struct tw_optdef 20 | { 21 | tw_opttype type; 22 | const char *name; 23 | const char *help; 24 | void *value; 25 | }; 26 | 27 | #define TWOPT_GROUP(h) { TWOPTTYPE_GROUP, NULL, (h), NULL } 28 | #define TWOPT_ULONG(n,v,h) { TWOPTTYPE_ULONG, (n), (h), &(v) } 29 | #define TWOPT_ULONGLONG(n,v,h) { TWOPTTYPE_ULONGLONG, (n), (h), &(v) } 30 | #define TWOPT_UINT(n,v,h) { TWOPTTYPE_UINT, (n), (h), &(v) } 31 | #define TWOPT_STIME(n,v,h) { TWOPTTYPE_STIME, (n), (h), &(v) } 32 | #define TWOPT_DOUBLE(n,v,h) { TWOPTTYPE_DOUBLE, (n), (h), &(v) } 33 | #define TWOPT_CHAR(n,v,h) { TWOPTTYPE_CHAR, (n), (h), &(v) } 34 | #define TWOPT_FLAG(n,v,h) { TWOPTTYPE_FLAG, (n), (h), &(v) } 35 | #define TWOPT_END() { (tw_opttype)0, NULL, NULL, NULL } 36 | 37 | /** Remove options from the command line arguments. 
*/ 38 | extern void tw_opt_parse(int *argc, char ***argv); 39 | /** Add an opt group */ 40 | extern void tw_opt_add(const tw_optdef *options); 41 | /** Pretty-print the option descriptions (for --help) */ 42 | extern void tw_opt_print(void); 43 | /** Pretty-print the option descriptions and current values */ 44 | extern void tw_opt_settings(FILE *f); 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /core/tw-pe.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void dummy_pe_f (tw_pe *pe) 4 | { 5 | (void) pe; 6 | } 7 | 8 | void 9 | tw_pe_settype(const tw_petype * type) 10 | { 11 | if (!g_tw_pe) 12 | tw_error(TW_LOC, "Undefined PE!"); 13 | 14 | #define copy_pef(f, d) \ 15 | g_tw_pe->type.f = type->f ? type->f : d 16 | 17 | copy_pef(pre_lp_init, dummy_pe_f); 18 | copy_pef(post_lp_init, dummy_pe_f); 19 | copy_pef(gvt, dummy_pe_f); 20 | copy_pef(final, dummy_pe_f); 21 | 22 | #undef copy_pef 23 | } 24 | 25 | /** 26 | * initialize individual PE structs 27 | * 28 | * must be called after tw_nnodes / MPI world size is set. 
29 | * 30 | */ 31 | void 32 | tw_pe_init(void) 33 | { 34 | if (g_tw_pe) tw_error(TW_LOC, "PE %u already initialized", g_tw_mynode); 35 | 36 | g_tw_pe = (tw_pe*)tw_calloc(TW_LOC, "PE Struct", sizeof(*g_tw_pe), 1); 37 | 38 | g_tw_pe->id = g_tw_mynode; 39 | 40 | tw_petype no_type; 41 | memset(&no_type, 0, sizeof(no_type)); 42 | tw_pe_settype(&no_type); 43 | 44 | #ifdef USE_RAND_TIEBREAKER 45 | g_tw_pe->trans_msg_sig = tw_get_init_sig(TW_STIME_MAX, 1, TW_STIME_MAX); 46 | #else 47 | g_tw_pe->trans_msg_ts = TW_STIME_MAX; 48 | #endif 49 | g_tw_pe->gvt_status = 0; 50 | 51 | g_tw_pe->rng = tw_rand_init(31, 41); 52 | g_tw_pe->core_rng = tw_rand_core_init(31, 41); // Core RNG must have same v & w values as main RNG 53 | 54 | //If we're in (some variation of) optimistic mode, we need this hash 55 | if (g_tw_synchronization_protocol == OPTIMISTIC || 56 | g_tw_synchronization_protocol == OPTIMISTIC_DEBUG || 57 | g_tw_synchronization_protocol == OPTIMISTIC_REALTIME) { 58 | g_tw_pe->hash_t = tw_hash_create(); 59 | } else { 60 | g_tw_pe->hash_t = NULL; 61 | } 62 | 63 | } 64 | 65 | void 66 | tw_pe_fossil_collect(void) 67 | { 68 | tw_kp *kp; 69 | 70 | unsigned int i; 71 | 72 | g_tw_fossil_attempts++; 73 | 74 | for(i = 0; i < g_tw_nkp; i++) 75 | { 76 | kp = tw_getkp(i); 77 | tw_eventq_fossil_collect(&kp->pevent_q, g_tw_pe); 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /core/tw-state.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "lz4.h" 4 | 5 | /** 6 | * Make a snapshot of the LP state and store it into the delta buffer 7 | */ 8 | void 9 | tw_snapshot(tw_lp *lp, size_t state_sz) 10 | { 11 | assert(lp->pe->delta_buffer[0] && "increase --buddy-size argument!"); 12 | memcpy(lp->pe->delta_buffer[0], lp->cur_state, state_sz); 13 | } 14 | 15 | /** 16 | * Create the delta from the current state and the snapshot. 17 | * Compress it. 
18 | * @return The size of the compressed data placed in delta_buffer[1]. 19 | */ 20 | long 21 | tw_snapshot_delta(tw_lp *lp, size_t state_sz) 22 | { 23 | unsigned long i; 24 | tw_clock start; 25 | int ret_size = 0; 26 | unsigned char *current_state = (unsigned char *)lp->cur_state; 27 | unsigned char *snapshot = lp->pe->delta_buffer[0]; 28 | void *scratch = lp->pe->delta_buffer[2]; 29 | 30 | for (i = 0; i < state_sz; i++) { 31 | snapshot[i] = current_state[i] - snapshot[i]; 32 | } 33 | 34 | start = tw_clock_read(); 35 | ret_size = LZ4_compress_fast_extState(scratch, (char*)snapshot, (char*)lp->pe->delta_buffer[1], state_sz, g_tw_delta_sz, g_tw_lz4_knob); 36 | g_tw_pe->stats.s_lz4 += (tw_clock_read() - start); 37 | if (ret_size < 0) { 38 | tw_error(TW_LOC, "LZ4_compress error"); 39 | } 40 | 41 | start = tw_clock_read(); 42 | lp->pe->cur_event->delta_buddy = buddy_alloc(ret_size); 43 | g_tw_pe->stats.s_buddy += (tw_clock_read() - start); 44 | assert(lp->pe->cur_event->delta_buddy); 45 | lp->pe->cur_event->delta_size = ret_size; 46 | memcpy(lp->pe->cur_event->delta_buddy, lp->pe->delta_buffer[1], ret_size); 47 | 48 | return ret_size; 49 | } 50 | 51 | /** 52 | * Restore the state of lp to the (decompressed) data held in buffer 53 | */ 54 | void 55 | tw_snapshot_restore(tw_lp *lp, size_t state_sz) 56 | { 57 | unsigned int i; 58 | tw_clock start = tw_clock_read(); 59 | unsigned char *snapshot = (unsigned char *)lp->pe->cur_event->delta_buddy; 60 | unsigned char *current_state = (unsigned char *)lp->cur_state; 61 | 62 | int ret = LZ4_decompress_fast((char *)snapshot, (char*)lp->pe->delta_buffer[0], state_sz); 63 | g_tw_pe->stats.s_lz4 += (tw_clock_read() - start); 64 | if (ret < 0) { 65 | tw_error(TW_LOC, "LZ4_decompress_fast error"); 66 | } 67 | 68 | snapshot = lp->pe->delta_buffer[0]; 69 | for (i = 0; i < state_sz; i++) { 70 | current_state[i] = current_state[i] - snapshot[i]; 71 | } 72 | } 73 | 
-------------------------------------------------------------------------------- /core/tw-stats.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef ROSS_DO_NOT_PRINT 4 | static void 5 | show_lld(const char *name, tw_stat v) 6 | { 7 | printf("\t%-50s %11lld\n", name, v); 8 | fprintf(g_tw_csv, ",%lld", v); 9 | } 10 | 11 | static void 12 | show_2f(const char *name, double v) 13 | { 14 | printf("\t%-50s %11.2f %%\n", name, v); 15 | fprintf(g_tw_csv, ",%.2f", v); 16 | } 17 | 18 | static void 19 | show_1f(const char *name, double v) 20 | { 21 | printf("\t%-50s %11.1f\n", name, v); 22 | fprintf(g_tw_csv, ",%.2f", v); 23 | } 24 | 25 | static void 26 | show_4f(const char *name, double v) 27 | { 28 | printf("\t%-50s %11.4lf\n", name, v); 29 | fprintf(g_tw_csv, ",%.4lf", v); 30 | } 31 | 32 | #endif 33 | 34 | void 35 | tw_get_stats(tw_pe * pe, tw_statistics *s) 36 | { 37 | tw_kp *kp; 38 | 39 | unsigned int i; 40 | 41 | if (pe != g_tw_pe) 42 | return; 43 | 44 | if (0 == g_tw_sim_started) 45 | return; 46 | 47 | tw_wtime rt; 48 | 49 | tw_wall_sub(&rt, &pe->end_time, &pe->start_time); 50 | 51 | s->s_max_run_time = ROSS_MAX(s->s_max_run_time, tw_wall_to_double(&rt)); 52 | s->s_nevent_abort += pe->stats.s_nevent_abort; 53 | s->s_pq_qsize += tw_pq_get_size(pe->pq); 54 | 55 | s->s_nsend_net_remote += pe->stats.s_nsend_net_remote; 56 | s->s_nsend_loc_remote += pe->stats.s_nsend_loc_remote; 57 | 58 | s->s_nsend_network += pe->stats.s_nsend_network; 59 | s->s_nread_network += pe->stats.s_nread_network; 60 | s->s_nsend_remote_rb += pe->stats.s_nsend_remote_rb; 61 | 62 | s->s_total += pe->stats.s_total; 63 | s->s_init += pe->stats.s_init; 64 | s->s_net_read += pe->stats.s_net_read; 65 | s->s_net_other += pe->stats.s_net_other; 66 | s->s_gvt += pe->stats.s_gvt; 67 | s->s_fossil_collect += pe->stats.s_fossil_collect; 68 | s->s_event_abort += pe->stats.s_event_abort; 69 | s->s_event_process += pe->stats.s_event_process; 70 | s->s_pq 
+= pe->stats.s_pq; 71 | s->s_rollback += pe->stats.s_rollback; 72 | s->s_cancel_q += pe->stats.s_cancel_q; 73 | s->s_pe_event_ties += pe->stats.s_pe_event_ties; 74 | s->s_min_detected_offset = g_tw_min_detected_offset; 75 | s->s_avl += pe->stats.s_avl; 76 | s->s_buddy += pe->stats.s_buddy; 77 | s->s_lz4 += pe->stats.s_lz4; 78 | s->s_stat_comp += pe->stats.s_stat_comp; 79 | s->s_stat_write += pe->stats.s_stat_write; 80 | s->s_events_past_end += pe->stats.s_events_past_end; 81 | #ifdef USE_RIO 82 | s->s_rio_load += pe->stats.s_rio_load; 83 | s->s_rio_lp_init += pe->stats.s_rio_lp_init; 84 | #endif 85 | s->s_alp_nevent_processed = pe->stats.s_alp_nevent_processed; 86 | s->s_alp_e_rbs = pe->stats.s_alp_e_rbs; 87 | 88 | for(i = 0; i < g_tw_nkp; i++) 89 | { 90 | kp = tw_getkp(i); 91 | s->s_nevent_processed += kp->s_nevent_processed; 92 | s->s_e_rbs += kp->s_e_rbs; 93 | s->s_rb_total += kp->s_rb_total; 94 | s->s_rb_secondary += kp->s_rb_secondary; 95 | } 96 | 97 | 98 | s->s_fc_attempts = g_tw_fossil_attempts; 99 | s->s_net_events = s->s_nevent_processed - s->s_e_rbs; 100 | s->s_rb_primary = s->s_rb_total - s->s_rb_secondary; 101 | } 102 | 103 | void st_print_analysis_LP_stats(tw_statistics *s) 104 | { 105 | tw_stat model_nevent = s->s_nevent_processed - s->s_alp_nevent_processed; 106 | tw_stat model_e_rbs = s->s_e_rbs - s->s_alp_e_rbs; 107 | tw_stat model_net = model_nevent - model_e_rbs; 108 | tw_stat analysis_net = s->s_alp_nevent_processed - s->s_alp_e_rbs; 109 | 110 | printf("\nSeparate Statistics for Model and Analysis LPs\n"); 111 | printf("Model LPs:\n"); 112 | show_lld("Total Events Processed", model_nevent); 113 | show_lld("Events Rolled Back", model_e_rbs); 114 | show_lld("Net Events Processed", model_net); 115 | show_2f("Efficiency", 100.0 * (1.0 - ((double) model_e_rbs / (double) model_net))); 116 | 117 | printf("\nAnalysis LPs:\n"); 118 | show_lld("Total Events Processed", s->s_alp_nevent_processed); 119 | show_lld("Events Rolled Back", s->s_alp_e_rbs); 120 | 
show_lld("Net Events Processed", analysis_net); 121 | show_2f("Efficiency", 100.0 * (1.0 - ((double) s->s_alp_e_rbs / (double) analysis_net))); 122 | } 123 | 124 | void 125 | tw_stats(tw_pe *me) 126 | { 127 | tw_statistics s; 128 | bzero(&s, sizeof(s)); 129 | size_t m_alloc, m_waste; 130 | tw_calloc_stats(&m_alloc, &m_waste); 131 | tw_lp *lp = NULL; 132 | unsigned int i; 133 | for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++) 134 | { 135 | lp = tw_getlp(i); 136 | if (lp->type->final) 137 | (*lp->type->final) (lp->cur_state, lp); 138 | } 139 | tw_get_stats(me, &s); 140 | s = *(tw_net_statistics(me, &s)); 141 | 142 | if (!tw_ismaster()) 143 | return; 144 | 145 | #ifndef ROSS_DO_NOT_PRINT 146 | printf("\n\t: Running Time = %.4f seconds\n", s.s_max_run_time); 147 | fprintf(g_tw_csv, "%.4f", s.s_max_run_time); 148 | 149 | printf("\nTW Library Statistics:\n"); 150 | show_lld("Total Events Processed", s.s_nevent_processed); 151 | show_lld("Events Aborted (part of RBs)", s.s_nevent_abort); 152 | show_lld("Events Rolled Back", s.s_e_rbs); 153 | show_lld("Event Ties Detected in PE Queues", s.s_pe_event_ties); 154 | if(g_tw_synchronization_protocol == CONSERVATIVE) 155 | printf("\t%-50s %11.9lf\n", 156 | "Minimum TS Offset Detected in Conservative Mode", 157 | (double) s.s_min_detected_offset); 158 | show_2f("Efficiency", 100.0 * (1.0 - ((double) s.s_e_rbs / (double) s.s_net_events))); 159 | show_lld("Total Remote (shared mem) Events Processed", s.s_nsend_loc_remote); 160 | 161 | show_2f( 162 | "Percent Remote Events", 163 | ( (double)s.s_nsend_loc_remote 164 | / (double)s.s_net_events) 165 | * 100.0 166 | ); 167 | 168 | show_lld("Total Remote (network) Events Processed", s.s_nsend_net_remote); 169 | show_2f( 170 | "Percent Remote Events", 171 | ( (double)s.s_nsend_net_remote 172 | / (double)s.s_net_events) 173 | * 100.0 174 | ); 175 | 176 | printf("\n"); 177 | show_lld("Total Roll Backs ", s.s_rb_total); 178 | show_lld("Primary Roll Backs ", s.s_rb_primary); 179 | 
show_lld("Secondary Roll Backs ", s.s_rb_secondary); 180 | show_lld("Fossil Collect Attempts", s.s_fc_attempts); 181 | show_lld("Total GVT Computations", g_tw_gvt_done); 182 | 183 | printf("\n"); 184 | show_lld("Net Events Processed", s.s_net_events); 185 | show_1f( 186 | "Event Rate (events/sec)", 187 | ((double)s.s_net_events / s.s_max_run_time) 188 | ); 189 | 190 | show_lld("Total Events Scheduled Past End Time", s.s_events_past_end); 191 | 192 | printf("\nTW Memory Statistics:\n"); 193 | show_lld("Events Allocated", 1 + g_tw_events_per_pe + g_tw_events_per_pe_extra); 194 | show_lld("Memory Allocated", m_alloc / 1024); 195 | show_lld("Memory Wasted", m_waste / 1024); 196 | 197 | if (tw_nnodes() > 1) { 198 | printf("\n"); 199 | printf("TW Network Statistics:\n"); 200 | show_lld("Remote sends", s.s_nsend_network); 201 | show_lld("Remote recvs", s.s_nread_network); 202 | } 203 | 204 | printf("\nTW Data Structure sizes in bytes (sizeof):\n"); 205 | show_lld("PE struct", sizeof(tw_pe)); 206 | show_lld("KP struct", sizeof(tw_kp)); 207 | show_lld("LP struct", sizeof(tw_lp)); 208 | show_lld("LP Model struct", lp->type->state_sz); 209 | show_lld("LP RNGs", sizeof(*lp->rng)); 210 | show_lld("Total LP", sizeof(tw_lp) + lp->type->state_sz + sizeof(*lp->rng)); 211 | show_lld("Event struct", sizeof(tw_event)); 212 | show_lld("Event struct with Model", sizeof(tw_event) + g_tw_msg_sz); 213 | 214 | #ifdef ROSS_timing 215 | printf("\nTW Clock Cycle Statistics (MAX values in secs at %1.4lf GHz):\n", g_tw_clock_rate / 1000000000.0); 216 | show_4f("Initialization", (double) s.s_init / g_tw_clock_rate); 217 | show_4f("Priority Queue (enq/deq)", (double) s.s_pq / g_tw_clock_rate); 218 | show_4f("AVL Tree (insert/delete)", (double) s.s_avl / g_tw_clock_rate); 219 | show_4f("LZ4 (de)compression", (double) s.s_lz4 / g_tw_clock_rate); 220 | show_4f("Buddy system", (double) s.s_buddy / g_tw_clock_rate); 221 | #ifdef USE_RIO 222 | show_4f("RIO Loading", (double) s.s_rio_load / 
g_tw_clock_rate); 223 | show_4f("RIO LP Init", (double) s.s_rio_lp_init / g_tw_clock_rate); 224 | #endif 225 | show_4f("Event Processing", (double) s.s_event_process / g_tw_clock_rate); 226 | show_4f("Event Cancel", (double) s.s_cancel_q / g_tw_clock_rate); 227 | show_4f("Event Abort", (double) s.s_event_abort / g_tw_clock_rate); 228 | printf("\n"); 229 | show_4f("GVT", (double) s.s_gvt / g_tw_clock_rate); 230 | show_4f("Fossil Collect", (double) s.s_fossil_collect / g_tw_clock_rate); 231 | show_4f("Primary Rollbacks", (double) s.s_rollback / g_tw_clock_rate); 232 | show_4f("Network Read", (double) s.s_net_read / g_tw_clock_rate); 233 | show_4f("Other Network", (double) s.s_net_other / g_tw_clock_rate); 234 | show_4f("Instrumentation (computation)", (double) s.s_stat_comp / g_tw_clock_rate); 235 | show_4f("Instrumentation (write)", (double) s.s_stat_write / g_tw_clock_rate); 236 | show_4f("Total Time (Note: Using Running Time above for Speedup)", (double) s.s_total / g_tw_clock_rate); 237 | #endif 238 | 239 | tw_gvt_stats(stdout); 240 | 241 | if (g_st_use_analysis_lps) 242 | st_print_analysis_LP_stats(&s); 243 | #endif 244 | } 245 | -------------------------------------------------------------------------------- /core/tw-timing.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void 4 | tw_wall_now(tw_wtime * t) 5 | { 6 | if(0 != gettimeofday((struct timeval *)t, NULL)) 7 | tw_error(TW_LOC, "Unable to get time of day!"); 8 | } 9 | 10 | void 11 | tw_wall_sub(tw_wtime * r, tw_wtime * a, tw_wtime * b) 12 | { 13 | r->tv_sec = a->tv_sec - b->tv_sec; 14 | r->tv_usec = a->tv_usec - b->tv_usec; 15 | 16 | if (r->tv_usec < 0) 17 | { 18 | r->tv_sec--; 19 | r->tv_usec += 1000000; 20 | } 21 | } 22 | 23 | double 24 | tw_wall_to_double(tw_wtime * t) 25 | { 26 | return (double)t->tv_sec + (((double)t->tv_usec) / 1000000); 27 | } 28 | -------------------------------------------------------------------------------- 
/core/tw-timing.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_tw_timing_h 2 | #define INC_tw_timing_h 3 | 4 | typedef struct timeval tw_wtime; 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /core/tw-util.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /** 4 | * Rollback-aware printf, i.e. if the event gets rolled back, undo the printf. 5 | * We can'd do that of course so we store the message in a buffer until GVT. 6 | */ 7 | int 8 | tw_output(tw_lp *lp, const char *fmt, ...) 9 | { 10 | int ret = 0; 11 | va_list ap; 12 | tw_event *cev; 13 | tw_out *temp; 14 | 15 | if (g_tw_synchronization_protocol == SEQUENTIAL || g_tw_synchronization_protocol == CONSERVATIVE) { 16 | va_start(ap, fmt); 17 | vfprintf(stdout, fmt, ap); 18 | va_end(ap); 19 | return 0; 20 | } 21 | 22 | tw_out *out = tw_kp_grab_output_buffer(lp->kp); 23 | if (!out) { 24 | tw_printf(TW_LOC, "kp (%d) has no available output buffers\n", lp->kp->id); 25 | tw_printf(TW_LOC, "This event may be rolled back!"); 26 | va_start(ap, fmt); 27 | vfprintf(stdout, fmt, ap); 28 | va_end(ap); 29 | return 0; 30 | } 31 | 32 | cev = lp->pe->cur_event; 33 | 34 | if (cev->out_msgs == 0) { 35 | cev->out_msgs = out; 36 | } 37 | else { 38 | // Attach it to the end 39 | temp = cev->out_msgs; 40 | 41 | while (temp->next != 0) { 42 | temp = temp->next; 43 | } 44 | temp->next = out; 45 | } 46 | 47 | va_start(ap, fmt); 48 | ret = vsnprintf(out->message, sizeof(out->message), fmt, ap); 49 | va_end(ap); 50 | if (ret >= 0 && (unsigned)ret < sizeof(out->message)) { 51 | // Should be successful 52 | } 53 | else { 54 | tw_printf(TW_LOC, "Message may be too large?"); 55 | } 56 | 57 | return ret; 58 | } 59 | 60 | void 61 | tw_printf(const char *file, int line, const char *fmt, ...) 
62 | { 63 | va_list ap; 64 | 65 | va_start(ap, fmt); 66 | fprintf(stdout, "%s:%i: ", file, line); 67 | vfprintf(stdout, fmt, ap); 68 | fprintf(stdout, "\n"); 69 | fflush(stdout); 70 | va_end(ap); 71 | } 72 | 73 | void 74 | tw_error(const char *file, int line, const char *fmt, ...) 75 | { 76 | va_list ap; 77 | 78 | va_start(ap, fmt); 79 | fprintf(stdout, "node: %ld: error: %s:%i: ", g_tw_mynode, file, line); 80 | vfprintf(stdout, fmt, ap); 81 | fprintf(stdout, "\n"); 82 | fflush(stdout); 83 | fflush(stdout); 84 | va_end(ap); 85 | 86 | tw_net_abort(); 87 | } 88 | 89 | void 90 | tw_warning(const char *file, int line, const char *fmt, ...) 91 | { 92 | va_list ap; 93 | 94 | va_start(ap, fmt); 95 | fprintf(stdout, "node: %ld: warning: %s:%i: ", g_tw_mynode, file, line); 96 | vfprintf(stdout, fmt, ap); 97 | fprintf(stdout, "\n"); 98 | fflush(stdout); 99 | fflush(stdout); 100 | va_end(ap); 101 | } 102 | 103 | struct mem_pool 104 | { 105 | struct mem_pool *next_pool; 106 | char *next_free; 107 | char *end_free; 108 | }__attribute__((aligned(8))); 109 | 110 | static struct mem_pool *main_pool; 111 | 112 | //static const size_t pool_size = 512 * 1024 - sizeof(struct mem_pool); 113 | static const size_t pool_size = (512 * 1024) - 32; 114 | static const size_t pool_align = ROSS_MAX(sizeof(double),sizeof(void*)); 115 | static size_t total_allocated; 116 | static unsigned malloc_calls; 117 | static void* my_malloc(size_t len); 118 | 119 | void 120 | tw_calloc_stats( 121 | size_t *bytes_alloc, 122 | size_t *bytes_wasted) 123 | { 124 | struct mem_pool *p; 125 | 126 | *bytes_alloc = total_allocated; 127 | *bytes_wasted = malloc_calls * (sizeof(void*) + sizeof(size_t)); 128 | 129 | for (p = main_pool; p; p = p->next_pool) 130 | *bytes_wasted += p->end_free - p->next_free; 131 | } 132 | 133 | /* debug version - don't use pool allocator so tools like valgrind can 134 | * detect memory bugs */ 135 | #ifdef ROSS_ALLOC_DEBUG 136 | 137 | void* 138 | tw_calloc( 139 | const char *file, 140 | 
int line, 141 | const char *for_who, 142 | size_t e_sz, 143 | size_t n) 144 | { 145 | void *r = calloc(e_sz, n); 146 | if (!r){ 147 | tw_error( 148 | file, line, 149 | "Cannot allocate %lu bytes for %u %s", 150 | (unsigned long)e_sz, 151 | n, 152 | for_who); 153 | } 154 | return r; 155 | } 156 | 157 | #else 158 | 159 | static void* 160 | pool_alloc(size_t len) 161 | { 162 | struct mem_pool *p; 163 | void *r; 164 | 165 | for (p = main_pool; p; p = p->next_pool) 166 | if ((unsigned)(p->end_free - p->next_free) >= len) 167 | break; 168 | 169 | if (!p) { 170 | if (len >= pool_size) { 171 | r = my_malloc(len); 172 | goto ret; 173 | } 174 | 175 | p = (struct mem_pool *) my_malloc(pool_size + 32); 176 | if (!p) { 177 | r = NULL; 178 | goto ret; 179 | } 180 | 181 | p->next_pool = main_pool; 182 | //p->next_free = (char*)(p + 1); 183 | p->next_free = (char *)((size_t)32 + (size_t)p); 184 | if( 7 & (size_t)(p->next_free) ) 185 | printf("pool_alloc: WARNING found pool start address (%p) NOT 8 byte aligned\n", p->next_free); 186 | p->end_free = p->next_free + pool_size; 187 | main_pool = p; 188 | } 189 | 190 | r = p->next_free; 191 | p->next_free += len; 192 | 193 | if( 7 & (size_t)r || 7 & (size_t)(p->next_free) ) 194 | printf("pool_alloc: WARNING found return ptr (%p) or next_free (%p) NOT 8 bytes aligned\n", r, p->next_free ); 195 | 196 | ret: 197 | if (r) 198 | total_allocated += len; 199 | return r; 200 | } 201 | 202 | void* 203 | tw_calloc( 204 | const char *file, 205 | int line, 206 | const char *for_who, 207 | size_t e_sz, 208 | size_t n) 209 | { 210 | void *r; 211 | 212 | if(e_sz & (pool_align - 1)) 213 | { 214 | e_sz += pool_align - (e_sz & (pool_align - 1)); 215 | // printf("%s:%d:%s: realigned size to %d \n", file, line, for_who, e_sz ); 216 | } 217 | 218 | e_sz *= n; 219 | if (!e_sz) 220 | return NULL; 221 | 222 | r = pool_alloc(e_sz); 223 | if (!r) 224 | tw_error( 225 | file, line, 226 | "Cannot allocate %lu bytes for %u %s" 227 | " (need total of %lu KiB)", 228 
| (unsigned long)e_sz, 229 | n, 230 | for_who, 231 | (unsigned long)((total_allocated + e_sz) / 1024)); 232 | memset(r, 0, e_sz); 233 | return r; 234 | } 235 | 236 | #endif 237 | 238 | #undef malloc 239 | static void* 240 | my_malloc(size_t len) 241 | { 242 | malloc_calls++; 243 | return malloc(len); 244 | } 245 | 246 | #undef realloc 247 | -------------------------------------------------------------------------------- /docs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | IF(ROSS_BUILD_DOXYGEN) 2 | 3 | IF(DOXYGEN_DOT_FOUND) 4 | SET(HAVE_DOT YES) 5 | 6 | OPTION(DOXYGEN_CALL_GRAPHS "Build Doxygen Call Graphs" OFF) 7 | IF(DOXYGEN_CALL_GRAPHS) 8 | SET(CALL_GRAPHS_ON YES) 9 | ELSE(DOXYGEN_CALL_GRAPHS) 10 | SET(CALL_GRAPHS_ON NO) 11 | ENDIF(DOXYGEN_CALL_GRAPHS) 12 | 13 | OPTION(DOXYGEN_CALLER_GRAPHS "Build Doxygen Caller Graphs" OFF) 14 | IF(DOXYGEN_CALLER_GRAPHS) 15 | SET(CALLER_GRAPHS_ON YES) 16 | ELSE(DOXYGEN_CALLER_GRAPHS) 17 | SET(CALLER_GRAPHS_ON NO) 18 | ENDIF(DOXYGEN_CALLER_GRAPHS) 19 | 20 | OPTION(DOXYGEN_DEFAULT_GRAPHS "Build Default Doxygen Graphs" OFF) 21 | IF(DOXYGEN_DEFAULT_GRAPHS) 22 | SET(DEFAULT_GRAPHS_ON YES) 23 | ELSE(DOXYGEN_DEFAULT_GRAPHS) 24 | SET(DEFAULT_GRAPHS_ON NO) 25 | ENDIF(DOXYGEN_DEFAULT_GRAPHS) 26 | 27 | ELSE(DOXYGEN_DOT_FOUND) 28 | SET(HAVE_DOT NO) 29 | SET(CALL_GRAPHS NO) 30 | SET(CALLER_GRAPHS NO) 31 | SET(DEFAULT_GRAPHS NO) 32 | ENDIF(DOXYGEN_DOT_FOUND) 33 | 34 | SET(DOXYGEN_OUTPUT ${CMAKE_BINARY_DIR}/docs/html/index.html) 35 | 36 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.user.in ${CMAKE_BINARY_DIR}/Doxyfile.user) 37 | 38 | ADD_CUSTOM_COMMAND( 39 | OUTPUT ${DOXYGEN_OUTPUT} 40 | COMMAND ${CMAKE_COMMAND} -E echo_append "Building API Documentation..." 
41 | COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile.user 42 | COMMAND ${CMAKE_COMMAND} -E echo "Done.") 43 | 44 | ADD_CUSTOM_TARGET(apidoc ALL DEPENDS ${DOXYGEN_OUTPUT}) 45 | 46 | ENDIF(ROSS_BUILD_DOXYGEN) 47 | -------------------------------------------------------------------------------- /docs/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | $projectname: $title 9 | $title 10 | 11 | 12 | 13 | $treeview 14 | $search 15 | $mathjax 16 | 17 | $extrastylesheet 18 | 19 | 20 |
21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 39 | 40 | 41 | 42 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
32 | 33 |
$projectname 34 |  $projectnumber 35 |
36 |
37 |
$projectbrief
38 |
43 |
$projectbrief
44 |
$searchbox
55 |
56 | 57 | 58 |
--------------------------------------------------------------------------------
/models/CMakeLists.txt:
--------------------------------------------------------------------------------
## BASIC SCHEDULERS
# Registers one CTest test per synchronization mode (--synch=1..5) for the
# given executable target and applies a 60 second timeout to each.
FUNCTION(ROSS_TEST_SCHEDULERS target_name)
  ADD_TEST(${target_name}_SCHED_Sequential ${target_name} --synch=1)
  ADD_TEST(${target_name}_SCHED_Conservative mpirun -np 2 ./${target_name} --synch=2)
  ADD_TEST(${target_name}_SCHED_Optimistic mpirun -np 2 ./${target_name} --synch=3 --extramem=100000)
  ADD_TEST(${target_name}_SCHED_Realtime mpirun -np 2 ./${target_name} --synch=5 --gvt-interval=1 --clock-rate=$ENV{CLOCK_SPEED})
  ADD_TEST(${target_name}_SCHED_OptDebug ${target_name} --synch=4 --nkp=1 --extramem=1000000)

  SET(sched_tests
    ${target_name}_SCHED_Sequential
    ${target_name}_SCHED_Conservative
    ${target_name}_SCHED_Optimistic
    ${target_name}_SCHED_Realtime
    ${target_name}_SCHED_OptDebug)
  SET_TESTS_PROPERTIES(${sched_tests} PROPERTIES TIMEOUT 60)
ENDFUNCTION(ROSS_TEST_SCHEDULERS)

## INSTRUMENTATION TESTS
# Registers the instrumentation test matrix for the given executable target
# and applies a 60 second timeout to every test it registers.
FUNCTION(ROSS_TEST_INSTRUMENTATION target_name)
  ADD_TEST(${target_name}_INST_OptPE mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
  ADD_TEST(${target_name}_INST_OptLP mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

  ADD_TEST(${target_name}_INST_RTOptPE mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
  ADD_TEST(${target_name}_INST_RTOptLP mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

  ADD_TEST(${target_name}_INST_ConsPE mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
  ADD_TEST(${target_name}_INST_ConsLP mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

  ADD_TEST(${target_name}_INST_Seq ./${target_name} --synch=1 --event-trace=1 --extramem=100000)

  ADD_TEST(${target_name}_INST_Model mpirun -np 2 ./${target_name} --synch=3 --model-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

  ADD_TEST(${target_name}_INST_DisableOutput mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --disable-output=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
  ADD_TEST(${target_name}_INST_ForceOverflow mpirun -np 2 ./${target_name} --synch=3 --event-trace=1 --buffer-free=0 --extramem=100000)
  ADD_TEST(${target_name}_INST_FullEventTrace mpirun -np 2 ./${target_name} --synch=3 --event-trace=1 --extramem=100000)
  ADD_TEST(${target_name}_INST_RBEventTrace mpirun -np 2 ./${target_name} --synch=3 --event-trace=2 --extramem=100000)

  # Every test registered above must appear here, otherwise it runs without
  # the timeout.
  SET(inst_tests
    ${target_name}_INST_OptPE
    ${target_name}_INST_OptLP
    ${target_name}_INST_RTOptPE
    ${target_name}_INST_RTOptLP
    ${target_name}_INST_ConsPE
    ${target_name}_INST_ConsLP
    ${target_name}_INST_Seq
    ${target_name}_INST_Model
    ${target_name}_INST_DisableOutput
    ${target_name}_INST_ForceOverflow
    ${target_name}_INST_FullEventTrace
    ${target_name}_INST_RBEventTrace)
  SET_TESTS_PROPERTIES(${inst_tests} PROPERTIES TIMEOUT 60)
ENDFUNCTION(ROSS_TEST_INSTRUMENTATION)


# Automatically search and add model subdirectories
CMAKE_POLICY(SET CMP0009 NEW) # follow sym-links
FILE(GLOB_RECURSE my_list . FOLLOW_SYMLINKS */CMakeLists.txt)
FOREACH(cmf ${my_list})
  GET_FILENAME_COMPONENT(dir_path ${cmf} PATH)
  ADD_SUBDIRECTORY(${dir_path})
  # MESSAGE(${dir_path})
ENDFOREACH(cmf)

--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
# Welcome to Modeling!

The ROSS repository currently links to two model repositories:
- [A Template Model](http://github.com/nmcglohon/template-model) that can be used as a starting point for any new model.
- [A Suite of Stable Models](http://github.com/ROSS-org/ROSS-Models) which contains several completed models.

## Building Existing Models

To get the linked model repositories, run the following commands after cloning the ROSS repository:
```
git submodule init
git submodule update
```
Then build ROSS as you regularly would.
Be sure to turn on the option to ROSS_BUILD_MODELS in CMake (more details can be found on the [wiki page](http://github.com/ROSS-org/ROSS/wiki/Installation)).

## Creating Your Own Model

As you develop your model, the best practice is to do it in a separate git repository.
Sym-link your model into this folder and CMake will automatically find it for building.
```
cd ~/Projects/ROSS/models
ln -s ~/Projects/my-model ./
```
For more details on creating a model please check out the [wiki page](http://github.com/ROSS-org/ROSS/wiki/Constructing-the-Model).
26 | -------------------------------------------------------------------------------- /models/phold/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | INCLUDE_DIRECTORIES(${ROSS_BINARY_DIR}) 2 | IF(USE_DAMARIS) 3 | INCLUDE_DIRECTORIES(${DAMARIS_INCLUDE}) 4 | ENDIF(USE_DAMARIS) 5 | IF(BGPM) 6 | INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR} ${BGPM_LIB}) 7 | ELSE(NOT(BGPM)) 8 | INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR}) 9 | ENDIF(BGPM) 10 | 11 | SET(phold_srcs 12 | phold.c phold.h) 13 | 14 | 15 | ADD_EXECUTABLE(phold ${phold_srcs}) 16 | ADD_EXECUTABLE(phold_test ${phold_srcs}) 17 | 18 | IF(BGPM) 19 | TARGET_LINK_LIBRARIES(phold ROSS imp_bgpm m) 20 | TARGET_LINK_LIBRARIES(phold_test ROSS imp_bgpm m) 21 | ELSE(NOT(BGPM)) 22 | IF(USE_DAMARIS) 23 | TARGET_LINK_LIBRARIES(phold ROSS ROSS_Damaris m) 24 | TARGET_LINK_LIBRARIES(phold_test ROSS ROSS_Damaris m) 25 | ELSE(NOT(USE_DAMARIS)) 26 | TARGET_LINK_LIBRARIES(phold ROSS m) 27 | TARGET_LINK_LIBRARIES(phold_test ROSS m) 28 | ENDIF(USE_DAMARIS) 29 | ENDIF(BGPM) 30 | 31 | ROSS_TEST_SCHEDULERS(phold) 32 | ROSS_TEST_INSTRUMENTATION(phold) 33 | 34 | SET_TARGET_PROPERTIES(phold_test PROPERTIES COMPILE_DEFINITIONS TEST_COMM_ROSS) 35 | ROSS_TEST_SCHEDULERS(phold_test) 36 | ROSS_TEST_INSTRUMENTATION(phold_test) 37 | 38 | INSTALL(FILES ${ROSS_BINARY_DIR}/../models/phold/phold DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) 39 | -------------------------------------------------------------------------------- /models/phold/phold.c: -------------------------------------------------------------------------------- 1 | #include "phold.h" 2 | 3 | 4 | tw_peid 5 | phold_map(tw_lpid gid) 6 | { 7 | return (tw_peid) gid / g_tw_nlp; 8 | } 9 | 10 | void 11 | phold_init(phold_state * s, tw_lp * lp) 12 | { 13 | (void) s; 14 | int i; 15 | 16 | if( stagger ) 17 | { 18 | for (i = 0; i < g_phold_start_events; i++) 19 | { 20 | tw_event_send( 21 | 
tw_event_new(lp->gid, 22 | tw_rand_exponential(lp->rng, mean) + lookahead + (tw_stime)(lp->gid % (unsigned int)g_tw_ts_end), 23 | lp)); 24 | } 25 | } 26 | else 27 | { 28 | for (i = 0; i < g_phold_start_events; i++) 29 | { 30 | tw_event_send( 31 | tw_event_new(lp->gid, 32 | tw_rand_exponential(lp->rng, mean) + lookahead, 33 | lp)); 34 | } 35 | } 36 | } 37 | 38 | void 39 | phold_pre_run(phold_state * s, tw_lp * lp) 40 | { 41 | (void) s; 42 | tw_lpid dest; 43 | 44 | if(tw_rand_unif(lp->rng) <= percent_remote) 45 | { 46 | dest = tw_rand_integer(lp->rng, 0, ttl_lps - 1); 47 | } else 48 | { 49 | dest = lp->gid; 50 | } 51 | 52 | if(dest >= (g_tw_nlp * tw_nnodes())) 53 | tw_error(TW_LOC, "bad dest"); 54 | 55 | tw_event_send(tw_event_new(dest, tw_rand_exponential(lp->rng, mean) + lookahead, lp)); 56 | } 57 | 58 | void 59 | phold_event_handler(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp) 60 | { 61 | (void) s; 62 | (void) m; 63 | tw_lpid dest; 64 | 65 | if(tw_rand_unif(lp->rng) <= percent_remote) 66 | { 67 | bf->c1 = 1; 68 | dest = tw_rand_integer(lp->rng, 0, ttl_lps - 1); 69 | // Makes PHOLD non-deterministic across processors! 
Don't uncomment 70 | /* dest += offset_lpid; */ 71 | /* if(dest >= ttl_lps) */ 72 | /* dest -= ttl_lps; */ 73 | } else 74 | { 75 | bf->c1 = 0; 76 | dest = lp->gid; 77 | } 78 | 79 | if(dest >= (g_tw_nlp * tw_nnodes())) 80 | tw_error(TW_LOC, "bad dest"); 81 | 82 | tw_event_send(tw_event_new(dest, tw_rand_exponential(lp->rng, mean) + lookahead, lp)); 83 | } 84 | 85 | void 86 | phold_event_handler_rc(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp) 87 | { 88 | (void) s; 89 | (void) m; 90 | tw_rand_reverse_unif(lp->rng); 91 | tw_rand_reverse_unif(lp->rng); 92 | 93 | if(bf->c1 == 1) 94 | tw_rand_reverse_unif(lp->rng); 95 | } 96 | 97 | void phold_commit(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp) 98 | { 99 | (void) s; 100 | (void) bf; 101 | (void) m; 102 | (void) lp; 103 | } 104 | 105 | void 106 | phold_finish(phold_state * s, tw_lp * lp) 107 | { 108 | (void) s; 109 | (void) lp; 110 | } 111 | 112 | tw_lptype mylps[] = { 113 | {(init_f) phold_init, 114 | /* (pre_run_f) phold_pre_run, */ 115 | (pre_run_f) NULL, 116 | (event_f) phold_event_handler, 117 | (revent_f) phold_event_handler_rc, 118 | (commit_f) phold_commit, 119 | (final_f) phold_finish, 120 | (map_f) phold_map, 121 | sizeof(phold_state)}, 122 | {0}, 123 | }; 124 | 125 | void event_trace(phold_message *m, tw_lp *lp, char *buffer, int *collect_flag) 126 | { 127 | (void) m; 128 | (void) lp; 129 | (void) buffer; 130 | (void) collect_flag; 131 | return; 132 | } 133 | 134 | void phold_stats_collect(phold_state *s, tw_lp *lp, char *buffer) 135 | { 136 | (void) s; 137 | (void) lp; 138 | (void) buffer; 139 | return; 140 | } 141 | 142 | st_model_types model_types[] = { 143 | {(ev_trace_f) event_trace, 144 | 0, 145 | (model_stat_f) phold_stats_collect, 146 | sizeof(int), 147 | NULL, //(sample_event_f) 148 | NULL, //(sample_revent_f) 149 | 0}, 150 | {0} 151 | }; 152 | 153 | const tw_optdef app_opt[] = 154 | { 155 | TWOPT_GROUP("PHOLD Model"), 156 | TWOPT_DOUBLE("remote", percent_remote, "desired 
remote event rate"), 157 | TWOPT_UINT("nlp", nlp_per_pe, "number of LPs per processor"), 158 | TWOPT_DOUBLE("mean", mean, "exponential distribution mean for timestamps"), 159 | TWOPT_DOUBLE("mult", mult, "multiplier for event memory allocation"), 160 | TWOPT_DOUBLE("lookahead", lookahead, "lookahead for events"), 161 | TWOPT_UINT("start-events", g_phold_start_events, "number of initial messages per LP"), 162 | TWOPT_UINT("stagger", stagger, "Set to 1 to stagger event uniformly across 0 to end time."), 163 | TWOPT_UINT("memory", optimistic_memory, "additional memory buffers"), 164 | TWOPT_CHAR("run", run_id, "user supplied run name"), 165 | TWOPT_END() 166 | }; 167 | 168 | int 169 | main(int argc, char **argv) 170 | { 171 | 172 | #ifdef TEST_COMM_ROSS 173 | // Init outside of ROSS 174 | MPI_Init(&argc, &argv); 175 | // Split COMM_WORLD in half even/odd 176 | int mpi_rank; 177 | MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); 178 | MPI_Comm split_comm; 179 | MPI_Comm_split(MPI_COMM_WORLD, mpi_rank%2, mpi_rank, &split_comm); 180 | if(mpi_rank%2 == 1){ 181 | // tests should catch any MPI_COMM_WORLD collectives 182 | MPI_Finalize(); 183 | } 184 | // Allows ROSS to function as normal 185 | tw_comm_set(split_comm); 186 | #endif 187 | 188 | unsigned int i; 189 | 190 | // set a min lookahead of 1.0 191 | lookahead = 1.0; 192 | tw_opt_add(app_opt); 193 | tw_init(&argc, &argv); 194 | 195 | #ifdef USE_DAMARIS 196 | if(g_st_ross_rank) 197 | { // only ross ranks should run code between here and tw_run() 198 | #endif 199 | if( lookahead > 1.0 ) 200 | tw_error(TW_LOC, "Lookahead > 1.0 .. 
needs to be less\n"); 201 | 202 | //reset mean based on lookahead 203 | mean = mean - lookahead; 204 | 205 | offset_lpid = g_tw_mynode * nlp_per_pe; 206 | ttl_lps = tw_nnodes() * nlp_per_pe; 207 | g_tw_events_per_pe = (mult * nlp_per_pe * g_phold_start_events) + 208 | optimistic_memory; 209 | //g_tw_rng_default = TW_FALSE; 210 | g_tw_lookahead = lookahead; 211 | 212 | tw_define_lps(nlp_per_pe, sizeof(phold_message)); 213 | 214 | for(i = 0; i < g_tw_nlp; i++) 215 | { 216 | tw_lp_settype(i, &mylps[0]); 217 | st_model_settype(i, &model_types[0]); 218 | } 219 | 220 | if( g_tw_mynode == 0 ) 221 | { 222 | printf("========================================\n"); 223 | printf("PHOLD Model Configuration..............\n"); 224 | printf(" Lookahead..............%lf\n", lookahead); 225 | printf(" Start-events...........%u\n", g_phold_start_events); 226 | printf(" stagger................%u\n", stagger); 227 | printf(" Mean...................%lf\n", mean); 228 | printf(" Mult...................%lf\n", mult); 229 | printf(" Memory.................%u\n", optimistic_memory); 230 | printf(" Remote.................%lf\n", percent_remote); 231 | printf("========================================\n\n"); 232 | } 233 | 234 | tw_run(); 235 | #ifdef USE_DAMARIS 236 | } // end if(g_st_ross_rank) 237 | #endif 238 | tw_end(); 239 | 240 | return 0; 241 | } 242 | -------------------------------------------------------------------------------- /models/phold/phold.h: -------------------------------------------------------------------------------- 1 | #ifndef INC_phold_h 2 | #define INC_phold_h 3 | 4 | #include 5 | 6 | /* 7 | * PHOLD Types 8 | */ 9 | 10 | typedef struct phold_state phold_state; 11 | typedef struct phold_message phold_message; 12 | 13 | struct phold_state 14 | { 15 | long int dummy_state; 16 | }; 17 | 18 | struct phold_message 19 | { 20 | long int dummy_data; 21 | }; 22 | 23 | /* 24 | * PHOLD Globals 25 | */ 26 | tw_stime lookahead = 1.0; 27 | static unsigned int stagger = 0; 28 | 
static unsigned int offset_lpid = 0; 29 | static tw_stime mult = 1.4; 30 | static tw_stime percent_remote = 0.25; 31 | static unsigned int ttl_lps = 0; 32 | static unsigned int nlp_per_pe = 8; 33 | static int g_phold_start_events = 1; 34 | static int optimistic_memory = 100; 35 | 36 | // rate for timestamp exponential distribution 37 | static tw_stime mean = 1.0; 38 | 39 | static char run_id[1024] = "undefined"; 40 | 41 | #endif 42 | --------------------------------------------------------------------------------