├── .github
    ├── CONTRIBUTING.md
    └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── conf
    └── travis-install-mpi.sh
├── core
    ├── CMakeLists.txt
    ├── ROSSConfig.cmake
    ├── avl_tree.c
    ├── avl_tree.h
    ├── buddy.c
    ├── buddy.h
    ├── clock
    │   ├── aarch64.c
    │   ├── aarch64.h
    │   ├── amd64.c
    │   ├── amd64.h
    │   ├── armv7l.c
    │   ├── armv7l.h
    │   ├── bgl.c
    │   ├── bgl.h
    │   ├── bgq.c
    │   ├── bgq.h
    │   ├── gtod.c
    │   ├── gtod.h
    │   ├── i386.c
    │   ├── i386.h
    │   ├── ia64.c
    │   ├── ia64.h
    │   ├── ppc.c
    │   ├── ppc.h
    │   ├── ppc64le.c
    │   └── ppc64le.h
    ├── cmake
    │   ├── Coveralls.cmake
    │   ├── CoverallsClear.cmake
    │   ├── CoverallsGenerateGcov.cmake
    │   ├── GetGitRevisionDescription.LICENSE_1_0.txt
    │   ├── GetGitRevisionDescription.cmake
    │   ├── GetGitRevisionDescription.cmake.in
    │   └── SetupMPI.cmake
    ├── config.h.in
    ├── gvt
    │   ├── 7oclock.c.old
    │   ├── 7oclock.h.old
    │   ├── mpi_allreduce.c
    │   └── mpi_allreduce.h
    ├── hash-quadratic.c
    ├── hash-quadratic.h
    ├── instrumentation
    │   ├── ross-lps
    │   │   ├── analysis-lp.c
    │   │   ├── analysis-lp.h
    │   │   └── specialized-lps.c
    │   ├── st-event-trace.c
    │   ├── st-instrumentation.c
    │   ├── st-instrumentation.h
    │   ├── st-model-data.c
    │   ├── st-sim-engine.c
    │   └── st-stats-buffer.c
    ├── lz4.c
    ├── lz4.h
    ├── network-mpi.c
    ├── network-mpi.h
    ├── queue
    │   ├── calendar.c.old
    │   ├── heap.c.old
    │   ├── kp_splay.c.old
    │   ├── splay.c
    │   └── tw-queue.h
    ├── rand-clcg4.c
    ├── rand-clcg4.h
    ├── rio
    │   ├── README.md
    │   ├── io-mpi.c
    │   ├── io-serialize.c
    │   └── io.h
    ├── ross-config.in
    ├── ross-extern.h
    ├── ross-global.c
    ├── ross-gvt.h
    ├── ross-inline.h
    ├── ross-kernel-inline.h
    ├── ross-random.c
    ├── ross-random.h
    ├── ross-types.h
    ├── ross.h
    ├── ross.pc.in
    ├── tw-event.c
    ├── tw-eventq.h
    ├── tw-kp.c
    ├── tw-lp.c
    ├── tw-opts.c
    ├── tw-opts.h
    ├── tw-pe.c
    ├── tw-sched.c
    ├── tw-setup.c
    ├── tw-state.c
    ├── tw-stats.c
    ├── tw-timing.c
    ├── tw-timing.h
    └── tw-util.c
├── docs
    ├── CMakeLists.txt
    ├── Doxyfile.user.in
    └── header.html
└── models
    ├── CMakeLists.txt
    ├── README.md
    └── phold
        ├── CMakeLists.txt
        ├── phold.c
        └── phold.h


/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | There are many ways to contribute to ROSS:
 4 | 
 5 | - Create and release a model.
 6 |   Like any simulation engine, ROSS is always looking for new models and new model developers.
 7 |   This is also the best way to learn about ROSS and its API.
 8 | - File a bug or request a feature through [GitHub Issues](http://github.com/ROSS-org/ROSS/issues).
 9 |   We are always looking to improve ROSS to make it more stable for our users.
10 |   Feature requests and related discussions are located here as well.
11 | - The best way to ensure a bug or feature request is addressed is to do it yourself!
12 |   Spelunking through the ROSS core can be an enlightening journey.
13 |   Once you've made the change, feel free to create a [pull request](https://github.com/ROSS-org/ROSS/pulls).
14 |   Between our continuous integration testing and our experienced ROSS core team, we will ensure your change is safe before deploying it to the master branch.
15 | 
16 | ## Small Changes
17 | 
18 | Development on the ROSS core is done through [GitHub Pull Requests](https://help.github.com/articles/using-pull-requests/).
19 | We always welcome small-change contributions to ROSS, including:
20 | 
21 | - clarification of error/warning messages
22 | - bug fixes (hopefully there aren't any bugs to begin with!)
23 | - whitespace or code-style changes
24 | - other straight-forward changes that do not have wide-reaching consequences
25 | 
26 | ## Major Changes and Features
27 | 
28 | ROSS is being continually developed and we are frequently adding new features.
29 | For these larger changes of ROSS, there are a few boxes that must be checked before any pull request is merged into the master branch.
30 | 
31 | 1. Ensure current tests pass
32 | 2. Ensure coverage increases
33 | 3. Ensure dependent projects are updated (needed for API changes)
34 | 4. Document the change through a blog post
35 | 
36 | ### Continuous Integration Testing and Coverage
37 | 
38 | First, the new feature or major change must pass all of the existing TravisCI tests.
39 | 
40 | Next, the test coverage must increase (or at least stay the same).
41 | For new features, this usually means that a new test must be written.
42 | There are typically two options for a test:
43 | - Add a new test to PHOLD model (see [models/phold/CMakeLists.txt](https://github.com/ROSS-org/ROSS/blob/master/models/phold/CMakeLists.txt)).
44 | - Create a new model which tests your feature and add this model to the ROSS-Models repository.
45 | 
46 | ### ROSS Model Changes
47 | 
48 | The [ROSS-Models repository](http://github.com/ROSS-org/ROSS-Models) contains models which are no longer under development.
49 | If your new feature is a major API change to ROSS, the models in this repository must be updated.
50 | The workflow to update the ROSS-Models submodule is as follows:
51 | 
52 | 1. In your feature branch of ROSS, load the submodules
53 | ```
54 | 	git submodule init
55 | 	git submodule update
56 | ```
57 | 2. Move into the `models/ROSS-Models/` directory.
58 |    Make the appropriate API changes and commit them using ROSS.
59 | 3. While within this directory, upload these changes to GitHub using the typical `git push origin master` command.
60 | 4. Move back up to the base ROSS directory.
61 |    You should see the changed commit hash for the ROSS-Models submodule when you run a `git status`.
62 |    Commit this change in hash number using `git commit -am "updated ROSS-Models"`.
63 | 
64 | ### CODES
65 | 
66 | The [CODES Project](http://press3.mcs.anl.gov/codes/) is actively developed and depends on ROSS as its underlying simulation engine.
67 | The CODES repository can be found [here](https://xgitlab.cels.anl.gov).
68 | You should be able to log in to ANL's GitLab service.
69 | Here you can fork the CODES repository and create a pull request with any required changes.
70 | 
71 | ### Documentation
72 | 
73 | In order to keep our documentation up-to-date, any new feature or major change must be documented before it is merged into the master branch.
74 | The easiest way to document the change is to create a new blog post for our website.
75 | The [website contributing guide](https://github.com/ROSS-org/ross-org.github.io/blob/master/CONTRIBUTING.md) documents this process.
76 | 
77 | ## Versioning and New Releases
78 | 
79 | ROSS does not utilize a numbered-version system.
80 | Instead, each commit on the master branch represents a change in ROSS.
81 | Thus, each commit hash can be used as a version number that we guarantee will never change.
82 | 
83 | To achieve the eternal validity of a commit hash, we utilize squash commits to merge any changes.
84 | All merges into the master branch should be made through the GitHub pull request interface.
85 | Through this interface, the merge can be squashed.
86 | Squash commits have several implications:
87 | 
88 | 1. *The squash-on-merge option must be selected within the GitHub interface by the person doing the merge.*
89 | 2. The individual commits are not placed in the history of the master branch.
90 |    However, they do remain available through the pull request page.
91 | 3. One positive outcome is that the blame on any file will be simplified since there is now only one commit associated with the entire change.
92 | 4. Once a feature branch is merged into master, it should be **deleted from any local repositories**.
93 |    There are possible issues if someone attempts to re-merge the branch (including commits previously added in a squash).
94 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **add your comments here**
 2 | 
 3 | ---
 4 | 
 5 | If this merge represents a feature addition to ROSS, the following items must be completed before the branch will be merged:
 6 | 
 7 | - [ ] Document the feature on the blog (See the [website Contributing guide](https://github.com/ROSS-org/ross-org.github.io/blob/master/CONTRIBUTING.md)).
 8 |   Include a link to your blog post in the Pull Request.
 9 | - [ ] Builds should cleanly compile with -Wall and -Wextra.
10 | - [ ] One or more TravisCI tests should be created (and they should pass)
11 | - [ ] Through the TravisCI tests, coverage should increase
12 | - [ ] Test with CODES to ensure everything continues to work
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore models that aren't already in tree
2 | # (must manually add models to override this)
3 | models/*
4 | *.swp
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "risa"]
2 | 	path = core/risa
3 | 	url = https://github.com/ROSS-org/RISA
4 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: trusty
 2 | sudo: required
 3 | language: cpp
 4 | compiler:
 5 |   - clang
 6 | # Change this to your needs
 7 | notifications:
 8 |   email:
 9 |     on_success: change
10 |     on_failure: always
11 | env:
12 |  - secure: "BbB1KVY0Yb6DJwxdfFDF1PJwSx9euNfNX94oDKftiH8LE0nEzfS6xZc2sBkWTWOThHml9ttBkDIx/NhxEThOjyVcX6uv4kibP6moV5EqxqC+kLoZSEZnVuAdTJfGRKBdzmRp66R5a/GiMzzz/F3+smdVFMb6XR06sPQa5TQZjEc="
13 | git:
14 |   submodules: false
15 | before_install:
16 |  - sudo apt-add-repository -y ppa:libreoffice/libreoffice-4-2
17 |  - sudo apt-get update -q
18 |  - sudo apt-get install -y -qq lcov curl doxygen graphviz
19 |  - lscpu
20 |  - CLOCK_SPEED=`lscpu | grep "MHz" | awk '{print $3*1000*1000}'`
21 |  - echo $CLOCK_SPEED
22 |  - sh ./conf/travis-install-mpi.sh mpich2
23 |  - mpirun --version
24 | script:
25 |  - mkdir cxx-build && cd cxx-build
26 |  - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-Wall -Wextra" ..
27 |  - export MPICH_CC=clang++
28 |  - make
29 |  - unset MPICH_CC
30 |  - cd ..
31 |  - mkdir release && cd release
32 |  - MPICH_CC=clang cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON ..
33 |  - make
34 |  - CTEST_OUTPUT_ON_FAILURE=1 make test
35 |  - cd ..
36 |  - mkdir build-gtod && cd build-gtod
37 |  - MPICH_CC=clang cmake -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON -DROSS_CLOCK_OVERRIDE=ON ..
38 |  - make
39 |  - CTEST_OUTPUT_ON_FAILURE=1 ctest -R SCHED
40 |  - make coveralls
41 |  - cd ..
42 |  - mkdir build && cd build
43 |  - MPICH_CC=clang cmake -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON ..
44 |  - make
45 |  - CTEST_OUTPUT_ON_FAILURE=1 make test
46 |  - make coveralls
47 |  - cd ..
48 |  - mkdir build2 && cd build2
49 |  - MPICH_CC=clang cmake -DAVL_TREE=OFF -DCOVERALLS=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Wall -Wextra" -DROSS_BUILD_MODELS=ON ..
50 |  - make
51 |  - CTEST_OUTPUT_ON_FAILURE=1 make test
52 |  - make coveralls
53 | branches:
54 |   only:
55 |   - master
56 |   - develop
57 | after_success:
58 |   - bash <(curl -s https://codecov.io/bash)
59 |   - ## the following automatically builds the doxygen documentation and
60 |   - ## pushes it to the master branch of the ross-org.github.io repository
61 |   - ## Shamelessly stolen from http://bit.ly/1H1sawW
62 |   -
63 |   - # First, set up the git identity used for the automatic commit.
64 |   - # The GH_TOKEN used by the final push was passed
65 |   - # encrypted to travis and should have been decrypted
66 |   - # using travis' private key before this script was run.
67 |   - git config --global user.name "ROSS bot"
68 |   - git config --global user.email ross.gh.robot@gmail.com
69 |   -
70 |   - # clone the website repo (master branch), then clone ROSS inside it
71 |   - git clone -b master --single-branch https://github.com/ross-org/ross-org.github.io
72 |   - cd ross-org.github.io
73 |   - git clone -b master --single-branch https://github.com/ross-org/ROSS
74 |   - cd ROSS
75 |   - mkdir build && cd build
76 |   - cmake -DROSS_BUILD_DOXYGEN=ON -DDOXYGEN_CALLER_GRAPHS=ON -DDOXYGEN_CALL_GRAPHS=ON ..
77 |   - make apidoc
78 |   - cd ../..
79 |   - git rm -r ROSS-docs
80 |   - mkdir -p ROSS-docs/docs
81 |   - mv ROSS/build/docs/html ROSS-docs/docs
82 |   - git add ROSS-docs
83 |   - git commit -m "Automatic doxygen build."
84 |   - git push https://${GH_TOKEN}@github.com/ROSS-org/ross-org.github.io master
85 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.5) # must precede PROJECT() so policies are set before language detection
  2 | PROJECT(ROSS_TOP C)
  3 | 
  4 | SET(CMAKE_POSITION_INDEPENDENT_CODE ON)
  5 | 
  6 | # ROSS Configuration Options
  7 | 
  8 | ENABLE_TESTING()
  9 | INCLUDE(CTest)
 10 | 
 11 | LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/core/cmake/")
 12 | 
 13 | # The following section is based on the Spack documentation:
 14 | # https://spack.readthedocs.io/en/latest/workflows.html#write-the-cmake-build
 15 | # enable @rpath in the install name for any shared library being built
 16 | # note: it is planned that a future version of CMake will enable this by default
 17 | set(CMAKE_MACOSX_RPATH 1)
 18 | 
 19 | # Always use full RPATH
 20 | # http://www.cmake.org/Wiki/CMake_RPATH_handling
 21 | # http://www.kitware.com/blog/home/post/510
 22 | 
 23 | # use, i.e. don't skip the full RPATH for the build tree
 24 | SET(CMAKE_SKIP_BUILD_RPATH  FALSE)
 25 | 
 26 | # when building, don't use the install RPATH already
 27 | # (but later on when installing)
 28 | SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
 29 | 
 30 | # add the automatically determined parts of the RPATH
 31 | # which point to directories outside the build tree to the install RPATH
 32 | SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 33 | 
 34 | # the RPATH to be used when installing, but only if it's not a system directory
 35 | LIST(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir)
 36 | IF("${isSystemDir}" STREQUAL "-1")
 37 |    SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 38 | ENDIF("${isSystemDir}" STREQUAL "-1")
 39 | 
 40 | # end of spack-related addition
 41 | 
 42 | # We probably don't want this to run on every build.
 43 | option(COVERALLS "Generate coveralls data" OFF)
 44 | 
 45 | if (COVERALLS)
 46 |     include(Coveralls)
 47 |     coveralls_turn_on_coverage()
 48 | endif()
 49 | 
 50 | # Priority Queue Implementation
 51 | SET(QUEUE splay)
 52 | # Other queue implementations are no longer supported.
 53 | # SET(QUEUE splay CACHE STRING "Queue type chosen by the user at configure time")
 54 | # SET_PROPERTY(CACHE QUEUE PROPERTY STRINGS splay calendar heap kp_splay)
 55 | 
 56 | # Random Library
 57 | SET(RAND clcg4)
 58 | 
 59 | # Network option
 60 | # tcp option removed
 61 | SET(NETWORK mpi)
 62 | 
 63 | # GVT algorithm option
 64 | #SET(GVT 7oclock)
 65 | SET(GVT mpi_allreduce)
 66 | 
 67 | # Architecture setting and management
 68 | SET(VALID_ARCH NO)
 69 | OPTION(ROSS_CLOCK_OVERRIDE "override platform detection to use gtod clock" NO)
 70 | 
 71 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386") # operands quoted: an empty processor string compares as "" instead of breaking IF()
 72 |   SET(VALID_ARCH YES)
 73 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall")
 74 |   ADD_DEFINITIONS(-D_GNU_SOURCE)
 75 |   SET(CLOCK i386)
 76 | ENDIF()
 77 | 
 78 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "bgl")
 79 |   SET(VALID_ARCH YES)
 80 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qflag=i:i -qattr=full -O5")
 81 |   SET(OPTIONS "-qtune=440 -qarch=440d")
 82 |   SET(CLOCK bgl)
 83 | ENDIF()
 84 | 
 85 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "bgp")
 86 |   SET(VALID_ARCH YES)
 87 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qflag=i:i -qattr=full -O5")
 88 |   SET(OPTIONS "-qtune=450 -qarch=450d")
 89 |   SET(CLOCK bgl)
 90 | ENDIF()
 91 | 
 92 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "bgq")
 93 |   SET(VALID_ARCH YES)
 94 |   OPTION(USE_BGPM "Blue Gene/Q specific Performance Counters" OFF)
 95 |   SET(BGPM_INCLUDE /bgsys/drivers/ppcfloor/bgpm/include/)
 96 |   ADD_LIBRARY(imp_bgpm STATIC IMPORTED)
 97 |   SET_PROPERTY(TARGET imp_bgpm PROPERTY IMPORTED_LOCATION /bgsys/drivers/ppcfloor/bgpm/lib/libbgpm.a)
 98 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${BGPM_INCLUDE} -O5 -qstrict -qprefetch=aggressive -qarch=qp -qtune=qp -qmaxmem=-1 -qsimd=noauto -qreport -qhot")
 99 |   SET(CLOCK bgq)
100 | ENDIF()
101 | 
102 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64")
103 |   SET(VALID_ARCH YES)
104 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall")
105 |   ADD_DEFINITIONS(-D_GNU_SOURCE)
106 |   SET(CLOCK ppc)
107 | ENDIF()
108 | 
109 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
110 |   SET(VALID_ARCH YES)
111 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O5 -qprefetch=aggressive -qarch=pwr9 -qtune=auto -qmaxmem=-1 -qsimd=noauto -qhot")
112 |   SET(CLOCK ppc64le)
113 | ENDIF()
114 | 
115 | IF(("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") OR ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "amd64"))
116 |   SET(VALID_ARCH YES)
117 |   SET(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}")
118 |   ADD_DEFINITIONS(-D_GNU_SOURCE)
119 |   SET(CLOCK amd64)
120 | ENDIF()
121 | 
122 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
123 |   SET(VALID_ARCH YES)
124 |   SET(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}")
125 |   ADD_DEFINITIONS(-D_GNU_SOURCE)
126 |   SET(CLOCK aarch64)
127 | ENDIF()
128 | 
129 | IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "armv7l")
130 |   SET(VALID_ARCH YES)
131 |   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
132 |   ADD_DEFINITIONS(-D_GNU_SOURCE)
133 |   SET(CLOCK armv7l)
134 | ENDIF()
135 | 
136 | IF(VALID_ARCH AND NOT ROSS_CLOCK_OVERRIDE) # keep the per-architecture clock chosen above
137 |   MESSAGE(STATUS "System architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
138 |   MESSAGE(STATUS "Using C_FLAGS: ${CMAKE_C_FLAGS}")
139 | ELSE()
140 |   IF(NOT VALID_ARCH) # warn only when detection failed, not when ROSS_CLOCK_OVERRIDE requested gtod
141 |     MESSAGE(WARNING "System architecture not recognized!\nFound: ${CMAKE_SYSTEM_PROCESSOR}\nFalling back to get-time-of-day clock implementation.")
142 |   ENDIF()
143 |   SET(CLOCK gtod)
144 | ENDIF()
145 | MESSAGE(STATUS "Using CLOCK: ${CLOCK}") # reported in every case, including the gtod fallback/override
146 | 
147 | 
148 | ## MPI
149 | INCLUDE(SetupMPI)
150 | IF(MPI_C_FOUND)
151 |         INCLUDE_DIRECTORIES(${MPI_C_INCLUDE_PATH})
152 |         LIST(APPEND ROSS_EXTERNAL_LIBS ${MPI_C_LIBRARIES})
153 | ELSE(MPI_C_FOUND)
154 |         MESSAGE("WARNING: Could not find MPI!")
155 |         MESSAGE("         Either add an MPI compiler to your path (using modules)")
156 |         MESSAGE("         Or force CMake to build using the correct compiler (`export CC=mpicc`)")
157 | ENDIF(MPI_C_FOUND)
158 | 
159 | # ROSS Core code
160 | ADD_SUBDIRECTORY(core)
161 | 
162 | # ROSS Models directory
163 | OPTION(ROSS_BUILD_MODELS "Build ROSS models?" OFF)
164 | IF(ROSS_BUILD_MODELS)
165 |   ADD_SUBDIRECTORY(models)
166 | ENDIF(ROSS_BUILD_MODELS)
167 | 
168 | # ROSS Documentation
169 | FIND_PACKAGE(Doxygen)
170 | IF(DOXYGEN_FOUND)
171 |   OPTION(ROSS_BUILD_DOXYGEN "Build Doxygen documentation?" OFF)
172 |   IF(ROSS_BUILD_DOXYGEN)
173 |     ADD_SUBDIRECTORY(docs)
174 |   ENDIF(ROSS_BUILD_DOXYGEN)
175 | ENDIF(DOXYGEN_FOUND)
176 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Rensselaer Polytechnic Institute
 2 | All rights reserved.
 3 | 
 4 | Redistribution and  use in  source and binary  forms, with  or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |   Redistributions  of  source code  must  retain  the above  copyright
 9 |   notice, this list of conditions and the following disclaimer.
10 | 
11 |   Redistributions in  binary form  must reproduce the  above copyright
12 |   notice, this list of conditions  and the following disclaimer in the
13 |   documentation and/or other materials provided with the distribution.
14 | 
15 |   Neither the  name of Rensselaer Polytechnic Institute  nor the names
16 |   of  its contributors  may be  used  to endorse  or promote  products
17 |   derived   from  this   software  without   specific   prior  written
18 |   permission.
19 | 
20 | THIS SOFTWARE  IS PROVIDED BY  THE COPYRIGHT HOLDERS  AND CONTRIBUTORS
21 | "AS  IS" AND  ANY EXPRESS  OR IMPLIED  WARRANTIES, INCLUDING,  BUT NOT
22 | LIMITED TO, THE IMPLIED  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE  ARE DISCLAIMED. IN NO EVENT  SHALL THE COPYRIGHT
24 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES (INCLUDING,  BUT  NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF  LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY,  OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Welcome to Simplified ROSS!
 2 | 
 3 | Welcome to a leaner, meaner, *faster* version of ROSS.
 4 | While the entire history of ROSS has been preserved in this repository, a major change in the directory structure has made getting the full history of a file somewhat of a pain.
 5 | You may find the now-deprecated version at the [ROSS-Legacy tag](https://github.com/ROSS-org/ROSS/releases/tag/Legacy) in this repository.
 6 | Using this repository you can compare files from the new `ROSS/core` to `ROSS/ross`.
 7 | For a detailed list of changes between old ROSS and SR please visit [the wiki](https://github.com/ROSS-org/ROSS/wiki/Differences-between-Simplified-ROSS-and-ROSS-Legacy).
 8 | 
 9 | For the most recent docs and other important posts about ROSS, see the [ROSS webpage](http://ross-org.github.io).
10 | 
11 | [![Build Status](https://travis-ci.com/ROSS-org/ROSS.svg?branch=master)](https://travis-ci.com/ROSS-org/ROSS)
12 | [![codecov.io](http://codecov.io/github/ROSS-org/ROSS/coverage.svg?branch=master)](http://codecov.io/github/ROSS-org/ROSS?branch=master)
13 | [![Doxygen](https://img.shields.io/badge/doxygen-reference-blue.svg)](http://ross-org.github.io/ROSS-docs/docs/html)
14 | 
15 | ## History
16 | 
17 | ROSS's history starts with a one-week re-implementation of [Georgia Tech Time Warp (GTW)](http://www.cc.gatech.edu/computing/pads/tech-parallel-gtw.html) by Shawn Pearce and Dave Bauer in 1999.
18 | After 10 years of in-house development, version 5.0 of [Rensselaer's Optimistic Simulation System](http://sourceforge.net/projects/pdes/) went live at SourceForge.net.
19 | Thus the official version history began!
20 | 
21 | Through the years ROSS has migrated from CVS, to SVN, to Git and GitHub.com.
22 | The code was maintained by Chris Carothers and his graduate students at RPI ([publications](http://cs.rpi.edu//~chrisc/#publications)).
23 | Over the years, several features (including a shared-memory version) were implemented within ROSS.
24 | Some of these features have since been optimized out, leaving behind cruft.
25 | 
26 | In early 2015 a sleeker version of ROSS was released.
27 | Developed as Simplified ROSS ([gonsie/SR](http://github.com/gonsie/SR)), this version removed many files, functions, and variables that had become deprecated over time.
28 | 
29 | ## Requirements
30 | 
31 | 1. ROSS is written in standard C and thus requires a C compiler (C11 is preferred, but not required).
32 | 2. The build system is [CMake](http://cmake.org), and we require version 3.5 or higher.
33 | 3. ROSS relies on MPI.
34 |    We recommend the [MPICH](http://www.mpich.org) implementation.
35 | 
36 | ## Startup Instructions
37 | 
38 | 1. Clone the repository to your local machine:
39 |   ```
40 |   git clone -b master --single-branch git@github.com:ROSS-org/ROSS.git
41 |   cd ROSS
42 |   ```
43 |   Since the ROSS repository is quite large, it is recommended that you only clone the master branch.
44 |   To speed up the clone command even more, use the `--depth=1` argument.
45 | 
46 | 2. *Optional* Install the submodules:
47 |   ```
48 |   git submodule init
49 |   git submodule update
50 |   ```
51 |   Currently, ROSS includes one submodule:
52 |   - [RISA](https://github.com/ROSS-org/RISA) ROSS In Situ Analysis
53 | 
54 | 3. *Optional* Symlink your model to ROSS.
55 | Please see [this blog post](https://ross-org.github.io/setup/build-model-with-ross.html) for details about creating and integrating a model with ROSS.
56 |   ```
57 |   ln -s ~/path-to/your-existing-model models/your-model-name
58 |   ```
59 | 
60 | 4. Create a build directory.
61 | ROSS developers typically do out-of-tree builds.  See the [Installation page](https://ross-org.github.io/setup/installation.html) for more details.
62 |   ```
63 |   cd ~/directory-of-builds/
64 |   mkdir ROSS-build
65 |   cd ROSS-build
66 |   ccmake ~/path-to/ROSS
67 |   ```
68 | 
69 | 5. Make your model(s) with one of the following commands
70 |   ```
71 |   make -k         // ignore errors from other models
72 |   make -j 12      // parallel build
73 |   make model-name // build only one model
74 |   ```
75 | 
76 | 6. Run your model.
77 | See [this blog post](https://ross-org.github.io/setup/running-sim.html) for details about the ROSS command line options.
78 |   ```
79 |   cd ~/directory-of-builds/ROSS-build/models/your-model
80 |   ./your-model --synch=1               // sequential mode
81 |   mpirun -np 2 ./your-model --synch=2  // conservative mode
82 |   mpirun -np 2 ./your-model --synch=3  // optimistic mode
83 |   ./your-model --synch=4               // optimistic debug mode (note: not a parallel execution!)
84 |   ```
85 | 


--------------------------------------------------------------------------------
/conf/travis-install-mpi.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Installs the MPI implementation named by $1 (mpich1|mpich2|mpich3|openmpi).
 3 | # This conf file is taken from the mpi4py project: http://mpi4py.scipy.org/
 4 | set -e
 5 | case "$1" in
 6 |   mpich1) set -x;
 7 |     sudo apt-get install -y -q gfortran mpich-shmem-bin libmpich-shmem1.0-dev;;
 8 |   mpich2) set -x;
 9 |     sudo apt-get install -y -q gfortran mpich2 libmpich2-dev;;
10 |   mpich3) set -x;
11 |     sudo apt-get install -y -q gfortran libcr0 default-jdk;
12 |     wget -q http://www.cebacad.net/files/mpich/ubuntu/mpich-3.1/mpich_3.1-1ubuntu_amd64.deb;
13 |     sudo dpkg -i ./mpich_3.1-1ubuntu_amd64.deb;
14 |     rm -f ./mpich_3.1-1ubuntu_amd64.deb;;
15 |   openmpi) set -x;
16 |     sudo apt-get install -y -q gfortran openmpi-bin openmpi-common libopenmpi-dev;;
17 |   *)
18 |     echo "Unknown MPI implementation: $1" >&2; exit 1;;
19 | esac
20 | 


--------------------------------------------------------------------------------
/core/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | PROJECT(ROSS C)
  2 | INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR} ${ROSS_BINARY_DIR})
  3 | 
  4 | SET(ross_srcs
  5 | 
  6 | queue/tw-queue.h
  7 | queue/${QUEUE}.c
  8 | 
  9 | ross-random.h
 10 | ross-random.c
 11 | rand-${RAND}.h
 12 | rand-${RAND}.c
 13 | 
 14 | clock/${CLOCK}.h
 15 | clock/${CLOCK}.c
 16 | 
 17 | ross-gvt.h
 18 | gvt/${GVT}.h
 19 | gvt/${GVT}.c
 20 | 
 21 | network-${NETWORK}.h
 22 | network-${NETWORK}.c
 23 | 
 24 | hash-quadratic.h
 25 | hash-quadratic.c
 26 | 
 27 | buddy.h
 28 | buddy.c
 29 | lz4.h
 30 | lz4.c
 31 | tw-state.c
 32 | 
 33 | ross-extern.h
 34 | ross-global.c
 35 | 
 36 | ross-inline.h
 37 | ross-kernel-inline.h
 38 | ross.h
 39 | ross-types.h
 40 | 
 41 | tw-eventq.h
 42 | tw-event.c
 43 | 
 44 | tw-opts.h
 45 | tw-opts.c
 46 | 
 47 | tw-pe.c
 48 | tw-kp.c
 49 | tw-lp.c
 50 | 
 51 | tw-timing.h
 52 | tw-timing.c
 53 | 
 54 | tw-sched.c
 55 | tw-setup.c
 56 | tw-stats.c
 57 | tw-util.c
 58 | 
 59 | instrumentation/st-instrumentation.h
 60 | instrumentation/st-instrumentation.c
 61 | instrumentation/st-stats-buffer.c
 62 | instrumentation/st-sim-engine.c
 63 | instrumentation/st-event-trace.c
 64 | instrumentation/st-model-data.c
 65 | instrumentation/ross-lps/analysis-lp.h
 66 | instrumentation/ross-lps/analysis-lp.c
 67 | instrumentation/ross-lps/specialized-lps.c)
 68 | 
 69 | # ROSS VERSION INFORMATION
 70 | ## Print Date and Time at top of ROSS output
 71 | INCLUDE (CheckFunctionExists)
 72 | CHECK_FUNCTION_EXISTS(ctime HAVE_CTIME)
 73 | 
 74 | ## Print ROSS Git Hash
 75 | # From http://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake
 76 | # Now following this approach (which is based on the previous):
 77 | # http://ipenguin.ws/2012/11/cmake-automatically-use-git-tags-as.html
 78 | # This way lets us use the actual version numbers of ROSS, instead of the git commit
 79 | LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
 80 | INCLUDE(GetGitRevisionDescription)
 81 | #GET_GIT_HEAD_REVISION(GIT_REFSPEC GIT_SHA1)
 82 | 
 83 | # changed to look at the working tree and not the latest commit hash -- allows for use of --dirty.
 84 | git_describe_working_tree(VERSION --tags --dirty)
 85 | message(STATUS "ROSS VERSION=${VERSION}")
 86 | 
 87 | #parse the version information into pieces.
 88 | string(REGEX REPLACE "^v([0-9]+)\\..*" "\\1" VERSION_MAJOR "${VERSION}")
 89 | string(REGEX REPLACE "^v[0-9]+\\.([0-9]+).*" "\\1" VERSION_MINOR "${VERSION}")
 90 | string(REGEX REPLACE "^v[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}")
 91 | string(REGEX REPLACE "^v[0-9]+\\.[0-9]+\\.[0-9]+(.*)" "\\1" VERSION_SHA1 "${VERSION}")
 92 | # VERSION_SHORT used in the ross.pc file
 93 | set(VERSION_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
 94 | 
 95 | 
 96 | # Data Structure for remote Events
 97 | # If AVL_TREE is OFF, ROSS reverts to hashing
 98 | OPTION(AVL_TREE "Use AVL trees for optimistic mode events? (hash tables otherwise)" ON)
 99 | IF(AVL_TREE)
100 |   SET(ross_srcs ${ross_srcs} avl_tree.h avl_tree.c)
101 | ENDIF(AVL_TREE)
102 | 
103 | # RIO: Restart IO
104 | OPTION(USE_RIO "Enable RIO checkpointing library?" OFF)
105 | IF(USE_RIO)
106 |   SET(ross_srcs ${ross_srcs} rio/io-mpi.c rio/io-serialize.c)
107 |   INCLUDE_DIRECTORIES(rio)
108 | ENDIF(USE_RIO)
109 | 
110 | # Damaris I/O and data management
111 | OPTION(USE_DAMARIS "Build with Damaris library (for in situ vis/analysis)?" OFF)
112 | if (USE_DAMARIS)
113 |     ADD_SUBDIRECTORY(risa)
114 |     INCLUDE_DIRECTORIES(${DAMARIS_INCLUDE})
115 |     SET(ross_srcs ${ross_srcs} ${ROSS_Damaris_SOURCE_DIR}/core/damaris.h)
116 | ENDIF(USE_DAMARIS)
117 | 
118 | # Use deterministic unbiased RNG tiebreaker for event ties
119 | OPTION(USE_RAND_TIEBREAKER "Build with deterministic unbiased tiebreaker for event ties" ON)
120 | 
121 | # Use debugging-friendly memory allocation
122 | OPTION(ROSS_ALLOC_DEBUG "Use naive allocator to be more friendly to memory debugging tools" OFF)
123 | 
124 | OPTION(RAND_NORMAL "Turn on state for normal distribution" ON)
125 | 
126 | # Show timing data at end of run
127 | OPTION(ROSS_timing "Perform ROSS timings" ON)
128 | 
129 | # Used by ross-kernel-inline.h
130 | OPTION(ROSS_runtime_checks "Perform ID checks" OFF)
131 | IF(CMAKE_BUILD_TYPE MATCHES Debug)
132 | 	SET(ROSS_runtime_checks ON CACHE BOOL "Perform ID checks" FORCE)
133 | ENDIF(CMAKE_BUILD_TYPE MATCHES Debug)
134 | 
135 | # Set all options
136 | #SET(OPTIONS "${OPTIONS} -DROSS_QUEUE_${QUEUE} -DROSS_RAND_${RAND} -DROSS_NETWORK_mpi -DROSS_CLOCK_${CLOCK} -DROSS_GVT_${GVT} -DARCH_${ARCH}")
137 | #SET(OPTIONS "${OPTIONS} -DROSS_QUEUE_${QUEUE} -DROSS_RAND_${RAND} -DROSS_NETWORK_mpi -DROSS_CLOCK_${CLOCK} -DROSS_GVT_${GVT} -DARCH_${ARCH}" PARENT_SCOPE)
138 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DROSS_OPTION_LIST='\"${OPTIONS}\"'")
139 | 
140 | # Generate Library
141 | OPTION(ROSS_BUILD_SHARED_LIBS "Build shared libraries instead of static" OFF)
142 | SET(BUILD_SHARED_LIBS ${ROSS_BUILD_SHARED_LIBS})
143 | ADD_LIBRARY(ROSS ${ross_srcs})
144 | SET_TARGET_PROPERTIES(ROSS PROPERTIES OUTPUT_NAME ROSS)
145 | TARGET_LINK_LIBRARIES(ROSS ${ROSS_EXTERNAL_LIBS})
146 | TARGET_INCLUDE_DIRECTORIES(ROSS INTERFACE ${MPI_C_INCLUDE_PATH})
147 | 
148 | if (COVERALLS)
149 |     set(COVERAGE_SRCS ${ross_srcs})
150 |     # Don't check coverage on lz4
151 |     list(REMOVE_ITEM COVERAGE_SRCS lz4.h lz4.c)
152 | 
153 |     # Create the coveralls target.
154 |     coveralls_setup(
155 |         "${COVERAGE_SRCS}" # The source files.
156 |         OFF)               # If we should upload.
157 | 
158 | endif()
159 | 
160 | # Build-specific config header: generate config.h from config.h.in.
161 | CONFIGURE_FILE(config.h.in config.h)
162 | SET(ross_srcs ${ross_srcs} config.h) # NOTE(review): ADD_LIBRARY(ROSS ${ross_srcs}) has already run above, so this append does not appear to reach the target -- confirm.
163 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in PROPERTIES GENERATED FALSE) # the template is a checked-in source file
164 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/config.h PROPERTIES GENERATED TRUE) # the output only exists in the build tree
165 | 
166 | 
167 | # CODES config bin
168 | SET(ROSS_CC $ENV{CC})
169 | SET(ROSS_CXX $ENV{CXX})
170 | CONFIGURE_FILE(ross-config.in ross-config @ONLY)
171 | CONFIGURE_FILE(ross.pc.in ross.pc @ONLY)
172 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/ross.pc.in PROPERTIES GENERATED FALSE)
173 | SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/ross.pc PROPERTIES GENERATED TRUE)
174 | 
175 | 
176 | # Make Install
177 | INSTALL(FILES ${ROSS_BINARY_DIR}/ross-config DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
178 | INSTALL(FILES ${ROSS_BINARY_DIR}/config.h DESTINATION include)
179 | INSTALL(DIRECTORY ${ROSS_SOURCE_DIR}/ DESTINATION include FILES_MATCHING PATTERN "*.h")
180 | INSTALL(TARGETS ROSS EXPORT ROSS-targets DESTINATION lib)
181 | INSTALL(EXPORT ROSS-targets DESTINATION lib)
182 | INSTALL(FILES ROSSConfig.cmake DESTINATION lib)
183 | INSTALL(FILES ${ROSS_BINARY_DIR}/ross.pc DESTINATION lib/pkgconfig)
184 | 


--------------------------------------------------------------------------------
/core/ROSSConfig.cmake:
--------------------------------------------------------------------------------
1 | GET_FILENAME_COMPONENT(SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) # SELF_DIR = directory that contains this file
2 | INCLUDE(${SELF_DIR}/ROSS-targets.cmake) # load ROSS-targets.cmake from that same directory
3 | GET_FILENAME_COMPONENT(ROSS_INCLUDE_DIRS "${SELF_DIR}/../include" ABSOLUTE) # absolute path of the sibling include/ directory
4 | 


--------------------------------------------------------------------------------
/core/avl_tree.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_avl_tree_h
 2 | #define INC_avl_tree_h
 3 | 
 4 | #include <ross.h>
 5 | 
 6 | /* Copied and modified from http://pine.cs.yale.edu/pinewiki/C/AvlTree google cache */
 7 | 
 8 | /* implementation of an AVL tree with explicit heights */
 9 | 
10 | /*
11 |  * The include guard above keeps struct avlNode from being defined twice when
12 |  * this header is pulled in more than once in a translation unit.
13 |  *
14 |  * NOTE(review): AvlTree is used below but not declared in this header; it is
15 |  * presumably a typedef for struct avlNode * provided via <ross.h> -- confirm.
16 |  */
17 | 
18 | struct avlNode {
19 |   struct avlNode *child[2];    /* left and right */
20 |   tw_event *key;
21 |   int height;
22 |   struct avlNode *next;        /* for ROSS weird linked-list memory */
23 | };
24 | 
25 | /* empty avl tree is just a null pointer */
26 | 
27 | #define AVL_EMPTY (0)
28 | 
29 | /* free a tree */
30 | void avlDestroy(AvlTree t);
31 | 
32 | /* return the height of a tree */
33 | int avlGetHeight(AvlTree t);
34 | 
35 | /* return nonzero if key is present in tree */
36 | int avlSearch(AvlTree t, tw_event *key);
37 | 
38 | /* insert a new element into a tree */
39 | /* note *t is actual tree */
40 | void avlInsert(AvlTree *t, tw_event *key);
41 | 
42 | /* run sanity checks on tree (for debugging) */
43 | /* assert will fail if heights are wrong */
44 | void avlSanityCheck(AvlTree t);
45 | 
46 | /* print all keys of the tree in order */
47 | void avlPrintKeys(AvlTree t);
48 | 
49 | /* delete and return minimum value in a tree */
50 | tw_event * avlDeleteMin(AvlTree *t);
51 | 
52 | /* NOTE(review): presumably removes key from the tree and returns the stored event -- confirm in avl_tree.c */
53 | tw_event * avlDelete(AvlTree *t, tw_event *key);
54 | 
55 | /* NOTE(review): avl_alloc/avl_free presumably hand out and take back single nodes (see avlNode.next) -- confirm in avl_tree.c */
56 | AvlTree avl_alloc(void);
57 | 
58 | void avl_free(AvlTree t);
59 | 
60 | #endif /* INC_avl_tree_h */
61 | 


--------------------------------------------------------------------------------
/core/buddy.h:
--------------------------------------------------------------------------------
 1 | #ifndef BUDDY_H
 2 | #define BUDDY_H
 3 | 
 4 | #include <stdint.h>     /* uint32_t, used by BUDDY_ALIGN_PREF and buddy_list_t */
 5 | #include <sys/queue.h>
 6 | 
 7 | /**
 8 |  * @file buddy.h
 9 |  * @brief Buddy-system memory allocator
10 |  */
11 | 
12 | /** Usage tag stored in every block header (buddy_list_t::use). */
13 | typedef enum purpose { FREE, USED } purpose_t;
14 | 
15 | /* Padding size: 32 bytes minus two pointers, the size field and the use field of buddy_list_t. */
16 | #define BUDDY_ALIGN_PREF (32 - 2 * sizeof(void*) - sizeof(uint32_t) - sizeof(purpose_t))
17 | 
18 | /**
19 |  * Metadata about this particular block
20 |  * (and stored at the beginning of this block).
21 |  * One per allocated block of memory.
22 |  * Should be 32 bytes to not screw up alignment.
23 |  */
24 | typedef struct buddy_list
25 | {
26 |     // Should be two pointers
27 |     LIST_ENTRY(buddy_list) next_freelist;
28 |     uint32_t size;
29 |     purpose_t use;
30 |     char padding[BUDDY_ALIGN_PREF];
31 | } buddy_list_t;
32 | 
33 | /** Validity tag stored in every bucket (buddy_list_bucket_t::is_valid). */
34 | typedef enum valid { VALID, INVALID } valid_t;
35 | 
36 | /**
37 |  * Bucket of 2^order sized free memory blocks.
38 |  */
39 | typedef struct buddy_list_bucket
40 | {
41 |     LIST_HEAD(buddy_list_head, buddy_list) ptr;
42 |     unsigned int count;
43 |     unsigned int order;
44 |     valid_t is_valid;
45 | } buddy_list_bucket_t;
46 | 
47 | /* NOTE(review): the semantics of these entry points live in buddy.c; power_of_two presumably selects the arena size as 2^power_of_two -- confirm. */
48 | buddy_list_bucket_t * create_buddy_table(unsigned int power_of_two);
49 | void *buddy_alloc(unsigned size);
50 | void buddy_free(void *ptr);
51 | 
52 | #endif /* BUDDY_H */
53 | 


--------------------------------------------------------------------------------
/core/clock/aarch64.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 |   
 3 | #ifndef __GNUC__
 4 | #  error gcc asm extensions required
 5 | #endif
 6 | #if ! (defined(__aarch64__))
 7 | #  error only aarch64 platform supported
 8 | #endif
 9 | 
10 | /*
11 |  * Same scheme as the amd64 clock, but the counter read is cntvct_el0. clock_opts lists the "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate.
12 |  */
13 | static const tw_optdef clock_opts [] =
14 | {
15 |         TWOPT_GROUP("ROSS Timing"),
16 |         TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
17 |         TWOPT_END()
18 | };
19 | /* Returns the clock option table defined above. */
20 | const tw_optdef *tw_clock_setup(void)
21 | {
22 |         return clock_opts;
23 | }
24 | 
25 | 
26 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
27 | void
28 | tw_clock_init(tw_pe * me)
29 | {
30 |         me->clock_time = 0;
31 |         me->clock_offset = tw_clock_read();
32 | }
33 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
34 | tw_clock
35 | tw_clock_now(tw_pe * me)
36 | {
37 |         me->clock_time = tw_clock_read() - me->clock_offset;
38 |         return me->clock_time;
39 | }
40 | 


--------------------------------------------------------------------------------
/core/clock/aarch64.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_clock_aarch64
 2 | #define INC_clock_aarch64
 3 | 
 4 | typedef uint64_t tw_clock;
 5 | /* Read the cntvct_el0 system register; returns 0 when ROSS_timing is not defined. */
 6 | static inline tw_clock  tw_clock_read(void)
 7 | {
 8 |         tw_clock result=0;
 9 | #ifdef ROSS_timing
10 |        asm volatile ("mrs %0, cntvct_el0" : "=r" (result)); /* mrs copies the register into result */
11 | #endif
12 |         return result;
13 | }
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/core/clock/amd64.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | #ifndef __GNUC__
 4 | #  error gcc asm extensions required
 5 | #endif
 6 | #if ! (defined(__amd64__) || defined(__x86_64__))
 7 | #  error only amd64 platform supported
 8 | #endif
 9 | 
10 | /*
11 |  * The counter read itself (the x86 "rdtsc" instruction) is done by tw_clock_read() in amd64.h.
12 |  * clock_opts lists the "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate.
13 |  */
14 | static const tw_optdef clock_opts [] =
15 | {
16 | 	TWOPT_GROUP("ROSS Timing"),
17 | 	TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
18 | 	TWOPT_END()
19 | };
20 | /* Returns the clock option table defined above. */
21 | const tw_optdef *tw_clock_setup(void)
22 | {
23 | 	return clock_opts;
24 | }
25 | 
26 | 
27 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
28 | void
29 | tw_clock_init(tw_pe * me)
30 | {
31 | 	me->clock_time = 0;
32 | 	me->clock_offset = tw_clock_read();
33 | }
34 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
35 | tw_clock
36 | tw_clock_now(tw_pe * me)
37 | {
38 | 	me->clock_time = tw_clock_read() - me->clock_offset;
39 | 	return me->clock_time;
40 | }
41 | 


--------------------------------------------------------------------------------
/core/clock/amd64.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_clock_amd64
 2 | #define INC_clock_amd64
 3 | 
 4 | typedef uint64_t tw_clock;
 5 | /* Read the x86 time-stamp counter with rdtsc; returns 0 when ROSS_timing is not defined. */
 6 | static inline tw_clock  tw_clock_read(void)
 7 | {
 8 | 	tw_clock result=0;
 9 | #ifdef ROSS_timing
10 | 	unsigned a, d; /* low ("=a") and high ("=d") halves written by rdtsc */
11 | 	/* Combine the two 32-bit halves into one 64-bit value; re-read while that value converted to int equals -1. */
12 | 	do {
13 | 		__asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
14 | 		result = ((uint64_t)a) | (((uint64_t)d) << 32);
15 | 	} while (__builtin_expect ((int) result == -1, 0)); /* hinted as the unlikely case */
16 | #endif
17 | 	return result;
18 | }
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/core/clock/armv7l.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 	This implementation of an ARM v7 clock reader utilizes the
 3 | 	Performance Monitoring Unit (PMU) on Cortex-A7 chips.
 4 | 	Unfortunately, access to the cycle counter from userspace
 5 | 	is disabled by default. A kernel module that enables access
 6 | 	from userspace is required or the system will fault.
 7 | 
 8 | 	An example kernel module that does just that can be found:
 9 | 	https://github.com/nmcglohon/armv7l-userspace-counter.git
10 | 
11 | 	More information can be found:
12 | 	http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
13 |  */
14 | 
15 | #include <ross.h>
16 | 
17 | #ifndef __GNUC__
18 | #  error gcc asm extensions required
19 | #endif
20 | #if ! (defined(__arm__))
21 | #  error only 32 bit arm platform supported
22 | #endif
23 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry. g_tw_clock_rate is an unsigned long long, so the entry uses TWOPT_ULONGLONG like every other clock implementation. */
24 | static const tw_optdef clock_opts [] =
25 | {
26 | 	TWOPT_GROUP("ROSS Timing"),
27 | 	TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
28 | 	TWOPT_END()
29 | };
30 | /* Returns the clock option table defined above. */
31 | const tw_optdef *tw_clock_setup(void)
32 | {
33 | 	return clock_opts;
34 | }
35 | 
36 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
37 | void tw_clock_init(tw_pe * me)
38 | {
39 | 	me->clock_time = 0;
40 | 	me->clock_offset = tw_clock_read();
41 | }
42 | 
43 | /* Store the counter value relative to clock_offset in me->clock_time and return it. tw_clock is unsigned int here (armv7l.h), so the arithmetic wraps at that width. */
44 | tw_clock tw_clock_now(tw_pe * me)
45 | {
46 | 	me->clock_time = tw_clock_read() - me->clock_offset;
47 | 	return me->clock_time;
48 | }
49 | 


--------------------------------------------------------------------------------
/core/clock/armv7l.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | 	This implementation of an ARM v7 clock reader utilizes the
 3 | 	Performance Monitoring Unit (PMU) on Cortex-A7 chips.
 4 | 	Unfortunately, access to the cycle counter from userspace
 5 | 	is disabled by default. A kernel module that enables access
 6 | 	from userspace is required or the system will fault.
 7 | 
 8 | 	An example kernel module that does just that can be found:
 9 | 	https://github.com/nmcglohon/armv7l-userspace-counter.git
10 | 
11 | 	More information can be found:
12 | 	http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html
13 |  */
14 | 
15 | #ifndef INC_clock_armv7l
16 | #define INC_clock_armv7l
17 | 
18 | typedef unsigned int tw_clock;
19 | /* Read the PMU cycle counter (see the file header for the kernel-module requirement); returns 0 when ROSS_timing is not defined. */
20 | static inline tw_clock  tw_clock_read(void)
21 | {
22 | 	unsigned int result = 0; /* initialised so the non-timing build returns a defined value */
23 | #ifdef ROSS_timing
24 | 	do {
25 | 		__asm__ __volatile__ ("MRC p15, 0, %0, c9, c13, 0" : "=r"(result));
26 | 	} while (__builtin_expect ((int) result == -1, 0)); /* re-read in the unlikely case the value converts to -1 */
27 | #endif
28 | 
29 | 	return result;
30 | }
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/core/clock/bgl.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate. */
 3 | static const tw_optdef clock_opts [] =
 4 | {
 5 | 	TWOPT_GROUP("ROSS Timing"),
 6 | 	TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
 7 | 	TWOPT_END()
 8 | };
 9 | /* Returns the clock option table defined above. */
10 | const tw_optdef *tw_clock_setup(void)
11 | {
12 | 	return clock_opts;
13 | }
14 | /* Read the PowerPC time base as upper/lower halves and combine them; returns 0 when ROSS_timing is not defined. */
15 | tw_clock
16 | tw_clock_read(void)
17 | {
18 | 	tw_clock	result = 0;
19 | #ifdef ROSS_timing
20 | 	unsigned long int upper, lower,tmp;
21 | 	/* Loop inside the asm: read upper, lower, upper again; branch back to label 0 if the two upper reads differ. */
22 | 	__asm__ volatile(
23 | 		"0:                  \n"
24 | 		"\tmftbu   %0           \n"
25 | 		"\tmftb    %1           \n"
26 | 		"\tmftbu   %2           \n"
27 | 		"\tcmpw    %2,%0        \n"
28 | 		"\tbne     0b         \n"
29 | 		: "=r"(upper),"=r"(lower),"=r"(tmp)
30 | 		: /* no inputs */ : "cc" /* cmpw overwrites the condition register, so tell the compiler */
31 | 	);
32 | 	result = upper;
33 | 	result = result<<32;
34 | 	result = result|lower;
35 | #endif
36 | 	return(result);
37 | }
38 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
39 | void
40 | tw_clock_init(tw_pe * me)
41 | {
42 | 	me->clock_time = 0;
43 | 	me->clock_offset = tw_clock_read();
44 | }
45 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
46 | tw_clock
47 | tw_clock_now(tw_pe * me)
48 | {
49 | 	me->clock_time = tw_clock_read() - me->clock_offset;
50 | 	return me->clock_time;
51 | }
52 | 


--------------------------------------------------------------------------------
/core/clock/bgl.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_bgl
2 | #define INC_clock_bgl
3 | 
4 | typedef unsigned long long tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/clock/bgq.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate. */
 3 | static const tw_optdef clock_opts [] =
 4 | {
 5 | 	TWOPT_GROUP("ROSS Timing"),
 6 | 	TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
 7 | 	TWOPT_END()
 8 | };
 9 | /* Returns the clock option table defined above. */
10 | const tw_optdef *tw_clock_setup(void)
11 | {
12 | 	return clock_opts;
13 | }
14 | /* Current GetTimeBase() value when ROSS_timing is defined, otherwise 0. */
15 | tw_clock
16 | tw_clock_read(void)
17 | {
18 | #ifdef ROSS_timing
19 | 	return GetTimeBase();
20 | #else
21 | 	return 0;
22 | #endif
23 | }
24 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
25 | void
26 | tw_clock_init(tw_pe * me)
27 | {
28 | 	me->clock_time = 0;
29 | 	me->clock_offset = tw_clock_read();
30 | }
31 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
32 | tw_clock
33 | tw_clock_now(tw_pe * me)
34 | {
35 | 	me->clock_time = tw_clock_read() - me->clock_offset;
36 | 	return me->clock_time;
37 | }
38 | 


--------------------------------------------------------------------------------
/core/clock/bgq.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_bgq
2 | #define INC_clock_bgq
3 | 
4 | #include<hwi/include/bqc/A2_inlines.h>
5 | 
6 | typedef unsigned long long tw_clock;
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/core/clock/gtod.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | extern unsigned long long g_tw_clock_rate;
 4 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate. */
 5 | static const tw_optdef clock_opts [] =
 6 | {
 7 |  TWOPT_GROUP("ROSS Timing"),
 8 |  TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
 9 |  TWOPT_END()
10 | };
11 | /* Returns the clock option table defined above. */
12 | const tw_optdef *tw_clock_setup(void)
13 | {
14 | 	return clock_opts;
15 | }
16 | /* gettimeofday() reading expressed in microseconds; measured from the first call when ZERO_BASED is defined. */
17 | tw_clock tw_clock_read(void)
18 | {
19 |   const tw_clock usec_per_sec = 1000000;
20 |   struct timeval now;
21 |   tw_clock stamp;
22 | #ifdef ZERO_BASED
23 |   /* The first reading is latched as the origin for all later ones. */
24 |   static volatile int have_origin = 0;
25 |   static volatile tw_clock origin = 0;
26 | #else
27 |   const tw_clock origin = 0;
28 | #endif
29 | 
30 |   gettimeofday(&now, NULL);
31 |   stamp = ((tw_clock) now.tv_sec) * usec_per_sec + (tw_clock) now.tv_usec;
32 | #ifdef ZERO_BASED
33 |   if (have_origin == 0) {
34 |     origin = stamp;
35 |     have_origin = 1;
36 |   }
37 | #endif
38 |   return stamp - origin;
39 | }
40 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
41 | void
42 | tw_clock_init(tw_pe * me)
43 | {
44 | 	me->clock_time = 0;
45 | 	me->clock_offset = tw_clock_read();
46 | }
47 | /* Store the clock value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
48 | tw_clock
49 | tw_clock_now(tw_pe * me)
50 | {
51 | 	me->clock_time = tw_clock_read() - me->clock_offset;
52 | 	return me->clock_time;
53 | }
54 | 


--------------------------------------------------------------------------------
/core/clock/gtod.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_gtod
2 | #define INC_clock_gtod
3 | 
4 | typedef uint64_t tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/clock/i386.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | #ifndef __GNUC__
 4 | #  error gcc asm extensions required
 5 | #endif
 6 | #ifndef __i386__
 7 | #  error only i386 platform supported
 8 | #endif
 9 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate. */
10 | static const tw_optdef clock_opts [] =
11 | {
12 | 	TWOPT_GROUP("ROSS Timing"),
13 | 	TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
14 | 	TWOPT_END()
15 | };
16 | /* Returns the clock option table defined above. */
17 | const tw_optdef *tw_clock_setup(void)
18 | {
19 | 	return clock_opts;
20 | }
21 | /* Read the x86 time-stamp counter with rdtsc. Unlike the amd64 variant, this read is not conditional on ROSS_timing. */
22 | tw_clock tw_clock_read(void)
23 | {
24 | 	tw_clock result;
25 | 	do {
26 | 		__asm__ __volatile__("rdtsc" : "=A" (result)); /* "=A": 64-bit result taken from the EDX:EAX pair */
27 | 	} while (__builtin_expect ((int) result == -1, 0)); /* re-read in the unlikely case the value converts to -1 */
28 | 	return result;
29 | }
30 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
31 | void
32 | tw_clock_init(tw_pe * me)
33 | {
34 | 	me->clock_time = 0;
35 | 	me->clock_offset = tw_clock_read();
36 | }
37 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
38 | tw_clock
39 | tw_clock_now(tw_pe * me)
40 | {
41 | 	me->clock_time = tw_clock_read() - me->clock_offset;
42 | 	return me->clock_time;
43 | }
44 | 


--------------------------------------------------------------------------------
/core/clock/i386.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_i386
2 | #define INC_clock_i386
3 | 
4 | typedef uint64_t tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/clock/ia64.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | #ifndef __GNUC__
 4 | #  error gcc asm extensions required
 5 | #endif
 6 | #ifndef __ia64__
 7 | #  error only ia64 platform supported
 8 | #endif
 9 | /* File-local helper: read the ar.itc application register. This read is not conditional on ROSS_timing. */
10 | static tw_clock tw_clock_read(void)
11 | {
12 | 	tw_clock result;
13 | 	do {
14 | 		__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); /* "memory" clobber: compiler must not cache memory across the read */
15 | 	} while (__builtin_expect ((int) result == -1, 0)); /* re-read in the unlikely case the value converts to -1 */
16 | 	return result;
17 | }
18 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
19 | void
20 | tw_clock_init(tw_pe * me)
21 | {
22 | 	me->clock_time = 0;
23 | 	me->clock_offset = tw_clock_read();
24 | }
25 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
26 | tw_clock
27 | tw_clock_now(tw_pe * me)
28 | {
29 | 	me->clock_time = tw_clock_read() - me->clock_offset;
30 | 	return me->clock_time;
31 | }
32 | 


--------------------------------------------------------------------------------
/core/clock/ia64.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_ia64
2 | #define INC_clock_ia64
3 | 
4 | typedef uint64_t tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/clock/ppc.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | #ifndef __GNUC__
 4 | #  error gcc asm extensions required
 5 | #endif
 6 | #if !(defined __ppc__ || defined __PPC__)
 7 | #  error only ppc platform supported
 8 | #endif
 9 | /* Read the PowerPC time base as an upper and a lower half and combine them. This read is not conditional on ROSS_timing. */
10 | tw_clock tw_clock_read(void)
11 | {
12 | 	unsigned long tbu;
13 | 	unsigned long tb1;
14 | 	unsigned long tbu1;
15 | 	/* Operand map: %0 = tb1 (mftb), %1 = tbu (second mftbu), %2 = tbu1 (first mftbu). */
16 | 	do {
17 | 		asm volatile(
18 | 			"mftbu %2\n\t"
19 | 			"mftb  %0\n\t"
20 | 			"mftbu %1\n\t"
21 | 		: "=r"(tb1), "=r"(tbu), "=r"(tbu1) );
22 | 	} while (tbu != tbu1); /* the upper half changed between its two reads, so the trio is inconsistent: retry */
23 | 
24 | 	return ( ((tw_clock)tbu) << 32 ) | tb1;
25 | }
26 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
27 | void
28 | tw_clock_init(tw_pe * me)
29 | {
30 | 	me->clock_time = 0;
31 | 	me->clock_offset = tw_clock_read();
32 | }
33 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
34 | tw_clock
35 | tw_clock_now(tw_pe * me)
36 | {
37 | 	me->clock_time = tw_clock_read() - me->clock_offset;
38 | 	return me->clock_time;
39 | }
40 | 


--------------------------------------------------------------------------------
/core/clock/ppc.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_ppc
2 | #define INC_clock_ppc
3 | 
4 | typedef uint64_t tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/clock/ppc64le.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | extern unsigned long long g_tw_clock_rate;
 4 | /* Clock options: "ROSS Timing" group with one "clock-rate" entry that refers to g_tw_clock_rate. */
 5 | static const tw_optdef clock_opts [] =
 6 | {
 7 |  TWOPT_GROUP("ROSS Timing"),
 8 |  TWOPT_ULONGLONG("clock-rate", g_tw_clock_rate, "CPU Clock Rate"),
 9 |  TWOPT_END()
10 | };
11 | /* Overrides the default clock rate for this platform, then returns the clock option table defined above. */
12 | const tw_optdef *tw_clock_setup(void)
13 | {
14 | 
15 |     // reset from default to 512MHz as that's the timebase for the POWER9 system.
16 |     g_tw_clock_rate = 512000000ULL; /* integer literal: g_tw_clock_rate is an unsigned long long */
17 |     return clock_opts;
18 | }
19 | /* Read the time base as an upper and a lower 32-bit half and combine them. This read is not conditional on ROSS_timing. */
20 | tw_clock tw_clock_read(void)
21 | {
22 |   unsigned int tbl, tbu0, tbu1;
23 |   /* Upper half is read before and after the lower half; a mismatch means the trio is inconsistent, so retry. */
24 |   do {
25 |     __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
26 |     __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
27 |     __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
28 |   } while (tbu0 != tbu1);
29 | 
30 |   return (((unsigned long long)tbu0) << 32) | tbl;
31 | }
32 | /* Start timing for PE `me`: clear clock_time and record the current tw_clock_read() value as clock_offset. */
33 | void
34 | tw_clock_init(tw_pe * me)
35 | {
36 |     me->clock_time = 0;
37 |     me->clock_offset = tw_clock_read();
38 | }
39 | /* Store the counter value relative to clock_offset (set in tw_clock_init) in me->clock_time and return it. */
40 | tw_clock
41 | tw_clock_now(tw_pe * me)
42 | {
43 |     me->clock_time = tw_clock_read() - me->clock_offset;
44 |     return me->clock_time;
45 | }
46 | 


--------------------------------------------------------------------------------
/core/clock/ppc64le.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_clock_ppc64le
2 | #define INC_clock_ppc64le
3 | 
4 | typedef uint64_t tw_clock;
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/cmake/Coveralls.cmake:
--------------------------------------------------------------------------------
  1 | #
  2 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  3 | # of this software and associated documentation files (the "Software"), to deal
  4 | # in the Software without restriction, including without limitation the rights
  5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  6 | # copies of the Software, and to permit persons to whom the Software is
  7 | # furnished to do so, subject to the following conditions:
  8 | #
  9 | # The above copyright notice and this permission notice shall be included in all
 10 | # copies or substantial portions of the Software.
 11 | #
 12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 18 | # SOFTWARE.
 19 | #
 20 | # Copyright (C) 2014 Joakim Söderberg <joakim.soderberg@gmail.com>
 21 | #
 22 | 
 23 | 
 24 | #
 25 | # Param _COVERAGE_SRCS	A list of source files that coverage should be collected for.
 26 | # Param _COVERALLS_UPLOAD Upload the result to coveralls?
 27 | #
 28 | function(coveralls_setup _COVERAGE_SRCS _COVERALLS_UPLOAD)
 29 | 
 30 | 	if (ARGC GREATER 2)
 31 | 		set(_CMAKE_SCRIPT_PATH ${ARGN})
 32 | 		message("Coveralls: Using alternate CMake script dir: ${_CMAKE_SCRIPT_PATH}")
 33 | 	else()
 34 | 		set(_CMAKE_SCRIPT_PATH ${PROJECT_SOURCE_DIR}/cmake)
 35 | 	endif()
 36 | 
 37 | 	if (NOT EXISTS "${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake")
 38 | 		message(FATAL_ERROR "Coveralls: Missing ${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake")
 39 | 	endif()
 40 | 
 41 | 	if (NOT EXISTS "${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake")
 42 | 		message(FATAL_ERROR "Coveralls: Missing ${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake")
 43 | 	endif()
 44 | 
 45 | 	# When passing a CMake list to an external process, the list
 46 | 	# will be converted from the format "1;2;3" to "1 2 3".
 47 | 	# This means the script we're calling won't see it as a list
 48 | 	# of sources, but rather just one long path. We remedy this
 49 | 	# by replacing ";" with "*" and then reversing that in the script
 50 | 	# that we're calling.
 51 | 	# http://cmake.3232098.n2.nabble.com/Passing-a-CMake-list-quot-as-is-quot-to-a-custom-target-td6505681.html
 52 | 	set(COVERAGE_SRCS_TMP ${_COVERAGE_SRCS})
 53 | 	set(COVERAGE_SRCS "")
 54 | 	foreach (COVERAGE_SRC ${COVERAGE_SRCS_TMP})
 55 | 		set(COVERAGE_SRCS "${COVERAGE_SRCS}*${COVERAGE_SRC}")
 56 | 	endforeach()
 57 | 
 58 | 	#message("Coverage sources: ${COVERAGE_SRCS}")
 59 | 	set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json)
 60 | 
 61 | 	add_custom_target(coveralls_generate
 62 | 
 63 | 		# Zero the coverage counters.
 64 | 		COMMAND ${CMAKE_COMMAND}
 65 | 				-P "${_CMAKE_SCRIPT_PATH}/CoverallsClear.cmake"
 66 | 
 67 | 		# Run regress tests.
 68 | 		COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
 69 | 
 70 | 		# Generate Gcov and translate it into coveralls JSON.
 71 | 		# We do this by executing an external CMake script.
 72 | 		# (We don't want this to run at CMake generation time, but after compilation and everything has run).
 73 | 		COMMAND ${CMAKE_COMMAND}
 74 | 				-DCOVERAGE_SRCS="${COVERAGE_SRCS}" # TODO: This is passed like: "a b c", not "a;b;c"
 75 | 				-DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}"
 76 | 				-DCOV_PATH="${PROJECT_BINARY_DIR}"
 77 | 				-DPROJECT_ROOT="${PROJECT_SOURCE_DIR}"
 78 | 				-P "${_CMAKE_SCRIPT_PATH}/CoverallsGenerateGcov.cmake"
 79 | 
 80 | 		WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
 81 | 		COMMENT "Generating coveralls output..."
 82 | 		)
 83 | 
 84 | 	if (_COVERALLS_UPLOAD)
 85 | 		message("COVERALLS UPLOAD: ON")
 86 | 
 87 | 		find_program(CURL_EXECUTABLE curl)
 88 | 
 89 | 		if (NOT CURL_EXECUTABLE)
 90 | 			message(FATAL_ERROR "Coveralls: curl not found! Aborting")
 91 | 		endif()
 92 | 
 93 | 		add_custom_target(coveralls_upload
 94 | 			# Upload the JSON to coveralls.
 95 | 			COMMAND ${CURL_EXECUTABLE}
 96 | 					-S -F json_file=@${COVERALLS_FILE}
 97 | 					https://coveralls.io/api/v1/jobs
 98 | 
 99 | 			DEPENDS coveralls_generate
100 | 
101 | 			WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
102 | 			COMMENT "Uploading coveralls output...")
103 | 
104 | 		add_custom_target(coveralls DEPENDS coveralls_upload)
105 | 	else()
106 | 		message("COVERALLS UPLOAD: OFF")
107 | 		add_custom_target(coveralls DEPENDS coveralls_generate)
108 | 	endif()
109 | 
110 | endfunction()
111 | 
112 | macro(coveralls_turn_on_coverage)
113 | 	if(NOT (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
114 | 		AND (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang"))
115 | 		message(FATAL_ERROR "Coveralls: Compiler ${CMAKE_C_COMPILER_ID} is not GNU gcc! Aborting... You can set this on the command line using CC=/usr/bin/gcc CXX=/usr/bin/g++ cmake <options> ..")
116 | 	endif()
117 | 
118 | 	if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
119 | 		message(FATAL_ERROR "Coveralls: Code coverage results with an optimised (non-Debug) build may be misleading! Add -DCMAKE_BUILD_TYPE=Debug")
120 | 	endif()
121 | 
122 | 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
123 | 	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
124 | endmacro()
125 | 
126 | 
127 | 
128 | 


--------------------------------------------------------------------------------
/core/cmake/CoverallsClear.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 3 | # of this software and associated documentation files (the "Software"), to deal
 4 | # in the Software without restriction, including without limitation the rights
 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 6 | # copies of the Software, and to permit persons to whom the Software is
 7 | # furnished to do so, subject to the following conditions:
 8 | #
 9 | # The above copyright notice and this permission notice shall be included in all
10 | # copies or substantial portions of the Software.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 | # SOFTWARE.
19 | #
20 | # Copyright (C) 2014 Joakim Söderberg <joakim.soderberg@gmail.com>
21 | #
22 | 
23 | file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda) # NOTE(review): Coveralls.cmake runs this script with only -P (no -DPROJECT_BINARY_DIR), and file(REMOVE_RECURSE) is not documented to expand wildcards -- confirm this actually removes the .gcda files.
24 | 
25 | 


--------------------------------------------------------------------------------
/core/cmake/GetGitRevisionDescription.LICENSE_1_0.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/core/cmake/GetGitRevisionDescription.cmake.in:
--------------------------------------------------------------------------------
 1 | # 
 2 | # Internal file for GetGitRevisionDescription.cmake
 3 | #
 4 | # Requires CMake 2.6 or newer (uses the 'function' command)
 5 | #
 6 | # Original Author:
 7 | # 2009-2010 Ryan Pavlik <rpavlik@iastate.edu> <abiryan@ryand.net>
 8 | # http://academic.cleardefinition.com
 9 | # Iowa State University HCI Graduate Program/VRAC
10 | #
11 | # Copyright Iowa State University 2009-2010.
12 | # Distributed under the Boost Software License, Version 1.0.
13 | # (See accompanying file LICENSE_1_0.txt or copy at
14 | # http://www.boost.org/LICENSE_1_0.txt)
15 | 
# Result variable: stays empty unless one of the branches below fills it in.
set(HEAD_HASH)

# The @...@ placeholders are substituted when this template is configured.
file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)

string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
if(HEAD_CONTENTS MATCHES "ref")
	# named branch
	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
		# Copy the ref file; its contents are read into HEAD_HASH below.
		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
	elseif(EXISTS "@GIT_DIR@/logs/${HEAD_REF}")
		# Fallback when only the log for the ref exists. Note that HEAD_HASH is
		# set to the ref name here (not a hash), which skips the read below.
		configure_file("@GIT_DIR@/logs/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
		set(HEAD_HASH "${HEAD_REF}")
	endif()
	# NOTE(review): if neither file exists, head-ref is not (re)written here and
	# the file(READ) below depends on a previously copied file -- confirm.
else()
	# detached HEAD
	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
endif()

# Read the copied file unless a branch above already set HEAD_HASH.
if(NOT HEAD_HASH)
	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
	string(STRIP "${HEAD_HASH}" HEAD_HASH)
endif()
39 | 


--------------------------------------------------------------------------------
/core/cmake/SetupMPI.cmake:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Copyright (c) 2017, Lawrence Livermore National Security, LLC.
 3 | #
 4 | # Produced at the Lawrence Livermore National Laboratory
 5 | #
 6 | # LLNL-CODE-725085
 7 | #
 8 | # All rights reserved.
 9 | #
10 | # This file is part of BLT.
11 | #
12 | # For additional details, please also read BLT/LICENSE.
13 | #
14 | # Redistribution and use in source and binary forms, with or without
15 | # modification, are permitted provided that the following conditions are met:
16 | #
17 | # * Redistributions of source code must retain the above copyright notice,
18 | #   this list of conditions and the disclaimer below.
19 | #
20 | # * Redistributions in binary form must reproduce the above copyright notice,
21 | #   this list of conditions and the disclaimer (as noted below) in the
22 | #   documentation and/or other materials provided with the distribution.
23 | #
24 | # * Neither the name of the LLNS/LLNL nor the names of its contributors may
25 | #   be used to endorse or promote products derived from this software without
26 | #   specific prior written permission.
27 | #
28 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 | # ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY,
32 | # LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY
33 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 | # DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
37 | # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
38 | # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 | # POSSIBILITY OF SUCH DAMAGE.
40 | #
41 | ###############################################################################
42 | 
43 | ################################
44 | # MPI
45 | ################################
46 | 
# Locate MPI (not REQUIRED, so the variables printed below may be empty) and
# report what was found for the C and C++ bindings.
find_package(MPI)
message(STATUS "MPI C Compile Flags:  ${MPI_C_COMPILE_FLAGS}")
message(STATUS "MPI C Include Path:   ${MPI_C_INCLUDE_PATH}")
message(STATUS "MPI C Link Flags:     ${MPI_C_LINK_FLAGS}")
message(STATUS "MPI C Libraries:      ${MPI_C_LIBRARIES}")

message(STATUS "MPI CXX Compile Flags: ${MPI_CXX_COMPILE_FLAGS}")
message(STATUS "MPI CXX Include Path:  ${MPI_CXX_INCLUDE_PATH}")
message(STATUS "MPI CXX Link Flags:    ${MPI_CXX_LINK_FLAGS}")
message(STATUS "MPI CXX Libraries:     ${MPI_CXX_LIBRARIES}")

message(STATUS "MPI Executable:       ${MPIEXEC}")
message(STATUS "MPI Num Proc Flag:    ${MPIEXEC_NUMPROC_FLAG}")


if (ENABLE_FORTRAN)
    # Determine if we should use fortran mpif.h header or fortran mpi module
    find_path(mpif_path
        NAMES "mpif.h"
        PATHS ${MPI_Fortran_INCLUDE_PATH}
        NO_DEFAULT_PATH
        )

    # MPI_Fortran_USE_MPIF is an ON/OFF switch, so it is cached as BOOL
    # (it was previously cached with the PATH type).
    if(mpif_path)
        set(MPI_Fortran_USE_MPIF ON CACHE BOOL "")
        message(STATUS "Using MPI Fortran header: mpif.h")
    else()
        set(MPI_Fortran_USE_MPIF OFF CACHE BOOL "")
        message(STATUS "Using MPI Fortran module: mpi.mod")
    endif()
endif()
78 | 


--------------------------------------------------------------------------------
/core/config.h.in:
--------------------------------------------------------------------------------
// ROSS Configuration Options
// Each ${...} placeholder is substituted when this template is configured,
// producing one selector macro per line (e.g. ROSS_QUEUE_<value of QUEUE>).
#define ROSS_QUEUE_${QUEUE}
#define ROSS_RAND_${RAND}
#define ROSS_NETWORK_${NETWORK}
#define ROSS_GVT_${GVT}
#define ROSS_CLOCK_${CLOCK}
#define ARCH_${ARCH}

// ROSS Core
// A #cmakedefine line becomes a #define only when the CMake variable of the
// same name is set; otherwise the macro is left undefined.
#cmakedefine HAVE_CTIME 1
#define ROSS_VERSION "${VERSION}"
#cmakedefine AVL_TREE 1
#cmakedefine USE_BGPM
#cmakedefine RAND_NORMAL
#cmakedefine ROSS_timing
#cmakedefine ROSS_runtime_checks
#cmakedefine ROSS_ALLOC_DEBUG
#cmakedefine USE_RIO
#cmakedefine USE_DAMARIS
#cmakedefine USE_RAND_TIEBREAKER
21 | 


--------------------------------------------------------------------------------
/core/gvt/7oclock.h.old:
--------------------------------------------------------------------------------
 1 | #ifndef INC_gvt_7oclock_h
 2 | #define INC_gvt_7oclock_h
 3 | 
 4 | 	/* Clock Computation Variables:
 5 | 	 *
 6 | 	 * The clock is used to implement the 7 O'Clock Algorithm, but
 7 | 	 * is useful in other areas, such as determining how long it takes to
 8 | 	 * actually complete tasks, such as enq's and deq's.
 9 | 	 */
/* Non-negative while a GVT computation is considered in progress (see tw_gvt_inprogress). */
static tw_volatile int g_tw_7oclock_node_flag;
/* Added to the current clock reading before comparing against the GVT interval. */
static tw_volatile tw_clock g_tw_clock_max_send_delta_t;
/* Clock value at (or after) which a GVT computation is due. */
static tw_volatile tw_clock g_tw_clock_gvt_interval;
/* NOTE(review): not referenced in this header -- presumably the interval length; confirm in 7oclock.c. */
static tw_volatile tw_clock g_tw_clock_gvt_window_size;

/* Step added to percent_complete per report; values above 1.0 disable gvt_print(). */
static tw_stime gvt_print_interval = 0.1;
/* Progress accumulator advanced by gvt_print(); 0.0 means nothing reported yet. */
static tw_stime percent_complete = 0.0;
17 | 
18 | static inline int 
19 | tw_gvt_inprogress(tw_pe * pe)
20 | {
21 | #if 0
22 | 	return (g_tw_7oclock_node_flag == -g_tw_npe && 
23 | 		tw_clock_now(pe) < g_tw_clock_gvt_interval ? 0 : 1);
24 | #endif
25 | 	return (g_tw_7oclock_node_flag >= 0 || 
26 | 		tw_clock_now(pe) + g_tw_clock_max_send_delta_t >= g_tw_clock_gvt_interval);
27 | }
28 | 
29 | static inline void 
30 | gvt_print(tw_stime gvt)
31 | {
32 | 	if(gvt_print_interval > 1.0)
33 | 		return;
34 | 
35 | 	if(percent_complete == 0.0)
36 | 	{
37 | 		percent_complete = gvt_print_interval;
38 | 		return;
39 | 	}
40 | 
41 | 	printf("GVT #%d: simulation %d%% complete (",
42 | 		g_tw_gvt_done,
43 | 		(int) min(100, floor(100 * (gvt/g_tw_ts_end))));
44 | 
45 | 	if (gvt == DBL_MAX)
46 | 		printf("GVT = %s", "MAX");
47 | 	else
48 | 		printf("GVT = %.4f", gvt);
49 | 
50 | 	printf(").\n");
51 | 	percent_complete += gvt_print_interval;
52 | }
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/core/gvt/mpi_allreduce.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_gvt_mpi_allreduce_h
 2 | #define INC_gvt_mpi_allreduce_h
 3 | 
/* Step added to percent_complete per report; exactly 1.0 disables gvt_print(). */
static double gvt_print_interval = 0.01;
/* Progress accumulator advanced by gvt_print(); 0.0 means nothing reported yet. */
static double percent_complete = 0.0;
 6 | 
 7 | static inline int
 8 | tw_gvt_inprogress(tw_pe * pe)
 9 | {
10 | 	return pe->gvt_status;
11 | }
12 | 
13 | static inline void
14 | gvt_print(tw_stime gvt)
15 | {
16 | 	if(gvt_print_interval == 1.0)
17 | 		return;
18 | 
19 | 	if(percent_complete == 0.0)
20 | 	{
21 | 		percent_complete = gvt_print_interval;
22 | 		return;
23 | 	}
24 | 
25 |     double ts = TW_STIME_DBL(gvt);
26 | 
27 |     printf("GVT #%d: simulation %d%% complete, max event queue size %u (",
28 |                g_tw_gvt_done,
29 |                (int) ROSS_MIN(100, floor(100 * (ts/g_tw_ts_end))),
30 |                tw_pq_max_size(g_tw_pe->pq));
31 | 
32 |     if (ts == DBL_MAX)
33 |         printf("GVT = %s", "MAX");
34 |     else
35 |         printf("GVT = %.4f", ts);
36 | 
37 |     printf(").\n");
38 | 
39 | #ifdef AVL_TREE
40 |     printf("AVL tree size: %d\n", g_tw_pe->avl_tree_size);
41 | #endif
42 | 
43 | 	percent_complete += gvt_print_interval;
44 | }
45 | 
46 | extern tw_stat st_get_allreduce_count();
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/core/hash-quadratic.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | #ifdef AVL_TREE
  3 | #include "avl_tree.h"
  4 | #endif /* AVL_TREE */
  5 | 
  6 | #ifndef AVL_TREE
  7 | static void     rehash(tw_hash * hash_t, int pe);
  8 | static int find_entry(tw_event ** hash_t, tw_event * event, int hash_size, int pe);
  9 | static void     insert(tw_event ** hash_t, tw_event * event, int hash_size);
 10 | static int find_empty(tw_event ** hash_t, tw_event * event, int hash_size);
 11 | static int      next_prime(int ptst);
 12 | static tw_event **allocate_table(int hash_size);
 13 | static int hash_(tw_eventid event_id, int hash_size);
 14 | #endif
 15 | static int      is_prime(int ptst);
 16 | tw_event *hash_search(tw_event ** hash_t, tw_event *evt, int size);
 17 | 
 18 | void     hash_print(tw_hash * h);
 19 | 
/* Number of per-node tables; overwritten with tw_nnodes() in tw_hash_create(). */
static unsigned int ncpu = 1;
/* Initial size of each table; tw_hash_create() replaces a non-prime value via next_prime(). */
unsigned int g_tw_hash_size = 31;
 22 | 
 23 | #ifndef AVL_TREE
 24 | int
 25 | hash_(tw_eventid event_id, int hash_size)
 26 | {
 27 | 	return event_id % hash_size;
 28 | }
 29 | #endif
 30 | 
 31 | void           *
 32 | tw_hash_create()
 33 | {
 34 | #ifdef AVL_TREE
 35 |   unsigned int i;
 36 |   AvlTree avl_list;
 37 | 
 38 |   g_tw_pe->avl_tree_size = 0;
 39 | 
 40 |   g_tw_avl_node_count = 1 << g_tw_avl_node_count;
 41 |   avl_list = (AvlTree) tw_calloc(TW_LOC, "avl tree", sizeof(struct avlNode), g_tw_avl_node_count);
 42 | 
 43 |   for (i = 0; i < g_tw_avl_node_count - 1; i++) {
 44 |     avl_list[i].next = &avl_list[i + 1];
 45 |   }
 46 |   avl_list[i].next = NULL;
 47 | 
 48 |   g_tw_pe->avl_list_head = &avl_list[0];
 49 | 
 50 |   return NULL;
 51 | #else
 52 | 	tw_hash        *h;
 53 | 	unsigned int             pi;
 54 | 
 55 | 	ncpu = tw_nnodes();
 56 | 	h = (tw_hash *) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_hash), 1);
 57 | 
 58 | 	if (!h)
 59 | 		tw_error(TW_LOC, "Cannot allocate tw_hash.");
 60 | 
 61 | 	h->num_stored = (int *) tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1);
 62 | 	h->hash_sizes = (unsigned int *) tw_calloc(TW_LOC, "tw_hash", sizeof(int) * ncpu, 1);
 63 | 	h->incoming = (tw_event ***) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_event *)* ncpu, 1);
 64 | 
 65 | 	if(!is_prime(g_tw_hash_size))
 66 | 		g_tw_hash_size = next_prime(g_tw_hash_size);
 67 | 
 68 | 	for (pi = 0; pi < ncpu; pi++)
 69 | 	{
 70 | 		h->num_stored[pi] = 0;
 71 | 		h->hash_sizes[pi] = g_tw_hash_size;
 72 | 		h->incoming[pi] = allocate_table(h->hash_sizes[pi]);
 73 | 	}
 74 | 
 75 | 	return (void *) h;
 76 | #endif
 77 | }
 78 | 
 79 | void
 80 | tw_hash_insert(void *h, tw_event * event, long pe)
 81 | {
 82 | #ifdef AVL_TREE
 83 |   (void) h;
 84 |   (void) pe;
 85 |   tw_clock start;
 86 | 
 87 |   g_tw_pe->avl_tree_size++;
 88 | 
 89 |   start = tw_clock_read();
 90 |   avlInsert(&event->dest_lp->kp->avl_tree, event);
 91 |   g_tw_pe->stats.s_avl += tw_clock_read() - start;
 92 | #else
 93 | 	tw_hash        *hash_t;
 94 | 
 95 | 	hash_t = (tw_hash *) h;
 96 | 
 97 | 	insert(hash_t->incoming[pe], event, hash_t->hash_sizes[pe]);
 98 | 
 99 | 	(hash_t->num_stored[pe])++;
100 | 	if (hash_t->num_stored[pe] > floor(hash_t->hash_sizes[pe] * MAX_FRACTION))
101 | 	{
102 | 		rehash(hash_t, pe);
103 | 	}
104 | #endif
105 | }
106 | 
107 | #ifndef AVL_TREE
108 | void
109 | insert(tw_event ** hash_t, tw_event * event, int hash_size)
110 | {
111 | 	int             key = 0;
112 | 
113 | 	key = find_empty(hash_t, event, hash_size);
114 | 	hash_t[key] = event;
115 | }
116 | 
117 | void
118 | rehash(tw_hash * hash_t, int pe)
119 | {
120 | 	int             old_size;
121 | 	int             old_stored;
122 | 	int             i;
123 | 	tw_event      **old_list;
124 | 
125 | 	old_stored = hash_t->num_stored[pe];
126 | 	old_list = hash_t->incoming[pe];
127 | 	old_size = hash_t->hash_sizes[pe];
128 | 
129 | 	hash_t->num_stored[pe] = 0;
130 | 	hash_t->hash_sizes[pe] = next_prime(hash_t->hash_sizes[pe]);
131 | 	hash_t->incoming[pe] = allocate_table(hash_t->hash_sizes[pe]);
132 | 
133 | 	for (i = 0; i < old_size; i++)
134 | 	{
135 | 		if (old_list[i] != NULL)
136 | 		{
137 | 			insert(hash_t->incoming[pe], old_list[i], hash_t->hash_sizes[pe]);
138 | 			(hash_t->num_stored[pe])++;
139 | 		}
140 | 	}
141 | 
142 | 	if(old_stored != hash_t->num_stored[pe])
143 | 		tw_error(TW_LOC, "Did not rehash properly!");
144 | 
145 | #if VERIFY_HASH_QUAD
146 | 	printf("\nHASH TABLE RESIZED: old size = %d, new size = %d \n\n", old_size,
147 | 		   hash_t->hash_sizes[pe]);
148 | #endif
149 | }
150 | 
151 | int
152 | find_empty(tw_event ** hash_t, tw_event * event, int hash_size)
153 | {
154 | 	unsigned int    i;
155 | 	int key;
156 | 
157 | 	i = 0;
158 | 	key = hash_(event->event_id, hash_size);
159 | 
160 | 	if(0 > key)
161 | 		tw_error(TW_LOC, "here!");
162 | 
163 | 	while (hash_t[key])
164 | 	{
165 | 		key += 2 * (++i) - 1;
166 | 		if (key >= hash_size)
167 | 			key -= hash_size;
168 | 	}
169 | 
170 | 	return key;
171 | }
172 | 
173 | int
174 | find_entry(tw_event ** hash_t, tw_event * event, int hash_size, int pe)
175 | {
176 | 	unsigned int    i;
177 | 	int key;
178 | 
179 | 	i = 0;
180 | 	key = hash_(event->event_id, hash_size);
181 | 
182 | 	while (hash_t[key] == NULL || event->event_id != hash_t[key]->event_id)
183 | 	{
184 | 		key += 2 * (++i) - 1;
185 | 		if (key >= hash_size)
186 | 			key -= hash_size;
187 | 
188 | 		if (key > hash_size)
189 | 		{
190 | 			tw_error(TW_LOC, "Cannot find event in hash table: PE %d, key %d, size %d\n",
191 | 				pe, key, hash_size);
192 | 		}
193 | 	}
194 | 
195 | 	return key;
196 | }
197 | 
198 | tw_event      **
199 | allocate_table(int hash_size)
200 | {
201 | 	return (tw_event **) tw_calloc(TW_LOC, "tw_hash", sizeof(tw_event *) * hash_size, 1);
202 | }
203 | #endif
204 | 
205 | tw_event       *
206 | tw_hash_remove(void *h, tw_event * event, long pe)
207 | {
208 | #if AVL_TREE
209 |   (void) h;
210 |   (void) pe;
211 |   tw_event *ret;
212 |   tw_clock start;
213 | 
214 |   g_tw_pe->avl_tree_size--;
215 | 
216 |   start = tw_clock_read();
217 |   ret = avlDelete(&event->dest_lp->kp->avl_tree, event);
218 |   g_tw_pe->stats.s_avl += tw_clock_read() - start;
219 |   return ret;
220 | #else
221 | 	tw_hash        *hash_t = (tw_hash *) h;
222 | 	tw_event       *ret_event;
223 | 	int             key;
224 | 
225 | 	if(pe > tw_nnodes() - 1)
226 | 		tw_error(TW_LOC, "bad pe id");
227 | 
228 | 	key = find_entry(hash_t->incoming[pe], event, hash_t->hash_sizes[pe], pe);
229 | 	ret_event = hash_t->incoming[pe][key];
230 | 
231 | 	hash_t->incoming[pe][key] = NULL;
232 | 	(hash_t->num_stored[pe])--;
233 | 
234 | 	return ret_event;
235 | #endif
236 | }
237 | 
/*
 * Return the first value in the sequence 2*ptst+1, 2*ptst+3, ... that
 * is_prime() accepts. Used to pick the size of a grown hash table.
 */
int
next_prime(int ptst)
{
	int             candidate;

	for (candidate = ptst * 2 + 1; !is_prime(candidate); candidate += 2)
		;

	return candidate;
}
257 | 
/*
 * Primality test by trial division.
 *
 * ptst - value to test
 *
 * Returns 1 if ptst is prime, 0 otherwise. Values below 2 (including
 * negatives) are not prime and 2 is prime. Odd candidates are divided by
 * 3, 5, 7, ... while divisor <= ptst / divisor, which bounds the search at
 * the integer square root without floating point or overflow.
 */
int
is_prime(int ptst)
{
	long            a;

	if (ptst < 2)
		return 0;

	if (ptst % 2 == 0)
		return ptst == 2;

	for (a = 3; a <= ptst / a; a += 2)
	{
		if (!(ptst % a))
			return 0;
	}

	return 1;
}
281 | 
282 | tw_event *
283 | hash_search(tw_event ** hash_t, tw_event *evt, int size)
284 | {
285 | 	int             j, empty;
286 | 	tw_event       *e;
287 | 
288 | 	for (empty = 0, j = 0; j < size; j++)
289 | 	{
290 | 		e = hash_t[j];
291 | 
292 | 		if (e && (e->event_id == evt->event_id))
293 | 		{
294 | 			printf("Found event in hash: %d\n", j);
295 | 			return e;
296 | 		} else
297 | 			empty++;
298 | 	}
299 | 
300 | 	printf("%ld: HASH has %d empty cells. \n", g_tw_mynode, empty);
301 | 
302 | 	return NULL;
303 | }
304 | 
305 | void
306 | hash_print(tw_hash * h)
307 | {
308 | 	unsigned int             i, j, empty;
309 | 	unsigned int   *sizes = h->hash_sizes;
310 | 	int            *stored = h->num_stored;
311 | 	tw_event      **hash_t;
312 | 	tw_event       *e;
313 | 
314 | 	for (i = 0; i < ncpu; i++)
315 | 	{
316 | 		printf("PE %d: \n", i);
317 | 		printf("table size: %d \n", sizes[i]);
318 | 		printf("num_stored: %d \n\n", stored[i]);
319 | 
320 | 		hash_t = h->incoming[i];
321 | 
322 | 		for (empty = 0, j = 0; j < sizes[i]; j++)
323 | 		{
324 | 			e = hash_t[j];
325 | 
326 | 			if (e)
327 | 			{
328 | 				//printf("recv_ts = %f \n", e->recv_ts);
329 | 				//printf("%d: %ld \n\n", j, e->event_id);
330 | 			} else
331 | 				empty++;
332 | 		}
333 | 		printf("PE %d has %d empty cells. \n", i, empty);
334 | 	}
335 | }
336 | 


--------------------------------------------------------------------------------
/core/hash-quadratic.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_hash_quadratic_h
 2 | #define INC_hash_quadratic_h
 3 | 
/* Load-factor limit: a table is grown once more than this fraction of its slots is used. */
#define MAX_FRACTION 0.50

/* Initial size of each per-node table (defined in hash-quadratic.c). */
extern unsigned int	 g_tw_hash_size;

typedef struct tw_hash tw_hash;

/*
 * One open-addressed table of event pointers per node; all three arrays
 * are indexed by the same node/PE id.
 */
struct tw_hash
{
	tw_event     ***incoming;	/* incoming[pe] is the table of event pointers for that pe */
	int            *num_stored;	/* num_stored[pe] is the number of events currently in that table */
	unsigned int   *hash_sizes;	/* hash_sizes[pe] is the slot count of that table */
};
16 | 
17 | 
18 | /*
19 |  * hash-quadratic.c
20 |  */
21 | extern void    *tw_hash_create();
22 | extern void     tw_hash_insert(void *h, tw_event * event, long pe);
23 | extern tw_event *tw_hash_remove(void *h, tw_event * event, long pe);
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/core/instrumentation/ross-lps/analysis-lp.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_analysis_lp_h
 2 | #define INC_analysis_lp_h
 3 | 
 4 | #include <ross.h>
 5 | 
 6 | typedef struct analysis_state analysis_state;
 7 | typedef struct analysis_msg analysis_msg;
 8 | typedef struct model_sample_data model_sample_data;
 9 | typedef struct lp_metadata lp_metadata;
10 | 
/* Selects which levels of data an analysis LP collects. */
typedef enum{
    ALP_NONE,
    ALP_FULL, // LP, KP, and PE
    ALP_KP, // KP and PE
    ALP_PE, //PE only
    ALP_MODEL // NOTE(review): presumably model-level data only -- confirm in analysis-lp.c
} analysis_type_flag;
18 | 
/* Message type taken by the analysis LP event handlers declared below. */
struct analysis_msg
{
    tw_lpid src;        // NOTE(review): presumably the id of the sending LP -- confirm
    tw_stime timestamp; // NOTE(review): presumably the sampling time of the event -- confirm

};
25 | 
/*
 * Descriptor for one sample record.
 * NOTE(review): presumably written ahead of the sample payload in the
 * analysis LP output -- confirm in analysis-lp.c.
 */
struct lp_metadata
{
    tw_lpid lpid;
    tw_kpid kpid;
    tw_peid peid;
    tw_stime ts;        // NOTE(review): presumably the virtual time of the sample -- confirm
    tw_stime real_time; // NOTE(review): presumably the wall-clock time of the sample -- confirm
    int sample_sz;      // NOTE(review): presumably the payload size in bytes -- confirm
    int flag; // 0 == PE, 1 == KP, 2 == LP, 3 == model
};
36 | 
/* Node of a doubly linked list of model samples, one node per sampling point. */
struct model_sample_data
{
    model_sample_data *prev;
    model_sample_data *next;
    tw_stime timestamp;      /* NOTE(review): presumably the virtual time of this sampling point -- confirm */
    void **lp_data;          /* data for each LP on the associated KP at this sampling point */
};
44 | 
/* State of one analysis LP, passed as `s` to the analysis_* handlers declared below. */
struct analysis_state
{
    tw_lpid analysis_id; // id among analysis LPs only
    int num_lps;         // NOTE(review): presumably the length of lp_list -- confirm
    int num_lps_sim;     // NOTE(review): presumably the length of lp_list_sim -- confirm
    tw_lpid *lp_list; // list of LPs that the analysis LP is responsible for
    tw_lpid *lp_list_sim;
    model_sample_data *model_samples_head;    // ends and cursor of the model_sample_data list
    model_sample_data *model_samples_current;
    model_sample_data *model_samples_tail;
};
56 | 
57 | void analysis_init(analysis_state *s, tw_lp *lp);
58 | void analysis_event(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp);
59 | void analysis_event_rc(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp);
60 | void analysis_commit(analysis_state *s, tw_bf *bf, analysis_msg *m, tw_lp *lp);
61 | void analysis_finish(analysis_state *s, tw_lp *lp);
62 | void collect_sim_engine_data(tw_pe *pe, tw_lp *lp, analysis_state *s, tw_stime current_rt);
63 | tw_peid analysis_map(tw_lpid gid);
64 | 
65 | extern tw_lpid analysis_start_gid;
66 | void st_analysis_lp_settype(tw_lpid lpid);
67 | #endif
68 | 


--------------------------------------------------------------------------------
/core/instrumentation/ross-lps/specialized-lps.c:
--------------------------------------------------------------------------------
 1 | #include "ross.h"
 2 | #include "analysis-lp.h"
 3 | 
 4 | /*
 5 |  * This file is for general set up functions related to setting up any
 6 |  * ROSS specialized LPs.
 7 |  *
 8 |  * TODO add some output on these LPs, add some counters that can be subtracted from other counters, so we're not including this in the model info
 9 |  */
10 | 
/* Set to 1 by specialized_lp_setup() when virtual-time (or all) sampling is requested. */
int g_st_use_analysis_lps = 0;
/* Number of analysis LPs per PE; set to g_tw_nkp in specialized_lp_setup(). */
tw_lpid g_st_analysis_nlp = 0;
/* Value of the "sample-count" option: number of samples to allocate in memory. */
int g_st_sample_count = 65536;

/* First global id used for analysis LPs; equals the total number of model LPs. */
tw_lpid analysis_start_gid = 0;
/* Total model LPs: sum of g_tw_nlp over all ranks, or g_tw_nlp itself when sequential. */
tw_lpid g_st_total_model_lps = 0;
17 | 
18 | void specialized_lp_setup()
19 | {
20 |     if (g_st_engine_stats == VT_STATS || g_st_engine_stats == ALL_STATS || 
21 |             g_st_model_stats == VT_STATS || g_st_model_stats == ALL_STATS)
22 |     {
23 |         g_st_use_analysis_lps = 1;
24 |         st_buffer_init(ANALYSIS_LP);
25 |     }
26 |     else
27 |         return;
28 | 
29 |     // determine total LPs used by model and assign value to analysis_start_gid
30 |     if (g_tw_synchronization_protocol != SEQUENTIAL)
31 |         MPI_Allreduce(&g_tw_nlp, &g_st_total_model_lps, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_ROSS);
32 |     else
33 |         g_st_total_model_lps = g_tw_nlp;
34 | 
35 |     analysis_start_gid = g_st_total_model_lps;
36 |     g_st_analysis_nlp = g_tw_nkp; // # of analysis LPs per PE
37 | 
38 | 
39 | }
40 | 
41 | void specialized_lp_init_mapping()
42 | {
43 |     tw_lpid lpid;
44 |     for(lpid = 0; lpid < g_st_analysis_nlp; lpid++)
45 |     {
46 |         tw_lp_onpe(g_tw_nlp + lpid, g_tw_pe, analysis_start_gid + g_tw_mynode * g_st_analysis_nlp + lpid);
47 |         tw_lp_onkp(g_tw_lp[g_tw_nlp + lpid], g_tw_kp[lpid]); // analysis lpid == kpid
48 |         st_analysis_lp_settype(g_tw_nlp + lpid);
49 |     }
50 | }
51 | 
52 | void specialized_lp_run()
53 | {
54 |     // has to be set at beginning of tw_run, in case model changes g_tw_ts_end between calling tw_init and tw_run
55 |     if (g_st_sampling_end == 0)
56 |         g_st_sampling_end = g_tw_ts_end;
57 | }
58 | 
/*
 * Command-line options for the specialized ROSS LPs; returned to the
 * option parser through st_special_lp_opts().
 */
const tw_optdef special_lp_opt[] =
{
    TWOPT_GROUP("Specialized ROSS LPs"),
    // disabled: analysis LPs are switched on in specialized_lp_setup() based on the stats options
    //TWOPT_UINT("analysis-lps", g_st_use_analysis_lps, "Set to 1 to turn on analysis LPs (1 per KP) for virtual time sampling"),
    TWOPT_UINT("sample-count", g_st_sample_count, "Number of samples to allocate in memory"),
    TWOPT_END()
};
66 | 
67 | const tw_optdef *st_special_lp_opts(void)
68 | {
69 | 	return special_lp_opt;
70 | }
71 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-event-trace.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | int g_st_ev_trace = 0;
 4 | 
 5 | static short evtype_warned = 0;
 6 | 
 7 | // collect src LP, dest LP, virtual time stamp, real time start
 8 | // model can implement callback function to collect model level data, e.g. event type
 9 | void st_collect_event_data(tw_event *cev, double recv_rt)
10 | {
11 |     tw_clock start_cycle_time = tw_clock_read();
12 |     int collect_flag = 1;
13 |     st_event_data ev_data;
14 |     ev_data.src_lp = (unsigned int) cev->send_lp;
15 |     ev_data.dest_lp = (unsigned int) cev->dest_lp->gid;
16 |     ev_data.send_vts = (float) TW_STIME_DBL(cev->send_ts);
17 |     ev_data.recv_vts = (float) TW_STIME_DBL(cev->recv_ts);
18 |     ev_data.real_ts = (float) recv_rt;
19 |     int total_sz = sizeof(ev_data);
20 | 
21 |     if (!cev->dest_lp->model_types && !evtype_warned && g_tw_mynode == g_tw_masternode)
22 |     {
23 |         fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__);
24 |         fprintf(stderr, "The struct st_model_types has not been defined! No model level data will be collected\n");
25 |         evtype_warned = 1;
26 |     }
27 | 
28 |     if (cev->dest_lp->model_types && cev->dest_lp->model_types->ev_trace)
29 |         ev_data.model_data_sz = cev->dest_lp->model_types->ev_sz;
30 |     else
31 |         ev_data.model_data_sz = 0;
32 | 
33 | 
34 |     total_sz += ev_data.model_data_sz;
35 |     char buffer[total_sz];
36 | 
37 |     if (ev_data.model_data_sz > 0)
38 |         (*cev->dest_lp->model_types->ev_trace)(tw_event_data(cev), cev->dest_lp, &buffer[sizeof(ev_data)], &collect_flag);
39 | 
40 |     if (collect_flag)
41 |     {
42 |         memcpy(&buffer[0], &ev_data, sizeof(ev_data));
43 |         if (g_tw_synchronization_protocol != SEQUENTIAL)
44 |             st_buffer_push(EV_TRACE, &buffer[0], total_sz);
45 |         else if (g_tw_synchronization_protocol == SEQUENTIAL && !g_st_disable_out)
46 |             fwrite(buffer, total_sz, 1, seq_ev_trace);
47 | 
48 |     }
49 |     g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time;
50 | }
51 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-instrumentation.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | #include <sys/stat.h>
  3 | 
/* "stats-prefix" option: prefix for filename(s) for stats output. */
char g_st_stats_out[INST_MAX_LENGTH] = {0};
/* "stats-path" option: directory to save instrumentation output. */
char g_st_stats_path[4096] = {0};
/* "pe-data" / "kp-data" / "lp-data" options: collect sim engine data at that level. */
int g_st_pe_data = 1;
int g_st_kp_data = 0;
int g_st_lp_data = 0;
/* "disable-output" option: when 1 the buffers are never dumped to file. */
int g_st_disable_out = 0;

/* "model-stats" / "engine-stats" options: 0 off, 1 GVT, 2 RT, 3 VT, 4 all sampling modes. */
int g_st_model_stats = 0;
int g_st_engine_stats = 0;

/* Set in st_inst_init() when GVT-based sampling is requested. */
int g_st_gvt_sampling = 0;
/* "num-gvt" option: GVT computations between GVT-based sampling points. */
int g_st_num_gvt = 10;

/* Set in st_inst_init() when real-time sampling is requested. */
int g_st_rt_sampling = 0;
/* "rt-interval" option in ms; st_inst_init() rescales it by g_tw_clock_rate / 1000. */
tw_clock g_st_rt_interval = 1000;
/* Clock reading taken in st_inst_init() when real-time sampling is enabled. */
tw_clock g_st_rt_samp_start_cycles = 0;

/* "vt-interval" option: virtual time sampling interval. */
double g_st_vt_interval = 1000000;
/* "vt-samp-end" option; 0 means "use g_tw_ts_end" (see specialized_lp_run). */
double g_st_sampling_end = 0;
 23 | 
 24 | 
 25 | 
/*
 * Command-line options for the ROSS instrumentation layer; returned to the
 * option parser through st_inst_opts(). Each entry binds an option name to
 * the global it sets.
 */
static const tw_optdef inst_options[] = {
    TWOPT_GROUP("ROSS Instrumentation"),
    TWOPT_UINT("engine-stats", g_st_engine_stats, "Collect sim engine level stats; 0 don't collect, 1 GVT-sampling, 2 RT sampling, 3 VT sampling, 4 All sampling modes"),
    TWOPT_UINT("model-stats", g_st_model_stats, "Collect model level stats (requires model-level implementation); 0 don't collect, 1 GVT-sampling, 2 RT sampling, 3 VT sampling, 4 all sampling modes"),
    TWOPT_UINT("num-gvt", g_st_num_gvt, "number of GVT computations between GVT-based sampling points"),
    /* given in ms; st_inst_init() rescales it by g_tw_clock_rate / 1000 */
    TWOPT_ULONGLONG("rt-interval", g_st_rt_interval, "real time sampling interval in ms"),
    TWOPT_DOUBLE("vt-interval", g_st_vt_interval, "Virtual time sampling interval"),
    TWOPT_DOUBLE("vt-samp-end", g_st_sampling_end, "End time for virtual time sampling (if different from g_tw_ts_end)"),
    TWOPT_UINT("pe-data", g_st_pe_data, "Turn on/off collection of sim engine data at PE level"),
    TWOPT_UINT("kp-data", g_st_kp_data, "Turn on/off collection of sim engine data at KP level"),
    TWOPT_UINT("lp-data", g_st_lp_data, "Turn on/off collection of sim engine data at LP level"),
    TWOPT_UINT("event-trace", g_st_ev_trace, "collect detailed data on all events for specified LPs; 0, no trace, 1 full trace, 2 only events causing rollbacks, 3 only committed events"),
    TWOPT_CHAR("stats-prefix", g_st_stats_out, "prefix for filename(s) for stats output"),
    TWOPT_CHAR("stats-path", g_st_stats_path, "path to directory to save instrumentation output"),
    TWOPT_UINT("buffer-size", g_st_buffer_size, "size of buffer in bytes for stats collection"),
    TWOPT_UINT("buffer-free", g_st_buffer_free_percent, "percentage of free space left in buffer before writing out at GVT"),
    TWOPT_UINT("disable-output", g_st_disable_out, "used for perturbation analysis; buffer never dumped to file when 1"),
    TWOPT_END()
};
 45 | 
 46 | const tw_optdef *st_inst_opts(void)
 47 | {
 48 | 	return inst_options;
 49 | }
 50 | 
 51 | void st_inst_init(void)
 52 | {
 53 |     specialized_lp_run();
 54 | 
 55 |     if (!(g_st_engine_stats || g_st_model_stats || g_st_ev_trace))
 56 |         return;
 57 | 
 58 |     // setup appropriate flags for various instrumentation modes
 59 |     // set up files and buffers for necessary instrumentation modes
 60 |     if (g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS)
 61 |     {
 62 |         g_st_gvt_sampling = 1;
 63 |         st_buffer_init(GVT_COL);
 64 |     }
 65 |     if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS)
 66 |     {
 67 |         g_st_rt_sampling = 1;
 68 |         st_buffer_init(RT_COL);
 69 |     }
 70 | 
 71 |     if (g_st_model_stats == GVT_STATS || g_st_model_stats == ALL_STATS)
 72 |         g_st_gvt_sampling = 1;
 73 |     if (g_st_model_stats == RT_STATS || g_st_model_stats == ALL_STATS)
 74 |         g_st_rt_sampling = 1;
 75 | 
 76 |     if (g_st_rt_sampling)
 77 |     {
 78 |         g_st_rt_interval = g_st_rt_interval * g_tw_clock_rate / 1000;
 79 |         g_st_rt_samp_start_cycles = tw_clock_read();
 80 |     }
 81 | 
 82 |     if (g_st_ev_trace)
 83 |         st_buffer_init(EV_TRACE);
 84 |     if (g_st_model_stats)
 85 |         st_buffer_init(MODEL_COL);
 86 | }
 87 | 
 88 | void st_inst_dump()
 89 | {
 90 |     if (g_st_disable_out)
 91 |         return;
 92 | 
 93 |     if (g_st_engine_stats == GVT_STATS || g_st_engine_stats == ALL_STATS)
 94 |         st_buffer_write(0, GVT_COL);
 95 |     if (g_st_engine_stats == RT_STATS || g_st_engine_stats == ALL_STATS)
 96 |         st_buffer_write(0, RT_COL);
 97 |     if (g_st_ev_trace)
 98 |         st_buffer_write(0, EV_TRACE);
 99 |     if (g_st_model_stats)
100 |         st_buffer_write(0, MODEL_COL);
101 |     if (g_st_use_analysis_lps)
102 |         st_buffer_write(0, ANALYSIS_LP);
103 | }
104 | 
105 | /* End-of-run teardown: finalize the buffer of every enabled collection type.
106 |  * Real time engine sampling takes one last sample before its buffer is finalized. */
107 | void st_inst_finalize(tw_pe *me)
108 | {
109 |     const int mode = g_st_engine_stats;
110 | 
111 |     if (mode == ALL_STATS || mode == GVT_STATS)
112 |         st_buffer_finalize(GVT_COL);
113 |     if (mode == ALL_STATS || mode == RT_STATS)
114 |     {
115 |         // one more sample accounts for the time between the last sample and sim end
116 |         st_collect_engine_data(me, RT_COL);
117 |         st_buffer_finalize(RT_COL);
118 |     }
119 |     if (g_st_ev_trace) st_buffer_finalize(EV_TRACE);
120 |     if (g_st_model_stats) st_buffer_finalize(MODEL_COL);
121 |     if (g_st_use_analysis_lps) st_buffer_finalize(ANALYSIS_LP);
122 | }
123 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-instrumentation.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_st_instrumentation_h
  2 | #define	INC_st_instrumentation_h
  3 | 
  4 | /*
  5 |  * Header file for all of the ROSS instrumentation
  6 |  */
  7 | 
  8 | #include <ross.h>
  9 | #include <inttypes.h>
 10 | 
 11 | #define INST_MAX_LENGTH 4096
 12 | 
 13 | /* st-stats-buffer.c: buffer helpers; the argument is parenthesized so any pointer expression works */
 14 | #define st_buffer_free_space(buf) ((buf)->size - (buf)->count)      /* bytes not yet used */
 15 | #define st_buffer_write_ptr(buf) ((buf)->buffer + (buf)->write_pos) /* where the next push is stored */
 16 | #define st_buffer_read_ptr(buf) ((buf)->buffer + (buf)->read_pos)   /* start of the buffered data */
 17 | 
 18 | typedef struct{        /* one in-memory buffer of instrumentation bytes (one per collection type) */
 19 |     char *buffer;      /* storage; st_buffer_init() allocates size bytes */
 20 |     int size;          /* capacity in bytes (set from g_st_buffer_size) */
 21 |     int write_pos;     /* offset where st_buffer_push() stores the next bytes */
 22 |     int read_pos;      /* offset of the first byte not yet written to file */
 23 |     int count;         /* bytes currently held; st_buffer_free_space() is size - count */
 24 | } st_stats_buffer;
 25 | 
 26 | extern char stats_directory[INST_MAX_LENGTH];   /* directory that receives the output files */
 27 | extern int g_st_buffer_size;                    /* capacity, in bytes, given to each buffer */
 28 | extern int g_st_buffer_free_percent;            /* fill level (percent of g_st_buffer_size) at which st_buffer_write() dumps to file */
 29 | extern FILE *seq_ev_trace, *seq_model, *seq_analysis;   /* output files opened with fopen() for SEQUENTIAL runs */
 30 | 
 31 | void st_buffer_allocate();                            /* create the output directory and the per-type bookkeeping */
 32 | void st_buffer_init(int type);                        /* allocate the buffer for one collection type and open its file */
 33 | void st_buffer_push(int type, char *data, int size);  /* append size bytes; dropped (and counted) on overflow */
 34 | void st_buffer_write(int end_of_sim, int type);       /* dump the buffer to file if due, or always when end_of_sim is set */
 35 | void st_buffer_finalize(int type);                    /* final flush and file close */
 36 | 
 37 | /* st-instrumentation.c */
 38 | typedef struct sample_metadata sample_metadata;
 39 | 
 40 | typedef enum{          /* collection types; the value indexes the per-type buffers, file offsets and file handles */
 41 |     GVT_COL,           /* GVT-based sampling; output file suffix "gvt" */
 42 |     RT_COL,            /* real time-based sampling; suffix "rt" */
 43 |     ANALYSIS_LP,       /* analysis LP data; suffix "analysis-lps" */
 44 |     EV_TRACE,          /* event tracing; suffix "evtrace" */
 45 |     MODEL_COL,         /* model-level data; suffix "model" */
 46 |     NUM_COL_TYPES      /* number of collection types (used as array length) */
 47 | } collection_types;
 48 | 
 49 | typedef enum{      /* kind of record that follows a sample_metadata header (stored in its flag field) */
 50 |     PE_TYPE,       /* st_pe_stats */
 51 |     KP_TYPE,       /* st_kp_stats */
 52 |     LP_TYPE,       /* st_lp_stats */
 53 |     MODEL_TYPE     /* model_metadata plus model-defined bytes */
 54 | } inst_data_types;
 55 | 
 56 | typedef enum {     /* NOTE(review): presumably the granularity chosen for engine data; not referenced in the code shown here */
 57 |     GRAN_PE,
 58 |     GRAN_KP,
 59 |     GRAN_LP,
 60 |     GRAN_ALL
 61 | } granularity_types;
 62 | 
 63 | struct sample_metadata   /* header copied in front of every engine/model sample record */
 64 | {
 65 |     int flag;            /* an inst_data_types value identifying the record that follows */
 66 |     int sample_sz;       /* sizeof the structure that directly follows this header */
 67 |     tw_stime ts;         /* virtual time: GVT for engine samples, tw_now(lp) for model samples */
 68 |     double real_time;    /* real time of the sample */
 69 | };
 70 | 
 71 | extern char g_st_stats_out[INST_MAX_LENGTH];   /* output file name prefix; defaults to "ross-stats" */
 72 | extern char g_st_stats_path[INST_MAX_LENGTH];  /* requested output directory; defaults to "stats-output" */
 73 | extern int g_st_pe_data;       /* non-zero: engine samples include a PE-level record */
 74 | extern int g_st_kp_data;       /* non-zero: engine samples include one record per KP */
 75 | extern int g_st_lp_data;       /* non-zero: engine samples include one record per LP */
 76 | extern int g_st_disable_out;   /* non-zero: no instrumentation files are opened or written */
 77 | 
 78 | extern int g_st_model_stats;   /* stats_types_enum value selecting model-level sampling */
 79 | extern int g_st_engine_stats;  /* stats_types_enum value selecting engine-level sampling */
 80 | 
 81 | extern int g_st_gvt_sampling;  /* set to 1 in st-instrumentation.c when GVT-based sampling is selected */
 82 | extern int g_st_num_gvt;
 83 | 
 84 | extern int g_st_rt_sampling;   /* set to 1 in st-instrumentation.c when real time sampling is selected */
 85 | extern tw_clock g_st_rt_interval;           /* real time sampling interval; rescaled by g_tw_clock_rate / 1000 during setup */
 86 | extern tw_clock g_st_rt_samp_start_cycles;  /* tw_clock_read() value taken when real time sampling is set up */
 87 | 
 88 | extern const tw_optdef *st_inst_opts();
 89 | extern void st_inst_init(void);
 90 | extern void st_inst_dump();                 /* give each active buffer the chance to write to file */
 91 | extern void st_inst_finalize(tw_pe *me);    /* final sample (real time mode), then finalize all active buffers */
 92 | 
 93 | /*
 94 |  * st-sim-engine.c
 95 |  * Simulation Engine related instrumentation
 96 |  */
 97 | typedef struct st_pe_stats st_pe_stats;
 98 | typedef struct st_kp_stats st_kp_stats;
 99 | typedef struct st_lp_stats st_lp_stats;
100 | 
101 | struct st_pe_stats{    /* PE-level record; unless noted, values are changes since the previous sample of the same collection type */
102 |     unsigned int peid;                  /* g_tw_mynode */
103 | 
104 |     unsigned int s_nevent_processed;
105 |     unsigned int s_nevent_abort;
106 |     unsigned int s_e_rbs;
107 |     unsigned int s_rb_total;
108 |     unsigned int s_rb_secondary;
109 |     unsigned int s_fc_attempts;
110 |     unsigned int s_pq_qsize;            /* tw_pq_get_size(pe->pq) at sampling time (not a change) */
111 |     unsigned int s_nsend_network;
112 |     unsigned int s_nread_network;
113 |     //unsigned int s_nsend_remote_rb;
114 |     //unsigned int s_nsend_loc_remote;
115 |     //unsigned int s_nsend_net_remote;
116 |     unsigned int s_ngvts;               /* g_tw_gvt_done minus the value stored at the previous sample */
117 |     unsigned int s_pe_event_ties;
118 |     unsigned int all_reduce_count;      /* change in st_get_allreduce_count() */
119 |     float efficiency;                   /* 100 * (1 - s_e_rbs / (s_nevent_processed - s_e_rbs)); 0 if that difference is not positive */
120 | 
121 |     float s_net_read;                   /* this and the fields below: cycle counter change divided by g_tw_clock_rate */
122 |     float s_net_other;
123 |     float s_gvt;
124 |     float s_fossil_collect;
125 |     float s_event_abort;
126 |     float s_event_process;
127 |     float s_pq;
128 |     float s_rollback;
129 |     float s_cancel_q;
130 |     float s_avl;
131 |     float s_buddy;
132 |     float s_lz4;
133 | };
134 | 
135 | struct st_kp_stats{    /* KP-level record; counters are changes since the previous sample of the same collection type */
136 |     unsigned int peid;                  /* g_tw_mynode */
137 |     unsigned int kpid;                  /* kp->id */
138 | 
139 |     unsigned int s_nevent_processed;
140 |     unsigned int s_nevent_abort;
141 |     unsigned int s_e_rbs;
142 |     unsigned int s_rb_total;
143 |     unsigned int s_rb_secondary;
144 |     unsigned int s_nsend_network;
145 |     unsigned int s_nread_network;
146 |     float time_ahead_gvt;               /* the KP's last time stamp minus the PE's GVT */
147 |     float efficiency;                   /* 100 * (1 - s_e_rbs / (s_nevent_processed - s_e_rbs)); 0 if that difference is not positive */
148 | };
149 | 
150 | struct st_lp_stats{    /* LP-level record; counters are changes since the previous sample of the same collection type */
151 |     unsigned int peid;                  /* g_tw_mynode */
152 |     unsigned int kpid;                  /* lp->kp->id */
153 |     unsigned int lpid;                  /* lp->gid */
154 | 
155 |     unsigned int s_nevent_processed;
156 |     unsigned int s_nevent_abort;
157 |     unsigned int s_e_rbs;
158 |     unsigned int s_nsend_network;
159 |     unsigned int s_nread_network;
160 |     float efficiency;                   /* 100 * (1 - s_e_rbs / (s_nevent_processed - s_e_rbs)); 0 if that difference is not positive */
161 | };
162 | 
163 | void st_collect_engine_data(tw_pe *me, int col_type);   /* sample engine data; emits PE/KP/LP records as enabled */
164 | void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type);   /* one st_pe_stats record */
165 | void st_collect_engine_data_kps(tw_pe *me, tw_kp *kp, sample_metadata *sample_md, int col_type);   /* one st_kp_stats record for kp */
166 | void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type);   /* one st_lp_stats record for lp */
167 | 
168 | /*
169 |  * st-event-trace.c
170 |  */
171 | typedef enum{          /* NOTE(review): presumably the values g_st_ev_trace can take -- confirm against st-event-trace.c */
172 |     NO_TRACE,          /* first enumerator, i.e. 0 */
173 |     FULL_TRACE,
174 |     RB_TRACE,
175 |     COMMIT_TRACE
176 | } traces_enum;
177 | 
178 | typedef struct {       /* NOTE(review): presumably the fixed per-event record of the event trace; its use is not visible here */
179 |     unsigned int src_lp;
180 |     unsigned int dest_lp;
181 |     float send_vts;
182 |     float recv_vts;
183 |     float real_ts;
184 |     unsigned int model_data_sz;
185 | } st_event_data;
186 | 
187 | // collect_flag allows for specific events to be turned on/off in tracing
188 | typedef void (*ev_trace_f) (void *msg, tw_lp *lp, char *buffer, int *collect_flag);
189 | 
190 | extern int g_st_ev_trace;   /* non-zero: the EV_TRACE buffer is set up, written and finalized */
191 | 
192 | void st_collect_event_data(tw_event *cev, double recv_rt);
193 | 
194 | /*
195 |  * ross-lps/analysis-lp.c
196 |  */
197 | typedef void (*sample_event_f)(void *state, tw_bf *b, tw_lp *lp, void *sample);    /* NOTE(review): presumably the model's sampling callback -- confirm in analysis-lp.c */
198 | typedef void (*sample_revent_f)(void *state, tw_bf *b, tw_lp *lp, void *sample);   /* NOTE(review): presumably the reverse handler for the above -- confirm */
199 | extern void specialized_lp_setup();
200 | extern void specialized_lp_init_mapping();
201 | extern void specialized_lp_run();
202 | extern const tw_optdef *st_special_lp_opts(void);
203 | extern int g_st_use_analysis_lps;     /* non-zero: the ANALYSIS_LP buffer is written and finalized */
204 | extern tw_lpid g_st_analysis_nlp;
205 | extern double g_st_vt_interval;
206 | extern double g_st_sampling_end;
207 | extern tw_lpid g_st_total_model_lps;
208 | extern int g_st_sample_count;
209 | 
210 | /*
211 |  * st-model-data.c
212 |  */
213 | // function to be implemented in LP for collection of model level stats
214 | typedef void (*model_stat_f) (void *sv, tw_lp *lp, char *buffer);   // called with the LP's cur_state, the LP, and the record area reserved for the model's mstat_sz bytes
215 | typedef struct st_model_types st_model_types;
216 | 
217 | /*
218 |  * Struct to help ROSS collect model-level data
219 |  * */
220 | struct st_model_types {
221 |     ev_trace_f ev_trace;         /**< @brief function pointer to collect data about all events for given LP */
222 |     size_t ev_sz;                /**< @brief size of data collected from model for each event */
223 |     model_stat_f model_stat_fn;  /**< @brief function pointer to collect model level data for RT and GVT-based instrumentation */
224 |     size_t mstat_sz;             /**< @brief size of data collected from model at sampling points */
225 |     sample_event_f sample_event_fn;     /* NOTE(review): presumably used by the analysis LPs -- confirm */
226 |     sample_revent_f sample_revent_fn;   /* NOTE(review): presumably the reverse of sample_event_fn -- confirm */
227 |     size_t sample_struct_sz;            /* NOTE(review): presumably the size of the sample passed to the two callbacks above -- confirm */
228 | };
229 | 
230 | typedef enum{      /* values compared against g_st_engine_stats and g_st_model_stats */
231 |     NO_STATS,      /* 0: tests false, so the corresponding collection stays off */
232 |     GVT_STATS,     /* GVT-based sampling */
233 |     RT_STATS,      /* real time-based sampling */
234 |     VT_STATS,      /* NOTE(review): presumably virtual time sampling; not referenced in the code shown here */
235 |     ALL_STATS      /* treated as both GVT_STATS and RT_STATS */
236 | } stats_types_enum;
237 | 
238 | typedef struct {             /* header placed after sample_metadata in every model-level sample */
239 |     unsigned int peid;       /* g_tw_mynode */
240 |     unsigned int kpid;       /* id of the LP's KP */
241 |     unsigned int lpid;       /* the LP's gid */
242 |     float gvt;               /* GVT of the PE at sampling time */
243 |     int stats_type;          /* stats_type argument given to st_collect_model_data() */
244 |     unsigned int model_sz;   /* number of model-defined bytes that follow (the LP type's mstat_sz) */
245 | } model_metadata;
246 | 
247 | extern st_model_types *g_st_model_types;   /* table of model types, looked up through g_tw_lp_typemap(); NULL when not defined */
248 | 
249 | void st_model_setup_types(tw_lp *lp);                                        /* assign lp->model_types from g_st_model_types */
250 | void st_model_settype(tw_lpid i, st_model_types *model_types);               /* assign model_types to the LP at g_tw_lp[i] */
251 | void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type);    /* emit one model-level record per LP that provides model_stat_fn */
252 | 
253 | #endif
254 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-model-data.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | st_model_types *g_st_model_types = NULL;   // table of model data collection types, looked up through g_tw_lp_typemap(); NULL when not defined
 4 | static int model_type_warned = 0;          // set once one of the "not defined" warnings in this file has been printed
 5 | 
 6 | 
 7 | // Companion to tw_lp_setup_types(): reuses the type mapping (g_tw_lp_typemap) to pick this LP's
 8 | // entry in g_st_model_types; if the table is NULL, the master node prints a one-time warning.
 9 | void st_model_setup_types(tw_lp *lp)
10 | {
11 |     const int may_warn = !model_type_warned && g_tw_mynode == g_tw_masternode;
12 | 
13 |     if (g_st_model_types) {
14 |         lp->model_types = &g_st_model_types[g_tw_lp_typemap(lp->gid)];
15 |     } else if (may_warn) {
16 |         fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__);
17 |         fprintf(stderr, "The g_st_model_types has not been defined! No model level data will be collected\n");
18 |         // the flag is shared with st_model_settype(): whichever warns first silences the other
19 |         model_type_warned = 1;
20 |     }
21 | }
22 | 
23 | // Models that assign LP types with tw_lp_settypes() call this as well, so that the LP
24 | // stored at g_tw_lp[i] gets its model-level data collection functions (model_types).
25 | // A NULL model_types leaves the LP unchanged; the master node warns about it once.
26 | void st_model_settype(tw_lpid i, st_model_types *model_types)
27 | {
28 |     if (model_types != NULL) {
29 |         g_tw_lp[i]->model_types = model_types;
30 |         return;
31 |     }
32 | 
33 |     if (!model_type_warned && g_tw_mynode == g_tw_masternode) {
34 |         fprintf(stderr, "WARNING: node: %ld: %s:%i: ", g_tw_mynode, __FILE__, __LINE__);
35 |         fprintf(stderr, "The struct st_model_types has not been defined for at least 1 LP type! No model level data will be collected for LP types without a valid st_model_types struct defined.\n");
36 |         model_type_warned = 1;
37 |     }
38 | }
39 | 
40 | /*
41 |  * Model-level data collection for runs that do not use Analysis LPs; meant to be called
42 |  * when simulation-level data (GVT-based and/or real time-based) is sampled.
43 |  * Visits every LP on this PE and, for each LP that has a model_stat_fn and a non-zero
44 |  * mstat_sz, emits one record: sample_metadata, model_metadata, then the model's bytes.
45 |  * The cycles spent in here are added to pe->stats.s_stat_comp.
46 |  */
47 | void st_collect_model_data(tw_pe *pe, double current_rt, int stats_type)
48 | {
49 |     tw_clock t_begin = tw_clock_read();
50 |     sample_metadata sample_md;
51 |     model_metadata model_md;
52 |     tw_lpid i;
53 | 
54 |     // fields shared by every record produced in this call
55 |     sample_md.flag = MODEL_TYPE;
56 |     sample_md.sample_sz = sizeof(model_md);
57 |     sample_md.real_time = current_rt;
58 |     model_md.peid = (unsigned int) g_tw_mynode;
59 |     model_md.stats_type = stats_type;
60 | #ifdef USE_RAND_TIEBREAKER
61 |     model_md.gvt = (float) TW_STIME_DBL(pe->GVT_sig.recv_ts);
62 | #else
63 |     model_md.gvt = (float) TW_STIME_DBL(pe->GVT);
64 | #endif
65 | 
66 |     for (i = 0; i < g_tw_nlp; i++)
67 |     {
68 |         tw_lp *clp = g_tw_lp[i];
69 |         st_model_types *types = clp->model_types;
70 |         int hdr_sz, total_sz;
71 | 
72 |         // model stats may not be wanted for every LP type: skip LPs without a callback
73 |         if (types == NULL || types->model_stat_fn == NULL)
74 |             continue;
75 | 
76 |         sample_md.ts = tw_now(clp);
77 |         model_md.kpid = (unsigned int) clp->kp->id;
78 |         model_md.lpid = (unsigned int) clp->gid;
79 |         model_md.model_sz = (unsigned int) types->mstat_sz;
80 | 
81 |         hdr_sz = sizeof(sample_md) + sizeof(model_md);
82 |         total_sz = hdr_sz + model_md.model_sz;
83 |         char record[total_sz];
84 |         memcpy(record, &sample_md, sizeof(sample_md));
85 |         memcpy(record + sizeof(sample_md), &model_md, sizeof(model_md));
86 | 
87 |         if (model_md.model_sz == 0)   // no model bytes requested: the record is not emitted
88 |             continue;
89 | 
90 |         (*types->model_stat_fn)(clp->cur_state, clp, record + hdr_sz);
91 |         if (g_tw_synchronization_protocol != SEQUENTIAL)
92 |             st_buffer_push(MODEL_COL, record, total_sz);
93 |         else if (!g_st_disable_out)
94 |             fwrite(record, total_sz, 1, seq_model);
95 |     }
96 |     pe->stats.s_stat_comp += tw_clock_read() - t_begin;
97 | }
98 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-sim-engine.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | #include <sys/stat.h>
  3 | #define __STDC_FORMAT_MACROS 1
  4 | 
  5 | long g_st_current_interval = 0;
  6 | static tw_statistics last_pe_stats[3];       // PE statistics stored at the previous sample, indexed by col_type; NOTE(review): 3 slots only cover collection types 0-2
  7 | static tw_stat last_all_reduce_cnt = 0;      // all-reduce count at the previous PE sample (a single value for all collection types)
  8 | 
  9 | /*
 10 |  * Entry point for sampling simulation engine statistics. Snapshots the PE statistics,
 11 |  * the current GVT and the real time, then emits PE, KP and/or LP records depending on
 12 |  * g_st_pe_data, g_st_kp_data and g_st_lp_data.
 13 |  *
 14 |  * pe       - PE being sampled
 15 |  * col_type - collection type (e.g. GVT_COL or RT_COL): selects the output buffer and the
 16 |  *            "previous sample" values the reported changes are computed against
 17 |  *
 18 |  * The cycles spent in here are added to pe->stats.s_stat_comp.
 19 |  */
 20 | void st_collect_engine_data(tw_pe *pe, int col_type)
 21 | {
 22 |     tw_clock start_time = tw_clock_read();
 23 |     unsigned int i;
 24 |     tw_statistics s;
 25 |     sample_metadata sample_md;
 26 | 
 27 |     // memset() is used rather than the legacy bzero(), which POSIX.1-2008 removed
 28 |     memset(&s, 0, sizeof(s));
 29 |     tw_get_stats(pe, &s);
 30 | 
 31 | #ifdef USE_RAND_TIEBREAKER
 32 |     sample_md.ts = pe->GVT_sig.recv_ts;
 33 | #else
 34 |     sample_md.ts = pe->GVT;
 35 | #endif
 36 |     sample_md.real_time = (double)tw_clock_read() / g_tw_clock_rate;
 37 | 
 38 |     // each collector below fills in sample_md.flag and sample_md.sample_sz itself
 39 |     if (g_st_pe_data)
 40 |         st_collect_engine_data_pes(pe, &sample_md, &s, col_type);
 41 |     for (i = 0; g_st_kp_data && i < g_tw_nkp; i++)
 42 |         st_collect_engine_data_kps(pe, tw_getkp(i), &sample_md, col_type);
 43 |     for (i = 0; g_st_lp_data && i < g_tw_nlp; i++)
 44 |         st_collect_engine_data_lps(tw_getlp(i), &sample_md, col_type);
 45 | 
 46 |     pe->stats.s_stat_comp += tw_clock_read() - start_time;
 47 | }
 48 | 
 49 | /*
 50 |  * Emit one PE-level engine sample: sample_md followed by an st_pe_stats record, pushed to
 51 |  * the buffer for col_type. Counters and timers are reported as the change since the
 52 |  * previous sample of the same col_type, whose snapshot is kept in last_pe_stats[col_type].
 53 |  * NOTE(review): last_all_reduce_cnt is a single value shared by all collection types, unlike
 54 |  * the per-type snapshots -- confirm that is intended when more than one type is sampled.
 55 |  */
 56 | void st_collect_engine_data_pes(tw_pe *pe, sample_metadata *sample_md, tw_statistics *s, int col_type)
 57 | {
 58 |     const tw_statistics *prev = &last_pe_stats[col_type];
 59 |     st_pe_stats pe_stats;
 60 |     int buf_size = sizeof(*sample_md) + sizeof(pe_stats);
 61 |     char buffer[buf_size];
 62 |     tw_stat all_reduce_cnt = st_get_allreduce_count();
 63 | 
 64 |     // the time stamps in sample_md were set by the caller
 65 |     sample_md->flag = PE_TYPE;
 66 |     sample_md->sample_sz = sizeof(pe_stats);
 67 | 
 68 |     pe_stats.peid = (unsigned int) g_tw_mynode;
 69 |     pe_stats.s_pq_qsize = tw_pq_get_size(pe->pq);   // current value, not a change
 70 |     pe_stats.s_nevent_processed = (unsigned int)(s->s_nevent_processed - prev->s_nevent_processed);
 71 |     pe_stats.s_nevent_abort = (unsigned int)(s->s_nevent_abort - prev->s_nevent_abort);
 72 |     pe_stats.s_e_rbs = (unsigned int)(s->s_e_rbs - prev->s_e_rbs);
 73 |     pe_stats.s_rb_total = (unsigned int)(s->s_rb_total - prev->s_rb_total);
 74 |     pe_stats.s_rb_secondary = (unsigned int)(s->s_rb_secondary - prev->s_rb_secondary);
 75 |     pe_stats.s_fc_attempts = (unsigned int)(s->s_fc_attempts - prev->s_fc_attempts);
 76 |     pe_stats.s_nsend_network = (unsigned int)(s->s_nsend_network - prev->s_nsend_network);
 77 |     pe_stats.s_nread_network = (unsigned int)(s->s_nread_network - prev->s_nread_network);
 78 |     pe_stats.s_pe_event_ties = (unsigned int)(s->s_pe_event_ties - prev->s_pe_event_ties);
 79 |     pe_stats.s_ngvts = (unsigned int)(g_tw_gvt_done - prev->s_ngvts);
 80 |     pe_stats.all_reduce_count = (unsigned int)(all_reduce_cnt - last_all_reduce_cnt);
 81 | 
 82 |     // net_events may be non-positive over an interval (e.g. more events rolled back than processed); report 0 then
 83 |     const int net_events = pe_stats.s_nevent_processed - pe_stats.s_e_rbs;
 84 |     pe_stats.efficiency = (net_events > 0) ? (float) 100.0 * (1.0 - ((float) pe_stats.s_e_rbs / (float) net_events)) : 0;
 85 | 
 86 |     // timers: cycle counter changes divided by g_tw_clock_rate (TODO: counters start at different values on different nodes)
 87 |     pe_stats.s_net_read = (float)(pe->stats.s_net_read - prev->s_net_read) / g_tw_clock_rate;
 88 |     pe_stats.s_net_other = (float)(pe->stats.s_net_other - prev->s_net_other) / g_tw_clock_rate;
 89 |     pe_stats.s_gvt = (float)(pe->stats.s_gvt - prev->s_gvt) / g_tw_clock_rate;
 90 |     pe_stats.s_fossil_collect = (float)(pe->stats.s_fossil_collect - prev->s_fossil_collect) / g_tw_clock_rate;
 91 |     pe_stats.s_event_abort = (float)(pe->stats.s_event_abort - prev->s_event_abort) / g_tw_clock_rate;
 92 |     pe_stats.s_event_process = (float)(pe->stats.s_event_process - prev->s_event_process) / g_tw_clock_rate;
 93 |     pe_stats.s_pq = (float)(pe->stats.s_pq - prev->s_pq) / g_tw_clock_rate;
 94 |     pe_stats.s_rollback = (float)(pe->stats.s_rollback - prev->s_rollback) / g_tw_clock_rate;
 95 |     pe_stats.s_cancel_q = (float)(pe->stats.s_cancel_q - prev->s_cancel_q) / g_tw_clock_rate;
 96 |     pe_stats.s_avl = (float)(pe->stats.s_avl - prev->s_avl) / g_tw_clock_rate;
 97 |     pe_stats.s_buddy = (float)(pe->stats.s_buddy - prev->s_buddy) / g_tw_clock_rate;
 98 |     pe_stats.s_lz4 = (float)(pe->stats.s_lz4 - prev->s_lz4) / g_tw_clock_rate;
 99 |     memcpy(buffer, sample_md, sizeof(*sample_md));
100 |     memcpy(buffer + sizeof(*sample_md), &pe_stats, sizeof(pe_stats));
101 |     st_buffer_push(col_type, buffer, buf_size);
102 |     memcpy(&last_pe_stats[col_type], s, sizeof(tw_statistics));   // snapshot for the next sample
103 |     last_all_reduce_cnt = all_reduce_cnt;
104 | }
105 | 
106 | /*
107 |  * Emit one KP-level engine sample for kp: sample_md followed by an st_kp_stats record,
108 |  * pushed to the buffer for col_type. Counters are the change since the previous sample
109 |  * of that col_type; kp->last_stats[col_type] is then refreshed from kp->kp_stats.
110 |  */
111 | void st_collect_engine_data_kps(tw_pe *pe, tw_kp *kp, sample_metadata *sample_md, int col_type)
112 | {
113 |     st_kp_stats kp_stats;
114 |     int buf_size = sizeof(*sample_md) + sizeof(kp_stats);
115 |     char buffer[buf_size];
116 |     int written = 0;
117 | 
118 |     // the time stamps in sample_md were set by the caller
119 |     sample_md->flag = KP_TYPE;
120 |     sample_md->sample_sz = sizeof(kp_stats);
121 | 
122 |     kp_stats.peid = (unsigned int) g_tw_mynode;
123 |     kp_stats.kpid = kp->id;
124 |     kp_stats.s_nevent_processed = (unsigned int)(kp->kp_stats->s_nevent_processed - kp->last_stats[col_type]->s_nevent_processed);
125 |     kp_stats.s_nevent_abort = (unsigned int)(kp->kp_stats->s_nevent_abort - kp->last_stats[col_type]->s_nevent_abort);
126 |     kp_stats.s_e_rbs = (unsigned int)(kp->kp_stats->s_e_rbs - kp->last_stats[col_type]->s_e_rbs);
127 |     kp_stats.s_rb_total = (unsigned int)(kp->kp_stats->s_rb_total - kp->last_stats[col_type]->s_rb_total);
128 |     kp_stats.s_rb_secondary = (unsigned int)(kp->kp_stats->s_rb_secondary - kp->last_stats[col_type]->s_rb_secondary);
129 |     kp_stats.s_nsend_network = (unsigned int)(kp->kp_stats->s_nsend_network - kp->last_stats[col_type]->s_nsend_network);
130 |     kp_stats.s_nread_network = (unsigned int)(kp->kp_stats->s_nread_network - kp->last_stats[col_type]->s_nread_network);
131 | #ifdef USE_RAND_TIEBREAKER
132 |     kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_sig.recv_ts) - TW_STIME_DBL(pe->GVT_sig.recv_ts));
133 | #else
134 |     kp_stats.time_ahead_gvt = (float)(TW_STIME_DBL(kp->last_time) - TW_STIME_DBL(pe->GVT));
135 | #endif
136 | 
137 |     // a non-positive net event count is reported as an efficiency of 0
138 |     const int net_events = kp_stats.s_nevent_processed - kp_stats.s_e_rbs;
139 |     kp_stats.efficiency = (net_events > 0) ? (float) 100.0 * (1.0 - ((float) kp_stats.s_e_rbs / (float) net_events)) : 0;
140 | 
141 |     memcpy(kp->last_stats[col_type], kp->kp_stats, sizeof(st_kp_stats));
142 | 
143 |     memcpy(buffer, sample_md, sizeof(*sample_md));
144 |     written += sizeof(*sample_md);
145 |     memcpy(buffer + written, &kp_stats, sizeof(kp_stats));
146 |     written += sizeof(kp_stats);
147 |     if (written != buf_size)
148 |         tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
149 | 
150 |     st_buffer_push(col_type, buffer, buf_size);
151 | }
152 | 
153 | /*
154 |  * Emit one LP-level engine sample for lp: sample_md followed by an st_lp_stats record,
155 |  * pushed to the buffer for col_type. Counters are the change since the previous sample
156 |  * of that col_type; lp->last_stats[col_type] is then refreshed from lp->lp_stats.
157 |  */
158 | void st_collect_engine_data_lps(tw_lp *lp, sample_metadata *sample_md, int col_type)
159 | {
160 |     st_lp_stats lp_stats;
161 |     int buf_size = sizeof(*sample_md) + sizeof(lp_stats);
162 |     char buffer[buf_size];
163 |     int written = 0;
164 | 
165 |     // the time stamps in sample_md were set by the caller
166 |     sample_md->flag = LP_TYPE;
167 |     sample_md->sample_sz = sizeof(lp_stats);
168 | 
169 |     lp_stats.peid = (unsigned int) g_tw_mynode;
170 |     lp_stats.kpid = lp->kp->id;
171 |     lp_stats.lpid = lp->gid;
172 |     lp_stats.s_nevent_processed = (unsigned int)(lp->lp_stats->s_nevent_processed - lp->last_stats[col_type]->s_nevent_processed);
173 |     lp_stats.s_nevent_abort = (unsigned int)(lp->lp_stats->s_nevent_abort - lp->last_stats[col_type]->s_nevent_abort);
174 |     lp_stats.s_e_rbs = (unsigned int)(lp->lp_stats->s_e_rbs - lp->last_stats[col_type]->s_e_rbs);
175 |     lp_stats.s_nsend_network = (unsigned int)(lp->lp_stats->s_nsend_network - lp->last_stats[col_type]->s_nsend_network);
176 |     lp_stats.s_nread_network = (unsigned int)(lp->lp_stats->s_nread_network - lp->last_stats[col_type]->s_nread_network);
177 | 
178 |     // a non-positive net event count is reported as an efficiency of 0
179 |     const int net_events = lp_stats.s_nevent_processed - lp_stats.s_e_rbs;
180 |     lp_stats.efficiency = (net_events > 0) ? (float) 100.0 * (1.0 - ((float) lp_stats.s_e_rbs / (float) net_events)) : 0;
181 | 
182 |     memcpy(lp->last_stats[col_type], lp->lp_stats, sizeof(st_lp_stats));
183 | 
184 |     memcpy(buffer, sample_md, sizeof(*sample_md));
185 |     written += sizeof(*sample_md);
186 |     memcpy(buffer + written, &lp_stats, sizeof(lp_stats));
187 |     written += sizeof(lp_stats);
188 |     if (written != buf_size)
189 |         tw_error(TW_LOC, "size of data being pushed to buffer is incorrect!\n");
190 | 
191 |     st_buffer_push(col_type, buffer, buf_size);
192 | }
193 | 


--------------------------------------------------------------------------------
/core/instrumentation/st-stats-buffer.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | #include <time.h>
  3 | #include <sys/stat.h>
  4 | 
  5 | static long missed_bytes = 0;                   // bytes st_buffer_push() dropped because a buffer was full
  6 | static MPI_Offset *prev_offsets = NULL;         // per collection type: file offset where the next dump starts
  7 | static MPI_File *buffer_fh = NULL;              // per collection type: MPI file handle
  8 | char stats_directory[INST_MAX_LENGTH];          // output directory name, broadcast from the master node
  9 | int g_st_buffer_size = 8000000;                 // capacity of each buffer in bytes
 10 | int g_st_buffer_free_percent = 15;              // st_buffer_write() dumps once some rank's buffer is at least this percent full
 11 | static int buffer_overflow_warned = 0;          // overflow warning already printed since the last dump
 12 | static const char *file_suffix[NUM_COL_TYPES];  // file name suffix per collection type, filled in by st_buffer_init()
 13 | FILE *seq_ev_trace, *seq_model, *seq_analysis;  // fopen()ed output files used by SEQUENTIAL runs
 14 | static st_stats_buffer **g_st_buffer;           // one buffer per collection type, indexed by collection_types
 15 | 
 16 | /* One-time setup for instrumentation output; returns at once when nothing is enabled.
 17 |  * The master node creates the output directory (using a pid/time-suffixed fallback name when
 18 |  * mkdir on the requested path fails) and broadcasts its name; per-type bookkeeping is allocated. */
 19 | void st_buffer_allocate()
 20 | {
 21 |     int i, len;
 22 | 
 23 |     if (!(g_st_engine_stats || g_st_model_stats || g_st_ev_trace || g_st_use_analysis_lps))
 24 |         return;
 25 | 
 26 |     // setup directory for instrumentation output
 27 |     if (g_tw_mynode == g_tw_masternode)
 28 |     {
 29 |         if (!g_st_stats_path[0])
 30 |             sprintf(g_st_stats_path, "stats-output");
 31 |         if (mkdir(g_st_stats_path, S_IRUSR | S_IWUSR | S_IXUSR) == -1)
 32 |         {
 33 |             // snprintf() returns the length the full name needs: a result >= the buffer size
 34 |             // means the name was truncated, a negative result means an output error
 35 |             len = snprintf(stats_directory, sizeof(stats_directory), "%s-%ld-%ld", g_st_stats_path, (long)getpid(), (long)time(NULL));
 36 |             if (len < 0 || (size_t) len >= sizeof(stats_directory))
 37 |             {
 38 |                 printf("Error in st_buffer_allocate: stats_directory name lacked sufficient space and was truncaited\n");
 39 |                 exit(-1);
 40 |             }
 41 |             mkdir(stats_directory, S_IRUSR | S_IWUSR | S_IXUSR);
 42 |         }
 43 |         else
 44 |             sprintf(stats_directory, "%s", g_st_stats_path);
 45 |     }
 46 | 
 47 |     // make sure everyone has the directory name
 48 |     MPI_Bcast(stats_directory, INST_MAX_LENGTH, MPI_CHAR, g_tw_masternode, MPI_COMM_ROSS);
 49 | 
 50 |     // allocate buffer pointers
 51 |     g_st_buffer = (st_stats_buffer**) tw_calloc(TW_LOC, "instrumentation (buffer)", sizeof(st_stats_buffer*), NUM_COL_TYPES);
 52 | 
 53 |     // setup MPI file offsets
 54 |     if (!prev_offsets)
 55 |     {
 56 |         prev_offsets = (MPI_Offset*) tw_calloc(TW_LOC, "statistics collection (buffer)", sizeof(MPI_Offset), NUM_COL_TYPES);
 57 |         for (i = 0; i < NUM_COL_TYPES; i++) prev_offsets[i] = 0;
 58 |     }
 59 |     // set up file handlers
 60 |     if (!buffer_fh)
 61 |         buffer_fh = (MPI_File*) tw_calloc(TW_LOC, "statistics collection (buffer)", sizeof(MPI_File), NUM_COL_TYPES);
 62 | }
 63 | 
 64 | /* Create the circular buffer for one collection type and, unless output is disabled, open
 65 |  * its output file "<stats_directory>/<g_st_stats_out>-<suffix>.bin". The read position marks
 66 |  * the beginning of used space, the write position its end. Exits if the name does not fit. */
 67 | void st_buffer_init(int type)
 68 | {
 69 |     char filename[INST_MAX_LENGTH];
 70 |     int len;
 71 | 
 72 |     file_suffix[GVT_COL] = "gvt";
 73 |     file_suffix[RT_COL] = "rt";
 74 |     file_suffix[ANALYSIS_LP] = "analysis-lps";
 75 |     file_suffix[EV_TRACE] = "evtrace";
 76 |     file_suffix[MODEL_COL] = "model";
 77 | 
 78 |     g_st_buffer[type] = (st_stats_buffer*) tw_calloc(TW_LOC, "statistics collection (buffer)", sizeof(st_stats_buffer), 1);
 79 |     g_st_buffer[type]->size  = g_st_buffer_size;
 80 |     g_st_buffer[type]->write_pos = 0;
 81 |     g_st_buffer[type]->read_pos = 0;
 82 |     g_st_buffer[type]->count = 0;
 83 |     g_st_buffer[type]->buffer = (char*) tw_calloc(TW_LOC, "statistics collection (buffer)", 1, g_st_buffer[type]->size);
 84 | 
 85 |     if (g_st_disable_out)   // no file is opened when output is disabled
 86 |         return;
 87 | 
 88 |     if (!g_st_stats_out[0])
 89 |         sprintf(g_st_stats_out, "ross-stats");
 90 |     // snprintf() returns the length the full name needs: >= sizeof(filename) means truncation, < 0 an error
 91 |     len = snprintf(filename, sizeof(filename), "%s/%s-%s.bin", stats_directory, g_st_stats_out, file_suffix[type]);
 92 |     if (len < 0 || (size_t) len >= sizeof(filename))
 93 |     {
 94 |         printf("Error in st_buffer_init: filename lacked sufficient space and was truncaited\n");
 95 |         exit(-1);
 96 |     }
 97 |     // NOTE(review): the results of MPI_File_open() and fopen() are not checked here
 98 |     if (g_tw_synchronization_protocol != SEQUENTIAL)
 99 |         MPI_File_open(MPI_COMM_ROSS, filename, MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_WRONLY, MPI_INFO_NULL, &buffer_fh[type]);
100 |     else if (type == EV_TRACE)
101 |         seq_ev_trace = fopen(filename, "w");
102 |     else if (type == MODEL_COL)
103 |         seq_model = fopen(filename, "w");
104 |     else if (type == ANALYSIS_LP)
105 |         seq_analysis = fopen(filename, "w");
106 | }
107 | 
108 | /* Append size bytes from data to the buffer of the given collection type. With output
109 |  * enabled, a push that does not fit in the free space is dropped as a whole: its size is
110 |  * added to missed_bytes and a warning is printed (once until the next dump to file). */
111 | void st_buffer_push(int type, char *data, int size)
112 | {
113 |     st_stats_buffer *buf = g_st_buffer[type];
114 |     int tail_room, remainder;
115 | 
116 |     if (!g_st_disable_out && st_buffer_free_space(buf) < size)
117 |     {
118 |         if (!buffer_overflow_warned)
119 |         {
120 |             printf("WARNING: Stats buffer overflow on rank %lu\n", g_tw_mynode);
121 |             buffer_overflow_warned = 1;
122 |             printf("tw_now() = %f\n", TW_STIME_DBL(tw_now(g_tw_lp[0])));
123 |         }
124 |         missed_bytes += size;
125 |         return;   // if we can't push it all, nothing is pushed and count stays as it is
126 |     }
127 | 
128 |     if (size)
129 |     {
130 |         tail_room = buf->size - buf->write_pos;   // bytes left before the physical end of the array
131 |         if (tail_room >= size)
132 |         {
133 |             memcpy(st_buffer_write_ptr(buf), data, size);
134 |             buf->write_pos += size;
135 |         }
136 |         else   // data wraps around: fill the tail, continue at the start of the array
137 |         {
138 |             remainder = size - tail_room;
139 |             memcpy(st_buffer_write_ptr(buf), data, tail_room);
140 |             memcpy(buf->buffer, data + tail_room, remainder);
141 |             buf->write_pos = remainder;
142 |         }
143 |     }
144 |     buf->count += size;
145 | }
146 | 
147 | /*
148 |  * Decide whether the buffer of this collection type gets dumped to its file, and dump it
149 |  * if so. Should only be called at GVT! The ranks exchange their buffered byte counts; a
150 |  * write happens when end_of_sim is set or when any rank's count has reached
151 |  * g_st_buffer_free_percent percent of g_st_buffer_size. In the file, each rank's data starts
152 |  * at the previous end of file plus the byte counts of all lower ranks.
153 |  * The cycles spent are added to g_tw_pe->stats.s_stat_comp and s_stat_write.
154 |  */
155 | void st_buffer_write(int end_of_sim, int type)
156 | {
157 |     MPI_Offset offset = prev_offsets[type];
158 |     MPI_Status status;
159 |     int write_to_file = end_of_sim ? 1 : 0;
160 |     int my_write_size;
161 |     unsigned int rank;
162 |     int write_sizes[tw_nnodes()];
163 |     tw_clock start_cycle_time = tw_clock_read();
164 | 
165 |     my_write_size = g_st_buffer[type]->count;
166 |     MPI_Allgather(&my_write_size, 1, MPI_INT, &write_sizes[0], 1, MPI_INT, MPI_COMM_ROSS);
167 | 
168 |     // a forced (end of sim) write skips the fill-level test; otherwise one full-enough rank suffices
169 |     for (rank = 0; !write_to_file && rank < tw_nnodes(); rank++)
170 |         write_to_file = ((double) write_sizes[rank] / g_st_buffer_size >= g_st_buffer_free_percent / 100.0);
171 | 
172 |     if (!write_to_file)
173 |     {
174 |         g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time;
175 |         return;
176 |     }
177 | 
178 |     // prev_offsets advances by everyone's bytes; this rank starts after all lower ranks
179 |     for (rank = 0; rank < tw_nnodes(); rank++)
180 |     {
181 |         if (rank < g_tw_mynode)
182 |             offset += write_sizes[rank];
183 |         prev_offsets[type] += write_sizes[rank];
184 |     }
185 | 
186 |     // cycles up to here count as computation, the collective write itself as write time
187 |     g_tw_pe->stats.s_stat_comp += tw_clock_read() - start_cycle_time;
188 |     start_cycle_time = tw_clock_read();
189 |     MPI_File_write_at_all(buffer_fh[type], offset, st_buffer_read_ptr(g_st_buffer[type]), my_write_size, MPI_BYTE, &status);
190 |     g_tw_pe->stats.s_stat_write += tw_clock_read() - start_cycle_time;
191 | 
192 |     // reset the buffer
193 |     g_st_buffer[type]->write_pos = 0;
194 |     g_st_buffer[type]->read_pos = 0;
195 |     g_st_buffer[type]->count = 0;
196 |     buffer_overflow_warned = 0;
197 | }
198 | 
199 | /* Write out whatever is still buffered for this collection type (unless output is disabled)
200 |  * and close its MPI output file. Prints the bytes dropped because of buffer overflow. */
201 | void st_buffer_finalize(int type)
202 | {
203 |     if (!g_st_disable_out)
204 |         st_buffer_write(1, type);
205 |     printf("PE %ld: There were %ld bytes of data missed because of buffer overflow\n", g_tw_mynode, missed_bytes);
206 | 
207 |     // st_buffer_init() opens an MPI file only when output is enabled and the run is not SEQUENTIAL
208 |     if (!g_st_disable_out && g_tw_synchronization_protocol != SEQUENTIAL)
209 |         MPI_File_close(&buffer_fh[type]);
210 | }
211 | 


--------------------------------------------------------------------------------
/core/network-mpi.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_network_mpi_h
  2 | #define INC_network_mpi_h
  3 | 
  4 | typedef long tw_node;
  5 | 
  6 | extern MPI_Comm MPI_COMM_ROSS;
  7 | 
  8 | /**
  9 |  * @brief Initialize the network library and parse options.
 10 |  *
 11 |  * argc and argv are pointers to the original command line; the
 12 |  * network library may edit these before the option parser sees
 13 |  * them allowing for network implementation specific argument
 14 |  * handling to occur.
 15 |  *
 16 |  * It's possible for a model to init MPI itself, as this
 17 |  * function will first check if MPI is already initialized before
 18 |  * attempting to call MPI_Init().
 19 |  *
 20 |  * This function also sets the global variables
 21 |  * g_tw_masternode and g_tw_mynode.
 22 |  *
 23 |  * @param[in] argc Pointer to command line arg count
 24 |  * @param[in] argv Pointer to command line args
 25 |  * @return tw_optdef array to be included in overall process
 26 |  * command line argument display and parsing; NULL may be returned
 27 |  * to indicate the implementation has no options it wants included.
 28 |  */
 29 | const tw_optdef *tw_net_init(int *argc, char ***argv);
 30 | 
 31 | /**
 32 |  * @brief Setup the MPI_COMM_ROSS communicator to use instead of MPI_COMM_WORLD.
 33 |  *
 34 |  * This function should be called before tw_net_init.
 35 |  * @param[in] comm Custom MPI communicator for setting MPI_COMM_ROSS
 36 |  */
 37 | void tw_comm_set(MPI_Comm comm);
 38 | 
 39 | /**
 40 |  * @brief Starts the network library after option parsing.
 41 |  *
 42 |  * Makes calls to initialize the PE (g_tw_pe), create the hash/AVL tree
 43 |  * (for optimistic modes), and queues for posted sends/recvs.
 44 |  * Also pre-posts MPI Irecvs operations.
 45 |  */
 46 | void tw_net_start(void);
 47 | 
 48 | /**
 49 |  * @brief Stops the network library after simulation end.
 50 |  *
 51 |  * Checks to see if custom communicator was used. If not, finalizes MPI.
 52 |  * Otherwise, the application is expected to finalize MPI itself.
 53 |  */
 54 | void tw_net_stop(void);
 55 | 
 56 | /** Aborts the entire simulation when a grave error is found. */
 57 | void tw_net_abort(void) NORETURN;
 58 | 
 59 | /**
 60 |  * @brief starts service_queues() to poll network
 61 |  *
 62 |  * @param[in] me pointer to the PE
 63 |  */
 64 | extern void tw_net_read(tw_pe *);
 65 | 
 66 | /**
 67 |  * @brief Adds the event to the outgoing queue of events to be sent,
 68 |  * polls for finished sends, and attempts to start sends from outq.
 69 |  *
 70 |  * @param[in] e remote event to be sent
 71 |  */
 72 | extern void tw_net_send(tw_event *);
 73 | 
 74 | /**
 75 |  * @brief Cancel the given remote event by either removing from the outq
 76 |  * or sending an antimessage, depending on the status of the original positive send.
 77 |  *
 78 |  * @param[in] e remote event to be canceled
 79 |  */
 80 | extern void tw_net_cancel(tw_event *);
 81 | 
 82 | /** Obtain the total number of PEs executing the simulation.
 83 |  *
 84 |  * @return number of ROSS PEs/MPI world size
 85 |  */
 86 | extern unsigned tw_nnodes(void);
 87 | 
 88 | /** Block until all nodes call the barrier. */
 89 | extern void tw_net_barrier(void);
 90 | 
 91 | /**
 92 |  * @brief Obtain the lowest timestamp inside the network buffers.
 93 |  *
 94 |  * @return minimum timestamp for this PE's network buffers
 95 |  */
 96 | extern tw_stime tw_net_minimum(void);
 97 | 
 98 | #ifdef USE_RAND_TIEBREAKER
 99 | /**
100 |  * @brief Obtain the event signature for the lowest ordered event inside the network buffers.
101 |  *
102 |  * @return minimum event signature for this PE's network buffers
103 |  */
104 | extern tw_event_sig tw_net_minimum_sig(void);
105 | #endif
106 | 
107 | /**
108 |  * @brief Function to reduce all the statistics for output.
109 |  * @attention Notice that the MPI_Reduce "count" parameter is greater than one.
110 |  * We are reducing on multiple variables *simultaneously* so if you change
111 |  * this function or the struct tw_statistics, you must update the other.
112 |  **/
113 | extern tw_statistics *tw_net_statistics(tw_pe *, tw_statistics *);
114 | 
115 | #endif
116 | 


--------------------------------------------------------------------------------
/core/queue/heap.c.old:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 |  * Additional Contributions and Acknowledgements
  3 |  *   Kalyan Perumalla - Ga Tech
  4 |  *
  5 |  *   This implementation is an adaption of the implementation done
  6 |  *   by Kalyan for Ga Tech Time Warp
  7 |  **********************************************************************/
  8 | 
  9 | #include <ross.h>
 10 | 
 11 | typedef tw_event *ELEMENT_TYPE;
 12 | typedef double KEY_TYPE;
 13 | #define KEY(e) (e->recv_ts)
 14 | 
/* Array-backed binary heap of events, ordered by KEY (smallest key at index 0). */
struct tw_pq
{
  unsigned long nelems;   /* number of events currently stored in elems */
  unsigned long curr_max; /* number of slots currently allocated for elems */
  ELEMENT_TYPE *elems; /* Array [0..curr_max-1] of ELEMENT_TYPE; slots [0..nelems-1] form the heap */
};
 21 | 
 22 | #define SWAP(heap,x,y,t) { \
 23 |     t = heap->elems[x]; \
 24 |     heap->elems[x] = heap->elems[y]; \
 25 |     heap->elems[y] = t; \
 26 |     heap->elems[x]->heap_index = x; \
 27 |     heap->elems[y]->heap_index = y; \
 28 |     }
 29 | 
 30 | /*---------------------------------------------------------------------------*/
 31 | void*
 32 | tw_unsafe_realloc(
 33 |   const char *file,
 34 |   int line,
 35 |   const char *for_who,
 36 |   void *addr,
 37 |   size_t len)
 38 | {
 39 |   malloc_calls++;
 40 |   total_allocated += len;
 41 |   addr = realloc(addr, len);
 42 |   if (!addr)
 43 |     tw_error(
 44 |       file, line,
 45 |       "Cannot allocate %lu bytes for %s",
 46 |       (unsigned long)len,
 47 |       for_who);
 48 |   return addr;
 49 | }
 50 | 
 51 | /*---------------------------------------------------------------------------*/
 52 | static inline ELEMENT_TYPE HeapPeekTop( tw_pq *h )
 53 | {
 54 |   return (h->nelems <= 0) ? 0 : h->elems[0];
 55 | }
 56 | 
 57 | /*---------------------------------------------------------------------------*/
 58 | static void sift_down( tw_pq *h, int i )
 59 | {
 60 |   int n = h->nelems, k = i, j, c1, c2;
 61 |   ELEMENT_TYPE temp;
 62 | 
 63 |   if( n <= 1 ) return;
 64 | 
 65 |   /* Stops when neither child is "strictly less than" parent */
 66 |   do{
 67 |     j = k;
 68 |     c1 = c2 = 2*k+1;
 69 |     c2++;
 70 |     if( c1 < n && KEY(h->elems[c1]) < KEY(h->elems[k]) ) k = c1;
 71 |     if( c2 < n && KEY(h->elems[c2]) < KEY(h->elems[k]) ) k = c2;
 72 |     SWAP( h, j, k, temp );
 73 |   }while( j != k );
 74 | }
 75 | 
 76 | /*---------------------------------------------------------------------------*/
 77 | static void percolate_up( tw_pq *h, int i )
 78 | {
 79 |   int n = h->nelems, k = i, j, p;
 80 |   ELEMENT_TYPE temp;
 81 | 
 82 |   if( n <= 1 ) return;
 83 | 
 84 |   /* Stops when parent is "less than or equal to" child */
 85 |   do
 86 |     {
 87 |       j = k;
 88 |       if( (p = (k+1)/2) )
 89 | 	{
 90 | 	  --p;
 91 | 	  if( KEY(h->elems[k]) < KEY(h->elems[p]) ) k = p;
 92 | 	}
 93 |       SWAP( h, j, k, temp );
 94 |     }while( j != k );
 95 | }
 96 | 
 97 | /*---------------------------------------------------------------------------*/
 98 | void tw_pq_enqueue(tw_pq *h, ELEMENT_TYPE e )
 99 | {
100 |   if( h->nelems >= h->curr_max )
101 |     {
102 | 	  const unsigned int i = 50000;
103 | 	  const unsigned int u = h->curr_max;
104 |       h->curr_max += i;
105 |       h->elems = tw_unsafe_realloc(
106 | 		TW_LOC,
107 | 		"heap queue elements",
108 | 		h->elems,
109 | 		sizeof(*h->elems) * h->curr_max);
110 | 	  memset(&h->elems[u], 0, sizeof(*h->elems) * i);
111 |     }
112 | 
113 |   e->heap_index = h->nelems;
114 |   h->elems[h->nelems++] = e;
115 |   percolate_up( h, h->nelems-1 );
116 | 
117 |   e->state.owner = TW_pe_pq;
118 |   e->next = NULL;
119 |   e->prev = NULL;
120 | }
121 | 
122 | /*---------------------------------------------------------------------------*/
123 | ELEMENT_TYPE tw_pq_dequeue(tw_pq *h)
124 | {
125 |   if( h->nelems <= 0 )
126 |     return 0;
127 |   else
128 |     {
129 |       ELEMENT_TYPE e = h->elems[0];
130 |       h->elems[0] = h->elems[--h->nelems];
131 |       h->elems[0]->heap_index = 0;
132 |       sift_down( h, 0 );
133 |       e->state.owner = 0;
134 |       return e;
135 |     }
136 | }
137 | 
138 | /*---------------------------------------------------------------------------*/
139 | #if 0
140 | static void DumpBucket( void *pq, FILE *fp )
141 | {
142 |   int i;
143 |   tw_pq *h = (tw_pq *)(pq);
144 |   fprintf( fp, "[ " );
145 |   for( i = 0; i < h->nelems; i++ )
146 |     {
147 |       fprintf( fp, "%s", ( i && i % 10 == 0 ) ? "\n\t" : "" );
148 |       fprintf( fp, "%s%lf", (i ? ", ":""), KEY(h->elems[i]) );
149 |     }
150 |   fprintf( fp, " ]\n" );
151 |   fflush( fp );
152 | }
153 | #endif
154 |   
155 | /*---------------------------------------------------------------------------*/
156 | void tw_pq_delete_any(tw_pq *h, tw_event * victim)
157 | {
158 |   int i = victim->heap_index;
159 | 
160 |   if( !(0 <= i && i < h->nelems) || (h->elems[i]->heap_index != i) )
161 |     {
162 |       fprintf( stderr, "Fatal: Bad node in FEL!\n" ); exit(2);
163 |     }
164 |   else
165 |     {
166 |       h->nelems--;
167 |       victim->state.owner = 0;
168 | 
169 |       if( h->nelems > 0 )
170 | 	{
171 | 	  ELEMENT_TYPE successor = h->elems[h->nelems];
172 | 	  h->elems[i] = successor;
173 | 	  successor->heap_index = i;
174 | 	  if( KEY(successor) <= KEY(victim) ) percolate_up( h, i );
175 | 	  else sift_down( h, i );
176 | 	}
177 |     }
178 | }
179 | 
180 | /*---------------------------------------------------------------------------*/
181 | tw_pq * tw_pq_create(void)
182 | {
183 |   tw_pq *h = tw_calloc(TW_LOC, "heap queue", sizeof(tw_pq), 1);
184 |   h->nelems = 0;
185 |   h->curr_max = (2*g_tw_events_per_pe);
186 |   h->elems = tw_unsafe_realloc(
187 | 	TW_LOC,
188 | 	"heap queue elements",
189 | 	NULL,
190 | 	sizeof(*h->elems) * h->curr_max);
191 |   memset(h->elems, 0, sizeof(*h->elems) * h->curr_max);
192 | 
193 |   return h;
194 | }
195 | 
196 | /*---------------------------------------------------------------------------*/
197 | tw_stime tw_pq_minimum(tw_pq *pq)
198 | {
199 |   ELEMENT_TYPE e = HeapPeekTop(pq);
200 |   double retval = e ? KEY(e) : HUGE_VAL;
201 |   return retval;
202 | }
203 | 
204 | /*---------------------------------------------------------------------------*/
205 | unsigned int tw_pq_get_size( tw_pq *pq )
206 | {
207 |   return ( pq->nelems );
208 | }
209 | 


--------------------------------------------------------------------------------
/core/queue/tw-queue.h:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | // This is the API for the tw queue system
 4 | // There are several queue implementations
 5 | 
 6 | tw_pq *tw_pq_create(void);
 7 | void tw_pq_enqueue(tw_pq *, tw_event *);
 8 | tw_event *tw_pq_dequeue(tw_pq *);
 9 | tw_stime tw_pq_minimum(tw_pq *);
10 | #ifdef USE_RAND_TIEBREAKER
11 | tw_event_sig tw_pq_minimum_sig(tw_pq *);
12 | #endif
13 | void tw_pq_delete_any(tw_pq *, tw_event *);
14 | unsigned int tw_pq_get_size(tw_pq *);
15 | unsigned int tw_pq_max_size(tw_pq *);
16 | #ifdef ROSS_QUEUE_kp_splay
17 | tw_eventpq * tw_eventpq_create(void);
18 | #endif
19 | 


--------------------------------------------------------------------------------
/core/rand-clcg4.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_clcg4_h
 2 | #define INC_clcg4_h
 3 | 
 4 | typedef int32_t * tw_seed;
 5 | 
 6 | struct tw_rng
 7 | {
 8 | 	/*
 9 | 	 * equals a[i]^{m[i]-2} mod m[i]
10 | 	 */
11 | 	long long	b[4];
12 | 
13 | 	/*
 14 | 	 * a[j]^{2^w} and a[j]^{2^{v+w}}.
15 | 	 */
16 | 	int32_t	m[4];
17 | 	int32_t	a[4];
18 | 	int32_t	aw[4];
19 | 	int32_t	avw[4];
20 | 
21 | 	// the seed..
22 | 	int32_t	seed[4];
23 | };
24 | 
25 | enum SeedType
26 | {
27 | 	InitialSeed, LastSeed, NewSeed
28 | };
29 | 
30 | typedef enum SeedType SeedType;
31 | 
32 | struct tw_rng_stream
33 | {
34 |     unsigned long count;
35 | 	int32_t	 Ig[4];
36 | 	int32_t	 Lg[4];
37 | 	int32_t	 Cg[4];
38 | 
39 | 	//tw_rng	*rng;
40 | 
41 | #ifdef RAND_NORMAL
42 | 	double	 tw_normal_u1;
43 | 	double	 tw_normal_u2;
44 | 	int	 tw_normal_flipflop;
45 | #endif
46 | };
47 | 
48 | extern tw_rng	*rng_init(int v, int w);
49 | extern tw_rng   *rng_core_init(int v, int w);
50 | extern void     rng_set_initial_seed();
51 | extern void     rng_init_generator(tw_rng_stream * g, SeedType Where, tw_rng * the_rng);
52 | extern void     rng_set_seed(tw_rng_stream * g, uint32_t * s, tw_rng * the_rng);
53 | extern void     rng_get_state(tw_rng_stream * g, uint32_t * s);
54 | extern void     rng_write_state(tw_rng_stream * g, FILE *f);
55 | extern double   rng_gen_val(tw_rng_stream * g);
56 | extern double   rng_gen_reverse_val(tw_rng_stream * g);
57 | 
58 | #endif
59 | 


--------------------------------------------------------------------------------
/core/rio/README.md:
--------------------------------------------------------------------------------
 1 | # RIO: A Checkpoint/Restart API for ROSS
 2 | 
 3 | RIO (ROSS Restart IO) is a checkpointing API for [Rensselaer's Optimistic Simulation System](https://github.com/ROSS-org/ROSS).
 4 | RIO is for checkpoint-restart operations and in its current state it cannot be used to create incremental checkpoints for fault tolerance.
 5 | 
 6 | ## Limitations
 7 | 
 8 | As ROSS is developed, full RIO functionality may be lacking and certain LP and event information may not be saved in a checkpoint.
 9 | At this time, the following features are not compatible with RIO:
10 | - delta encoding
11 | - LP suspend
12 | - instrumentation
13 | 
14 | ## Documentation
15 | 
16 | The documentation for RIO can be found on the ROSS website (Look for the RIO section on the [archive page](https://ROSS-org.github.io/archive.html)).
17 | The documentation includes:
18 | 
19 | - [Overview](https://ROSS-org.github.io/rio/rio-overview.html)
20 | - [API Description](https://ROSS-org.github.io/rio/rio-api.html)
21 | - [Checkpoint Description](https://ROSS-org.github.io/rio/rio-files.html)
22 | - [Adding RIO to a Model](https://ROSS-org.github.io/rio/rio-cmake.html)
23 | 
24 | ## Example Usage
25 | 
26 | The full RIO API has been implemented in the [PHOLD-IO model](https://github.com/ROSS-org/pholdio).
27 | 
28 | ## Coding Conventions
29 | 
30 | RIO API functionality is prefixed with `io`.
31 | 


--------------------------------------------------------------------------------
/core/rio/io-serialize.c:
--------------------------------------------------------------------------------
 1 | #include "ross.h"
 2 | 
 3 | size_t io_lp_serialize (tw_lp *lp, void *buffer) {
 4 |     int i, j;
 5 | 
 6 |     io_lp_store tmp;
 7 | 
 8 |     tmp.gid = lp->gid;
 9 |     for (i = 0; i < g_tw_nRNG_per_lp; i++) {
10 |         for (j = 0; j < 4; j++) {
11 |             tmp.rng[j] = lp->rng->Ig[j];
12 |             tmp.rng[j+4] = lp->rng->Lg[j];
13 |             tmp.rng[j+8] = lp->rng->Cg[j];
14 |         }
15 | #ifdef RAND_NORMAL
16 |         tmp.tw_normal_u1 = lp->rng->tw_normal_u1;
17 |         tmp.tw_normal_u2 = lp->rng->tw_normal_u2;
18 |         tmp.tw_normal_flipflop = lp->rng->tw_normal_flipflop;
19 | #endif
20 |     }
21 |     tmp.critical_path = lp->critical_path;
22 | 
23 |     memcpy(buffer, &tmp, sizeof(io_lp_store));
24 |     return sizeof(io_lp_store);
25 | }
26 | 
27 | size_t io_lp_deserialize (tw_lp *lp, void *buffer) {
28 |     int i, j;
29 | 
30 |     io_lp_store tmp;
31 |     memcpy(&tmp, buffer, sizeof(io_lp_store));
32 | 
33 |     lp->gid = tmp.gid;
34 | 
35 |     for (i = 0; i < g_tw_nRNG_per_lp; i++) {
36 |         for (j = 0; j < 4; j++) {
37 |             lp->rng->Ig[j] = tmp.rng[j];
38 |             lp->rng->Lg[j] = tmp.rng[j+4];
39 |             lp->rng->Cg[j] = tmp.rng[j+8];
40 |         }
41 | #ifdef RAND_NORMAL
42 |         lp->rng->tw_normal_u1 = tmp.tw_normal_u1;
43 |         lp->rng->tw_normal_u2 = tmp.tw_normal_u2;
44 |         lp->rng->tw_normal_flipflop = tmp.tw_normal_flipflop;
45 | #endif
46 |     }
47 |     lp->critical_path = tmp.critical_path;
48 | 
49 |     return sizeof(io_lp_store);
50 | }
51 | 
52 | size_t io_event_serialize (tw_event *e, void *buffer) {
53 |     int i;
54 | 
55 |     io_event_store tmp;
56 | 
57 |     memcpy(&(tmp.cv), &(e->cv), sizeof(tw_bf));
58 |     tmp.critical_path = e->critical_path;
59 |     tmp.dest_lp = (tw_lpid)e->dest_lp; // ROSS HACK: dest_lp is gid
60 |     tmp.src_lp = e->src_lp->gid;
61 |     tmp.recv_ts = e->recv_ts - g_tw_ts_end;
62 | 
63 |     memcpy(buffer, &tmp, sizeof(io_event_store));
64 |     // printf("Storing event going to %lu at %f\n", tmp.dest_lp, tmp.recv_ts);
65 |     return sizeof(io_event_store);
66 | }
67 | 
68 | size_t io_event_deserialize (tw_event *e, void *buffer) {
69 |     int i;
70 | 
71 |     io_event_store tmp;
72 |     memcpy(&tmp, buffer, sizeof(io_event_store));
73 |     e->critical_path = tmp.critical_path;
74 | 
75 |     memcpy(&(e->cv), &(tmp.cv), sizeof(tw_bf));
76 |     e->dest_lp = (tw_lp *) tmp.dest_lp; // ROSS HACK: e->dest_lp is GID for a bit
77 |     //undo pointer to GID conversion
78 |     if (g_tw_mapping == LINEAR) {
79 |         e->src_lp = g_tw_lp[((tw_lpid)tmp.src_lp) - g_tw_lp_offset];
80 |     } else if (g_tw_mapping == CUSTOM) {
81 |         e->src_lp = g_tw_custom_lp_global_to_local_map((tw_lpid)tmp.src_lp);
82 |     } else {
83 |         tw_error(TW_LOC, "RIO ERROR: Unsupported mapping");
84 |     }
85 |     e->recv_ts = tmp.recv_ts;
86 |     // printf("Loading event going to %lu at %f\n", tmp.dest_lp, tmp.recv_ts);
87 |     return sizeof(io_event_store);
88 | }
89 | 


--------------------------------------------------------------------------------
/core/rio/io.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_io_h
  2 | #define INC_io_h
  3 | 
  4 | //Elsa Gonsiorowski
  5 | //Rensselaer Polytechnic Institute
  6 | //December 13, 2013
  7 | 
  8 | // ** Global IO System variables ** //
  9 | 
 10 | // Set with command line --io-files
 11 | // should be consistent across the system
 12 | extern int g_io_number_of_files;
 13 | 
 14 | // Register opts with ROSS
 15 | extern const tw_optdef io_opts[3];
 16 | 
// When checkpointed LP data is loaded relative to the lp->init call.
enum io_load_e {
	NONE,		// default value
	PRE_INIT,	// load LPs, then call lp->init
	INIT,		// load LPs instead of calling lp->init
	POST_INIT,	// load LPs after lp->init
};
 23 | typedef enum io_load_e io_load_type;
 24 | extern io_load_type g_io_load_at;
 25 | extern char g_io_checkpoint_name[1024];
 26 | 
 27 | // Should be set in main, before call to io_init
 28 | // Maximum number of events that will be scheduled past end time
 29 | extern int g_io_events_buffered_per_rank;
 30 | 
 31 | // ** API Functions, Types, and Variables ** //
 32 | 
 33 | void io_register_model_version(char *sha1);
 34 | void io_init();
 35 | 
 36 | void io_load_checkpoint(char * master_filename, io_load_type load_at);
 37 | void io_store_checkpoint(char * master_filename, int data_file_number);
 38 | void io_appending_job();
 39 | 
 40 | // LP type map and function struct
 41 | typedef void (*serialize_f)(void * state, void * buffer, tw_lp *lp);
 42 | typedef void (*deserialize_f)(void * state, void * buffer, tw_lp *lp);
 43 | typedef size_t (*model_size_f)(void * state, tw_lp *lp);
 44 | 
 45 | typedef struct {
 46 |     serialize_f serialize;
 47 |     deserialize_f deserialize;
 48 |     model_size_f model_size;
 49 | } io_lptype;
 50 | 
 51 | extern io_lptype * g_io_lp_types;
 52 | 
 53 | // ** Internal IO types, variables, and functions ** //
 54 | 
 55 | typedef struct {
 56 | 	int part;
 57 | 	int file;
 58 | 	int offset;
 59 | 	int size;
 60 | 	int lp_count;
 61 | 	int ev_count;
 62 | } io_partition;
 63 | static int io_partition_field_count = 6;
 64 | 
 65 | typedef struct {
 66 | 	tw_lpid gid;
 67 | 	int32_t rng[12];
 68 | #ifdef RAND_NORMAL
 69 | 	double tw_normal_u1;
 70 | 	double tw_normal_u2;
 71 | 	int tw_normal_flipflop;
 72 | #endif
 73 | 	unsigned int critical_path;
 74 | } io_lp_store;
 75 | 
 76 | typedef struct {
 77 | 	tw_bf cv;
 78 | 	unsigned int critical_path;
 79 | 	tw_lpid dest_lp;
 80 | 	tw_lpid src_lp;
 81 | 	tw_stime recv_ts;
 82 | 	// NOTE: not storing tw_memory or tw_out
 83 | } io_event_store;
 84 | 
 85 | extern io_partition * g_io_partitions;
 86 | 
 87 | // Functions Called Directly from ROSS
 88 | void io_load_events(tw_pe * me);
 89 | void io_event_cancel(tw_event *e);
 90 | void io_read_checkpoint();
 91 | 
 92 | // SERIALIZE FUNCTIONS for LP and EVENT structs
 93 | // found in io-serialize.c
 94 | size_t io_lp_serialize (tw_lp * lp, void * buffer);
 95 | size_t io_lp_deserialize (tw_lp * lp, void * buffer);
 96 | size_t io_event_serialize (tw_event * e, void * buffer);
 97 | size_t io_event_deserialize (tw_event * e, void * buffer);
 98 | 
 99 | // INLINE function for buffering events past end time
100 | extern tw_eventq g_io_buffered_events;
101 | extern tw_eventq g_io_free_events;
102 | extern tw_event * io_event_grab(tw_pe *pe);
103 | #endif
104 | 


--------------------------------------------------------------------------------
/core/ross-config.in:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | ROSS_CC="@ROSS_CC@"
 4 | ROSS_CXX="@ROSS_CXX@"
 5 | ROSS_LD="@ROSS_CC@"
 6 | 
 7 | prefix=`(cd $(dirname $0);pwd) | sed -e 's/\/bin//'`
 8 | CFLAGS_OPTS=''
 9 | usage="\
10 | Usage: ross-config [--cflags] [--ldflags] [--libs] [--cc] [--cxx] [--ld]"
11 | 
12 | if test $# -eq 0; then
13 |       echo "${usage}" 1>&2
14 |       exit 1
15 | fi
16 | 
17 | while test $# -gt 0; do
18 |   case "$1" in
19 |   -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
20 |   *) optarg= ;;
21 |   esac
22 | 
23 |   case $1 in
24 |     --cc)
25 |         echo "${ROSS_CC}"
26 | 
27 | 	;;
28 |     --cxx)
29 |         echo "${ROSS_CXX}"
30 | 
31 | 	;;
32 |     --ld)
33 |         echo "${ROSS_LD}"
34 | 
35 | 	;;
36 |     --cflags)
37 | 	echo "-I${prefix}/include ${CFLAGS_OPTS}"
38 | 
39 |       ;;
40 |     --ldflags)
41 | 	echo -L${prefix}/lib
42 | 
43 |       ;;
44 |     --libs)
45 | 	libflags="-lROSS -lm"
46 | 	echo $libflags
47 |     ;;
48 |     *)
49 | 	echo "${usage}" 1>&2
50 | 	exit 1
51 |       ;;
52 |   esac
53 |   shift
54 | done
55 | 


--------------------------------------------------------------------------------
/core/ross-extern.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_ross_extern_h
  2 | #define INC_ross_extern_h
  3 | 
  4 | extern void tw_rand_init_streams(tw_lp * lp, unsigned int nstreams, unsigned int n_core_streams);
  5 | 
  6 | /*
  7 |  * tw-stats.c
  8 |  */
  9 | extern void tw_get_stats(tw_pe * me, tw_statistics *s);
 10 | extern void tw_stats(tw_pe *me);
 11 | 
 12 | /*
 13 |  * ross-global.c
 14 |  */
 15 | extern tw_synch g_tw_synchronization_protocol;
 16 | extern map_local_f g_tw_custom_lp_global_to_local_map;
 17 | extern map_custom_f g_tw_custom_initial_mapping;
 18 | extern tw_lp_map g_tw_mapping;
 19 | extern tw_lpid  g_tw_nlp;
 20 | extern tw_lpid  g_tw_total_lps; //Total LPs in the simulation
 21 | extern tw_lpid g_tw_lp_offset;
 22 | extern tw_kpid  g_tw_nkp;
 23 | extern tw_lp **g_tw_lp;
 24 | extern tw_kp **g_tw_kp;
 25 | extern int      g_tw_fossil_attempts;
 26 | extern unsigned int g_tw_nRNG_per_lp;
 27 | extern unsigned int g_tw_nRNG_core_per_lp; //Separate ROSS engine-only use RNG streams
 28 | extern tw_lpid g_tw_rng_default;
 29 | extern tw_seed g_tw_rng_seed;
 30 | extern tw_seed g_tw_core_rng_seed;
 31 | extern unsigned int g_tw_mblock;
 32 | extern unsigned int g_tw_gvt_interval;
 33 | extern unsigned long long g_tw_max_opt_lookahead;
 34 | extern unsigned long long g_tw_gvt_realtime_interval;
 35 | extern unsigned long long g_tw_gvt_interval_start_cycles;
 36 | extern double g_tw_ts_end;
 37 | extern unsigned int g_tw_sim_started;
 38 | extern size_t g_tw_msg_sz;
 39 | extern size_t g_tw_event_msg_sz;
 40 | extern size_t       g_tw_delta_sz;
 41 | extern uint32_t     g_tw_buddy_alloc;
 42 | extern buddy_list_bucket_t *g_tw_buddy_master;
 43 | extern uint32_t g_tw_avl_node_count;
 44 | 
 45 | extern unsigned int g_tw_lz4_knob;
 46 | 
 47 | extern double         g_tw_lookahead;
 48 | extern double         g_tw_min_detected_offset;
 49 | 
 50 | extern tw_pe *g_tw_pe;
 51 | extern unsigned int      g_tw_events_per_pe;
 52 | extern unsigned int      g_tw_events_per_pe_extra;
 53 | 
 54 | extern unsigned int g_tw_gvt_threshold;
 55 | extern unsigned int g_tw_gvt_done;
 56 | 
 57 | extern unsigned int g_tw_net_device_size;
 58 | extern tw_peid g_tw_mynode;
 59 | extern tw_peid g_tw_masternode;
 60 | 
 61 | extern FILE *g_tw_csv;
 62 | 
 63 | extern tw_lptype * g_tw_lp_types;
 64 | extern tw_typemap_f g_tw_lp_typemap;
 65 | 
 66 | /*
 67 |  * Cycle Counter variables
 68 |  */
 69 | extern tw_clock g_tw_cycles_gvt;
 70 | extern tw_clock g_tw_cycles_ev_abort;
 71 | extern tw_clock g_tw_cycles_ev_proc;
 72 | extern tw_clock g_tw_cycles_ev_queue;
 73 | extern tw_clock g_tw_cycles_rbs;
 74 | extern tw_clock g_tw_cycles_cancel;
 75 | 
 76 | /*
 77 |  * clock-*
 78 |  */
 79 | extern const tw_optdef *tw_clock_setup();
 80 | extern void tw_clock_init(tw_pe * me);
 81 | extern tw_clock tw_clock_now(tw_pe * me);
 82 | extern tw_clock tw_clock_read();
 83 | extern unsigned long long g_tw_clock_rate;
 84 | 
 85 | /*
 86 |  * tw-event.c
 87 |  */
 88 | extern void tw_event_send(tw_event * event);
 89 | extern void tw_event_rollback(tw_event * event);
 90 | #ifdef USE_RAND_TIEBREAKER
 91 | extern int tw_event_sig_compare(tw_event_sig e_sig, tw_event_sig n_sig);
 92 | #endif
 93 | 
 94 | /*
 95 |  * ross-inline.h
 96 |  */
 97 | static inline void  tw_event_free(tw_pe *, tw_event *);
 98 | static inline void  tw_free_output_messages(tw_event *e, int print_message);
 99 | static inline void * tw_event_data(tw_event * event);
100 | 
101 | /*
102 |  * tw-lp.c
103 |  */
104 | extern tw_lp *tw_lp_next_onpe(tw_lp * last, tw_pe * pe);
105 | extern void tw_lp_settype(tw_lpid lp, tw_lptype * type);
106 | extern void tw_lp_onpe(tw_lpid index, tw_pe * pe, tw_lpid id);
107 | extern void tw_lp_onkp(tw_lp * lp, tw_kp * kp);
108 | extern void tw_init_lps(tw_pe * me);
109 | extern void tw_pre_run_lps(tw_pe * me);
110 | extern void tw_lp_setup_types();
111 | extern void tw_lp_suspend(tw_lp * lp, int do_orig_event_rc, int error_num );
112 | 
113 | /*
114 |  * tw-kp.c
115 |  */
116 | extern void     tw_kp_onpe(tw_kpid id, tw_pe * pe);
117 | extern void     kp_fossil_remote(tw_kp * kp);
118 | extern tw_kp*   tw_kp_next_onpe(tw_kp * last, tw_pe * pe);
119 | extern void     tw_init_kps(tw_pe * me);
120 | extern tw_out*  tw_kp_grab_output_buffer(tw_kp *kp);
121 | extern void     tw_kp_put_back_output_buffer(tw_out *out);
122 | 
123 | extern void tw_kp_rollback_event(tw_event *event);
124 | extern void tw_kp_rollback_to(tw_kp * kp, tw_stime to);
125 | #ifdef USE_RAND_TIEBREAKER
126 | extern void tw_kp_rollback_to_sig(tw_kp * kp, tw_event_sig to_sig);
127 | #endif
128 | 
129 | /*
130 |  * tw-pe.c
131 |  */
132 | extern void tw_pe_settype(const tw_petype * type);
133 | extern void tw_pe_init(void);
134 | extern void tw_pe_fossil_collect(void);
135 | 
136 | /*
137 |  * tw-setup.c
138 |  */
139 | extern void tw_init(int *argc, char ***argv);
140 | extern void tw_define_lps(tw_lpid nlp, size_t msg_sz);
141 | extern void tw_run(void);
142 | extern void tw_end(void);
143 | extern tw_lpid map_onetype (tw_lpid gid);
144 | 
145 | /*
146 |  * tw-sched.c
147 |  */
148 | extern void tw_sched_init(tw_pe * me);
149 | extern void tw_scheduler_sequential(tw_pe * me);
150 | extern void tw_scheduler_conservative(tw_pe * me);
151 | extern void tw_scheduler_optimistic(tw_pe * me);
152 | extern void tw_scheduler_optimistic_debug(tw_pe * me);
153 | extern void tw_scheduler_optimistic_realtime(tw_pe * me);
154 | 
155 | /*
156 |  * tw-state.c
157 |  */
158 | extern void tw_snapshot(tw_lp *lp, size_t state_sz);
159 | extern long tw_snapshot_delta(tw_lp *lp, size_t state_sz);
160 | extern void tw_snapshot_restore(tw_lp *lp, size_t state_sz);
161 | 
162 | /*
163 |  * tw-timing.c
164 |  */
165 | extern   void     tw_wall_now(tw_wtime * t);
166 | extern   void     tw_wall_sub(tw_wtime * r, tw_wtime * a, tw_wtime * b);
167 | extern   double   tw_wall_to_double(tw_wtime * t);
168 | 
169 | /*
170 |  * tw-util.c
171 |  */
172 | 
173 | #define TW_LOC __FILE__,__LINE__
174 | extern int tw_output(tw_lp *lp, const char *fmt, ...);
175 | extern void tw_error(const char *file, int line, const char *fmt, ...) NORETURN;
176 | extern void tw_warning(const char *file, int line, const char *fmt, ...);
177 | extern void tw_printf(const char *file, int line, const char *fmt, ...);
178 | extern void tw_calloc_stats(size_t *alloc, size_t *waste);
179 | extern void* tw_calloc(const char *file, int line, const char *for_who, size_t e_sz, size_t n);
180 | 
181 | #endif
182 | 


--------------------------------------------------------------------------------
/core/ross-global.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | 	/*
  4 | 	 * LP data structures are allocated dynamically when the
  5 | 	 * process starts up based on the number it requires.
  6 | 	 *
  7 | 	 * g_tw_nlp         -- Number of LPs on this processor
  8 | 	 * g_tw_lp_offset   -- global id of g_tw_lp[0] (on this processor)
  9 | 	 * g_tw_nkp         -- Number of KPs on this processor
 10 |                             IF this is 1, then it gets over written as nkp_per_pe * g_tw_npe
 11 |                             thus it is total KPs in simulation, not on this processor
 12 | 	 * g_tw_lp          -- Public LP object array (on this processor)
 13 | 	 * g_tw_kp          -- Public KP object array (on this processor)
 14 | 	 * g_tw_fossil_attempts  -- Number of times fossil_collect is called
 15 |          * g_tw_nRNG_per_lp -- Number of RNG per LP
 16 | 	 * g_tw_nRNG_core_per_lp -- Number of ROSS core RNG per LP for use by ROSS engine exclusively
 17 | 	 */
 18 | 
 19 | tw_synch     g_tw_synchronization_protocol=NO_SYNCH;
 20 | map_local_f  g_tw_custom_lp_global_to_local_map=NULL;
 21 | map_custom_f g_tw_custom_initial_mapping=NULL;
 22 | tw_lp_map    g_tw_mapping=LINEAR;
 23 | 
 24 | tw_lpid         g_tw_nlp = 0;
 25 | tw_lpid         g_tw_total_lps = 0; //Total LPs in the simulation
 26 | tw_lpid		g_tw_lp_offset = 0;
 27 | tw_kpid         g_tw_nkp = 1;
 28 | tw_lp		**g_tw_lp = NULL;
 29 | tw_kp		**g_tw_kp = NULL;
 30 | int             g_tw_fossil_attempts = 0;
 31 | unsigned int    g_tw_nRNG_per_lp = 1;
 32 | unsigned int    g_tw_nRNG_core_per_lp = 1;
 33 | tw_lpid         g_tw_rng_default = 1;
 34 | tw_seed        g_tw_rng_seed = NULL;
 35 | tw_seed        g_tw_core_rng_seed = NULL;
 36 | unsigned int	g_tw_sim_started = 0;
 37 | size_t g_tw_msg_sz;
 38 | size_t g_tw_delta_sz = 0;
 39 | uint32_t g_tw_buddy_alloc = 0; /**< Allocation for buddy system */
 40 | buddy_list_bucket_t *g_tw_buddy_master = 0;
 41 | uint32_t g_tw_avl_node_count = 18;
 42 | 
 43 | /** Tunable LZ4 param.  Higher = faster with less compression.
 44 |  17 is a good value for speed with 1 being the default value. */
 45 | unsigned int g_tw_lz4_knob = 17;
 46 | 
 47 | size_t		g_tw_event_msg_sz = 0;
 48 | 
 49 |         /*
 50 |          * Minimum lookahead for a model -- model defined when
 51 |          * using the Simple Synchronization Protocol (conservative)
 52 |          */
 53 | double g_tw_lookahead=0.005;
 54 | 
 55 |         /*
 56 |          * Minimum detected timestamp offset used by the simulation at runtime
 57 |          * (lowered by tw_event_new_user_prio() under CONSERVATIVE); helps tune conservative runs.
 58 |          */
 59 | double g_tw_min_detected_offset=DBL_MAX;
 60 | 
 61 | 	/**
 62 | 	 * Number of messages to process at once out of the PQ before
 63 | 	 * returning back to handling things like GVT, message reception,
 64 | 	 * etc.  AKA the "batch" parameter to ROSS.
 65 | 	 */
 66 | unsigned int g_tw_mblock = 16;
 67 | unsigned int g_tw_gvt_interval = 16;
 68 | unsigned long long g_tw_max_opt_lookahead = ULLONG_MAX;
 69 | unsigned long long g_tw_gvt_realtime_interval; // calculated at runtime
 70 | unsigned long long g_tw_gvt_interval_start_cycles = 0;
 71 | 
 72 | double     g_tw_ts_end = 100000.0; /* events with recv_ts >= this value are counted as past-the-end by tw_event_new_user_prio() */
 73 | 
 74 | /*
 75 |  * g_tw_pe              -- Public PE pointer
 76 |  * g_tw_events_per_pe   -- Number of events to place in for each PE.
 77 |  *                         MUST be > 1 because of abort buffer.
 78 |  */
 79 | tw_pe *g_tw_pe = NULL;
 80 | unsigned int    g_tw_events_per_pe = 2048;
 81 | /** Number of extra events allocated per PE.  Command-line customizable. */
 82 | unsigned int    g_tw_events_per_pe_extra = 0;
 83 | 
 84 | unsigned int	g_tw_gvt_threshold = 1000;
 85 | unsigned int	g_tw_gvt_done = 0;
 86 | 
 87 | 	/*
 88 | 	 * Network variables:
 89 | 	 * g_tw_masternode -- pointer to GVT net node, for GVT comp
 90 | 	 */
 91 | unsigned int	g_tw_net_device_size = 0;
 92 | tw_peid		g_tw_mynode = 0; /* tw_ismaster() compares this with g_tw_masternode */
 93 | tw_peid		g_tw_masternode = 0;
 94 | 
 95 | FILE		*g_tw_csv = NULL;
 96 | 
 97 | 
 98 | /*
 99 |  * Clock rate.  The initializer is a double literal converted to unsigned long long.
100 |  */
101 | 
102 | unsigned long long g_tw_clock_rate=1000000000.0; // Default to 1 GHz
103 | 
104 | // LP Type Mapping
105 | tw_lptype * g_tw_lp_types = NULL;
106 | tw_typemap_f g_tw_lp_typemap = &map_onetype;
107 | 


--------------------------------------------------------------------------------
/core/ross-gvt.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_ross_gvt_h
 2 | #define INC_ross_gvt_h
 3 | 
 4 | /*
 5 |  * GVT library interface: setup/start, the two computation stages, forced updates and statistics.
 6 |  */
 7 | 
 8 | /* setup the GVT library (config cmd line args, etc) */
 9 | extern const tw_optdef *tw_gvt_setup(void);
10 | 
11 | /* start the GVT library (init vars, etc) */
12 | extern void tw_gvt_start(void);
13 | 
14 | /* 
15 |  * GVT computation is broken into two stages:
16 |  * stage 1: determine if GVT computation should be started
17 |  * stage 2: compute GVT
18 |  */
19 | extern void tw_gvt_step1(tw_pe *);
20 | extern void tw_gvt_step1_realtime(tw_pe *); /* NOTE(review): presumably the wall-clock driven form of stage 1 -- confirm */
21 | extern void tw_gvt_step2(tw_pe *);
22 | 
23 | /*
24 |  * Provide a mechanism to force a GVT computation outside of the 
25 |  * GVT interval (optional)
26 |  */
27 | extern void tw_gvt_force_update(void);
28 | extern void tw_gvt_force_update_realtime(void);
29 | 
30 | /* Set the PE GVT value */
31 | extern int tw_gvt_set(tw_pe * pe, tw_stime LVT);
32 | 
33 | /* Returns true if GVT in progress, false otherwise.  Only declared here; NOTE(review): the body presumably lives in the selected gvt/ header -- confirm */
34 | static inline int  tw_gvt_inprogress(tw_pe * pe);
35 | 
36 | /* Statistics collection and printing function */
37 | extern void tw_gvt_stats(FILE * F);
38 | #endif
39 | 


--------------------------------------------------------------------------------
/core/ross-inline.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_ross_inline_h
  2 | #define INC_ross_inline_h
  3 | 
  4 | static inline tw_event *
  5 | tw_event_grab(tw_pe *pe)
  6 | {
  7 |   tw_event *ev = tw_eventq_pop(&pe->free_q); /* may be NULL: callers treat that as "no free buffer" */
  8 |   if (ev == NULL)
  9 |     return NULL;
 10 | 
 11 |   /* reset the link pointers of the buffer that is about to be reused */
 12 |   ev->cancel_next = NULL;
 13 |   ev->caused_by_me = NULL;
 14 |   ev->cause_next = NULL;
 15 |   ev->prev = ev->next = NULL;
 16 |   memset(&ev->state, 0, sizeof(ev->state));
 17 |   memset(&ev->event_id, 0, sizeof(ev->event_id));
 18 | 
 19 |   return ev;
 20 | }
 21 | 
 22 | static inline void
 23 | tw_free_output_messages(tw_event *e, int print_message)
 24 | {
 25 |     tw_out *msg;
 26 |     /* unlink each buffer from e->out_msgs, print it when asked, then give it back */
 27 |     while ((msg = e->out_msgs) != NULL) {
 28 |         if (print_message != 0)
 29 |             printf("%s", msg->message);
 30 |         e->out_msgs = msg->next;
 31 |         tw_kp_put_back_output_buffer(msg);
 32 |     }
 33 | }
 34 | 
 35 | /**
 36 |  * Creates an event with a given priority in range [0,1], lower value is higher priority.
 37 |  * The PE's abort event (io_event_grab() under USE_RIO) is used when the receive time is at or
 38 |  * past g_tw_ts_end; it is also used when no buffer is free outside CONSERVATIVE/SEQUENTIAL runs.
 39 |  * @bug dest_gid, a 64-bit value, is stored in dest_lp, which may be a 32-bit pointer.
 40 |  */
 41 | static inline tw_event *
 42 | tw_event_new_user_prio(tw_lpid dest_gid, tw_stime offset_ts, tw_lp * sender, tw_stime prio)
 43 | {
 44 |   tw_pe *send_pe;
 45 |   tw_event *e;
 46 |   tw_stime recv_ts;
 47 | 
 48 |   if (TW_STIME_DBL(offset_ts) < 0.0) { /* the gid is printed through a cast so the conversion matches on every ABI */
 49 |     tw_error(TW_LOC, "Cannot send events into the past! Sending LP: %llu\n", (unsigned long long) sender->gid);
 50 |   }
 51 | 
 52 |   if (TW_STIME_DBL(prio) < 0.0 || TW_STIME_DBL(prio) > 1.0) {
 53 |     tw_error(TW_LOC, "Cannot specify an event priority outside of range [0.0,1.0]");
 54 |   }
 55 | 
 56 |   send_pe = sender->pe;
 57 |   recv_ts = TW_STIME_ADD(tw_now(sender), offset_ts);
 58 | 
 59 |   if(g_tw_synchronization_protocol == CONSERVATIVE)
 60 |   {
 61 |     /* keep track of the smallest timestamp offset we have seen */
 62 |     if(TW_STIME_DBL(offset_ts) < g_tw_min_detected_offset)
 63 |       g_tw_min_detected_offset = TW_STIME_DBL(offset_ts);
 64 |   }
 65 | 
 66 |   /* If this event will be past the end time, or there
 67 |    * are no more free events available, use abort event.
 68 |    */
 69 |   if (TW_STIME_DBL(recv_ts) >= g_tw_ts_end) {
 70 | #ifdef USE_RIO
 71 |     e = io_event_grab(send_pe);
 72 | #else
 73 |     e = send_pe->abort_event;
 74 | #endif
 75 |     send_pe->stats.s_events_past_end++;
 76 |   } else {
 77 |     e = tw_event_grab(send_pe);
 78 |     if (!e) {
 79 |         if (g_tw_synchronization_protocol == CONSERVATIVE
 80 |                 || g_tw_synchronization_protocol == SEQUENTIAL) {
 81 |             tw_error(TW_LOC,
 82 |                     "No free event buffers. Try increasing via g_tw_events_per_pe"
 83 |                     " or --extramem");
 84 |         }
 85 |         else
 86 |             e = send_pe->abort_event;
 87 |     }
 88 |   }
 89 | 
 90 |   e->send_pe = sender->pe->id;
 91 |   e->dest_lp = (tw_lp *) dest_gid;
 92 |   e->dest_lpid = dest_gid;
 93 |   e->src_lp = sender;
 94 |   e->recv_ts = recv_ts;
 95 |   e->send_ts = tw_now(sender);
 96 |   e->critical_path = sender->critical_path + 1;
 97 | 
 98 | 
 99 | #ifdef USE_RAND_TIEBREAKER
100 |   e->sig.priority = prio;
101 |   tw_event *now_event = sender->kp->pe->cur_event;
102 |   tw_stime u_rand_val = tw_rand_unif(sender->core_rng); //create a random number used to deterministically break event ties, this is rolled back in tw_event_rollback() during the sender LP cancel loop
103 |   e->sig.recv_ts = recv_ts;
104 |   if (offset_ts == 0) {
105 |     if (now_event->sig.tie_lineage_length > MAX_TIE_CHAIN) /* NOTE(review): index tie_lineage_length is written below -- confirm event_tiebreaker has room for it */
106 |       tw_error(TW_LOC, "Maximum zero-offset tie chain reached (%d), increase #define in ross-types.h",MAX_TIE_CHAIN);
107 |     memcpy(e->sig.event_tiebreaker, now_event->sig.event_tiebreaker, sizeof(tw_stime)*(now_event->sig.tie_lineage_length));
108 |     e->sig.event_tiebreaker[now_event->sig.tie_lineage_length] = u_rand_val;
109 |     e->sig.tie_lineage_length = now_event->sig.tie_lineage_length + 1;
110 |   }
111 |   else {
112 |     e->sig.event_tiebreaker[0] = u_rand_val;
113 |     e->sig.tie_lineage_length = 1;
114 |   }
115 | #endif
116 | 
117 |   tw_free_output_messages(e, 0);
118 | 
119 |   return e;
120 | }
121 | 
122 | /**
123 |  * Creates an event by forwarding to tw_event_new_user_prio() with priority 1.
124 |  * @bug dest_gid, a 64-bit value, ends up in dest_lp, which may be a 32-bit pointer.
125 |  */
126 | static inline tw_event *
127 | tw_event_new(tw_lpid dest_gid, tw_stime offset_ts, tw_lp * sender)
128 | {
129 |   return tw_event_new_user_prio(dest_gid, offset_ts, sender, 1);
130 | }
131 | 
132 | 
133 | static inline void
134 | tw_event_free(tw_pe *pe, tw_event *e)
135 | {
136 |   tw_clock buddy_began;
137 | 
138 |   /*
139 |    * A rolled-back event has its membufs put back on it before it is
140 |    * cancelled and freed, so a freed event can still carry output
141 |    * buffers; hand those back first.
142 |    */
143 |   tw_free_output_messages(e, 0);
144 | 
145 |   if (e->delta_buddy != 0) {
146 |     buddy_began = tw_clock_read();
147 |     buddy_free(e->delta_buddy);
148 |     g_tw_pe->stats.s_buddy += (tw_clock_read() - buddy_began); /* time spent in buddy_free() */
149 |     e->delta_buddy = 0;
150 |   }
151 | 
152 |   e->state.owner = TW_pe_free_q;
153 |   tw_eventq_unshift(&pe->free_q, e);
154 | }
155 | 
156 | static inline void *
157 | tw_event_data(tw_event * event)
158 | {
159 |   return &event[1]; /* address immediately past the tw_event struct itself */
160 | }
161 | 
162 | #endif
163 | 


--------------------------------------------------------------------------------
/core/ross-kernel-inline.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_ross_kernel_inline_h
  2 | #define INC_ross_kernel_inline_h
  3 | #include "instrumentation/st-instrumentation.h"
  4 | 
  5 | #define ROSS_MAX(a,b) ((a) > (b) ? (a) : (b)) /* larger of a and b; the chosen argument is evaluated twice */
  6 | #define ROSS_MIN(a,b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; same double-evaluation caveat */
  7 | 
  8 | /* Translate a global LP id into the LP stored in this process's g_tw_lp array. */
  9 | static inline tw_lp *
 10 |      tw_getlocal_lp(tw_lpid gid)
 11 | {
 12 |   tw_lpid id = gid; /* stays equal to gid for any mapping not handled in the switch */
 13 | 
 14 |   // finding analysis LPs doesn't depend on model's choice of mapping
 15 |   if (g_st_use_analysis_lps && gid >= g_st_total_model_lps) {
 16 |       return g_tw_lp[(gid - g_st_total_model_lps) % g_tw_nkp + g_tw_nlp];
 17 |   }
 18 | 
 19 |   switch (g_tw_mapping) {
 20 |   case CUSTOM:
 21 |       return( g_tw_custom_lp_global_to_local_map( gid ) );
 22 |   case ROUND_ROBIN:
 23 |       id = gid / tw_nnodes();
 24 |       break;
 25 |   case LINEAR:
 26 |       id = gid - g_tw_lp_offset;
 27 |       break;
 28 |   }
 29 | 
 30 | #ifdef ROSS_runtime_checks
 31 |       if (id >= g_tw_nlp) /* tw_lpid is uint64_t, so it is printed through a cast rather than with %d */
 32 |           tw_error(TW_LOC, "ID %llu exceeded MAX LPs", (unsigned long long) id);
 33 |       if (gid != g_tw_lp[id]->gid)
 34 |           tw_error(TW_LOC, "Inconsistent LP Mapping");
 35 | #endif /* ROSS_runtime_checks */
 36 | 
 37 |       return g_tw_lp[id];
 38 | }
 39 | 
 40 | static inline tw_lp *
 41 |      tw_getlp(tw_lpid id) /* Return the LP stored at local index id of g_tw_lp. */
 42 | {
 43 | #ifdef ROSS_runtime_checks
 44 |   if (id >= g_tw_nlp + g_st_analysis_nlp)
 45 |     tw_error(TW_LOC, "ID %llu exceeded MAX LPs", (unsigned long long) id); /* tw_lpid is uint64_t; %d did not match it */
 46 |   if (id != g_tw_lp[id]->id)
 47 |     tw_error(TW_LOC, "Inconsistent LP Mapping");
 48 | #endif /* ROSS_runtime_checks */
 49 |   return g_tw_lp[id];
 50 | }
 51 | 
 52 | static inline tw_kp *
 53 |      tw_getkp(tw_kpid id) /* Return the KP stored at index id of g_tw_kp. */
 54 | {
 55 | #ifdef ROSS_runtime_checks
 56 |   if (id >= g_tw_nkp)
 57 |     tw_error(TW_LOC, "ID %llu exceeded MAX KPs", (unsigned long long) id);
 58 |   if( g_tw_kp[id] == NULL )
 59 |     tw_error(TW_LOC, "Local KP %llu found NULL \n", (unsigned long long) id );
 60 |   if (id != g_tw_kp[id]->id)
 61 |     tw_error(TW_LOC, "Inconsistent KP Mapping");
 62 | #endif /* ROSS_runtime_checks */
 63 |   /* the casts above keep the printed id correct whatever integer width tw_kpid has */
 64 |   return g_tw_kp[id];
 65 | }
 66 | 
 67 | static inline int
 68 |      tw_ismaster(void)
 69 | {
 70 |   return g_tw_masternode == g_tw_mynode; /* 1 when this node's id equals the master node's id, else 0 */
 71 | }
 72 | 
 73 | static inline void *
 74 |      tw_getstate(tw_lp * lp)
 75 | {
 76 |   return lp->cur_state; /* the LP's cur_state pointer, handed back untyped */
 77 | }
 78 | 
 79 | #ifdef USE_RAND_TIEBREAKER
 80 | static inline tw_stime
 81 |      tw_now(tw_lp const * lp)
 82 | {
 83 |   return lp->kp->last_sig.recv_ts; /* time component of the KP's last signature */
 84 | }
 85 | 
 86 | static inline tw_event_sig
 87 |      tw_now_sig(tw_lp const *lp)
 88 | {
 89 |   return lp->kp->last_sig; /* the whole signature, returned by value */
 90 | }
 91 | #else
 92 | static inline tw_stime
 93 |      tw_now(tw_lp const * lp)
 94 | {
 95 |   return lp->kp->last_time; /* time recorded on the LP's KP */
 96 | }
 97 | #endif
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/core/ross-random.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | /*
  4 |  * tw_rand_init -- passes v and w straight to rng_init() and returns its result
  5 |  */
  6 | tw_rng	*
  7 | tw_rand_init(uint32_t v, uint32_t w)
  8 | {
  9 | 	return rng_init(v, w);
 10 | }
 11 | 
 12 | /*
 13 |  * tw_rand_core_init -- passes v and w straight to rng_core_init() and returns its result
 14 |  */
 15 | tw_rng	*
 16 | tw_rand_core_init(uint32_t v, uint32_t w)
 17 | {
 18 |     return rng_core_init(v, w);
 19 | }
 20 | 
 21 | /*
 22 |  * tw_rand_integer
 23 |  *
 24 |  * Return a uniform random integer from low to high, using one draw from g.
 25 |  * Returns 0, without drawing, when high < low.
 26 |  *
 27 |  * NOTE: Don't pass negative values to low!
 28 |  */
 29 | long 
 30 | tw_rand_integer(tw_rng_stream * g, long low, long high)
 31 | {
 32 | 	long span;
 33 | 
 34 | 	if (high < low)
 35 | 		return (0);
 36 | 
 37 | 	/* span = number of candidates.  high + 1 would overflow at LONG_MAX, so in
 38 | 	 * that case the exclusive bound stays at high; low == high == LONG_MAX then
 39 | 	 * yields span 0 and returns low, where the previous bound test returned 0. */
 40 | 	span = (high != LONG_MAX) ? (high + 1 - low) : (high - low);
 41 | 
 42 | 	return (low + (long)(tw_rand_unif(g) * span));
 43 | }
 44 | 
 45 | /* Return low plus an offset obtained by scaling one draw from g by the range width;
 46 |  * returns 0 without drawing when the exclusive bound falls below low. */
 47 | unsigned long
 48 | tw_rand_ulong(tw_rng_stream * g, unsigned long low, unsigned long high)
 49 | {
 50 | 	/* exclusive upper bound: high + 1, except at ULONG_MAX where it would wrap */
 51 | 	unsigned long upper = (high == ULONG_MAX) ? high : high + 1;
 52 | 
 53 | 	if (upper < low) {
 54 | 		return (0);
 55 | 	}
 56 | 
 57 | 	/* scale the draw to the width of the range and truncate */
 58 | 	return (low + (unsigned long)(tw_rand_unif(g) * (upper - low)));
 59 | }
 60 | 
 61 | /* Count how many of N successive draws from g are <= P. */
 62 | long 
 63 | tw_rand_binomial(tw_rng_stream * g, long N, double P)
 64 | {
 65 | 	long hits = 0;
 66 | 	long left;
 67 | 
 68 | 	/* one draw per iteration, N iterations in total (none when N <= 0) */
 69 | 	for (left = N; left > 0; left--)
 70 | 	{
 71 | 		if (tw_rand_unif(g) <= P)
 72 | 			hits += 1;
 73 | 	}
 74 | 	return (hits);
 75 | }
 76 | 
 77 | double tw_rand_exponential(tw_rng_stream * g, double Lambda)
 78 | {
 79 | 	/* Lambda multiplies the negated log of one draw, so it scales the result */
 80 | 	return -Lambda * log(tw_rand_unif(g));
 81 | }
 82 | 
 83 | double tw_rand_pareto(tw_rng_stream * g, double shape, double scale)
 84 | {
 85 |   /* scale divided by (one draw from g) raised to the power 1/shape */
 86 |   return scale / pow(tw_rand_unif(g), 1.0 / shape);
 87 | }
 88 | 
 89 | double 
 90 | tw_rand_gamma(tw_rng_stream * g, double shape, double scale) /* gamma variate by rejection; the number of draws taken from g is not fixed */
 91 | {
 92 | 	double          a, b, q, phi, d;
 93 | 
 94 | 	if (shape > 1)
 95 | 	{
 96 | 		/* constants of the shape > 1 sampler -- NOTE(review): looks like Cheng's log-logistic rejection method, confirm */
 97 | 		a = 1 / sqrt(2 * shape - 1);
 98 | 		b = shape - log(4);
 99 | 		q = shape + 1 / a;
100 | 		phi = 4.5;
101 | 		d = 1 + log(phi);
102 | 
103 | 		while (1)
104 | 		{
105 | 			double          U_One = tw_rand_unif(g);
106 | 			double          U_Two = tw_rand_unif(g); /* two draws per attempt */
107 | 			double          V = a * log(U_One / (1 - U_One));
108 | 			double          Y = shape * exp(V);
109 | 			double          Z = U_One * U_One * U_Two;
110 | 			double          W = b + q * V - Y;
111 | 
112 | 			double          temp1 = W + d - phi * Z;
113 | 			double          temp2 = log(Z);
114 | 
115 | 			if (temp1 >= 0 || W >= temp2) /* first test accepts without consulting log(Z) */
116 | 				return (scale * Y);
117 | 
118 | 		}
119 | 	} else if (shape == 1)
120 | 	{
121 | 		return (tw_rand_exponential(g, scale)); /* shape 1 is delegated to the exponential generator */
122 | 	} else
123 | 	{
124 | 		b = (exp(1) + shape) / exp(1); /* every remaining shape value lands here */
125 | 
126 | 		while (1)
127 | 		{
128 | 			double          U_One = tw_rand_unif(g);
129 | 			double          P = b * U_One;
130 | 
131 | 			if (P <= 1)
132 | 			{
133 | 				double          Y = pow(P, (1 / shape));
134 | 				double          U_Two = tw_rand_unif(g);
135 | 
136 | 				if (U_Two <= exp(-Y))
137 | 					return (scale * Y);
138 | 			} else
139 | 			{
140 | 				double          Y = -log((b - P) / shape);
141 | 				double          U_Two = tw_rand_unif(g); /* either branch takes a second draw */
142 | 
143 | 				if (U_Two <= pow(Y, (shape - 1)))
144 | 					return (scale * Y);
145 | 			}
146 | 		}
147 | 	}
148 | 
149 | long 
150 | tw_rand_geometric(tw_rng_stream * g, double P)
151 | {
152 | 	int             draws;
153 | 
154 | 	/* count draws up to and including the first one that is not greater than P */
155 | 	for (draws = 1; tw_rand_unif(g) > P; draws++)
156 | 		;
157 | 	return (draws);
158 | }
159 | 
160 | double 
161 | tw_rand_normal01(tw_rng_stream * g, unsigned int *rng_calls)
162 | {
163 | #ifndef RAND_NORMAL
164 | 	tw_error(TW_LOC, "Please compile using -DRAND_NORMAL!");
165 | #else
166 | 	int redraw;
167 | 
168 | 	*rng_calls = 0;
169 | 	g->tw_normal_flipflop = !g->tw_normal_flipflop;
170 | 
171 | 	/* draw a fresh pair when the toggled flip-flop is set or the cached pair is out of range */
172 | 	redraw = (g->tw_normal_flipflop)
173 | 	    || (g->tw_normal_u1 < 0.0) || (g->tw_normal_u1 >= 1.0)
174 | 	    || (g->tw_normal_u2 < 0.0) || (g->tw_normal_u2 > 1.0);
175 | 
176 | 	if (redraw) {
177 | 		g->tw_normal_u1 = tw_rand_unif(g);
178 | 		g->tw_normal_u2 = tw_rand_unif(g);
179 | 		*rng_calls = 2;
180 | 
181 | 		/* sine half of the Box-Muller pair; tw_opi is 2*pi */
182 | 		return (sqrt(-2.0 * log(g->tw_normal_u1)) * sin(tw_opi * g->tw_normal_u2));
183 | 	}
184 | 
185 | 	/* cosine half, computed from the pair cached on the stream; *rng_calls stays 0 */
186 | 	return (sqrt(-2.0 * log(g->tw_normal_u1)) * cos(tw_opi * g->tw_normal_u2));
187 | #endif
188 | }
189 | 
190 | double tw_rand_normal_sd(tw_rng_stream * g, double Mu, double Sd, unsigned int *rng_calls)
191 | {
192 |   /* shift by Mu and scale by Sd one value obtained from tw_rand_normal01() */
193 |   return Mu + (Sd * tw_rand_normal01(g, rng_calls));
194 | }
195 | 
196 | long 
197 | tw_rand_poisson(tw_rng_stream * g, double Lambda)
198 | {
199 |   const double    threshold = exp(-Lambda);
200 |   double          product = 1;
201 |   long            events = 0;
202 | 
203 |   /*
204 |    * Multiply successive draws together; the result is the number of
205 |    * products that were still >= exp(-Lambda) before the first one that is not.
206 |    */
207 |   for (;;)
208 |     {
209 |       product = product * tw_rand_unif(g);
210 |       if (!(product >= threshold))
211 |         break;
212 |       events++;
213 |     }
214 |   return (events);
215 | }
216 | 
217 | double tw_rand_lognormal(tw_rng_stream * g, double mean, double sd, unsigned int *rng_calls)
218 | {
219 |   /* exponentiate mean + sd * z, with z taken from tw_rand_normal01() */
220 |   return exp(mean + sd * tw_rand_normal01(g, rng_calls));
221 | }
222 | 
223 | double tw_rand_weibull(tw_rng_stream * g, double mean, double shape)
224 | {
225 |   /* scale is chosen so that scale * tgamma(1 + 1/shape) equals mean */
226 |   const double scale = mean / tgamma(1.0 + 1.0 / shape);
227 |   return scale * pow(-log(tw_rand_unif(g)), 1.0 / shape);
228 | }
229 | 


--------------------------------------------------------------------------------
/core/ross-random.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_tw_rand_h
 2 | #define	INC_tw_rand_h
 3 | 
 4 | #define tw_opi 6.28318530718
 5 | #define tw_rand_unif(G)			rng_gen_val(G)
 6 | #define tw_rand_reverse_unif(G)	rng_gen_reverse_val(G)
 7 | 
 8 | typedef struct tw_rng tw_rng;
 9 | typedef struct tw_rng_stream tw_rng_stream;
10 | 
11 | /*
12 |  * Public Function Prototypes (parameter names mirror the definitions in ross-random.c)
13 |  */
14 | extern tw_rng	*tw_rand_init(uint32_t v, uint32_t w);
15 | extern tw_rng   *tw_rand_core_init(uint32_t v, uint32_t w);
16 | extern void	    tw_rand_initial_seed(tw_rng_stream * g, tw_lpid id, tw_rng * the_rng);
17 | extern long     tw_rand_integer(tw_rng_stream * g, long low, long high);
18 | extern unsigned long tw_rand_ulong(tw_rng_stream * g, unsigned long low, unsigned long high);
19 | extern long     tw_rand_binomial(tw_rng_stream * g, long N, double P);
20 | extern double   tw_rand_exponential(tw_rng_stream * g, double Lambda);
21 | extern double   tw_rand_pareto(tw_rng_stream * g, double shape, double scale);
22 | extern double   tw_rand_gamma(tw_rng_stream * g, double shape, double scale);
23 | extern long     tw_rand_geometric(tw_rng_stream * g, double P);
24 | extern double   tw_rand_normal01(tw_rng_stream * g, unsigned int *rng_calls);
25 | extern double   tw_rand_normal_sd(tw_rng_stream * g, double Mu, double Sd, unsigned int *rng_calls);
26 | extern long     tw_rand_poisson(tw_rng_stream * g, double Lambda);
27 | extern double   tw_rand_lognormal(tw_rng_stream * g, double mean, double sd, unsigned int *rng_calls);
28 | extern double   tw_rand_weibull(tw_rng_stream * g, double mean, double shape);
29 | #endif
30 | 


--------------------------------------------------------------------------------
/core/ross.h:
--------------------------------------------------------------------------------
  1 | #ifndef INC_ross_h
  2 | #define INC_ross_h
  3 | 
  4 | /** @mainpage Rensselaer's Optimistic Simulation System (ROSS)
  5 |     @section intro_sec Introduction
  6 | 
  7 |     ROSS is an acronym for Rensselaer's Optimistic Simulation System. It is a
  8 |     parallel discrete-event simulator that executes on shared-memory
  9 |     multiprocessor systems. ROSS is geared for running large-scale simulation
 10 |     models (i.e., 100K to even 1 million object models).  The synchronization
 11 |     mechanism is based on Time Warp. Time Warp is an optimistic
 12 |     synchronization mechanism developed by Jefferson and Sowizral [10, 11] used
 13 |     in the parallelization of discrete-event simulation. The distributed
 14 |     simulator consists of a collection of logical processes or LPs, each
 15 |     modeling a distinct component of the system being modeled, e.g., a server
 16 |     in a queuing network. LPs communicate by exchanging timestamped event
 17 |     messages, e.g., denoting the arrival of a new job at that server.
 18 | 
 19 |     The Time Warp mechanism uses a detection-and-recovery protocol to
 20 |     synchronize the computation. Any time an LP determines that it has
 21 |     processed events out of timestamp order, it "rolls back" those events, and
 22 |     re-executes them. For a detailed discussion of Time Warp as well as other
 23 |     parallel simulation protocols we refer the reader to [8]
 24 | 
 25 |     ROSS was modeled after a Time Warp simulator called GTW or Georgia Tech
 26 |     Time Warp[7]. ROSS helped to demonstrate that Time Warp simulators can be
 27 |     run efficiently both in terms of speed and memory usage relative to a
 28 |     high-performance sequential simulator.
 29 | 
 30 |     To achieve high parallel performance, ROSS uses a technique called Reverse
 31 |     Computation. Here, the roll back mechanism in the optimistic simulator is
 32 |     realized not by classic state-saving, but by literally allowing, to the
 33 |     greatest possible extent, events to be reversed. Thus, as models are
 34 |     developed for parallel execution, both the forward and reverse execution
 35 |     code must be written. Currently, both are done by hand. We are
 36 |     investigating automatic methods that are able to generate the reverse
 37 |     execution code using only the forward execution code as input. For more
 38 |     information on ROSS and Reverse Computation we refer the interested reader
 39 |     to [4, 5]. Both of these texts are provided as additional reading in the
 40 |     ROSS distribution.
 41 | 
 42 | @section license_sec License
 43 | Copyright (c) 2013, Rensselaer Polytechnic Institute
 44 | All rights reserved.
 45 | 
 46 | Redistribution and  use in  source and binary  forms, with  or without
 47 | modification, are permitted provided that the following conditions are
 48 | met:
 49 | 
 50 |   Redistributions  of  source code  must  retain  the above  copyright
 51 |   notice, this list of conditions and the following disclaimer.
 52 | 
 53 |   Redistributions in  binary form  must reproduce the  above copyright
 54 |   notice, this list of conditions  and the following disclaimer in the
 55 |   documentation and/or other materials provided with the distribution.
 56 | 
 57 |   Neither the  name of Rensselaer Polytechnic Institute  nor the names
 58 |   of  its contributors  may be  used  to endorse  or promote  products
 59 |   derived   from  this   software  without   specific   prior  written
 60 |   permission.
 61 | 
 62 | THIS SOFTWARE  IS PROVIDED BY  THE COPYRIGHT HOLDERS  AND CONTRIBUTORS
 63 | "AS  IS" AND  ANY EXPRESS  OR IMPLIED  WARRANTIES, INCLUDING,  BUT NOT
 64 | LIMITED TO, THE IMPLIED  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 65 | A PARTICULAR PURPOSE  ARE DISCLAIMED. IN NO EVENT  SHALL THE COPYRIGHT
 66 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 67 | SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL  DAMAGES (INCLUDING,  BUT  NOT
 68 | LIMITED TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE,
 69 | DATA, OR PROFITS; OR BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY
 70 | THEORY OF  LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY,  OR TORT
 71 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING  IN ANY WAY OUT OF THE USE
 72 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 73 | 
 74 | */
 75 | 
 76 | /*******************************************************************
 77 |  * The location of this include is important, as it is outside of  *
 78 |  * the __cplusplus check.  This is required as the mpi header will *
 79 |  * mess up and complain if we force it into an extern "C" context. *
 80 |  *******************************************************************/
 81 | #include <mpi.h>
 82 | 
 83 | #ifdef __cplusplus
 84 | extern "C" {
 85 | #endif
 86 | 
 87 | #ifndef ARRAY_SIZE
 88 | #define ARRAY_SIZE(a) ( sizeof((a)) / sizeof((a)[0]) ) /* element count; only meaningful for true arrays, not pointers */
 89 | #endif
 90 | 
 91 | #ifdef __GNUC__
 92 | #  define NORETURN __attribute__((__noreturn__))
 93 | #else
 94 | #  define NORETURN /* expands to nothing when __GNUC__ is not defined */
 95 | #  ifndef __attribute__
 96 | #    define __attribute__(x) /* makes any __attribute__((...)) vanish on such compilers */
 97 | #  endif
 98 | #endif
 99 | 
100 | /*********************************************************************
101 |  *
102 |  * Include ``standard'' headers that most of ROSS will require.
103 |  *
104 |  ********************************************************************/
105 | 
106 | #include "config.h"
107 | 
108 | #include <errno.h>
109 | #include <sys/types.h>
110 | #include <math.h>
111 | #include <limits.h>
112 | #include <stdlib.h>
113 | #include <unistd.h>
114 | #include <string.h>
115 | #include <stdio.h>
116 | #include <stdarg.h>
117 | #include <stdint.h>
118 | 
119 | #if !defined(DBL_MAX)
120 | #include <float.h>
121 | #endif
122 | 
123 | #include <sys/time.h>
124 | #include <time.h>
125 | 
126 | #ifdef USE_BGPM
127 | #include<bgpm.h>
128 | #endif
129 | 
130 | #ifdef ROSS_INTERNAL /* turn direct use of the libc allocator into references to undeclared names */
131 | #undef malloc
132 | #undef calloc
133 | #undef realloc
134 | #undef strdup
135 | #undef free
136 | 
137 | #  define malloc(a) must_use_tw_calloc_not_malloc
138 | #  define calloc(a,b) must_use_tw_calloc_not_calloc
139 | #  define realloc(a,b) must_use_tw_calloc_not_realloc
140 | #  define strdup(b) must_use_tw_calloc_not_strdup
141 | #  define free(b) must_not_use_free
142 | #endif
143 | 
144 | // #include "config.h" -- moved to individual files that need them -- e.g., tw-setup.c
145 | 
146 | /* tw_peid -- Processing Element "PE" id */
147 | typedef unsigned long tw_peid;
148 | 
149 | /* tw_stime -- Simulation time value for sim clock (NOT wall!) */
150 | typedef double tw_stime;
151 | #define MPI_TYPE_TW_STIME   MPI_DOUBLE /* MPI datatype matching tw_stime */
152 | #define TW_STIME_CRT(x)     (x) /* identity while tw_stime is double */
153 | #define TW_STIME_DBL(x)     (x) /* identity while tw_stime is double */
154 | #define TW_STIME_CMP(x, y)  (((x) < (y)) ? -1 : ((x) > (y))) /* -1, 0 or 1; x and y may be evaluated twice */
155 | #define TW_STIME_ADD(x, y)  ((x) + (y))
156 | #define TW_STIME_MAX        DBL_MAX
157 | 
158 | /* tw_lpid -- Logical Process "LP" id */
159 | // fixed at 64 bits: print it through a cast to unsigned long long (or with PRIu64)
160 | typedef uint64_t tw_lpid;
161 | 
162 | 
163 | #include "buddy.h"
164 | #include "ross-random.h"
165 | 
166 | #ifdef ROSS_RAND_clcg4
167 | #  include "rand-clcg4.h"
168 | #endif
169 | 
170 | #ifdef ROSS_CLOCK_i386
171 | #  include "clock/i386.h"
172 | #endif
173 | #ifdef ROSS_CLOCK_amd64
174 | #  include "clock/amd64.h"
175 | #endif
176 | #ifdef ROSS_CLOCK_ia64
177 | #  include "clock/ia64.h"
178 | #endif
179 | #ifdef ROSS_CLOCK_ppc
180 | #  include "clock/ppc.h"
181 | #endif
182 | #ifdef ROSS_CLOCK_ppc64le
183 | #  include "clock/ppc64le.h"
184 | #endif
185 | #ifdef ROSS_CLOCK_bgl
186 | #  include "clock/bgl.h"
187 | #endif
188 | #ifdef ROSS_CLOCK_bgq
189 | #  include "clock/bgq.h"
190 | #endif
191 | #ifdef ROSS_CLOCK_aarch64
192 | #  include "clock/aarch64.h"
193 | #endif
194 | #ifdef ROSS_CLOCK_armv7l
195 | #  include "clock/armv7l.h"
196 | #endif
197 | #ifdef ROSS_CLOCK_gtod
198 | #  include "clock/gtod.h"
199 | #endif
200 | 
201 | #include "tw-timing.h"
202 | #include "ross-types.h"
203 | #include "tw-opts.h"
204 | 
205 | #ifdef ROSS_NETWORK_mpi
206 | #  include "network-mpi.h"
207 | #endif
208 | 
209 | #include "ross-gvt.h"
210 | #include "ross-extern.h"
211 | #include "ross-kernel-inline.h"
212 | #include "hash-quadratic.h"
213 | 
214 | #include "queue/tw-queue.h"
215 | 
216 | #ifdef ROSS_GVT_7oclock
217 | #  include "gvt/7oclock.h"
218 | #endif
219 | #ifdef ROSS_GVT_mpi_allreduce
220 | #  include "mpi.h"
221 | #  include "gvt/mpi_allreduce.h"
222 | #endif
223 | 
224 | #include "instrumentation/st-instrumentation.h"
225 | 
226 | #ifdef USE_DAMARIS
227 | #include "damaris/core/damaris.h"
228 | #endif
229 | 
230 | #include "tw-eventq.h"
231 | 
232 | #ifdef USE_RIO
233 | #include "rio/io.h"
234 | #endif
235 | 
236 | #include "ross-inline.h"
237 | 
238 | 
239 | #ifdef __cplusplus
240 | }
241 | #endif
242 | 
243 | #endif
244 | 


--------------------------------------------------------------------------------
/core/ross.pc.in:
--------------------------------------------------------------------------------
 1 | prefix = @CMAKE_INSTALL_PREFIX@
 2 | 
 3 | ross_cflags=-I${prefix}/include
 4 | ross_ldflags=-L${prefix}/lib -Wl,-rpath,${prefix}/lib
 5 | ross_libs=-lROSS -lm
 6 | 
 7 | Name: ROSS
 8 | Description: Rensselaer's Optimistic Simulation System
 9 | Version: @VERSION_SHORT@
10 | URL: https://github.com/ROSS-org/ROSS
11 | Requires:
12 | Libs: ${ross_ldflags} ${ross_libs}
13 | Cflags: ${ross_cflags}
14 | 


--------------------------------------------------------------------------------
/core/tw-kp.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | /* Allocate the KP for slot id of g_tw_kp and bind it to pe; a bad or already-used id goes to tw_error(). */
  4 | void
  5 | tw_kp_onpe(tw_kpid id, tw_pe * pe)
  6 | {
  7 | 	if(id >= g_tw_nkp) /* both messages print id through the same cast, so format and argument always agree */
  8 | 		tw_error(TW_LOC, "ID %llu exceeded MAX KPs", (unsigned long long) id);
  9 | 	if(g_tw_kp[id])
 10 | 		tw_error(TW_LOC, "KP already allocated: %llu\n", (unsigned long long) id);
 11 | 
 12 | 	g_tw_kp[id] = (tw_kp *) tw_calloc(TW_LOC, "Local KP", sizeof(tw_kp), 1);
 13 | 	/* record the KP's own index and the PE it was created for */
 14 | 	g_tw_kp[id]->id = id;
 15 | 	g_tw_kp[id]->pe = pe;
 16 | 
 17 | #ifdef ROSS_QUEUE_kp_splay
 18 | 	g_tw_kp[id]->pq = tw_eventpq_create();
 19 | #endif
 20 | }
 21 | 
 22 | 
 23 | #ifdef USE_RAND_TIEBREAKER
 24 | /*
 25 |  * Signature-ordered KP rollback: undoes processed events, head first, for as
 26 |  * long as the head of kp->pevent_q compares >= to_sig.
 27 |  */
 28 | void
 29 | tw_kp_rollback_to_sig(tw_kp * kp, tw_event_sig to_sig)
 30 | {
 31 |     tw_event *undone;
 32 |     tw_clock  enqueue_began;
 33 | 
 34 |     /* one rollback is counted per call, on the KP and on its stats record */
 35 |     kp->s_rb_total += 1;
 36 |     kp->kp_stats->s_rb_total += 1;
 37 | 
 38 |     while (kp->pevent_q.size != 0 && tw_event_sig_compare(kp->pevent_q.head->sig, to_sig) >= 0)
 39 |     {
 40 |         // rollback first
 41 |         undone = tw_eventq_shift(&kp->pevent_q);
 42 |         tw_event_rollback(undone);
 43 | 
 44 |         // reset kp pointers: signature of the new head, or the PE's GVT signature when empty
 45 |         if (kp->pevent_q.size != 0) {
 46 |             kp->last_sig = kp->pevent_q.head->sig;
 47 |         } else {
 48 |             kp->last_sig = kp->pe->GVT_sig;
 49 |         }
 50 | 
 51 |         // place event back into priority queue; the elapsed clock ticks go to stats.s_pq
 52 |         enqueue_began = tw_clock_read();
 53 |         tw_pq_enqueue(kp->pe->pq, undone);
 54 |         kp->pe->stats.s_pq += tw_clock_read() - enqueue_began;
 55 |     }
 56 | }
 57 | #else
 58 | /*
 59 |  * Roll kp back to simulation time `to`.
 60 |  *
 61 |  * Events are taken from the head of kp->pevent_q for as long as the head's
 62 |  * recv_ts is >= to; each one is rolled back and re-inserted into the PE's
 63 |  * priority queue.  One rollback is counted per call.
 64 |  */
 65 | void
 66 | tw_kp_rollback_to(tw_kp * kp, tw_stime to)
 67 | {
 68 |         tw_event *undone;
 69 |         tw_clock  enqueue_began;
 70 | 
 71 |         kp->s_rb_total += 1;
 72 |         kp->kp_stats->s_rb_total += 1;  // instrumentation
 73 | 
 74 | #if VERIFY_ROLLBACK
 75 |         printf("%d %d: rb_to %f, now = %f \n",
 76 |                kp->pe->id, kp->id, TW_STIME_DBL(to), TW_STIME_DBL(kp->last_time));
 77 | #endif
 78 | 
 79 |         while(kp->pevent_q.size != 0 && TW_STIME_CMP(kp->pevent_q.head->recv_ts, to) >= 0)
 80 |         {
 81 |                 /* rollback first */
 82 |                 undone = tw_eventq_shift(&kp->pevent_q);
 83 |                 tw_event_rollback(undone);
 84 | 
 85 |                 /*
 86 |                  * reset kp pointers: the new head's receive time, or the
 87 |                  * PE's GVT once the processed list is empty
 88 |                  */
 89 |                 if (kp->pevent_q.size != 0) {
 90 |                         kp->last_time = kp->pevent_q.head->recv_ts;
 91 |                 } else {
 92 |                         kp->last_time = kp->pe->GVT;
 93 |                 }
 94 | 
 95 |                 /* place event back into priority queue; elapsed ticks go to stats.s_pq */
 96 |                 enqueue_began = tw_clock_read();
 97 |                 tw_pq_enqueue(kp->pe->pq, undone);
 98 |                 kp->pe->stats.s_pq += tw_clock_read() - enqueue_began;
 99 |         }
100 | }
101 | #endif
102 | 
103 | /**
104 |  * Roll back one processed event; counted as a secondary rollback.
105 |  *
106 |  * Events are shifted off the processed-event queue of the event's KP. Each
107 |  * event ahead of the target is rolled back and re-enqueued in the PE's
108 |  * priority queue; the target is rolled back but not re-enqueued here. The
109 |  * KP's last_sig/last_time ends at the new queue head, or at GVT if empty.
110 |  * @param event event to roll back; must be in event->dest_lp->kp's pevent_q
111 |  */
112 | void
113 | tw_kp_rollback_event(tw_event * event)
114 | {
115 |     tw_kp *kp = event->dest_lp->kp;
116 |     tw_pe *pe = kp->pe;
117 |     tw_event *e;
118 |     tw_clock pq_start;
119 | 
120 |     kp->s_rb_total++;
121 |     kp->s_rb_secondary++;
122 |     kp->kp_stats->s_rb_total++;        // instrumentation
123 |     kp->kp_stats->s_rb_secondary++;    // instrumentation
124 | 
125 | #if VERIFY_ROLLBACK
126 |     // TW_STIME_DBL is what tw_kp_rollback_to wraps its %f time arguments in
127 |     printf("%d %d: rb_event: %f \n", pe->id, kp->id, TW_STIME_DBL(event->recv_ts));
128 |     if(!kp->pevent_q.size)
129 |         tw_error(TW_LOC, "Attempting to rollback empty pevent_q!");
130 | #endif
131 | 
132 |     e = tw_eventq_shift(&kp->pevent_q);
133 |     while(e != event)
134 |     {
135 |         // target not in this queue: stop before reading a missing queue head
136 |         if(!kp->pevent_q.size)
137 |             tw_error(TW_LOC, "Event to rollback is not in pevent_q!");
138 | #ifdef USE_RAND_TIEBREAKER
139 |         kp->last_sig = kp->pevent_q.head->sig;
140 | #else
141 |         kp->last_time = kp->pevent_q.head->recv_ts;
142 | #endif
143 |         tw_event_rollback(e);
144 |         pq_start = tw_clock_read();
145 |         tw_pq_enqueue(pe->pq, e);
146 |         pe->stats.s_pq += tw_clock_read() - pq_start;
147 |         e = tw_eventq_shift(&kp->pevent_q);
148 |     }
149 | 
150 |     tw_event_rollback(e);
151 |     // the KP's time follows the new queue head, or GVT once the queue is empty
152 | #ifdef USE_RAND_TIEBREAKER
153 |     kp->last_sig = kp->pevent_q.size ? kp->pevent_q.head->sig : kp->pe->GVT_sig;
154 | #else
155 |     kp->last_time = kp->pevent_q.size ? kp->pevent_q.head->recv_ts : kp->pe->GVT;
156 | #endif
157 | }
157 | 
158 | #ifndef NUM_OUT_MESG
159 | #define NUM_OUT_MESG 2000
160 | #endif
161 | /**
162 |  * Build a KP's pool of NUM_OUT_MESG output buffers: one array chained into
163 |  * a NULL-terminated list in which every entry names kp as its owner.
164 |  */
165 | static tw_out*
166 | init_output_messages(tw_kp *kp)
167 | {
168 |     tw_out *pool = (tw_out *) tw_calloc(TW_LOC, "tw_out", sizeof(struct tw_out), NUM_OUT_MESG);
169 |     int idx;
170 | 
171 |     for (idx = 0; idx < NUM_OUT_MESG; idx++) {
172 |         pool[idx].owner = kp;
173 |         pool[idx].next = (idx + 1 < NUM_OUT_MESG) ? &pool[idx + 1] : NULL;
174 |     }
175 |     return pool;
176 | }
177 | 
178 | /**
179 |  * Set up every KP that belongs to PE me: record its id, zero its event and
180 |  * rollback counters, give it an output-buffer pool under the optimistic
181 |  * protocols, and allocate its instrumentation records.
182 |  */
183 | void
184 | tw_init_kps(tw_pe * me)
185 | {
186 |     tw_kpid kpid;
187 |     for (kpid = 0; kpid < g_tw_nkp; kpid++) {
188 |         tw_kp *kp = tw_getkp(kpid);
189 |         int slot;
190 | 
191 |         if (kp->pe != me)
192 |             continue;
193 | 
194 |         kp->id = kpid;
195 |         kp->s_nevent_processed = 0;
196 |         kp->s_e_rbs = 0;
197 |         kp->s_rb_total = 0;
198 |         kp->s_rb_secondary = 0;
199 |         if (g_tw_synchronization_protocol == OPTIMISTIC ||
200 |             g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
201 |             g_tw_synchronization_protocol == OPTIMISTIC_REALTIME)
202 |             kp->output = init_output_messages(kp);
203 |         kp->kp_stats = (st_kp_stats*) tw_calloc(TW_LOC, "KP instrumentation", sizeof(st_kp_stats), 1);
204 |         for (slot = 0; slot < 3; slot++)
205 |             kp->last_stats[slot] = (st_kp_stats*) tw_calloc(TW_LOC, "KP instrumentation", sizeof(st_kp_stats), 1);
206 |     }
207 | }
208 | 
209 | /** Detach and return the first buffer of kp's output list (NULL if none). */
210 | tw_out *
211 | tw_kp_grab_output_buffer(tw_kp *kp)
212 | {
213 |     tw_out *taken = kp->output;
214 | 
215 |     if (taken == NULL)
216 |         return NULL;
217 |     kp->output = taken->next;
218 |     taken->next = NULL;
219 |     return taken;
220 | }
221 | 
222 | /**
223 |  * Return an output buffer to the free list of the KP recorded in out->owner.
224 |  * The buffer becomes the new list head.
225 |  */
226 | void
227 | tw_kp_put_back_output_buffer(tw_out *out)
228 | {
229 |     tw_kp *owner = out->owner;
230 | 
231 |     // pushing onto an empty list (owner->output == NULL) also leaves
232 |     // out->next == NULL, so one push covers both cases
233 |     out->next = owner->output;
234 |     owner->output = out;
235 | }
236 | 


--------------------------------------------------------------------------------
/core/tw-lp.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | /**
  4 |  * @file tw-lp.c
  5 |  * @brief tw_lp_settype is defined here!
  6 |  */
  7 | 
  8 | /**
  9 |  * Give every local LP its type from the global type table.
 10 |  *
 11 |  * IMPORTANT: this function replaces tw_lp_settype. g_tw_lp_types and
 12 |  * g_tw_lp_typemap must both be defined; tw_error is raised otherwise.
 13 |  * An LP's type is g_tw_lp_types[g_tw_lp_typemap(lp->gid)].
 14 |  */
 15 | void tw_lp_setup_types () {
 16 |     unsigned int idx;
 17 |     if (g_tw_lp_types == NULL)
 18 |         tw_error(TW_LOC, "No LP types are defined");
 19 |     if (g_tw_lp_typemap == NULL)
 20 |         tw_error(TW_LOC, "No LP type mapping is defined");
 21 | 
 22 |     for (idx = 0; idx < g_tw_nlp; idx++) {
 23 |         tw_lp *lp = g_tw_lp[idx];
 24 | 
 25 |         lp->type = g_tw_lp_types + g_tw_lp_typemap(lp->gid);
 26 |         if (g_st_ev_trace)   // only when g_st_ev_trace is set
 27 |             st_model_setup_types(lp);
 28 |     }
 29 | }
 30 | 
 31 | /**
 32 |  * Set the type of one local LP. Call after tw_define_lps, once per LP.
 33 |  * The type pointer is stored (not copied) and g_tw_delta_sz is raised to
 34 |  * type->state_sz when that is larger.
 35 |  * @param id   index into g_tw_lp; must be < g_tw_nlp + g_st_analysis_nlp
 36 |  * @param type LP type to attach
 37 |  */
 38 | void
 39 | tw_lp_settype(tw_lpid id, tw_lptype * type)
 40 | {
 41 |     tw_lp *lp;
 42 |     // range-check id before it is used to index g_tw_lp
 43 |     if(id >= g_tw_nlp + g_st_analysis_nlp)
 44 |         tw_error(TW_LOC, "ID %llu exceeded MAX LPs (%llu)", (unsigned long long) id,
 45 |             (unsigned long long) (g_tw_nlp + g_st_analysis_nlp));
 46 | 
 47 |     lp = g_tw_lp[id];
 48 |     if(!lp || !lp->pe)  // report id: lp->gid cannot be read when lp is NULL
 49 |         tw_error(TW_LOC, "LP %llu has no PE assigned.", (unsigned long long) id);
 50 | 
 51 |     lp->type = type;
 52 |     if (type->state_sz > g_tw_delta_sz)
 53 |         g_tw_delta_sz = type->state_sz;
 54 | }
 55 | 
 56 | // Allocate the local LP slot id on PE pe and record its global id gid.
 57 | void
 58 | tw_lp_onpe(tw_lpid id, tw_pe * pe, tw_lpid gid)
 59 | {
 60 |     // id is cast for printing so the argument always matches %llu
 61 |     if(id >= g_tw_nlp + g_st_analysis_nlp)
 62 |         tw_error(TW_LOC, "ID %llu exceeded MAX LPs", (unsigned long long) id);
 63 |     if(g_tw_lp[id])
 64 |         tw_error(TW_LOC, "LP already allocated: %llu", (unsigned long long) id);
 65 | 
 66 |     g_tw_lp[id] = (tw_lp *) tw_calloc(TW_LOC, "Local LP", sizeof(tw_lp), 1);
 67 |     g_tw_lp[id]->gid = gid;
 68 |     g_tw_lp[id]->id = id;
 69 |     g_tw_lp[id]->pe = pe;
 70 | }
 71 | 
 72 | void
 73 | tw_lp_onkp(tw_lp * lp, tw_kp * kp)
 74 | {
 75 | 	if(!lp)
 76 | 		tw_error(TW_LOC, "Bad LP pointer!");
 77 | 
 78 | 	lp->kp = kp;
 79 | 	kp->lp_count++;
 80 | }
 81 | 
 82 | /**
 83 |  * Prepare every LP owned by PE me: allocate its state vector (unless one is
 84 |  * already set) and its instrumentation records, then run the LP init
 85 |  * handlers. Without USE_RIO each LP's init (if any) runs inside the
 86 |  * allocation loop. With USE_RIO the handlers run in a second pass after
 87 |  * all LPs are allocated, ordered around io_read_checkpoint() according to
 88 |  * g_io_load_at.
 89 |  */
 90 | void
 91 | tw_init_lps(tw_pe * me)
 92 | {
 93 |     tw_lpid idx;
 94 |     int slot;
 95 | 
 96 |     for (idx = 0; idx < g_tw_nlp + g_st_analysis_nlp; idx++) {
 97 |         tw_lp *lp = g_tw_lp[idx];
 98 | 
 99 |         if (lp->pe != me)
100 |             continue;
101 | 
102 |         // Allocate initial state vector for this LP
103 |         if (lp->cur_state == NULL)
104 |             lp->cur_state = tw_calloc(TW_LOC, "state vector", lp->type->state_sz, 1);
105 | 
106 |         lp->lp_stats = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1);
107 |         for (slot = 0; slot < 3; slot++)
108 |             lp->last_stats[slot] = (st_lp_stats*) tw_calloc(TW_LOC, "LP instrumentation", sizeof(st_lp_stats), 1);
109 | 
110 | #ifndef USE_RIO
111 |         if (lp->type->init) {
112 |             me->cur_event = me->abort_event;
113 |             me->cur_event->caused_by_me = NULL;
114 |             ((init_f) lp->type->init)(lp->cur_state, lp);
115 |             if (me->cev_abort)
116 |                 tw_error(TW_LOC, "ran out of events during init");
117 |         }
118 | #endif
119 |     }
120 | #ifdef USE_RIO
121 |     // RIO requires that all LPs have been allocated
122 |     if (g_io_load_at == PRE_INIT || g_io_load_at == INIT) {
123 |         tw_clock load_began = tw_clock_read();
124 |         io_read_checkpoint();
125 |         me->stats.s_rio_load += (tw_clock_read() - load_began);
126 |     }
127 |     if (g_io_load_at != INIT) {
128 |         tw_clock init_began = tw_clock_read();
129 |         for (idx = 0; idx < g_tw_nlp; idx++) {
130 |             tw_lp *lp = g_tw_lp[idx];
131 |             me->cur_event = me->abort_event;
132 |             me->cur_event->caused_by_me = NULL;
133 |             ((init_f) lp->type->init)(lp->cur_state, lp);
134 |             if (me->cev_abort)
135 |                 tw_error(TW_LOC, "ran out of events during init");
136 |         }
137 |         me->stats.s_rio_lp_init += (tw_clock_read() - init_began);
138 |     }
139 |     if (g_io_load_at == POST_INIT) {
140 |         tw_clock load_began = tw_clock_read();
141 |         io_read_checkpoint();
142 |         me->stats.s_rio_load += (tw_clock_read() - load_began);
143 |     }
144 | #endif
145 | }
146 | 
147 | /**
148 |  * Invoke the pre_run handler of every LP on PE me that defines one, with
149 |  * the PE's abort event installed as current event during the call.
150 |  */
151 | void tw_pre_run_lps (tw_pe * me) {
152 |     tw_lpid idx;
153 |     for (idx = 0; idx < g_tw_nlp + g_st_analysis_nlp; idx++) {
154 |         tw_lp *lp = g_tw_lp[idx];
155 | 
156 |         // skip LPs of other PEs and LPs without a pre_run handler
157 |         if (lp->pe != me || !lp->type->pre_run)
158 |             continue;
159 | 
160 |         me->cur_event = me->abort_event;
161 |         me->cur_event->caused_by_me = NULL;
162 |         ((pre_run_f) lp->type->pre_run)(lp->cur_state, lp);
163 |         if (me->cev_abort)
164 |             tw_error(TW_LOC, "ran out of events during pre_run");
165 |     }
166 | }
167 | 
168 | /********************************************************************//**
169 |             LP Suspension Design Notes! (John Jenkins, ANL)
170 | 
171 | Many times, when developing optimistic models, we are able to
172 | determine < LP state, event > pairs which represent infeasible model
173 | behavior. These types of simulation states typically arise when time
174 | warp causes us to receive and potentially process messages in an order
175 | we don't expect.
176 | 
177 | For example, consider a client/server protocol in which a server sends
178 | an ACK to a client upon completion of some event. In optimistic mode,
179 | the client can see what amounts to duplicate ACKs from the server due
180 | to the server LP rolling back and re-sending an ACK.
181 | 
182 | While some models can gracefully cope with such issues, more complex
183 | models can have troubles (the client in the example could for instance
184 | destroy the request metadata after receiving an ACK).
185 | 
186 | A solution, as noted in the "Dark Side of Risk" paper, is to introduce
187 | LP "self-suspend" functionality. If an LP is able to detect a < state,
188 | message > pair which is incorrect / unexpected in a well-behaved
189 | simulation, the LP should be able to put itself into suspend mode,
190 | refusing to process messages until rolled back to a pre < state,
191 | message > state. There are two benefits: 1) it greatly reduces the
192 | difficulty in tracking down and distinguishing proper model bugs from
193 | bugs arising from time-warp related issues such as out-of-order event
194 | receipt and 2) it improves simulation performance by pruning the
195 | number of processed events that we know are invalid and will be rolled
196 | back anyways.
197 | 
198 | I suggest the function signature tw_suspend(tw_lp *lp, int
199 | do_orig_event_rc, const char * format, ...), with the following
200 | semantics:
201 | 
202 | After a call to tw_suspend, all subsequent events (both forward and
203 | reverse) that arrive at the suspended LP shall be processed as if they
204 | were no-ops. The reverse event handler of the event that caused the
205 | suspend will be run if do_orig_event_rc is nonzero; otherwise, the
206 | reverse event handler shall additionally be a no-op. Typically,
207 | do_orig_event_rc == 0 is desired, as good coding practices for
208 | moderate-or-greater complexity simulations dictate state/event
209 | validation prior to modifying LP state (partial rollbacks are very
210 | undesirable), but there may be messy logic in the user code for which
211 | a partial rollback is warranted (operations that free memory as a side
212 | effect of operations, for example).  An LP exits suspend state upon
213 | rolling back the event that caused the suspend (whether or not that
214 | event is processed as a no-op).  Upon GVT, if an LP is in self-suspend
215 | mode and the event that caused the suspend has a timestamp less than
216 | that of GVT, then the simulator shall report the format string of
217 | suspended LP(s) and exit.  A NULL format string is acceptable for
218 | performance purposes, e.g. when doing "production" simulation runs.
219 | 
220 | @param lp Pointer to the LP we're suspending
221 | @param do_orig_event_rc Nonzero to run the reverse handler (RC function) of the event that caused the suspend; zero to skip it
222 | @param error_num User-specified value for tracking purposes; ROSS ignores this
223 | 
224 | *************************************************************************/
225 | 
226 | void
227 | tw_lp_suspend(tw_lp * lp, int do_orig_event_rc, int error_num )
228 | {
229 |   if (lp == NULL)
230 |     tw_error(TW_LOC, "Bad LP pointer!");
231 |   lp->suspend_flag = 1;
232 |   // why and how the LP was suspended
233 |   lp->suspend_error_number = error_num;
234 |   lp->suspend_do_orig_event_rc = do_orig_event_rc;
235 |   // when: the event being processed and the value of tw_now()/tw_now_sig()
236 |   lp->suspend_event = lp->pe->cur_event; // only valid prior to GVT
237 | #ifdef USE_RAND_TIEBREAKER
238 |   lp->suspend_sig = tw_now_sig(lp);
239 | #else
240 |   lp->suspend_time = tw_now(lp);
241 | #endif
242 | }
243 | 


--------------------------------------------------------------------------------
/core/tw-opts.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_tw_opts_h
 2 | #define INC_tw_opts_h
 3 | 
 4 | enum tw_opttype   /* kinds of tw_optdef entries; values start at 1 because TWOPT_END() uses 0 */
 5 | {
 6 | 	TWOPTTYPE_GROUP = 1,   /**< group entry; TWOPT_GROUP sets only the help text */
 7 | 	TWOPTTYPE_ULONG,       /**< value must be an "unsigned long*"      */
 8 | 	TWOPTTYPE_ULONGLONG,   /**< value must be an "unsigned long long*" */
 9 | 	TWOPTTYPE_UINT,        /**< value must be an "unsigned int*"       */
10 | 	TWOPTTYPE_STIME,       /**< value must be a  "tw_stime*"           */
11 |         TWOPTTYPE_DOUBLE,      /**< value must be a  "double *"            */
12 | 	TWOPTTYPE_CHAR,        /**< value must be a  "char *"              */
13 |         TWOPTTYPE_FLAG,        /**< value must be an "unsigned int*"       */
14 | 	TWOPTTYPE_SHOWHELP     /**< NOTE(review): no TWOPT_* macro in this header builds this kind; presumably the help option - confirm in the parser */
15 | };
16 | typedef enum tw_opttype tw_opttype;
17 | 
18 | typedef struct tw_optdef tw_optdef;
19 | struct tw_optdef   /* one option-table entry: an option, a group entry, or the TWOPT_END() terminator */
20 | {
21 | 	tw_opttype type;    /**< kind of entry; tw_opttype documents the pointer type expected in value */
22 | 	const char *name;   /**< option name; NULL in TWOPT_GROUP and TWOPT_END entries */
23 | 	const char *help;   /**< help text; NULL only in the TWOPT_END entry */
24 | 	void *value;        /**< address of the variable tied to the option; NULL for TWOPT_GROUP and TWOPT_END */
25 | };
26 | /* Initializers for tw_optdef entries: n = name, v = variable whose address is stored, h = help text. */
27 | #define TWOPT_GROUP(h)         { TWOPTTYPE_GROUP,    NULL, (h), NULL }
28 | #define TWOPT_ULONG(n,v,h)     { TWOPTTYPE_ULONG,     (n), (h), &(v) }
29 | #define TWOPT_ULONGLONG(n,v,h) { TWOPTTYPE_ULONGLONG, (n), (h), &(v) }
30 | #define TWOPT_UINT(n,v,h)      { TWOPTTYPE_UINT,      (n), (h), &(v) }
31 | #define TWOPT_STIME(n,v,h)     { TWOPTTYPE_STIME,     (n), (h), &(v) }
32 | #define TWOPT_DOUBLE(n,v,h)    { TWOPTTYPE_DOUBLE,    (n), (h), &(v) }
33 | #define TWOPT_CHAR(n,v,h)      { TWOPTTYPE_CHAR,      (n), (h), &(v) }
34 | #define TWOPT_FLAG(n,v,h)      { TWOPTTYPE_FLAG,      (n), (h), &(v) }
35 | #define TWOPT_END()            { (tw_opttype)0,     NULL, NULL, NULL }   /* type 0, all pointers NULL */
36 | 
37 | /** Remove options from the command line arguments; argc and argv are passed by pointer. */
38 | extern void tw_opt_parse(int *argc, char ***argv);
39 | /** Add an opt group: tw_optdef entries (presumably an array ended by TWOPT_END() - confirm). */
40 | extern void tw_opt_add(const tw_optdef *options);
41 | /** Pretty-print the option descriptions (for --help) */
42 | extern void tw_opt_print(void);
43 | /** Pretty-print the option descriptions and current values to the stream f */
44 | extern void tw_opt_settings(FILE *f);
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/core/tw-pe.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | // Default PE hook: does nothing.
 4 | // tw_pe_settype() installs it for every callback a tw_petype leaves NULL.
 5 | static void
 6 | dummy_pe_f (tw_pe *pe) { (void) pe; /* parameter intentionally unused */ }
 7 | 
 8 | /**
 9 |  * Install the PE callbacks given in type into g_tw_pe.
10 |  *
11 |  * Every hook that type leaves NULL is replaced by the do-nothing
12 |  * dummy_pe_f. g_tw_pe must already exist.
13 |  */
14 | void
15 | tw_pe_settype(const tw_petype * type)
16 | {
17 |     if (g_tw_pe == NULL)
18 |         tw_error(TW_LOC, "Undefined PE!");
19 |     g_tw_pe->type.pre_lp_init = type->pre_lp_init ? type->pre_lp_init : dummy_pe_f;
20 |     g_tw_pe->type.post_lp_init = type->post_lp_init ? type->post_lp_init : dummy_pe_f;
21 |     g_tw_pe->type.gvt = type->gvt ? type->gvt : dummy_pe_f;
22 |     g_tw_pe->type.final = type->final ? type->final : dummy_pe_f;
23 | }
24 | 
25 | /**
26 |  * Create and initialize this process's PE struct (g_tw_pe).
27 |  *
28 |  * must be called after tw_nnodes / MPI world size is set; a second call is
29 |  * reported through tw_error.
30 |  */
31 | void
32 | tw_pe_init(void)
33 | {
34 |     tw_petype no_type;
35 |     int optimistic;
36 | 
37 |     // cast: tw_error prints g_tw_mynode with %ld, so make the %u argument explicit
38 |     if (g_tw_pe != NULL)
39 |         tw_error(TW_LOC, "PE %u already initialized", (unsigned int) g_tw_mynode);
40 | 
41 |     g_tw_pe = (tw_pe*)tw_calloc(TW_LOC, "PE Struct", sizeof(*g_tw_pe), 1);
42 |     g_tw_pe->id = g_tw_mynode;
43 | 
44 |     // an all-zero type makes tw_pe_settype install dummy_pe_f for every hook
45 |     memset(&no_type, 0, sizeof(no_type));
46 |     tw_pe_settype(&no_type);
47 | 
48 | #ifdef USE_RAND_TIEBREAKER
49 |     g_tw_pe->trans_msg_sig = tw_get_init_sig(TW_STIME_MAX, 1, TW_STIME_MAX);
50 | #else
51 |     g_tw_pe->trans_msg_ts = TW_STIME_MAX;
52 | #endif
53 |     g_tw_pe->gvt_status = 0;
54 | 
55 |     g_tw_pe->rng = tw_rand_init(31, 41);
56 |     g_tw_pe->core_rng = tw_rand_core_init(31, 41); // Core RNG must have same v & w values as main RNG
57 | 
58 |     // If we're in (some variation of) optimistic mode, we need this hash
59 |     optimistic = g_tw_synchronization_protocol == OPTIMISTIC ||
60 |                  g_tw_synchronization_protocol == OPTIMISTIC_DEBUG ||
61 |                  g_tw_synchronization_protocol == OPTIMISTIC_REALTIME;
62 |     g_tw_pe->hash_t = optimistic ? tw_hash_create() : NULL;
63 | }
64 | 
65 | /**
66 |  * Run one fossil-collection attempt: bump g_tw_fossil_attempts, then fossil
67 |  * collect the processed-event queue of each of the g_tw_nkp KPs.
68 |  */
69 | void
70 | tw_pe_fossil_collect(void)
71 | {
72 |     unsigned int kpid;
73 | 
74 |     g_tw_fossil_attempts++;
75 | 
76 |     for (kpid = 0; kpid < g_tw_nkp; kpid++) {
77 |         tw_kp *kp = tw_getkp(kpid);
78 |         tw_eventq_fossil_collect(&kp->pevent_q, g_tw_pe);
79 |     }
80 | }
81 | 


--------------------------------------------------------------------------------
/core/tw-state.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | #include <assert.h>
 3 | #include "lz4.h"
 4 | 
 5 | /**
 6 |  * Make a snapshot of the LP state: copy state_sz bytes of lp->cur_state into the PE's delta_buffer[0]
 7 |  */
 8 | void
 9 | tw_snapshot(tw_lp *lp, size_t state_sz)
10 | {
11 |     assert(lp->pe->delta_buffer[0] && "increase --buddy-size argument!");  // the buffer must exist before it is written
12 |     memcpy(lp->pe->delta_buffer[0], lp->cur_state, state_sz);
13 | }
14 | 
15 | /**
16 |  * Create the delta from the current state and the snapshot, compress it
17 |  * with LZ4 and store the compressed copy on the current event.
18 |  * delta_buffer[0] (the snapshot) is overwritten with current - snapshot and
19 |  * delta_buffer[2] serves as LZ4 scratch state.
20 |  * @return The size of the compressed data placed in delta_buffer[1].
21 |  */
22 | long
23 | tw_snapshot_delta(tw_lp *lp, size_t state_sz)
24 | {
25 |     size_t i;
26 |     tw_clock start;
27 |     int ret_size;
28 |     unsigned char *current_state = (unsigned char *)lp->cur_state;
29 |     unsigned char *snapshot = lp->pe->delta_buffer[0];
30 |     void *scratch = lp->pe->delta_buffer[2];
31 | 
32 |     for (i = 0; i < state_sz; i++)
33 |         snapshot[i] = current_state[i] - snapshot[i];
34 | 
35 |     start = tw_clock_read();
36 |     ret_size = LZ4_compress_fast_extState(scratch, (char*)snapshot, (char*)lp->pe->delta_buffer[1], state_sz, g_tw_delta_sz, g_tw_lz4_knob);
37 |     g_tw_pe->stats.s_lz4 += (tw_clock_read() - start);
38 |     // LZ4 signals failure (e.g. dst too small) with 0, so test <= 0, not < 0
39 |     if (ret_size <= 0)
40 |         tw_error(TW_LOC, "LZ4_compress error");
41 | 
42 |     start = tw_clock_read();
43 |     lp->pe->cur_event->delta_buddy = buddy_alloc(ret_size);
44 |     g_tw_pe->stats.s_buddy += (tw_clock_read() - start);
45 |     assert(lp->pe->cur_event->delta_buddy);
46 |     lp->pe->cur_event->delta_size = ret_size;
47 |     memcpy(lp->pe->cur_event->delta_buddy, lp->pe->delta_buffer[1], ret_size);
48 |     return ret_size;
49 | }
50 | 
51 | /**
52 |  * Restore the state of lp from the delta held by the current event:
53 |  * decompress it into delta_buffer[0], then subtract it byte-wise from the
54 |  * LP's current state.
55 |  */
56 | void
57 | tw_snapshot_restore(tw_lp *lp, size_t state_sz)
58 | {
59 |     tw_clock began = tw_clock_read();
60 |     unsigned char *state = (unsigned char *)lp->cur_state;
61 |     unsigned char *delta = lp->pe->delta_buffer[0];
62 |     unsigned int idx;
63 |     int rc;
64 | 
65 |     rc = LZ4_decompress_fast((char *)lp->pe->cur_event->delta_buddy, (char *)delta, state_sz);
66 |     g_tw_pe->stats.s_lz4 += (tw_clock_read() - began);
67 |     if (rc < 0)
68 |         tw_error(TW_LOC, "LZ4_decompress_fast error");
69 | 
70 |     for (idx = 0; idx < state_sz; idx++)
71 |         state[idx] -= delta[idx];
72 | }
73 | 


--------------------------------------------------------------------------------
/core/tw-stats.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | #ifndef ROSS_DO_NOT_PRINT
  4 | /* Print an integer statistic: aligned on stdout, as ",value" in g_tw_csv. */
  5 | static void show_lld(const char *name, tw_stat v)
  6 | {
  7 |     fprintf(stdout, "\t%-50s %11lld\n", name, v);
  8 |     fprintf(g_tw_csv, ",%lld", v);
  9 | }
 10 | 
 11 | /* Print a percentage: two decimals plus " %" on stdout, ",value" in g_tw_csv. */
 12 | static void show_2f(const char *name, double v)
 13 | {
 14 |     fprintf(stdout, "\t%-50s %11.2f %%\n", name, v);
 15 |     fprintf(g_tw_csv, ",%.2f", v);
 16 | }
 17 | 
 18 | /* Print a value with one decimal on stdout; the g_tw_csv copy keeps two. */
 19 | static void show_1f(const char *name, double v)
 20 | {
 21 |     fprintf(stdout, "\t%-50s %11.1f\n", name, v);
 22 |     fprintf(g_tw_csv, ",%.2f", v);
 23 | }
 24 | 
 25 | /* Print a value with four decimals on stdout and as ",value" in g_tw_csv. */
 26 | static void show_4f(const char *name, double v)
 27 | {
 28 |     fprintf(stdout, "\t%-50s %11.4lf\n", name, v);
 29 |     fprintf(g_tw_csv, ",%.4lf", v);
 30 | }
 31 | 
 32 | #endif
 33 | 
 34 | /**
 35 |  * Fold the statistics of PE pe and of all KPs into s.
 36 |  *
 37 |  * Does nothing unless pe is g_tw_pe and g_tw_sim_started is nonzero. Most
 38 |  * fields are added onto s; the run time is kept as a maximum, while
 39 |  * s_min_detected_offset, the s_alp_* counters and s_fc_attempts are
 40 |  * overwritten. s_net_events and s_rb_primary are derived at the end.
 41 |  */
 42 | void
 43 | tw_get_stats(tw_pe * pe, tw_statistics *s)
 44 | {
 45 |     unsigned int kpid;
 46 |     tw_wtime elapsed;
 47 | 
 48 |     if (pe != g_tw_pe || 0 == g_tw_sim_started)
 49 |         return;
 50 | 
 51 |     tw_wall_sub(&elapsed, &pe->end_time, &pe->start_time);
 52 |     s->s_max_run_time = ROSS_MAX(s->s_max_run_time, tw_wall_to_double(&elapsed));
 53 |     s->s_pq_qsize += tw_pq_get_size(pe->pq);
 54 | 
 55 | #define add_pe_stat(f) s->f += pe->stats.f   /* accumulate one PE counter */
 56 |     add_pe_stat(s_nevent_abort);
 57 |     add_pe_stat(s_nsend_net_remote);
 58 |     add_pe_stat(s_nsend_loc_remote);
 59 |     add_pe_stat(s_nsend_network);
 60 |     add_pe_stat(s_nread_network);
 61 |     add_pe_stat(s_nsend_remote_rb);
 62 |     add_pe_stat(s_total);
 63 |     add_pe_stat(s_init);
 64 |     add_pe_stat(s_net_read);
 65 |     add_pe_stat(s_net_other);
 66 |     add_pe_stat(s_gvt);
 67 |     add_pe_stat(s_fossil_collect);
 68 |     add_pe_stat(s_event_abort);
 69 |     add_pe_stat(s_event_process);
 70 |     add_pe_stat(s_pq);
 71 |     add_pe_stat(s_rollback);
 72 |     add_pe_stat(s_cancel_q);
 73 |     add_pe_stat(s_pe_event_ties);
 74 |     add_pe_stat(s_avl);
 75 |     add_pe_stat(s_buddy);
 76 |     add_pe_stat(s_lz4);
 77 |     add_pe_stat(s_stat_comp);
 78 |     add_pe_stat(s_stat_write);
 79 |     add_pe_stat(s_events_past_end);
 80 | #ifdef USE_RIO
 81 |     add_pe_stat(s_rio_load);
 82 |     add_pe_stat(s_rio_lp_init);
 83 | #endif
 84 | #undef add_pe_stat
 85 | 
 86 |     s->s_min_detected_offset = g_tw_min_detected_offset;
 87 |     s->s_alp_nevent_processed = pe->stats.s_alp_nevent_processed;
 88 |     s->s_alp_e_rbs = pe->stats.s_alp_e_rbs;
 89 | 
 90 |     for (kpid = 0; kpid < g_tw_nkp; kpid++) {
 91 |         tw_kp *kp = tw_getkp(kpid);
 92 |         s->s_nevent_processed += kp->s_nevent_processed;
 93 |         s->s_e_rbs += kp->s_e_rbs;
 94 |         s->s_rb_total += kp->s_rb_total;
 95 |         s->s_rb_secondary += kp->s_rb_secondary;
 96 |     }
 97 | 
 98 |     s->s_fc_attempts = g_tw_fossil_attempts;
 99 |     s->s_net_events = s->s_nevent_processed - s->s_e_rbs;
100 |     s->s_rb_primary = s->s_rb_total - s->s_rb_secondary;
101 | }
102 | 
103 | void st_print_analysis_LP_stats(tw_statistics *s)
104 | {
105 |     tw_stat model_nevent = s->s_nevent_processed - s->s_alp_nevent_processed;
106 |     tw_stat model_e_rbs = s->s_e_rbs - s->s_alp_e_rbs;
107 |     tw_stat model_net = model_nevent - model_e_rbs;
108 |     tw_stat analysis_net = s->s_alp_nevent_processed - s->s_alp_e_rbs;
109 | 
110 |     printf("\nSeparate Statistics for Model and Analysis LPs\n");
111 |     printf("Model LPs:\n");
112 | 	show_lld("Total Events Processed", model_nevent);
113 | 	show_lld("Events Rolled Back", model_e_rbs);
114 | 	show_lld("Net Events Processed", model_net);
115 | 	show_2f("Efficiency", 100.0 * (1.0 - ((double) model_e_rbs / (double) model_net)));
116 |     
117 |     printf("\nAnalysis LPs:\n");
118 | 	show_lld("Total Events Processed", s->s_alp_nevent_processed);
119 | 	show_lld("Events Rolled Back", s->s_alp_e_rbs);
120 | 	show_lld("Net Events Processed", analysis_net);
121 | 	show_2f("Efficiency", 100.0 * (1.0 - ((double) s->s_alp_e_rbs / (double) analysis_net)));
122 | }
123 | 
124 | void
125 | tw_stats(tw_pe *me)
126 | {  // Runs every LP's final handler, collects the statistics, passes them through tw_net_statistics, then prints the report on the master only.
127 |     tw_statistics s;
128 | 	bzero(&s, sizeof(s));
129 | 	size_t m_alloc, m_waste;
130 | 	tw_calloc_stats(&m_alloc, &m_waste);
131 |     tw_lp *lp = NULL;
132 |     unsigned int i;
133 |     for(i = 0; i < g_tw_nlp + g_st_analysis_nlp; i++)
134 |     {
135 |         lp = tw_getlp(i);
136 |         if (lp->type->final)
137 |             (*lp->type->final) (lp->cur_state, lp);
138 |     }
139 |     tw_get_stats(me, &s);
140 | 	s = *(tw_net_statistics(me, &s));
141 | 	// every PE executes the code above; only the master goes on to print
142 | 	if (!tw_ismaster())
143 | 		return;
144 | 
145 | #ifndef ROSS_DO_NOT_PRINT
146 | 	printf("\n\t: Running Time = %.4f seconds\n", s.s_max_run_time);
147 | 	fprintf(g_tw_csv, "%.4f", s.s_max_run_time);
148 | 
149 | 	printf("\nTW Library Statistics:\n");
150 | 	show_lld("Total Events Processed", s.s_nevent_processed);
151 | 	show_lld("Events Aborted (part of RBs)", s.s_nevent_abort);
152 | 	show_lld("Events Rolled Back", s.s_e_rbs);
153 | 	show_lld("Event Ties Detected in PE Queues", s.s_pe_event_ties);
154 |         if(g_tw_synchronization_protocol == CONSERVATIVE)  // stdout only: this value is not written to g_tw_csv
155 |             printf("\t%-50s %11.9lf\n",
156 |                "Minimum TS Offset Detected in Conservative Mode",
157 |                (double) s.s_min_detected_offset);
158 | 	show_2f("Efficiency", 100.0 * (1.0 - ((double) s.s_e_rbs / (double) s.s_net_events)));  // NOTE(review): s_net_events is not checked for 0 before dividing
159 | 	show_lld("Total Remote (shared mem) Events Processed", s.s_nsend_loc_remote);
160 | 
161 | 	show_2f(
162 | 		"Percent Remote Events",
163 | 		( (double)s.s_nsend_loc_remote
164 | 		/ (double)s.s_net_events)
165 | 		* 100.0
166 | 	);
167 | 
168 | 	show_lld("Total Remote (network) Events Processed", s.s_nsend_net_remote);
169 | 	show_2f(
170 | 		"Percent Remote Events",
171 | 		( (double)s.s_nsend_net_remote
172 | 		/ (double)s.s_net_events)
173 | 		* 100.0
174 | 	);
175 | 
176 | 	printf("\n");
177 | 	show_lld("Total Roll Backs ", s.s_rb_total);
178 | 	show_lld("Primary Roll Backs ", s.s_rb_primary);
179 | 	show_lld("Secondary Roll Backs ", s.s_rb_secondary);
180 | 	show_lld("Fossil Collect Attempts", s.s_fc_attempts);
181 | 	show_lld("Total GVT Computations", g_tw_gvt_done);
182 | 
183 | 	printf("\n");
184 | 	show_lld("Net Events Processed", s.s_net_events);
185 | 	show_1f(
186 | 		"Event Rate (events/sec)",
187 | 		((double)s.s_net_events / s.s_max_run_time)
188 | 	);
189 | 
190 |         show_lld("Total Events Scheduled Past End Time", s.s_events_past_end);
191 | 
192 | 	printf("\nTW Memory Statistics:\n");
193 | 	show_lld("Events Allocated", 1 + g_tw_events_per_pe + g_tw_events_per_pe_extra);
194 | 	show_lld("Memory Allocated", m_alloc / 1024);
195 | 	show_lld("Memory Wasted", m_waste / 1024);
196 | 
197 | 	if (tw_nnodes() > 1) {
198 | 		printf("\n");
199 | 		printf("TW Network Statistics:\n");
200 | 		show_lld("Remote sends", s.s_nsend_network);
201 | 		show_lld("Remote recvs", s.s_nread_network);
202 | 	}
203 | 	// NOTE(review): lp below is the last LP visited by the final-handler loop; it is still NULL if that loop never ran
204 | 	printf("\nTW Data Structure sizes in bytes (sizeof):\n");
205 | 	show_lld("PE struct", sizeof(tw_pe));
206 | 	show_lld("KP struct", sizeof(tw_kp));
207 | 	show_lld("LP struct", sizeof(tw_lp));
208 | 	show_lld("LP Model struct", lp->type->state_sz);
209 | 	show_lld("LP RNGs", sizeof(*lp->rng));
210 | 	show_lld("Total LP", sizeof(tw_lp) + lp->type->state_sz + sizeof(*lp->rng));
211 | 	show_lld("Event struct", sizeof(tw_event));
212 | 	show_lld("Event struct with Model", sizeof(tw_event) + g_tw_msg_sz);
213 | 
214 | #ifdef ROSS_timing
215 | 	printf("\nTW Clock Cycle Statistics (MAX values in secs at %1.4lf GHz):\n", g_tw_clock_rate / 1000000000.0);
216 | 	show_4f("Initialization", (double) s.s_init / g_tw_clock_rate);
217 | 	show_4f("Priority Queue (enq/deq)", (double) s.s_pq / g_tw_clock_rate);
218 |     show_4f("AVL Tree (insert/delete)", (double) s.s_avl / g_tw_clock_rate);
219 |     show_4f("LZ4 (de)compression", (double) s.s_lz4 / g_tw_clock_rate);
220 |     show_4f("Buddy system", (double) s.s_buddy / g_tw_clock_rate);
221 | #ifdef USE_RIO
222 |     show_4f("RIO Loading", (double) s.s_rio_load / g_tw_clock_rate);
223 |     show_4f("RIO LP Init", (double) s.s_rio_lp_init / g_tw_clock_rate);
224 | #endif
225 | 	show_4f("Event Processing", (double) s.s_event_process / g_tw_clock_rate);
226 | 	show_4f("Event Cancel", (double) s.s_cancel_q / g_tw_clock_rate);
227 | 	show_4f("Event Abort", (double) s.s_event_abort / g_tw_clock_rate);
228 | 	printf("\n");
229 | 	show_4f("GVT", (double) s.s_gvt / g_tw_clock_rate);
230 | 	show_4f("Fossil Collect", (double) s.s_fossil_collect / g_tw_clock_rate);
231 | 	show_4f("Primary Rollbacks", (double) s.s_rollback / g_tw_clock_rate);
232 | 	show_4f("Network Read", (double) s.s_net_read / g_tw_clock_rate);
233 | 	show_4f("Other Network", (double) s.s_net_other / g_tw_clock_rate);
234 | 	show_4f("Instrumentation (computation)", (double) s.s_stat_comp / g_tw_clock_rate);
235 | 	show_4f("Instrumentation (write)", (double) s.s_stat_write / g_tw_clock_rate);
236 | 	show_4f("Total Time (Note: Using Running Time above for Speedup)", (double) s.s_total / g_tw_clock_rate);
237 | #endif
238 | 
239 | 	tw_gvt_stats(stdout);
240 |     
241 |     if (g_st_use_analysis_lps)
242 |         st_print_analysis_LP_stats(&s);
243 | #endif
244 | }
245 | 


--------------------------------------------------------------------------------
/core/tw-timing.c:
--------------------------------------------------------------------------------
 1 | #include <ross.h>
 2 | 
 3 | // Store the current wall-clock time in *t; a gettimeofday failure is reported via tw_error.
 4 | void tw_wall_now(tw_wtime * t)
 5 | {
 6 |     if (gettimeofday((struct timeval *)t, NULL) != 0)
 7 |         tw_error(TW_LOC, "Unable to get time of day!");
 8 | }
 9 | 
10 | /** Compute r = a - b, keeping r->tv_usec non-negative by borrowing a second. */
11 | void
12 | tw_wall_sub(tw_wtime * r, tw_wtime * a, tw_wtime * b)
13 | {
14 |     r->tv_sec = a->tv_sec - b->tv_sec;
15 |     r->tv_usec = a->tv_usec - b->tv_usec;
16 |     if (r->tv_usec >= 0)
17 |         return;
18 |     // negative microseconds: borrow one second
19 |     r->tv_usec += 1000000;
20 |     r->tv_sec--;
21 | }
22 | 
23 | /** Convert a wall-clock time to seconds as a double (tv_sec + tv_usec / 1000000). */
24 | double tw_wall_to_double(tw_wtime * t)
25 | {
26 |     return (double)t->tv_sec + (double)t->tv_usec / 1000000;
27 | }
28 | 


--------------------------------------------------------------------------------
/core/tw-timing.h:
--------------------------------------------------------------------------------
1 | #ifndef INC_tw_timing_h
2 | #define INC_tw_timing_h
3 | 
4 | typedef struct timeval tw_wtime; /**< wall-clock time value; this header includes nothing itself, so struct timeval must come from the includer */
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/core/tw-util.c:
--------------------------------------------------------------------------------
  1 | #include <ross.h>
  2 | 
  3 | /**
  4 |  * Rollback-aware printf, i.e. if the event gets rolled back, undo the printf.
  5 |  * We can't do that of course, so the message is stored in a buffer chained
  6 |  * to the current event instead (presumably emitted once GVT passes the
  7 |  * event; that part is not in this function).
  8 |  *
  9 |  * Under SEQUENTIAL and CONSERVATIVE, and when the KP has no free output
 10 |  * buffer left, the message is printed to stdout immediately instead.
 11 |  *
 12 |  * @param lp  LP producing the message
 13 |  * @param fmt printf-style format, followed by its arguments
 14 |  * @return vsnprintf's result when the message was buffered, otherwise 0
 15 |  */
 16 | int
 17 | tw_output(tw_lp *lp, const char *fmt, ...)
 18 | {
 19 |     va_list args;
 20 |     tw_out *out;
 21 |     tw_out **tail;
 22 |     int written;
 23 | 
 24 |     // these two protocols print right away
 25 |     if (g_tw_synchronization_protocol == SEQUENTIAL || g_tw_synchronization_protocol == CONSERVATIVE) {
 26 |         va_start(args, fmt);
 27 |         vfprintf(stdout, fmt, args);
 28 |         va_end(args);
 29 |         return 0;
 30 |     }
 31 | 
 32 |     // no free buffer on the KP: warn, then print right away as well
 33 |     out = tw_kp_grab_output_buffer(lp->kp);
 34 |     if (out == NULL) {
 35 |         tw_printf(TW_LOC, "kp (%d) has no available output buffers\n", lp->kp->id);
 36 |         tw_printf(TW_LOC, "This event may be rolled back!");
 37 |         va_start(args, fmt);
 38 |         vfprintf(stdout, fmt, args);
 39 |         va_end(args);
 40 |         return 0;
 41 |     }
 42 | 
 43 |     // append the buffer to the end of the current event's message list
 44 |     tail = &lp->pe->cur_event->out_msgs;
 45 |     while (*tail != NULL)
 46 |         tail = &(*tail)->next;
 47 |     *tail = out;
 48 | 
 49 |     va_start(args, fmt);
 50 |     written = vsnprintf(out->message, sizeof(out->message), fmt, args);
 51 |     va_end(args);
 52 | 
 53 |     // a negative result is an output error; >= the buffer size means truncation
 54 |     if (written < 0 || (unsigned)written >= sizeof(out->message))
 55 |         tw_printf(TW_LOC, "Message may be too large?");
 56 | 
 57 |     return written;
 58 | }
 59 | 
 60 | void
 61 | tw_printf(const char *file, int line, const char *fmt, ...)
 62 | {
 63 | 	va_list	ap;
 64 | 
 65 | 	va_start(ap, fmt);
 66 | 	fprintf(stdout, "%s:%i: ", file, line);
 67 | 	vfprintf(stdout, fmt, ap);
 68 | 	fprintf(stdout, "\n");
 69 | 	fflush(stdout);
 70 | 	va_end(ap);
 71 | }
 72 | 
 73 | void
 74 | tw_error(const char *file, int line, const char *fmt, ...)
 75 | {
 76 | 	va_list	ap;
 77 | 
 78 | 	va_start(ap, fmt);
 79 | 	fprintf(stdout, "node: %ld: error: %s:%i: ", g_tw_mynode, file, line);
 80 | 	vfprintf(stdout, fmt, ap);
 81 | 	fprintf(stdout, "\n");
 82 | 	fflush(stdout);
 83 | 	fflush(stdout);
 84 | 	va_end(ap);
 85 | 
 86 | 	tw_net_abort();
 87 | }
 88 | 
 89 | void
 90 | tw_warning(const char *file, int line, const char *fmt, ...)
 91 | {
 92 | 	va_list	ap;
 93 | 
 94 | 	va_start(ap, fmt);
 95 | 	fprintf(stdout, "node: %ld: warning: %s:%i: ", g_tw_mynode, file, line);
 96 | 	vfprintf(stdout, fmt, ap);
 97 | 	fprintf(stdout, "\n");
 98 | 	fflush(stdout);
 99 | 	fflush(stdout);
100 | 	va_end(ap);
101 | }
102 | 
/*
 * Header of one allocation pool used by pool_alloc().
 *
 * Pools form a singly linked list headed by main_pool; a new pool is pushed
 * at the front.  Memory is handed out by advancing next_free toward
 * end_free and is never returned to the pool.
 */
struct mem_pool
{
	struct mem_pool *next_pool;	/* next pool in the list, or NULL */
	char *next_free;		/* first byte not yet handed out */
	char *end_free;			/* one past the last usable byte */
}__attribute__((aligned(8)));
109 | 
/* Head of the pool list; pool_alloc() searches it front to back. */
static struct mem_pool *main_pool;

/* Usable bytes per pool.  pool_alloc() requests pool_size + 32 bytes for a
 * new pool and starts handing out memory 32 bytes past the pool's start, so
 * each pool occupies 512 KiB in total. */
//static const size_t pool_size = 512 * 1024 - sizeof(struct mem_pool);
static const size_t pool_size = (512 * 1024) - 32;
/* Granularity tw_calloc() rounds element sizes up to; presumably the larger
 * of the two sizes (ROSS_MAX is defined elsewhere -- confirm). */
static const size_t pool_align = ROSS_MAX(sizeof(double),sizeof(void*));
/* Running total of bytes successfully returned by pool_alloc(). */
static size_t total_allocated;
/* Number of calls made to my_malloc(). */
static unsigned malloc_calls;
/* Counting wrapper around malloc(); defined at the end of this file. */
static void* my_malloc(size_t len);
118 | 
119 | void
120 | tw_calloc_stats(
121 | 	size_t *bytes_alloc,
122 | 	size_t *bytes_wasted)
123 | {
124 | 	struct mem_pool *p;
125 | 
126 | 	*bytes_alloc = total_allocated;
127 | 	*bytes_wasted = malloc_calls * (sizeof(void*) + sizeof(size_t));
128 | 
129 | 	for (p = main_pool; p; p = p->next_pool)
130 | 		*bytes_wasted += p->end_free - p->next_free;
131 | }
132 | 
133 | /* debug version - don't use pool allocator so tools like valgrind can
134 |  * detect memory bugs */
135 | #ifdef ROSS_ALLOC_DEBUG
136 | 
/**
 * Debug variant of tw_calloc(): every request goes straight to calloc() so
 * that tools such as valgrind can track each allocation individually.
 *
 * @param file     Source file of the caller (used in the error message).
 * @param line     Source line of the caller (used in the error message).
 * @param for_who  Short description of what is being allocated.
 * @param e_sz     Size of one element in bytes.
 * @param n        Number of elements.
 * @return         Zero-filled memory for n elements, or NULL when the
 *                 request is empty (e_sz or n is 0), matching the pool
 *                 allocator.  Allocation failure is reported via tw_error().
 */
void*
tw_calloc(
	const char *file,
	int line,
	const char *for_who,
	size_t e_sz,
	size_t n)
{
    void *r;

    /* calloc() may legitimately return NULL for a zero-size request; treat
     * that case like the pool allocator does instead of raising an error. */
    if (!e_sz || !n)
        return NULL;

    r = calloc(n, e_sz);
    if (!r){
		/* size_t values are cast so they match the %lu conversions. */
		tw_error(
			file, line,
			"Cannot allocate %lu bytes for %lu %s",
			(unsigned long)e_sz,
			(unsigned long)n,
			for_who);
    }
    return r;
}
156 | 
157 | #else
158 | 
159 | static void*
160 | pool_alloc(size_t len)
161 | {
162 | 	struct mem_pool *p;
163 | 	void *r;
164 | 
165 | 	for (p = main_pool; p; p = p->next_pool)
166 | 		if ((unsigned)(p->end_free - p->next_free) >= len)
167 | 			break;
168 | 
169 | 	if (!p) {
170 | 		if (len >= pool_size) {
171 | 			r = my_malloc(len);
172 | 			goto ret;
173 | 		}
174 | 
175 | 		p = (struct mem_pool *) my_malloc(pool_size + 32);
176 | 		if (!p) {
177 | 			r = NULL;
178 | 			goto ret;
179 | 		}
180 | 
181 | 		p->next_pool = main_pool;
182 | 		//p->next_free = (char*)(p + 1);
183 |                 p->next_free = (char *)((size_t)32 + (size_t)p);
184 | 		if( 7 & (size_t)(p->next_free) )
185 | 		    printf("pool_alloc: WARNING found pool start address (%p) NOT 8 byte aligned\n", p->next_free);
186 | 		p->end_free = p->next_free + pool_size;
187 | 		main_pool = p;
188 | 	}
189 | 
190 | 	r = p->next_free;
191 | 	p->next_free += len;
192 | 
193 | 	if( 7 & (size_t)r || 7 & (size_t)(p->next_free) )
194 | 	    printf("pool_alloc: WARNING found return ptr (%p) or next_free (%p) NOT 8 bytes aligned\n", r, p->next_free );
195 | 
196 | ret:
197 | 	if (r)
198 | 		total_allocated += len;
199 | 	return r;
200 | }
201 | 
202 | void*
203 | tw_calloc(
204 | 	const char *file,
205 | 	int line,
206 | 	const char *for_who,
207 | 	size_t e_sz,
208 | 	size_t n)
209 | {
210 | 	void *r;
211 | 
212 | 	if(e_sz & (pool_align - 1))
213 | 	{
214 | 	    e_sz += pool_align - (e_sz & (pool_align - 1));
215 | 	    // printf("%s:%d:%s: realigned size to %d \n", file, line, for_who, e_sz );
216 | 	}
217 | 
218 | 	e_sz *= n;
219 | 	if (!e_sz)
220 | 		return NULL;
221 | 
222 | 	r = pool_alloc(e_sz);
223 | 	if (!r)
224 | 		tw_error(
225 | 			file, line,
226 | 			"Cannot allocate %lu bytes for %u %s"
227 | 			" (need total of %lu KiB)",
228 | 			(unsigned long)e_sz,
229 | 			n,
230 | 			for_who,
231 | 			(unsigned long)((total_allocated + e_sz) / 1024));
232 | 	memset(r, 0, e_sz);
233 | 	return r;
234 | }
235 | 
236 | #endif
237 | 
238 | #undef malloc
239 | static void*
240 | my_malloc(size_t len)
241 | {
242 | 	malloc_calls++;
243 | 	return malloc(len);
244 | }
245 | 
246 | #undef realloc
247 | 


--------------------------------------------------------------------------------
/docs/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | IF(ROSS_BUILD_DOXYGEN)
 2 | 
 3 | IF(DOXYGEN_DOT_FOUND)
 4 |   SET(HAVE_DOT YES)
 5 | 
 6 |   OPTION(DOXYGEN_CALL_GRAPHS "Build Doxygen Call Graphs" OFF)
 7 |   IF(DOXYGEN_CALL_GRAPHS)
 8 |     SET(CALL_GRAPHS_ON YES)
 9 |   ELSE(DOXYGEN_CALL_GRAPHS)
10 |     SET(CALL_GRAPHS_ON NO)
11 |   ENDIF(DOXYGEN_CALL_GRAPHS)
12 | 
13 |   OPTION(DOXYGEN_CALLER_GRAPHS "Build Doxygen Caller Graphs" OFF)
14 |   IF(DOXYGEN_CALLER_GRAPHS)
15 |     SET(CALLER_GRAPHS_ON YES)
16 |   ELSE(DOXYGEN_CALLER_GRAPHS)
17 |     SET(CALLER_GRAPHS_ON NO)
18 |   ENDIF(DOXYGEN_CALLER_GRAPHS)
19 | 
20 |   OPTION(DOXYGEN_DEFAULT_GRAPHS "Build Default Doxygen Graphs" OFF)
21 |   IF(DOXYGEN_DEFAULT_GRAPHS)
22 |     SET(DEFAULT_GRAPHS_ON YES)
23 |   ELSE(DOXYGEN_DEFAULT_GRAPHS)
24 |     SET(DEFAULT_GRAPHS_ON NO)
25 |   ENDIF(DOXYGEN_DEFAULT_GRAPHS)
26 | 
27 | ELSE(DOXYGEN_DOT_FOUND)
28 |   SET(HAVE_DOT NO)
29 |   SET(CALL_GRAPHS NO)
30 |   SET(CALLER_GRAPHS NO)
31 |   SET(DEFAULT_GRAPHS NO)
32 | ENDIF(DOXYGEN_DOT_FOUND)
33 | 
34 | SET(DOXYGEN_OUTPUT ${CMAKE_BINARY_DIR}/docs/html/index.html)
35 | 
36 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.user.in ${CMAKE_BINARY_DIR}/Doxyfile.user)
37 | 
38 | ADD_CUSTOM_COMMAND(
39 |   OUTPUT ${DOXYGEN_OUTPUT}
40 |   COMMAND ${CMAKE_COMMAND} -E echo_append "Building API Documentation..."
41 |   COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile.user
42 |   COMMAND ${CMAKE_COMMAND} -E echo "Done.")
43 | 
44 | ADD_CUSTOM_TARGET(apidoc ALL DEPENDS ${DOXYGEN_OUTPUT})
45 | 
46 | ENDIF(ROSS_BUILD_DOXYGEN)
47 | 


--------------------------------------------------------------------------------
/docs/header.html:
--------------------------------------------------------------------------------
 1 | <!-- HTML header for doxygen 1.8.9.1-->
 2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 3 | <html xmlns="http://www.w3.org/1999/xhtml">
 4 | <head>
 5 | <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 6 | <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 7 | <meta name="generator" content="Doxygen $doxygenversion"/>
 8 | <!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME-->
 9 | <!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME-->
10 | <link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/>
11 | <script type="text/javascript" src="$relpath^jquery.js"></script>
12 | <script type="text/javascript" src="$relpath^dynsections.js"></script>
13 | $treeview
14 | $search
15 | $mathjax
16 | <link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" />
17 | $extrastylesheet
18 | </head>
19 | <body>
20 | <div id="top"><!-- do not remove this div, it is closed by doxygen! -->
21 | 
22 | <!--BEGIN TITLEAREA-->
23 | <div id="titlearea">
24 | <table cellspacing="0" cellpadding="0">
25 |  <tbody>
26 |  <tr style="height: 56px;">
27 |   <!--BEGIN PROJECT_LOGO-->
28 |   <td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td>
29 |   <!--END PROJECT_LOGO-->
30 |   <!--BEGIN PROJECT_NAME-->
31 |   <td style="padding-left: 0.5em;">
32 |    <a href="https://github.com/ROSS-org/ROSS">
33 |    <div id="projectname">$projectname
34 |    <!--BEGIN PROJECT_NUMBER-->&#160;<span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER-->
35 |    </div>
36 |    </a>
37 |    <!--BEGIN PROJECT_BRIEF--><div id="projectbrief">$projectbrief</div><!--END PROJECT_BRIEF-->
38 |   </td>
39 |   <!--END PROJECT_NAME-->
40 |   <!--BEGIN !PROJECT_NAME-->
41 |    <!--BEGIN PROJECT_BRIEF-->
42 |     <td style="padding-left: 0.5em;">
43 |     <div id="projectbrief">$projectbrief</div>
44 |     </td>
45 |    <!--END PROJECT_BRIEF-->
46 |   <!--END !PROJECT_NAME-->
47 |   <!--BEGIN DISABLE_INDEX-->
48 |    <!--BEGIN SEARCHENGINE-->
49 |    <td>$searchbox</td>
50 |    <!--END SEARCHENGINE-->
51 |   <!--END DISABLE_INDEX-->
52 |  </tr>
53 |  </tbody>
54 | </table>
55 | </div>
56 | <!--END TITLEAREA-->
57 | <!-- end header part -->
58 | 


--------------------------------------------------------------------------------
/models/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## BASIC SCHEDULERS
 2 | FUNCTION(ROSS_TEST_SCHEDULERS target_name)
 3 | 	ADD_TEST(${target_name}_SCHED_Sequential ${target_name} --synch=1)
 4 | 	ADD_TEST(${target_name}_SCHED_Conservative mpirun -np 2 ./${target_name} --synch=2)
 5 | 	ADD_TEST(${target_name}_SCHED_Optimistic mpirun -np 2 ./${target_name} --synch=3 --extramem=100000)
 6 | 	ADD_TEST(${target_name}_SCHED_Realtime mpirun -np 2 ./${target_name} --synch=5 --gvt-interval=1 --clock-rate=$ENV{CLOCK_SPEED})
 7 | 	ADD_TEST(${target_name}_SCHED_OptDebug ${target_name} --synch=4 --nkp=1 --extramem=1000000)
 8 | 
 9 | 	SET(sched_tests
10 | 		${target_name}_SCHED_Sequential
11 | 		${target_name}_SCHED_Conservative
12 | 		${target_name}_SCHED_Optimistic
13 | 		${target_name}_SCHED_Realtime
14 | 		${target_name}_SCHED_OptDebug)
15 | 	SET_TESTS_PROPERTIES(${sched_tests} PROPERTIES TIMEOUT 60)
16 | ENDFUNCTION(ROSS_TEST_SCHEDULERS)
17 | 
18 | ## INSTRUMENTATION TESTS
# Register the instrumentation CTest cases for the executable target_name
# and give every one of them a 60 second timeout.
FUNCTION(ROSS_TEST_INSTRUMENTATION target_name)
	# Optimistic, real-time optimistic and conservative runs, each with PE-level
	# and KP/LP-level data collection.
	ADD_TEST(${target_name}_INST_OptPE mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
	ADD_TEST(${target_name}_INST_OptLP mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

	ADD_TEST(${target_name}_INST_RTOptPE mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
	ADD_TEST(${target_name}_INST_RTOptLP mpirun -np 2 ./${target_name} --synch=5 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

	ADD_TEST(${target_name}_INST_ConsPE mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
	ADD_TEST(${target_name}_INST_ConsLP mpirun -np 2 ./${target_name} --synch=2 --engine-stats=4 --event-trace=2 --kp-data=1 --lp-data=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

	ADD_TEST(${target_name}_INST_Seq ./${target_name} --synch=1 --event-trace=1 --extramem=100000)

	ADD_TEST(${target_name}_INST_Model mpirun -np 2 ./${target_name} --synch=3 --model-stats=4 --event-trace=2 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)

	ADD_TEST(${target_name}_INST_DisableOutput mpirun -np 2 ./${target_name} --synch=3 --engine-stats=4 --disable-output=1 --extramem=100000 --vt-interval=10000 --vt-samp-end=100000)
	ADD_TEST(${target_name}_INST_ForceOverflow mpirun -np 2 ./${target_name} --synch=3 --event-trace=1 --buffer-free=0 --extramem=100000)
	ADD_TEST(${target_name}_INST_FullEventTrace mpirun -np 2 ./${target_name} --synch=3 --event-trace=1 --extramem=100000)
	ADD_TEST(${target_name}_INST_RBEventTrace mpirun -np 2 ./${target_name} --synch=3 --event-trace=2 --extramem=100000)

	# Every test added above must be listed here, otherwise it runs without
	# the timeout (FullEventTrace used to be missing from this list).
	SET(inst_tests
		${target_name}_INST_OptPE
		${target_name}_INST_OptLP
		${target_name}_INST_RTOptPE
		${target_name}_INST_RTOptLP
		${target_name}_INST_ConsPE
		${target_name}_INST_ConsLP
		${target_name}_INST_Seq
		${target_name}_INST_Model
		${target_name}_INST_DisableOutput
		${target_name}_INST_ForceOverflow
		${target_name}_INST_FullEventTrace
		${target_name}_INST_RBEventTrace)
	SET_TESTS_PROPERTIES(${inst_tests} PROPERTIES TIMEOUT 60)
ENDFUNCTION(ROSS_TEST_INSTRUMENTATION)
52 | 
53 | 
# Automatically search and add model subdirectories: every directory below
# this one that contains a CMakeLists.txt is added to the build.
# With CMP0009 set to NEW, GLOB_RECURSE follows symlinks only when
# FOLLOW_SYMLINKS is given, which the FILE() call below does, so that
# sym-linked model directories are found as well.
CMAKE_POLICY(SET CMP0009 NEW) # follow sym-links
FILE(GLOB_RECURSE my_list . FOLLOW_SYMLINKS */CMakeLists.txt)
FOREACH(cmf ${my_list})
    # Reduce the path of the CMakeLists.txt to its containing directory.
    GET_FILENAME_COMPONENT(dir_path ${cmf} PATH)
    ADD_SUBDIRECTORY(${dir_path})
#    MESSAGE(${dir_path})
ENDFOREACH(cmf)
62 | 


--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
 1 | # Welcome to Modeling!
 2 | 
 3 | The ROSS repository currently links to two model repositories:
 4 | - [A Template Model](http://github.com/nmcglohon/template-model) that can be used as a starting point for any new model.
 5 | - [A Suite of Stable Models](http://github.com/ROSS-org/ROSS-Models) which contains several completed models.
 6 | 
 7 | ## Building Existing Models
 8 | 
 9 | To get the linked model repositories, run the following commands after cloning the ROSS repository:
10 | ```
11 | git submodule init
12 | git submodule update
13 | ```
14 | Then build ROSS as you regularly would.
Be sure to turn on the ROSS_BUILD_MODELS option in CMake (more details can be found on the [wiki page](http://github.com/ROSS-org/ROSS/wiki/Installation)).
16 | 
17 | ## Creating Your Own Model
18 | 
As you develop your model, the best practice is to do it in a separate git repository.
20 | Sym-link your model into this folder and CMake will automatically find it for building.
21 | ```
22 | cd ~/Projects/ROSS/models
23 | ln -s ~/Projects/my-model ./
24 | ```
25 | For more details on creating a model please check out the [wiki page](http://github.com/ROSS-org/ROSS/wiki/Constructing-the-Model).
26 | 


--------------------------------------------------------------------------------
/models/phold/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | INCLUDE_DIRECTORIES(${ROSS_BINARY_DIR})
 2 | IF(USE_DAMARIS)
 3 |     INCLUDE_DIRECTORIES(${DAMARIS_INCLUDE}) 
 4 | ENDIF(USE_DAMARIS)
 5 | IF(BGPM)
 6 | 	INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR} ${BGPM_LIB})
 7 | ELSE(NOT(BGPM))
 8 | 	INCLUDE_DIRECTORIES(${ROSS_SOURCE_DIR})
 9 | ENDIF(BGPM)
10 | 
11 | SET(phold_srcs
12 | phold.c		phold.h)
13 | 
14 | 
15 | ADD_EXECUTABLE(phold ${phold_srcs})
16 | ADD_EXECUTABLE(phold_test ${phold_srcs})
17 | 
18 | IF(BGPM)
19 | 	TARGET_LINK_LIBRARIES(phold ROSS imp_bgpm m)
20 | 	TARGET_LINK_LIBRARIES(phold_test ROSS imp_bgpm m)
21 | ELSE(NOT(BGPM))
22 |     IF(USE_DAMARIS)
23 |         TARGET_LINK_LIBRARIES(phold ROSS ROSS_Damaris m)
24 |         TARGET_LINK_LIBRARIES(phold_test ROSS ROSS_Damaris m)
25 |     ELSE(NOT(USE_DAMARIS))
26 |         TARGET_LINK_LIBRARIES(phold ROSS m)
27 |         TARGET_LINK_LIBRARIES(phold_test ROSS m)
28 |     ENDIF(USE_DAMARIS)
29 | ENDIF(BGPM)
30 | 
31 | ROSS_TEST_SCHEDULERS(phold)
32 | ROSS_TEST_INSTRUMENTATION(phold)
33 | 
34 | SET_TARGET_PROPERTIES(phold_test PROPERTIES COMPILE_DEFINITIONS TEST_COMM_ROSS)
35 | ROSS_TEST_SCHEDULERS(phold_test)
36 | ROSS_TEST_INSTRUMENTATION(phold_test)
37 | 
38 | INSTALL(FILES ${ROSS_BINARY_DIR}/../models/phold/phold DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
39 | 


--------------------------------------------------------------------------------
/models/phold/phold.c:
--------------------------------------------------------------------------------
  1 | #include "phold.h"
  2 | 
  3 | 
  4 | tw_peid
  5 | phold_map(tw_lpid gid)
  6 | {
  7 | 	return (tw_peid) gid / g_tw_nlp;
  8 | }
  9 | 
 10 | void
 11 | phold_init(phold_state * s, tw_lp * lp)
 12 | {
 13 |     (void) s;
 14 | 	int              i;
 15 | 
 16 | 	if( stagger )
 17 | 	  {
 18 | 	    for (i = 0; i < g_phold_start_events; i++)
 19 | 	      {
 20 | 		tw_event_send(
 21 | 			      tw_event_new(lp->gid,
 22 | 					   tw_rand_exponential(lp->rng, mean) + lookahead + (tw_stime)(lp->gid % (unsigned int)g_tw_ts_end),
 23 | 					   lp));
 24 | 	      }
 25 | 	  }
 26 | 	else
 27 | 	  {
 28 | 	    for (i = 0; i < g_phold_start_events; i++)
 29 | 	      {
 30 | 		tw_event_send(
 31 | 			      tw_event_new(lp->gid,
 32 | 					   tw_rand_exponential(lp->rng, mean) + lookahead,
 33 | 					   lp));
 34 | 	      }
 35 | 	  }
 36 | }
 37 | 
 38 | void
 39 | phold_pre_run(phold_state * s, tw_lp * lp)
 40 | {
 41 |     (void) s;
 42 |     tw_lpid	 dest;
 43 | 
 44 | 	if(tw_rand_unif(lp->rng) <= percent_remote)
 45 | 	{
 46 | 		dest = tw_rand_integer(lp->rng, 0, ttl_lps - 1);
 47 | 	} else
 48 | 	{
 49 | 		dest = lp->gid;
 50 | 	}
 51 | 
 52 | 	if(dest >= (g_tw_nlp * tw_nnodes()))
 53 | 		tw_error(TW_LOC, "bad dest");
 54 | 
 55 | 	tw_event_send(tw_event_new(dest, tw_rand_exponential(lp->rng, mean) + lookahead, lp));
 56 | }
 57 | 
 58 | void
 59 | phold_event_handler(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp)
 60 | {
 61 |     (void) s;
 62 |     (void) m;
 63 | 	tw_lpid	 dest;
 64 | 
 65 | 	if(tw_rand_unif(lp->rng) <= percent_remote)
 66 | 	{
 67 | 		bf->c1 = 1;
 68 | 		dest = tw_rand_integer(lp->rng, 0, ttl_lps - 1);
 69 | 		// Makes PHOLD non-deterministic across processors! Don't uncomment
 70 | 		/* dest += offset_lpid; */
 71 | 		/* if(dest >= ttl_lps) */
 72 | 		/* 	dest -= ttl_lps; */
 73 | 	} else
 74 | 	{
 75 | 		bf->c1 = 0;
 76 | 		dest = lp->gid;
 77 | 	}
 78 | 
 79 | 	if(dest >= (g_tw_nlp * tw_nnodes()))
 80 | 		tw_error(TW_LOC, "bad dest");
 81 | 
 82 | 	tw_event_send(tw_event_new(dest, tw_rand_exponential(lp->rng, mean) + lookahead, lp));
 83 | }
 84 | 
 85 | void
 86 | phold_event_handler_rc(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp)
 87 | {
 88 |     (void) s;
 89 |     (void) m;
 90 | 	tw_rand_reverse_unif(lp->rng);
 91 | 	tw_rand_reverse_unif(lp->rng);
 92 | 
 93 | 	if(bf->c1 == 1)
 94 | 		tw_rand_reverse_unif(lp->rng);
 95 | }
 96 | 
 97 | void phold_commit(phold_state * s, tw_bf * bf, phold_message * m, tw_lp * lp)
 98 | {
 99 |     (void) s;
100 |     (void) bf;
101 |     (void) m;
102 |     (void) lp;
103 | }
104 | 
105 | void
106 | phold_finish(phold_state * s, tw_lp * lp)
107 | {
108 |     (void) s;
109 |     (void) lp;
110 | }
111 | 
/*
 * LP type table for PHOLD: a single LP type followed by a zeroed
 * terminator entry.  The pre-run hook is deliberately left NULL
 * (phold_pre_run is commented out).
 */
tw_lptype       mylps[] = {
	{(init_f) phold_init,
     /* (pre_run_f) phold_pre_run, */
     (pre_run_f) NULL,
	 (event_f) phold_event_handler,
	 (revent_f) phold_event_handler_rc,
	 (commit_f) phold_commit,
	 (final_f) phold_finish,
	 (map_f) phold_map,
	sizeof(phold_state)},	/* size of the per-LP state */
	{0},
};
124 | 
125 | void event_trace(phold_message *m, tw_lp *lp, char *buffer, int *collect_flag)
126 | {
127 |     (void) m;
128 |     (void) lp;
129 |     (void) buffer;
130 |     (void) collect_flag;
131 |     return;
132 | }
133 | 
134 | void phold_stats_collect(phold_state *s, tw_lp *lp, char *buffer)
135 | {
136 |     (void) s;
137 |     (void) lp;
138 |     (void) buffer;
139 |     return;
140 | }
141 | 
/*
 * Instrumentation callbacks for PHOLD: one entry followed by a zeroed
 * terminator.  Both registered callbacks are no-ops and the two sampling
 * hooks are left NULL.
 * NOTE(review): the meaning of the numeric fields (0, sizeof(int), 0)
 * depends on the st_model_types layout, which is declared elsewhere --
 * confirm.
 */
st_model_types model_types[] = {
    {(ev_trace_f) event_trace,
     0,
    (model_stat_f) phold_stats_collect,
    sizeof(int),
    NULL, //(sample_event_f)
    NULL, //(sample_revent_f)
    0},
    {0}
};
152 | 
/*
 * Command-line options of the PHOLD model.  Each entry names an option, the
 * global variable it is bound to, and its help text; main() registers the
 * table with tw_opt_add() before calling tw_init().
 */
const tw_optdef app_opt[] =
{
	TWOPT_GROUP("PHOLD Model"),
	TWOPT_DOUBLE("remote", percent_remote, "desired remote event rate"),
	TWOPT_UINT("nlp", nlp_per_pe, "number of LPs per processor"),
	TWOPT_DOUBLE("mean", mean, "exponential distribution mean for timestamps"),
	TWOPT_DOUBLE("mult", mult, "multiplier for event memory allocation"),
	TWOPT_DOUBLE("lookahead", lookahead, "lookahead for events"),
	TWOPT_UINT("start-events", g_phold_start_events, "number of initial messages per LP"),
	TWOPT_UINT("stagger", stagger, "Set to 1 to stagger event uniformly across 0 to end time."),
	TWOPT_UINT("memory", optimistic_memory, "additional memory buffers"),
	TWOPT_CHAR("run", run_id, "user supplied run name"),
	TWOPT_END()	/* end-of-table marker */
};
167 | 
/*
 * PHOLD entry point: registers the model options, initializes ROSS, derives
 * the run parameters from the options, assigns the LP and instrumentation
 * types to every local LP, prints the configuration on node 0 and runs the
 * simulation.
 *
 * With TEST_COMM_ROSS defined, MPI is initialized here and ROSS is handed a
 * communicator obtained by splitting MPI_COMM_WORLD into even and odd ranks.
 */
int
main(int argc, char **argv)
{

#ifdef TEST_COMM_ROSS
    // Init outside of ROSS
    MPI_Init(&argc, &argv);
    // Split COMM_WORLD in half even/odd
    int mpi_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm split_comm;
    MPI_Comm_split(MPI_COMM_WORLD, mpi_rank%2, mpi_rank, &split_comm);
    // NOTE(review): odd ranks finalize MPI here but still fall through to
    // tw_comm_set()/tw_init() below -- confirm this is intended.
    if(mpi_rank%2 == 1){
        // tests should catch any MPI_COMM_WORLD collectives
        MPI_Finalize();
    }
    // Allows ROSS to function as normal
    tw_comm_set(split_comm);
#endif

	unsigned int i;

	// default lookahead of 1.0; the "lookahead" option registered in
	// app_opt is bound to this variable (presumably applied by tw_init())
	lookahead = 1.0;
	tw_opt_add(app_opt);
	tw_init(&argc, &argv);

#ifdef USE_DAMARIS
    if(g_st_ross_rank)
    { // only ross ranks should run code between here and tw_run()
#endif
	// 1.0 is the largest lookahead accepted
	if( lookahead > 1.0 )
	  tw_error(TW_LOC, "Lookahead > 1.0 .. needs to be less\n");

	//reset mean based on lookahead: the lookahead is added back onto every
	//exponential draw when events are scheduled
        mean = mean - lookahead;

	offset_lpid = g_tw_mynode * nlp_per_pe;
	ttl_lps = tw_nnodes() * nlp_per_pe;
	// event memory: start events of all local LPs scaled by mult, plus the
	// extra buffers requested with the "memory" option
	g_tw_events_per_pe = (mult * nlp_per_pe * g_phold_start_events) +
				optimistic_memory;
	//g_tw_rng_default = TW_FALSE;
	g_tw_lookahead = lookahead;

	tw_define_lps(nlp_per_pe, sizeof(phold_message));

	// every local LP uses the single PHOLD LP type and model type
	for(i = 0; i < g_tw_nlp; i++)
    {
		tw_lp_settype(i, &mylps[0]);
        st_model_settype(i, &model_types[0]);
    }

	// only node 0 prints the configuration
	// NOTE(review): g_phold_start_events and optimistic_memory are declared
	// as int in phold.h but printed with %u below -- confirm.
        if( g_tw_mynode == 0 )
	  {
	    printf("========================================\n");
	    printf("PHOLD Model Configuration..............\n");
	    printf("   Lookahead..............%lf\n", lookahead);
	    printf("   Start-events...........%u\n", g_phold_start_events);
	    printf("   stagger................%u\n", stagger);
	    printf("   Mean...................%lf\n", mean);
	    printf("   Mult...................%lf\n", mult);
	    printf("   Memory.................%u\n", optimistic_memory);
	    printf("   Remote.................%lf\n", percent_remote);
	    printf("========================================\n\n");
	  }

	tw_run();
#ifdef USE_DAMARIS
    } // end if(g_st_ross_rank)
#endif
	tw_end();

	return 0;
}
242 | 


--------------------------------------------------------------------------------
/models/phold/phold.h:
--------------------------------------------------------------------------------
 1 | #ifndef INC_phold_h
 2 | #define INC_phold_h
 3 | 
 4 | #include <ross.h>
 5 | 
 6 | 	/*
 7 | 	 * PHOLD Types
 8 | 	 */
 9 | 
typedef struct phold_state phold_state;
typedef struct phold_message phold_message;

/* Per-LP state.  None of the PHOLD handlers reads or writes it. */
struct phold_state
{
	long int	 dummy_state;
};

/* Event payload.  None of the PHOLD handlers reads or writes it; its size
 * is what main() passes to tw_define_lps(). */
struct phold_message
{
	long int	 dummy_data;
};
22 | 
23 | 	/*
24 | 	 * PHOLD Globals
25 | 	 */
/* NOTE: these variables are defined (not merely declared) in this header,
 * and lookahead is not static, so the header can only be included from one
 * translation unit of a program. */
tw_stime lookahead = 1.0;		/* added to every event timestamp; "lookahead" option */
static unsigned int stagger = 0;	/* non-zero: phold_init spreads the start events by gid */
static unsigned int offset_lpid = 0;	/* first global LP id of this node; set in main() */
static tw_stime mult = 1.4;		/* multiplier for event memory; "mult" option */
static tw_stime percent_remote = 0.25;	/* probability that an event goes to a random LP */
static unsigned int ttl_lps = 0;	/* total LPs over all nodes; set in main() */
static unsigned int nlp_per_pe = 8;	/* LPs per processor; "nlp" option */
/* NOTE(review): the next two are int, yet they are registered with
 * TWOPT_UINT and printed with %u in phold.c -- confirm the intended type. */
static int g_phold_start_events = 1;	/* initial events per LP; "start-events" option */
static int optimistic_memory = 100;	/* additional event buffers; "memory" option */

// mean of the exponential distribution used for timestamps; main() reduces
// it by the lookahead
static tw_stime mean = 1.0;

static char run_id[1024] = "undefined";	/* user supplied run name; "run" option */
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------