├── .clang-format
├── .clang-tidy
├── .envrc
├── .github
    └── workflows
    │   ├── build-methods.yml
    │   ├── checks.yml
    │   └── runclangformat.yml
├── .gitignore
├── .pep8
├── BUILD.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
    ├── GetLocalLLVM.cmake
    └── superbuild
    │   ├── CMakeLists.txt
    │   └── flang.cmake
├── documentation
    └── flang
    │   ├── README.md
    │   └── hello.f
├── example
    ├── helloworld.cpp
    ├── llvm7-CPU2006-cfg
    │   ├── hotfuncs.ini
    │   ├── machine_model.cfg
    │   └── sched.ini
    └── optsched-cfg
    │   ├── hotfuncs.ini
    │   ├── machine_model.cfg
    │   ├── occupancy_limits.ini
    │   └── sched.ini
├── include
    └── opt-sched
    │   └── Scheduler
    │       ├── OptSchedDDGWrapperBase.h
    │       ├── OptSchedTarget.h
    │       ├── aco.h
    │       ├── array_ref2d.h
    │       ├── bb_spill.h
    │       ├── bit_vector.h
    │       ├── buffers.h
    │       ├── config.h
    │       ├── data_dep.h
    │       ├── defines.h
    │       ├── enumerator.h
    │       ├── gen_sched.h
    │       ├── graph.h
    │       ├── graph_trans.h
    │       ├── graph_trans_ilp.h
    │       ├── graph_trans_ilp_occupancy_preserving.h
    │       ├── hash_table.h
    │       ├── hist_table.h
    │       ├── list_sched.h
    │       ├── lnkd_lst.h
    │       ├── logger.h
    │       ├── machine_model.h
    │       ├── mem_mngr.h
    │       ├── random.h
    │       ├── ready_list.h
    │       ├── reg_alloc.h
    │       ├── register.h
    │       ├── relaxed_sched.h
    │       ├── sched_basic_data.h
    │       ├── sched_region.h
    │       ├── stats.h
    │       └── utilities.h
├── lib
    ├── CMakeLists.txt
    ├── Scheduler
    │   ├── aco.cpp
    │   ├── bb_spill.cpp
    │   ├── buffers.cpp
    │   ├── config.cpp
    │   ├── data_dep.cpp
    │   ├── enumerator.cpp
    │   ├── gen_sched.cpp
    │   ├── graph.cpp
    │   ├── graph_trans.cpp
    │   ├── graph_trans_ilp.cpp
    │   ├── graph_trans_ilp_occupancy_preserving.cpp
    │   ├── hist_table.cpp
    │   ├── list_sched.cpp
    │   ├── logger.cpp
    │   ├── machine_model.cpp
    │   ├── random.cpp
    │   ├── ready_list.cpp
    │   ├── reg_alloc.cpp
    │   ├── register.cpp
    │   ├── relaxed_sched.cpp
    │   ├── sched_basic_data.cpp
    │   ├── sched_region.cpp
    │   ├── stats.cpp
    │   └── utilities.cpp
    └── Wrapper
    │   ├── AMDGPU
    │       ├── GCNOptSched.cpp
    │       ├── GCNOptSched.h
    │       ├── GCNOptSchedReg.h
    │       ├── OptSchedDDGWrapperGCN.cpp
    │       ├── OptSchedDDGWrapperGCN.h
    │       └── OptSchedGCNTarget.cpp
    │   ├── OptSchedDDGWrapperBasic.cpp
    │   ├── OptSchedDDGWrapperBasic.h
    │   ├── OptSchedGenericTarget.cpp
    │   ├── OptSchedMachineWrapper.cpp
    │   ├── OptSchedMachineWrapper.h
    │   ├── OptSchedReg.h
    │   ├── OptimizingScheduler.cpp
    │   └── OptimizingScheduler.h
├── patches
    ├── amdgpu
    │   ├── README.md
    │   ├── ROCm-2.4-LLVM-print-occupancy.patch
    │   └── ROCm-2.4-load-optsched.patch
    ├── llvm6.0
    │   ├── README.md
    │   ├── flang-llvm6-print-spilling-info.patch
    │   └── llvm6-print-spilling-info.patch
    └── llvm7.0
    │   ├── README.md
    │   └── flang-llvm7-print-spilling-info.patch
├── test
    ├── CMakeLists.txt
    ├── Unit
    │   ├── lit.cfg.py
    │   └── lit.site.cfg.py.in
    ├── lit.cfg.py
    └── lit.site.cfg.py.in
├── unittests
    ├── Basic
    │   ├── ArrayRef2DTest.cpp
    │   ├── CMakeLists.txt
    │   ├── ConfigTest.cpp
    │   ├── LinkedListTest.cpp
    │   ├── LoggerTest.cpp
    │   ├── UtilitiesTest.cpp
    │   ├── simple_machine_model.h
    │   └── simple_machine_model_test.cpp
    └── CMakeLists.txt
└── util
    ├── ARM
        ├── build-copy-to-A7.sh
        ├── extract-run-spec-cmd.sh
        ├── gen-CPU2006-cross-ARM.py
        └── run-CPU2006-cross-ARM.sh
    ├── CPU2006
        ├── clean-compile-commands.py
        ├── runspec-wrapper-optsched.py
        └── sched-som.py
    ├── SLIL
        ├── compare-BB-fixed.py
        ├── compare-peaks.py
        ├── compare-static-LB.py
        ├── gather-SLIL-stats.py
        ├── run-filtered-block-tests.py
        └── runspec-wrapper-SLIL.py
    ├── aco_analysis
        └── make_pheromone_pdfs.sh
    ├── analyze
        ├── __init__.py
        ├── _main.py
        ├── _types.py
        ├── _utils.py
        ├── imports
        │   ├── __init__.py
        │   ├── import_cpu2006.py
        │   ├── import_plaidml.py
        │   ├── import_shoc.py
        │   └── import_utils.py
        └── lib
        │   └── compile_times.py
    ├── misc
        ├── count-boundary-spills.py
        ├── count-nodes.py
        ├── ddg2dot.py
        ├── extract-script.py
        ├── findblock.py
        ├── func-stats.py
        ├── json2infolog.py
        ├── rp-compare.py
        ├── spill-compare.py
        ├── spill-count-csv.py
        └── validation-test.py
    ├── plaidbench
        ├── extract-plaidbench-data.py
        ├── get-benchmarks-stats.py
        ├── get-occupancy.py
        ├── get-optsched-stats.py
        ├── get-sched-length.py
        ├── plaidbench-validation-test.py
        └── run-plaidbench.py
    └── readlogs
        └── __init__.py


/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: LLVM
2 | 


--------------------------------------------------------------------------------
/.clang-tidy:
--------------------------------------------------------------------------------
 1 | Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
 2 | CheckOptions:
 3 |   - key:             readability-identifier-naming.ClassCase
 4 |     value:           CamelCase
 5 |   - key:             readability-identifier-naming.EnumCase
 6 |     value:           CamelCase
 7 |   - key:             readability-identifier-naming.FunctionCase
 8 |     value:           camelBack
 9 |   - key:             readability-identifier-naming.MemberCase
10 |     value:           CamelCase
11 |   - key:             readability-identifier-naming.ParameterCase
12 |     value:           CamelCase
13 |   - key:             readability-identifier-naming.UnionCase
14 |     value:           CamelCase
15 |   - key:             readability-identifier-naming.VariableCase
16 |     value:           CamelCase
17 | 


--------------------------------------------------------------------------------
/.envrc:
--------------------------------------------------------------------------------
1 | # Make the Python packages under util/ importable. The ${VAR:+...} expansion
2 | # avoids a leading ':' (an empty path entry) when PYTHONPATH starts out unset.
3 | export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$PWD/util"
4 | 


--------------------------------------------------------------------------------
/.github/workflows/runclangformat.yml:
--------------------------------------------------------------------------------
 1 | name: Run ClangFormat
 2 | 
 3 | on:
 4 |   issue_comment: # GitHub PRs are considered the same type of thing as issues.
 5 |     types: [ created ]
 6 | 
 7 | jobs:
 8 |   reformat:
 9 |     name: ClangFormat
10 |     # If we are a pull_request, we have the trigger comment, and the person
11 |     # requesting is the one who made the PR, then we run.
12 |     if: >-
13 |       github.event.issue.pull_request != ''
14 |       && github.event.comment.body == 'Do: Reformat'
15 |       && github.event.comment.user.id == github.event.issue.user.id
16 |     # We must run on Ubuntu, as we use Unix-only commands.
17 |     runs-on: ubuntu-latest
18 | 
19 |     steps:
20 |       # Add an emote reaction to acknowledge the request.
21 |       # For long-running tasks, this is helpful.
22 |       # For just clang-format, it may not be needed, but it's still nice.
23 |       - name: Acknowledge
24 |         uses: peter-evans/create-or-update-comment@v1.4.1
25 |         with:
26 |           comment-id: ${{ github.event.comment.id }}
27 |           reactions: rocket # (Launch)
28 | 
29 |       - uses: actions/checkout@v2
30 |         with:
31 |           # We need the entire history so that we can rebase many commits.
32 |           fetch-depth: 0
33 | 
34 |       - name: Checkout PR
35 |         uses: dawidd6/action-checkout-pr@v1
36 |         with:
37 |           pr: ${{ github.event.issue.number }}
38 | 
39 |       - name: Get clang-format
40 |         run: sudo apt-get install clang-format-9
41 | 
42 |       - name: Clang format each commit in place
43 |         run: |
44 |           # Git requires user email and name to do commits.
45 |           # As we are only amending each commit, these shouldn't end up in the
46 |           # history.
47 |           # Set user email to the email of the last commit:
48 |           git config --local user.email "$(git log -n 1 --pretty=format:'%ae')"
49 |           git config --local user.name 'GitHub Action'
50 | 
51 |           # Rebases every commit since when this PR branched off of master.
52 |           # sed rewrites the todo list (pick -> edit) so git stops at each commit; -i and -E must be separate flags.
53 |           GIT_EDITOR="sed -i -E 's/^pick/edit/g'" git rebase -i $(git merge-base master HEAD)
54 | 
55 |           # Abort on error
56 |           set -e
57 |           echo '>>> Beginning Rebasing...'
58 | 
59 |           # While a rebase is ongoing, `git status` contains the text "rebase".
60 |           while [[ -n $(git status | grep rebase) ]]; do
61 |             # Run clang-format on every C++ source and header (headers in this repository use .h).
62 |             find . -name '*.hpp' -o -name '*.cpp' -o -name '*.h' | xargs -L1 clang-format-9 -style=file -i --verbose
63 | 
64 |             # Add all changes and update the commit
65 |             echo '>>> Rewriting commit...'
66 |             git add -A
67 |             git commit --amend --no-edit
68 | 
69 |             echo '>>> Continuing Rebasing...'
70 |             git rebase --continue
71 |           done
72 |           echo '>>> Finished Rebasing!'
73 | 
74 |       - name: Push
75 |         run: git push --force -v
76 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /.vscode
3 | __pycache__
4 | *.pyc
5 | 


--------------------------------------------------------------------------------
/.pep8:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | max_line_length = 120
3 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.4.3)
  2 | 
  3 | project(OptSched)
  4 | 
  5 | option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." OFF)
  6 | option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." ON)
  7 | 
  8 | set(OPTSCHED_LIT_ARGS "-sv" CACHE STRING "Arguments to pass to lit")
  9 | set(OPTSCHED_EXTRA_LINK_LIBRARIES "" CACHE STRING "Extra link_libraries to pass to OptSched, ;-separated")
 10 | set(OPTSCHED_EXTRA_INCLUDE_DIRS "" CACHE STRING "Extra include_directories to pass to OptSched, ;-separated")
 11 | # To add OptSched debug defines, e.g.:
 12 | # '-DOPTSCHED_EXTRA_DEFINITIONS=-DIS_DEBUG_DEFS_AND_USES;-DIS_DEBUG_DEF_USE_COUNT'
 13 | set(OPTSCHED_EXTRA_DEFINITIONS "" CACHE STRING "Extra add_definitions to pass to OptSched, ;-separated")
 14 | 
 15 | if(TARGET LLVMAMDGPUCodeGen OR TARGET LLVMCodeGen OR TARGET LLVMX86CodeGen)
 16 |   set(llvm_subproject TRUE)
 17 | else()
 18 |   set(llvm_subproject FALSE)
 19 | endif()
 20 | 
 21 | # Not supported: standalone build against an installed LLVM located via find_package.
 22 | if(NOT llvm_subproject)
 23 |   set(llvm_version 6.0)
 24 |   if(OPTSCHED_ENABLE_AMDGPU)
 25 |     set(llvm_version 13.0) # the AMDGPU scheduler is rejected below for LLVM < 13.0
 26 |   endif()
 27 |   set(OPTSCHED_LLVM_VERSION ${llvm_version} CACHE STRING "The LLVM version to build OptSched with (independent build only)")
 28 | 
 29 |   find_package(LLVM ${OPTSCHED_LLVM_VERSION} REQUIRED CONFIG)
 30 | endif()
 31 | 
 32 | if(NOT CMAKE_BUILD_TYPE)
 33 |   message(FATAL_ERROR "Must set cmake build type.")
 34 | endif()
 35 | 
 36 | # If we are requested to build the AMDGPU functionality, ensure that we can do so.
 37 | if(OPTSCHED_ENABLE_AMDGPU)
 38 |   if(NOT "AMDGPU" IN_LIST LLVM_ALL_TARGETS)
 39 |     message(FATAL_ERROR "Trying to build the AMDGPU code, but AMDGPU is not supported by this build of LLVM")
 40 |   elseif(LLVM_VERSION VERSION_LESS 13.0)
 41 |     message(FATAL_ERROR "OptSched requries LLVM version >= 13.0 to build the AMDGPU scheduler.")
 42 |   endif()
 43 | endif()
 44 | 
 45 | # If asserts are enabled opt-sched must be built with "IS_DEBUG".
 46 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DIS_DEBUG")
 47 | 
 48 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 49 | set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 50 | 
 51 | set(OPTSCHED_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 52 | set(OPTSCHED_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 53 | 
 54 | list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}" ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 55 | include(AddLLVM)
 56 | 
 57 | include_directories(
 58 |   ${OPTSCHED_SOURCE_DIR}/include
 59 |   ${OPTSCHED_SOURCE_DIR}/lib
 60 |   ${LLVM_INCLUDE_DIRS}
 61 |   ${OPTSCHED_EXTRA_INCLUDE_DIRS}
 62 | )
 63 | add_definitions(${OPTSCHED_EXTRA_DEFINITIONS})
 64 | link_directories(${OPTSCHED_EXTRA_LINK_LIBRARIES})
 65 | 
 66 | if(NOT llvm_subproject)
 67 |   include(GetLocalLLVM)
 68 | 
 69 |   set(args)
 70 |   if(OPTSCHED_INCLUDE_TESTS)
 71 |     list(APPEND args GTEST)
 72 |   endif()
 73 | 
 74 |   get_local_llvm(${args})
 75 | endif()
 76 | 
 77 | add_subdirectory(lib)
 78 | 
 79 | if(OPTSCHED_INCLUDE_TESTS)
 80 |   include(CTest)
 81 | 
 82 |   add_subdirectory(unittests)
 83 |   list(APPEND OPTSCHED_TEST_DEPS OptSchedUnitTests)
 84 |   list(APPEND OPTSCHED_TEST_PARAMS
 85 |     clang_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/test/Unit/lit.site.cfg
 86 |     )
 87 | 
 88 |   add_subdirectory(test)
 89 | 
 90 |   add_test(NAME OptSchedTests
 91 |     COMMAND ${CMAKE_COMMAND} --build . --target check-optsched
 92 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
 93 |   )
 94 |   add_test(NAME OptSched-CompileHelloWorld
 95 |     COMMAND
 96 |       ${LLVM_TOOLS_BINARY_DIR}/clang ${CMAKE_CURRENT_SOURCE_DIR}/example/helloworld.cpp
 97 |       -O3
 98 |   )
 99 | endif()
100 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/)
 2 | 
 3 | # OptSched - Optimizing Scheduler
 4 | Combinatorial instruction scheduling research project at CSU Sacramento.
 5 | 
 6 | This plugin for the [LLVM](https://llvm.org/) compiler is an optional machine scheduler. We implement a branch and bound instruction scheduling algorithm.
 7 | 
 8 | ## Requirements
 9 | 
10 | - Ubuntu 16.04 (recommended), or MacOS 10.14
11 | - CMake 3.4.3 or later
12 | - LLVM 6.0 or later
13 | 
14 | ## Building
15 | 
16 | **See [BUILD.md](BUILD.md) for build instructions.**
17 | 
18 | The OptSched plugin can be found in “llvm/lib” after building.
19 | 
20 | ## Configuration files
21 | 
22 | OptSched reads from configuration files at runtime to initialize the scheduler. There are templates in the [example](https://github.com/OptSched/OptSched/tree/master/example/optsched-cfg) directory. The default search location for these files is ```~/.optsched-cfg```. You can optionally specify the path to this directory or any of the configuration files individually with [command-line options](#Command-Line-Options).
23 | 
24 | ## Usage Examples
25 | 
26 | `clang++ -O3 -fplugin=<path/to/OptSched.so> -mllvm -misched=optsched -mllvm -optsched-cfg=<path/to/optsched-cfg>  <example.cpp>`
27 | 
28 | `llc -load <path/to/OptSched.so> -misched=optsched -optsched-cfg=<path/to/optsched-cfg> <example.ll>`
29 | 
30 | ## Command-Line Options
31 | 
32 | When using Clang, pass options to LLVM with `-mllvm`.
33 | 
34 | | CL Opt | Description |
35 | | ------ | ----------- |
36 | | -enable-misched | Enable the machine scheduling pass in LLVM (Targets can override this option). |
37 | | -misched=optsched | Select the optimizing scheduler. |
38 | | -debug-only=optsched | Print debug information from the scheduler. |
39 | | -optsched-cfg=\<string\> | Path to the directory containing configuration files for opt-sched. |
40 | | -optsched-cfg-hotfuncs=\<string\> | Path to the list of hot functions to schedule using opt-sched. |
41 | | -optsched-cfg-machine-model=\<string\> | Path to the machine model specification file for opt-sched. |
42 | | -optsched-cfg-sched=\<string\> | Path to the scheduler options configuration file for opt-sched. |
43 | 


--------------------------------------------------------------------------------
/cmake/GetLocalLLVM.cmake:
--------------------------------------------------------------------------------
 1 | set(LOCAL_LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/.LocalLLVM
 2 |   CACHE PATH "")
 3 | set(LOCAL_LLVM_LIST_DIR ${CMAKE_CURRENT_LIST_DIR})
 4 | mark_as_advanced(LOCAL_LLVM_BINARY_DIR LOCAL_LLVM_LIST_DIR)
 5 | 
 6 | set(LOCAL_LLVM_DIR ${LOCAL_LLVM_BINARY_DIR}/llvm-project
 7 |   CACHE PATH "Path to a local copy of llvm-project, for use in unit tests and the AMDGPU build")
 8 | 
 9 | # get_local_llvm([GTEST])
10 | #
11 | # Makes sure a copy of llvm-project is available at LOCAL_LLVM_DIR. Unless that
12 | # directory or the "download-finished" marker already exists, the optsched
13 | # archive is downloaded into LOCAL_LLVM_BINARY_DIR, unpacked, and renamed to
14 | # llvm-project.
15 | #
16 | # GTEST: additionally point LLVM_EXTERNAL_LIT at the copy's lit.py, add its
17 | #        utils/unittest directory to the build, and expose the googletest and
18 | #        googlemock include directories.
19 | function(get_local_llvm)
20 |   if(NOT EXISTS "${LOCAL_LLVM_BINARY_DIR}")
21 |     file(MAKE_DIRECTORY ${LOCAL_LLVM_BINARY_DIR})
22 |   endif()
23 | 
24 |   # if(EXISTS ...) expects a path and does not dereference variable names, so
25 |   # the variables have to be expanded explicitly.
26 |   if(NOT EXISTS "${LOCAL_LLVM_DIR}" AND
27 |     NOT EXISTS "${LOCAL_LLVM_BINARY_DIR}/llvm-project.download-finished")
28 |     if(NOT EXISTS ${LOCAL_LLVM_BINARY_DIR}/llvm-project.zip)
29 |       file(DOWNLOAD https://github.com/CSUS-LLVM/llvm-project/archive/optsched.zip
30 |         ${LOCAL_LLVM_BINARY_DIR}/llvm-project.zip
31 |         SHOW_PROGRESS
32 |         STATUS result
33 |         TLS_VERIFY ON
34 |         EXPECTED_HASH SHA256=c3a2e966d7182c031973530c0c8e010235577025ca54bfe8159d721f05ca2ed4
35 |       )
36 |       # STATUS is a two-element list: a numeric code (0 means success) followed
37 |       # by a message. list(GET) takes the list first, then the index.
38 |       list(GET result 0 downloadFailed)
39 |       list(GET result 1 statusString)
40 | 
41 |       if(downloadFailed)
42 |         # Drop the incomplete archive; otherwise the next configure would find
43 |         # it and skip the download.
44 |         file(REMOVE ${LOCAL_LLVM_BINARY_DIR}/llvm-project.zip)
45 |         message(FATAL_ERROR "Unable to get llvm-project. Failed with ${downloadFailed}: ${statusString}")
46 |       endif()
47 |     endif()
48 | 
49 |     # Clear the extraction target first (presumably left over from an earlier,
50 |     # interrupted run).
51 |     if(EXISTS ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched)
52 |       file(REMOVE_RECURSE ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched)
53 |     endif()
54 | 
55 |     # RESULT_VARIABLE is the single-command form; RESULTS_VARIABLE needs CMake 3.10+.
56 |     execute_process(
57 |       COMMAND ${CMAKE_COMMAND} -E tar xzf llvm-project.zip
58 |       WORKING_DIRECTORY ${LOCAL_LLVM_BINARY_DIR}
59 |       RESULT_VARIABLE unzipError
60 |     )
61 |     if(unzipError)
62 |       message(FATAL_ERROR "Unable to unzip llvm-project. Failed with ${unzipError}")
63 |     endif()
64 | 
65 |     file(RENAME ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched ${LOCAL_LLVM_BINARY_DIR}/llvm-project)
66 | 
67 |     # Touch the file. file(TOUCH ...) is CMake 3.12+, but we want to support CMake 3.10
68 |     file(WRITE ${LOCAL_LLVM_BINARY_DIR}/llvm-project.download-finished "")
69 |   endif()
70 | 
71 |   cmake_parse_arguments(ARG "GTEST" "" "" ${ARGN})
72 | 
73 |   if(ARG_UNPARSED_ARGUMENTS)
74 |     message(FATAL_ERROR "Unknown arguments ${ARG_UNPARSED_ARGUMENTS}")
75 |   endif()
76 | 
77 |   set(llvm_dir ${LOCAL_LLVM_DIR}/llvm)
78 |   set(llvm_build_dirs ${LOCAL_LLVM_BINARY_DIR}/llvm_build_dirs)
79 | 
80 |   file(MAKE_DIRECTORY ${llvm_build_dirs})
81 | 
82 |   if(ARG_GTEST)
83 |     # Set things up so that llvm-lit can do its work
84 |     set(LLVM_EXTERNAL_LIT "${llvm_dir}/utils/lit/lit.py" CACHE PATH "Path to llvm-lit")
85 |     add_subdirectory(${llvm_dir}/utils/unittest ${llvm_build_dirs}/googletest)
86 | 
87 |     # Set up GTest include dirs
88 |     include_directories(
89 |       ${llvm_dir}/utils/unittest/googletest/include
90 |       ${llvm_dir}/utils/unittest/googlemock/include
91 |     )
92 |   endif()
93 | endfunction()
94 | 


--------------------------------------------------------------------------------
/cmake/superbuild/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # A SuperBuild project for OptSched.
  2 | #
  3 | # Use this as the CMake source directory to run the superbuild.
  4 | #
  5 | # Documentation of configuration args:
  6 | #
  7 | # In general, see option()s and set(... CACHE ...)s in this CMakeLists.txt and flang.cmake.
  8 | #
  9 | # Some options behave as follows:
 10 | #  - OPTSCHEDSUPER_FLANG: Check out and build flang.
 11 | #  - These options are passed on to llvm (and OptSched) directly from the SuperBuild configuration:
 12 | #      - CMAKE_BUILD_TYPE. Note that the flang build types default to Release, but can be configured.
 13 | #      - LLVM_PARALLEL_LINK_JOBS.
 14 | #  - *_EXTRA_CMAKE_ARGS: Passes these CMake arguments on to the corresponding sub-build.
 15 | #  - The flang builds can be configured to use a custom CMAKE_GENERATOR, separate from the superbuild's generator.
 16 | 
 17 | 
 18 | ############################################
 19 | #
 20 | # As of 5/26/2022, the superbuild script is
 21 | # no longer guaranteed to work.
 22 | #
 23 | ############################################
 24 | 
 25 | cmake_minimum_required(VERSION 3.7)
 26 | 
 27 | project(OptSched-SuperBuild)
 28 | 
 29 | option(OPTSCHEDSUPER_FLANG "Check out and build flang." OFF)
 30 | 
 31 | set(OPTSCHEDSUPER_LLVM_INSTALL_PREFIX   ${CMAKE_CURRENT_BINARY_DIR}/llvm-install      CACHE PATH   "The directory to install llvm")
 32 | set(OPTSCHEDSUPER_LLVM_GIT_REPO         https://github.com/CSUS-LLVM/llvm-project.git CACHE STRING "The git repository to clone for LLVM")
 33 | set(OPTSCHEDSUPER_LLVM_GIT_TAG          optsched                                      CACHE STRING "The tag to checkout within the LLVM git repository")
 34 | set(OPTSCHEDSUPER_LLVM_SOURCE_SUBDIR    llvm                                          CACHE STRING "LLVM lives in this subdirectory of the cloned repository")
 35 | set(OPTSCHEDSUPER_LLVM_EXTRA_CMAKE_ARGS ""                                            CACHE STRING ";-separated list of extra arguments to pass to configure cmake")
 36 | 
 37 | include(ExternalProject)
 38 | include(flang.cmake)
 39 | include(CTest)
 40 | 
 41 | # add_cache_variables(<OUT> <VARS>...)
 42 | # For every VAR that is defined, append "-D<VAR>:<TYPE>=<value>" to the list OUT.
 43 | function(add_cache_variables OUT_LIST)
 44 |   set(cache_args "${${OUT_LIST}}")
 45 | 
 46 |   foreach(var_name IN LISTS ARGN)
 47 |     if(DEFINED ${var_name})
 48 |       get_property(var_type CACHE ${var_name} PROPERTY TYPE)
 49 |       if(NOT var_type)
 50 |         set(var_type UNINITIALIZED) # no cache TYPE recorded for this variable
 51 |       endif()
 52 |       list(APPEND cache_args -D${var_name}:${var_type}=${${var_name}})
 53 |     endif()
 54 |   endforeach()
 55 | 
 56 |   set(${OUT_LIST} "${cache_args}" PARENT_SCOPE)
 57 | endfunction()
 58 | 
 59 | set(cache_default_args)
 60 | add_cache_variables(cache_default_args
 61 |   CMAKE_CXX_COMPILER_LAUNCHER
 62 |   CMAKE_C_COMPILER_LAUNCHER
 63 |   OPTSCHED_EXTRA_DEFINITIONS
 64 |   OPTSCHED_EXTRA_LINK_LIBRARIES
 65 | )
 66 | 
 67 | set(ROOT_DIR ${PROJECT_SOURCE_DIR}/../..)
 68 | # Configure, build, and install LLVM with OptSched added as an external project.
 69 | ExternalProject_Add(llvm
 70 |   GIT_REPOSITORY ${OPTSCHEDSUPER_LLVM_GIT_REPO}
 71 |   GIT_TAG ${OPTSCHEDSUPER_LLVM_GIT_TAG}
 72 |   SOURCE_SUBDIR ${OPTSCHEDSUPER_LLVM_SOURCE_SUBDIR}
 73 |   INSTALL_DIR ${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}
 74 |   CMAKE_ARGS
 75 |     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
 76 |     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
 77 |     -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 78 |     -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
 79 |     -DLLVM_PARALLEL_LINK_JOBS=${LLVM_PARALLEL_LINK_JOBS}
 80 |     -DLLVM_ENABLE_PROJECTS=clang # unquoted: CMake passes single quotes through literally
 81 |     -DLLVM_TARGETS_TO_BUILD=X86
 82 |     -DLLVM_BUILD_TOOLS=ON
 83 |     -DLLVM_INCLUDE_TESTS=ON
 84 |     -DLLVM_OPTIMIZED_TABLEGEN=ON
 85 |     -DLLVM_EXTERNAL_PROJECTS=OptSched
 86 |     -DLLVM_EXTERNAL_OPTSCHED_SOURCE_DIR=${ROOT_DIR}
 87 |     -DOPTSCHED_ENABLE_AMDGPU=OFF
 88 |     ${OPTSCHEDSUPER_LLVM_EXTRA_CMAKE_ARGS}
 89 |   CMAKE_CACHE_DEFAULT_ARGS
 90 |     ${cache_default_args}
 91 |   USES_TERMINAL_CONFIGURE 1
 92 |   USES_TERMINAL_BUILD 1
 93 |   USES_TERMINAL_INSTALL 1
 94 | )
 95 | 
 96 | if(OPTSCHEDSUPER_FLANG)
 97 |   setup_flang_external_projects("${cache_default_args}")
 98 | endif()
 99 | 
100 | ExternalProject_Get_Property(llvm BINARY_DIR)
101 | set(LLVM_BUILD_DIR ${BINARY_DIR})
102 | 
103 | add_test(NAME OptSchedTests
104 |   COMMAND ${CMAKE_COMMAND} --build . --target check-optsched
105 |   WORKING_DIRECTORY ${LLVM_BUILD_DIR}
106 | )
107 | add_test(NAME OptSched-CompileHelloWorld
108 |   COMMAND
109 |     ${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/bin/clang ${ROOT_DIR}/example/helloworld.cpp
110 |     -O3
111 | )
112 | 


--------------------------------------------------------------------------------
/documentation/flang/README.md:
--------------------------------------------------------------------------------
  1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/)
  2 | 
  3 | # OptSched - Optimizing Scheduler
  4 | This directory contains specific instructions on how to build Flang.
  5 | 
  6 | ## Requirements
  7 | 
  8 | - Ubuntu 16.04 is recommended
  9 | - CMake 3.4.3 or later
 10 | - LLVM 6.0 or later
 11 | 
 12 | ## Usage with OptSched
 13 | 
 14 | `<path-to-flang-install-directory>/bin/flang -O3 -fplugin=<path/to/OptSched.so> -mllvm -misched=optsched -mllvm -optsched-cfg=<path/to/optsched-cfg> <example.f>`
 15 | 
 16 | ## Building Flang
 17 | 
 18 | #### Flang Build Directory
 19 | 
 20 | 1. Open a bash terminal
 21 | 
 22 | 2. Create a directory where you would like flang to be installed to. For example "flang-install" then navigate inside it:
 23 | 
 24 | `mkdir flang-install && cd flang-install`
 25 | 
 26 | 3. Export an environment variable that will be passed to cmake to indicate flang's installation directory:
 27 | 
 28 | `export FLANG_INSTALL=$(pwd)`
 29 | 
 30 | 4. Navigate outside of the folder:
 31 | 
 32 | `cd ..`
 33 | 
 34 | #### Building Flang LLVM
 35 | 
 36 | 1. Clone Flang LLVM:
 37 | 
 38 | `git clone https://github.com/flang-compiler/llvm.git`
 39 | 
 40 | 2. Navigate to inside the LLVM folder and swap to the release_60 branch:
 41 | 
 42 | `cd llvm && git checkout release_60`
 43 | 
 44 | 3. Download the patch that prints spilling info. It is located in the OptSched/patches/llvm6.0/ folder and is named:
 45 | 
 46 | `flang-llvm6-print-spilling-info.patch`
 47 | 
 48 | 4. Move the patch file to inside the Flang LLVM directory
 49 | 
 50 | 5. Apply the patch
 51 | 
 52 | `git am flang-llvm6-print-spilling-info.patch`
 53 | 
 54 | 6. Create a build directory and navigate inside it:
 55 | 
 56 | `mkdir build && cd build`
 57 | 
 58 | 7. Build Flang LLVM
 59 | 
 60 | `cmake -DCMAKE_BUILD_TYPE=Debug '-DLLVM_TARGETS_TO_BUILD=X86' -DLLVM_BUILD_TOOLS=ON -DLLVM_INCLUDE_TESTS=ON -DLLVM_OPTIMIZED_TABLEGEN=ON -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL ..`
 61 | 
 62 | `make && make install`
 63 | 
 64 | 8. Navigate outside of the Flang LLVM directory:
 65 | 
 66 | `cd ../..`
 67 | 
 68 | #### Building the Flang driver
 69 | 
 70 | 1. Clone the Flang driver
 71 | 
 72 | `git clone https://github.com/flang-compiler/flang-driver.git`
 73 | 
 74 | 2. Navigate to inside the flang driver folder and swap to the release_60 branch:
 75 | 
 76 | `cd flang-driver && git checkout release_60`
 77 | 
 78 | 3. Create a build directory and navigate inside it:
 79 | 
 80 | `mkdir build && cd build`
 81 | 
 82 | 4. Build the Flang driver:
 83 | 
 84 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DLLVM_CONFIG=$FLANG_INSTALL/bin/llvm-config -DCLANG_ENABLE_STATIC_ANALYZER=ON ..`
 85 | 
 86 | `make && make install`
 87 | 
 88 | 5. Navigate outside of the flang driver directory:
 89 | 
 90 | `cd ../..`
 91 | 
 92 | #### Building the OpenMP runtime library
 93 | 
 94 | 1. Clone the OpenMP runtime library:
 95 | 
 96 | `git clone https://github.com/llvm-mirror/openmp.git`
 97 | 
 98 | 2. Navigate to the OpenMP runtime library directory:
 99 | 
100 | `cd openmp/runtime/`
101 | 
102 | 3. Create a build directory and navigate inside it:
103 | 
104 | `mkdir build && cd build`
105 | 
106 | 4. Build the OpenMP runtime library:
107 | 
108 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang ../..`
109 | 
110 | `make && make install`
111 | 
112 | 5. Navigate outside of the OpenMP runtime directory:
113 | 
114 | `cd ../../..`
115 | 
116 | #### Building libpgmath
117 | 
118 | 1. Clone Flang
119 | 
120 | `git clone https://github.com/flang-compiler/flang.git`
121 | 
122 | 2. Navigate to inside the flang directory:
123 | 
124 | `cd flang`
125 | 
126 | If you are having issues with AVX-512 when building libpgmath, you may need to swap to an older commit
127 | 
128 | For Ubuntu 16.04: [45d7aeb5886c5965a8e793ef3fa632e7e73de56c](https://github.com/flang-compiler/flang/issues/434#issuecomment-403449362)
129 | 
130 | `git checkout 45d7aeb5886c5965a8e793ef3fa632e7e73de56c`
131 | 
132 | For Ubuntu 18.04: [37e6062d969bf337b964fe8119767046fcbdcdfa](https://github.com/flang-compiler/flang/issues/685)
133 | 
134 | `git checkout 37e6062d969bf337b964fe8119767046fcbdcdfa`
135 | 
136 | 3. Navigate to inside the libpgmath directory:
137 | 
138 | `cd runtime/libpgmath`
139 | 
140 | 4. Create a build directory and navigate inside it:
141 | 
142 | `mkdir build && cd build`
143 | 
144 | 5. Build libpgmath:
145 | 
146 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang -DCMAKE_Fortran_COMPILER=$FLANG_INSTALL/bin/flang ..`
147 | 
148 | `make && make install`
149 | 
150 | You may need to install gawk if you are encountering a segmentation fault:
151 | 
152 | `sudo apt-get install gawk`
153 | 
154 | 6. Navigate back to the root directory of flang
155 | 
156 | `cd ../../..`
157 | 
158 | #### Building flang
159 | 
160 | 1. While still in the flang directory, create a build directory for flang and navigate inside it:
161 | 
162 | `mkdir build && cd build`
163 | 
164 | 2. Build flang:
165 | 
166 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang -DCMAKE_Fortran_COMPILER=$FLANG_INSTALL/bin/flang -DLLVM_CONFIG=$FLANG_INSTALL/bin/llvm-config ..`
167 | 
168 | `make && make install`
169 | 
170 | 3. Navigate outside of the flang directory:
171 | 
172 | `cd ../..`
173 | 
174 | 
175 | #### Testing the build with a hello world fortran file
176 | 
177 | 1. Navigate to the directory where flang was installed. In this example, it was flang-install
178 | 
179 | `cd flang-install`
180 | 
181 | 2. Download the hello.f fortran file and put it in your flang-install directory
182 | 
183 | 3. Compile the file:
184 | 
185 | `./bin/flang hello.f`
186 | 
187 | 4. Run the generated file:
188 | 
189 | `./a.out`
190 | 
191 | If you are getting the error:
192 | 
193 | `"libflang.so: cannot open shared object file: No such file or directory"`
194 | 
195 | You will need to add the flang-install/lib directory to the environment variable LD_LIBRARY_PATH:
196 | 
197 | `export LD_LIBRARY_PATH="$(pwd)/lib"`
198 | 
199 | The resulting output should be:
200 | 
201 | `Hello World!`
202 | 


--------------------------------------------------------------------------------
/documentation/flang/hello.f:
--------------------------------------------------------------------------------
1 |        program hello
2 |           print *, "Hello World!"
3 |        end program hello
4 | 


--------------------------------------------------------------------------------
/example/helloworld.cpp:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | 
3 | int main() { std::fputs("Hello, World!\n", stdout); }
4 | 


--------------------------------------------------------------------------------
/example/llvm7-CPU2006-cfg/machine_model.cfg:
--------------------------------------------------------------------------------
 1 | # A simple machine model which always issues one instruction in a cycle or stalls.
 2 | MODEL_NAME: Simple
 3 | 
 4 | # The limit on the total number of instructions that can be issued in one cycle
 5 | ISSUE_RATE: 1
 6 | 
 7 | # Each instruction must have an issue type, i.e. a function unit that the instruction uses.
 8 | ISSUE_TYPE_COUNT: 1
 9 | 
10 | # Default issue type for LLVM instructions.
11 | Default 1
12 | 
13 | DEP_LATENCY_ANTI: 0
14 | DEP_LATENCY_OUTPUT: 1
15 | DEP_LATENCY_OTHER: 1
16 | 
17 | # This will not be used. Reg type info will be taken from the compiler.
18 | REG_TYPE_COUNT: 2
19 | I 1
20 | F 1
21 | 
22 | # Set this to the total number of instruction types (INST_TYPE entries) defined below
23 | INST_TYPE_COUNT: 0
24 | 
25 | # Examples
26 | #1
27 | #INST_TYPE: ADD64rr
28 | #ISSUE_TYPE: Default
29 | #LATENCY: 1
30 | #PIPELINED: YES
31 | #BLOCKS_CYCLE: NO
32 | #SUPPORTED: YES
33 | 
34 | 
35 | #2
36 | #INST_TYPE: IMUL64rr
37 | #ISSUE_TYPE: Default
38 | #LATENCY: 3
39 | #PIPELINED: YES
40 | #BLOCKS_CYCLE: NO
41 | #SUPPORTED: YES
42 | 


--------------------------------------------------------------------------------
/example/optsched-cfg/machine_model.cfg:
--------------------------------------------------------------------------------
 1 | # A simple machine model which always issues one instruction in a cycle or stalls.
 2 | MODEL_NAME: Simple
 3 | 
 4 | # The limit on the total number of instructions that can be issued in one cycle
 5 | ISSUE_RATE: 1
 6 | 
 7 | # Each instruction must have an issue type, i.e. a function unit that the instruction uses.
 8 | ISSUE_TYPE_COUNT: 1
 9 | 
10 | # Default issue type for LLVM instructions.
11 | Default 1
12 | 
13 | DEP_LATENCY_ANTI: 0
14 | DEP_LATENCY_OUTPUT: 1
15 | DEP_LATENCY_OTHER: 1
16 | 
17 | # This will not be used. Reg type info will be taken from the compiler.
18 | REG_TYPE_COUNT: 2
19 | I 1
20 | F 1
21 | 
22 | # Set this to the total number of instruction types (INST_TYPE entries) defined below
23 | INST_TYPE_COUNT: 0
24 | 
25 | # Examples
26 | #1
27 | #INST_TYPE: ADD64rr
28 | #ISSUE_TYPE: Default
29 | #LATENCY: 1
30 | #PIPELINED: YES
31 | #BLOCKS_CYCLE: NO
32 | #SUPPORTED: YES
33 | 
34 | 
35 | #2
36 | #INST_TYPE: IMUL64rr
37 | #ISSUE_TYPE: Default
38 | #LATENCY: 3
39 | #PIPELINED: YES
40 | #BLOCKS_CYCLE: NO
41 | #SUPPORTED: YES
42 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/OptSchedDDGWrapperBase.h:
--------------------------------------------------------------------------------
 1 | //===- OptSchedDDGWrapperBase.h - Interface for DDG wrapper -----*- C++-*--===//
 2 | //
 3 | // Convert an LLVM ScheduleDAG into an OptSched DDG.
 4 | //
 5 | //===----------------------------------------------------------------------===//
 6 | 
 7 | #ifndef LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H
 8 | #define LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H
 9 | 
10 | namespace llvm {
11 | namespace opt_sched {
12 | 
13 | class OptSchedDDGWrapperBase { // Abstract interface for wrappers that convert an LLVM ScheduleDAG into an OptSched DDG.
14 | public:
15 |   virtual ~OptSchedDDGWrapperBase() = default;
16 | 
17 |   virtual void convertSUnits(bool IgnoreRealEdges,
18 |                              bool IgnoreArtificialEdges) = 0; // NOTE(review): presumably converts the DAG's SUnits, with the flags selecting edge kinds to skip — confirm in the implementation.
19 | 
20 |   virtual void convertRegFiles() = 0; // NOTE(review): presumably converts register-file information — confirm in the implementation.
21 | };
22 | 
23 | } // namespace opt_sched
24 | } // namespace llvm
25 | 
26 | #endif // LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H
27 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/OptSchedTarget.h:
--------------------------------------------------------------------------------
  1 | //===- OptSchedTarget.h - OptSched Target -----------------------*- C++-*--===//
  2 | //
  3 | // Interface for target specific functionality in OptSched. This is a workaround
  4 | // to avoid needing to modify or use target code in the trunk.
  5 | //
  6 | //===----------------------------------------------------------------------===//
  7 | 
  8 | #ifndef LLVM_OPT_SCHED_TARGET_H
  9 | #define LLVM_OPT_SCHED_TARGET_H
 10 | 
 11 | #include "opt-sched/Scheduler/OptSchedDDGWrapperBase.h"
 12 | #include "opt-sched/Scheduler/config.h"
 13 | #include "opt-sched/Scheduler/data_dep.h"
 14 | #include "opt-sched/Scheduler/defines.h"
 15 | #include "opt-sched/Scheduler/machine_model.h"
 16 | #include "llvm/ADT/SmallString.h"
 17 | #include "llvm/ADT/SmallVector.h"
 18 | #include "llvm/CodeGen/MachineScheduler.h"
 19 | 
 20 | namespace llvm {
 21 | namespace opt_sched {
 22 | 
 23 | class OptSchedMachineModel;
 24 | class ScheduleDAGOptSched;
 25 | 
 26 | class OptSchedTarget {
 27 | public:
 28 |   MachineModel *MM = nullptr; // Null until a machine model is assigned.
 29 | 
 30 |   virtual ~OptSchedTarget() = default;
 31 |   // Creates the target's machine model from the given configuration file.
 32 |   virtual std::unique_ptr<OptSchedMachineModel>
 33 |   createMachineModel(const char *configFile) = 0;
 34 |   // Creates the DDG wrapper for the given DAG, machine model and region ID.
 35 |   virtual std::unique_ptr<OptSchedDDGWrapperBase>
 36 |   createDDGWrapper(MachineSchedContext *Context, ScheduleDAGOptSched *DAG,
 37 |                    OptSchedMachineModel *MM, LATENCY_PRECISION LatencyPrecision,
 38 |                    const std::string &RegionID) = 0;
 39 | 
 40 |   virtual void initRegion(ScheduleDAGInstrs *DAG, MachineModel *MM,
 41 |                           Config &OccFile) = 0;
 42 |   virtual void finalizeRegion(const InstSchedule *Schedule) = 0;
 43 |   // FIXME: This is a shortcut to doing the proper thing and creating a RP class
 44 |   // that targets can override. It's hard to justify spending the extra time
 45 |   // when we will be refactoring RP tracking in general if we do a rewrite to
 46 |   // fully integrate the scheduler in LLVM.
 47 |   //
 48 |   // Get target specific cost from peak register pressure (e.g. occupancy for
 49 |   // AMDGPU)
 50 |   virtual InstCount
 51 |   getCost(const llvm::SmallVectorImpl<unsigned> &PRP) const = 0;
 52 | 
 53 |   // Targets that wish to discard the finalized schedule for any reason can
 54 |   // override this.
 55 |   virtual bool shouldKeepSchedule() { return true; }
 56 |   // Occupancy-limit hooks. The default implementations do nothing.
 57 |   virtual void SetOccupancyLimit(int) {}
 58 |   virtual void SetShouldLimitOcc(bool) {}
 59 |   virtual void SetOccLimitSource(OCC_LIMIT_TYPE) {}
 60 | };
 61 | 
 62 | template <typename FactoryT> class OptSchedRegistryNode {
 63 | public:
 64 |   llvm::SmallString<16> Name; // Name the factory is registered under.
 65 |   FactoryT Factory;           // Factory stored for that name.
 66 |   OptSchedRegistryNode *Next = nullptr; // Next node in a registry list; null until linked.
 67 | 
 68 |   OptSchedRegistryNode(llvm::StringRef Name_, FactoryT Factory_)
 69 |       : Name(Name_), Factory(Factory_) {}
 70 | };
 71 | 
 72 | template <typename FactoryT> class OptSchedRegistry {
 73 | private:
 74 |   OptSchedRegistryNode<FactoryT> *List = nullptr;    // Head of the node list.
 75 |   OptSchedRegistryNode<FactoryT> *Default = nullptr; // Set by setDefault().
 76 | 
 77 | public:
 78 |   // Pushes Node onto the front of the list. Only the pointer is stored, so
 79 |   // the node must stay alive for as long as the registry is used.
 80 |   void add(OptSchedRegistryNode<FactoryT> *Node) {
 81 |     Node->Next = List;
 82 |     List = Node;
 83 |   }
 84 | 
 85 |   // Returns the factory registered under Name, or nullptr if none matches.
 86 |   // Compared as StringRefs: Name.data() is not guaranteed NUL-terminated.
 87 |   FactoryT getFactoryWithName(llvm::StringRef Name) {
 88 |     for (auto *I = List; I; I = I->Next)
 89 |       if (I->Name.str() == Name)
 90 |         return I->Factory;
 91 |     return nullptr;
 92 |   }
 93 | 
 94 |   // Makes the node registered under Name the default. Asserts (when
 95 |   // assertions are enabled) if no registered node has that name.
 96 |   void setDefault(llvm::StringRef Name) {
 97 |     OptSchedRegistryNode<FactoryT> *Node = nullptr;
 98 |     for (auto *I = List; I; I = I->Next)
 99 |       if (I->Name.str() == Name) {
100 |         Node = I;
101 |         break;
102 |       }
103 |     assert(Node && "Could not set default factory! None in list with name.");
104 |     Default = Node;
105 |   }
106 | 
107 |   // Returns the default factory; asserts if no default has been set.
108 |   FactoryT getDefaultFactory() {
109 |     assert(Default && "Default factory not set.");
110 |     return Default->Factory;
111 |   }
112 | };
113 | 
114 | class OptSchedTargetRegistry // Registry node for an OptSchedTarget factory; adds itself to Registry on construction.
115 |     : public OptSchedRegistryNode<std::unique_ptr<OptSchedTarget> (*)()> {
116 | public:
117 |   using OptSchedTargetFactory = std::unique_ptr<OptSchedTarget> (*)(); // Function pointer returning a newly created target.
118 |   static OptSchedRegistry<OptSchedTargetFactory> Registry; // Registry that every constructed node adds itself to.
119 | 
120 |   OptSchedTargetRegistry(llvm::StringRef Name_, OptSchedTargetFactory Factory_)
121 |       : OptSchedRegistryNode(Name_, Factory_) {
122 |     Registry.add(this); // The registry keeps this pointer, so the node must outlive its use.
123 |   }
124 | };
125 | 
126 | } // namespace opt_sched
127 | } // namespace llvm
128 | 
129 | #endif // LLVM_OPT_SCHED_TARGET_H
130 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/aco.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Implements an Ant colony optimizing scheduler
  3 | Author:       Theodore Dubois
  4 | Created:      Nov. 2017
  5 | Updated By:   Ciprian Elies and Vang Thao
  6 | Last Update:  Jan. 2020
  7 | *******************************************************************************/
  8 | 
  9 | #ifndef OPTSCHED_ACO_H
 10 | #define OPTSCHED_ACO_H
 11 | 
 12 | #include "opt-sched/Scheduler/gen_sched.h"
 13 | #include "llvm/ADT/ArrayRef.h"
 14 | #include "llvm/ADT/SetVector.h"
 15 | #include "llvm/ADT/SmallSet.h"
 16 | #include "llvm/ADT/SmallVector.h"
 17 | #include <map>
 18 | #include <memory>
 19 | #include <utility>
 20 | namespace llvm {
 21 | namespace opt_sched {
 22 | 
 23 | typedef double pheromone_t;
 24 | 
 25 | enum class DCF_OPT {
 26 |   OFF,
 27 |   GLOBAL_ONLY,
 28 |   GLOBAL_AND_TIGHTEN,
 29 |   GLOBAL_AND_ITERATION
 30 | };
 31 | 
 32 | struct Choice {
 33 |   SchedInstruction *inst;
 34 |   pheromone_t heuristic; // range 1 to 2
 35 |   InstCount readyOn; // number of cycles until this instruction becomes ready
 36 | };
 37 | 
 38 | class ACOScheduler : public ConstrainedScheduler { // Ant colony optimization scheduler (see the file header).
 39 | public:
 40 |   ACOScheduler(DataDepGraph *dataDepGraph, MachineModel *machineModel,
 41 |                InstCount upperBound, SchedPriorities priorities, bool vrfySched,
 42 |                bool IsPostBB);
 43 |   virtual ~ACOScheduler();
 44 |   FUNC_RESULT FindSchedule(InstSchedule *schedule, SchedRegion *region);
 45 |   inline void UpdtRdyLst_(InstCount cycleNum, int slotNum);
 46 |   // Sets the initial schedule for ACO.
 47 |   // The default is NULL if none is set.
 48 |   void setInitialSched(InstSchedule *Sched);
 49 | 
 50 | private:
 51 |   pheromone_t &Pheromone(SchedInstruction *from, SchedInstruction *to);
 52 |   pheromone_t &Pheromone(InstCount from, InstCount to);
 53 |   pheromone_t Score(SchedInstruction *from, Choice choice);
 54 |   bool shouldReplaceSchedule(InstSchedule *OldSched, InstSchedule *NewSched,
 55 |                              bool IsGlobal);
 56 |   DCF_OPT ParseDCFOpt(const std::string &opt);
 57 | 
 58 |   void PrintPheromone();
 59 | 
 60 |   // Pheromone graph debugging: start
 61 |   llvm::SmallSet<std::string, 0> DbgRgns;
 62 |   llvm::SmallSet<std::pair<InstCount, InstCount>, 0> AntEdges;
 63 |   llvm::SmallSet<std::pair<InstCount, InstCount>, 0> CrntAntEdges;
 64 |   llvm::SmallSet<std::pair<InstCount, InstCount>, 0> IterAntEdges;
 65 |   llvm::SmallSet<std::pair<InstCount, InstCount>, 0> BestAntEdges;
 66 |   std::map<std::pair<InstCount, InstCount>, double> LastHeu;
 67 |   bool IsDbg = false;
 68 |   std::string OutPath;
 69 |   std::string graphDisplayAnnotation(int Frm, int To);
 70 |   std::string getHeuIfPossible(int Frm, int To);
 71 |   void writePheromoneGraph(std::string Stage);
 72 |   void writePGraphRecursive(FILE *Out, SchedInstruction *Ins,
 73 |                             llvm::SetVector<SchedInstruction *> &Visited);
 74 | 
 75 |   // Pheromone graph debugging: end
 76 | 
 77 |   Choice SelectInstruction(const llvm::ArrayRef<Choice> &ready,
 78 |                            SchedInstruction *lastInst);
 79 |   void UpdatePheromone(InstSchedule *schedule);
 80 |   std::unique_ptr<InstSchedule> FindOneSchedule(InstCount TargetRPCost);
 81 |   llvm::SmallVector<pheromone_t, 0> pheromone_;
 82 |   pheromone_t initialValue_;
 83 |   bool use_fixed_bias;
 84 |   int count_;
 85 |   int heuristicImportance_;
 86 |   bool use_tournament;
 87 |   int fixed_bias;
 88 |   double bias_ratio;
 89 |   double local_decay;
 90 |   double decay_factor;
 91 |   int ants_per_iteration;
 92 |   int ants_per_iteration1p;
 93 |   int ants_per_iteration2p;
 94 |   int noImprovementMax;
 95 |   bool print_aco_trace;
 96 |   std::unique_ptr<InstSchedule> InitialSchedule;
 97 |   bool VrfySched_;
 98 |   bool IsPostBB;
 99 |   bool IsTwoPassEn;
100 |   pheromone_t ScRelMax;
101 |   DCF_OPT DCFOption;
102 |   SPILL_COST_FUNCTION DCFCostFn;
103 |   int localCmp = 0, localCmpRej = 0, globalCmp = 0, globalCmpRej = 0; // NOTE(review): presumably comparison/rejection counters for shouldReplaceSchedule — confirm in aco.cpp.
104 | };
105 | 
106 | } // namespace opt_sched
107 | } // namespace llvm
108 | 
109 | #endif
110 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/array_ref2d.h:
--------------------------------------------------------------------------------
  1 | #ifndef OPTSCHED_ARRAY_REF_2D_H
  2 | #define OPTSCHED_ARRAY_REF_2D_H
  3 | 
  4 | #include "llvm/ADT/ArrayRef.h"
  5 | #include <cassert>
  6 | #include <cstddef>
  7 | 
  8 | namespace llvm {
  9 | namespace opt_sched {
 10 | 
 11 | /**
 12 |  * \brief Provides a 2D view over a single allocation
 13 |  *
 14 |  * \details 2D arrays are best implemented by using a single allocation, then
 15 |  * computing the index into this single allocation based on the 2D location we
 16 |  * are trying to access. This type abstracts away that work, doing it for you.
 17 |  *
 18 |  * \see MutableArrayRef2D
 19 |  */
 20 | template <typename T> class ArrayRef2D {
 21 | public:
 22 |   /**
 23 |    * \brief Constructs an ArrayRef2D with the specified dimensions.
 24 |    * \param Ref     Must have a size of exactly Rows * Columns (asserted).
 25 |    * \param Rows    The number of rows in this 2D matrix.
 26 |    * \param Columns The number of columns in this 2D matrix.
 27 |    */
 28 |   explicit ArrayRef2D(llvm::ArrayRef<T> Ref, size_t Rows, size_t Columns)
 29 |       : Ref(Ref), Rows(Rows), Columns(Columns) {
 30 |     assert(Rows * Columns == Ref.size());
 31 |   }
 32 | 
 33 |   size_t rows() const { return Rows; }
 34 |   size_t columns() const { return Columns; }
 35 | 
 36 |   /**
 37 |    * \brief Access an element at the specified row and column. `[{row, col}]`
 38 |    * \details
 39 |    * A C-style array `int arr[10][20]` is a single contiguous block of memory.
 40 |    * It would be accessed as `arr[row][col]`.
 41 |    * For ArrayRef2D, a single block of memory such as
 42 |    * `int* arr = new int[10 * 20]` is accessed as `ref[{row, col}]`.
 43 |    *
 44 |    * If you want to do x, y indexing, prefer `ref[{y, x}]` over `ref[{x, y}]`.
 45 |    * When accessed in this way, consecutive x values are placed together in
 46 |    * memory, which is usually what is expected.
 47 |    */
 48 |   const T &operator[](size_t(&&RowCol)[2]) const {
 49 |     return Ref[computeIndex(RowCol[0], RowCol[1], Rows, Columns)];
 50 |   }
 51 | 
 52 |   /**
 53 |    * \brief Recovers the underlying ArrayRef.
 54 |    */
 55 |   llvm::ArrayRef<T> underlyingData() const { return Ref; }
 56 | 
 57 | private:
 58 |   llvm::ArrayRef<T> Ref;
 59 |   size_t Rows;
 60 |   size_t Columns;
 61 | 
 62 |   static size_t computeIndex(size_t row, size_t col, size_t Rows,
 63 |                              size_t Columns) {
 64 |     assert(row < Rows && "Invalid row");
 65 |     assert(col < Columns && "Invalid column");
 66 |     size_t index = row * Columns + col; // Row-major: the columns of one row are adjacent.
 67 |     assert(index < Rows * Columns); // Should be redundant with prior asserts.
 68 |     return index;
 69 |   }
 70 | };
 71 | 
 72 | /**
 73 |  * \brief An ArrayRef2D which allows mutation.
 74 |  * \note Inherits from ArrayRef2D, allowing slicing from this type to
 75 |  * ArrayRef2D in the same manner as LLVM's ArrayRef and MutableArrayRef
 76 |  *
 77 |  * \see ArrayRef2D
 78 |  */
 79 | template <typename T> class MutableArrayRef2D : public ArrayRef2D<T> {
 80 | public:
 81 |   explicit MutableArrayRef2D(llvm::MutableArrayRef<T> Ref, size_t Rows,
 82 |                              size_t Columns)
 83 |       : ArrayRef2D<T>(Ref, Rows, Columns) {}
 84 | 
 85 |   /**
 86 |    * \brief Access an element at the specified row and column. `[{row, col}]`
 87 |    * \returns a _mutable_ reference to the element at the specified location.
 88 |    */
 89 |   T &operator[](size_t(&&RowCol)[2]) const {
 90 |     ArrayRef2D<T> cref = *this;
 91 |     return const_cast<T &>(cref[{RowCol[0], RowCol[1]}]);
 92 |   }
 93 | 
 94 |   /**
 95 |    * \brief Recovers the underlying MutableArrayRef. It is rebuilt from the base
 96 |    * view's pointer and size, not by downcasting a temporary ArrayRef. */
 97 |   llvm::MutableArrayRef<T> underlyingData() const {
 98 |     llvm::ArrayRef<T> Base = ArrayRef2D<T>::underlyingData();
 99 |     return llvm::MutableArrayRef<T>(const_cast<T *>(Base.data()), Base.size());
100 |   }
101 | };
102 | } // namespace opt_sched
103 | } // namespace llvm
104 | 
105 | #endif
106 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/buffers.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Defines input buffering classes that can be used for opening,
  3 |               loading, buffering and parsing input files using system-level I/O,
  4 |               which relies on the programmer to do his own buffering, unlike the
  5 |               standard I/O which handles the buffering and hides it from the
  6 |               programmer.
  7 | Author:       Ghassan Shobaki
  8 | Created:      Oct. 1997
  9 | Last Update:  Mar. 2011
 10 | *******************************************************************************/
 11 | 
 12 | #ifndef OPTSCHED_GENERIC_BUFFERS_H
 13 | #define OPTSCHED_GENERIC_BUFFERS_H
 14 | 
 15 | #include "opt-sched/Scheduler/defines.h"
 16 | 
 17 | namespace llvm {
 18 | namespace opt_sched {
 19 | 
 20 | const int INBUF_MAX_PIECES_PERLINE = 30;
 21 | const int INBUF_MAX_LINESIZE = 10000;
 22 | const int DFLT_INPBUF_SIZE = 1000000;
 23 | 
 24 | // String buffer size limits for file/sample names.
 25 | const int MAX_NAMESIZE = 1000;
 26 | 
 27 | enum NXTLINE_TYPE { NXT_EOF, NXT_SPC, NXT_DATA, NXT_ERR };
 28 | 
 29 | // This is an input buffer class for loading, buffering and parsing an input
 30 | // file using system level I/O, where the application program is responsible for
 31 | // allocating an input buffer and loading the file into it in chunks. The size
 32 | // of each chunk used in this class is determined by the DFLT_INPBUF_SIZE value
 33 | // defined above. The class provides methods for skipping white space and
 34 | // comments and reading one valid data line at a time.
 35 | // Lexing Assumptions:
 36 | //   - Files do not contain any invalid characters. So if a character is not a
 37 | //     control character (\r, \n, #, \t or space), it is a valid data character.
 38 | //   - Comments on data lines should be preceded by at least one space character
 39 | //   - All files are scanned linewise
 40 | class InputBuffer {
 41 | public:
 42 |   InputBuffer();
 43 |   ~InputBuffer();
 44 |   int Reload();
 45 |   void Clean();
 46 |   void Unload();
 47 |   char *GetBuf() { return buf; } // Direct, mutable access to the internal buffer pointer.
 48 |   const char *GetFullPath() const { return fullPath; } // Returns the stored path (fixed array of MAX_NAMESIZE chars).
 49 |   FUNC_RESULT Load(const char *const fileName, const char *const path,
 50 |                    long maxByts = DFLT_INPBUF_SIZE);
 51 |   FUNC_RESULT Load(const char *const fullPath, long maxByts = DFLT_INPBUF_SIZE);
 52 |   FUNC_RESULT SetBuf(char *buf, long size);
 53 | 
 54 |   // This function skips all comments and white spaces (tabs are not taken
 55 |   // into account), and does not return until it reaches a valid data line or
 56 |   // end of file. If at least one line starting with space is encountered on
 57 |   // the way, the return value will be NXT_SPC. It should always be called
 58 |   // when the current offset is at the first character of a line
 59 |   // (lineStrt==true).
 60 |   NXTLINE_TYPE skipSpaceAndCmnts();
 61 |   NXTLINE_TYPE GetNxtVldLine(int &pieceCnt, char *strngs[], int lngths[]);
 62 | 
 63 | protected:
 64 |   char *buf;
 65 | 
 66 |   long totSize,     // total size of the buffer
 67 |       loadedByts,   // number of bytes loaded
 68 |       crntOfst,     // current offset within the buffer
 69 |       lineEndOfst,  // the offset of the last LF or CR character seen
 70 |       crntLineOfst, // the offset of the current line
 71 |       crntLineNum;  // the current line number
 72 | 
 73 |   int fileHndl; // NOTE(review): presumably the system-level file handle mentioned in the file header — confirm in buffers.cpp.
 74 |   char crntChar, prevChar;
 75 |   bool lastChnk, cmnt, lineStrt, nxtLineRchd;
 76 |   char fullPath[MAX_NAMESIZE];
 77 | 
 78 |   // Keeps going until it encounters a data character or a line start.
 79 |   int skipSpace();
 80 |   // Keeps going until it encounters a new line (assume no embedded comments).
 81 |   int skipCmnt();
 82 |   // Checks if reloading is necessary and does it or detects end of file.
 83 |   int chckReload();
 84 | 
 85 |   NXTLINE_TYPE GetNxtVldLine_(int &pieceCnt, char *str[], int lngth[],
 86 |                               int maxPieceCnt = INBUF_MAX_PIECES_PERLINE);
 87 |   bool IsWhiteSpaceOrLineEnd(char ch);
 88 |   void ReportError(char *msg, char *lineStrt, int frstLngth); // NOTE(review): msg is a non-const char *, so string literals cannot be passed without a cast.
 89 |   void ReportFatalError(char *msg, char *lineStrt, int frstLngth); // NOTE(review): same non-const msg parameter as ReportError.
 90 | };
 91 | 
 92 | // A specs buffer is an input buffer for parsing a typical input specification
 93 | // or configuration file whose format is line based, i.e., includes one spec
 94 | // or setting per line. This class declares one reader method per value type
 95 | // (string, flag, unsigned long, float, uint64, int and short).
 96 | class SpecsBuffer : public InputBuffer {
 97 | public:
 98 |   SpecsBuffer();
 99 |   explicit SpecsBuffer(char *buf, long size);
100 |   void ReadSpec(const char *const title, char *value);
101 |   void readLine(char *value, int maxPieceCnt);
102 |   void readLstElmnt(char *value);
103 |   int readIntLstElmnt();
104 |   bool ReadFlagSpec(const char *const title, bool dfltValue);
105 |   unsigned long ReadUlongSpec(const char *const title);
106 |   float ReadFloatSpec(const char *const title);
107 |   uint64_t readUInt64Spec(const char *const title);
108 |   int ReadIntSpec(const char *const title);
109 |   int16_t ReadShortSpec(const char *const title);
110 |   FUNC_RESULT checkTitle(const char *const title);
111 |   void ErrorHandle(char *value);
112 | 
113 | protected:
114 |   NXTLINE_TYPE nxtLineType;
115 |   void CombinePieces_(int lngths[], char *strngs[], int startPiece,
116 |                       int endPiece, char *target, int &totLngth);
117 | };
118 | 
119 | } // namespace opt_sched
120 | } // namespace llvm
121 | 
122 | #endif
123 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/config.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Provides an interface to read a configuration file. The format is
 3 |               trivial: each entry is a name and value, separated by whitespace.
 4 |               Multiple entries are also separated by whitespace (usually line
 5 |               breaks). Hash marks after whitespace cause the rest of the line to
 6 |               be ignored.
 7 | Author:       Max Shawabkeh
 8 | Created:      Mar. 2011
 9 | Last Update:  Mar. 2011
10 | *******************************************************************************/
11 | 
12 | #ifndef OPTSCHED_GENERIC_CONFIG_H
13 | #define OPTSCHED_GENERIC_CONFIG_H
14 | 
15 | #include "opt-sched/Scheduler/defines.h"
16 | #include <iostream>
17 | #include <list>
18 | #include <map>
19 | #include <string>
20 | 
21 | namespace llvm {
22 | namespace opt_sched {
23 | 
24 | using std::list;
25 | using std::string;
26 | 
27 | class Config {
28 | public:
29 |   // Loads settings from a configuration file.
30 |   void Load(const string &filepath);
31 |   void Load(std::istream &file);
32 |   // All these functions return the value of a setting record of the given
33 |   // name, with optional automatic parsing and defaults.
34 |   string GetString(const string &name) const;
35 |   string GetString(const string &name, const string &default_) const;
36 |   int64_t GetInt(const string &name) const;
37 |   int64_t GetInt(const string &name, int64_t default_) const;
38 |   float GetFloat(const string &name) const;
39 |   float GetFloat(const string &name, float default_) const;
40 |   bool GetBool(const string &name) const;
41 |   bool GetBool(const string &name, bool default_) const;
42 |   list<string> GetStringList(const string &name) const;
43 |   list<int64_t> GetIntList(const string &name) const;
44 |   list<float> GetFloatList(const string &name) const;
45 | 
46 | protected:
47 |   std::map<string, string> settings; // NOTE(review): presumably maps each setting name to its unparsed value — confirm in config.cpp.
48 | };
49 | 
50 | class SchedulerOptions : public Config {
51 | public:
52 |   // The scheduler flags should only be loaded once, so it is safe to
53 |   // implement this class as a singleton.
54 |   static SchedulerOptions &getInstance();
55 | 
56 |   // Make sure there is no way for a second config object to be accidentally
57 |   // created.
58 |   SchedulerOptions(const SchedulerOptions &) = delete;
59 |   void operator=(const SchedulerOptions &) = delete;
60 | 
61 | private:
62 |   SchedulerOptions() {} // Private: the class offers getInstance() as its only public way to obtain an instance.
63 | };
64 | 
65 | } // namespace opt_sched
66 | } // namespace llvm
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/defines.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Contains common includes, constants, typedefs and enums.
 3 | Author:       Ghassan Shobaki
 4 | Created:      Oct. 1997
 5 | Last Update:  Mar. 2011
 6 | *******************************************************************************/
 7 | 
 8 | #ifndef OPTSCHED_GENERIC_DEFINES_H
 9 | #define OPTSCHED_GENERIC_DEFINES_H
10 | 
11 | // Define basic constants like NULL.
12 | #include <cstddef>
13 | 
14 | // For integral types of specific byte length.
15 | // The new standard <cinttypes> is still not supported everywhere.
16 | #include <cassert>
17 | #include <stdint.h>
18 | 
19 | namespace llvm {
20 | namespace opt_sched {
21 | 
22 | // The standard time unit.
23 | typedef int64_t Milliseconds;
24 | 
25 | // Instruction count.
26 | typedef int InstCount;
27 | 
28 | // A generic sentinel value. Should be used with care.
29 | // TODO(max): Get rid of this in favor of type- or purpose-specific sentinels.
30 | const int INVALID_VALUE = -1;
31 | 
32 | // Possible function call outcomes.
33 | enum FUNC_RESULT {
34 |   // The function encountered an error.
35 |   RES_ERROR = -1,
36 |   // The function consciously failed.
37 |   RES_FAIL = 0,
38 |   // The function succeeded.
39 |   RES_SUCCESS = 1,
40 |   // The function reached the end of the resource (e.g. file) it operated on.
41 |   RES_END = 2,
42 |   // The function did not finish in the time allocated for it.
43 |   RES_TIMEOUT = 3
44 | };
45 | 
46 | // Which mechanism we are using to limit occupancy.
47 | // Limiting occupancy has been shown to improve execution performance
48 | // for some kernels.
49 | enum OCC_LIMIT_TYPE {
50 |   // NONE (presumably no occupancy limit is applied; confirm against users)
51 |   OLT_NONE,
52 |   // Value provided by sched.ini
53 |   OLT_VALUE,
54 |   // AMD's Heuristic
55 |   OLT_HEUR,
56 |   // Hardcoded File
57 |   OLT_FILE,
58 | };
59 | 
60 | } // namespace opt_sched
61 | } // namespace llvm
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/graph_trans.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Implement graph transformations to be applied before scheduling.
  3 | Author:       Austin Kerbow
  4 | Created:      June. 2017
  5 | Last Update:  June. 2017
  6 | *******************************************************************************/
  7 | 
  8 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_H
  9 | #define OPTSCHED_BASIC_GRAPH_TRANS_H
 10 | 
 11 | #include "opt-sched/Scheduler/data_dep.h"
 12 | #include "opt-sched/Scheduler/defines.h"
 13 | #include "opt-sched/Scheduler/lnkd_lst.h"
 14 | #include "opt-sched/Scheduler/sched_region.h"
 15 | #include <list>
 16 | #include <memory>
 17 | 
 18 | namespace llvm {
 19 | namespace opt_sched {
 20 | 
 21 | // A and B are independent iff there is no path between A and B (in either
 22 | // direction)
 23 | bool areNodesIndependent(const SchedInstruction *A, const SchedInstruction *B);
 24 | 
 25 | // Adds an edge (A --> B) to the graph, updating recursive neighbors.
 26 | // The type of the added edge is OTHER.
 27 | GraphEdge *addSuperiorEdge(DataDepGraph &DDG, SchedInstruction *A,
 28 |                            SchedInstruction *B, int latency = 0);
 29 | 
 30 | // An abstract graph transformation class.
 31 | class GraphTrans {
 32 | 
 33 | public:
 34 |   GraphTrans(DataDepGraph *dataDepGraph);
 35 |   virtual ~GraphTrans() = default;
 36 | 
 37 |   virtual const char *Name() const = 0; // Identifier string of the concrete transformation.
 38 | 
 39 |   // Apply the graph transformation to the DataDepGraph.
 40 |   virtual FUNC_RESULT ApplyTrans() = 0;
 41 | 
 42 |   void SetDataDepGraph(DataDepGraph *dataDepGraph);
 43 | 
 44 |   void SetSchedRegion(SchedRegion *schedRegion);
 45 | 
 46 |   void SetNumNodesInGraph(InstCount numNodesInGraph);
 47 | 
 48 | protected:
 49 |   DataDepGraph *GetDataDepGraph_() const;
 50 |   SchedRegion *GetSchedRegion_() const;
 51 |   InstCount GetNumNodesInGraph_() const;
 52 | 
 53 | private:
 54 |   // A pointer to the graph. Defaults to null unless the constructor sets it.
 55 |   DataDepGraph *dataDepGraph_ = nullptr;
 56 | 
 57 |   // A pointer to the scheduling region. Null until SetSchedRegion is called.
 58 |   SchedRegion *schedRegion_ = nullptr;
 59 | 
 60 |   // The total number of nodes in the graph. Defaults to 0 until it is set.
 61 |   InstCount numNodesInGraph_ = 0;
 62 | };
 63 | 
 64 | inline DataDepGraph *GraphTrans::GetDataDepGraph_() const {
 65 |   return dataDepGraph_;
 66 | }
 67 | inline void GraphTrans::SetDataDepGraph(DataDepGraph *dataDepGraph) {
 68 |   dataDepGraph_ = dataDepGraph;
 69 | }
 70 | 
 71 | inline SchedRegion *GraphTrans::GetSchedRegion_() const { return schedRegion_; }
 72 | inline void GraphTrans::SetSchedRegion(SchedRegion *schedRegion) {
 73 |   schedRegion_ = schedRegion;
 74 | }
 75 | 
 76 | inline InstCount GraphTrans::GetNumNodesInGraph_() const {
 77 |   return numNodesInGraph_;
 78 | }
 79 | inline void GraphTrans::SetNumNodesInGraph(InstCount numNodesInGraph) {
 80 |   numNodesInGraph_ = numNodesInGraph;
 81 | }
 82 | 
 83 | // Node superiority graph transformation.
 84 | class StaticNodeSupTrans : public GraphTrans {
 85 | public:
 86 |   StaticNodeSupTrans(DataDepGraph *dataDepGraph, bool IsMultiPass);
 87 | 
 88 |   const char *Name() const override { return "rp.nodesup"; }
 89 | 
 90 |   FUNC_RESULT ApplyTrans() override;
 91 | 
 92 |   static bool isNodeSuperior(DataDepGraph &DDG, int A, int B); // A and B are node numbers (NodeIsSuperior_ passes GetNum() values).
 93 | 
 94 |   struct Statistics {
 95 |     int NumEdgesAdded = 0;
 96 |     int NumEdgesRemoved = 0;
 97 |   };
 98 |   static void removeRedundantEdges(DataDepGraph &DDG, int i, int j,
 99 |                                    Statistics &Stats);
100 | 
101 | private:
102 |   // Whether multiple passes are enabled.
103 |   bool IsMultiPass;
104 | 
105 |   // Return true if node A is superior to node B.
106 |   bool NodeIsSuperior_(SchedInstruction *nodeA, SchedInstruction *nodeB) {
107 |     return isNodeSuperior(*GetDataDepGraph_(), nodeA->GetNum(),
108 |                           nodeB->GetNum());
109 |   }
110 | 
111 |   // Check if there is superiority involving nodes A and B. If yes, choose which
112 |   // edge to add.
113 |   // Returns the added edge if added, else nullptr
114 |   GraphEdge *TryAddingSuperiorEdge_(SchedInstruction *nodeA,
115 |                                     SchedInstruction *nodeB);
116 | 
117 |   // Keep trying to find superior nodes until none can be found or there are no
118 |   // more independent nodes.
119 |   void nodeMultiPass_(
120 |       std::list<std::pair<SchedInstruction *, SchedInstruction *>>); // NOTE(review): the list is taken by value, so each call copies it — confirm this is intended.
121 | };
122 | 
123 | } // namespace opt_sched
124 | } // namespace llvm
125 | 
126 | #endif
127 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/graph_trans_ilp.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_H
 2 | #define OPTSCHED_BASIC_GRAPH_TRANS_ILP_H
 3 | 
 4 | #include "opt-sched/Scheduler/array_ref2d.h"
 5 | #include "opt-sched/Scheduler/graph_trans.h"
 6 | #include "llvm/ADT/SmallPtrSet.h"
 7 | #include "llvm/ADT/SmallVector.h"
 8 | #include <memory>
 9 | 
10 | namespace llvm {
11 | namespace opt_sched {
12 | 
13 | // Node superiority ILP graph transformation.
14 | class StaticNodeSupILPTrans : public GraphTrans {
15 | public:
16 |   StaticNodeSupILPTrans(DataDepGraph *dataDepGraph);
17 |   // Identifier string for this transformation.
18 |   const char *Name() const override { return "ilp.nodesup"; }
19 | 
20 |   FUNC_RESULT ApplyTrans() override;
21 |   // Edge counters; every field starts at zero.
22 |   struct Statistics {
23 |     int NumEdgesAdded = 0;
24 |     int NumResourceEdgesAdded = 0;
25 |     int NumEdgesRemoved = 0;
26 |   };
27 |   // Working state passed to the static helpers; most members are references.
28 |   struct Data {
29 |     DataDepGraph &DDG;
30 |     MutableArrayRef2D<int> DistanceTable;
31 |     MutableArrayRef2D<int> SuperiorArray;
32 |     llvm::SmallVectorImpl<std::pair<int, int>> &SuperiorNodesList;
33 |     llvm::SmallPtrSetImpl<GraphEdge *> &AddedEdges;
34 |     Statistics &Stats;
35 |   };
36 |   // Inline element count used for the SmallVector types below.
37 |   static constexpr int SmallSize = 64;
38 |   // Builders for the individual tables; each returns its result by value.
39 |   static llvm::SmallVector<int, SmallSize>
40 |   createDistanceTable(DataDepGraph &DDG);
41 | 
42 |   static llvm::SmallVector<int, SmallSize>
43 |   createSuperiorArray(DataDepGraph &DDG, ArrayRef2D<int> DistanceTable);
44 | 
45 |   static llvm::SmallVector<std::pair<int, int>, SmallSize>
46 |   createSuperiorNodesList(ArrayRef2D<int> SuperiorArray);
47 |   // Stores the containers by value next to a heap-allocated Data (Data_).
48 |   class DataAlloc {
49 |     friend class StaticNodeSupILPTrans;
50 | 
51 |   public:
52 |     explicit DataAlloc(DataDepGraph &DDG);
53 |     Data &getData() { return *Data_; }
54 | 
55 |   public:
56 |     llvm::SmallVector<int, SmallSize> DistanceTable;
57 |     llvm::SmallVector<int, SmallSize> SuperiorArray;
58 |     llvm::SmallVector<std::pair<int, int>, SmallSize> SuperiorNodesList;
59 |     llvm::SmallPtrSet<GraphEdge *, 32> AddedEdges;
60 |     Statistics Stats = {};
61 |     // NOTE(review): Data_ presumably refers to the members above; confirm.
62 |   private:
63 |     std::unique_ptr<Data> Data_;
64 |   };
65 |   // Convenience factory: returns DataAlloc(DDG).
66 |   static DataAlloc createData(DataDepGraph &DDG) { return DataAlloc(DDG); }
67 | 
68 |   static void setDistanceTable(Data &Data, int i, int j, int Val);
69 | 
70 |   static void updateDistanceTable(Data &Data, int i, int j);
71 | 
72 |   static void addZeroLatencyEdge(Data &Data, int i, int j);
73 | 
74 |   static void addNecessaryResourceEdges(DataDepGraph &DDG, int i, int j,
75 |                                         Statistics &Stats);
76 |   // Same operation, taking the graph and statistics from Data.
77 |   static void addNecessaryResourceEdges(Data &Data, int i, int j) {
78 |     addNecessaryResourceEdges(Data.DDG, i, j, Data.Stats);
79 |   }
80 | 
81 |   static void removeRedundantEdges(DataDepGraph &DDG,
82 |                                    ArrayRef2D<int> DistanceTable, int i, int j,
83 |                                    Statistics &Stats);
84 |   // Same operation, taking graph, distance table and statistics from Data.
85 |   static void removeRedundantEdges(Data &Data, int i, int j) {
86 |     removeRedundantEdges(Data.DDG, Data.DistanceTable, i, j, Data.Stats);
87 |   }
88 | };
89 | 
90 | } // namespace opt_sched
91 | } // namespace llvm
92 | 
93 | #endif
94 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Implement graph transformations to be applied before scheduling.
 3 | Author:       Justin Bassett
 4 | Created:      Aug. 2020
 5 | Last Update:  Aug. 2020
 6 | *******************************************************************************/
 7 | 
 8 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H
 9 | #define OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H
10 | 
11 | #include "opt-sched/Scheduler/graph_trans.h"
12 | 
13 | namespace llvm {
14 | namespace opt_sched {
15 | 
16 | // Occupancy-preserving variant of the ILP node superiority transformation.
17 | class StaticNodeSupOccupancyPreservingILPTrans : public GraphTrans {
18 | public:
19 |   StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *dataDepGraph);
20 |   // Identifier string for this transformation.
21 |   const char *Name() const override {
22 |     return "occupancy-preserving-ilp.nodesup";
23 |   }
24 |   // Overrides GraphTrans::ApplyTrans(); the definition is not in this header.
25 |   FUNC_RESULT ApplyTrans() override;
26 | };
27 | 
28 | } // namespace opt_sched
29 | } // namespace llvm
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/hist_table.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Defines a history table class.
  3 | Author:       Ghassan Shobaki
  4 | Created:      Unknown
  5 | Last Update:  Mar. 2011
  6 | *******************************************************************************/
  7 | 
  8 | #ifndef OPTSCHED_ENUM_HIST_TABLE_H
  9 | #define OPTSCHED_ENUM_HIST_TABLE_H
 10 | 
 11 | #include "opt-sched/Scheduler/defines.h"
 12 | #include "opt-sched/Scheduler/enumerator.h"
 13 | #include "opt-sched/Scheduler/gen_sched.h"
 14 | #include "opt-sched/Scheduler/hash_table.h"
 15 | #include "opt-sched/Scheduler/mem_mngr.h"
 16 | #include <cstdio>
 17 | #include <iostream>
 18 | #include <limits>
 19 | #include <memory>
 20 | #include <vector>
 21 | 
 22 | namespace llvm {
 23 | namespace opt_sched {
 24 | 
 25 | class EnumTreeNode;
 26 | class Enumerator;
 27 | 
 28 | // The history version of a tree node to be kept in the history table
 29 | class HistEnumTreeNode {
 30 | public:
 31 |   HistEnumTreeNode();
 32 |   virtual ~HistEnumTreeNode();
 33 | 
 34 |   InstCount GetTime();
 35 |   void PrntPartialSched(std::ostream &out);
 36 |   bool CompPartialScheds(HistEnumTreeNode *othrHist);
 37 |   InstCount GetInstNum();
 38 |   bool IsPrdcsrViaStalls(HistEnumTreeNode *othrNode);
 39 |   HistEnumTreeNode *GetParent();
 40 |   void Clean();
 41 |   void ReplaceParent(HistEnumTreeNode *newParent);
 42 |   // Does the scheduled inst. list of this node match that of the given node
 43 |   bool DoesMatch(EnumTreeNode *node, Enumerator *enumrtr);
 44 |   // Is the sub-problem at this node dominated by the given node's?
 45 |   bool IsDominated(EnumTreeNode *node, Enumerator *enumrtr);
 46 |   // Does the sub-problem at this node dominate the given node's?
 47 |   virtual bool DoesDominate(EnumTreeNode *node, Enumerator *enumrtr);
 48 |   virtual void Construct(EnumTreeNode *node, bool isTemp);
 49 |   virtual void SetCostInfo(EnumTreeNode *node, bool isTemp,
 50 |                            Enumerator *enumrtr);
 51 |   const std::shared_ptr<std::vector<SchedInstruction *>> &GetSuffix() const;
 52 |   void
 53 |   SetSuffix(const std::shared_ptr<std::vector<SchedInstruction *>> &suffix);
 54 |   std::vector<InstCount> GetPrefix() const;
 55 |   // Returns inst_->GetNum(); inst_ is dereferenced without a null check.
 56 |   inline int getInstNum() { return inst_->GetNum(); }
 57 | 
 58 | protected:
 59 |   HistEnumTreeNode *prevNode_; // NOTE(review): presumably the parent; confirm
 60 | 
 61 |   // The current time or position (or step number) in the scheduling process.
 62 |   // This is equal to the length of the path from the root node to this node.
 63 |   InstCount time_;
 64 |   // The instruction whose number getInstNum() reports.
 65 |   SchedInstruction *inst_;
 66 |   // Declared only in builds that define IS_DEBUG.
 67 | #ifdef IS_DEBUG
 68 |   bool isCnstrctd_;
 69 | #endif
 70 | 
 71 |   bool crntCycleBlkd_;
 72 |   ReserveSlot *rsrvSlots_;
 73 | 
 74 |   // (Chris) Shared list of suffix instructions; default-initialized to null.
 75 |   std::shared_ptr<std::vector<SchedInstruction *>> suffix_ = nullptr;
 76 | 
 77 |   InstCount SetLastInsts_(SchedInstruction *lastInsts[], InstCount thisTime,
 78 |                           InstCount minTimeToExmn);
 79 |   void SetInstsSchduld_(BitVector *instsSchduld);
 80 |   // Does this history node dominate the given node or history node?
 81 |   bool DoesDominate_(EnumTreeNode *node, HistEnumTreeNode *othrHstry,
 82 |                      ENUMTREE_NODEMODE mode, Enumerator *enumrtr,
 83 |                      InstCount shft);
 84 |   void SetLwrBounds_(InstCount lwrBounds[], SchedInstruction *lastInsts[],
 85 |                      InstCount thisTime, InstCount minTimeToExmn,
 86 |                      Enumerator *enumrtr);
 87 |   void CmputNxtAvlblCycles_(Enumerator *enumrtr, InstCount instsPerType[],
 88 |                             InstCount nxtAvlblCycles[]);
 89 |   // Virtual initialization hook; CostHistEnumTreeNode redeclares it.
 90 |   virtual void Init_();
 91 |   void AllocLastInsts_(ArrayMemAlloc<SchedInstruction *> *lastInstsAlctr,
 92 |                        Enumerator *enumrtr);
 93 |   bool IsAbslutDmnnt_();
 94 |   InstCount GetMinTimeToExmn_(InstCount nodeTime, Enumerator *enumrtr);
 95 |   InstCount GetLwrBound_(SchedInstruction *inst, int16_t issuRate);
 96 |   void SetRsrvSlots_(EnumTreeNode *node);
 97 | };
 98 | // History node that additionally carries the cost fields declared below.
 99 | class CostHistEnumTreeNode : public HistEnumTreeNode {
100 | public:
101 |   CostHistEnumTreeNode();
102 |   ~CostHistEnumTreeNode() override;
103 |   // Overrides of the HistEnumTreeNode virtuals, marked so the compiler checks.
104 |   void Construct(EnumTreeNode *node, bool isTemp) override;
105 |   // Does the sub-problem at this node dominate the given node's?
106 |   bool DoesDominate(EnumTreeNode *node, Enumerator *enumrtr) override;
107 |   void SetCostInfo(EnumTreeNode *node, bool isTemp,
108 |                    Enumerator *enumrtr) override;
109 | protected:
110 |   // Why do we need to copy this data from region->tree_node->hist_node
111 |   InstCount cost_;
112 |   InstCount peakSpillCost_;
113 |   InstCount spillCostSum_;
114 | 
115 |   // (Chris)
116 |   InstCount totalCost_ = -1;
117 |   InstCount partialCost_ = -1;
118 |   bool totalCostIsActualCost_ = false;
119 |   // NOTE(review): unlike its neighbours, SuffixRPCost has no initializer.
120 |   InstCount TotalSpillCost_ = -1;
121 |   InstCount PartialSpillCost_ = -1;
122 |   InstCount SuffixRPCost;
123 | 
124 |   bool isLngthFsbl_;
125 | #ifdef IS_DEBUG
126 |   bool costInfoSet_;
127 | #endif
128 | 
129 |   bool chkCostDmntnForSinglePass(EnumTreeNode *node,
130 |                                  LengthCostEnumerator *enumrtr);
131 |   bool chkCostDmntnForTwoPass(EnumTreeNode *Node, LengthCostEnumerator *E);
132 |   bool ChkCostDmntn_(EnumTreeNode *node, LengthCostEnumerator *enumrtr,
133 |                      InstCount &maxShft);
134 |   void Init_() override;
135 | };
136 | 
137 | } // namespace opt_sched
138 | } // namespace llvm
139 | 
140 | #endif
141 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/list_sched.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Defines a list scheduler, based on the definitions of a generic
 3 |               scheduler and a constrained scheduler in gen_sched.h, which
 4 |               collectively include the meat of the implementation.
 5 | Author:       Ghassan Shobaki
 6 | Created:      Apr. 2002
 7 | Last Update:  Sept. 2013
 8 | *******************************************************************************/
 9 | 
10 | #ifndef OPTSCHED_LIST_SCHED_LIST_SCHED_H
11 | #define OPTSCHED_LIST_SCHED_LIST_SCHED_H
12 | 
13 | #include "opt-sched/Scheduler/gen_sched.h"
14 | 
15 | namespace llvm {
16 | namespace opt_sched {
17 | 
18 | class ListScheduler : public ConstrainedScheduler {
19 | public:
20 |   // Creates a list scheduler for the given dependence graph, machine and
21 |   // schedule upper bound, using the specified heuristic.
22 |   ListScheduler(DataDepGraph *dataDepGraph, MachineModel *machMdl,
23 |                 InstCount schedUprBound, SchedPriorities prirts);
24 |   virtual ~ListScheduler();
25 | 
26 |   // Calculates the schedule and returns it in the passed argument.
27 |   FUNC_RESULT FindSchedule(InstSchedule *sched, SchedRegion *rgn);
28 |   // Members below are accessible to derived schedulers.
29 | protected:
30 |   bool isDynmcPrirty_; // NOTE(review): presumably "dynamic priority"; confirm
31 |   // Adds the instructions that have just become ready at this cycle to the
32 |   // ready list.
33 |   void UpdtRdyLst_(InstCount cycleNum, int slotNum);
34 | 
35 |   // Check whether the next node ID instruction is ready -- used to collect
36 |   // scheduling stats for LLVM generating schedules
37 |   bool CheckForInst(int numToPick) const;
38 | 
39 |   // Pick next instruction to be scheduled. Returns NULL if no instructions are
40 |   // ready. Virtual: StallSchedulingListScheduler redeclares it.
41 |   virtual SchedInstruction *PickInst() const;
42 | };
43 | 
44 | // Force the list scheduler to maintain the source ordering of the instructions
45 | // regardless of latency or machine model constraints.
46 | class SequentialListScheduler : public ListScheduler {
47 | public:
48 |   SequentialListScheduler(DataDepGraph *dataDepGraph, MachineModel *machMdl,
49 |                           InstCount schedUprBound, SchedPriorities prirts);
50 | 
51 | private:
52 |   // Does this instruction come next in the source ordering after all currently
53 |   // scheduled instructions, e.g. 0, 1, 2, 3, 4.
54 |   bool IsSequentialInstruction(const SchedInstruction *Inst) const;
55 |   // Overrides a virtual legality check inherited through ListScheduler.
56 |   bool ChkInstLglty_(SchedInstruction *inst) const override;
57 | };
58 | 
59 | // A list scheduler that schedules the instruction with the top heuristic value.
60 | // Unlike ListScheduler, this class considers instructions that are ready
61 | // in terms of data dependencies, but not in terms of latencies.
62 | // If the instruction with the top heuristic is not ready in terms of latency,
63 | // then stalls will be inserted until it is ready.
64 | class StallSchedulingListScheduler : public ListScheduler {
65 | public:
66 |   StallSchedulingListScheduler(DataDepGraph *dataDepGraph,
67 |                                MachineModel *machMdl, InstCount schedUprBound,
68 |                                SchedPriorities prirts);
69 |   // Overrides the virtual ListScheduler::PickInst().
70 |   SchedInstruction *PickInst() const override;
71 | };
72 | 
73 | } // namespace opt_sched
74 | } // namespace llvm
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/logger.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Implements a simple logger that writes out messages to a file or
  3 |               to a standard stream.
  4 | Author:       Max Shawabkeh
  5 | Created:      Mar. 2011
  6 | Last Update:  Mar. 2011
  7 | *******************************************************************************/
  8 | 
  9 | #ifndef OPTSCHED_GENERIC_LOGGER_H
 10 | #define OPTSCHED_GENERIC_LOGGER_H
 11 | 
 12 | #include "opt-sched/Scheduler/defines.h"
 13 | #include <array>
 14 | #include <cstddef>
 15 | #include <cstdint>
 16 | #include <iosfwd>
 17 | #include <type_traits>
 18 | #include <utility>
 19 | 
 20 | namespace llvm {
 21 | namespace opt_sched {
 22 | 
 23 | namespace Logger {
 24 | // Error severity levels. Each level has its own bit value (1, 2, 4, 8).
 25 | enum LOG_LEVEL {
 26 |   // Fatal error. Exit program.
 27 |   FATAL = 1,
 28 |   // Non-fatal error. Program should continue.
 29 |   ERROR = 2,
 30 |   // Generic non-error logging message.
 31 |   INFO = 4,
 32 |   // A summary message that should be shown only in the summary log.
 33 |   SUMMARY = 8
 34 | };
 35 | 
 36 | // Directs all subsequent log output to the specified output stream. Defaults
 37 | // to the standard error stream if not set.
 38 | void SetLogStream(std::ostream &out);
 39 | std::ostream &GetLogStream();
 40 | 
 41 | // Output a log message of a given level, either with a timestamp or without.
 42 | // Expects a printf-style format string and a variable number of arguments to
 43 | // place into the string.
 44 | void Log(LOG_LEVEL level, bool timed, const char *format_string, ...);
 45 | 
 46 | // Registers a periodic logging function that will respond to being called at
 47 | // most every period milliseconds and act as a no-op until the period has
 48 | // passed. Note that time measuring is in process CPU time.
 49 | void RegisterPeriodicLogger(Milliseconds period, void (*callback)());
 50 | // Runs the previously registered logging function. If the period has not
 51 | // passed since the last call to PeriodicLog() or RegisterPeriodicLogger(),
 52 | // this acts as a no-op.
 53 | void PeriodicLog();
 54 | 
 55 | // Shortcuts for each logging level.
 56 | [[noreturn]] void Fatal(const char *format_string, ...);
 57 | void Error(const char *format_string, ...);
 58 | void Info(const char *format_string, ...);
 59 | void Summary(const char *format_string, ...);
 60 | 
 61 | namespace detail {
 62 | // TODO: Once C++17 is available, replace EventAttrType and EventAttrValue with
 63 | // a std::variant.
 64 | 
 65 | /** Tag naming which kind of value an Event attribute carries. */
 66 | enum class EventAttrType {
 67 |   Int64,
 68 |   UInt64,
 69 |   CStr,
 70 |   Bool,
 71 | };
 72 | 
 73 | /* Overload set mapping an argument's static type to its EventAttrType. */
 74 | inline EventAttrType GetEventAttrType(bool) { return EventAttrType::Bool; }
 75 | 
 76 | inline EventAttrType GetEventAttrType(const char *) {
 77 |   return EventAttrType::CStr;
 78 | }
 79 | 
 80 | template <typename Int,
 81 |           typename std::enable_if<std::is_integral<Int>::value, int>::type = 0>
 82 | inline EventAttrType GetEventAttrType(Int) {
 83 |   // Only an unsigned type at least as wide as int64_t is tagged UInt64;
 84 |   // every other integral type is reported as Int64.
 85 |   const bool StoredAsSigned =
 86 |       std::is_signed<Int>::value || sizeof(Int) < sizeof(int64_t);
 87 |   return StoredAsSigned ? EventAttrType::Int64 : EventAttrType::UInt64;
 88 | }
 89 | 
 90 | /** Untagged storage for the value of an Event attribute. */
 91 | union EventAttrValue {
 92 |   int64_t i64;
 93 |   uint64_t u64;
 94 |   const char *cstr;
 95 |   bool b;
 96 | 
 97 |   EventAttrValue(bool val) : b{val} {}
 98 |   EventAttrValue(const char *val) : cstr{val} {}
 99 |   // Integral values use the same signed/unsigned rule as GetEventAttrType().
100 |   template <typename Int, typename std::enable_if<std::is_integral<Int>::value,
101 |                                                   int>::type = 0>
102 |   EventAttrValue(Int val) {
103 |     if (!std::is_signed<Int>::value && sizeof(Int) >= sizeof(int64_t)) {
104 |       u64 = val;
105 |     } else {
106 |       i64 = val;
107 |     }
108 |   }
109 | };
110 | 
111 | /** Out-of-line implementation behind Logger::Event(...). */
112 | void Event(const std::pair<EventAttrType, EventAttrValue> *attrs,
113 |            size_t numAttrs);
114 | } // namespace detail
115 | 
116 | /**
117 |  * \brief Logs an event in a JSON format.
118 |  * \details
119 |  *
120 |  * ``Logger::Event(eventID, [key, value]...)``
121 |  *
122 |  * Emits messages shaped like `EVENT: {"event_id": eventID, "key": value...}`
123 |  * so that tools can parse them easily later on. The current time is always
124 |  * included.
125 |  *
126 |  * \param eventID a unique ID identifying this event. It should match the
127 |  * regular expression `[A-Z0-9_]+`; in particular it contains no spaces.
128 |  *
129 |  * \param args An alternating list of keys and values.
130 |  *
131 |  * \warning Any change to a log statement of this format requires a change in
132 |  * our log-parsing scripts.
133 |  */
134 | template <typename... Args>
135 | void Event(const char *eventID, const Args &... args) {
136 |   static_assert(sizeof...(Args) % 2 == 0,
137 |                 "Every key must have a corresponding value.");
138 |   // One slot per key/value argument plus the leading "event_id" pair.
139 |   constexpr size_t NumAttrs = sizeof...(Args) + 2;
140 |   using Attr = std::pair<detail::EventAttrType, detail::EventAttrValue>;
141 |   const std::array<Attr, NumAttrs> Attrs{
142 |       Attr(detail::EventAttrType::CStr, detail::EventAttrValue("event_id")),
143 |       Attr(detail::EventAttrType::CStr, detail::EventAttrValue(eventID)),
144 |       // Every argument is stored together with the tag for its static type.
145 |       Attr(detail::GetEventAttrType(args),
146 |            detail::EventAttrValue(args))...,
147 |   };
148 | 
149 |   detail::Event(Attrs.data(), Attrs.size());
150 | }
151 | 
152 | } // namespace Logger
153 | 
154 | } // namespace opt_sched
155 | } // namespace llvm
156 | 
157 | #endif
158 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/mem_mngr.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Implements application-level memory management used to avoid the OS
  3 |               overhead in performance-critical sections of the code.
  4 | Author:       Ghassan Shobaki
  5 | Created:      Mar. 2003
  6 | Last Update:  Mar. 2011
  7 | *******************************************************************************/
  8 | 
  9 | #ifndef OPTSCHED_GENERIC_MEM_MNGR_H
 10 | #define OPTSCHED_GENERIC_MEM_MNGR_H
 11 | 
 12 | #include "opt-sched/Scheduler/defines.h"
 13 | #include "opt-sched/Scheduler/lnkd_lst.h"
 14 | #include "opt-sched/Scheduler/logger.h"
 15 | #include <cstring>
 16 | 
 17 | namespace llvm {
 18 | namespace opt_sched {
 19 | 
 20 | template <class T> class MemAlloc {
 21 | public:
 22 |   // Allocates a new memory block of an initial size with an optional maximum
 23 |   // size. If no maximum size is specified, the memory is allocated
 24 |   // dynamically. The size is in the number of objects of type T.
 25 |   inline MemAlloc(int blockSize, int maxSize = INVALID_VALUE);
 26 |   // Deallocates the memory.
 27 |   inline ~MemAlloc();
 28 |   // Marks all allocated memory as unused (and available for reuse).
 29 |   inline void Reset();
 30 |   // Returns an allocated object.
 31 |   inline T *GetObject();
 32 |   // Frees an object and recycles it for future use.
 33 |   inline void FreeObject(T *obj);
 34 | 
 35 | protected:
 36 |   // The number of objects in each memory block allocated.
 37 |   int blockSize_;
 38 |   // The maximum number of objects to keep allocated.
 39 |   int maxSize_;
 40 |   // The block that new objects are currently taken from.
 41 |   T *currentBlock_;
 42 |   // The index of the next available object in the current block.
 43 |   int currentIndex_;
 44 |   // A linked list of every block allocated so far, the current one included.
 45 |   LinkedList<T> allocatedBlocks_;
 46 |   // A linked list of free objects available for reuse.
 47 |   Stack<T> availableObjects_;
 48 |   // Whether any of the already allocated blocks are still unused.
 49 |   bool allocatedBlocksAvailable_;
 50 | 
 51 |   // Makes sure currentBlock_ points to an unused block, allocating a new one
 52 |   // if needed.
 53 |   inline void GetNewBlock_();
 54 |   // Allocates a new block.
 55 |   inline void AllocNewBlock_();
 56 |   // Returns a pointer to an array of count unused objects.
 57 |   inline T *GetObjects_(int count);
 58 | };
 59 | 
 60 | template <class T> class ArrayMemAlloc : public MemAlloc<T> {
 61 | public:
 62 |   // Allocates a memory block that contains arraysPerBlock arrays, each
 63 |   // containing arraySize elements of type T.
 64 |   inline ArrayMemAlloc(int arraysPerBlock, int arraySize)
 65 |       : MemAlloc<T>(arraysPerBlock * arraySize) {
 66 |     arraySize_ = arraySize;
 67 |   }
 68 |   // Returns an allocated array of objects.
 69 |   inline T *GetArray() { return MemAlloc<T>::GetObjects_(arraySize_); }
 70 |   // Recycles an array; FreeObject must be qualified (dependent base class).
 71 |   inline void FreeArray(T *array) { MemAlloc<T>::FreeObject(array); }
 72 | 
 73 | protected:
 74 |   // The size of each array.
 75 |   int arraySize_;
 76 | };
 77 | 
 78 | template <class T>
 79 | inline MemAlloc<T>::MemAlloc(int blockSize, int maxSize)
 80 |     : blockSize_(blockSize), maxSize_(maxSize), currentBlock_(NULL),
 81 |       currentIndex_(0), availableObjects_(maxSize),
 82 |       allocatedBlocksAvailable_(false) {
 83 |   // When a maximum size is given, one block must fit within it.
 84 |   assert(maxSize == INVALID_VALUE || blockSize <= maxSize);
 85 |   // No block exists yet, so this call allocates the first one and makes it
 86 |   // the current block.
 87 |   GetNewBlock_();
 88 | }
 89 | 
 90 | template <class T> inline MemAlloc<T>::~MemAlloc() {
 91 |   // Walk the block list and release each array allocation.
 92 |   T *block = allocatedBlocks_.GetFrstElmnt();
 93 |   for (; block != NULL; block = allocatedBlocks_.GetNxtElmnt())
 94 |     delete[] block;
 95 | }
 96 | 
 97 | template <class T> inline void MemAlloc<T>::Reset() {
 98 |   assert(allocatedBlocks_.GetElmntCnt() >= 1);
 99 |   allocatedBlocksAvailable_ = true; // existing blocks may be handed out again
100 |   currentIndex_ = 0;
101 |   currentBlock_ = allocatedBlocks_.GetFrstElmnt(); // rewind to the first block
102 |   availableObjects_.Reset();
103 | }
104 | 
105 | template <class T> inline void MemAlloc<T>::GetNewBlock_() {
106 |   // Try an already-allocated block first (only possible after Reset()).
107 |   T *next = NULL;
108 |   if (allocatedBlocksAvailable_) {
109 |     next = allocatedBlocks_.GetNxtElmnt();
110 |     currentIndex_ = 0;
111 |   }
112 |   currentBlock_ = next;
113 |   if (next == NULL) {
114 |     allocatedBlocksAvailable_ = false;
115 |     AllocNewBlock_();
116 |   }
117 | }
118 | 
119 | template <class T> inline void MemAlloc<T>::AllocNewBlock_() {
120 |   T *fresh = new T[blockSize_];
121 |   allocatedBlocks_.InsrtElmnt(fresh); // recorded so ~MemAlloc() can free it
122 |   currentBlock_ = fresh;
123 |   currentIndex_ = 0;
124 | }
125 | 
126 | template <class T> inline T *MemAlloc<T>::GetObjects_(int count) {
127 |   // A previously freed object takes precedence over untouched block space.
128 |   T *recycled = availableObjects_.ExtractElmnt();
129 |   if (recycled != NULL) {
130 |     return recycled;
131 |   }
132 | 
133 |   assert(currentIndex_ <= blockSize_);
134 |   if (currentIndex_ == blockSize_) {
135 |     // The current block is used up; this is only expected without a max size.
136 |     assert(maxSize_ == INVALID_VALUE);
137 |     GetNewBlock_();
138 |     assert(currentIndex_ == 0);
139 |   }
140 | 
141 |   // Hand out the next `count` slots of the current block.
142 |   T *fresh = currentBlock_ + currentIndex_;
143 |   currentIndex_ += count;
144 |   assert(fresh != NULL);
145 |   return fresh;
146 | }
147 | 
148 | template <class T> inline T *MemAlloc<T>::GetObject() { return this->GetObjects_(1); }
149 | 
150 | template <class T> inline void MemAlloc<T>::FreeObject(T *obj) {
151 |   this->availableObjects_.InsrtElmnt(obj); // GetObjects_() hands it out again
152 | }
153 | 
154 | } // namespace opt_sched
155 | } // namespace llvm
156 | 
157 | #endif
158 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/random.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Implements Kahan's random number generator, with a period of
 3 |               2 ** 40.
 4 | Author:       Ghassan Shobaki
 5 | Created:      Unknown
 6 | Last Update:  Mar. 2011
 7 | *******************************************************************************/
 8 | 
 9 | #ifndef OPTSCHED_GENERIC_RANDOM_H
10 | #define OPTSCHED_GENERIC_RANDOM_H
11 | 
12 | #include "opt-sched/Scheduler/defines.h"
13 | 
14 | namespace llvm {
15 | namespace opt_sched {
16 | 
17 | namespace RandomGen {
18 | // Initialize the random number generator with a seed.
19 | void SetSeed(int32_t iseed);
20 | // Get a random 32-bit value.
21 | uint32_t GetRand32();
22 | // Get a random 32-bit value within a given range, inclusive.
23 | uint32_t GetRand32WithinRange(uint32_t min, uint32_t max);
24 | // Get a random 64-bit value.
25 | uint64_t GetRand64();
26 | // Fill a buffer with a specified number of random bits, rounded to the
27 | // nearest byte boundary.
28 | void GetRandBits(uint16_t bitCnt, unsigned char *dest);
29 | } // namespace RandomGen
30 | 
31 | } // namespace opt_sched
32 | } // namespace llvm
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/ready_list.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  Defines a ReadyList class, which is one of the main data
  3 |               structures that an instruction scheduler needs. The ready list is
  4 |               a sorted list of instructions whose data dependencies have been
  5 |               satisfied (their predecessors in the data dependence graph have
  6 |               been scheduled).
  7 | Author:       Ghassan Shobaki
  8 | Created:      Apr. 2002
  9 | Last Update:  Sept. 2013
 10 | *******************************************************************************/
 11 | 
 12 | #ifndef OPTSCHED_BASIC_READY_LIST_H
 13 | #define OPTSCHED_BASIC_READY_LIST_H
 14 | 
 15 | #include "opt-sched/Scheduler/defines.h"
 16 | #include "opt-sched/Scheduler/lnkd_lst.h"
 17 | #include "opt-sched/Scheduler/sched_basic_data.h"
 18 | #include "llvm/ADT/SmallVector.h"
 19 | #include <cstdio>
 20 | 
 21 | namespace llvm {
 22 | namespace opt_sched {
 23 | 
 24 | // A priority list of instruction that are ready to schedule at a given point
 25 | // during the scheduling process.
 26 | class ReadyList {
 27 | public:
 28 |   // Constructs a ready list for the specified dependence graph with the
 29 |   // specified priorities.
 30 |   ReadyList(DataDepGraph *dataDepGraph, SchedPriorities prirts);
 31 |   // Destroys the ready list and deallocates the memory used by it.
 32 |   ~ReadyList();
 33 | 
 34 |   // Resets the list and removes all elements from it.
 35 |   void Reset();
 36 | 
 37 |   // Adds an instruction to the ready list.
 38 |   void AddInst(SchedInstruction *inst);
 39 | 
 40 |   // Adds a list of instructions to the ready list.
 41 |   void AddList(LinkedList<SchedInstruction> *lst);
 42 | 
 43 |   // An iterator that allows accessing the instructions at the current time
 44 |   // in priority order. The first call will return the top priority
 45 |   // instruction, the next will return the instruction with the second rank,
 46 |   // and so on.
 47 |   SchedInstruction *GetNextPriorityInst();
 48 |   SchedInstruction *GetNextPriorityInst(unsigned long &key);
 49 | 
 50 |   // Removes the instruction returned by the last call to
 51 |   // GetNextPriorityInst().
 52 |   void RemoveNextPriorityInst();
 53 | 
 54 |   // Returns the number of instructions currently in the list.
 55 |   InstCount GetInstCnt() const;
 56 | 
 57 |   // Resets the list iterator to point back to the first instruction.
 58 |   void ResetIterator();
 59 | 
 60 |   // Adds instructions at the bottoms of the given two lists which have
 61 |   // not been added to the ready list already, and advance the internal time.
 62 |   // TODO(max): Elaborate.
 63 |   void AddLatestSubLists(LinkedList<SchedInstruction> *lst1,
 64 |                          LinkedList<SchedInstruction> *lst2);
 65 | 
 66 |   // Removes the most recently added sublist of instructions.
 67 |   // TODO(max): Elaborate.
 68 |   void RemoveLatestSubList();
 69 | 
 70 |   // Copies this list to another. Both lists must be empty.
 71 |   void CopyList(ReadyList *otherLst);
 72 | 
 73 |   // Searches the list for an instruction, returning whether it has been found
 74 |   // or not and writing the number of times it was found into hitCnt.
 75 |   bool FindInst(SchedInstruction *inst, int &hitCnt);
 76 | 
 77 |   // Update instruction priorities within the list
 78 |   // Called only if the priorities change dynamically during scheduling
 79 |   void UpdatePriorities();
 80 | 
 81 |   unsigned long MaxPriority();
 82 | 
 83 |   // Prints out the ready list, nicely formatted, into an output stream.
 84 |   void Print(std::ostream &out);
 85 | 
 86 |   // Constructs the priority-list key based on the schemes listed in prirts_.
 87 |   unsigned long CmputKey_(SchedInstruction *inst, bool isUpdate, bool &changed);
 88 | 
 89 |   template <typename InstructionVisitor>
 90 |   void ForEachReadyInstruction(InstructionVisitor &&visitor) const {
 91 |     for (const SchedInstruction &Inst : prirtyLst_) {
 92 |       visitor(Inst);
 93 |     }
 94 |   }
 95 | 
 96 | private:
 97 |   // An ordered vector of priorities
 98 |   SchedPriorities prirts_;
 99 | 
100 |   // The priority list containing the actual instructions.
101 |   PriorityList<SchedInstruction> prirtyLst_;
102 | 
103 |   // TODO(max): Document.
104 |   LinkedList<SchedInstruction> latestSubLst_;
105 | 
106 |   // Array of pointers to KeyedEntry objects
107 |   llvm::SmallVector<KeyedEntry<SchedInstruction, unsigned long> *, 0>
108 |       keyedEntries_;
109 | 
110 |   // Is there a priority scheme that needs to be changed dynamically
111 |   //    bool isDynmcPrirty_;
112 | 
113 |   // The maximum values for each part of the priority key.
114 |   InstCount maxUseCnt_;
115 |   InstCount maxCrtclPath_;
116 |   InstCount maxScsrCnt_;
117 |   InstCount maxLtncySum_;
118 |   InstCount maxNodeID_;
119 |   InstCount maxInptSchedOrder_;
120 | 
121 |   unsigned long maxPriority_;
122 | 
123 |   // The number of bits for each part of the priority key.
124 |   int16_t useCntBits_;
125 |   int16_t crtclPathBits_;
126 |   int16_t scsrCntBits_;
127 |   int16_t ltncySumBits_;
128 |   int16_t nodeID_Bits_;
129 |   int16_t inptSchedOrderBits_;
130 | 
131 |   // Adds instructions at the bottom of a given list which have not been added
132 |   // to the ready list already.
133 |   void AddLatestSubList_(LinkedList<SchedInstruction> *lst);
134 | 
135 |   // Calculates a new priority key given an existing key of size keySize by
136 |   // appending bitCnt bits holding the value val, assuming val < maxVal.
137 |   static void AddPrirtyToKey_(unsigned long &key, int16_t &keySize,
138 |                               int16_t bitCnt, unsigned long val,
139 |                               unsigned long maxVal);
140 | };
141 | 
142 | } // namespace opt_sched
143 | } // namespace llvm
144 | 
145 | #endif
146 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/reg_alloc.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Defines register allocation classes. By looking at the effect
 3 |               of scheduling decisions on the number of spills added during
 4 | simulated register allocation, we can evaluate the performance of the scheduler.
 5 | 
 6 | Author:       Austin Kerbow
 7 | Created:      Oct. 2017
 8 | Last Update:  Oct. 2017
 9 | *******************************************************************************/
10 | #ifndef OPTSCHED_BASIC_REG_ALLOC_H
11 | #define OPTSCHED_BASIC_REG_ALLOC_H
12 | 
13 | #include "opt-sched/Scheduler/data_dep.h"
14 | #include <map>
15 | #include <queue>
16 | #include <stack>
17 | #include <vector>
18 | 
19 | namespace llvm {
20 | namespace opt_sched {
21 | 
22 | using namespace std;
23 | 
24 | /**
25 |  * Class for performing basic top-down register allocation.
26 |  */
27 | class LocalRegAlloc {
28 | public:
29 |   typedef struct RegMap {
30 |     // A queue of instruction numbers that this virtual register is used in.
31 |     queue<int> nextUses;
32 |     // Do we need to spill this virtual register.
33 |     bool isDirty;
34 |     // The physical register that this virtual register is mapped to. If this
35 |     // virtual register is not mapped to a physical register, set to -1.
36 |     int assignedReg;
37 |   } RegMap;
38 | 
39 |   LocalRegAlloc(InstSchedule *instSchedule, DataDepGraph *dataDepGraph);
40 |   virtual ~LocalRegAlloc();
41 |   // Try to allocate registers in the region and count the number of spills
42 |   // added.
43 |   virtual void AllocRegs();
44 |   // Initialize data for register allocation.
45 |   virtual void SetupForRegAlloc();
46 |   // Print information about the amount of spilling in the region after register
47 |   // allocation.
48 |   virtual void PrintSpillInfo(const char *dagName);
49 |   // Return the spill cost of region after register allocation.
50 |   virtual int GetCost() const;
51 |   // Return the number of loads
52 |   int GetNumLoads() const { return numLoads_; }
53 |   // Return the number of stores
54 |   int GetNumStores() const { return numStores_; }
55 | 
56 | private:
57 |   InstSchedule *instSchedule_;
58 |   DataDepGraph *dataDepGraph_;
59 |   int numLoads_;
60 |   int numStores_;
61 |   int numRegTypes_;
62 |   // For each register type, there is a stack that tracks free physical
63 |   // registers.
64 |   vector<stack<int>> freeRegs_;
65 |   // For each virtual register, track the next use and the currently assigned
66 |   // physical register.
67 |   vector<map<int, RegMap>> regMaps_;
68 |   // For each register type, we have a list of physical registers and the
69 |   // current virtual register that is loaded. If the regsiter is free, set to
70 |   // -1.
71 |   vector<vector<int>> physRegs_;
72 | 
73 |   // Find all instructions that use each register.
74 |   void ScanUses_();
75 |   void AllocateReg_(int16_t regType, int virtRegNum);
76 |   // Find a candidate physical register to spill.
77 |   int FindSpillCand_(std::map<int, RegMap> &regMaps, vector<int> &physRegs);
78 |   // Load live-in virtual registers. Live-in registers are defined by the
79 |   // artificial entry instruction.
80 |   void AddLiveIn_(SchedInstruction *artificialEntry);
81 |   // Spill all dirty registers.
82 |   void SpillAll_();
83 | };
84 | 
85 | } // namespace opt_sched
86 | } // namespace llvm
87 | 
88 | #endif
89 | 


--------------------------------------------------------------------------------
/include/opt-sched/Scheduler/utilities.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 | Description:  Contains a few generic utility functions.
 3 | Author:       Ghassan Shobaki
 4 | Created:      Oct. 1997
 5 | Last Update:  Mar. 2017
 6 | *******************************************************************************/
 7 | 
 8 | #ifndef OPTSCHED_GENERIC_UTILITIES_H
 9 | #define OPTSCHED_GENERIC_UTILITIES_H
10 | 
11 | #include "opt-sched/Scheduler/defines.h"
12 | #include <chrono>
13 | 
14 | namespace llvm {
15 | namespace opt_sched {
16 | 
17 | namespace Utilities {
18 | // Calculates the minimum number of bits that can hold a given integer value.
19 | uint16_t clcltBitsNeededToHoldNum(uint64_t value);
20 | // Returns the time that has passed since the start of the process, in
21 | // milliseconds.
22 | Milliseconds GetProcessorTime();
23 | // Returns a reference to an object that is supposed to initialized with the
24 | // start time of the process
25 | extern std::chrono::steady_clock::time_point startTime;
26 | 
27 | // Executes the function, returning the number of milliseconds it took to do so.
28 | template <typename F> Milliseconds countMillisToExecute(F &&fn) {
29 |   const Milliseconds Start = GetProcessorTime();
30 |   fn();
31 |   return GetProcessorTime() - Start;
32 | }
33 | } // namespace Utilities
34 | 
35 | inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) {
36 |   uint16_t bitsNeeded = 0;
37 | 
38 |   while (value) {
39 |     value >>= 1;
40 |     bitsNeeded++;
41 |   }
42 |   return bitsNeeded;
43 | }
44 | 
45 | inline Milliseconds Utilities::GetProcessorTime() {
46 |   auto currentTime = std::chrono::steady_clock::now();
47 |   std::chrono::duration<double, std::milli> elapsed = currentTime - startTime;
48 |   return elapsed.count();
49 | }
50 | 
51 | } // namespace opt_sched
52 | } // namespace llvm
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(OPTSCHED_SRCS
 2 |   Scheduler/aco.cpp
 3 |   Scheduler/bb_spill.cpp
 4 |   Scheduler/buffers.cpp
 5 |   Scheduler/config.cpp
 6 |   Scheduler/data_dep.cpp
 7 |   Scheduler/enumerator.cpp
 8 |   Scheduler/gen_sched.cpp
 9 |   Scheduler/graph.cpp
10 |   Scheduler/graph_trans.cpp
11 |   Scheduler/graph_trans_ilp.cpp
12 |   Scheduler/graph_trans_ilp_occupancy_preserving.cpp
13 |   Scheduler/hist_table.cpp
14 |   Scheduler/list_sched.cpp
15 |   Scheduler/logger.cpp
16 |   Scheduler/reg_alloc.cpp
17 |   Scheduler/utilities.cpp
18 |   Scheduler/machine_model.cpp
19 |   Scheduler/random.cpp
20 |   Scheduler/ready_list.cpp
21 |   Scheduler/register.cpp
22 |   Scheduler/relaxed_sched.cpp
23 |   Scheduler/sched_basic_data.cpp
24 |   Scheduler/sched_region.cpp
25 |   Scheduler/stats.cpp
26 |   Wrapper/OptimizingScheduler.cpp
27 |   Wrapper/OptSchedMachineWrapper.cpp
28 |   Wrapper/OptSchedDDGWrapperBasic.cpp
29 |   Wrapper/OptSchedGenericTarget.cpp
30 | )
31 | 
32 | set(OPTSCHED_TARGET_DEPS "")
33 | 
34 | if(OPTSCHED_ENABLE_AMDGPU)
35 |   list(APPEND OPTSCHED_SRCS
36 |     Wrapper/AMDGPU/GCNOptSched.cpp
37 |     Wrapper/AMDGPU/OptSchedGCNTarget.cpp
38 |     Wrapper/AMDGPU/OptSchedDDGWrapperGCN.cpp
39 |   )
40 |   if(TARGET LLVMAMDGPUCodeGen)
41 |     list(APPEND OPTSCHED_TARGET_DEPS AMDGPUCommonTableGen)
42 |   endif()
43 | endif()
44 | 
45 | add_llvm_target(OptSched
46 |   STATIC
47 |   ${OPTSCHED_SRCS}
48 | )
49 | add_dependencies(LLVMOptSched ${OPTSCHED_TARGET_DEPS})
50 | #add_definitions(${OPTSCHED_EXTRA_DEFINITIONS})
51 | 


--------------------------------------------------------------------------------
/lib/Scheduler/config.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt-sched/Scheduler/config.h"
  2 | #include "opt-sched/Scheduler/logger.h"
  3 | #include "llvm/Support/ErrorHandling.h"
  4 | #include <fstream>
  5 | #include <llvm/ADT/StringRef.h>
  6 | #include <sstream>
  7 | 
  8 | using namespace llvm::opt_sched;
  9 | 
 10 | using std::istringstream;
 11 | 
 12 | template <class T> T Convert(const string &value) {
 13 |   istringstream ss(value);
 14 |   T number = 0;
 15 |   ss >> number;
 16 |   assert(!ss.fail());
 17 |   return number;
 18 | }
 19 | 
 20 | template <class T> list<T> Split(const string &value) {
 21 |   list<T> values;
 22 |   if (value == "")
 23 |     return values;
 24 | 
 25 |   istringstream ss(value);
 26 |   while (ss) {
 27 |     T item;
 28 |     char delimiter;
 29 |     ss >> item;
 30 |     assert(!ss.fail());
 31 |     ss >> delimiter;
 32 |     assert(ss.fail() || delimiter == ',');
 33 |     values.push_back(item);
 34 |   }
 35 | 
 36 |   return values;
 37 | }
 38 | 
 39 | void Config::Load(const string &filepath) {
 40 |   std::ifstream file(filepath.c_str());
 41 |   Load(file);
 42 | }
 43 | 
 44 | void Config::Load(std::istream &file) {
 45 |   settings.clear();
 46 |   while (!file.eof()) {
 47 |     string name, value, comment;
 48 |     file >> name;
 49 |     while (!file.fail() && name.size() && name[0] == '#') {
 50 |       std::getline(file, comment);
 51 |       file >> name;
 52 |     }
 53 |     file >> value;
 54 |     while (!file.fail() && value.size() && value[0] == '#') {
 55 |       std::getline(file, comment);
 56 |       file >> value;
 57 |     }
 58 |     if (file.fail() || name == "" || value == "")
 59 |       break;
 60 |     settings[name] = value;
 61 |   }
 62 | }
 63 | 
 64 | string Config::GetString(const string &name) const {
 65 |   std::map<string, string>::const_iterator it = settings.find(name);
 66 |   if (it == settings.end()) {
 67 |     llvm::report_fatal_error(
 68 |         llvm::StringRef("No value found for setting " + name), false);
 69 |     return "";
 70 |   } else {
 71 |     return it->second;
 72 |   }
 73 | }
 74 | 
 75 | string Config::GetString(const string &name, const string &default_) const {
 76 |   std::map<string, string>::const_iterator it = settings.find(name);
 77 |   if (it == settings.end()) {
 78 |     return default_;
 79 |   } else {
 80 |     return it->second;
 81 |   }
 82 | }
 83 | 
 84 | int64_t Config::GetInt(const string &name) const {
 85 |   return Convert<int64_t>(GetString(name));
 86 | }
 87 | 
 88 | int64_t Config::GetInt(const string &name, int64_t default_) const {
 89 |   if (settings.find(name) == settings.end()) {
 90 |     return default_;
 91 |   } else {
 92 |     return GetInt(name);
 93 |   }
 94 | }
 95 | 
 96 | float Config::GetFloat(const string &name) const {
 97 |   return Convert<float>(GetString(name));
 98 | }
 99 | 
100 | float Config::GetFloat(const string &name, float default_) const {
101 |   if (settings.find(name) == settings.end()) {
102 |     return default_;
103 |   } else {
104 |     return GetFloat(name);
105 |   }
106 | }
107 | 
108 | bool Config::GetBool(const string &name) const {
109 |   string value = GetString(name);
110 |   if (value == "YES" || value == "yes" || value == "1" || value == "TRUE" ||
111 |       value == "true") {
112 |     return true;
113 |   } else {
114 |     assert(value == "NO" || value == "no" || value == "0" || value == "FALSE" ||
115 |            value == "false");
116 |     return false;
117 |   }
118 | }
119 | 
120 | bool Config::GetBool(const string &name, bool default_) const {
121 |   if (settings.find(name) == settings.end()) {
122 |     return default_;
123 |   } else {
124 |     return GetBool(name);
125 |   }
126 | }
127 | 
128 | list<string> Config::GetStringList(const string &name) const {
129 |   list<string> values;
130 |   string line = GetString(name, "");
131 |   if (line == "")
132 |     return values;
133 | 
134 |   istringstream ss(line);
135 |   string item;
136 | 
137 |   while (std::getline(ss, item, ',')) {
138 |     values.push_back(item);
139 |   }
140 | 
141 |   return values;
142 | }
143 | 
144 | list<int64_t> Config::GetIntList(const string &name) const {
145 |   return Split<int64_t>(GetString(name, ""));
146 | }
147 | 
148 | list<float> Config::GetFloatList(const string &name) const {
149 |   return Split<float>(GetString(name, ""));
150 | }
151 | 
152 | SchedulerOptions &SchedulerOptions::getInstance() {
153 |   static SchedulerOptions instance; // The instance will always be destroyed.
154 |   return instance;
155 | }
156 | 


--------------------------------------------------------------------------------
/lib/Scheduler/graph_trans_ilp_occupancy_preserving.cpp:
--------------------------------------------------------------------------------
 1 | #include "opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h"
 2 | 
 3 | #include "opt-sched/Scheduler/graph_trans_ilp.h"
 4 | #include "opt-sched/Scheduler/logger.h"
 5 | #include "llvm/ADT/ArrayRef.h"
 6 | #include "llvm/ADT/STLExtras.h"
 7 | #include "llvm/ADT/SmallVector.h"
 8 | #include <cassert>
 9 | #include <vector>
10 | 
11 | using namespace llvm::opt_sched;
12 | 
// Uncomment the following line to enable the DEBUG_LOG output in this file.
// #define IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS

// DEBUG_LOG forwards its arguments to Logger::Info when
// IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS is defined; otherwise it
// expands to a no-op expression and its arguments are not evaluated.
#ifdef IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS
#define DEBUG_LOG(...) Logger::Info(__VA_ARGS__)
#else
#define DEBUG_LOG(...) static_cast<void>(0)
#endif
20 | 
21 | using ILP = StaticNodeSupILPTrans;
22 | using RP = StaticNodeSupTrans;
23 | 
// Constructs the transformation for the given data dependence graph; all
// state is held by the GraphTrans base class.
StaticNodeSupOccupancyPreservingILPTrans::
    StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *DDG)
    : GraphTrans(DDG) {}
27 | 
// Applies the occupancy-preserving ILP node superiority transformation to the
// graph. Candidate pairs (i, j) are taken from the ILP transformation's
// SuperiorNodesList; a pair results in new edges only if the two nodes are
// still independent and RP::isNodeSuperior() also accepts it. Logs a start and
// a finish event and always returns RES_SUCCESS.
FUNC_RESULT StaticNodeSupOccupancyPreservingILPTrans::ApplyTrans() {
  Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiority");

  DataDepGraph &DDG = *GetDataDepGraph_();
  assert(GetNumNodesInGraph_() == DDG.GetNodeCnt());

  // Working data of the ILP transformation, including the candidate list and
  // the statistics reported at the end.
  auto Data_ = ILP::createData(DDG);
  ILP::Data &Data = Data_.getData();

  // Candidates that were still independent when considered, and how many of
  // those were then rejected by the RP check.
  int NumPassedILP = 0;
  int NumFailedRP = 0;

  DEBUG_LOG("Starting main algorithm");
  // Candidates are consumed from the back of the list until it is empty.
  while (!Data.SuperiorNodesList.empty()) {
    auto ij = Data.SuperiorNodesList.pop_back_val();
    const int i = ij.first;
    const int j = ij.second;
    DEBUG_LOG("Considering adding a superior edge (%d, %d)", i, j);

    // Edges added by earlier iterations may have made the pair dependent.
    if (!areNodesIndependent(DDG.GetInstByIndx(i), DDG.GetInstByIndx(j))) {
      DEBUG_LOG("Skipping (%d, %d) because nodes are no longer independent\n",
                i, j);
      continue;
    }
    ++NumPassedILP;
    if (!RP::isNodeSuperior(DDG, i, j)) {
      DEBUG_LOG("(%d, %d) failed the occupancy-preserving conditions\n", i, j);
      ++NumFailedRP;
      continue;
    }

    ILP::addZeroLatencyEdge(Data, i, j);
    ILP::addNecessaryResourceEdges(Data, i, j);

    ILP::updateDistanceTable(Data, i, j);
    // ILP redundant edges are also redundant from RP point of view.
    // This is because ILP redundant edges are transitive edges with more
    // conditions met, and the RP point of view considers transitive edges to be
    // redundant.
    ILP::removeRedundantEdges(Data, i, j);

    DEBUG_LOG("Finished iteration for (%d, %d)\n", i, j);
  }

  // The trailing `//` comments only pin the one-pair-per-line formatting.
  Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiorityFinished",
                "superior_edges", Data.Stats.NumEdgesAdded,         //
                "removed_edges", Data.Stats.NumEdgesRemoved,        //
                "resource_edges", Data.Stats.NumResourceEdgesAdded, //
                "passed_ilp", NumPassedILP,                         //
                "failed_rp", NumFailedRP);

  return RES_SUCCESS;
}
81 | 


--------------------------------------------------------------------------------
/lib/Scheduler/logger.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt-sched/Scheduler/logger.h"
  2 | #include <iostream>
  3 | // For va_list, va_start(), va_end().
  4 | #include <cstdarg>
  5 | // For sprintf(), vsprintf().
  6 | #include <cstdio>
  7 | // For exit().
  8 | #include <cstdlib>
  9 | // For GetProcessorTime().
 10 | #include "opt-sched/Scheduler/utilities.h"
 11 | 
 12 | using namespace llvm::opt_sched;
 13 | 
 14 | // An ugly macro to simplify repeated vararg-insertion.
 15 | #define VPRINT(buf, frmt)                                                      \
 16 |   va_list args;                                                                \
 17 |   va_start(args, frmt);                                                        \
 18 |   vsprintf(buf, frmt, args);                                                   \
 19 |   va_end(args);
 20 | 
 21 | // The maximum buffer size for error messages.
 22 | static const int MAX_MSGSIZE = 8000;
 23 | 
 24 | // The current output stream.
 25 | static std::ostream *logStream = &std::cerr;
 26 | 
 27 | // The periodic logging callback.
 28 | static void (*periodLogCallback)() = NULL;
 29 | // The minimum length of (CPU) time between two calls to the periodic logging
 30 | // callback.
 31 | static Milliseconds periodLogPeriod = 0;
 32 | // The CPU time when the period log was last called.
 33 | static Milliseconds periodLogLastTime = 0;
 34 | 
 35 | // The main output function. Calculates the time since process start and formats
 36 | // the specified message with a title and timestamp. Exits the program with exit
 37 | // code = 1 on fatal errors.
 38 | static void Output(Logger::LOG_LEVEL level, bool timed, const char *message) {
 39 |   const char *title = 0;
 40 | 
 41 |   switch (level) {
 42 |   case Logger::FATAL:
 43 |     title = "FATAL";
 44 |     break;
 45 |   case Logger::ERROR:
 46 |     title = "ERROR";
 47 |     break;
 48 |   case Logger::INFO:
 49 |     title = "INFO";
 50 |     break;
 51 |   case Logger::SUMMARY:
 52 |     title = "SUMMARY";
 53 |     break;
 54 |   }
 55 | 
 56 |   (*logStream) << title << ": " << message;
 57 |   if (timed) {
 58 |     (*logStream) << " (Time = " << Utilities::GetProcessorTime() << " ms)";
 59 |   }
 60 |   (*logStream) << std::endl;
 61 | 
 62 |   if (level == Logger::FATAL)
 63 |     exit(1);
 64 | }
 65 | 
// Redirects all subsequent log output to `out`. Only the address is stored, so
// `out` must stay alive for as long as logging may happen.
void Logger::SetLogStream(std::ostream &out) { logStream = &out; }
 67 | 
// Returns the stream that log output is currently written to.
std::ostream &Logger::GetLogStream() { return *logStream; }
 69 | 
 70 | void Logger::RegisterPeriodicLogger(Milliseconds period, void (*callback)()) {
 71 |   periodLogLastTime = Utilities::GetProcessorTime();
 72 |   periodLogCallback = callback;
 73 |   periodLogPeriod = period;
 74 | }
 75 | 
 76 | void Logger::PeriodicLog() {
 77 |   if (!periodLogCallback) {
 78 |     Error("Periodic log called while no callback was registered.");
 79 |     return;
 80 |   }
 81 | 
 82 |   Milliseconds now = Utilities::GetProcessorTime();
 83 |   ;
 84 |   if (now - periodLogLastTime >= periodLogPeriod) {
 85 |     periodLogCallback();
 86 |     periodLogLastTime = now;
 87 |   }
 88 | }
 89 | 
// Formats a printf-style message into a MAX_MSGSIZE-byte stack buffer and
// writes it at the given level; `timed` selects whether a timestamp is
// appended.
void Logger::Log(Logger::LOG_LEVEL level, bool timed, const char *format_string,
                 ...) {
  char message_buffer[MAX_MSGSIZE];
  VPRINT(message_buffer, format_string);
  Output(level, timed, message_buffer);
}
 96 | 
// Formats a printf-style message, writes it as a timed FATAL message and
// terminates the process with exit code 1.
void Logger::Fatal(const char *format_string, ...) {
  char message_buffer[MAX_MSGSIZE];
  VPRINT(message_buffer, format_string);
  // Output() already calls exit(1) for FATAL messages.
  Output(Logger::FATAL, true, message_buffer);
  exit(1);
}
103 | 
// Formats a printf-style message and writes it as a timed ERROR message.
void Logger::Error(const char *format_string, ...) {
  char message_buffer[MAX_MSGSIZE];
  VPRINT(message_buffer, format_string);
  Output(Logger::ERROR, true, message_buffer);
}
109 | 
// Formats a printf-style message and writes it as a timed INFO message.
void Logger::Info(const char *format_string, ...) {
  char message_buffer[MAX_MSGSIZE];
  VPRINT(message_buffer, format_string);
  Output(Logger::INFO, true, message_buffer);
}
115 | 
// Formats a printf-style message and writes it as a SUMMARY message. Unlike
// the other levels, no timestamp is appended.
void Logger::Summary(const char *format_string, ...) {
  char message_buffer[MAX_MSGSIZE];
  VPRINT(message_buffer, format_string);
  Output(Logger::SUMMARY, false, message_buffer);
}
121 | 
122 | using Logger::detail::EventAttrType;
123 | using Logger::detail::EventAttrValue;
124 | 
// Writes one "EVENT: {...}" line to the current log stream. The `numAttrs`
// entries of `attrs` are printed in order, each formatted according to its
// type tag, and a final "time" entry holding the current process time is
// appended before the stream is flushed.
// NOTE(review): the alternating ": " / ", " separators suggest `attrs` is a
// flattened key, value, key, value, ... sequence -- confirm against callers.
void Logger::detail::Event(
    const std::pair<EventAttrType, EventAttrValue> *attrs, size_t numAttrs) {
  std::ostream &out = *logStream;

  // We alternate using ": " and ", " as the separators.
  // However, we just print the separator before every attribute, meaning that
  // we need to special case the first element, hence the third empty string.
  const char *separators[] = {": ", ", ", ""};
  int sepIndex = 2;

  out << "EVENT: {";

  for (size_t index = 0; index < numAttrs; ++index,
              // Alternate the separator we are using. Note: !2 == 0
                                           sepIndex = !sepIndex) {
    const auto type = attrs[index].first;
    const auto val = attrs[index].second;

    out << separators[sepIndex];

    // `type` selects which member of the value is printed.
    switch (type) {
    case EventAttrType::Bool:
      out << (val.b ? "true" : "false");
      break;
    case EventAttrType::Int64:
      out << val.i64;
      break;
    case EventAttrType::UInt64:
      out << val.u64;
      break;
    case EventAttrType::CStr:
      // TODO(justin): when we have C++14, use std::quoted(val.cstr), which will
      // escape `"`s inside the string.
      out << '"' << val.cstr << '"';
      break;
    default:
      Logger::Fatal("Unknown event type %d. Internal error", (int)type);
    }
  }

  // sepIndex now holds the separator that belongs before the next entry (the
  // empty string when no attribute was printed).
  out << separators[sepIndex] << "\"time\": " << Utilities::GetProcessorTime()
      << "}\n"
      << std::flush;
}
169 | 


--------------------------------------------------------------------------------
/lib/Scheduler/random.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt-sched/Scheduler/random.h"
  2 | // For memcpy().
  3 | #include <algorithm>
  4 | #include <cstring>
  5 | 
  6 | using namespace llvm::opt_sched;
  7 | 
// Magic numbers used in the generator formula. SetSeed() uses them as the
// multiplier (A) and increment (C) of the recurrence that fills the state
// table from a non-zero seed.
static const uint32_t A = 0x2faf071d; // 8 * (10 ** 8 - 29) + 5
static const uint32_t C = 0x3b9ac9c1; // 10 ** 9 - 63
 11 | 
// Magic lookup table. Holds the 55 default state values that SetSeed() copies
// into the generator state when the seed is 0.
static uint32_t Z[] = {
    0x8ca0df45, 0x37334f23, 0x4a5901d2, 0xaeede075, 0xd84bd3cf, 0xa1ce3350,
    0x35074a8f, 0xfd4e6da0, 0xe2c22e6f, 0x045de97e, 0x0e6d45b9, 0x201624a2,
    0x01e10dca, 0x2810aef2, 0xea0be721, 0x3a3781e4, 0xa3602009, 0xd2ffcf69,
    0xff7102e9, 0x36fab972, 0x5c3650ff, 0x8cd44c9c, 0x25a4a676, 0xbd6385ce,
    0xcd55c306, 0xec8a31f5, 0xa87b24ce, 0x1e025786, 0x53d713c9, 0xb29d308f,
    0x0dc6cf3f, 0xf11139c9, 0x3afb3780, 0x0ed6b24c, 0xef04c8fe, 0xab53d825,
    0x3ca69893, 0x35460fb1, 0x058ead73, 0x0b567c59, 0xfdddca3f, 0x6317e77d,
    0xaa5febe5, 0x655f73e2, 0xd42455bb, 0xe845a8bb, 0x351e4a67, 0xa36a9dfb,
    0x3e0ac91d, 0xbaa0de01, 0xec60dc66, 0xdb29309e, 0xcfa52971, 0x1f3eddaf,
    0xe14aae61,
};
 25 | 
// The current generator state. Magical starting values.
// j and k are the two indices into y that GenerateNextNumber() reads; both are
// decremented on every step and wrap from 0 back to 54.
static long j = 23;
static long k = 54;
// The 55-entry state table; its initial contents are the same values as Z.
static uint32_t y[] = {
    0x8ca0df45, 0x37334f23, 0x4a5901d2, 0xaeede075, 0xd84bd3cf, 0xa1ce3350,
    0x35074a8f, 0xfd4e6da0, 0xe2c22e6f, 0x045de97e, 0x0e6d45b9, 0x201624a2,
    0x01e10dca, 0x2810aef2, 0xea0be721, 0x3a3781e4, 0xa3602009, 0xd2ffcf69,
    0xff7102e9, 0x36fab972, 0x5c3650ff, 0x8cd44c9c, 0x25a4a676, 0xbd6385ce,
    0xcd55c306, 0xec8a31f5, 0xa87b24ce, 0x1e025786, 0x53d713c9, 0xb29d308f,
    0x0dc6cf3f, 0xf11139c9, 0x3afb3780, 0x0ed6b24c, 0xef04c8fe, 0xab53d825,
    0x3ca69893, 0x35460fb1, 0x058ead73, 0x0b567c59, 0xfdddca3f, 0x6317e77d,
    0xaa5febe5, 0x655f73e2, 0xd42455bb, 0xe845a8bb, 0x351e4a67, 0xa36a9dfb,
    0x3e0ac91d, 0xbaa0de01, 0xec60dc66, 0xdb29309e, 0xcfa52971, 0x1f3eddaf,
    0xe14aae61,
};
 41 | 
// The last random number. Written by GenerateNextNumber(), which masks it to
// its low 31 bits.
static uint32_t randNum;
 44 | 
 45 | void GenerateNextNumber() {
 46 |   randNum = y[j] + y[k];
 47 |   y[k] = randNum;
 48 |   if (--j < 0)
 49 |     j = 54;
 50 |   if (--k < 0)
 51 |     k = 54;
 52 |   randNum &= 0x7fffffff;
 53 | }
 54 | 
 55 | void RandomGen::SetSeed(int32_t iseed) {
 56 |   j = 23;
 57 |   k = 54;
 58 | 
 59 |   if (iseed == 0) {
 60 |     for (int32_t i = 0; i < 55; i++) {
 61 |       y[i] = Z[i];
 62 |     }
 63 |   } else {
 64 |     y[0] = (A * iseed + C) >> 1;
 65 |     for (int32_t i = 1; i < 55; i++) {
 66 |       y[i] = (A * y[i - 1] + C) >> 1;
 67 |     }
 68 |   }
 69 | }
 70 | 
 71 | uint32_t RandomGen::GetRand32WithinRange(uint32_t min, uint32_t max) {
 72 |   GenerateNextNumber();
 73 |   return randNum % (max - min + 1) + min;
 74 | }
 75 | 
// Advances the generator and returns the new number. Because
// GenerateNextNumber() masks randNum with 0x7fffffff, the top bit of the
// result is always 0.
uint32_t RandomGen::GetRand32() {
  GenerateNextNumber();
  return randNum;
}
 80 | 
 81 | uint64_t RandomGen::GetRand64() {
 82 |   uint64_t rand64;
 83 | 
 84 |   GenerateNextNumber();
 85 |   rand64 = randNum;
 86 |   rand64 <<= 32;
 87 | 
 88 |   GenerateNextNumber();
 89 |   rand64 += randNum;
 90 | 
 91 |   return rand64;
 92 | }
 93 | 
 94 | void RandomGen::GetRandBits(uint16_t bitCnt, unsigned char *dest) {
 95 |   uint16_t bytesNeeded = (bitCnt + 7) / 8;
 96 |   uint16_t index = 0;
 97 | 
 98 |   while (bytesNeeded > 0) {
 99 |     GenerateNextNumber();
100 |     uint16_t bytesConsumed = std::min(bytesNeeded, (uint16_t)4);
101 |     memcpy(dest + index, &randNum, bytesConsumed);
102 |     index += bytesConsumed;
103 |     bytesNeeded -= bytesConsumed;
104 |   }
105 | }
106 | 


--------------------------------------------------------------------------------
/lib/Scheduler/utilities.cpp:
--------------------------------------------------------------------------------
1 | #include "opt-sched/Scheduler/utilities.h"
2 | #include <chrono>
3 | 
4 | using namespace llvm::opt_sched;
5 | 
// Captured once during static initialization of this translation unit; the
// intent is for it to approximate the start time of the process.
std::chrono::steady_clock::time_point Utilities::startTime =
    std::chrono::steady_clock::now();
8 | 


--------------------------------------------------------------------------------
/lib/Wrapper/AMDGPU/GCNOptSched.cpp:
--------------------------------------------------------------------------------
//===- GCNOptSched.cpp - AMDGCN Combinatorial scheduler -------------------===//
  2 | //
  3 | // Implements a combinatorial scheduling strategy for AMDGCN.
  4 | //
  5 | //===----------------------------------------------------------------------===//
  6 | 
  7 | #include "GCNOptSched.h"
  8 | #include "AMDGPUExportClustering.h"
  9 | #include "AMDGPUMacroFusion.h"
 10 | #include "GCNSchedStrategy.h"
 11 | #include "SIMachineFunctionInfo.h"
 12 | #include "llvm/Support/Debug.h"
 13 | 
 14 | #define DEBUG_TYPE "optsched"
 15 | 
 16 | using namespace llvm::opt_sched;
 17 | 
 18 | // FIXME: Temporary, eliminate
 19 | static cl::opt<bool>
 20 |     GCNLimitOccWithHints("gcn-limit-occ-with-hints",
 21 |                          cl::desc("Limit occpancy target using perf hints."),
 22 |                          cl::init(false), cl::Hidden);
 23 | 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper, only compiled in asserts or dump-enabled builds. Advances a
// GCNDownwardRPTracker from Begin to End and dumps the maximum pressure it
// reports, preceded by a line containing Label, to dbgs().
static void getRealRegionPressure(MachineBasicBlock::const_iterator Begin,
                                  MachineBasicBlock::const_iterator End,
                                  const LiveIntervals *LIS, StringRef Label) {
  GCNDownwardRPTracker RP(*LIS);
  RP.advance(Begin, End, nullptr);
  dbgs() << "Dumping real RP " << Label << "\n";
  RP.moveMaxPressure().dump();
}
#endif
 34 | 
// Forwards the scheduling context and the strategy (ownership transferred) to
// the ScheduleDAGOptSched base class.
ScheduleDAGOptSchedGCN::ScheduleDAGOptSchedGCN(
    llvm::MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGOptSched(C, std::move(S)) {}
 38 | 
 39 | void ScheduleDAGOptSchedGCN::initSchedulers() {
 40 |   // SchedPasses.push_back(GCNMaxOcc);
 41 |   // Add passes in the corresponding order that they are inserted.
 42 |   for (const auto &Pass : PassOrder) {
 43 |     if (Pass == "OCC") // MinRP pass
 44 |       SchedPasses.push_back(OptSchedMaxOcc);
 45 |     else if (Pass == "ILP") // Regular ILP Pass
 46 |       SchedPasses.push_back(OptSchedBalanced);
 47 |     else if (Pass == "ILP_RL") // ILP Reduced Latency Pass
 48 |       SchedPasses.push_back(OptSchedReducedLatency);
 49 |     else
 50 |       llvm::report_fatal_error("Invalid value for pass order: " + Pass, false);
 51 |   }
 52 | 
 53 |   // Also run the sequential scheduler with regular latencies to get the
 54 |   // actual schedule length
 55 |   if (CompileTimeDataPass)
 56 |     SchedPasses.push_back(OptSchedSeqScheduler);
 57 | }
 58 | 
// Execute scheduling passes.
// Partially copied GCNScheduleDAGMILive::finalizeSchedule
//
// When both TwoPassEnabled and OptSchedEnabled are set, every pass produced by
// initSchedulers() is run over every recorded region, in order. The base-class
// ScheduleDAGMILive::finalizeSchedule() is always called afterwards.
void ScheduleDAGOptSchedGCN::finalizeSchedule() {
  if (TwoPassEnabled && OptSchedEnabled) {
    initSchedulers();
    // Mark every region for rescheduling.
    RescheduleRegions.resize(Regions.size());
    RescheduleRegions.set();

    LLVM_DEBUG(dbgs() << "Starting two pass scheduling approach\n");
    TwoPassSchedulingStarted = true;
    for (const SchedPassStrategy &S : SchedPasses) {
      // Block that the region currently being processed belongs to.
      MachineBasicBlock *MBB = nullptr;
      // Reset
      RegionNumber = ~0u;

      for (auto &Region : Regions) {
        RegionBegin = Region.first;
        RegionEnd = Region.second;

        // On entering a new basic block, finish the previous one (if any)
        // before starting the new one.
        if (RegionBegin->getParent() != MBB) {
          if (MBB)
            finishBlock();
          MBB = RegionBegin->getParent();
          startBlock(MBB);
        }
        unsigned NumRegionInstrs = std::distance(begin(), end());
        enterRegion(MBB, begin(), end(), NumRegionInstrs);

        // Skip empty scheduling regions (0 or 1 schedulable instructions).
        if (begin() == end() || begin() == std::prev(end())) {
          exitRegion();
          continue;
        }
        LLVM_DEBUG(
            getRealRegionPressure(RegionBegin, RegionEnd, LIS, "Before"));
        runSchedPass(S);
        LLVM_DEBUG(getRealRegionPressure(RegionBegin, RegionEnd, LIS, "After"));
        // Store the region boundaries back after the pass has run.
        Region = std::make_pair(RegionBegin, RegionEnd);
        exitRegion();
      }
      // NOTE(review): this is called even when Regions is empty and no block
      // was started in this pass -- confirm that finishBlock() tolerates it.
      finishBlock();
    }
  }

  ScheduleDAGMILive::finalizeSchedule();

  LLVM_DEBUG(if (isSimRegAllocEnabled()) {
    dbgs() << "*************************************\n";
    dbgs() << "Function: " << MF.getName()
           << "\nTotal Simulated Spills: " << SimulatedSpills << "\n";
    dbgs() << "*************************************\n";
  });
}
112 | 
113 | void ScheduleDAGOptSchedGCN::runSchedPass(SchedPassStrategy S) {
114 |   switch (S) { // Dispatch to the scheduler that implements strategy S.
115 |   case GCNMaxOcc: // The only strategy that logs no "PassFinished" event.
116 |     scheduleGCNMaxOcc();
117 |     break;
118 |   case OptSchedMaxOcc:
119 |     scheduleOptSchedMaxOcc();
120 |     Logger::Event("PassFinished", "num", 1);
121 |     break;
122 |   case OptSchedBalanced:
123 |     RecordTimedOutRegions = true; // Set only while the balanced pass runs.
124 |     scheduleOptSchedBalanced();
125 |     RecordTimedOutRegions = false;
126 |     Logger::Event("PassFinished", "num", 2);
127 |     break;
128 |   case OptSchedReducedLatency:
129 |     scheduleWithReducedLatencies();
130 |     Logger::Event("PassFinished", "num", 3);
131 |     break;
132 |   case OptSchedSeqScheduler:
133 |     scheduleWithSeqScheduler();
134 |     Logger::Event("PassFinished", "num", 4);
135 |     break;
136 |   }
137 | }
138 | 
139 | void ScheduleDAGOptSchedGCN::scheduleGCNMaxOcc() {
140 |   // With occupancy hints on, target the function's minimum allowed occupancy.
141 |   if (GCNLimitOccWithHints) {
142 |     auto &Strategy = static_cast<GCNMaxOccupancySchedStrategy &>(*SchedImpl);
143 |     const auto *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
144 |     Strategy.setTargetOccupancy(FuncInfo->getMinAllowedOccupancy());
145 |   }
146 |   ScheduleDAGMILive::schedule();
147 | }
148 | 
149 | void ScheduleDAGOptSchedGCN::scheduleOptSchedMaxOcc() {
150 |   ScheduleDAGOptSched::scheduleOptSchedMinRP(); // Base-class MinRP pass.
151 | }
152 | 
153 | void ScheduleDAGOptSchedGCN::scheduleOptSchedBalanced() {
154 |   ScheduleDAGOptSched::scheduleOptSchedBalanced(); // Plain base-class call.
155 | }
156 | 


--------------------------------------------------------------------------------
/lib/Wrapper/AMDGPU/GCNOptSched.h:
--------------------------------------------------------------------------------
 1 | //===- GCNOptSched.h - AMDGCN Combinatorial scheduler -----------*- C++ -*-===//
 2 | //
 3 | //  OptSched combinatorial scheduler driver targeting AMDGCN.
 4 | //
 5 | //===----------------------------------------------------------------------===//
 6 | 
 7 | #ifndef LLVM_GCN_OPT_SCHED_H
 8 | #define LLVM_GCN_OPT_SCHED_H
 9 | 
10 | #include "GCNRegPressure.h"
11 | #include "Wrapper/OptimizingScheduler.h"
12 | 
13 | namespace llvm {
14 | namespace opt_sched {
15 | 
16 | class ScheduleDAGOptSchedGCN : public ScheduleDAGOptSched {
17 | private:
18 |   enum SchedPassStrategy { // Selects the scheduler used by runSchedPass().
19 |     GCNMaxOcc,
20 |     OptSchedMaxOcc,
21 |     OptSchedBalanced,
22 |     OptSchedReducedLatency,
23 |     OptSchedSeqScheduler
24 |   };
25 | 
26 |   // Scheduling passes to execute, in execution order.
27 |   SmallVector<SchedPassStrategy, 4> SchedPasses;
28 | 
29 | public:
30 |   ScheduleDAGOptSchedGCN(llvm::MachineSchedContext *C,
31 |                          std::unique_ptr<MachineSchedStrategy> S);
32 | 
33 |   // After the scheduler is initialized and the scheduling regions have been
34 |   // recorded, execute the actual scheduling passes here.
35 |   void finalizeSchedule() override;
36 | 
37 |   // Set up and select the schedulers to run.
38 |   void initSchedulers() override;
39 | 
40 |   // TODO: After we refactor OptSched scheduler options put each scheduling
41 |   // pass into its own class.
42 | 
43 |   // Execute the scheduling pass identified by S.
44 |   void runSchedPass(SchedPassStrategy S);
45 | 
46 |   // Run the GCN max occupancy scheduler.
47 |   void scheduleGCNMaxOcc();
48 | 
49 |   // Run OptSched in the RP-only (max occupancy) configuration.
50 |   void scheduleOptSchedMaxOcc();
51 | 
52 |   // Run OptSched in the ILP/RP balanced mode.
53 |   void scheduleOptSchedBalanced() override;
54 | };
55 | 
56 | } // namespace opt_sched
57 | } // namespace llvm
58 | 
59 | #endif // LLVM_GCN_OPT_SCHED_H
60 | 


--------------------------------------------------------------------------------
/lib/Wrapper/AMDGPU/GCNOptSchedReg.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPT_SCHED_GCN_REG
 2 | #define OPT_SCHED_GCN_REG
 3 | // The guard must differ from OPT_SCHED_REG, which OptSchedReg.h already uses.
 4 | #include "Wrapper/AMDGPU/GCNOptSched.h"
 5 | #include "Wrapper/AMDGPU/OptSchedGCNTarget.cpp"
 6 | #include "llvm/CodeGen/MachineScheduler.h"
 7 | #include "llvm/Support/raw_ostream.h"
 8 | 
 9 | using namespace llvm;
10 | 
11 | namespace llvm {
12 | namespace opt_sched {
13 | 
14 | // Create the GCN OptSched DAG with load/store clustering mutations.
15 | static ScheduleDAGInstrs *createOptSchedGCN(MachineSchedContext *C) {
16 |   ScheduleDAGMILive *DAG = new ScheduleDAGOptSchedGCN(
17 |       C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
18 |   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
19 |   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
20 |   return DAG;
21 | }
22 | // Register the factory above as the "gcn-optsched" machine scheduler.
23 | static MachineSchedRegistry
24 |     OptSchedGCNMIRegistry("gcn-optsched", "Use the GCN OptSched scheduler.",
25 |                           createOptSchedGCN);
26 | 
27 | } // namespace opt_sched
28 | } // namespace llvm
29 | 
30 | #endif // OPT_SCHED_GCN_REG
31 | 


--------------------------------------------------------------------------------
/lib/Wrapper/AMDGPU/OptSchedDDGWrapperGCN.h:
--------------------------------------------------------------------------------
 1 | //===-- OptSchedDDGWrapperGCN.h - GCN DDG Wrapper ---------------*- C++ -*-===//
 2 | //
 3 | // Conversion from LLVM ScheduleDAG to OptSched DDG for amdgcn target.
 4 | //
 5 | //===----------------------------------------------------------------------===//
 6 | 
 7 | #ifndef LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H
 8 | #define LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H
 9 | 
10 | #include "GCNRegPressure.h"
11 | #include "Wrapper/OptSchedDDGWrapperBasic.h"
12 | #include "Wrapper/OptimizingScheduler.h"
13 | #include "opt-sched/Scheduler/sched_basic_data.h"
14 | #include "llvm/ADT/DenseMap.h"
15 | #include "llvm/CodeGen/LiveIntervals.h"
16 | 
17 | namespace llvm {
18 | namespace opt_sched {
19 | 
20 | class SubRegSet {
21 | private:
22 |   // One OptSched register number per subreg lane. A LaneBitmask index cannot
23 |   // be mapped to a specific sub-register, but lane mask interference still
24 |   // models the correct number of live subregs accurately.
25 |   SmallVector<unsigned, 8> OptSchedRegMap;
26 | 
27 | public:
28 |   using iterator = SmallVector<unsigned, 8>::iterator;
29 |   // Maximum number of subregs of this virtual register.
30 |   unsigned Size;
31 |   // OptSched register type.
32 |   int16_t Type;
33 | 
34 |   // The lane map is sized to Size_ entries at construction.
35 |   SubRegSet(unsigned Size_, int16_t Type_)
36 |       : OptSchedRegMap(Size_), Size(Size_), Type(Type_) {}
37 |   ~SubRegSet() = default;
38 |   // Iteration over the per-lane OptSched register numbers.
39 |   iterator begin() { return OptSchedRegMap.begin(); }
40 |   iterator end() { return OptSchedRegMap.end(); }
41 | };
42 | 
43 | class OptSchedDDGWrapperGCN : public OptSchedDDGWrapperBasic {
44 | private:
45 |   // Map sub-registers in LLVM to a list of live subreg lanes for that register.
46 |   // Each live lane represents either a VGPR32 or SGPR32. In our model each live
47 |   // subreg lane is identified by a separate OptSched register.
48 |   using RegsMap = DenseMap<unsigned, std::unique_ptr<SubRegSet>>;
49 |   RegsMap RegionRegs;
50 |   const std::vector<llvm::SUnit> &SUnits;
51 |   const llvm::LiveIntervals *LIS;
52 |   const llvm::MachineRegisterInfo &MRI;
53 |   // NOTE(review): presumably returns a SubRegKind value for Reg — confirm.
54 |   unsigned getRegKind(unsigned Reg) const;
55 |   // NOTE(review): After presumably picks liveness after vs. before MI.
56 |   void addLiveSubRegsAtInstr(const MachineInstr *MI, bool After);
57 | 
58 |   void addSubRegDefs(SchedInstruction *Instr, unsigned Reg,
59 |                      const LaneBitmask &LiveMask, bool LiveIn = false);
60 | 
61 |   void addSubRegUses(SchedInstruction *Instr, unsigned Reg,
62 |                      const LaneBitmask &LiveMask, bool LiveOut = false);
63 | 
64 | public:
65 |   // FIXME: Track VGPR/SGPR tuples or refactor Scheduler to use LLVM/GCN RP
66 |   // tracker.
67 |   enum SubRegKind { SGPR32, VGPR32, TOTAL_KINDS };
68 | 
69 |   OptSchedDDGWrapperGCN(llvm::MachineSchedContext *Context,
70 |                         ScheduleDAGOptSched *DAG, OptSchedMachineModel *MM,
71 |                         LATENCY_PRECISION LatencyPrecision,
72 |                         const std::string &RegionID);
73 |   // Overrides the inherited convertRegFiles().
74 |   void convertRegFiles() override;
75 | };
76 | 
77 | } // end namespace opt_sched
78 | } // end namespace llvm
79 | 
80 | #endif // LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H
81 | 


--------------------------------------------------------------------------------
/lib/Wrapper/OptSchedGenericTarget.cpp:
--------------------------------------------------------------------------------
 1 | //===- OptSchedGenericTarget.cpp - Generic Target -------------------------===//
 2 | //
 3 | // Implements a generic target stub.
 4 | //
 5 | //===----------------------------------------------------------------------===//
 6 | #include "OptSchedDDGWrapperBasic.h"
 7 | #include "OptSchedMachineWrapper.h"
 8 | #include "opt-sched/Scheduler/OptSchedTarget.h"
 9 | #include "opt-sched/Scheduler/config.h"
10 | #include "opt-sched/Scheduler/defines.h"
11 | #include "opt-sched/Scheduler/machine_model.h"
12 | #include "llvm/ADT/STLExtras.h"
13 | #include "llvm/CodeGen/ScheduleDAGInstrs.h"
14 | #include <memory>
15 | 
16 | using namespace llvm;
17 | using namespace llvm::opt_sched;
18 | 
19 | OptSchedRegistry<OptSchedTargetRegistry::OptSchedTargetFactory>
20 |     OptSchedTargetRegistry::Registry; // Definition of the static member.
21 | 
22 | namespace {
23 | 
24 | class OptSchedGenericTarget : public OptSchedTarget {
25 | public:
26 |   // Build the machine model directly from the given configuration file.
27 |   std::unique_ptr<OptSchedMachineModel>
28 |   createMachineModel(const char *ConfigPath) override {
29 |     return std::make_unique<OptSchedMachineModel>(ConfigPath);
30 |   }
31 | 
32 |   // The generic target always uses the basic DDG wrapper.
33 |   std::unique_ptr<OptSchedDDGWrapperBase>
34 |   createDDGWrapper(llvm::MachineSchedContext *Context, ScheduleDAGOptSched *DAG,
35 |                    OptSchedMachineModel *MM, LATENCY_PRECISION LatencyPrecision,
36 |                    const std::string &RegionID) override {
37 |     return std::make_unique<OptSchedDDGWrapperBasic>(
38 |         Context, DAG, MM, LatencyPrecision, RegionID);
39 |   }
40 | 
41 |   // Only the machine model is remembered; DAG and OccFile are unused here.
42 |   void initRegion(llvm::ScheduleDAGInstrs *DAG, MachineModel *MM_,
43 |                   Config &OccFile) override {
44 |     MM = MM_;
45 |   }
46 |   // Nothing is done when a region is finalized.
47 |   void finalizeRegion(const InstSchedule *Schedule) override {}
48 | 
49 |   // For the generic target the cost is the total PRP (defined out of line).
50 |   InstCount getCost(const llvm::SmallVectorImpl<unsigned> &PRP) const override;
51 | 
52 |   // The occupancy-related setters are intentional no-ops for this target.
53 |   void SetOccupancyLimit(int OccupancyLimitParam) override {}
54 |   void SetShouldLimitOcc(bool ShouldLimitOccParam) override {}
55 |   void SetOccLimitSource(OCC_LIMIT_TYPE LimitTypeParam) override {}
56 | };
57 | 
58 | } // end anonymous namespace
59 | 
60 | InstCount OptSchedGenericTarget::getCost(
61 |     const llvm::SmallVectorImpl<unsigned> &PRP) const {
62 |   Logger::Info("in generic get cost");
63 |   InstCount PressureSum = 0; // Sum of the PRP entries of all register types.
64 |   for (int16_t RegType = 0; RegType < MM->GetRegTypeCnt(); ++RegType)
65 |     PressureSum += PRP[RegType];
66 |   return PressureSum;
67 | }
68 | 
69 | namespace llvm {
70 | namespace opt_sched {
71 | // Factory returning a fresh generic target stub.
72 | std::unique_ptr<OptSchedTarget> createOptSchedGenericTarget() {
73 |   return std::make_unique<OptSchedGenericTarget>();
74 | }
75 | // Registers createOptSchedGenericTarget under the name "generic".
76 | OptSchedTargetRegistry
77 |     OptSchedGenericTargetRegistry("generic", createOptSchedGenericTarget);
78 | 
79 | } // namespace opt_sched
80 | } // namespace llvm
81 | 


--------------------------------------------------------------------------------
/lib/Wrapper/OptSchedMachineWrapper.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 | Description:  A wrapper that converts an LLVM target to an OptSched
  3 | MachineModel. By default machine models are read from ini files however
  4 | MachineModelGenerator classes may supplement or override the information
  5 | contained in those ini files.
  6 | *******************************************************************************/
  7 | 
  8 | #ifndef OPTSCHED_MACHINE_MODEL_WRAPPER_H
  9 | #define OPTSCHED_MACHINE_MODEL_WRAPPER_H
 10 | 
 11 | #include "opt-sched/Scheduler/machine_model.h"
 12 | #include "llvm/CodeGen/MachineInstr.h"
 13 | #include "llvm/CodeGen/MachineScheduler.h"
 14 | #include "llvm/CodeGen/RegisterClassInfo.h"
 15 | #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 16 | #include "llvm/CodeGen/TargetRegisterInfo.h"
 17 | #include "llvm/MC/MCInstrItineraries.h"
 18 | #include <map>
 19 | 
 20 | using namespace llvm;
 21 | 
 22 | namespace llvm {
 23 | namespace opt_sched {
 24 | 
 25 | class MachineModelGenerator;
 26 | 
 27 | // A wrapper around the OptSched MachineModel.
 28 | class OptSchedMachineModel : public MachineModel {
 29 | public:
 30 |   // Use a config file to initialize the machine model.
 31 |   OptSchedMachineModel(const char *configFile);
 32 |   // Convert information about the target machine into the
 33 |   // optimal scheduler machine model.
 34 |   void convertMachineModel(const llvm::ScheduleDAGInstrs &dag,
 35 |                            const llvm::RegisterClassInfo *regClassInfo);
 36 |   MachineModelGenerator *getMMGen() { return MMGen.get(); } // Non-owning.
 37 |   ~OptSchedMachineModel() = default;
 38 | 
 39 | private:
 40 |   // Whether a machine model should be generated (no initializer here).
 41 |   bool shouldGenerateMM;
 42 |   // The machine model generator; owned by this object.
 43 |   std::unique_ptr<MachineModelGenerator> MMGen;
 44 | };
 45 | 
 46 | // Interface for generating a machine model for a specific chip.
 47 | class MachineModelGenerator {
 48 | public:
 49 |   // Generate the instruction scheduling type for the given machine
 50 |   // instruction. Pure virtual: every generator has to implement it.
 51 |   virtual InstType generateInstrType(const llvm::MachineInstr *instr) = 0;
 52 |   virtual bool generatesAllData() = 0; // CortexA7: false, CortexA53: true.
 53 |   virtual void generateProcessorData(std::string *mdlName_, int *issueRate_) {}
 54 |   virtual ~MachineModelGenerator() = default;
 55 | };
 56 | 
 57 | // Generate a machine model for the Cortex A7. This will only generate
 58 | // instruction types. Things like issue type and issue rate must be specified
 59 | // correctly in the machine_model.cfg file. Check
 60 | // OptSchedCfg/arch/ARM_cortex_a7_machine_model.cfg for a template.
 61 | class CortexA7MMGenerator : public MachineModelGenerator {
 62 | public:
 63 |   CortexA7MMGenerator(const llvm::ScheduleDAGInstrs *dag, MachineModel *mm);
 64 |   // Generate the instruction scheduling type for an instruction of the
 65 |   // current DAG by using LLVM itineraries.
 66 |   InstType generateInstrType(const llvm::MachineInstr *instr) override;
 67 |   bool generatesAllData() override { return false; } // Instr types only.
 68 |   ~CortexA7MMGenerator() override = default;
 69 | 
 70 | private:
 71 |   // Functional units, one bit per unit.
 72 |   enum FU : unsigned {
 73 |     Pipe0 = 1,   // 00000001
 74 |     Pipe1 = 2,   // 00000010
 75 |     LSPipe = 4,  // 00000100
 76 |     NPipe = 8,   // 00001000
 77 |     NLSPipe = 16 // 00010000
 78 |   };
 79 |   const llvm::ScheduleDAGInstrs *DAG;
 80 |   MachineModel *MM;
 81 |   const llvm::InstrItineraryData *IID;
 82 | 
 83 |   // Returns true if a machine instruction should be considered fully pipelined
 84 |   // in the machine model.
 85 |   bool isMIPipelined(const llvm::MachineInstr *inst, unsigned idx) const;
 86 |   // Find the issue type for an instruction.
 87 |   IssueType generateIssueType(const llvm::InstrStage *E) const;
 88 | };
 89 | 
 90 | class CortexA53MMGenerator : public MachineModelGenerator {
 91 | public:
 92 |   CortexA53MMGenerator(const llvm::ScheduleDAGInstrs *dag, MachineModel *mm)
 93 |       : DAG(dag), MM(mm) {}
 94 |   InstType generateInstrType(const llvm::MachineInstr *instr) override;
 95 |   bool generatesAllData() override { return true; } // Unlike CortexA7.
 96 |   void generateProcessorData(std::string *mdlName_, int *issueRate_) override;
 97 | 
 98 | private:
 99 |   std::vector<std::string> ResourceIdToIssueType;
100 |   const llvm::ScheduleDAGInstrs *DAG; // Stored by the constructor.
101 |   MachineModel *MM;                   // Stored by the constructor.
102 | };
103 | 
104 | } // end namespace opt_sched
105 | } // namespace llvm
106 | 
107 | #endif
108 | 


--------------------------------------------------------------------------------
/lib/Wrapper/OptSchedReg.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPT_SCHED_REG
 2 | #define OPT_SCHED_REG
 3 | 
 4 | #include "OptimizingScheduler.h"
 5 | #include "llvm/CodeGen/MachineScheduler.h"
 6 | 
 7 | using namespace llvm;
 8 | 
 9 | namespace llvm {
10 | namespace opt_sched {
11 | 
12 | // Create the OptSched ScheduleDAG with the copy-constrain mutation attached.
13 | static ScheduleDAGInstrs *createOptSched(MachineSchedContext *C) {
14 |   ScheduleDAGMILive *DAG =
15 |       new ScheduleDAGOptSched(C, std::make_unique<GenericScheduler>(C));
16 |   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
17 |   // README: if you need the x86 mutations uncomment the next line.
18 |   // DAG->addMutation(createX86MacroFusionDAGMutation());
19 |   // You also need to add the next line somewhere above this function
20 |   //#include "../../../../../llvm/lib/Target/X86/X86MacroFusion.h"
21 |   return DAG;
22 | }
23 | 
24 | // Register the factory above as the "optsched" machine scheduler.
25 | static MachineSchedRegistry OptSchedMIRegistry("optsched",
26 |                                                "Use the OptSched scheduler.",
27 |                                                createOptSched);
28 | 
29 | } // namespace opt_sched
30 | } // namespace llvm
31 | 
32 | #endif // OPT_SCHED_REG


--------------------------------------------------------------------------------
/patches/amdgpu/README.md:
--------------------------------------------------------------------------------
 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/)
 2 | 
 3 | # OptSched - Optimizing Scheduler
 4 | This directory contains patches for ROCm 2.4. These patches cause ROCm 2.4 to properly load OptSched and also print occupancy information.
 5 | 
 6 | ## Files
 7 | 
 8 | `ROCm-2.4-LLVM-print-occupancy.patch`
 9 | 
10 | Output occupancy info.
11 | 
12 | `ROCm-2.4-load-optsched.patch`
13 | 
14 | Load the OptSched.so plugin.
15 | 


--------------------------------------------------------------------------------
/patches/amdgpu/ROCm-2.4-LLVM-print-occupancy.patch:
--------------------------------------------------------------------------------
 1 | diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
 2 | index d704a0fae0d..b988d540000 100644
 3 | --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
 4 | +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
 5 | @@ -489,7 +489,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 6 |        OutStreamer->EmitBytes(StringRef(Comment));
 7 |      }
 8 |    }
 9 | -
10 | +  const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
11 | +  auto OccVGPRs = STI.getOccupancyWithNumVGPRs(CurrentProgramInfo.NumVGPR);
12 | +  auto OccSGPRs = STI.getOccupancyWithNumSGPRs(CurrentProgramInfo.NumSGPR);
13 | +  auto OccLDS = STI.getOccupancyWithLocalMemSize(MF);
14 | + 
15 | +  auto Occ = std::min(OccVGPRs, std::min(OccSGPRs, OccLDS));
16 | +  dbgs() << "Final occupancy for function " << MF.getName() << ":" << Occ << "\n";
17 |    return false;
18 |  }
19 |  
20 | 


--------------------------------------------------------------------------------
/patches/amdgpu/ROCm-2.4-load-optsched.patch:
--------------------------------------------------------------------------------
 1 | From 84ad382227da24c727fc60c998c4f0c61a5afe0f Mon Sep 17 00:00:00 2001
 2 | From: Austin Kerbow <amkerbow@gmail.com>
 3 | Date: Sun, 25 Nov 2018 20:43:22 -0800
 4 | Subject: [PATCH] Load plugins in-process.
 5 | 
 6 | ---
 7 |  src/driver/AmdCompiler.cpp | 15 +++++++++++++++
 8 |  1 file changed, 15 insertions(+)
 9 | 
10 | diff --git a/src/driver/AmdCompiler.cpp b/src/driver/AmdCompiler.cpp
11 | index 8d34644..e4a4ebd 100644
12 | --- a/src/driver/AmdCompiler.cpp
13 | +++ b/src/driver/AmdCompiler.cpp
14 | @@ -368,6 +368,7 @@ class AMDGPUCompiler : public Compiler {
15 |    ArgStringList GetJobArgsFitered(const Command& job);
16 |    // Parse -mllvm options
17 |    bool ParseLLVMOptions(const std::vector<std::string>& options);
18 | +  bool LoadPlugins(const std::vector<std::string>& plugins);
19 |    bool PrepareCompiler(CompilerInstance& clang, const Command& job);
20 |    bool PrepareAssembler(AssemblerInvocation &Opts, const Command& job);
21 |    bool ExecuteCompiler(CompilerInstance& clang, BackendAction action);
22 | @@ -772,6 +773,19 @@ bool AMDGPUCompiler::ParseLLVMOptions(const std::vector<std::string>& options) {
23 |    return true;
24 |  }
25 |  
26 | +bool AMDGPUCompiler::LoadPlugins(const std::vector<std::string>& plugins) {
27 | +  if (plugins.empty()) { return true; }
28 | +  // Load any requested plugins.
29 | +  for (const auto &path : plugins) {
30 | +    std::string error;
31 | +    if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(path.c_str(), &error)) {
32 | +      diags.Report(diag::err_fe_unable_to_load_plugin) << path << error;
33 | +      return false;
34 | +    }
35 | +  }
36 | +  return true;
37 | +}
38 | +
39 |  void AMDGPUCompiler::ResetOptionsToDefault() {
40 |    cl::ResetAllOptionOccurrences();
41 |    for (auto SC : cl::getRegisteredSubcommands()) {
42 | @@ -791,6 +805,7 @@ bool AMDGPUCompiler::PrepareCompiler(CompilerInstance& clang, const Command& job
43 |      const_cast<const char**>(args.data()),
44 |      const_cast<const char**>(args.data()) + args.size(),
45 |      clang.getDiagnostics())) { return false; }
46 | +  if (!LoadPlugins(clang.getFrontendOpts().Plugins)) { return false; }
47 |    if (!ParseLLVMOptions(clang.getFrontendOpts().LLVMArgs)) { return false; }
48 |    return true;
49 |  }
50 | 


--------------------------------------------------------------------------------
/patches/llvm6.0/README.md:
--------------------------------------------------------------------------------
 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/)
 2 | 
 3 | # OptSched - Optimizing Scheduler
 4 | This directory contains patches for LLVM 6.0. These patches must be applied before building LLVM 6.0 to print spilling information.
 5 | 
 6 | ## Files
 7 | 
 8 | `llvm6-print-spilling-info.patch`
 9 | 
10 | This file is for LLVM 6.0 located under the release/6.x branch at https://github.com/llvm/llvm-project.
11 | 
12 | `flang-llvm6-print-spilling-info.patch`
13 | 
14 | This file is for Flang LLVM 6.0 located under the release_60 branch at https://github.com/flang-compiler/llvm.
15 | 


--------------------------------------------------------------------------------
/patches/llvm7.0/README.md:
--------------------------------------------------------------------------------
 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/)
 2 | 
 3 | # OptSched - Optimizing Scheduler
 4 | This directory contains patches for LLVM 7.0. These patches must be applied before building LLVM 7.0 to print spilling information.
 5 | 
 6 | ## Files
 7 | 
 8 | `flang-llvm7-print-spilling-info.patch`
 9 | 
10 | This file is for Flang LLVM 7.0 located under the release_70 branch at https://github.com/flang-compiler/llvm.
11 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Test runner infrastructure for OptSched. This configures the OptSched test trees
 2 | # for use by Lit, and delegates to LLVM's lit test handlers.
 3 | 
 4 | configure_lit_site_cfg(
 5 |   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
 6 |   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
 7 |   MAIN_CONFIG
 8 |   ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
 9 |   )
10 | 
11 | configure_lit_site_cfg(
12 |   ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in
13 |   ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py
14 |   MAIN_CONFIG
15 |   ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py
16 |   )
17 | 
18 | list(APPEND OPTSCHED_TEST_DEPS
19 |   LLVMOptSched
20 |   )
21 | # Point the parameter at the site config generated above (lit.site.cfg.py).
22 | set(OPTSCHED_TEST_PARAMS
23 |   optsched_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
24 |   )
25 | 
26 | add_custom_target(optsched-test-depends DEPENDS ${OPTSCHED_TEST_DEPS})
27 | set_target_properties(optsched-test-depends PROPERTIES FOLDER "OptSched tests")
28 | 
29 | add_lit_testsuite(check-optsched "Running the OptSched regression tests"
30 |   ${CMAKE_CURRENT_BINARY_DIR}
31 |   PARAMS ${OPTSCHED_TEST_PARAMS}
32 |   DEPENDS ${OPTSCHED_TEST_DEPS}
33 |   ARGS ${OPTSCHED_TEST_EXTRA_ARGS} ${OPTSCHED_LIT_ARGS}
34 |   )
35 | set_target_properties(check-optsched PROPERTIES FOLDER "OptSched tests")
36 | 
37 | add_lit_testsuites(OPTSCHED ${CMAKE_CURRENT_SOURCE_DIR}
38 |   PARAMS ${OPTSCHED_TEST_PARAMS}
39 |   DEPENDS ${OPTSCHED_TEST_DEPS}
40 | )
41 | 


--------------------------------------------------------------------------------
/test/Unit/lit.cfg.py:
--------------------------------------------------------------------------------
 1 | # -*- Python -*-
 2 | 
 3 | # Configuration file for the 'lit' test runner.
 4 | 
 5 | import os
 6 | import platform
 7 | import subprocess
 8 | 
 9 | import lit.formats
10 | import lit.util
11 | 
12 | # name: The name of this test suite.
13 | config.name = 'OptSched-Unit'
14 | 
15 | # suffixes: A list of file extensions to treat as test files.
16 | config.suffixes = []
17 | 
18 | # test_source_root: The root path where tests are located.
19 | # test_exec_root: The root path where tests should be run.
20 | config.test_exec_root = os.path.join(config.optsched_obj_root, 'unittests')
21 | config.test_source_root = config.test_exec_root
22 | 
23 | config.excludes = ['llvm-project-optsched', 'llvm_build_dirs']
24 | 
25 | # testFormat: The test format to use to interpret tests.
26 | config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, 'Tests')
27 | 


--------------------------------------------------------------------------------
/test/Unit/lit.site.cfg.py.in:
--------------------------------------------------------------------------------
 1 | @LIT_SITE_CFG_IN_HEADER@
 2 | 
 3 | import sys
 4 | 
 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@"
 9 | config.llvm_build_mode = "@LLVM_BUILD_MODE@"
10 | config.optsched_obj_root = "@OPTSCHED_BINARY_DIR@"
11 | config.enable_shared = @ENABLE_SHARED@
12 | config.shlibdir = "@SHLIBDIR@"
13 | config.target_triple = "@TARGET_TRIPLE@"
14 | 
15 | # Support substitution of the tools_dir, libs_dirs, and build_mode with user
16 | # parameters. This is used when we can't determine the tool dir at
17 | # configuration time.
18 | try:
19 |     config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
20 |     config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params
21 |     config.llvm_build_mode = config.llvm_build_mode % lit_config.params
22 | except KeyError:
23 |     e = sys.exc_info()[1]
24 |     key, = e.args
25 |     lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
26 | 
27 | # Let the main config do the real work.
28 | lit_config.load_config(config, "@OPTSCHED_SOURCE_DIR@/test/Unit/lit.cfg.py")
29 | 


--------------------------------------------------------------------------------
/test/lit.cfg.py:
--------------------------------------------------------------------------------
 1 | # -*- Python -*-
 2 | 
 3 | import os
 4 | import platform
 5 | import re
 6 | import subprocess
 7 | import tempfile
 8 | 
 9 | import lit.formats
10 | import lit.util
11 | 
12 | from lit.llvm.subst import ToolSubst
13 | from lit.llvm.subst import FindTool
14 | 
15 | # Configuration file for the 'lit' test runner.
16 | # NOTE(review): no config.test_format is assigned in this file — confirm.
17 | # name: The name of this test suite.
18 | config.name = 'OptSched'
19 | 
20 | # suffixes: A list of file extensions to treat as test files.
21 | config.suffixes = ['.c', '.cpp', '.cppm', '.m', '.mm', '.cu',
22 |                    '.ll', '.cl', '.s', '.S', '.modulemap', '.test', '.rs']
23 | 
24 | # excludes: A list of directories to exclude from the testsuite.
25 | # NOTE(review): the list is empty, so 'Inputs' subdirectories (auxiliary
26 | # test inputs) are not excluded — confirm this is intended.
27 | config.excludes = []
28 | 
29 | # test_source_root: The root path where tests are located.
30 | config.test_source_root = os.path.dirname(__file__)
31 | 
32 | # test_exec_root: The root path where tests should be run.
33 | config.test_exec_root = os.path.join(config.optsched_obj_root, 'test')
34 | # Make the PATH of the lit environment available to tests as %PATH%.
35 | config.substitutions.append(('%PATH%', config.environment['PATH']))
36 | 


--------------------------------------------------------------------------------
/test/lit.site.cfg.py.in:
--------------------------------------------------------------------------------
 1 | @LIT_SITE_CFG_IN_HEADER@
 2 | 
 3 | import sys
 4 | 
 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@"
 9 | config.llvm_shlib_dir = "@SHLIBDIR@"
10 | config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@"
11 | config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
12 | config.optsched_obj_root = "@OPTSCHED_BINARY_DIR@"
13 | config.optsched_src_dir = "@OPTSCHED_SOURCE_DIR@"
14 | config.host_triple = "@LLVM_HOST_TRIPLE@"
15 | config.target_triple = "@TARGET_TRIPLE@"
16 | config.host_cxx = "@CMAKE_CXX_COMPILER@"
17 | config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
18 | config.enable_shared = @ENABLE_SHARED@
19 | config.host_arch = "@HOST_ARCH@"
20 | config.enable_abi_breaking_checks = "@LLVM_ENABLE_ABI_BREAKING_CHECKS@"
21 | config.python_executable = "@PYTHON_EXECUTABLE@"
22 | 
23 | # Support substitution of the tools and libs dirs with user parameters. This is
24 | # used when we can't determine the tool dir at configuration time.
25 | try:
26 |     config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
27 |     config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params
28 |     config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params
29 | except KeyError:
30 |     e = sys.exc_info()[1]
31 |     key, = e.args
32 |     lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
33 | 
34 | @LIT_SITE_CFG_IN_FOOTER@
35 | 
36 | # Let the main config do the real work.
37 | lit_config.load_config(config, "@OPTSCHED_SOURCE_DIR@/test/lit.cfg.py")
38 | 


--------------------------------------------------------------------------------
/unittests/Basic/ArrayRef2DTest.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt-sched/Scheduler/array_ref2d.h"
  2 | 
  3 | #include <array>
  4 | 
  5 | #include "gtest/gtest.h"
  6 | 
  7 | using namespace llvm::opt_sched;
  8 | 
  9 | namespace {
 10 | TEST(ArrayRef2D, CanAccessElements) {
 11 |   int Arr[] = {
 12 |       1, 2, 3, //
 13 |       4, 5, 6,
 14 |   };
 15 | 
 16 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 17 |   EXPECT_EQ(1, (Ref[{0, 0}]));
 18 |   EXPECT_EQ(2, (Ref[{0, 1}]));
 19 |   EXPECT_EQ(3, (Ref[{0, 2}]));
 20 |   EXPECT_EQ(4, (Ref[{1, 0}]));
 21 |   EXPECT_EQ(5, (Ref[{1, 1}]));
 22 |   EXPECT_EQ(6, (Ref[{1, 2}]));
 23 | }
 24 | 
 25 | TEST(ArrayRef2D, CanGetRowsAndColumns) {
 26 |   int Arr[] = {
 27 |       1, 2, 3, //
 28 |       4, 5, 6,
 29 |   };
 30 | 
 31 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 32 |   EXPECT_EQ(2, Ref.rows());
 33 |   EXPECT_EQ(3, Ref.columns());
 34 | }
 35 | 
 36 | TEST(ArrayRef2D, AccessReturnsReferenceToElements) {
 37 |   int Arr[] = {
 38 |       1, 2, 3, //
 39 |       4, 5, 6,
 40 |   };
 41 | 
 42 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 43 |   EXPECT_EQ(&Arr[0], &(Ref[{0, 0}]));
 44 | }
 45 | 
 46 | TEST(ArrayRef2D, AccessDoesNotAllowChanges) {
 47 |   int Arr[] = {
 48 |       1, 2, 3, //
 49 |       4, 5, 6,
 50 |   };
 51 | 
 52 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 53 |   static_assert(std::is_same<const int &, decltype(Ref[{0, 0}])>::value, "");
 54 | }
 55 | 
 56 | TEST(ArrayRef2D, RequiresRectangle) {
 57 |   int Arr[] = {
 58 |       1, 2, 3, //
 59 |       4, 5,
 60 |   };
 61 | 
 62 |   EXPECT_DEBUG_DEATH(ArrayRef2D<int>(Arr, 2, 3), ".*");
 63 | }
 64 | 
 65 | TEST(ArrayRef2D, AccessingFailsForOutOfBounds) {
 66 |   int Arr[] = {
 67 |       1, 2, 3, //
 68 |       4, 5, 6,
 69 |   };
 70 | 
 71 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 72 |   EXPECT_DEBUG_DEATH((Ref[{5, 10}]), ".*");
 73 | }
 74 | 
 75 | TEST(ArrayRef2D, WorksForEmpty) {
 76 |   std::array<int, 0> Arr{};
 77 | 
 78 |   ArrayRef2D<int> Ref(Arr, 0, 0);
 79 |   EXPECT_EQ(0u, Ref.rows());
 80 |   EXPECT_EQ(0u, Ref.columns());
 81 |   EXPECT_EQ(0u, Ref.underlyingData().size());
 82 | }
 83 | 
 84 | TEST(ArrayRef2D, AccessingEmptyRefFails) {
 85 |   std::array<int, 0> Arr{};
 86 | 
 87 |   ArrayRef2D<int> Ref(Arr, 0, 0);
 88 |   EXPECT_DEBUG_DEATH((Ref[{0, 0}]), ".*");
 89 | }
 90 | 
 91 | TEST(ArrayRef2D, UnderlyingDataIsArrayRef) {
 92 |   int Arr[] = {
 93 |       1, 2, 3, //
 94 |       4, 5, 6,
 95 |   };
 96 | 
 97 |   ArrayRef2D<int> Ref(Arr, 2, 3);
 98 |   static_assert(
 99 |       std::is_same<llvm::ArrayRef<int>, decltype(Ref.underlyingData())>::value,
100 |       "");
101 | }
102 | 
103 | TEST(MutableArrayRef2D, IsConvertibleToArrayRef2D) {
104 |   static_assert(
105 |       std::is_convertible<MutableArrayRef2D<int>, ArrayRef2D<int>>::value, "");
106 | }
107 | 
108 | TEST(MutableArrayRef2D, UnderlyingDataIsMutableArrayRef) {
109 |   int Arr[] = {
110 |       1, 2, 3, //
111 |       4, 5, 6,
112 |   };
113 | 
114 |   MutableArrayRef2D<int> Ref(Arr, 2, 3);
115 |   static_assert(std::is_same<llvm::MutableArrayRef<int>,
116 |                              decltype(Ref.underlyingData())>::value,
117 |                 "");
118 | }
119 | 
120 | TEST(MutableArrayRef2D, CanMutateViaAccess) {
121 |   int Arr[] = {
122 |       1, 2, 3, //
123 |       4, 5, 6,
124 |   };
125 | 
126 |   MutableArrayRef2D<int> Ref(Arr, 2, 3);
127 |   Ref[{1, 1}] = -5;
128 |   EXPECT_EQ(-5, (Ref[{1, 1}]));
129 |   EXPECT_EQ(-5, Arr[4]);
130 | }
131 | } // namespace
132 | 


--------------------------------------------------------------------------------
/unittests/Basic/CMakeLists.txt:
--------------------------------------------------------------------------------
# Unit tests for the basic scheduler utilities, built into one test binary.
# simple_machine_model.h is a header shared by the tests and is therefore not
# listed as a source.
add_optsched_unittest(OptSchedBasicTests
  ArrayRef2DTest.cpp
  ConfigTest.cpp
  LinkedListTest.cpp
  LoggerTest.cpp
  UtilitiesTest.cpp
  simple_machine_model_test.cpp
  )
9 | 


--------------------------------------------------------------------------------
/unittests/Basic/ConfigTest.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt-sched/Scheduler/config.h"
  2 | 
  3 | #include <sstream>
  4 | 
  5 | #include "gtest/gtest.h"
  6 | 
  7 | using llvm::opt_sched::Config;
  8 | using llvm::opt_sched::SchedulerOptions;
  9 | 
 10 | namespace {
 11 | 
 12 | TEST(Config, ReadString) {
 13 |   Config config;
 14 |   std::istringstream input(R"(
 15 |         KEY VALUE
 16 |     )");
 17 |   config.Load(input);
 18 | 
 19 |   EXPECT_EQ("VALUE", config.GetString("KEY"));
 20 | }
 21 | 
 22 | TEST(Config, ReadStringPath) {
 23 |   Config config;
 24 |   std::istringstream input(R"(
 25 |         KEY some/path/
 26 |     )");
 27 |   config.Load(input);
 28 | 
 29 |   EXPECT_EQ("some/path/", config.GetString("KEY"));
 30 | }
 31 | 
 32 | TEST(Config, ReadInt) {
 33 |   Config config;
 34 |   std::istringstream input(R"(
 35 |         KEY 1
 36 |     )");
 37 |   config.Load(input);
 38 | 
 39 |   EXPECT_EQ(1, config.GetInt("KEY"));
 40 | }
 41 | 
 42 | TEST(Config, ReadFloat) {
 43 |   Config config;
 44 |   std::istringstream input(R"(
 45 |         KEY 1.3
 46 |     )");
 47 |   config.Load(input);
 48 | 
 49 |   EXPECT_EQ(1.3f, config.GetFloat("KEY"));
 50 | }
 51 | 
 52 | class TrueBoolTest : public testing::TestWithParam<std::string> {};
 53 | 
 54 | TEST_P(TrueBoolTest, ReadBool) {
 55 |   Config config;
 56 |   std::istringstream input("KEY " + GetParam());
 57 |   config.Load(input);
 58 | 
 59 |   EXPECT_TRUE(config.GetBool("KEY"));
 60 | }
 61 | 
 62 | INSTANTIATE_TEST_CASE_P(TrueBoolStrings, TrueBoolTest,
 63 |                         testing::Values("1", "yes", "YES", "true", "TRUE"), );
 64 | 
 65 | class FalseBoolTest : public testing::TestWithParam<std::string> {};
 66 | 
 67 | TEST_P(FalseBoolTest, ReadBool) {
 68 |   Config config;
 69 |   std::istringstream input("KEY " + GetParam());
 70 |   config.Load(input);
 71 | 
 72 |   EXPECT_FALSE(config.GetBool("KEY"));
 73 | }
 74 | 
 75 | INSTANTIATE_TEST_CASE_P(FalseBoolStrings, FalseBoolTest,
 76 |                         testing::Values("0", "no", "NO", "false", "FALSE"), );
 77 | 
 78 | class StringListTest : public testing::TestWithParam<
 79 |                            std::pair<std::vector<std::string>, std::string>> {};
 80 | 
 81 | TEST_P(StringListTest, ReadStrings) {
 82 |   Config config;
 83 |   std::istringstream input("KEY " + GetParam().second);
 84 |   config.Load(input);
 85 | 
 86 |   const auto result = config.GetStringList("KEY");
 87 |   const std::vector<std::string> strings(result.begin(), result.end());
 88 | 
 89 |   const std::vector<std::string> expected = GetParam().first;
 90 | 
 91 |   EXPECT_EQ(expected, strings);
 92 | }
 93 | 
 94 | INSTANTIATE_TEST_CASE_P(
 95 |     TestCases, StringListTest,
 96 |     testing::ValuesIn(
 97 |         std::vector<std::pair<std::vector<std::string>, std::string>>{
 98 |             {{"singleton"}, "singleton"},
 99 |             {{"a", "b"}, "a,b"},
100 |             {{"a", "b", "c", "d", "E", "f", "g", "h", "i"},
101 |              "a,b,c,d,E,f,g,h,i"},
102 | 
103 |             {{}, ""},
104 |             {{"singleton"}, "singleton,"},
105 |             {{"a", "b"}, "a,b,\nc,d"},
106 |             {{"a"}, "a, b"},
107 |         }), );
108 | 
109 | class IntListTest : public testing::TestWithParam<
110 |                         std::pair<std::vector<int64_t>, std::string>> {};
111 | 
112 | TEST_P(IntListTest, ReadInts) {
113 |   Config config;
114 |   std::istringstream input("KEY " + GetParam().second);
115 |   config.Load(input);
116 | 
117 |   const auto result = config.GetIntList("KEY");
118 |   const std::vector<int64_t> ints(result.begin(), result.end());
119 | 
120 |   const std::vector<int64_t> expected = GetParam().first;
121 | 
122 |   EXPECT_EQ(expected, ints);
123 | }
124 | 
125 | INSTANTIATE_TEST_CASE_P(
126 |     TestCases, IntListTest,
127 |     testing::ValuesIn(std::vector<std::pair<std::vector<int64_t>, std::string>>{
128 |         {{1}, "1"},
129 |         {{-1, 0}, "-1,0"},
130 | 
131 |         {{}, ""},
132 |         {{-2, -3}, "-2,-3\n4,5"},
133 |         {{832, 43}, "832,43"},
134 |     }), );
135 | 
136 | class FloatListTest : public testing::TestWithParam<
137 |                           std::pair<std::vector<float>, std::string>> {};
138 | 
139 | TEST_P(FloatListTest, ReadFloats) {
140 |   Config config;
141 |   std::istringstream input("KEY " + GetParam().second);
142 |   config.Load(input);
143 | 
144 |   const auto result = config.GetFloatList("KEY");
145 |   const std::vector<float> ints(result.begin(), result.end());
146 | 
147 |   const std::vector<float> expected = GetParam().first;
148 | 
149 |   EXPECT_EQ(expected, ints);
150 | }
151 | 
152 | INSTANTIATE_TEST_CASE_P(
153 |     TestCases, FloatListTest,
154 |     testing::ValuesIn(std::vector<std::pair<std::vector<float>, std::string>>{
155 |         {{1.0f}, "1"},
156 |         {{-1.5f, 0.02f}, "-1.5,0.02"},
157 | 
158 |         {{}, ""},
159 |         {{-0.2f, -3}, "-0.2,-3\n4,5"},
160 |         {{832.123f, 43}, "832.123,43"},
161 |     }), );
162 | 
163 | } // namespace
164 | 


--------------------------------------------------------------------------------
/unittests/Basic/LoggerTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "opt-sched/Scheduler/logger.h"
 2 | 
 3 | #include <sstream>
 4 | 
 5 | #include "gmock/gmock-matchers.h"
 6 | #include "gtest/gtest.h"
 7 | 
 8 | using namespace llvm::opt_sched;
 9 | 
namespace {
// Fixture that redirects the Logger's output into an in-memory stream for the
// duration of each test and restores the previous stream afterwards.
class LoggerTest : public ::testing::Test {
protected:
  // Remember the stream currently in use, then install our capture stream.
  LoggerTest() : old{Logger::GetLogStream()} { Logger::SetLogStream(log); }

  // Restore the previous stream; this runs before `log` is destroyed.
  ~LoggerTest() override { Logger::SetLogStream(old); }

  // Everything written to the Logger since the fixture was constructed.
  std::string getLog() const { return log.str(); }

private:
  std::ostream &old;      // stream that was active before the test
  std::ostringstream log; // capture buffer installed for the test
};

// An event with key/value pairs of mixed types is written as a single
// "EVENT: {...}" line followed by a newline; the regex below expects the pairs
// in call order with a numeric "time" field last.
TEST_F(LoggerTest, EventWorks) {
  Logger::Event("SomeEventID", "key", 42, "key2", "value2", "key3", true,
                "key4", 123ull, "key5", -123ll);
  EXPECT_THAT(
      getLog(),
      ::testing::MatchesRegex(
          R"(EVENT: \{"event_id": "SomeEventID", "key": 42, "key2": "value2", "key3": true, "key4": 123, "key5": -123, "time": [0-9]+\})"
          "\n"));
}

// With no key/value pairs the event still carries its id and a "time" field.
TEST_F(LoggerTest, EmptyEventIncludesOnlyTime) {
  Logger::Event("SomeEventID");
  EXPECT_THAT(getLog(),
              ::testing::MatchesRegex(
                  R"(EVENT: \{"event_id": "SomeEventID", "time": [0-9]+\})"
                  "\n"));
}
} // namespace
42 | 


--------------------------------------------------------------------------------
/unittests/Basic/UtilitiesTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "opt-sched/Scheduler/utilities.h"
 2 | 
 3 | #include "gtest/gtest.h"
 4 | 
 5 | namespace utils = llvm::opt_sched::Utilities;
 6 | 
namespace {

// Pins clcltBitsNeededToHoldNum for small values and one larger power of two.
// Per these expectations 0 needs 0 bits and an exact power of two 2^k needs
// k + 1 bits (e.g. 4 -> 3, 0x8000 -> 16).
TEST(Utilities, clcltBitsNeededToHoldNum) {
  EXPECT_EQ(0, utils::clcltBitsNeededToHoldNum(0));
  EXPECT_EQ(1, utils::clcltBitsNeededToHoldNum(1));
  EXPECT_EQ(2, utils::clcltBitsNeededToHoldNum(2));
  EXPECT_EQ(2, utils::clcltBitsNeededToHoldNum(3));
  EXPECT_EQ(3, utils::clcltBitsNeededToHoldNum(4));

  EXPECT_EQ(16, utils::clcltBitsNeededToHoldNum(0x8000));
}

} // namespace
20 | 


--------------------------------------------------------------------------------
/unittests/Basic/simple_machine_model.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTSCHED_SIMPLE_MACHINE_MODEL_H
 2 | #define OPTSCHED_SIMPLE_MACHINE_MODEL_H
 3 | 
 4 | #include <string.h> // strdup is in the C header, but not the C++ header
 5 | 
 6 | #include "opt-sched/Scheduler/buffers.h"
 7 | #include "opt-sched/Scheduler/machine_model.h"
 8 | 
// Builds a MachineModel for tests from the machine-model description embedded
// below, without reading any file: the text is copied with strdup, wrapped in
// a SpecsBuffer, and handed to the MachineModel constructor.
inline llvm::opt_sched::MachineModel simpleMachineModel() {
  static constexpr const char SimpleModel[] = R"(
MODEL_NAME: Simple

# The limit on the total number of instructions that can be issued in one cycle
ISSUE_RATE: 1

# Each instruction must have an issue type, i.e. a function unit that the instruction uses.
ISSUE_TYPE_COUNT: 1

# Default issue type for LLVM instructions.
Default 1

DEP_LATENCY_ANTI: 0
DEP_LATENCY_OUTPUT: 1
DEP_LATENCY_OTHER: 1

# This will not be used. Reg type info will be taken from the compiler.
REG_TYPE_COUNT: 2
I 1
F 1

# Set this to the total number of instructions
INST_TYPE_COUNT: 2

INST_TYPE: artificial
ISSUE_TYPE: Default
LATENCY: 0
PIPELINED: YES
BLOCKS_CYCLE: NO
SUPPORTED: NO

INST_TYPE: Inst
ISSUE_TYPE: Default
LATENCY: 1
PIPELINED: YES
BLOCKS_CYCLE: NO
SUPPORTED: YES
  )";

  // sizeof(SimpleModel) counts the terminating NUL of the literal.
  // NOTE(review): presumably SpecsBuffer takes ownership of the strdup'd
  // copy and frees it; otherwise this leaks -- confirm in buffers.h.
  llvm::opt_sched::SpecsBuffer Buf(strdup(SimpleModel), sizeof(SimpleModel));
  llvm::opt_sched::MachineModel Model(Buf);
  return Model;
}
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/unittests/Basic/simple_machine_model_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "simple_machine_model.h"
 2 | 
 3 | #include "gtest/gtest.h"
 4 | 
 5 | using llvm::opt_sched::MachineModel;
 6 | 
namespace {
// Smoke test: the embedded model text loads, and the issue rate it declares
// (ISSUE_RATE: 1 in simple_machine_model.h) is reported by the model.
TEST(SimpleMachineModel, CanBeLoaded) {
  MachineModel Model = simpleMachineModel();
  EXPECT_EQ(1, Model.GetIssueRate());
}
} // namespace
13 | 


--------------------------------------------------------------------------------
/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
# Umbrella target that every OptSched unit-test binary is attached to.
add_custom_target(OptSchedUnitTests)
set_target_properties(OptSchedUnitTests PROPERTIES FOLDER "Tests")

# LLVM turns this off, but we're good to have it:
add_definitions(-UGTEST_HAS_TR1_TUPLE)

# add_optsched_unittest(<name> <sources...>)
# Registers a unit-test binary under the OptSchedUnitTests umbrella target by
# forwarding to add_unittest (presumably LLVM's helper -- TODO confirm).
function(add_optsched_unittest test_dirname)
  add_unittest(OptSchedUnitTests ${test_dirname} ${ARGN})
endfunction()

# All unit test targets depend on OptSched
# The static library is assembled from the object files of LLVMOptSched and is
# linked into every target declared after this point.
add_llvm_library(UnitTest.OptSched STATIC $<TARGET_OBJECTS:LLVMOptSched>)
link_libraries(UnitTest.OptSched)

add_subdirectory(Basic)
16 | 


--------------------------------------------------------------------------------
/util/ARM/build-copy-to-A7.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #Build benchmarks and copy them to the A7 machine.
 3 | 
 4 | BENCH="401.bzip2 429.mcf 433.milc 445.gobmk 456.hmmer 458.sjeng 462.libquantum 464.h264ref 470.lbm 482.sphinx3  444.namd 447.dealII 453.povray 471.omnetpp 473.astar"
 5 | 
 6 | # source the shrc
 7 | . ./shrc
 8 | 
 9 | # Try to scrub benchmarks. Catch unchecked error in runspec where the benchmarks are not actually cleaned if
10 | # they were built by another user or root.
11 | echo 'Cleaning benchmarks'
12 | rslt=$(runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub $BENCH 2>&1 | \
13 | awk '/Couldn'\''t unlink/ { print "1"; exit 1 }' -)
14 | if [ ! -z $rslt ];
15 | then
16 |   echo "Error scrubbing benchmarks. Try with sudo."
17 |   echo "\"sudo sh -c '. ./shrc; runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub all'\""
18 |   exit 1
19 | fi
20 | 
21 | echo 'Building benchmarks'
22 | runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a build $BENCH 2>&1 > /dev/null
23 | 
24 | #echo 'Creating fake run directories'
25 | #runspec --fake --loose --size test --tune base --config Intel_llvm_3.9.cfg $BENCH
26 | 
27 | cd ./benchspec/CPU2006/
28 | 
29 | echo 'Creating archive'
30 | tar cJf ziped_benches.tar.xz */exe
31 | 
32 | echo 'Copying to A7 machine'
33 | scp -q ziped_benches.tar.xz ghassan@99.113.71.118:~
34 | 
35 | echo 'Cleaning benchmarks again'
36 | runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub $BENCH 2>&1 > /dev/null
37 | 
38 | echo 'Done!'
39 | 


--------------------------------------------------------------------------------
/util/ARM/extract-run-spec-cmd.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Run a dry run of the CPU2006 benchmarks and extract the commands necessary
 3 | # for running the binaries on a different machine without runspec.
 4 | # eg: When cross-compiling.
 5 | 
 6 | # ref (reference) or test (test) input size for the benchmarks.
 7 | SIZE=test
 8 | 
 9 | runspec --fake --loose --size $SIZE --tune base --config Intel_llvm_3.9.cfg $1 | \
10 |   awk '/Benchmark invocation/ {record=1} /Benchmark verification/ {record=0} record' - | \
11 |   awk '/echo/ {split($0, res, "\""); print res[2] }'
12 | 


--------------------------------------------------------------------------------
/util/CPU2006/clean-compile-commands.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import re
 4 | 
# Command line: one or more log files, each rewritten in place.
parser = argparse.ArgumentParser(description='Cleans CPU2006 logs, moving compilation commands to the appropriate location if necessary')
parser.add_argument('files', nargs='+', help='The logs to clean, in place')

files = parser.parse_args().files

# A "Function:" banner: a line of 20+ asterisks, the Function: text, and a
# closing line of 20+ asterisks. Misplaced commands are re-inserted right after
# the end of the next such banner.
RE_AFTER_FUNCTION = re.compile(r'\*{20,}\nFunction:.*?\*{20,}\n', re.DOTALL)
# A compile/specperl command that was written into the middle of a log line.
# Group 1 is the command text, including its trailing newline.
RE_BUGGED_COMPILE_COMMAND = re.compile(
    r'''
    # Compilation commands will always appear at the beginning of a line if things happened correctly.
    # We're trying to fix it when it doesn't happen correctly.
    ^(?: # (Non-capturing)
        E       # Inside an EVENT: log, but possible happening mid-word (EV/usr/bin/clang++ ...)
        | I     # Inside an INFO: log
    )
    .*?
    (
        # When we see any of the bugged compilation commands,
        (/.*?/[cf]lang.*\n) # clang, clang++, flang
        | (specperl\ /.*\n) # specperl commands
    )   # then we want to match the command and move it to the end.
    ''',
    re.VERBOSE | re.MULTILINE)

for file in files:
    with open(file, 'r') as f:
        text = f.read()

    # Keep the file content we wish to write back as a list of strings.
    # We will do a join at the end.
    result = []

    # Offset into `text` up to which content has already been emitted.
    cur = 0
    # Iterate over the locations that we will place the bugged commands (after next fn)
    for next_fn_m in RE_AFTER_FUNCTION.finditer(text):
        # The strings we will be placing after the fn.
        after_fn = []

        # Gather all the bugged compile commands from `cur` to the location of this next_fn_m.
        while True:
            bugged = RE_BUGGED_COMPILE_COMMAND.search(text, cur, next_fn_m.start())

            if bugged:
                # Emit everything before the command (including the partial
                # log line it interrupted), hold the command back, and resume
                # right after it.
                result.append(text[cur:bugged.start(1)])
                after_fn.append(bugged.group(1))
                cur = bugged.end(1)
            else:
                # No more misplaced commands before this banner: emit through
                # the end of the banner.
                result.append(text[cur:next_fn_m.end()])
                cur = next_fn_m.end()
                break
        # The held-back commands go directly after the banner.
        result += after_fn

    # Include any remnant
    result.append(text[cur:])

    resultstr = ''.join(result)
    with open(file, 'w') as f:
        f.write(resultstr)
62 | 


--------------------------------------------------------------------------------
/util/SLIL/compare-BB-fixed.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import mmap
  3 | import optparse
  4 | import os
  5 | 
  6 | parser = optparse.OptionParser(
  7 |     description='Wrapper around runspec for collecting spill counts.')
  8 | parser.add_option('-b', '--bruteforce',
  9 |                   metavar='filepath',
 10 |                   default=None,
 11 |                   help='Log file of brute force compiler.')
 12 | parser.add_option('-d', '--dynamic',
 13 |                   metavar='filepath',
 14 |                   default=None,
 15 |                   help='Log file of dynamic compiler.')
 16 | 
 17 | args = parser.parse_args()[0]
 18 | 
 19 | bruteForceFile =  args.bruteforce
 20 | bbFile =          args.dynamic
 21 | 
 22 | if not os.path.isfile(bruteForceFile):
 23 |     raise Error("Please specify a valid brute force log file.")
 24 | if not os.path.isfile(bbFile):
 25 |     raise Error("Please specify a valid dynamic log file.")
 26 | 
 27 | regex = re.compile(r'Dag (.*?) (.*?) absolute cost (\d+?) time (\d+)')
 28 | 
 29 | results = {}
 30 | 
 31 | SUCCESS = "optimal"
 32 | TIMEOUT = "timeout"
 33 | FAILED = "failed"
 34 | 
 35 | staticErrorCount = 0
 36 | dynamicErrorCount = 0
 37 | goodCount = 0
 38 | # Gather results from log files (assumed to be just 1 log file per build)
 39 | with open(bruteForceFile) as bff:
 40 |     bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ)
 41 | 
 42 |     for match in regex.finditer(bffm):
 43 |         dagResult = {}
 44 |         dagResult['bf'] = {}
 45 |         dagResult['bf']['result'] = match.group(2)
 46 |         dagResult['bf']['cost'] = int(match.group(3))
 47 |         dagResult['bf']['time'] = int(match.group(4))
 48 |         results[match.group(1)] = dagResult
 49 | 
 50 |     bffm.close()
 51 | 
 52 | with open(bbFile) as bbf:
 53 |     bbfm = mmap.mmap(bbf.fileno(), 0, access=mmap.ACCESS_READ)
 54 |     for match in regex.finditer(bbfm):
 55 |         if not match.group(1) in results:
 56 |             results[match.group(1)] = {}
 57 |         results[match.group(1)]['bb'] = {}
 58 |         results[match.group(1)]['bb']['result'] = match.group(2)
 59 |         results[match.group(1)]['bb']['cost'] = int(match.group(3))
 60 |         results[match.group(1)]['bb']['time'] = int(match.group(4))
 61 |     bbfm.close()
 62 | 
 63 | 
 64 | #analyze results
 65 | for dagName in results:
 66 |     if not "bf" in results[dagName] or not "bb" in results[dagName]:
 67 |         if len(results[dagName]) > 0:
 68 |             staticErrorCount += 1
 69 |             print("StaticLBError: Found B&B results for one file but not the other")
 70 |             for key in results[dagName]:
 71 |                 print("  %s: Dag %s %s cost %d time %d" % (key, dagName, results[dagName][key]['result'], results[dagName][key]['cost'], results[dagName][key]['time']))
 72 |         continue
 73 |     bfCost = results[dagName]['bf']['cost']
 74 |     bbCost = results[dagName]['bb']['cost']
 75 |     bfResult = results[dagName]['bf']['result']
 76 |     bbResult = results[dagName]['bb']['result']
 77 |     # Case 1: both success -> must be same cost
 78 |     if bfResult == SUCCESS and bbResult == SUCCESS:
 79 |         if bbCost != bfCost:
 80 |             dynamicErrorCount += 1
 81 |             print("DynamicLBError: Dag %s: both implementations optimal, but brute force cost (%d) is different from dynamic cost (%d)" %(dagName, bfCost, bbCost))
 82 |         else:
 83 |             goodCount += 1
 84 |             print("Good: Dag %s: both implementations solved optimally, and both costs match" % dagName)
 85 |     # Case 2: one timeout and other success -> timeout cost shouldn't be better
 86 |     elif bfResult == SUCCESS and bbResult == TIMEOUT:
 87 |         if bbCost < bfCost:
 88 |             dynamicErrorCount += 1
 89 |             print("DynamicLBError: Dag %s: brute force optimal and dynamic timed out, but brute force cost (%d) is worse than dynamic cost (%d)" % (dagName, bfCost, bbCost))
 90 |         else:
 91 |             goodCount += 1
 92 |             print("Good: Dag %s: brute force optimal and dynamic timed out, and brute force cost (%d) is not worse than dynamic cost (%d)" % (dagName, bfCost, bbCost))
 93 |     elif bfResult == TIMEOUT and bbResult == SUCCESS:
 94 |         if bbCost > bfCost:
 95 |             dynamicErrorCount += 1
 96 |             print("DynamicLBError: Dag %s: brute force timed out and dynamic optimal, but brute force cost (%d) is better than dynamic cost (%d)" % (dagName, bfCost, bbCost))
 97 |         else:
 98 |             goodCount += 1
 99 |             print("Good: Dag %s: brute force timed out and dynamic optimal, and brute force cost (%d) is not better than dynamic cost (%d)" % (dagName, bfCost, bbCost))
100 | 
101 | 
102 | print("Good: %d" % goodCount)
103 | print("Static LB Error: %d" % staticErrorCount)
104 | print("Dynamic LB Error: %d" % dynamicErrorCount)
105 | 


--------------------------------------------------------------------------------
/util/SLIL/compare-peaks.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import re
 4 | import mmap
 5 | 
 6 | Regex = re.compile('DAG (.*?) PEAK (\d+)')
 7 | 
 8 | def readPeakCosts(logFile):
 9 |     peakCosts = {}
10 |     with open(logFile) as f:
11 |         m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
12 |         for match in Regex.finditer(m):
13 |             peakCosts[match.group(1)] = int(match.group(2))
14 |         m.close()
15 |     return peakCosts
16 | 
def compareWrapperLogs(path1, path2, logFile):
    """Print the peak costs of DAGs found in both copies of one log file.

    path1, path2 -- directories that each may contain `logFile`.
    logFile      -- file name; the text before its first '.' is used as the
                    benchmark name.

    Does nothing unless the file exists in both directories. Otherwise prints
    one "bench,dag,peak1,peak2" line for every DAG present in both logs.
    """
    benchName = logFile.split(".")[0]
    firstLog = os.path.join(path1, logFile)
    secondLog = os.path.join(path2, logFile)
    if not os.path.isfile(firstLog) or not os.path.isfile(secondLog):
        return

    firstPeaks = readPeakCosts(firstLog)
    secondPeaks = readPeakCosts(secondLog)

    for dagName, firstPeak in firstPeaks.items():
        if dagName not in secondPeaks:
            continue
        print("%s,%s,%d,%d" % (benchName, dagName, firstPeak, secondPeaks[dagName]))
28 | 
# Usage: compare-peaks.py <logDir1> <logDir2>
if len(sys.argv) != 3:
    raise Exception("Invalid number of arguments")

for logDir in sys.argv[1:3]:
    if not os.path.isdir(logDir):
        raise Exception("'%s' is not a valid directory" % logDir)

# compareWrapperLogs resolves each name directly under the two top-level
# directories, so only top-level entries are listed. Walking the tree would
# feed it names from nested directories, which it would then look up at the
# top level (skipping them or comparing the wrong file a second time).
# Entries that are not regular files in both directories are skipped by
# compareWrapperLogs itself. Sorting keeps the output order stable.
for f in sorted(os.listdir(sys.argv[1])):
    compareWrapperLogs(sys.argv[1], sys.argv[2], f)
41 | 


--------------------------------------------------------------------------------
/util/SLIL/compare-static-LB.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import mmap
 3 | import optparse
 4 | import os
 5 | import json
 6 | 
 7 | parser = optparse.OptionParser(
 8 |     description='Wrapper around runspec for collecting spill counts.')
 9 | parser.add_option('-b', '--bruteforce',
10 |                   metavar='filepath',
11 |                   default=None,
12 |                   help='Log file of brute force compiler.')
13 | parser.add_option('-d', '--dynamic',
14 |                   metavar='filepath',
15 |                   default=None,
16 |                   help='Log file of dynamic compiler.')
17 | 
18 | args = parser.parse_args()[0]
19 | 
20 | bruteForceFile =  args.bruteforce
21 | bbFile =          args.dynamic
22 | 
23 | if not os.path.isfile(bruteForceFile):
24 |     raise Error("Please specify a valid brute force log file.")
25 | if not os.path.isfile(bbFile):
26 |     raise Error("Please specify a valid dynamic log file.")
27 | 
28 | results = {}
29 | 
30 | errorCount = 0
31 | equalCount = 0
32 | improvementCount = 0
33 | 
34 | # Gather results from log files (assumed to be just 1 log file per build)
35 | with open(bruteForceFile) as bff:
36 |     bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ)
37 |     dagResults = {}
38 |     for match in re.finditer(r'EVENT: ({"event_id": "StaticLowerBoundDebugInfo".*)', bffm):
39 |         info = json.loads(match.group(1))
40 |         dagResults[info['name']] = int(info['spill_cost_lb'])
41 |     bffm.close()
42 |     results['bf'] = dagResults
43 | 
44 | with open(bbFile) as bbf:
45 |     bbfm = mmap.mmap(bbf.fileno(), 0, access=mmap.ACCESS_READ)
46 |     dagResults = {}
47 |     for match in re.finditer(r'EVENT: ({"event_id": "StaticLowerBoundDebugInfo".*)', bffm):
48 |         info = json.loads(match.group(1))
49 |         dagResults[info['name']] = int(info['spill_cost_lb'])
50 |     bbfm.close()
51 |     results['bb'] = dagResults
52 | 
53 | #analyze results
54 | #
55 | for dagName in results['bf']:
56 |     bfLowerBound = results['bf'][dagName]
57 |     if not dagName in results['bb']: continue
58 |     bbLowerBound = results['bb'][dagName]
59 |     if bfLowerBound < bbLowerBound:
60 |         print("Improvement: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName))
61 |         improvementCount += 1
62 |     elif bfLowerBound == bbLowerBound:
63 |         print("Equal: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName))
64 |         equalCount += 1
65 |     else:
66 |         print("Error: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName))
67 |         errorCount += 1
68 | print("Improved blocks: %d"% improvementCount)
69 | print("Equal blocks:    %d"% equalCount)
70 | print("Errors:          %d"% errorCount)
71 | 


--------------------------------------------------------------------------------
/util/SLIL/gather-SLIL-stats.py:
--------------------------------------------------------------------------------
  1 | import optparse
  2 | import os
  3 | import mmap
  4 | import re
  5 | 
  6 | regex = re.compile("SLIL stats: DAG (.*?) static LB (\d+) gap size (\d+) enumerated (.*?) optimal (.*?) PERP higher (.*?) \(")
  7 | 
  8 | 
  9 | def debugPrint(msg):
 10 |     #print(msg)
 11 |     pass
 12 | 
 13 | def getBool(msg):
 14 |     if msg == "True": return True
 15 |     elif msg == "False": return False
 16 |     raise Exception("msg is %s" % msg)
 17 | 
 18 | def getStatsFromLogFile(filename, path):
 19 |     # First, organize raw data before calculate aggregate stats
 20 |     functions = {}
 21 |     with open(os.path.join(path,filename)) as f:
 22 |         m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
 23 | 
 24 |         for match in regex.finditer(m):
 25 |             debugPrint("Found match: %s" % match.group(0))
 26 |             blockStats = {}
 27 |             dagName = match.group(1)
 28 | 
 29 |             functionName = dagName.split(':')[0]
 30 |             if not functionName in functions:
 31 |                 debugPrint("Found function %s" % functionName)
 32 |                 functions[functionName] = {}
 33 | 
 34 |             blockStats['staticLB'] = int(match.group(2))
 35 |             blockStats['gapSize'] = int(match.group(3))
 36 |             blockStats['isEnumerated'] = getBool(match.group(4))
 37 |             blockStats['isOptimal'] = getBool(match.group(5))
 38 |             blockStats['isPerpHigher'] = getBool(match.group(6))
 39 | 
 40 |             blockName = dagName.split(':')[1]
 41 |             if blockName in functions[functionName]:
 42 |                 raise Exception("Block %s already exists in function %s!" % (blockName, functionName))
 43 |             functions[functionName][blockName] = blockStats
 44 | 
 45 |         m.close()
 46 | 
 47 |     # Then, calculate aggregate stats per function
 48 |     benchStats = {}
 49 |     for functionName in functions:
 50 |         benchStats[functionName] = {}
 51 |         totalGapSize = 0
 52 |         averageGapPercentage = 0
 53 |         maxGapPercentage = 0
 54 |         totalEnumerated = 0
 55 |         totalOptimal = 0
 56 |         totalOptimalAndEnumerated = 0
 57 |         totalHigherPerp = 0
 58 |         totalOptimalHigherPerp = 0
 59 |         for blockName in functions[functionName]:
 60 |             blockStats = functions[functionName][blockName]
 61 |             gapSize = blockStats['gapSize']
 62 |             totalGapSize += gapSize
 63 |             gapPercentage = float(gapSize) / blockStats['staticLB']
 64 |             averageGapPercentage += gapPercentage
 65 |             if gapPercentage > maxGapPercentage: maxGapPercentage = gapPercentage
 66 |             if blockStats['isOptimal']: totalOptimal += 1
 67 |             if blockStats['isEnumerated']: totalEnumerated += 1
 68 |             if gapSize == 0 and blockStats['isEnumerated']: totalOptimalAndEnumerated += 1
 69 |             if blockStats['isPerpHigher']:
 70 |                 totalHigherPerp += 1
 71 |                 if blockStats['isOptimal']: totalOptimalHigherPerp += 1
 72 |         functionStats = benchStats[functionName]
 73 |         functionStats['totalBlocks'] = len(functions[functionName])
 74 |         functionStats['totalGapSize'] = totalGapSize
 75 |         functionStats['averageGapSize'] = float(totalGapSize) / len(functions[functionName])
 76 |         functionStats['averageGapPercentage'] = float(averageGapPercentage) / len(functions[functionName])
 77 |         functionStats['maxGapPercentage'] = maxGapPercentage
 78 |         functionStats['totalEnumerated'] = totalEnumerated
 79 |         functionStats['totalOptimal'] = totalOptimal
 80 |         functionStats['totalOptimalAndEnumerated'] = totalOptimalAndEnumerated
 81 |         functionStats['totalHigherPerp'] = totalHigherPerp
 82 |         functionStats['totalOptimalHigherPerp'] = totalOptimalHigherPerp
 83 | 
 84 |     return benchStats
 85 | 
 86 | parser = optparse.OptionParser(description='Wrapper around runspec for collecting spill counts.')
 87 | parser.add_option('-p', '--path',
 88 |                   metavar='path',
 89 |                   default=None,
 90 |                   help='Path to log files generated by runspec wrapper.')
 91 | args = parser.parse_args()[0]
 92 | 
 93 | 
 94 | if not os.path.isdir(args.path):
 95 |     raise Exception("Input path: %s is not a folder" % args.path)
 96 | 
 97 | stats = {}
 98 | 
 99 | for filename in os.listdir(args.path):
100 |     benchName = filename.split('.')[0]
101 |     stats[benchName] = getStatsFromLogFile(filename, args.path)
102 | 
103 | debugPrint(stats)
104 | 
105 | 
106 | """
107 |     functionStats['totalGapSize'] = totalGapSize
108 |     functionStats['averageGapSize'] = float(totalGapSize) / len(functions[functionName])
109 |     functionStats['averageGapPercentage'] = float(averageGapPercentage) / len(functions[functionName])
110 |     functionStats['totalEnumerated'] = totalEnumerated
111 |     functionStats['totalOptimal'] = totalOptimal
112 |     functionStats['totalHigherPerp'] = totalHigherPerp
113 |     functionStats['totalOptimalHigherPerp'] = totalOptimalHigherPerp
114 | 
115 | """
116 | 
117 | with open("slilStats.txt", 'w') as f:
118 |     for benchName in stats:
119 |         f.write("====================\n")
120 |         f.write("Benchmark %s\n" % benchName)
121 |         f.write("====================\n")
122 |         for functionName in stats[benchName]:
123 |             f.write("  Function %s\n  ----------------\n" % functionName)
124 |             f.write("    Total blocks: %d\n" % stats[benchName][functionName]['totalBlocks'])
125 |             f.write("    Total gap size: %d\n" % stats[benchName][functionName]['totalGapSize'])
126 |             f.write("    Average gap size: %.02f\n" % stats[benchName][functionName]['averageGapSize'])
127 |             f.write("    Average percent gap size: %.02f%%\n" % stats[benchName][functionName]['averageGapPercentage'])
128 |             f.write("    Maximum percent gap size: %.02f%%\n" % stats[benchName][functionName]['maxGapPercentage'])
129 |             f.write("    Enumerated: %d\n" % stats[benchName][functionName]['totalEnumerated'])
130 |             f.write("    Optimal: %d\n" % stats[benchName][functionName]['totalOptimal'])
131 |             f.write("    Enumerated and zero cost: %d\n" % stats[benchName][functionName]['totalOptimalAndEnumerated'])
132 |             f.write("    Higher PERP: %d\n" % stats[benchName][functionName]['totalHigherPerp'])
133 |             f.write("    Higher PERP and optimal: %d\n" % stats[benchName][functionName]['totalOptimalHigherPerp'])
134 | 


--------------------------------------------------------------------------------
/util/SLIL/run-filtered-block-tests.py:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This bash script will run performance tests for the SLIL cost function for the following scenarios:
 4 | #   1. Take all blocks
 5 | #   2. Take only optimal blocks
 6 | #   3. Take only zero-cost blocks
 7 | # Each scenario will run the scheduler for all functions and for hot functions.
 8 | 
 9 | SCHED_INI_DIR="/home/chris/csc199/LLVM_DRAGONEGG/Generic/OptSchedCfg/"
10 | 
11 | TEST_DIR="/home/chris/csc199/test_118/"
12 | TEST_DIR_SHARED="/home/chris/csc199/test_118_2017_10_15_chris/"
13 | 
14 | CPU2006_DIR="/media/ssd0/CPU2006"
15 | CPU2006_USER_DIR="/media/ssd0/chris/spec"
16 | 
17 | RUNSPEC_SCRUB="runspec --loose -size=ref -iterations=1 -config=Intel_llvm_3.9_chris.cfg --tune=base -r 1 -I -a scrub all"
18 | 
# Scrubs the benchmark build directories with runspec and removes the wrapper
# output and result files left behind by a previous run.
function clean_dirs() {
    # RUNSPEC_SCRUB is intentionally unquoted so that it word-splits into
    # the command and its arguments.
    echo $RUNSPEC_SCRUB
    $RUNSPEC_SCRUB
    # Quote the directory parts but leave the globs outside the quotes so
    # they still expand.
    echo rm -R "$CPU2006_DIR"/wrapper* "$CPU2006_USER_DIR"/result/*
    rm -R "$CPU2006_DIR"/wrapper* "$CPU2006_USER_DIR"/result/*
}
25 | 
#  Runs one benchmark pass with a given scheduler configuration and collects
#  the wrapper output into the per-test result folders.
#
#  FUNCTION ARGUMENTS:
#    $1: sched.ini file (relative to SCHED_INI_DIR) that contains the preconfigured settings
#    $2: name of test (not used by the function body)
#    $3: subfolder of result
function run_test() {
    clean_dirs

    echo cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini"
    cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini"

    echo python runspec-wrapper-chris.py
    python runspec-wrapper-chris.py

    # The configuration is copied a second time after the run, so the
    # sched.ini archived below matches the requested test case.
    echo cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini"
    cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini"

    RESULT_DIR="$TEST_DIR/$3"
    RESULT_DIR_SHARED="$TEST_DIR_SHARED/$3"
    if [ ! -d "$RESULT_DIR" ]; then
        echo mkdir -p "$RESULT_DIR"
        mkdir -p "$RESULT_DIR"
    fi
    if [ ! -d "$RESULT_DIR_SHARED" ]; then
        echo mkdir -p "$RESULT_DIR_SHARED"
        mkdir -p "$RESULT_DIR_SHARED"
    fi

    # Full wrapper output plus the configuration go to the private folder.
    echo cp -R "$CPU2006_DIR/wrapper*" "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR"
    cp -R "$CPU2006_DIR"/wrapper* "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR"

    # Only the .dat stat files plus the configuration go to the shared folder.
    echo cp "$CPU2006_DIR/wrapperStats/*.dat" "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR_SHARED"
    cp "$CPU2006_DIR"/wrapperStats/*.dat "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR_SHARED"
}
59 | 
# Create the output folders on first use.
if [ ! -d "$TEST_DIR" ]; then
    echo "Output folder $TEST_DIR doesn't exist. Creating it now."
    echo mkdir -p "$TEST_DIR"
    mkdir -p "$TEST_DIR"
fi

if [ ! -d "$TEST_DIR_SHARED" ]; then
    echo mkdir -p "$TEST_DIR_SHARED"
    mkdir -p "$TEST_DIR_SHARED"
fi

echo "Using $TEST_DIR to collect log files and stat files."

# Everything below runs relative to the CPU2006 directory; stop if it cannot
# be entered instead of sourcing shrc from the wrong place.
echo cd "$CPU2006_DIR"
cd "$CPU2006_DIR" || { echo "Cannot cd to $CPU2006_DIR" >&2; exit 1; }
echo source shrc
source shrc

run_test "test_cases/sched.peak.20.300.ini" "" "peak_300insts/"
run_test "test_cases/sched.slil.20.300.ini" "" "slil_300insts/"

run_test "test_cases/sched.peak.20.nolimit.ini" "" "peak_nolimit/"
run_test "test_cases/sched.slil.20.nolimit.ini" "" "slil_nolimit/"

clean_dirs
85 | 


--------------------------------------------------------------------------------
/util/aco_analysis/make_pheromone_pdfs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #**************************************************************************************
 3 | #Description:   Generates a pdf from the '.dot' files that are generated by the use
 4 | #               of the ACO_DBG_REGIONS sched.ini option
 5 | #Author:        Paul McHugh
 6 | #Last Update:	July 24, 2020
 7 | #**************************************************************************************
 8 | #If run with one argument which is a directory the script will generate the pdfs in
 9 | #the selected directory. if no argument is present the script generates pdf in the
10 | #current directory
11 | #Current/selected must directory contain the '.dot' files you want to process
12 | #Requires write permission in the current/selected directory.
13 | 
14 | #check to see if we have the proper packages installed
15 | if ! ( dot -V &> /dev/null && pdfunite -v &> /dev/null ) ;
16 | then
17 | 	echo -e "\e[31mYou are missing either graphviz or poppler-utils\e[m"
18 | 	echo "Install the necessary packages with the command:"
19 | 	echo -e "\e[32msudo apt install graphviz poppler-utils\e[m"
20 | 	exit
21 | fi
22 | 
23 | if [ ! -z "$1" ] && [ -d "$1" ];
24 | then
25 | 	cd "$1"
26 | fi
27 | 
28 | regions=`ls *"@initial.dot" *"@iteration"*.dot | cut -d@ -f1 |uniq`
29 | for region in $regions
30 | do
31 | 	for file in $region*.dot
32 | 	do
33 | 		dot -Tpdf -o ${file%.dot}.pdf $file
34 | 	done
35 | 
36 | 	rm -f $region.pdf
37 | 	pdfunite `ls -v $region*.pdf` $region.pdf
38 | done
39 | 
40 | echo $regions
41 | tar -czf result_pdfs.tar.gz $(printf '%q.pdf ' $regions)
42 | 


--------------------------------------------------------------------------------
/util/analyze/__init__.py:
--------------------------------------------------------------------------------
1 | from ._types import Logs, Benchmark, Block
2 | from ._main import parse_args
3 | from .imports import import_cpu2006, import_plaidml, import_shoc, import_utils
4 | from ._utils import *
5 | 


--------------------------------------------------------------------------------
/util/analyze/_main.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import argparse
 3 | import json
 4 | import fnmatch
 5 | from typing import Callable
 6 | 
 7 | from .imports import *
 8 | from ._types import Block, Logs
 9 | 
10 | 
11 | def __load_file(file):
12 |     '''
13 |     Load imported log file (imported via one of the import scripts)
14 |     '''
15 |     return pickle.load(file)
16 | 
17 | 
18 | def __load_filepath(filepath):
19 |     with open(filepath, 'rb') as f:
20 |         return __load_file(f)
21 | 
22 | 
def block_filter(filter: dict) -> Callable[[Block], bool]:
    '''
    Builds a predicate over blocks from a `{event_name: matcher}` dict.

    The predicate returns True only if, for every entry, the block contains
    the event and every log of that event matches the matcher. A dict
    matcher is compared key by key (recursively); a string matcher is a
    case-sensitive fnmatch pattern applied to `str(value)`; any other
    matcher is compared with `==`.
    '''
    def log_matches(log, pattern):
        if isinstance(pattern, dict):
            for key, expected in pattern.items():
                if key not in log:
                    return False
                if not log_matches(log[key], expected):
                    return False
            return True

        if isinstance(pattern, str):
            return fnmatch.fnmatchcase(str(log), pattern)

        return log == pattern

    def blk_filter_f(blk):
        for event, matcher in filter.items():
            if event not in blk:
                return False
            for log in blk[event]:
                if not log_matches(log, matcher):
                    return False
        return True

    return blk_filter_f
43 | 
44 | 
def parse_args(parser: argparse.ArgumentParser, *names, args=None):
    '''
    Adds the common flags to `parser`, parses the arguments, and replaces each
    named argument with its parsed logs.

    Use parse_args(parser) instead of parser.parse_args()

    Params:
      - *names - variadic: the strings specifying which arguments should be parsed.
                 These should be python_case, not --flag-case.
      - args - The argv to parse from. Defaults to parsing sys.argv

    Returns the argparse namespace, in which every argument listed in `names`
    holds the loaded (and, if --keep-blocks-if was given, filtered) logs.
    '''

    parser.add_argument(
        '--benchsuite',
        required=True,
        choices=('spec', 'plaidml', 'shoc', 'pickle'),
        help='Select the benchmark suite which the input satisfies.',
    )
    parser.add_argument(
        '--keep-blocks-if',
        default='true',
        type=json.loads,
        help='Keep blocks matching (JSON format)',
    )

    parsed = parser.parse_args(args)

    # One loader per benchmark suite.
    loaders = {
        'pickle': __load_filepath,
        'spec': import_cpu2006.parse,
        'plaidml': import_plaidml.parse,
        'shoc': import_shoc.parse,
    }
    load_logs = loaders[parsed.benchsuite]

    # The JSON value `true` (the default) means "keep everything".
    keep_spec = parsed.keep_blocks_if
    predicate = True if keep_spec is True else block_filter(keep_spec)

    # vars() exposes the namespace's own dict, so assignments below are
    # visible on `parsed`.
    namespace = vars(parsed)

    for name in names:
        logs = load_logs(namespace[name])
        if predicate is not True:
            logs = logs.keep_blocks_if(predicate)
        namespace[name] = logs

    return parsed
91 | 


--------------------------------------------------------------------------------
/util/analyze/_types.py:
--------------------------------------------------------------------------------
  1 | class Logs:
  2 |     '''
  3 |     Abstracts a log file as a collection of benchmarks
  4 | 
  5 |     Properties:
  6 |      - logs.benchmarks: a list of the benchmarks this Logs contains.
  7 |     '''
  8 | 
  9 |     def __init__(self, benchmarks):
 10 |         self.benchmarks = benchmarks
 11 | 
 12 |     def merge(self, rhs):
 13 |         '''
 14 |         Merges the logs from the rhs into this.
 15 | 
 16 |         The rhs must have different benchmarks from this Logs
 17 |         '''
 18 |         in_both = set(self.benchmarks) & set(rhs.benchmarks)
 19 |         if in_both:
 20 |             raise ValueError(
 21 |                 'Cannot merge Logs which share common benchmarks', in_both)
 22 | 
 23 |         self.benchmarks += rhs.benchmarks
 24 | 
 25 |         return self
 26 | 
 27 |     def benchmark(self, name):
 28 |         '''
 29 |         Gets the benchmark with the specified name
 30 |         '''
 31 |         for bench in self.benchmarks:
 32 |             if bench.name == name:
 33 |                 return bench
 34 | 
 35 |         raise KeyError(f'No benchmark `{name}` in this Logs')
 36 | 
 37 |     def __iter__(self):
 38 |         '''
 39 |         Iterates over the blocks in every benchmark
 40 |         '''
 41 |         for bench in self.benchmarks:
 42 |             yield from bench.blocks
 43 | 
 44 |     def __repr__(self):
 45 |         benchmarks = ','.join(b.name for b in self.benchmarks)
 46 |         return f'<Logs({benchmarks})>'
 47 | 
 48 |     def keep_blocks_if(self, p):
 49 |         return Logs([bench.keep_blocks_if(p) for bench in self.benchmarks])
 50 | 
 51 | 
 52 | class Benchmark:
 53 |     '''
 54 |     Abstracts a single benchmark in the logs as a collection of blocks
 55 | 
 56 |     Properties:
 57 |      - bench.name: the name of this benchmark
 58 |      - bench.info: miscellaneous information about this benchmark
 59 |      - bench.blocks: the Blocks in this benchmark
 60 |     '''
 61 | 
 62 |     def __init__(self, info, blocks):
 63 |         self.name = info['name']
 64 |         self.info = info
 65 |         self.blocks = blocks
 66 | 
 67 |     def __iter__(self):
 68 |         return iter(self.blocks)
 69 | 
 70 |     @property
 71 |     def benchmarks(self):
 72 |         return (self,)
 73 | 
 74 |     def __repr__(self):
 75 |         return f'<Benchmark({self.info}, {len(self.blocks)} blocks)>'
 76 | 
 77 |     def keep_blocks_if(self, p):
 78 |         return Benchmark(self.info, [blk for blk in self.blocks if p(blk)])
 79 | 
 80 | 
 81 | class Block:
 82 |     '''
 83 |     Abstracts a single block in the logs as a collection of log messages
 84 | 
 85 |     Handles EVENT logs nicely.
 86 | 
 87 |     Properties:
 88 |      - block.name: the name of this block
 89 |      - block.info: miscellaneous information about this block
 90 |      - block.raw_log: the raw log text for this block
 91 |      - block.events: the events in this block
 92 |     '''
 93 | 
 94 |     def __init__(self, info, raw_log, events):
 95 |         self.name = info['name']
 96 |         self.info = info
 97 |         self.raw_log = raw_log
 98 |         self.events = events
 99 | 
100 |     def single(self, event_name):
101 |         '''
102 |         Gets an event with the specified name, requiring exactly one match
103 | 
104 |         raises AssertionError if there is not exactly one event with the specified name
105 |         '''
106 |         result = self.events[event_name]
107 |         if len(result) != 1:
108 |             raise AssertionError(f'Multiple events for {event_name}')
109 | 
110 |         return result[0]
111 | 
112 |     def __getitem__(self, event_name):
113 |         '''
114 |         Gets the events with the specified name
115 |         '''
116 |         return self.events[event_name]
117 | 
118 |     def get(self, event_name, default=None):
119 |         '''
120 |         Gets the events with the specified name, returning the default if the event doesn't exist
121 |         '''
122 |         return self.events.get(event_name, default)
123 | 
124 |     def __contains__(self, event_name):
125 |         return event_name in self.events
126 | 
127 |     def __iter__(self):
128 |         return iter(self.events)
129 | 
130 |     def __repr__(self):
131 |         return f'<Block({self.info}, {len(self.events)} events)>'
132 | 
133 |     def uniqueid(self):
134 |         return frozenset(self.info.items())
135 | 


--------------------------------------------------------------------------------
/util/analyze/_utils.py:
--------------------------------------------------------------------------------
 1 | from ._types import *
 2 | 
 3 | 
 4 | def sum_dicts(ds):
 5 |     '''
 6 |     Sums ds[N]['Key'] for each key for each dict. Assumes each dict has the same keys
 7 |     E.g. sum_dicts({'a': 1, 'b': 2}, {'a': 2, 'b': 3}) produces {'a': 3, 'b': 5}
 8 |     '''
 9 |     if not ds:
10 |         return {}
11 |     return {k: sum(d[k] for d in ds) for k in ds[0].keys()}
12 | 
13 | 
def foreach_bench(analysis_f, *logs, combine=None):
    '''
    Runs `analysis_f` once per benchmark and once for the whole input.

    The benchmarks of all `logs` are zipped together, and `analysis_f` is
    called with one benchmark from each log. The overall value comes from
    `combine(per_benchmark_values)` when `combine` is given; otherwise
    `analysis_f(*logs)` is run over the entire input (takes quite some time).

    Returns:
        A dictionary containing the per-benchmark results.
        The keys are the benchmark names (taken from the first log).
        The run for the entire thing has a key of 'Total', which comes first.
    '''

    if combine is None:
        def combine(*_unused):
            return analysis_f(*logs)

    per_bench = {}
    for group in zip(*(log.benchmarks for log in logs)):
        per_bench[group[0].name] = analysis_f(*group)

    # Insert 'Total' first so that it leads the resulting dict.
    summary = {'Total': combine(per_bench.values())}
    summary.update(per_bench)
    return summary
40 | 


--------------------------------------------------------------------------------
/util/analyze/imports/__init__.py:
--------------------------------------------------------------------------------
1 | from . import import_cpu2006
2 | from . import import_plaidml
3 | from . import import_shoc
4 | from . import import_utils
5 | 


--------------------------------------------------------------------------------
/util/analyze/imports/import_cpu2006.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import re
 5 | 
 6 | from . import import_utils
 7 | 
 8 | 
 9 | def parse(file):
10 |     assert os.path.isfile(
11 |         file), 'Only single-file CPU2006 logs supported at this time'
12 | 
13 |     with open(file, 'r') as f:
14 |         return import_utils.parse_multi_bench_file(
15 |             f.read(),
16 |             benchstart=re.compile(r'Building (?P<name>\S*)'),
17 |             filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n'))
18 | 
19 | 
20 | if __name__ == '__main__':
21 |     import_utils.import_main(
22 |         parse,
23 |         description='Import single-file CPU2006 logs',
24 |     )
25 | 


--------------------------------------------------------------------------------
/util/analyze/imports/import_plaidml.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import pathlib
 5 | 
 6 | from .._types import Logs
 7 | from . import import_utils
 8 | 
 9 | 
def parse(path):
    '''
    Imports a plaidbench output directory.

    Every sub-directory of `path` is one benchmark and must contain exactly
    one `*.log` file. Sub-directories are processed in name order, and the
    benchmark is named after the sub-directory's stem.
    '''
    assert os.path.isdir(path), 'Point to the plaidbench output directory'

    subdirs = sorted(
        (entry for entry in pathlib.Path(path).iterdir() if entry.is_dir()),
        key=lambda p: p.name,
    )

    merged = Logs([])

    for subdir in subdirs:
        logfiles = list(subdir.glob('*.log'))
        assert len(logfiles) == 1

        with logfiles[0].open('r') as f:
            parsed = import_utils.parse_single_bench_file(
                f.read(), benchname=subdir.stem)
            merged.merge(parsed)

    return merged


if __name__ == '__main__':
    # Command-line entry point, delegated to import_utils.import_main.
    import_utils.import_main(parse, description='Import plaidbench directories')
38 | 


--------------------------------------------------------------------------------
/util/analyze/imports/import_shoc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import re
 5 | import pathlib
 6 | 
 7 | from .._types import Logs
 8 | from . import import_utils
 9 | 
10 | 
def parse(path):
    '''
    Imports a SHOC output directory.

    Reads every `Logs/dev?_*.err` file under `path` in name order. Each file
    is one benchmark, named by the part of the file stem after `dev?_`.
    '''
    assert os.path.isdir(path), \
        'Point to the SHOC output directory (not the Logs/)'

    logs_dir = pathlib.Path(path) / 'Logs'
    assert logs_dir.is_dir()

    err_files = sorted(logs_dir.glob('dev?_*.err'), key=lambda p: p.name)

    benchname_re = re.compile(r'dev._(.*)(\.err)?')
    merged = Logs([])

    for err_file in err_files:
        with err_file.open('r') as f:
            benchname = benchname_re.search(err_file.stem).group(1)
            parsed = import_utils.parse_single_bench_file(
                f.read(), benchname=benchname)
            merged.merge(parsed)

    return merged


if __name__ == '__main__':
    # Command-line entry point, delegated to import_utils.import_main.
    import_utils.import_main(parse, description='Import SHOC directories')
41 | 


--------------------------------------------------------------------------------
/util/analyze/imports/import_utils.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import json
  3 | import itertools
  4 | import re
  5 | import sys
  6 | from collections import namedtuple
  7 | 
  8 | from .._types import Logs, Benchmark, Block
  9 | 
 10 | _RE_REGION_INFO = re.compile(r'EVENT:.*ProcessDag.*"name": "(?P<name>[^"]*)"')
 11 | 
 12 | 
 13 | def import_main(parsefn, *, description):
 14 |     import argparse
 15 |     parser = argparse.ArgumentParser(description=description)
 16 |     parser.add_argument(
 17 |         '-o', '--output', required=True, help='The output file to write the imported log format to')
 18 |     parser.add_argument('input', help='The input logs to process')
 19 |     args = parser.parse_args()
 20 | 
 21 |     result = parsefn(args.input)
 22 | 
 23 |     with open(args.output, 'wb') as f:
 24 |         pickle.dump(result, f)
 25 | 
 26 | 
 27 | def parse_multi_bench_file(logtext, *, benchstart, filename=None):
 28 |     benchmarks = []
 29 |     for benchm, nextm in _splititer(benchstart, logtext):
 30 |         bench = _parse_benchmark(benchm.groupdict(), logtext,
 31 |                                  benchm.end(), nextm.start(),
 32 |                                  filenamere=filename)
 33 |         benchmarks.append(bench)
 34 | 
 35 |     return Logs(benchmarks)
 36 | 
 37 | 
 38 | def parse_single_bench_file(logtext, *, benchname, filename=None):
 39 |     return Logs([
 40 |         _parse_benchmark(
 41 |             {'name': benchname},
 42 |             logtext, 0, len(logtext),
 43 |             filenamere=filename,
 44 |         )
 45 |     ])
 46 | 
 47 | 
 48 | _FileInfo = namedtuple('_FileInfo', ('filename', 'from_pos'))
 49 | 
 50 | 
 51 | def _each_cons(iterable, n):
 52 |     '''
 53 |     Iterates over each consecutive n items of the iterable.
 54 | 
 55 |     _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4)
 56 |     '''
 57 |     iters = [None] * n
 58 |     iters[0] = iter(iterable)
 59 |     for i in range(1, n):
 60 |         iters[i - 1], iters[i] = itertools.tee(iters[i - 1])
 61 |         next(iters[i], None)
 62 |     return zip(*iters)
 63 | 
 64 | 
 65 | class _DummyEnd:
 66 |     def __init__(self, length):
 67 |         self._end = length - 1
 68 | 
 69 |     def start(self):
 70 |         return self._end
 71 | 
 72 |     def end(self):
 73 |         return self._end
 74 | 
 75 | 
 76 | def _splititer(regex, text, pos=0, endpos=None):
 77 |     '''
 78 |     'Splits' the string by the regular expression, using an iterable.
 79 |     Returns both where the regex matches and where it matched next (or the end).
 80 |     '''
 81 |     if endpos is None:
 82 |         endpos = len(text) - 1
 83 | 
 84 |     return _each_cons(
 85 |         itertools.chain(regex.finditer(text, pos, endpos),
 86 |                         (_DummyEnd(endpos + 1),)),
 87 |         2
 88 |     )
 89 | 
 90 | 
 91 | def _parse_benchmark(info, logtext: str, start, end, *, filenamere):
 92 |     NAME = info['name']
 93 | 
 94 |     blocks = []
 95 | 
 96 |     if filenamere and filenamere.search(logtext, start, end):
 97 |         files = [
 98 |             *(_FileInfo(filename=r.group(1), from_pos=r.end())
 99 |               for r in filenamere.finditer(logtext, start, end)),
100 |             _FileInfo(filename=None, from_pos=len(logtext)),
101 |         ][::-1]
102 |     else:
103 |         files = [
104 |             _FileInfo(filename=None, from_pos=start),
105 |             _FileInfo(filename=None, from_pos=len(logtext)),
106 |         ][::-1]
107 | 
108 |     blocks = []
109 | 
110 |     for regionm, nextm in _splititer(_RE_REGION_INFO, logtext, start, end):
111 |         assert regionm.end() > files[-1].from_pos
112 |         if regionm.end() > files[-2].from_pos:
113 |             files.pop()
114 | 
115 |         try:
116 |             filename = files[-1].filename
117 |         except NameError:
118 |             filename = None
119 | 
120 |         regioninfo = {
121 |             'name': regionm['name'],
122 |             'file': filename,
123 |             'benchmark': NAME,
124 |         }
125 |         block = _parse_block(regioninfo, logtext,
126 |                              regionm.start() - 1, nextm.start())
127 |         blocks.append(block)
128 | 
129 |     return Benchmark(info, blocks)
130 | 
131 | 
def _parse_block(info, logtext: str, start, end):
    '''
    Builds the Block for `logtext[start:end]`: its events are parsed from that
    span and the span itself is stored as the raw log.
    '''
    return Block(
        info,
        events=_parse_events(logtext, start, end),
        raw_log=logtext[start:end],
    )
137 | 
138 | 
139 | _RE_EVENT_LINE = re.compile(r'\nEVENT: (.*)')
140 | 
141 | 
142 | def _parse_events(block_log, start=0, end=None):
143 |     '''
144 |     Returns a `dict[event_id --> list[event-json]]` of the events in the given log.
145 | 
146 |     `EVENT: {"event_id": "some_id", "value"}`
147 |     becomes `{"some_id": [{"event_id": "some_id", "arg": "value"}, ...], ...}`
148 | 
149 |     If there is only one event of each id, pass the result through
150 |     `parse_as_singular_events(...)` to unwrap the lists.
151 |     '''
152 |     if end is None:
153 |         end = len(block_log)
154 | 
155 |     event_lines = _RE_EVENT_LINE.findall(block_log, start, end)
156 |     events = '[' + ',\n'.join(event_lines) + ']'
157 | 
158 |     try:
159 |         parsed = json.loads(events)
160 |     except json.JSONDecodeError:
161 |         print(events, file=sys.stderr)
162 |         raise
163 | 
164 |     result = dict()
165 | 
166 |     for log in parsed:
167 |         result.setdefault(log['event_id'], []).append(log)
168 | 
169 |     return result
170 | 


--------------------------------------------------------------------------------
/util/analyze/lib/compile_times.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import csv
 4 | import re
 5 | import argparse
 6 | import sys
 7 | 
 8 | import analyze
 9 | from analyze import Block, foreach_bench
10 | 
11 | 
12 | def _block_time(block: Block):
13 |     end = block.single('ScheduleVerifiedSuccessfully')['time']
14 |     start = block.single('ProcessDag')['time']
15 |     return end - start
16 | 
17 | 
18 | def instruction_scheduling_time(logs):
19 |     return sum(_block_time(blk) for blk in logs)
20 | 
21 | 
def total_compile_time_seconds(logs):
    '''
    Returns, as an int, the number of seconds reported by the
    "<N> total seconds elapsed" line found in the raw log of the last block
    of the last benchmark.

    Raises AssertionError if that raw log contains no such line.
    '''
    last_logs = logs.benchmarks[-1].blocks[-1].raw_log
    m = re.search(r'(\d+) total seconds elapsed', last_logs)
    assert m, \
        'Logs must contain "total seconds elapsed" output by the SPEC benchmark suite'

    # Convert to a number so that results can be summed across benchmarks.
    return int(m.group(1))
29 | 
30 | 
if __name__ == '__main__':
    # Prints one CSV row of compile times: a 'Total' column followed by one
    # column per benchmark.
    parser = argparse.ArgumentParser()
    parser.add_argument('--variant', choices=('sched', 'total'),
                        help='Which timing variant to use')
    parser.add_argument('logs', help='The logs to analyze')
    args = analyze.parse_args(parser, 'logs')

    # Anything other than --variant total measures the scheduling time.
    if args.variant == 'total':
        fn = total_compile_time_seconds
    else:
        fn = instruction_scheduling_time

    results = foreach_bench(fn, args.logs, combine=sum)

    writer = csv.DictWriter(sys.stdout, fieldnames=results.keys())
    writer.writeheader()
    writer.writerow(results)
43 | 


--------------------------------------------------------------------------------
/util/misc/count-boundary-spills.py:
--------------------------------------------------------------------------------
 1 | # /bin/python3
 2 | # Run this script with a CPU2006 logfile as the only argument.
 3 | # When using RegAllocFast, find the total number of spills and the proportion of
 4 | # those spills that are added at region and block boundaries.
 5 | 
 6 | import re
 7 | import sys
 8 | 
 9 | RE_FUNC = re.compile('Function: (.*?)\n')
10 | RE_TOTAL_SPILLS = re.compile('END FAST RA: Number of spills: (\d+)\n')
11 | RE_CALL_BOUNDARY_STORES = re.compile('Call Boundary Stores in function: (\d+)\n')
12 | RE_BLOCK_BOUNDARY_STORES = re.compile('Block Boundary Stores in function: (\d+)\n')
13 | RE_LIVE_IN_LOADS = re.compile('Live-In Loads in function: (\d+)\n')
14 | 
15 | totalSpills = 0
16 | totalCallBoundaryStores = 0
17 | totalBlockBoundaryStores = 0
18 | totalLiveInLoads = 0
19 | totalFuncs = 0
20 | #funcs = {}
21 | 
22 | if __name__ == '__main__':
23 |     with open(sys.argv[1]) as inputLog:
24 |         for line in inputLog.readlines():
25 |             searchTotalSpills = RE_TOTAL_SPILLS.findall(line)
26 |             searchCallBoundaryStores = RE_CALL_BOUNDARY_STORES.findall(line)
27 |             searchBlockBoundaryStores = RE_BLOCK_BOUNDARY_STORES.findall(line)
28 |             searchLiveInLoads = RE_LIVE_IN_LOADS.findall(line)
29 |             # TDOD remove
30 |             #searchFunc = RE_FUNC.findall(line)
31 |             #if searchFunc != []:
32 |             #    if searchFunc[0] in funcs:
33 |             #        print(searchFunc[0] + 'Is a copy')
34 |             #    else:
35 |             #        funcs[searchFunc[0]] = 0
36 |             if searchTotalSpills != []:
37 |                 totalSpills += int(searchTotalSpills[0])
38 |                 totalFuncs+=1
39 |             elif searchCallBoundaryStores != []:
40 |                 totalCallBoundaryStores += int(searchCallBoundaryStores[0])
41 |             elif searchBlockBoundaryStores != []:
42 |                 totalBlockBoundaryStores += int(searchBlockBoundaryStores[0])
43 |             elif searchLiveInLoads != []:
44 |                 totalLiveInLoads += int(searchLiveInLoads[0])
45 | 
46 |     print("Total Spills: " + str(totalSpills))
47 |     print("Total Call Boundary Stores: " + str(totalCallBoundaryStores))
48 |     print("Total Block Boundary Stores: " + str(totalBlockBoundaryStores))
49 |     print("Total Live-In Loads: " + str(totalLiveInLoads))
50 |     print("Total funcs: " + str(totalFuncs))
51 | 


--------------------------------------------------------------------------------
/util/misc/count-nodes.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import mmap
 3 | import optparse
 4 | import os
 5 | import json
 6 | 
 7 | NODE_COUNT_RE = re.compile(r'EVENT: (.*"event_id": "NodeExamineCount".*)')
 8 | 
 9 | def getNodeCount(fileName):
10 |     count = 0
11 |     with open(fileName) as bff:
12 |         bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ)
13 | 
14 |         for match in NODE_COUNT_RE.finditer(bffm):
15 |             count += json.loads(match.group(1))['count']
16 | 
17 |         bffm.close()
18 | 
19 |     return count
20 | 
# Command-line driver: total the NodeExamineCount events of one log file, or
# of every regular file directly inside a folder when --isfolder is given.
parser = optparse.OptionParser(
    description='Sum the NodeExamineCount events found in a log file or folder.')
parser.add_option('-p', '--path',
                  metavar='path',
                  default=None,
                  help='Log file.')
parser.add_option('--isfolder',
                  action='store_true',
                  help='Specify if parsing a folder.')

args = parser.parse_args()[0]

total = 0

# parser.error() prints the usage text and exits with status 2; the previous
# `raise Error(...)` referenced an undefined name and died with a NameError.
if args.path is None:
    parser.error("Please specify a path with -p/--path.")

if args.isfolder:
    if not os.path.isdir(args.path):
        parser.error("Please specify a valid folder.")
    for filename in os.listdir(args.path):
        candidate = os.path.join(args.path, filename)
        # Sub-directories cannot be opened as logs; only count regular files.
        if os.path.isfile(candidate):
            total += getNodeCount(candidate)
else:
    if not os.path.isfile(args.path):
        parser.error("Please specify a valid log file.")
    total += getNodeCount(args.path)

print(total)
46 | 


--------------------------------------------------------------------------------
/util/misc/ddg2dot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import argparse
  3 | import sys
  4 | import re
  5 | 
  6 | parser = argparse.ArgumentParser(description='Convert data_dep WriteToFile format to a .dot file')
  7 | parser.add_argument('input', help='The WriteToFile format file to convert. Input a single hyphen (-) to read from stdin')
  8 | parser.add_argument('-o', '--output', help='The destination to write to. Defaults to stdout')
  9 | parser.add_argument('--filter-weights', nargs='*', default=[], help='filter out weights with the respective values')
 10 | parser.add_argument(
 11 |     '--base', help='Consider the edges from this other .ddg when layouting. Those edges will be made invisible.')
 12 | 
 13 | args = parser.parse_args()
 14 | 
 15 | if args.input == '-':
 16 |     infile = sys.stdin
 17 | else:
 18 |     infile = open(args.input, 'r')
 19 | 
 20 | filtered_weights = set(int(x) for x in args.filter_weights)
 21 | 
 22 | text = infile.read()
 23 | infile.close()
 24 | 
 25 | if args.base:
 26 |     with open(args.base) as f:
 27 |         basetext = f.read()
 28 | else:
 29 |     basetext = ''
 30 | 
 31 | NODE_RE = re.compile(r'node (?P<number>\d+) "(?P<name>.*?)"(\s*"(?P<other_name>.*?)")?')
 32 | EDGE_RE = re.compile(r'dep (?P<from>\d+) (?P<to>\d+) "(?P<type>.*?)" (?P<weight>\d+)')
 33 | 
 34 | # Holds the resulting strings as a list of the lines.
 35 | result = ['digraph G {\n']
 36 | 
 37 | # Create the nodes in the graph
 38 | for match in NODE_RE.finditer(text):
 39 |     num = match['number']
 40 |     name = match['name']
 41 |     if name == 'artificial':  # Prettify entry/exit names
 42 |         name = ['exit', 'entry'][match['other_name'] == '__optsched_entry']
 43 | 
 44 |     # Add the node to the graph. Include a node to make it clear what this is
 45 |     result.append(f'    n{num} [label="{name}:n{num}"];\n')
 46 | 
 47 | result.append('\n')
 48 | 
 49 | 
 50 | def create_edge_attrs(**attrs):
 51 |     if not attrs:
 52 |         return ''
 53 |     attrtext = ' '.join(f'{key}="{value}"' for key, value in attrs.items())
 54 |     return f' [{attrtext}]'
 55 | 
 56 | 
 57 | def create_label(filtered_weights, weight, type_):
 58 |     # The additional label text if we want to display the weight
 59 |     # (that is, if the weight is not filtered out)
 60 |     weight_label = '' if int(weight) in filtered_weights else ':' + weight
 61 |     # The actual label text
 62 |     return weight_label if type_ == 'data' else f'{type_}{weight_label}'
 63 | 
 64 | 
 65 | def create_edge(from_, to, **attrs):
 66 |     return f'    n{from_} -> n{to}{create_edge_attrs(**attrs)};\n'
 67 | 
 68 | 
 69 | edges = set()
 70 | 
 71 | # Create the edges in the graph
 72 | for match in EDGE_RE.finditer(text):
 73 |     from_ = match['from']
 74 |     to = match['to']
 75 |     type_ = match['type']
 76 |     weight = match['weight']
 77 | 
 78 |     result.append(
 79 |         create_edge(
 80 |             from_, to,
 81 |             label=create_label(filtered_weights, weight, type_),
 82 |         )
 83 |     )
 84 |     edges.add((from_, to))
 85 | 
 86 | for match in EDGE_RE.finditer(basetext):
 87 |     from_ = match['from']
 88 |     to = match['to']
 89 |     type_ = match['type']
 90 |     weight = match['weight']
 91 | 
 92 |     if (from_, to) not in edges:
 93 |         result.append(
 94 |             create_edge(
 95 |                 from_, to,
 96 |                 label=create_label(filtered_weights, weight, type_),
 97 |                 style="invis",
 98 |             )
 99 |         )
100 | 
101 | # Graph is now finished:
102 | result.append('}\n')
103 | 
104 | filecontents = ''.join(result)
105 | 
106 | if args.output:
107 |     with open(args.output, 'w') as f:
108 |         print(filecontents, file=f)
109 | else:
110 |     print(filecontents)
111 | 


--------------------------------------------------------------------------------
/util/misc/extract-script.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import os, sys
 4 | import shutil
 5 | import re
 6 | 
 7 | parser = argparse.ArgumentParser(
 8 |     description='Extract a standalone version of an OptSched script')
 9 | parser.add_argument(
10 |     'script', help='The path to script to extract a standalone version of')
11 | parser.add_argument(
12 |     'output', help='The output file to write the extracted script to')
13 | parser.add_argument('--optsched', help='The path to the OptSched directory, '
14 |                     'if this extract-script.py is not in its original location')
15 | 
16 | args = parser.parse_args()
17 | 
18 | OPTSCHED_ROOT = args.optsched if args.optsched else os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19 | COMMON_FNS = os.path.join(OPTSCHED_ROOT, 'util', 'readlogs', '__init__.py')
20 | 
21 | with open(args.script, 'r') as f:
22 |     script = f.read()
23 | 
24 | with open(COMMON_FNS, 'r') as f:
25 |     readlogs = f.read()
26 | 
27 | 
def replace_module(modulename, modulecontent, script):
    """Replace every import of `modulename` in `script` with `modulecontent`.

    Both `import <modulename>...` and `from <modulename> import ...` lines are
    matched (leading whitespace allowed). Returns the rewritten script text.
    """
    import_line = r'^(?:(?:\s*from\s+{0}\s+import.*)|(?:\s*import\s+{0}.*))$'.format(
        re.escape(modulename))
    # A callable replacement is inserted verbatim. Passing the module source as
    # a replacement *string* would make re.sub interpret its backslashes as
    # template escapes (e.g. `\d` raises "bad escape", `\n` becomes a newline).
    return re.sub(import_line, lambda _match: modulecontent, script,
                  flags=re.MULTILINE)
32 | 
# The sys.path tweak is only needed to locate the readlogs package, which is
# about to be inlined, so strip it from the script.
SYS_PATH_LINE = 'sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n'
script = script.replace(SYS_PATH_LINE, '')
script = replace_module('readlogs', readlogs, script)

# Any remaining mention of the library means an import form was not handled.
if 'readlogs' in script:
    sys.exit('Failed to make {} standalone. The "readlogs" library couldn\'t be'
        ' replaced.'.format(args.script))

if not os.path.isdir(args.output):
    output = args.output
    # The destination directory may not exist yet; create it on demand.
    parent_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(parent_dir, exist_ok=True)
else:
    # cp-like behavior: an existing directory receives the script under its
    # original file name.
    output = os.path.join(args.output, os.path.basename(args.script))

with open(output, 'w') as f:
    f.write(script)
# Carry over the source file's metadata (permission bits included).
shutil.copystat(args.script, output)
53 | 


--------------------------------------------------------------------------------
/util/misc/findblock.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import sys
 3 | import argparse
 4 | 
 5 | parser = argparse.ArgumentParser(description='Search spills.dat (from runspec-wrapper) to find the benchmark for a block')
 6 | parser.add_argument('spills', help='The spills.dat file to search in. - for stdin')
 7 | parser.add_argument('blocks', help='The blocks to search for. This may include the `:##` part, or it may just be the mangled function name', nargs='*')
 8 | 
 9 | result = parser.parse_args()
10 | 
11 | with open(result.spills, 'r') as f:
12 |     file = f.read()
13 | 
14 | fns = (block.split(':')[0] for block in result.blocks)
15 | 
16 | fn_locs = [file.find(fn) for fn in fns]
17 | fn_benchmarks = [file.rfind(':', 0, fnindex) for fnindex in fn_locs]
18 | fn_benchmark_spans = [(file.rfind('\n', 0, e), e) for e in fn_benchmarks]
19 | fn_benchmarks = [file[b + 1:e] for (b, e) in fn_benchmark_spans]
20 | 
21 | print('\n'.join(fn_benchmarks))
22 | 


--------------------------------------------------------------------------------
/util/misc/func-stats.py:
--------------------------------------------------------------------------------
 1 | #!/bin/python3
 2 | # Find the number of functions that are compiled more than once by LLVM.
 3 | 
 4 | import sys
 5 | import re
 6 | import json
 7 | 
 8 | def get_events_of_id(logs, event_id):
 9 |     event_start = 'EVENT: {"event_id": "{}"'.format(event_id)
10 |     lines = logs.splitlines()
11 |     event_lines = [line.split(' ', 1)[1] for line in lines if line.startswith(event_start)]
12 |     return list(map(json.loads, event_lines))
13 | 
RE_NEW_BENCH = re.compile(r'(\d+)\.(.*) base \.exe default')
RE_BLOCK = re.compile(r'INFO: Processing DAG (.*) with (\d+) insts')


def count_repeated_blocks(lines):
    """Count, per benchmark, the blocks that are processed more than once.

    lines: iterable of log lines.

    A line matching RE_NEW_BENCH starts a new benchmark and resets the
    bookkeeping. A line matching RE_BLOCK names a block and its instruction
    count: a name already seen in the current benchmark is a repeat, and a
    repeat whose instruction count differs from the first sighting is also a
    mismatch.

    Returns a list of ``(bench, repeats, mismatches)`` tuples in log order.
    Blocks logged before the first benchmark line are not reported.
    NOTE(review): blocks are recognized through the "Processing DAG" INFO
    line; confirm the logs being analyzed still contain it.
    """
    summaries = []
    blocks = {}
    bench = None
    totalRepeats = 0
    totalMismatches = 0

    for line in lines:
        matchBench = RE_NEW_BENCH.search(line)
        if matchBench:
            if bench is not None:
                summaries.append((bench, totalRepeats, totalMismatches))
            # Start the next benchmark with a clean slate.
            blocks = {}
            totalRepeats = 0
            totalMismatches = 0
            bench = matchBench.group(2)
            continue

        matchBlock = RE_BLOCK.search(line)
        if not matchBlock:
            continue

        name, insts = matchBlock.group(1), matchBlock.group(2)
        if name in blocks:
            if blocks[name] != insts:
                totalMismatches += 1
            totalRepeats += 1
        else:
            blocks[name] = insts

    # The last benchmark has no following header line to trigger its report.
    if bench is not None:
        summaries.append((bench, totalRepeats, totalMismatches))

    return summaries


if __name__ == "__main__":
    with open(sys.argv[1]) as logfile:
        for bench, totalRepeats, totalMismatches in count_repeated_blocks(logfile):
            print('In bench ' + bench + '  found ' + str(totalRepeats) + ' repeat blocks and ' + str(totalMismatches) + ' mismatches in length.')
47 | 


--------------------------------------------------------------------------------
/util/misc/rp-compare.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
 2 | # Calculate how often OptSched's register pressure estimates match LLVM's
 3 | # You must compile OptSched with IS_DEBUG_PEAK_PRESSURE flag enabled.
 4 | 
 5 | import sys
 6 | import re
 7 | 
 8 | # The number of register types.
 9 | MAX_REG_TYPES = 30
10 | 
11 | RP_OPT_INFO = re.compile(r'INFO: OptSchPeakRegPres Index (\d+) Name (.+) Peak (\d+) Limit (\d+)')
12 | RP_AFT_INFO = re.compile(r'INFO: PeakRegPresAfter  Index (\d+) Name (.+) Peak (\d+) Limit (\d+)')
13 | RP_DAG_NAME = re.compile(r'INFO: Processing DAG (.+) with')
14 | 
15 | totalBlocks = 0
16 | totalMismatches = 0
17 | majorMismatches = 0
18 | 
19 | with open(str(sys.argv[1])) as logfile:
20 |     log = logfile.read()
21 |     blocks = log.split("INFO: ********** Opt Scheduling **********")
22 | 
23 | for block in blocks:
24 |     optSchedPressures = [None]*MAX_REG_TYPES
25 |     llvmPressures = [None]*MAX_REG_TYPES
26 |     if (len(RP_DAG_NAME.findall(block)) == 0):
27 |         continue;
28 | 
29 |     totalBlocks+=1
30 |     blockName = RP_DAG_NAME.findall(block)[0]
31 | 
32 |     for matchOpt in RP_OPT_INFO.finditer(block):
33 |         index = int(matchOpt.group(1))
34 |         name = matchOpt.group(2)
35 |         peak = matchOpt.group(3)
36 |         limit = matchOpt.group(4)
37 |         optSchedPressures[index] = {}
38 |         optSchedPressures[index]['name'] = name
39 |         optSchedPressures[index]['peak'] = peak
40 |         optSchedPressures[index]['limit'] = limit
41 | 
42 |     for matchLLVM in RP_AFT_INFO.finditer(block):
43 |         index = int(matchLLVM.group(1))
44 |         name = matchLLVM.group(2)
45 |         peak = matchLLVM.group(3)
46 |         limit = matchLLVM.group(4)
47 |         llvmPressures[index] = {}
48 |         llvmPressures[index]['name'] = name
49 |         llvmPressures[index]['peak'] = peak
50 |         llvmPressures[index]['limit'] = limit
51 | 
52 |     for i in range(MAX_REG_TYPES):
53 |         optP = optSchedPressures[i]
54 |         llvmP = llvmPressures[i]
55 | 
56 |         if (optP['peak'] != llvmP['peak']):
57 |             print('Mismatch in block ' + blockName + '.')
58 |             print('Reg type with mismatch ' + optP['name'] + \
59 |                   ' Limit ' + optP['limit'] + ' Peak OptSched ' + optP['peak'] + \
60 |                   ' Peak LLVM ' + llvmP['peak'] + '.')
61 |             totalMismatches+=1
62 |             # A major mismatch occurs when peak pressure is over physical limit.
63 |             if (max(int(optP['peak']), int(llvmP['peak'])) > int(optP['limit'])):
64 |                 print('Major mismatch!')
65 |                 majorMismatches+=1
66 | 
67 | print('Total blocks processed ' + str(totalBlocks) + '.')
68 | print('Total mismatches ' + str(totalMismatches) + '.')
69 | print('Total major mismatches ' + str(majorMismatches) + '.')
70 | 


--------------------------------------------------------------------------------
/util/misc/spill-compare.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # Compare two log files using the OptSched scheduler with simulate register
 3 | # allocation enabled. Find instances where a reduction in cost does not
 4 | # correspond with a reduction in spills.
 5 | 
 6 | import os, sys
 7 | 
 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 9 | from readlogs import *
10 | 
# Events, besides BestResult and CostLowerBound, that a block needs before its
# cost and spill counts can be compared.
REQUIRED_EVENTS = (
    'HeuristicResult',
    'BestLocalRegAllocSimulation',
    'HeuristicLocalRegAllocSimulation',
)


def find_mismatches(blocks):
    """Find blocks whose cost went down while their spill count went up.

    blocks: iterable of ``dict[event_id -> event dict]`` (one event per id).

    Blocks without a 'BestResult' event are ignored. Blocks without a
    'CostLowerBound' event are skipped with a warning on stderr and are not
    counted. The remaining blocks are counted; those missing one of
    REQUIRED_EVENTS are then skipped with a warning.

    Returns ``(total_blocks, mismatches)`` where each mismatch is a dict with
    the keys 'name', 'length', 'best_cost', 'heuristic_cost', 'absolute_cost',
    'best_spills' and 'heuristic_spills'.
    """
    totalBlocks = 0
    mismatches = []

    for block in blocks:
        if 'BestResult' not in block:
            continue

        bestCostInfo = block['BestResult']
        regionName = bestCostInfo['name']

        if 'CostLowerBound' not in block:
            print("WARNING: Block does not have a logged lower bound. Skipping block: " + str(regionName),
                  file=sys.stderr)
            continue

        totalBlocks += 1

        missing = [event for event in REQUIRED_EVENTS if event not in block]
        if missing:
            print("WARNING: Block " + str(regionName) + " is missing events: " + ', '.join(missing),
                  file=sys.stderr)
            continue

        lowerBound = block['CostLowerBound']['cost']
        regionCostBest = bestCostInfo['cost']
        regionCostHeuristic = block['HeuristicResult']['spill_cost']
        regionSpillsBest = block['BestLocalRegAllocSimulation']['num_spills']
        regionSpillsHeuristic = block['HeuristicLocalRegAllocSimulation']['num_spills']

        if regionCostBest < regionCostHeuristic and regionSpillsBest > regionSpillsHeuristic:
            mismatches.append({
                'name': regionName,
                'length': bestCostInfo['length'],
                'best_cost': regionCostBest,
                'heuristic_cost': regionCostHeuristic,
                'absolute_cost': lowerBound + regionCostBest,
                'best_spills': regionSpillsBest,
                'heuristic_spills': regionSpillsHeuristic,
            })

    return totalBlocks, mismatches


def main():
    """Analyze the log named on the command line and print the mismatches."""
    with open(str(sys.argv[1])) as logfile:
        log1 = logfile.read()

    # parse_blocks() maps each event id to a *list* of events; reduce it to one
    # event per id so the fields can be indexed directly.
    # NOTE(review): this keeps the first event of each id - confirm that is the
    # right one if a block can log an id more than once.
    blocks = [keep_only_first_event(block) for block in parse_blocks(log1)]
    totalBlocks, mismatches = find_mismatches(blocks)

    for found in mismatches:
        print("Found Region: "  + found['name'] + " With Length: " + str(found['length']))
        print("Best Cost: " + str(found['best_cost']) + " Heuristic Cost: " + str(found['heuristic_cost']))
        print("Best Cost (Absolute): " + str(found['absolute_cost']))
        print("Best Spills: " + str(found['best_spills']) + " Heurisitc Spills: " + str(found['heuristic_spills']))

    if mismatches:
        # min() keeps the first of equally short regions.
        smallest = min(mismatches, key=lambda found: found['length'])
        print("Smallest region with mismatch is: " + str(smallest['name']) + " with length " + str(smallest['length']))

    print("Processed " + str(totalBlocks) + " blocks")
    print("Found " + str(len(mismatches)) + " mismatches")


if __name__ == '__main__':
    main()
59 | 


--------------------------------------------------------------------------------
/util/misc/spill-count-csv.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import re
 3 | import os
 4 | 
 5 | BenchmarkRegex = re.compile(r'(.*?):$')
 6 | FunctionRegex = re.compile(r' +(\d+) (.*?)$')
 7 | DEBUG = False
 8 | 
 9 | def debugPrint(str):
10 |     if DEBUG: print(str)
11 | 
# Validate the command line: exactly one argument, naming an existing file.
if len(sys.argv) != 2:
    raise Exception("Invalid number of arguments. Expected 1")

if not os.path.isfile(sys.argv[1]):
    raise Exception("%s is not a file!" % sys.argv[1])

# Emit one `benchmark,function,count` CSV row per function line, attributing
# each function to the most recent benchmark header seen.
with open(sys.argv[1]) as f:
    benchName = ""
    for line in f:
        benchMatch = BenchmarkRegex.match(line)
        if benchMatch is not None:
            benchName = benchMatch.group(1)
            debugPrint("Found benchmark %s" % benchName)
            continue

        funcMatch = FunctionRegex.match(line)
        if funcMatch is None:
            debugPrint("Not a match: %s" % line)
            continue

        funcName = funcMatch.group(2)
        spillCount = int(funcMatch.group(1))
        debugPrint("Found function %s with %d spills" % (funcName, spillCount))
        sys.stdout.write("%s,%s,%d\n" % (benchName, funcName, spillCount))
32 | 


--------------------------------------------------------------------------------
/util/plaidbench/run-plaidbench.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import subprocess
 3 | import argparse
 4 | import os
 5 | 
 6 | #**************************************************************************************
 7 | #Description:	Run all plaidbench benchmarks and redirect output to a directory
 8 | #               that will contain the log file for each benchmark.
 9 | #Author:	    Austin Kerbow
10 | #Modified By:	Justin Bassett
11 | #Last Update:	May 4, 2020
12 | #**************************************************************************************
13 | # Requires write permission in the current directory.
14 | #
15 | # OUTPUT:
16 | #   1.) Directories containing the log for each benchmark located in their own
17 | #       directory.
18 | 
# The networks to benchmark, in run order.
NETWORKS = (
    "densenet121",
    "densenet169",
    "densenet201",
    "inception_resnet_v2",
    "inception_v3",
    "mobilenet",
    "nasnet_large",
    "nasnet_mobile",
    "resnet50",
    "vgg16",
    "vgg19",
    "xception",
    "imdb_lstm",
)

# Values passed to plaidbench's --examples and --batch-size options.
EXAMPLES = 4096
BATCH_SIZE = 16

parser = argparse.ArgumentParser(description='Run all plaidbench benchmarks, redirecting output to a directory which contains the log file for each benchmark')
parser.add_argument('-n', '--num-iterations', type=int, default=1, help='Number of iterations')
parser.add_argument('output', metavar='DIR', help='The output directory base path')

args = parser.parse_args()

NUM_ITERATIONS = args.num_iterations
DIRECTORY_BASE_PATH = args.output

for iteration in range(NUM_ITERATIONS):
    # Each iteration gets its own "<base>-<iteration>" directory.
    DIR_NAME = DIRECTORY_BASE_PATH + '-' + str(iteration)

    for network in NETWORKS:
        RESULT_DIR = os.path.join(DIR_NAME, network)
        os.makedirs(RESULT_DIR, exist_ok=True)

        log_path = os.path.join(RESULT_DIR, network + '.log')
        command = [
            'plaidbench',
            '--examples', str(EXAMPLES),
            '--batch-size', str(BATCH_SIZE),
            '--results', DIR_NAME,
            'keras', '--no-fp16', '--no-train', network,
        ]

        # stdout and stderr both go to the per-network log; check=True stops
        # the whole run on the first failing benchmark.
        with open(log_path, 'w') as outfile:
            subprocess.run(command, check=True, stderr=subprocess.STDOUT, stdout=outfile)
60 | 


--------------------------------------------------------------------------------
/util/readlogs/__init__.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | def split_blocks(log):
 4 |     '''
 5 |     Splits the log into the individual blocks.
 6 |     '''
 7 |     return log.split("INFO: ********** Opt Scheduling **********")[1:]
 8 | 
 9 | def parse_events(block_log):
10 |     '''
11 |     Returns a `dict[event_id --> list[event-json]]` of the events in the given log.
12 | 
13 |     `EVENT: {"event_id": "some_id", "value"}`
14 |     becomes `{"some_id": [{"event_id": "some_id", "arg": "value"}, ...], ...}`
15 | 
16 |     If there is only one event of each id, pass the result through
17 |     `parse_as_singular_events(...)` to unwrap the lists.
18 |     '''
19 |     lines = block_log.splitlines()
20 |     event_lines = [line.split(' ', 1)[1] for line in lines if line.startswith('EVENT:')]
21 |     parsed = list(map(json.loads, event_lines))
22 |     result = dict()
23 | 
24 |     for log in parsed:
25 |         result.setdefault(log['event_id'], []).append(log)
26 | 
27 |     return result
28 | 
def parse_blocks(log):
    '''
    Splits the log into individual blocks via split_blocks() and parses each
    block via parse_events(), returning one event dict per block.
    '''
    return list(map(parse_events, split_blocks(log)))
34 | 
def keep_only_singular_events(logs):
    '''
    Converts the event `dict[event_id --> list[event-json]]` to
    `dict[event_id --> event-json]`, dropping every event_id that does not have
    exactly one event.
    '''
    return {
        event_id: events[0]
        for event_id, events in logs.items()
        if len(events) == 1
    }
44 | 
def keep_only_first_event(logs):
    '''
    Converts the event `dict[event_id --> list[event-json]]` to
    `dict[event_id --> event-json]`, keeping only the first event logged for
    each event_id.
    '''
    return {event_id: events[0] for event_id, events in logs.items()}
54 | 
def parse_as_singular_events(logs):
    '''
    Converts the event `dict[event_id --> list[event-json]]` to
    `dict[event_id --> event-json]`, requiring exactly one event per event_id.

    Raises AssertionError for the first event_id whose list does not hold
    exactly one event.
    '''
    for event_id, events in logs.items():
        if len(events) == 1:
            continue
        raise AssertionError('Duplicate log events for event ' + event_id)
    return dict((event_id, events[0]) for event_id, events in logs.items())
63 | 


--------------------------------------------------------------------------------