├── .clang-format ├── .clang-tidy ├── .envrc ├── .github └── workflows │ ├── build-methods.yml │ ├── checks.yml │ └── runclangformat.yml ├── .gitignore ├── .pep8 ├── BUILD.md ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── GetLocalLLVM.cmake └── superbuild │ ├── CMakeLists.txt │ └── flang.cmake ├── documentation └── flang │ ├── README.md │ └── hello.f ├── example ├── helloworld.cpp ├── llvm7-CPU2006-cfg │ ├── hotfuncs.ini │ ├── machine_model.cfg │ └── sched.ini └── optsched-cfg │ ├── hotfuncs.ini │ ├── machine_model.cfg │ ├── occupancy_limits.ini │ └── sched.ini ├── include └── opt-sched │ └── Scheduler │ ├── OptSchedDDGWrapperBase.h │ ├── OptSchedTarget.h │ ├── aco.h │ ├── array_ref2d.h │ ├── bb_spill.h │ ├── bit_vector.h │ ├── buffers.h │ ├── config.h │ ├── data_dep.h │ ├── defines.h │ ├── enumerator.h │ ├── gen_sched.h │ ├── graph.h │ ├── graph_trans.h │ ├── graph_trans_ilp.h │ ├── graph_trans_ilp_occupancy_preserving.h │ ├── hash_table.h │ ├── hist_table.h │ ├── list_sched.h │ ├── lnkd_lst.h │ ├── logger.h │ ├── machine_model.h │ ├── mem_mngr.h │ ├── random.h │ ├── ready_list.h │ ├── reg_alloc.h │ ├── register.h │ ├── relaxed_sched.h │ ├── sched_basic_data.h │ ├── sched_region.h │ ├── stats.h │ └── utilities.h ├── lib ├── CMakeLists.txt ├── Scheduler │ ├── aco.cpp │ ├── bb_spill.cpp │ ├── buffers.cpp │ ├── config.cpp │ ├── data_dep.cpp │ ├── enumerator.cpp │ ├── gen_sched.cpp │ ├── graph.cpp │ ├── graph_trans.cpp │ ├── graph_trans_ilp.cpp │ ├── graph_trans_ilp_occupancy_preserving.cpp │ ├── hist_table.cpp │ ├── list_sched.cpp │ ├── logger.cpp │ ├── machine_model.cpp │ ├── random.cpp │ ├── ready_list.cpp │ ├── reg_alloc.cpp │ ├── register.cpp │ ├── relaxed_sched.cpp │ ├── sched_basic_data.cpp │ ├── sched_region.cpp │ ├── stats.cpp │ └── utilities.cpp └── Wrapper │ ├── AMDGPU │ ├── GCNOptSched.cpp │ ├── GCNOptSched.h │ ├── GCNOptSchedReg.h │ ├── OptSchedDDGWrapperGCN.cpp │ ├── OptSchedDDGWrapperGCN.h │ └── OptSchedGCNTarget.cpp │ ├── OptSchedDDGWrapperBasic.cpp │ ├── OptSchedDDGWrapperBasic.h │ ├── OptSchedGenericTarget.cpp │ ├── OptSchedMachineWrapper.cpp │ ├── OptSchedMachineWrapper.h │ ├── OptSchedReg.h │ ├── OptimizingScheduler.cpp │ └── OptimizingScheduler.h ├── patches ├── amdgpu │ ├── README.md │ ├── ROCm-2.4-LLVM-print-occupancy.patch │ └── ROCm-2.4-load-optsched.patch ├── llvm6.0 │ ├── README.md │ ├── flang-llvm6-print-spilling-info.patch │ └── llvm6-print-spilling-info.patch └── llvm7.0 │ ├── README.md │ └── flang-llvm7-print-spilling-info.patch ├── test ├── CMakeLists.txt ├── Unit │ ├── lit.cfg.py │ └── lit.site.cfg.py.in ├── lit.cfg.py └── lit.site.cfg.py.in ├── unittests ├── Basic │ ├── ArrayRef2DTest.cpp │ ├── CMakeLists.txt │ ├── ConfigTest.cpp │ ├── LinkedListTest.cpp │ ├── LoggerTest.cpp │ ├── UtilitiesTest.cpp │ ├── simple_machine_model.h │ └── simple_machine_model_test.cpp └── CMakeLists.txt └── util ├── ARM ├── build-copy-to-A7.sh ├── extract-run-spec-cmd.sh ├── gen-CPU2006-cross-ARM.py └── run-CPU2006-cross-ARM.sh ├── CPU2006 ├── clean-compile-commands.py ├── runspec-wrapper-optsched.py └── sched-som.py ├── SLIL ├── compare-BB-fixed.py ├── compare-peaks.py ├── compare-static-LB.py ├── gather-SLIL-stats.py ├── run-filtered-block-tests.py └── runspec-wrapper-SLIL.py ├── aco_analysis └── make_pheromone_pdfs.sh ├── analyze ├── __init__.py ├── _main.py ├── _types.py ├── _utils.py ├── imports │ ├── __init__.py │ ├── import_cpu2006.py │ ├── import_plaidml.py │ ├── import_shoc.py │ └── import_utils.py └── lib │ └── compile_times.py ├── misc ├── count-boundary-spills.py ├── count-nodes.py ├── ddg2dot.py ├── extract-script.py ├── findblock.py ├── func-stats.py ├── json2infolog.py ├── rp-compare.py ├── spill-compare.py ├── spill-count-csv.py └── validation-test.py ├── plaidbench ├── extract-plaidbench-data.py ├── get-benchmarks-stats.py ├── get-occupancy.py ├── get-optsched-stats.py ├── get-sched-length.py ├── plaidbench-validation-test.py └── run-plaidbench.py └── readlogs └── __init__.py /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming' 2 | CheckOptions: 3 | - key: readability-identifier-naming.ClassCase 4 | value: CamelCase 5 | - key: readability-identifier-naming.EnumCase 6 | value: CamelCase 7 | - key: readability-identifier-naming.FunctionCase 8 | value: camelBack 9 | - key: readability-identifier-naming.MemberCase 10 | value: CamelCase 11 | - key: readability-identifier-naming.ParameterCase 12 | value: CamelCase 13 | - key: readability-identifier-naming.UnionCase 14 | value: CamelCase 15 | - key: readability-identifier-naming.VariableCase 16 | value: CamelCase 17 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="$PYTHONPATH:$PWD/util" 2 | -------------------------------------------------------------------------------- /.github/workflows/runclangformat.yml: -------------------------------------------------------------------------------- 1 | name: Run ClangFormat 2 | 3 | on: 4 | issue_comment: # GitHub PRs are considered the same type of thing as issues. 5 | types: [ created ] 6 | 7 | jobs: 8 | reformat: 9 | name: ClangFormat 10 | # If we are a pull_request, we have the trigger comment, and the person 11 | # requesting is the one who made the PR, then we run. 12 | if: >- 13 | github.event.issue.pull_request != '' 14 | && github.event.comment.body == 'Do: Reformat' 15 | && github.event.comment.user.id == github.event.issue.user.id 16 | # We must run on a ubuntu, as we use unix-only commands 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | # Add an emote reaction to acknowledge the request. 21 | # For long-running tasks, this is helpful. 22 | # For just clang-format, it may not be needed, but it's still nice. 23 | - name: Acknowledge 24 | uses: peter-evans/create-or-update-comment@v1.4.1 25 | with: 26 | comment-id: ${{ github.event.comment.id }} 27 | reactions: rocket # (Launch) 28 | 29 | - uses: actions/checkout@v2 30 | with: 31 | # We need the entire history so that we can rebase many commits. 32 | fetch-depth: 0 33 | 34 | - name: Checkout PR 35 | uses: dawidd6/action-checkout-pr@v1 36 | with: 37 | pr: ${{ github.event.issue.number }} 38 | 39 | - name: Get clang-format 40 | run: sudo apt-get install clang-format-9 41 | 42 | - name: Clang format each commit in place 43 | run: | 44 | # Git requires user email and name to do commits. 45 | # As we are only amending each commit, these shouldn't end up in the 46 | # history. 47 | # Set user email to the email of the last commit: 48 | git config --local user.email "$(git log -n 1 --pretty=format:'%ae')" 49 | git config --local user.name 'GitHub Action' 50 | 51 | # Rebases every commit since when this PR branched off of master. 52 | # The sed command tells git that we want to edit the code for each commit. 53 | GIT_EDITOR="sed -iE 's/^pick/edit/g'" git rebase -i $(git merge-base master HEAD) 54 | 55 | # Abort on error 56 | set -e 57 | echo '>>> Beginning Rebasing...' 58 | 59 | # While a rebase is ongoing, `git status` contains the text "rebase". 60 | while [[ -n $(git status | grep rebase) ]]; do 61 | # Run clang-format 62 | find . -name '*.hpp' -o -name '*.cpp' | xargs -L1 clang-format-9 -style=file -i --verbose 63 | 64 | # Add all changes and update the commit 65 | echo '>>> Rewriting commit...' 66 | git add -A 67 | git commit --amend --no-edit 68 | 69 | echo '>>> Continuing Rebasing...' 70 | git rebase --continue 71 | done 72 | echo '>>> Finished Rebasing!' 73 | 74 | - name: Push 75 | run: git push --force -v 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /.vscode 3 | __pycache__ 4 | *.pyc 5 | -------------------------------------------------------------------------------- /.pep8: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max_line_length = 120 3 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | 3 | project(OptSched) 4 | 5 | option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." OFF) 6 | option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." ON) 7 | 8 | set(OPTSCHED_LIT_ARGS "-sv" CACHE STRING "Arguments to pass to lit") 9 | set(OPTSCHED_EXTRA_LINK_LIBRARIES "" CACHE STRING "Extra link_libraries to pass to OptSched, ;-separated") 10 | set(OPTSCHED_EXTRA_INCLUDE_DIRS "" CACHE STRING "Extra include_directories to pass to OptSched, ;-separated") 11 | # To add OptSched debug defines, e.g.: 12 | # '-DOPTSCHED_EXTRA_DEFINITIONS=-DIS_DEBUG_DEFS_AND_USES;-DIS_DEBUG_DEF_USE_COUNT' 13 | set(OPTSCHED_EXTRA_DEFINITIONS "" CACHE STRING "Extra add_definitions to pass to OptSched, ;-separated") 14 | 15 | if(TARGET LLVMAMDGPUCodeGen OR TARGET LLVMCodeGen OR TARGET LLVMX86CodeGen) 16 | set(llvm_subproject TRUE) 17 | else() 18 | set(llvm_subproject FALSE) 19 | endif() 20 | 21 | # Not supported 22 | if(NOT llvm_subproject) 23 | set(llvm_version 6.0) 24 | if(OPTSCHED_ENABLE_AMDGPU) 25 | set(llvm_version 9.0) 26 | endif() 27 | set(OPTSCHED_LLVM_VERSION ${llvm_version} CACHE STRING "The LLVM version to build OptSched with (independent build only)") 28 | 29 | find_package(LLVM ${OPTSCHED_LLVM_VERSION} REQUIRED CONFIG) 30 | endif() 31 | 32 | if(NOT CMAKE_BUILD_TYPE) 33 | message(FATAL_ERROR "Must set cmake build type.") 34 | endif() 35 | 36 | # If we are requested to build the AMDGPU functionality, ensure that we can do so. 37 | if(OPTSCHED_ENABLE_AMDGPU) 38 | if(NOT "AMDGPU" IN_LIST LLVM_ALL_TARGETS) 39 | message(FATAL_ERROR "Trying to build the AMDGPU code, but AMDGPU is not supported by this build of LLVM") 40 | elseif(LLVM_VERSION VERSION_LESS 13.0) 41 | message(FATAL_ERROR "OptSched requries LLVM version >= 13.0 to build the AMDGPU scheduler.") 42 | endif() 43 | endif() 44 | 45 | # If asserts are enabled opt-sched must be built with "IS_DEBUG". 46 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DIS_DEBUG") 47 | 48 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 49 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 50 | 51 | set(OPTSCHED_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) 52 | set(OPTSCHED_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) 53 | 54 | list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}" ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 55 | include(AddLLVM) 56 | 57 | include_directories( 58 | ${OPTSCHED_SOURCE_DIR}/include 59 | ${OPTSCHED_SOURCE_DIR}/lib 60 | ${LLVM_INCLUDE_DIRS} 61 | ${OPTSCHED_EXTRA_INCLUDE_DIRS} 62 | ) 63 | add_definitions(${OPTSCHED_EXTRA_DEFINITIONS}) 64 | link_directories(${OPTSCHED_EXTRA_LINK_LIBRARIES}) 65 | 66 | if(NOT llvm_subproject) 67 | include(GetLocalLLVM) 68 | 69 | set(args) 70 | if(OPTSCHED_INCLUDE_TESTS) 71 | list(APPEND args GTEST) 72 | endif() 73 | 74 | get_local_llvm(${args}) 75 | endif() 76 | 77 | add_subdirectory(lib) 78 | 79 | if(OPTSCHED_INCLUDE_TESTS) 80 | include(CTest) 81 | 82 | add_subdirectory(unittests) 83 | list(APPEND OPTSCHED_TEST_DEPS OptSchedUnitTests) 84 | list(APPEND OPTSCHED_TEST_PARAMS 85 | clang_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/test/Unit/lit.site.cfg 86 | ) 87 | 88 | add_subdirectory(test) 89 | 90 | add_test(NAME OptSchedTests 91 | COMMAND ${CMAKE_COMMAND} --build . --target check-optsched 92 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 93 | ) 94 | add_test(NAME OptSched-CompileHelloWorld 95 | COMMAND 96 | ${LLVM_TOOLS_BINARY_DIR}/clang ${CMAKE_CURRENT_SOURCE_DIR}/example/helloworld.cpp 97 | -O3 98 | ) 99 | endif() 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/) 2 | 3 | # OptSched - Optimizing Scheduler 4 | Combinatorial instruction scheduling research project at CSU Sacramento. 5 | 6 | This plugin for the [LLVM](https://llvm.org/) compiler is an optional machine scheduler. We implement a branch and bound instruction scheduling algorithm. 7 | 8 | ## Requirements 9 | 10 | - Ubuntu 16.04 (recommended), or MacOS 10.14 11 | - CMake 3.4.3 or later 12 | - LLVM 6.0 or later 13 | 14 | ## Building 15 | 16 | **See [BUILD.md](BUILD.md) for build instructions.** 17 | 18 | The OptSched plugin can be found in “llvm/lib” after building. 19 | 20 | ## Configuration files 21 | 22 | OptSched reads from configuration files at runtime to initialize the scheduler. There are templates in the [example](https://github.com/OptSched/OptSched/tree/master/example/optsched-cfg) directory. The default search location for these files is ```~/.optsched-cfg```. You can optionally specify the path to this directory or any of the configuration files individually with [command-line options](#Command-Line-Options). 23 | 24 | ## Usage Examples 25 | 26 | `clang++ -O3 -fplugin= -mllvm -misched=optsched -mllvm -optsched-cfg= ` 27 | 28 | `llc -load -misched=optsched -optsched-cfg= ` 29 | 30 | ## Command-Line Options 31 | 32 | When using Clang, pass options to LLVM with `-mllvm`. 33 | 34 | | CL Opt | Description | 35 | | ------ | ----------- | 36 | | -enable-misched | Enable the machine scheduling pass in LLVM (Targets can override this option). | 37 | | -misched=optsched | Select the optimizing scheduler. | 38 | | -debug-only=optsched | Print debug information from the scheduler. | 39 | | -optsched-cfg=\ | Path to the directory containing configuration files for opt-sched. | 40 | | -optsched-cfg-hotfuncs=\ | Path to the list of hot functions to schedule using opt-sched. | 41 | | -optsched-cfg-machine-model=\ | Path to the machine model specification file for opt-sched. | 42 | | -optsched-cfg-sched=\ | Path to the scheduler options configuration file for opt-sched. | 43 | -------------------------------------------------------------------------------- /cmake/GetLocalLLVM.cmake: -------------------------------------------------------------------------------- 1 | set(LOCAL_LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/.LocalLLVM 2 | CACHE PATH "") 3 | set(LOCAL_LLVM_LIST_DIR ${CMAKE_CURRENT_LIST_DIR}) 4 | mark_as_advanced(LOCAL_LLVM_BINARY_DIR LOCAL_LLVM_LIST_DIR) 5 | 6 | set(LOCAL_LLVM_DIR ${LOCAL_LLVM_BINARY_DIR}/llvm-project 7 | CACHE PATH "Path to a local copy of llvm-project, for use in unit tests and the AMDGPU build") 8 | 9 | function(get_local_llvm) 10 | if(NOT EXISTS LOCAL_LLVM_BINARY_DIR) 11 | file(MAKE_DIRECTORY ${LOCAL_LLVM_BINARY_DIR}) 12 | endif() 13 | 14 | if(NOT EXISTS LOCAL_LLVM_DIR AND 15 | NOT EXISTS ${LOCAL_LLVM_BINARY_DIR}/llvm-project.download-finished) 16 | if(NOT EXISTS ${LOCAL_LLVM_BINARY_DIR}/llvm-project.zip) 17 | file(DOWNLOAD https://github.com/CSUS-LLVM/llvm-project/archive/optsched.zip 18 | ${LOCAL_LLVM_BINARY_DIR}/llvm-project.zip 19 | SHOW_PROGRESS 20 | STATUS result 21 | TLS_VERIFY ON 22 | EXPECTED_HASH SHA256=c3a2e966d7182c031973530c0c8e010235577025ca54bfe8159d721f05ca2ed4 23 | ) 24 | list(GET 0 result downloadFailed) 25 | list(GET 1 result statusString) 26 | 27 | if(downloadFailed) 28 | message(FATAL_ERROR "Unable to get llvm-project. Failed with ${downloadFailed}: ${statusString}") 29 | endif() 30 | endif() 31 | 32 | if(EXISTS ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched) 33 | file(REMOVE_RECURSE ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched) 34 | endif() 35 | 36 | execute_process( 37 | COMMAND ${CMAKE_COMMAND} -E tar xzf llvm-project.zip 38 | WORKING_DIRECTORY ${LOCAL_LLVM_BINARY_DIR} 39 | RESULTS_VARIABLE unzipError 40 | ) 41 | if(unzipError) 42 | message(FATAL_ERROR "Unable to unzip llvm-project. Failed with ${unzipError}") 43 | endif() 44 | 45 | file(RENAME ${LOCAL_LLVM_BINARY_DIR}/llvm-project-optsched ${LOCAL_LLVM_BINARY_DIR}/llvm-project) 46 | 47 | # Touch the file. file(TOUCH ...) is CMake 3.12+, but we want to support CMake 3.10 48 | file(WRITE ${LOCAL_LLVM_BINARY_DIR}/llvm-project.download-finished "") 49 | endif() 50 | 51 | cmake_parse_arguments(ARG "GTEST" "" "" ${ARGN}) 52 | 53 | if(ARG_UNPARSED_ARGUMENTS) 54 | message(FATAL_ERROR "Unknown arguments ${ARG_UNPARSED_ARGUMENTS}") 55 | endif() 56 | 57 | set(llvm_dir ${LOCAL_LLVM_DIR}/llvm) 58 | set(llvm_build_dirs ${LOCAL_LLVM_BINARY_DIR}/llvm_build_dirs) 59 | 60 | file(MAKE_DIRECTORY ${llvm_build_dirs}) 61 | 62 | if(ARG_GTEST) 63 | # Set things up so that llvm-lit can do its work 64 | set(LLVM_EXTERNAL_LIT "${llvm_dir}/utils/lit/lit.py" CACHE PATH "Path to llvm-lit") 65 | add_subdirectory(${llvm_dir}/utils/unittest ${llvm_build_dirs}/googletest) 66 | 67 | # Set up GTest include dirs 68 | include_directories( 69 | ${llvm_dir}/utils/unittest/googletest/include 70 | ${llvm_dir}/utils/unittest/googlemock/include 71 | ) 72 | endif() 73 | endfunction() 74 | -------------------------------------------------------------------------------- /cmake/superbuild/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # A SuperBuild project for OptSched. 2 | # 3 | # Use this as the CMake source directory to run the superbuild. 4 | # 5 | # Documentation of configuration args: 6 | # 7 | # In general, see option()s and set(... CACHE ...)s in this CMakeLists.txt and flang.cmake. 8 | # 9 | # Some options behave as follows: 10 | # - OPTSCHEDSUPER_FLANG: Check out and build flang. 11 | # - These options are passed on to llvm (and OptSched) directly from the SuperBuild configuration: 12 | # - CMAKE_BUILD_TYPE. Note that the flang build types default to Release, but can be configured. 13 | # - LLVM_PARALLEL_LINK_JOBS. 14 | # - *_EXTRA_CMAKE_ARGS: Passes these CMake arguments on to the corresponding sub-build. 15 | # - The flang builds can be configured to use a custom CMAKE_GENERATOR, separate from the superbuild's generator. 16 | 17 | 18 | ############################################ 19 | # 20 | # As of 5/26/2022, the superbuild script is 21 | # no longer gauranteed to work. 22 | # 23 | ############################################ 24 | 25 | cmake_minimum_required(VERSION 3.7) 26 | 27 | project(OptSched-SuperBuild) 28 | 29 | option(OPTSCHEDSUPER_FLANG "Check out and build flang." OFF) 30 | 31 | set(OPTSCHEDSUPER_LLVM_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/llvm-install CACHE PATH "The directory to install llvm") 32 | set(OPTSCHEDSUPER_LLVM_GIT_REPO https://github.com/CSUS-LLVM/llvm-project.git CACHE STRING "The git repository to clone for LLVM") 33 | set(OPTSCHEDSUPER_LLVM_GIT_TAG optsched CACHE STRING "The tag to checkout within the LLVM git repository") 34 | set(OPTSCHEDSUPER_LLVM_SOURCE_SUBDIR llvm CACHE STRING "LLVM lives in this subdirectory of the cloned repository") 35 | set(OPTSCHEDSUPER_LLVM_EXTRA_CMAKE_ARGS "" CACHE STRING ";-separated list of extra arguments to pass to configure cmake") 36 | 37 | include(ExternalProject) 38 | include(flang.cmake) 39 | include(CTest) 40 | 41 | # add_cache_variables( ...) 42 | # Compute a cache variable argument for each VARS, adding them to OUT. 43 | function(add_cache_variables OUT_LIST) 44 | set(result "${${OUT_LIST}}") 45 | 46 | foreach(NAME IN LISTS ARGN) 47 | if(DEFINED ${NAME}) 48 | get_property(type CACHE ${NAME} PROPERTY TYPE) 49 | if(NOT type) 50 | set(type UNINITIALIZED) 51 | endif() 52 | list(APPEND result -D${NAME}:${type}=${${NAME}}) 53 | endif() 54 | endforeach() 55 | 56 | set(${OUT_LIST} "${result}" PARENT_SCOPE) 57 | endfunction() 58 | 59 | set(cache_default_args) 60 | add_cache_variables(cache_default_args 61 | CMAKE_CXX_COMPILER_LAUNCHER 62 | CMAKE_C_COMPILER_LAUNCHER 63 | OPTSCHED_EXTRA_DEFINITIONS 64 | OPTSCHED_EXTRA_LINK_LIBRARIES 65 | ) 66 | 67 | set(ROOT_DIR ${PROJECT_SOURCE_DIR}/../..) 68 | 69 | ExternalProject_Add(llvm 70 | GIT_REPOSITORY ${OPTSCHEDSUPER_LLVM_GIT_REPO} 71 | GIT_TAG ${OPTSCHEDSUPER_LLVM_GIT_TAG} 72 | SOURCE_SUBDIR ${OPTSCHEDSUPER_LLVM_SOURCE_SUBDIR} 73 | INSTALL_DIR ${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX} 74 | CMAKE_ARGS 75 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} 76 | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} 77 | -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} 78 | -DCMAKE_INSTALL_PREFIX= 79 | -DLLVM_PARALLEL_LINK_JOBS=${LLVM_PARALLEL_LINK_JOBS} 80 | -DLLVM_ENABLE_PROJECTS='clang' 81 | -DLLVM_TARGETS_TO_BUILD=X86 82 | -DLLVM_BUILD_TOOLS=ON 83 | -DLLVM_INCLUDE_TESTS=ON 84 | -DLLVM_OPTIMIZED_TABLEGEN=ON 85 | -DLLVM_EXTERNAL_PROJECTS=OptSched 86 | -DLLVM_EXTERNAL_OPTSCHED_SOURCE_DIR=${ROOT_DIR} 87 | -DOPTSCHED_ENABLE_AMDGPU=OFF 88 | ${OPTSCHEDSUPER_LLVM_EXTRA_CMAKE_ARGS} 89 | CMAKE_CACHE_DEFAULT_ARGS 90 | ${cache_default_args} 91 | USES_TERMINAL_CONFIGURE 1 92 | USES_TERMINAL_BUILD 1 93 | USES_TERMINAL_INSTALL 1 94 | ) 95 | 96 | if(OPTSCHEDSUPER_FLANG) 97 | setup_flang_external_projects("${cache_default_args}") 98 | endif() 99 | 100 | ExternalProject_Get_Property(llvm BINARY_DIR) 101 | set(LLVM_BUILD_DIR ${BINARY_DIR}) 102 | 103 | add_test(NAME OptSchedTests 104 | COMMAND ${CMAKE_COMMAND} --build . --target check-optsched 105 | WORKING_DIRECTORY ${LLVM_BUILD_DIR} 106 | ) 107 | add_test(NAME OptSched-CompileHelloWorld 108 | COMMAND 109 | ${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/bin/clang ${ROOT_DIR}/example/helloworld.cpp 110 | -O3 111 | ) 112 | -------------------------------------------------------------------------------- /documentation/flang/README.md: -------------------------------------------------------------------------------- 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/) 2 | 3 | # OptSched - Optimizing Scheduler 4 | This directory contains specific instructions on how to build Flang. 5 | 6 | ## Requirements 7 | 8 | - Ubuntu 16.04 is recommended 9 | - CMake 3.4.3 or later 10 | - LLVM 6.0 or later 11 | 12 | ## Usage with OptSched 13 | 14 | `/bin/flang -O3 -fplugin= -mllvm -misched=optsched -mllvm -optsched-cfg= ` 15 | 16 | ## Building Flang 17 | 18 | #### Flang Build Directory 19 | 20 | 1. Open a bash terminal 21 | 22 | 2. Create a directory where you would like flang to be installed to. For example "flang-install" then navigate inside it: 23 | 24 | `mkdir flang-install && cd flang-install` 25 | 26 | 3. Export an environment variable that will be passed to cmake to indicate flang's installation directory: 27 | 28 | `export FLANG_INSTALL=$(pwd)` 29 | 30 | 4. Navigate outside of the folder: 31 | 32 | `cd ..` 33 | 34 | #### Building Flang LLVM 35 | 36 | 1. Clone Flang LLVM: 37 | 38 | `git clone https://github.com/flang-compiler/llvm.git` 39 | 40 | 2. Navigate to inside the LLVM folder and swap to the release_60 branch: 41 | 42 | `cd llvm && git checkout release_60` 43 | 44 | 3. Download the patch to print spilling info under the OptSched/patches/llvm6.0/ folder named: 45 | 46 | `flang-llvm6-print-spilling-info.patch` 47 | 48 | 4. Move the patch file to inside the Flang LLVM directory 49 | 50 | 5. Apply the patch 51 | 52 | `git am flang-llvm6-print-spilling-info.patch` 53 | 54 | 3. Create a build directory and navigate inside it: 55 | 56 | `mkdir build && cd build` 57 | 58 | 6. Build Flang LLVM 59 | 60 | `cmake -DCMAKE_BUILD_TYPE=Debug '-DLLVM_TARGETS_TO_BUILD=X86' -DLLVM_BUILD_TOOLS=ON -DLLVM_INCLUDE_TESTS=ON -DLLVM_OPTIMIZED_TABLEGEN=ON -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL ..` 61 | 62 | `make && make install` 63 | 64 | 7. Navigate outside of the Flang LLVM directory: 65 | 66 | `cd ../..` 67 | 68 | #### Building the Flang driver 69 | 70 | 1. Clone the Flang driver 71 | 72 | `git clone https://github.com/flang-compiler/flang-driver.git` 73 | 74 | 2. Navigate to inside the flang driver folder and swap to the release_60 branch: 75 | 76 | `cd flang-driver && git checkout release_60` 77 | 78 | 3. Create a build directory and navigate inside it: 79 | 80 | `mkdir build && cd build` 81 | 82 | 4. Build the Flang driver: 83 | 84 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DLLVM_CONFIG=$FLANG_INSTALL/bin/llvm-config -DCLANG_ENABLE_STATIC_ANALYZER=ON ..` 85 | 86 | `make && make install` 87 | 88 | 5. Navigate outside of the flang driver directory: 89 | 90 | `cd ../..` 91 | 92 | #### Building the OpenMP runtime library 93 | 94 | 1. Clone the OpenMP runtime library: 95 | 96 | `git clone https://github.com/llvm-mirror/openmp.git` 97 | 98 | 2. Navigate to the OpenMP runtime library directory: 99 | 100 | `cd openmp/runtime/` 101 | 102 | 3. Create a build directory and navigate inside it: 103 | 104 | `mkdir build && cd build` 105 | 106 | 4. Build the OpenMP runtime library: 107 | 108 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang ../..` 109 | 110 | `make && make install` 111 | 112 | 5. Navigate outside of the OpenMP runtime directory: 113 | 114 | `cd ../../..` 115 | 116 | #### Building libpgmath 117 | 118 | 1. Clone Flang 119 | 120 | `git clone https://github.com/flang-compiler/flang.git` 121 | 122 | 2. Navigate to inside the flang directory: 123 | 124 | `cd flang` 125 | 126 | If you are having issues with AVX-512 when building libpgmath, you may need to swap to an older commit 127 | 128 | For Ubuntu 16.04: [45d7aeb5886c5965a8e793ef3fa632e7e73de56c](https://github.com/flang-compiler/flang/issues/434#issuecomment-403449362) 129 | 130 | `git checkout 45d7aeb5886c5965a8e793ef3fa632e7e73de56c` 131 | 132 | For Ubuntu 18.04: [37e6062d969bf337b964fe8119767046fcbdcdfa](https://github.com/flang-compiler/flang/issues/685) 133 | 134 | `git checkout 37e6062d969bf337b964fe8119767046fcbdcdfa` 135 | 136 | 3. Navigate to inside the libpgmath dircetory: 137 | 138 | `cd runtime/libpgmath` 139 | 140 | 4. Create a build directory and navigate inside it: 141 | 142 | `mkdir build && cd build` 143 | 144 | 5. Build libpgmath: 145 | 146 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang -DCMAKE_Fortran_COMPILER=$FLANG_INSTALL/bin/flang ..` 147 | 148 | `make && make install` 149 | 150 | You may need to install gawk if you are encountering a segmentation fault: 151 | 152 | `sudo apt-get install gawk` 153 | 154 | 6. Navigate back to the root directory of flang 155 | 156 | `cd ../../..` 157 | 158 | #### Building flang 159 | 160 | 1. While still in the flang directory, create a build directory for flang and navigate inside it: 161 | 162 | `mkdir build && cd build` 163 | 164 | 2. Build flang: 165 | 166 | `cmake -DCMAKE_INSTALL_PREFIX=$FLANG_INSTALL -DCMAKE_CXX_COMPILER=$FLANG_INSTALL/bin/clang++ -DCMAKE_C_COMPILER=$FLANG_INSTALL/bin/clang -DCMAKE_Fortran_COMPILER=$FLANG_INSTALL/bin/flang -DLLVM_CONFIG=$FLANG_INSTALL/bin/llvm-config ..` 167 | 168 | `make && make install` 169 | 170 | 3. Navigate outside of the flang directory: 171 | 172 | `cd ../..` 173 | 174 | 175 | #### Testing the build with a hello world fortran file 176 | 177 | 1. Navigate to the directory where flang was installed. In this example, it was flang-install 178 | 179 | `cd flang-install` 180 | 181 | 2. Download the hello.f fortran file and put it in your flang-install directory 182 | 183 | 3. Compile the file: 184 | 185 | `./bin/flang hello.f` 186 | 187 | 4. Run the generated file: 188 | 189 | `./a.out` 190 | 191 | If you are getting the error: 192 | 193 | `"libflang.so: cannot open shared object file: No such file or directory"` 194 | 195 | You will need to link the flang-install/lib directory to the environment variable LD_LIBRARY_PATH: 196 | 197 | `export LD_LIBRARY_PATH="$(pwd)/lib"` 198 | 199 | The resulting output should be: 200 | 201 | `Hello World!` 202 | -------------------------------------------------------------------------------- /documentation/flang/hello.f: -------------------------------------------------------------------------------- 1 | program hello 2 | print *, "Hello World!" 3 | end program hello 4 | -------------------------------------------------------------------------------- /example/helloworld.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { std::puts("Hello, World!"); } 4 | -------------------------------------------------------------------------------- /example/llvm7-CPU2006-cfg/machine_model.cfg: -------------------------------------------------------------------------------- 1 | # A simple machine model which always issues one instruction in a cycle or stalls. 2 | MODEL_NAME: Simple 3 | 4 | # The limit on the total number of instructions that can be issued in one cycle 5 | ISSUE_RATE: 1 6 | 7 | # Each instruction must have an issue type, i.e. a function unit that the instruction uses. 8 | ISSUE_TYPE_COUNT: 1 9 | 10 | # Default issue type for LLVM instructions. 11 | Default 1 12 | 13 | DEP_LATENCY_ANTI: 0 14 | DEP_LATENCY_OUTPUT: 1 15 | DEP_LATENCY_OTHER: 1 16 | 17 | # This will not be used. Reg type info will be taken from the compiler. 18 | REG_TYPE_COUNT: 2 19 | I 1 20 | F 1 21 | 22 | # Set this to the total number of instructions 23 | INST_TYPE_COUNT: 0 24 | 25 | # Examples 26 | #1 27 | #INST_TYPE: ADD64rr 28 | #ISSUE_TYPE: Default 29 | #LATENCY: 1 30 | #PIPELINED: YES 31 | #BLOCKS_CYCLE: NO 32 | #SUPPORTED: YES 33 | 34 | 35 | #2 36 | #INST_TYPE: IMUL64rr 37 | #ISSUE_TYPE: Default 38 | #LATENCY: 3 39 | #PIPELINED: YES 40 | #BLOCKS_CYCLE: NO 41 | #SUPPORTED: YES 42 | -------------------------------------------------------------------------------- /example/optsched-cfg/machine_model.cfg: -------------------------------------------------------------------------------- 1 | # A simple machine model which always issues one instruction in a cycle or stalls. 2 | MODEL_NAME: Simple 3 | 4 | # The limit on the total number of instructions that can be issued in one cycle 5 | ISSUE_RATE: 1 6 | 7 | # Each instruction must have an issue type, i.e. a function unit that the instruction uses. 8 | ISSUE_TYPE_COUNT: 1 9 | 10 | # Default issue type for LLVM instructions. 11 | Default 1 12 | 13 | DEP_LATENCY_ANTI: 0 14 | DEP_LATENCY_OUTPUT: 1 15 | DEP_LATENCY_OTHER: 1 16 | 17 | # This will not be used. Reg type info will be taken from the compiler. 18 | REG_TYPE_COUNT: 2 19 | I 1 20 | F 1 21 | 22 | # Set this to the total number of instructions 23 | INST_TYPE_COUNT: 0 24 | 25 | # Examples 26 | #1 27 | #INST_TYPE: ADD64rr 28 | #ISSUE_TYPE: Default 29 | #LATENCY: 1 30 | #PIPELINED: YES 31 | #BLOCKS_CYCLE: NO 32 | #SUPPORTED: YES 33 | 34 | 35 | #2 36 | #INST_TYPE: IMUL64rr 37 | #ISSUE_TYPE: Default 38 | #LATENCY: 3 39 | #PIPELINED: YES 40 | #BLOCKS_CYCLE: NO 41 | #SUPPORTED: YES 42 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/OptSchedDDGWrapperBase.h: -------------------------------------------------------------------------------- 1 | //===- OptSchedDDGWrapperBase.h - Interface for DDG wrapper -----*- C++-*--===// 2 | // 3 | // Convert an LLVM ScheduleDAG into an OptSched DDG. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | 7 | #ifndef LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H 8 | #define LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H 9 | 10 | namespace llvm { 11 | namespace opt_sched { 12 | 13 | class OptSchedDDGWrapperBase { 14 | public: 15 | virtual ~OptSchedDDGWrapperBase() = default; 16 | 17 | virtual void convertSUnits(bool IgnoreRealEdges, 18 | bool IgnoreArtificialEdges) = 0; 19 | 20 | virtual void convertRegFiles() = 0; 21 | }; 22 | 23 | } // namespace opt_sched 24 | } // namespace llvm 25 | 26 | #endif // LLVM_OPT_SCHED_DDG_WRAPPER_BASE_H 27 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/OptSchedTarget.h: -------------------------------------------------------------------------------- 1 | //===- OptSchedTarget.h - OptSched Target -----------------------*- C++-*--===// 2 | // 3 | // Interface for target specific functionality in OptSched. This is a workaround 4 | // to avoid needing to modify or use target code in the trunk. 5 | // 6 | //===----------------------------------------------------------------------===// 7 | 8 | #ifndef LLVM_OPT_SCHED_TARGET_H 9 | #define LLVM_OPT_SCHED_TARGET_H 10 | 11 | #include "opt-sched/Scheduler/OptSchedDDGWrapperBase.h" 12 | #include "opt-sched/Scheduler/config.h" 13 | #include "opt-sched/Scheduler/data_dep.h" 14 | #include "opt-sched/Scheduler/defines.h" 15 | #include "opt-sched/Scheduler/machine_model.h" 16 | #include "llvm/ADT/SmallString.h" 17 | #include "llvm/ADT/SmallVector.h" 18 | #include "llvm/CodeGen/MachineScheduler.h" 19 | 20 | namespace llvm { 21 | namespace opt_sched { 22 | 23 | class OptSchedMachineModel; 24 | class ScheduleDAGOptSched; 25 | 26 | class OptSchedTarget { 27 | public: 28 | MachineModel *MM; 29 | 30 | virtual ~OptSchedTarget() = default; 31 | 32 | virtual std::unique_ptr 33 | createMachineModel(const char *configFile) = 0; 34 | 35 | virtual std::unique_ptr 36 | createDDGWrapper(MachineSchedContext *Context, ScheduleDAGOptSched *DAG, 37 | OptSchedMachineModel *MM, LATENCY_PRECISION LatencyPrecision, 38 | const std::string &RegionID) = 0; 39 | 40 | virtual void initRegion(ScheduleDAGInstrs *DAG, MachineModel *MM, 41 | Config &OccFile) = 0; 42 | virtual void finalizeRegion(const InstSchedule *Schedule) = 0; 43 | // FIXME: This is a shortcut to doing the proper thing and creating a RP class 44 | // that targets can override. It's hard to justify spending the extra time 45 | // when we will be refactoring RP tracking in general if we do a rewrite to 46 | // fully integrate the scheduler in LLVM. 47 | // 48 | // Get target specific cost from peak register pressure (e.g. occupancy for 49 | // AMDGPU) 50 | virtual InstCount 51 | getCost(const llvm::SmallVectorImpl &PRP) const = 0; 52 | 53 | // Targets that wish to discard the finalized schedule for any reason can 54 | // override this. 55 | virtual bool shouldKeepSchedule() { return true; } 56 | 57 | virtual void SetOccupancyLimit(int){/*nothing*/}; 58 | virtual void SetShouldLimitOcc(bool){/*nothing*/}; 59 | virtual void SetOccLimitSource(OCC_LIMIT_TYPE){/*nothing*/}; 60 | }; 61 | 62 | template class OptSchedRegistryNode { 63 | public: 64 | llvm::SmallString<16> Name; 65 | FactoryT Factory; 66 | OptSchedRegistryNode *Next; 67 | 68 | OptSchedRegistryNode(llvm::StringRef Name_, FactoryT Factory_) 69 | : Name(Name_), Factory(Factory_) {} 70 | }; 71 | 72 | template class OptSchedRegistry { 73 | private: 74 | OptSchedRegistryNode *List = nullptr; 75 | OptSchedRegistryNode *Default = nullptr; 76 | 77 | public: 78 | void add(OptSchedRegistryNode *Node) { 79 | Node->Next = List; 80 | List = Node; 81 | } 82 | 83 | FactoryT getFactoryWithName(llvm::StringRef Name) { 84 | FactoryT Factory = nullptr; 85 | std::string Match = std::string(Name.data()); 86 | 87 | for (auto I = List; I; I = I->Next) { 88 | std::string Temp = std::string(I->Name.data()); 89 | if (Match.compare(Temp) == 0) { 90 | Factory = I->Factory; 91 | break; 92 | } 93 | } 94 | return Factory; 95 | } 96 | 97 | void setDefault(llvm::StringRef Name) { 98 | OptSchedRegistryNode Node = nullptr; 99 | for (auto I = List; I; I = I->Next) 100 | if (I->Name == Name) { 101 | Node = I; 102 | break; 103 | } 104 | assert(Node && "Could not set default factory! None in list with name."); 105 | Default = Node; 106 | } 107 | 108 | FactoryT getDefaultFactory() { 109 | assert(Default && "Default factory not set."); 110 | return Default->Factory; 111 | } 112 | }; 113 | 114 | class OptSchedTargetRegistry 115 | : public OptSchedRegistryNode (*)()> { 116 | public: 117 | using OptSchedTargetFactory = std::unique_ptr (*)(); 118 | static OptSchedRegistry Registry; 119 | 120 | OptSchedTargetRegistry(llvm::StringRef Name_, OptSchedTargetFactory Factory_) 121 | : OptSchedRegistryNode(Name_, Factory_) { 122 | Registry.add(this); 123 | } 124 | }; 125 | 126 | } // namespace opt_sched 127 | } // namespace llvm 128 | 129 | #endif // LLVM_OPT_SCHED_TARGET_H 130 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/aco.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implements an Ant colony optimizing scheduler 3 | Author: Theodore Dubois 4 | Created: Nov. 2017 5 | Updated By: Ciprian Elies and Vang Thao 6 | Last Update: Jan. 2020 7 | *******************************************************************************/ 8 | 9 | #ifndef OPTSCHED_ACO_H 10 | #define OPTSCHED_ACO_H 11 | 12 | #include "opt-sched/Scheduler/gen_sched.h" 13 | #include "llvm/ADT/ArrayRef.h" 14 | #include "llvm/ADT/SetVector.h" 15 | #include "llvm/ADT/SmallSet.h" 16 | #include "llvm/ADT/SmallVector.h" 17 | #include 18 | #include 19 | #include 20 | namespace llvm { 21 | namespace opt_sched { 22 | 23 | typedef double pheromone_t; 24 | 25 | enum class DCF_OPT { 26 | OFF, 27 | GLOBAL_ONLY, 28 | GLOBAL_AND_TIGHTEN, 29 | GLOBAL_AND_ITERATION 30 | }; 31 | 32 | struct Choice { 33 | SchedInstruction *inst; 34 | pheromone_t heuristic; // range 1 to 2 35 | InstCount readyOn; // number of cycles until this instruction becomes ready 36 | }; 37 | 38 | class ACOScheduler : public ConstrainedScheduler { 39 | public: 40 | ACOScheduler(DataDepGraph *dataDepGraph, MachineModel *machineModel, 41 | InstCount upperBound, SchedPriorities priorities, bool vrfySched, 42 | bool IsPostBB); 43 | virtual ~ACOScheduler(); 44 | FUNC_RESULT FindSchedule(InstSchedule *schedule, SchedRegion *region); 45 | inline void UpdtRdyLst_(InstCount cycleNum, int slotNum); 46 | // Set the initial schedule for ACO 47 | // Default is NULL if none are set. 48 | void setInitialSched(InstSchedule *Sched); 49 | 50 | private: 51 | pheromone_t &Pheromone(SchedInstruction *from, SchedInstruction *to); 52 | pheromone_t &Pheromone(InstCount from, InstCount to); 53 | pheromone_t Score(SchedInstruction *from, Choice choice); 54 | bool shouldReplaceSchedule(InstSchedule *OldSched, InstSchedule *NewSched, 55 | bool IsGlobal); 56 | DCF_OPT ParseDCFOpt(const std::string &opt); 57 | 58 | void PrintPheromone(); 59 | 60 | // pheromone Graph Debugging start 61 | llvm::SmallSet DbgRgns; 62 | llvm::SmallSet, 0> AntEdges; 63 | llvm::SmallSet, 0> CrntAntEdges; 64 | llvm::SmallSet, 0> IterAntEdges; 65 | llvm::SmallSet, 0> BestAntEdges; 66 | std::map, double> LastHeu; 67 | bool IsDbg = false; 68 | std::string OutPath; 69 | std::string graphDisplayAnnotation(int Frm, int To); 70 | std::string getHeuIfPossible(int Frm, int To); 71 | void writePheromoneGraph(std::string Stage); 72 | void writePGraphRecursive(FILE *Out, SchedInstruction *Ins, 73 | llvm::SetVector &Visited); 74 | 75 | // pheromone Graph Debugging end 76 | 77 | Choice SelectInstruction(const llvm::ArrayRef &ready, 78 | SchedInstruction *lastInst); 79 | void UpdatePheromone(InstSchedule *schedule); 80 | std::unique_ptr FindOneSchedule(InstCount TargetRPCost); 81 | llvm::SmallVector pheromone_; 82 | pheromone_t initialValue_; 83 | bool use_fixed_bias; 84 | int count_; 85 | int heuristicImportance_; 86 | bool use_tournament; 87 | int fixed_bias; 88 | double bias_ratio; 89 | double local_decay; 90 | double decay_factor; 91 | int ants_per_iteration; 92 | int ants_per_iteration1p; 93 | int ants_per_iteration2p; 94 | int noImprovementMax; 95 | bool print_aco_trace; 96 | std::unique_ptr InitialSchedule; 97 | bool VrfySched_; 98 | bool IsPostBB; 99 | bool IsTwoPassEn; 100 | pheromone_t ScRelMax; 101 | DCF_OPT DCFOption; 102 | SPILL_COST_FUNCTION DCFCostFn; 103 | int localCmp = 0, localCmpRej = 0, globalCmp = 0, globalCmpRej = 0; 104 | }; 105 | 106 | } // namespace opt_sched 107 | } // namespace llvm 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/array_ref2d.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTSCHED_ARRAY_REF_2D_H 2 | #define OPTSCHED_ARRAY_REF_2D_H 3 | 4 | #include "llvm/ADT/ArrayRef.h" 5 | #include 6 | #include 7 | 8 | namespace llvm { 9 | namespace opt_sched { 10 | 11 | /** 12 | * \brief Provides a 2D view over a single allocation 13 | * 14 | * \details 2D arrays are best implemented by using a single allocation, then 15 | * computing the index into this single allocation based on the 2D location we 16 | * are trying to access. This type abstracts away that work, doing it for you. 17 | * 18 | * \see MutableArrayRef2D 19 | */ 20 | template class ArrayRef2D { 21 | public: 22 | /** 23 | * \brief Constructs an ArrayRef2D with the specified dimensions. 24 | * \param Ref Must have a size precisely Rows * Columns. 25 | * \param Rows The number of rows in this 2D matrix. 26 | * \param Columns The number of columns in this 2D matrix. 27 | */ 28 | explicit ArrayRef2D(llvm::ArrayRef Ref, size_t Rows, size_t Columns) 29 | : Ref(Ref), Rows(Rows), Columns(Columns) { 30 | assert(Rows * Columns == Ref.size()); 31 | } 32 | 33 | size_t rows() const { return Rows; } 34 | size_t columns() const { return Columns; } 35 | 36 | /** 37 | * \brief Access an element at the specified row and columns. `[{row, col}]` 38 | * \detail 39 | * A C-style array `int arr[10][20]` is a single contiguous block of memory. 40 | * It would be accessed as `arr[row][col]`. 41 | * For ArrayRef2D, a single block of memory such as 42 | * `int* arr = new int[10 * 20]` is accessed as `ref[{row, col}]`. 43 | * 44 | * If you want to do x, y indexing, prefer `ref[{y, x}]` over `ref[{x, y}]`. 45 | * When accessed in this way, consecutive x values are placed together in 46 | * memory, which is usually what is expected. 47 | */ 48 | const T &operator[](size_t(&&RowCol)[2]) const { 49 | return Ref[computeIndex(RowCol[0], RowCol[1], Rows, Columns)]; 50 | } 51 | 52 | /** 53 | * \brief Recovers the underlying ArrayRef. 54 | */ 55 | llvm::ArrayRef underlyingData() const { return Ref; } 56 | 57 | private: 58 | llvm::ArrayRef Ref; 59 | size_t Rows; 60 | size_t Columns; 61 | 62 | static size_t computeIndex(size_t row, size_t col, size_t Rows, 63 | size_t Columns) { 64 | assert(row < Rows && "Invalid row"); 65 | assert(col < Columns && "Invalid column"); 66 | size_t index = row * Columns + col; 67 | assert(index < Rows * Columns); // Should be redundant with prior asserts. 68 | return index; 69 | } 70 | }; 71 | 72 | /** 73 | * \brief An ArrayRef2D which allows mutation. 74 | * \note Inherits from ArrayRef2D, allowing slicing from this type to 75 | * ArrayRef2D in the same manner as LLVM's ArrayRef and MutableArrayRef 76 | * 77 | * \see ArrayRef2D 78 | */ 79 | template class MutableArrayRef2D : public ArrayRef2D { 80 | public: 81 | explicit MutableArrayRef2D(llvm::MutableArrayRef Ref, size_t Rows, 82 | size_t Columns) 83 | : ArrayRef2D(Ref, Rows, Columns) {} 84 | 85 | /** 86 | * \brief Access an element at the specified row and columns. `[{row, col}]` 87 | * \returns a _mutable_ reference to the element at the specified location. 88 | */ 89 | T &operator[](size_t(&&RowCol)[2]) const { 90 | ArrayRef2D cref = *this; 91 | return const_cast(cref[{RowCol[0], RowCol[1]}]); 92 | } 93 | 94 | /** 95 | * \brief Recovers the underlying MutableArrayRef. 96 | */ 97 | llvm::MutableArrayRef underlyingData() const { 98 | return static_cast &>( 99 | ArrayRef2D::underlyingData()); 100 | } 101 | }; 102 | } // namespace opt_sched 103 | } // namespace llvm 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/buffers.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Defines input buffering classes that can be used for opening, 3 | loading, buffering and parsing input files using system-level I/O, 4 | which relies on the programmer to do his own buffering, unlike the 5 | standard I/O which handles the buffering and hides it from the 6 | programmer. 7 | Author: Ghassan Shobaki 8 | Created: Oct. 1997 9 | Last Update: Mar. 2011 10 | *******************************************************************************/ 11 | 12 | #ifndef OPTSCHED_GENERIC_BUFFERS_H 13 | #define OPTSCHED_GENERIC_BUFFERS_H 14 | 15 | #include "opt-sched/Scheduler/defines.h" 16 | 17 | namespace llvm { 18 | namespace opt_sched { 19 | 20 | const int INBUF_MAX_PIECES_PERLINE = 30; 21 | const int INBUF_MAX_LINESIZE = 10000; 22 | const int DFLT_INPBUF_SIZE = 1000000; 23 | 24 | // String buffer size limits for file/sample names. 25 | const int MAX_NAMESIZE = 1000; 26 | 27 | enum NXTLINE_TYPE { NXT_EOF, NXT_SPC, NXT_DATA, NXT_ERR }; 28 | 29 | // This is an input buffer class for loading, buffering and parsing an input 30 | // file using system level I/O, where the application program is responsible for 31 | // allocating an input buffer and loading the file into it in chunks the size of 32 | // each chunk used in this class is determined by the DFLT_INPBUF_SIZE value 33 | // defined above. The class provides methods for skipping white space and 34 | // comments and reading one valid data line at a time. 35 | // Lexing Assumptions: 36 | // - Files do not contain any invalid characters. So if a character is not a 37 | // control character (\r, \n, #, \t or space), it is a valid data character. 38 | // - Comments on data lines should be preceded by at least one space character 39 | // - All files are scanned linewise 40 | class InputBuffer { 41 | public: 42 | InputBuffer(); 43 | ~InputBuffer(); 44 | int Reload(); 45 | void Clean(); 46 | void Unload(); 47 | char *GetBuf() { return buf; } 48 | const char *GetFullPath() const { return fullPath; } 49 | FUNC_RESULT Load(const char *const fileName, const char *const path, 50 | long maxByts = DFLT_INPBUF_SIZE); 51 | FUNC_RESULT Load(const char *const fullPath, long maxByts = DFLT_INPBUF_SIZE); 52 | FUNC_RESULT SetBuf(char *buf, long size); 53 | 54 | // This function skips all comments and white spaces (tabs are not taken 55 | // into account), and does not return until it reaches a valid data line or 56 | // end of file. If at least one line starting with space is encountered on 57 | // the way, the return value will be NXT_SPC. It should always be called 58 | // when the current offset is at the first character of a line 59 | // (lineStrt==true). 60 | NXTLINE_TYPE skipSpaceAndCmnts(); 61 | NXTLINE_TYPE GetNxtVldLine(int &pieceCnt, char *strngs[], int lngths[]); 62 | 63 | protected: 64 | char *buf; 65 | 66 | long totSize, // total size of the buffer 67 | loadedByts, // number of bytes loaded 68 | crntOfst, // current offset within the buffer 69 | lineEndOfst, // the offset of the last LF or CR character seen 70 | crntLineOfst, // the offset of the current line 71 | crntLineNum; // the current line number 72 | 73 | int fileHndl; 74 | char crntChar, prevChar; 75 | bool lastChnk, cmnt, lineStrt, nxtLineRchd; 76 | char fullPath[MAX_NAMESIZE]; 77 | 78 | // Keeps going until it encounters a data character or a line start. 79 | int skipSpace(); 80 | // Keeps going until it encounters a new line (assume no embedded comments). 81 | int skipCmnt(); 82 | // Checks if reloading is necessary and does it or detects end of file. 83 | int chckReload(); 84 | 85 | NXTLINE_TYPE GetNxtVldLine_(int &pieceCnt, char *str[], int lngth[], 86 | int maxPieceCnt = INBUF_MAX_PIECES_PERLINE); 87 | bool IsWhiteSpaceOrLineEnd(char ch); 88 | void ReportError(char *msg, char *lineStrt, int frstLngth); 89 | void ReportFatalError(char *msg, char *lineStrt, int frstLngth); 90 | }; 91 | 92 | // A specs buffer is an input buffer for parsing a typical input specification 93 | // or configuration file whose format is line based, i.e., includes one spec 94 | // or setting per line. This class includes one method for parsing one type 95 | // of specs 96 | class SpecsBuffer : public InputBuffer { 97 | public: 98 | SpecsBuffer(); 99 | explicit SpecsBuffer(char *buf, long size); 100 | void ReadSpec(const char *const title, char *value); 101 | void readLine(char *value, int maxPieceCnt); 102 | void readLstElmnt(char *value); 103 | int readIntLstElmnt(); 104 | bool ReadFlagSpec(const char *const title, bool dfltValue); 105 | unsigned long ReadUlongSpec(const char *const title); 106 | float ReadFloatSpec(const char *const title); 107 | uint64_t readUInt64Spec(const char *const title); 108 | int ReadIntSpec(const char *const title); 109 | int16_t ReadShortSpec(const char *const title); 110 | FUNC_RESULT checkTitle(const char *const title); 111 | void ErrorHandle(char *value); 112 | 113 | protected: 114 | NXTLINE_TYPE nxtLineType; 115 | void CombinePieces_(int lngths[], char *strngs[], int startPiece, 116 | int endPiece, char *target, int &totLngth); 117 | }; 118 | 119 | } // namespace opt_sched 120 | } // namespace llvm 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/config.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Provides an interface to read a configuration file. The format is 3 | trivial: each entry is a name and value, separated by whitespace. 4 | Multiple entries are also separated by whitespace (usually line 5 | breaks). Hash marks after whitespace cause the rest of the line to 6 | be ignored. 7 | Author: Max Shawabkeh 8 | Created: Mar. 2011 9 | Last Update: Mar. 2011 10 | *******************************************************************************/ 11 | 12 | #ifndef OPTSCHED_GENERIC_CONFIG_H 13 | #define OPTSCHED_GENERIC_CONFIG_H 14 | 15 | #include "opt-sched/Scheduler/defines.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace llvm { 22 | namespace opt_sched { 23 | 24 | using std::list; 25 | using std::string; 26 | 27 | class Config { 28 | public: 29 | // Loads settings from a configuration file. 30 | void Load(const string &filepath); 31 | void Load(std::istream &file); 32 | // All these functions return the value of a setting record of the given 33 | // name, with optional automatic parsing and defaults. 34 | string GetString(const string &name) const; 35 | string GetString(const string &name, const string &default_) const; 36 | int64_t GetInt(const string &name) const; 37 | int64_t GetInt(const string &name, int64_t default_) const; 38 | float GetFloat(const string &name) const; 39 | float GetFloat(const string &name, float default_) const; 40 | bool GetBool(const string &name) const; 41 | bool GetBool(const string &name, bool default_) const; 42 | list GetStringList(const string &name) const; 43 | list GetIntList(const string &name) const; 44 | list GetFloatList(const string &name) const; 45 | 46 | protected: 47 | std::map settings; 48 | }; 49 | 50 | class SchedulerOptions : public Config { 51 | public: 52 | // Since the scheduler flags should only be loaded once we are safe 53 | // implementing it as a singelton. 54 | static SchedulerOptions &getInstance(); 55 | 56 | // Make sure there is no way for a second config object to be accidentally 57 | // created. 58 | SchedulerOptions(const SchedulerOptions &) = delete; 59 | void operator=(const SchedulerOptions &) = delete; 60 | 61 | private: 62 | SchedulerOptions() {} 63 | }; 64 | 65 | } // namespace opt_sched 66 | } // namespace llvm 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/defines.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Contains common includes, constants, typedefs and enums. 3 | Author: Ghassan Shobaki 4 | Created: Oct. 1997 5 | Last Update: Mar. 2011 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_GENERIC_DEFINES_H 9 | #define OPTSCHED_GENERIC_DEFINES_H 10 | 11 | // Define basic constants like NULL. 12 | #include 13 | 14 | // For integral types of specific byte length. 15 | // The new standard is still not supported everywhere. 16 | #include 17 | #include 18 | 19 | namespace llvm { 20 | namespace opt_sched { 21 | 22 | // The standard time unit. 23 | typedef int64_t Milliseconds; 24 | 25 | // Instruction count. 26 | typedef int InstCount; 27 | 28 | // A generic sentinel value. Should be used with care. 29 | // TODO(max): Get rid of this in favor of type- or purpose-specific sentinels. 30 | const int INVALID_VALUE = -1; 31 | 32 | // Possible function call outcomes. 33 | enum FUNC_RESULT { 34 | // The function encountered an error. 35 | RES_ERROR = -1, 36 | // The function consciously failed. 37 | RES_FAIL = 0, 38 | // The function succeeded. 39 | RES_SUCCESS = 1, 40 | // The function reached the end of the resource (e.g. file) it operated on. 41 | RES_END = 2, 42 | // The function did not finish in the time allocated for it. 43 | RES_TIMEOUT = 3 44 | }; 45 | 46 | // Which mechanism we are using to limit occupancy 47 | // Limiting occupancy has shown to improve exec perf 48 | // for some kernels 49 | enum OCC_LIMIT_TYPE { 50 | // NONE 51 | OLT_NONE, 52 | // Value provided by sched.ini 53 | OLT_VALUE, 54 | // AMD's Heuristic 55 | OLT_HEUR, 56 | // Hardcoded File 57 | OLT_FILE, 58 | }; 59 | 60 | } // namespace opt_sched 61 | } // namespace llvm 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/graph_trans.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implement graph transformations to be applied before scheduling. 3 | Author: Austin Kerbow 4 | Created: June. 2017 5 | Last Update: June. 2017 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_H 9 | #define OPTSCHED_BASIC_GRAPH_TRANS_H 10 | 11 | #include "opt-sched/Scheduler/data_dep.h" 12 | #include "opt-sched/Scheduler/defines.h" 13 | #include "opt-sched/Scheduler/lnkd_lst.h" 14 | #include "opt-sched/Scheduler/sched_region.h" 15 | #include 16 | #include 17 | 18 | namespace llvm { 19 | namespace opt_sched { 20 | 21 | // A and B are independent iff there is no path between A and B (in either 22 | // direction) 23 | bool areNodesIndependent(const SchedInstruction *A, const SchedInstruction *B); 24 | 25 | // Adds an edge (A --> B) to the graph, updating recursive neighbors. 26 | // The type of the added edge is OTHER. 27 | GraphEdge *addSuperiorEdge(DataDepGraph &DDG, SchedInstruction *A, 28 | SchedInstruction *B, int latency = 0); 29 | 30 | // An abstract graph transformation class. 31 | class GraphTrans { 32 | 33 | public: 34 | GraphTrans(DataDepGraph *dataDepGraph); 35 | virtual ~GraphTrans(){}; 36 | 37 | virtual const char *Name() const = 0; 38 | 39 | // Apply the graph transformation to the DataDepGraph. 40 | virtual FUNC_RESULT ApplyTrans() = 0; 41 | 42 | void SetDataDepGraph(DataDepGraph *dataDepGraph); 43 | 44 | void SetSchedRegion(SchedRegion *schedRegion); 45 | 46 | void SetNumNodesInGraph(InstCount numNodesInGraph); 47 | 48 | protected: 49 | DataDepGraph *GetDataDepGraph_() const; 50 | SchedRegion *GetSchedRegion_() const; 51 | InstCount GetNumNodesInGraph_() const; 52 | 53 | private: 54 | // A pointer to the graph. 55 | DataDepGraph *dataDepGraph_; 56 | 57 | // A pointer to the scheduling region. 58 | SchedRegion *schedRegion_; 59 | 60 | // The total number of nodes in the graph. 61 | InstCount numNodesInGraph_; 62 | }; 63 | 64 | inline DataDepGraph *GraphTrans::GetDataDepGraph_() const { 65 | return dataDepGraph_; 66 | } 67 | inline void GraphTrans::SetDataDepGraph(DataDepGraph *dataDepGraph) { 68 | dataDepGraph_ = dataDepGraph; 69 | } 70 | 71 | inline SchedRegion *GraphTrans::GetSchedRegion_() const { return schedRegion_; } 72 | inline void GraphTrans::SetSchedRegion(SchedRegion *schedRegion) { 73 | schedRegion_ = schedRegion; 74 | } 75 | 76 | inline InstCount GraphTrans::GetNumNodesInGraph_() const { 77 | return numNodesInGraph_; 78 | } 79 | inline void GraphTrans::SetNumNodesInGraph(InstCount numNodesInGraph) { 80 | numNodesInGraph_ = numNodesInGraph; 81 | } 82 | 83 | // Node superiority graph transformation. 84 | class StaticNodeSupTrans : public GraphTrans { 85 | public: 86 | StaticNodeSupTrans(DataDepGraph *dataDepGraph, bool IsMultiPass); 87 | 88 | const char *Name() const override { return "rp.nodesup"; } 89 | 90 | FUNC_RESULT ApplyTrans() override; 91 | 92 | static bool isNodeSuperior(DataDepGraph &DDG, int A, int B); 93 | 94 | struct Statistics { 95 | int NumEdgesAdded = 0; 96 | int NumEdgesRemoved = 0; 97 | }; 98 | static void removeRedundantEdges(DataDepGraph &DDG, int i, int j, 99 | Statistics &Stats); 100 | 101 | private: 102 | // Are multiple passes enabled. 103 | bool IsMultiPass; 104 | 105 | // Return true if node A is superior to node B. 106 | bool NodeIsSuperior_(SchedInstruction *nodeA, SchedInstruction *nodeB) { 107 | return isNodeSuperior(*GetDataDepGraph_(), nodeA->GetNum(), 108 | nodeB->GetNum()); 109 | } 110 | 111 | // Check if there is superiority involving nodes A and B. If yes, choose which 112 | // edge to add. 113 | // Returns the added edge if added, else nullptr 114 | GraphEdge *TryAddingSuperiorEdge_(SchedInstruction *nodeA, 115 | SchedInstruction *nodeB); 116 | 117 | // Keep trying to find superior nodes until none can be found or there are no 118 | // more independent nodes. 119 | void nodeMultiPass_( 120 | std::list>); 121 | }; 122 | 123 | } // namespace opt_sched 124 | } // namespace llvm 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/graph_trans_ilp.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_H 2 | #define OPTSCHED_BASIC_GRAPH_TRANS_ILP_H 3 | 4 | #include "opt-sched/Scheduler/array_ref2d.h" 5 | #include "opt-sched/Scheduler/graph_trans.h" 6 | #include "llvm/ADT/SmallPtrSet.h" 7 | #include "llvm/ADT/SmallVector.h" 8 | #include 9 | 10 | namespace llvm { 11 | namespace opt_sched { 12 | 13 | // Node superiority ILP graph transformation. 14 | class StaticNodeSupILPTrans : public GraphTrans { 15 | public: 16 | StaticNodeSupILPTrans(DataDepGraph *dataDepGraph); 17 | 18 | const char *Name() const override { return "ilp.nodesup"; } 19 | 20 | FUNC_RESULT ApplyTrans() override; 21 | 22 | struct Statistics { 23 | int NumEdgesAdded = 0; 24 | int NumResourceEdgesAdded = 0; 25 | int NumEdgesRemoved = 0; 26 | }; 27 | 28 | struct Data { 29 | DataDepGraph &DDG; 30 | MutableArrayRef2D DistanceTable; 31 | MutableArrayRef2D SuperiorArray; 32 | llvm::SmallVectorImpl> &SuperiorNodesList; 33 | llvm::SmallPtrSetImpl &AddedEdges; 34 | Statistics &Stats; 35 | }; 36 | 37 | static constexpr int SmallSize = 64; 38 | 39 | static llvm::SmallVector 40 | createDistanceTable(DataDepGraph &DDG); 41 | 42 | static llvm::SmallVector 43 | createSuperiorArray(DataDepGraph &DDG, ArrayRef2D DistanceTable); 44 | 45 | static llvm::SmallVector, SmallSize> 46 | createSuperiorNodesList(ArrayRef2D SuperiorArray); 47 | 48 | class DataAlloc { 49 | friend class StaticNodeSupILPTrans; 50 | 51 | public: 52 | explicit DataAlloc(DataDepGraph &DDG); 53 | Data &getData() { return *Data_; } 54 | 55 | public: 56 | llvm::SmallVector DistanceTable; 57 | llvm::SmallVector SuperiorArray; 58 | llvm::SmallVector, SmallSize> SuperiorNodesList; 59 | llvm::SmallPtrSet AddedEdges; 60 | Statistics Stats = {}; 61 | 62 | private: 63 | std::unique_ptr Data_; 64 | }; 65 | 66 | static DataAlloc createData(DataDepGraph &DDG) { return DataAlloc(DDG); } 67 | 68 | static void setDistanceTable(Data &Data, int i, int j, int Val); 69 | 70 | static void updateDistanceTable(Data &Data, int i, int j); 71 | 72 | static void addZeroLatencyEdge(Data &Data, int i, int j); 73 | 74 | static void addNecessaryResourceEdges(DataDepGraph &DDG, int i, int j, 75 | Statistics &Stats); 76 | 77 | static void addNecessaryResourceEdges(Data &Data, int i, int j) { 78 | addNecessaryResourceEdges(Data.DDG, i, j, Data.Stats); 79 | } 80 | 81 | static void removeRedundantEdges(DataDepGraph &DDG, 82 | ArrayRef2D DistanceTable, int i, int j, 83 | Statistics &Stats); 84 | 85 | static void removeRedundantEdges(Data &Data, int i, int j) { 86 | removeRedundantEdges(Data.DDG, Data.DistanceTable, i, j, Data.Stats); 87 | } 88 | }; 89 | 90 | } // namespace opt_sched 91 | } // namespace llvm 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implement graph transformations to be applied before scheduling. 3 | Author: Justin Bassett 4 | Created: Aug. 2020 5 | Last Update: Aug. 2020 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H 9 | #define OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H 10 | 11 | #include "opt-sched/Scheduler/graph_trans.h" 12 | 13 | namespace llvm { 14 | namespace opt_sched { 15 | 16 | // Node superiority Occupancy preserving ILP graph transformation. 17 | class StaticNodeSupOccupancyPreservingILPTrans : public GraphTrans { 18 | public: 19 | StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *dataDepGraph); 20 | 21 | const char *Name() const override { 22 | return "occupancy-preserving-ilp.nodesup"; 23 | } 24 | 25 | FUNC_RESULT ApplyTrans() override; 26 | }; 27 | 28 | } // namespace opt_sched 29 | } // namespace llvm 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/hist_table.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Defines a history table class. 3 | Author: Ghassan Shobaki 4 | Created: Unknown 5 | Last Update: Mar. 2011 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_ENUM_HIST_TABLE_H 9 | #define OPTSCHED_ENUM_HIST_TABLE_H 10 | 11 | #include "opt-sched/Scheduler/defines.h" 12 | #include "opt-sched/Scheduler/enumerator.h" 13 | #include "opt-sched/Scheduler/gen_sched.h" 14 | #include "opt-sched/Scheduler/hash_table.h" 15 | #include "opt-sched/Scheduler/mem_mngr.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace llvm { 23 | namespace opt_sched { 24 | 25 | class EnumTreeNode; 26 | class Enumerator; 27 | 28 | // The history version of a tree node to be kept in the history table 29 | class HistEnumTreeNode { 30 | public: 31 | HistEnumTreeNode(); 32 | virtual ~HistEnumTreeNode(); 33 | 34 | InstCount GetTime(); 35 | void PrntPartialSched(std::ostream &out); 36 | bool CompPartialScheds(HistEnumTreeNode *othrHist); 37 | InstCount GetInstNum(); 38 | bool IsPrdcsrViaStalls(HistEnumTreeNode *othrNode); 39 | HistEnumTreeNode *GetParent(); 40 | void Clean(); 41 | void ReplaceParent(HistEnumTreeNode *newParent); 42 | // Does the scheduled inst. list of this node match that of the given node 43 | bool DoesMatch(EnumTreeNode *node, Enumerator *enumrtr); 44 | // Is the sub-problem at this node dominated by the given node's? 45 | bool IsDominated(EnumTreeNode *node, Enumerator *enumrtr); 46 | // Does the sub-problem at this node dominate the given node's? 47 | virtual bool DoesDominate(EnumTreeNode *node, Enumerator *enumrtr); 48 | virtual void Construct(EnumTreeNode *node, bool isTemp); 49 | virtual void SetCostInfo(EnumTreeNode *node, bool isTemp, 50 | Enumerator *enumrtr); 51 | const std::shared_ptr> &GetSuffix() const; 52 | void 53 | SetSuffix(const std::shared_ptr> &suffix); 54 | std::vector GetPrefix() const; 55 | 56 | inline int getInstNum() { return inst_->GetNum(); } 57 | 58 | protected: 59 | HistEnumTreeNode *prevNode_; 60 | 61 | // The current time or position (or step number) in the scheduling process. 62 | // This is equal to the length of the path from the root node to this node. 63 | InstCount time_; 64 | 65 | SchedInstruction *inst_; 66 | 67 | #ifdef IS_DEBUG 68 | bool isCnstrctd_; 69 | #endif 70 | 71 | bool crntCycleBlkd_; 72 | ReserveSlot *rsrvSlots_; 73 | 74 | // (Chris) 75 | std::shared_ptr> suffix_ = nullptr; 76 | 77 | InstCount SetLastInsts_(SchedInstruction *lastInsts[], InstCount thisTime, 78 | InstCount minTimeToExmn); 79 | void SetInstsSchduld_(BitVector *instsSchduld); 80 | // Does this history node dominate the given node or history node? 81 | bool DoesDominate_(EnumTreeNode *node, HistEnumTreeNode *othrHstry, 82 | ENUMTREE_NODEMODE mode, Enumerator *enumrtr, 83 | InstCount shft); 84 | void SetLwrBounds_(InstCount lwrBounds[], SchedInstruction *lastInsts[], 85 | InstCount thisTime, InstCount minTimeToExmn, 86 | Enumerator *enumrtr); 87 | void CmputNxtAvlblCycles_(Enumerator *enumrtr, InstCount instsPerType[], 88 | InstCount nxtAvlblCycles[]); 89 | 90 | virtual void Init_(); 91 | void AllocLastInsts_(ArrayMemAlloc *lastInstsAlctr, 92 | Enumerator *enumrtr); 93 | bool IsAbslutDmnnt_(); 94 | InstCount GetMinTimeToExmn_(InstCount nodeTime, Enumerator *enumrtr); 95 | InstCount GetLwrBound_(SchedInstruction *inst, int16_t issuRate); 96 | void SetRsrvSlots_(EnumTreeNode *node); 97 | }; 98 | 99 | class CostHistEnumTreeNode : public HistEnumTreeNode { 100 | public: 101 | CostHistEnumTreeNode(); 102 | virtual ~CostHistEnumTreeNode(); 103 | 104 | void Construct(EnumTreeNode *node, bool isTemp); 105 | // Does the sub-problem at this node dominate the given node's? 106 | bool DoesDominate(EnumTreeNode *node, Enumerator *enumrtr); 107 | void SetCostInfo(EnumTreeNode *node, bool isTemp, Enumerator *enumrtr); 108 | 109 | protected: 110 | // Why do we need to copy this data from region->tree_node->hist_node 111 | InstCount cost_; 112 | InstCount peakSpillCost_; 113 | InstCount spillCostSum_; 114 | 115 | // (Chris) 116 | InstCount totalCost_ = -1; 117 | InstCount partialCost_ = -1; 118 | bool totalCostIsActualCost_ = false; 119 | 120 | InstCount TotalSpillCost_ = -1; 121 | InstCount PartialSpillCost_ = -1; 122 | InstCount SuffixRPCost; 123 | 124 | bool isLngthFsbl_; 125 | #ifdef IS_DEBUG 126 | bool costInfoSet_; 127 | #endif 128 | 129 | bool chkCostDmntnForSinglePass(EnumTreeNode *node, 130 | LengthCostEnumerator *enumrtr); 131 | bool chkCostDmntnForTwoPass(EnumTreeNode *Node, LengthCostEnumerator *E); 132 | bool ChkCostDmntn_(EnumTreeNode *node, LengthCostEnumerator *enumrtr, 133 | InstCount &maxShft); 134 | virtual void Init_(); 135 | }; 136 | 137 | } // namespace opt_sched 138 | } // namespace llvm 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/list_sched.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Defines a list scheduler, based on the defintions of a generic 3 | scheduler and a constrained scheduler in gen_sched.h, which 4 | collectively include the meat of the implementation. 5 | Author: Ghassan Shobaki 6 | Created: Apr. 2002 7 | Last Update: Sept. 2013 8 | *******************************************************************************/ 9 | 10 | #ifndef OPTSCHED_LIST_SCHED_LIST_SCHED_H 11 | #define OPTSCHED_LIST_SCHED_LIST_SCHED_H 12 | 13 | #include "opt-sched/Scheduler/gen_sched.h" 14 | 15 | namespace llvm { 16 | namespace opt_sched { 17 | 18 | class ListScheduler : public ConstrainedScheduler { 19 | public: 20 | // Creates a list scheduler for the given dependence graph, machine and 21 | // schedule upper bound, using the specified heuristic. 22 | ListScheduler(DataDepGraph *dataDepGraph, MachineModel *machMdl, 23 | InstCount schedUprBound, SchedPriorities prirts); 24 | virtual ~ListScheduler(); 25 | 26 | // Calculates the schedule and returns it in the passed argument. 27 | FUNC_RESULT FindSchedule(InstSchedule *sched, SchedRegion *rgn); 28 | 29 | protected: 30 | bool isDynmcPrirty_; 31 | // Adds the instructions that have just become ready at this cycle to the 32 | // ready list. 33 | void UpdtRdyLst_(InstCount cycleNum, int slotNum); 34 | 35 | // Check whether the next node ID instruction is ready -- used to collect 36 | // scheduling stats for LLVM generating schedules 37 | bool CheckForInst(int numToPick) const; 38 | 39 | // Pick next instruction to be scheduled. Returns NULL if no instructions are 40 | // ready. 41 | virtual SchedInstruction *PickInst() const; 42 | }; 43 | 44 | // Force the list scheduler to maintain the source ordering of the instructions 45 | // regardless of latency or machine model constraints. 46 | class SequentialListScheduler : public ListScheduler { 47 | public: 48 | SequentialListScheduler(DataDepGraph *dataDepGraph, MachineModel *machMdl, 49 | InstCount schedUprBound, SchedPriorities prirts); 50 | 51 | private: 52 | // Does this instruction come next in the source ordering after all currently 53 | // scheduled instructions, e.g. 0, 1, 2, 3, 4. 54 | bool IsSequentialInstruction(const SchedInstruction *Inst) const; 55 | 56 | bool ChkInstLglty_(SchedInstruction *inst) const override; 57 | }; 58 | 59 | // A list scheduler that schedules the instruction with the top heuristic value 60 | // Unalike ListScheduler this class considers instructions that are ready 61 | // in terms of data dependencies, but not in terms of latencies. 62 | // If the instruction with the top heuristic is not ready in terms of latency 63 | // Then stalls will be inserted until it is ready 64 | class StallSchedulingListScheduler : public ListScheduler { 65 | public: 66 | StallSchedulingListScheduler(DataDepGraph *dataDepGraph, 67 | MachineModel *machMdl, InstCount schedUprBound, 68 | SchedPriorities prirts); 69 | 70 | SchedInstruction *PickInst() const; 71 | }; 72 | 73 | } // namespace opt_sched 74 | } // namespace llvm 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/logger.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implements a simple logger that writes out messages to a file or 3 | to a standard stream. 4 | Author: Max Shawabkeh 5 | Created: Mar. 2011 6 | Last Update: Mar. 2011 7 | *******************************************************************************/ 8 | 9 | #ifndef OPTSCHED_GENERIC_LOGGER_H 10 | #define OPTSCHED_GENERIC_LOGGER_H 11 | 12 | #include "opt-sched/Scheduler/defines.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace llvm { 21 | namespace opt_sched { 22 | 23 | namespace Logger { 24 | // Error severity levels. 25 | enum LOG_LEVEL { 26 | // Fatal error. Exit program. 27 | FATAL = 1, 28 | // Non-fatal error. Program should continue. 29 | ERROR = 2, 30 | // Generic non-error logging message. 31 | INFO = 4, 32 | // A summary message that should be shown only in the summary log. 33 | SUMMARY = 8 34 | }; 35 | 36 | // Directs all subsequent log output to the specified output stream. Defaults 37 | // to the standard error stream if not set. 38 | void SetLogStream(std::ostream &out); 39 | std::ostream &GetLogStream(); 40 | 41 | // Output a log message of a given level, either with a timestamp or without. 42 | // Expects a printf-style format string and a variable number of arguments to 43 | // place into the string. 44 | void Log(LOG_LEVEL level, bool timed, const char *format_string, ...); 45 | 46 | // Registers a periodic logging function that will respond to being called at 47 | // most every period milliseconds and act as a no-op until the period has 48 | // passed. Note that time measuring is in process CPU time. 49 | void RegisterPeriodicLogger(Milliseconds period, void (*callback)()); 50 | // Runs the previously registered logging function. If the period has not 51 | // passed since the last call to PeriodicLog() or RegisterPeriodicLogger(), 52 | // this acts as a no-op. 53 | void PeriodicLog(); 54 | 55 | // Shortcuts for each logging level. 56 | [[noreturn]] void Fatal(const char *format_string, ...); 57 | void Error(const char *format_string, ...); 58 | void Info(const char *format_string, ...); 59 | void Summary(const char *format_string, ...); 60 | 61 | namespace detail { 62 | // TODO: When we get C++17, get rid of EventAttrType and EventAttrValue in favor 63 | // of a std::variant. 64 | 65 | /** Encodes the type of an Event attribute */ 66 | enum class EventAttrType { 67 | Int64, 68 | UInt64, 69 | CStr, 70 | Bool, 71 | }; 72 | 73 | /* Gets the type of the argument */ 74 | inline EventAttrType GetEventAttrType(const char *) { 75 | return EventAttrType::CStr; 76 | } 77 | 78 | inline EventAttrType GetEventAttrType(bool) { return EventAttrType::Bool; } 79 | 80 | template ::value, int>::type = 0> 82 | inline EventAttrType GetEventAttrType(Int) { 83 | // Treat anything which is not a uint64_t as an int64_t. 84 | // This may aid branch prediction in the implementation. 85 | return (std::is_signed::value || sizeof(Int) < sizeof(int64_t)) 86 | ? EventAttrType::Int64 87 | : EventAttrType::UInt64; 88 | } 89 | 90 | /** Encodes the value of an Event attribute. */ 91 | union EventAttrValue { 92 | int64_t i64; 93 | uint64_t u64; 94 | const char *cstr; 95 | bool b; 96 | 97 | EventAttrValue(const char *val) : cstr{val} {} 98 | EventAttrValue(bool val) : b{val} {} 99 | 100 | template ::value, 101 | int>::type = 0> 102 | EventAttrValue(Int val) { 103 | if (std::is_signed::value || sizeof(Int) < sizeof(int64_t)) { 104 | i64 = val; 105 | } else { 106 | u64 = val; 107 | } 108 | } 109 | }; 110 | 111 | /** The implementation of Logger::Event(...) */ 112 | void Event(const std::pair *attrs, 113 | size_t numAttrs); 114 | } // namespace detail 115 | 116 | /** 117 | * \brief Logs an event in a json format. 118 | * \detail 119 | * 120 | * ``Logger::Event(eventID, [key, value]...)`` 121 | * 122 | * Logs messages of the format `EVENT: {"event_id": eventID, "key": value...}`, 123 | * allowing for easier parsing by tools later down the line. The current time is 124 | * always included. 125 | * 126 | * \param eventID a unique ID identifying this event. This should match the 127 | * regular expression `[A-Z0-9_]+`. That is, this should contain no spaces. 128 | * 129 | * \param args An alternating list of keys and values. 130 | * 131 | * \warning Any change to a log statement of this format requires a change in 132 | * our log-parsing scripts. 133 | */ 134 | template 135 | void Event(const char *eventID, const Args &... args) { 136 | static_assert(sizeof...(args) % 2 == 0, 137 | "Every key must have a corresponding value."); 138 | 139 | using EventItem = std::pair; 140 | 141 | std::array arr{ 142 | EventItem(detail::EventAttrType::CStr, 143 | detail::EventAttrValue("event_id")), 144 | EventItem(detail::EventAttrType::CStr, detail::EventAttrValue(eventID)), 145 | EventItem(detail::GetEventAttrType(args), 146 | detail::EventAttrValue(args))..., 147 | }; 148 | 149 | detail::Event(arr.data(), arr.size()); 150 | } 151 | 152 | } // namespace Logger 153 | 154 | } // namespace opt_sched 155 | } // namespace llvm 156 | 157 | #endif 158 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/mem_mngr.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implements application-level memory management used avoid the OS 3 | overhead in performance-critical sections of the code. 4 | Author: Ghassan Shobaki 5 | Created: Mar. 2003 6 | Last Update: Mar. 2011 7 | *******************************************************************************/ 8 | 9 | #ifndef OPTSCHED_GENERIC_MEM_MNGR_H 10 | #define OPTSCHED_GENERIC_MEM_MNGR_H 11 | 12 | #include "opt-sched/Scheduler/defines.h" 13 | #include "opt-sched/Scheduler/lnkd_lst.h" 14 | #include "opt-sched/Scheduler/logger.h" 15 | #include 16 | 17 | namespace llvm { 18 | namespace opt_sched { 19 | 20 | template class MemAlloc { 21 | public: 22 | // Allocates a new memory block of an initial size with an optional maximum 23 | // size. If no maximum size is specified, the memory is allocated 24 | // dynamically. The size is in the number of objects of type T. 25 | inline MemAlloc(int blockSize, int maxSize = INVALID_VALUE); 26 | // Deallocates the memory. 27 | inline ~MemAlloc(); 28 | // Marks all allocated memory as unused (and available for reuse). 29 | inline void Reset(); 30 | // Returns an allocated object. 31 | inline T *GetObject(); 32 | // Frees an object and recycles it for future use. 33 | inline void FreeObject(T *obj); 34 | 35 | protected: 36 | // The number of objects in each memory block allocated. 37 | int blockSize_; 38 | // The maximum number of objects to keep allocated. 39 | int maxSize_; 40 | // A buffer including an allocated block of objects 41 | T *currentBlock_; 42 | // The index of the next available object in the current block. 43 | int currentIndex_; 44 | // A linked list of previously allocated and fully used blocks. 45 | LinkedList allocatedBlocks_; 46 | // A linked list of free objects available for reuse. 47 | Stack availableObjects_; 48 | // Whether an of the already allocated blocks are still unused. 49 | bool allocatedBlocksAvailable_; 50 | 51 | // Makes sure currentBlock_ points to an unused block, allocating a new one 52 | // if needed. 53 | inline void GetNewBlock_(); 54 | // Allocates a new block. 55 | inline void AllocNewBlock_(); 56 | // Returns a pointer to an array of count unused objects. 57 | inline T *GetObjects_(int count); 58 | }; 59 | 60 | template class ArrayMemAlloc : public MemAlloc { 61 | public: 62 | // Allocates a memory block that contains arraysPerBlock arrays, each 63 | // containing arraySize elements of type T. 64 | inline ArrayMemAlloc(int arraysPerBlock, int arraySize) 65 | : MemAlloc(arraysPerBlock * arraySize) { 66 | arraySize_ = arraySize; 67 | } 68 | // Returns an allocated array of objects. 69 | inline T *GetArray() { return MemAlloc::GetObjects_(arraySize_); } 70 | // Frees an array of objects and recycle it for future use. 71 | inline void FreeArray(T *array) { FreeObject(array); } 72 | 73 | protected: 74 | // The size of each array. 75 | int arraySize_; 76 | }; 77 | 78 | template 79 | inline MemAlloc::MemAlloc(int blockSize, int maxSize) 80 | : availableObjects_(maxSize) { 81 | assert(maxSize == INVALID_VALUE || blockSize <= maxSize); 82 | blockSize_ = blockSize; 83 | maxSize_ = maxSize; 84 | currentIndex_ = 0; 85 | currentBlock_ = NULL; 86 | allocatedBlocksAvailable_ = false; 87 | GetNewBlock_(); 88 | } 89 | 90 | template inline MemAlloc::~MemAlloc() { 91 | for (T *blk = allocatedBlocks_.GetFrstElmnt(); blk != NULL; 92 | blk = allocatedBlocks_.GetNxtElmnt()) { 93 | delete[] blk; 94 | } 95 | } 96 | 97 | template inline void MemAlloc::Reset() { 98 | assert(allocatedBlocks_.GetElmntCnt() >= 1); 99 | currentBlock_ = allocatedBlocks_.GetFrstElmnt(); 100 | currentIndex_ = 0; 101 | availableObjects_.Reset(); 102 | allocatedBlocksAvailable_ = true; 103 | } 104 | 105 | template inline void MemAlloc::GetNewBlock_() { 106 | currentBlock_ = NULL; 107 | 108 | if (allocatedBlocksAvailable_) { 109 | currentBlock_ = allocatedBlocks_.GetNxtElmnt(); 110 | currentIndex_ = 0; 111 | } 112 | 113 | if (currentBlock_ == NULL) { 114 | allocatedBlocksAvailable_ = false; 115 | AllocNewBlock_(); 116 | } 117 | } 118 | 119 | template inline void MemAlloc::AllocNewBlock_() { 120 | T *blk = new T[blockSize_]; 121 | allocatedBlocks_.InsrtElmnt(blk); 122 | currentIndex_ = 0; 123 | currentBlock_ = blk; 124 | } 125 | 126 | template inline T *MemAlloc::GetObjects_(int count) { 127 | T *obj = availableObjects_.ExtractElmnt(); 128 | 129 | if (obj == NULL) { 130 | // If there are no recycled objects available for reuse. 131 | assert(currentIndex_ <= blockSize_); 132 | 133 | if (currentIndex_ == blockSize_) { 134 | // If the current block is all used up. 135 | assert(maxSize_ == INVALID_VALUE); 136 | GetNewBlock_(); 137 | assert(currentIndex_ == 0); 138 | } 139 | 140 | obj = currentBlock_ + currentIndex_; 141 | currentIndex_ += count; 142 | } 143 | 144 | assert(obj != NULL); 145 | return obj; 146 | } 147 | 148 | template inline T *MemAlloc::GetObject() { return GetObjects_(1); } 149 | 150 | template inline void MemAlloc::FreeObject(T *obj) { 151 | availableObjects_.InsrtElmnt(obj); 152 | } 153 | 154 | } // namespace opt_sched 155 | } // namespace llvm 156 | 157 | #endif 158 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/random.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Implements the Kahan's random number generator, with a period of 3 | 2 ** 40. 4 | Author: Ghassan Shobaki 5 | Created: Unknown 6 | Last Update: Mar. 2011 7 | *******************************************************************************/ 8 | 9 | #ifndef OPTSCHED_GENERIC_RANDOM_H 10 | #define OPTSCHED_GENERIC_RANDOM_H 11 | 12 | #include "opt-sched/Scheduler/defines.h" 13 | 14 | namespace llvm { 15 | namespace opt_sched { 16 | 17 | namespace RandomGen { 18 | // Initialize the random number generator with a seed. 19 | void SetSeed(int32_t iseed); 20 | // Get a random 32-bit value. 21 | uint32_t GetRand32(); 22 | // Get a random 32-bit value within a given range, inclusive. 23 | uint32_t GetRand32WithinRange(uint32_t min, uint32_t max); 24 | // Get a random 64-bit value. 25 | uint64_t GetRand64(); 26 | // Fill a buffer with a specified number of random bits, rounded to the 27 | // nearest byte boundary. 28 | void GetRandBits(uint16_t bitCnt, unsigned char *dest); 29 | } // namespace RandomGen 30 | 31 | } // namespace opt_sched 32 | } // namespace llvm 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/ready_list.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Defines a ReadyList class, which is one of the main data 3 | structures that an instruction scheduler needs. The ready list is 4 | a sorted list of instructions whose data dependencies have been 5 | satisfied (their predecessors in the data dependence graph have 6 | been scheduled). 7 | Author: Ghassan Shobaki 8 | Created: Apr. 2002 9 | Last Update: Sept. 2013 10 | *******************************************************************************/ 11 | 12 | #ifndef OPTSCHED_BASIC_READY_LIST_H 13 | #define OPTSCHED_BASIC_READY_LIST_H 14 | 15 | #include "opt-sched/Scheduler/defines.h" 16 | #include "opt-sched/Scheduler/lnkd_lst.h" 17 | #include "opt-sched/Scheduler/sched_basic_data.h" 18 | #include "llvm/ADT/SmallVector.h" 19 | #include 20 | 21 | namespace llvm { 22 | namespace opt_sched { 23 | 24 | // A priority list of instruction that are ready to schedule at a given point 25 | // during the scheduling process. 26 | class ReadyList { 27 | public: 28 | // Constructs a ready list for the specified dependence graph with the 29 | // specified priorities. 30 | ReadyList(DataDepGraph *dataDepGraph, SchedPriorities prirts); 31 | // Destroys the ready list and deallocates the memory used by it. 32 | ~ReadyList(); 33 | 34 | // Resets the list and removes all elements from it. 35 | void Reset(); 36 | 37 | // Adds an instruction to the ready list. 38 | void AddInst(SchedInstruction *inst); 39 | 40 | // Adds a list of instructions to the ready list. 41 | void AddList(LinkedList *lst); 42 | 43 | // An iterator that allows accessing the instructions at the current time 44 | // in priority order. The first call will return the top priority 45 | // instruction, the next will return the instruction with the second rank, 46 | // and so on. 47 | SchedInstruction *GetNextPriorityInst(); 48 | SchedInstruction *GetNextPriorityInst(unsigned long &key); 49 | 50 | // Removes the instruction returned by the last call to 51 | // GetNextPriorityInst(). 52 | void RemoveNextPriorityInst(); 53 | 54 | // Returns the number of instructions currently in the list. 55 | InstCount GetInstCnt() const; 56 | 57 | // Resets the list iterator to point back to the first instruction. 58 | void ResetIterator(); 59 | 60 | // Adds instructions at the bottoms of the given two lists which have 61 | // not been added to the ready list already, and advance the internal time. 62 | // TODO(max): Elaborate. 63 | void AddLatestSubLists(LinkedList *lst1, 64 | LinkedList *lst2); 65 | 66 | // Removes the most recently added sublist of instructions. 67 | // TODO(max): Elaborate. 68 | void RemoveLatestSubList(); 69 | 70 | // Copies this list to another. Both lists must be empty. 71 | void CopyList(ReadyList *otherLst); 72 | 73 | // Searches the list for an instruction, returning whether it has been found 74 | // or not and writing the number of times it was found into hitCnt. 75 | bool FindInst(SchedInstruction *inst, int &hitCnt); 76 | 77 | // Update instruction priorities within the list 78 | // Called only if the priorities change dynamically during scheduling 79 | void UpdatePriorities(); 80 | 81 | unsigned long MaxPriority(); 82 | 83 | // Prints out the ready list, nicely formatted, into an output stream. 84 | void Print(std::ostream &out); 85 | 86 | // Constructs the priority-list key based on the schemes listed in prirts_. 87 | unsigned long CmputKey_(SchedInstruction *inst, bool isUpdate, bool &changed); 88 | 89 | template 90 | void ForEachReadyInstruction(InstructionVisitor &&visitor) const { 91 | for (const SchedInstruction &Inst : prirtyLst_) { 92 | visitor(Inst); 93 | } 94 | } 95 | 96 | private: 97 | // An ordered vector of priorities 98 | SchedPriorities prirts_; 99 | 100 | // The priority list containing the actual instructions. 101 | PriorityList prirtyLst_; 102 | 103 | // TODO(max): Document. 104 | LinkedList latestSubLst_; 105 | 106 | // Array of pointers to KeyedEntry objects 107 | llvm::SmallVector *, 0> 108 | keyedEntries_; 109 | 110 | // Is there a priority scheme that needs to be changed dynamically 111 | // bool isDynmcPrirty_; 112 | 113 | // The maximum values for each part of the priority key. 114 | InstCount maxUseCnt_; 115 | InstCount maxCrtclPath_; 116 | InstCount maxScsrCnt_; 117 | InstCount maxLtncySum_; 118 | InstCount maxNodeID_; 119 | InstCount maxInptSchedOrder_; 120 | 121 | unsigned long maxPriority_; 122 | 123 | // The number of bits for each part of the priority key. 124 | int16_t useCntBits_; 125 | int16_t crtclPathBits_; 126 | int16_t scsrCntBits_; 127 | int16_t ltncySumBits_; 128 | int16_t nodeID_Bits_; 129 | int16_t inptSchedOrderBits_; 130 | 131 | // Adds instructions at the bottom of a given list which have not been added 132 | // to the ready list already. 133 | void AddLatestSubList_(LinkedList *lst); 134 | 135 | // Calculates a new priority key given an existing key of size keySize by 136 | // appending bitCnt bits holding the value val, assuming val < maxVal. 137 | static void AddPrirtyToKey_(unsigned long &key, int16_t &keySize, 138 | int16_t bitCnt, unsigned long val, 139 | unsigned long maxVal); 140 | }; 141 | 142 | } // namespace opt_sched 143 | } // namespace llvm 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/reg_alloc.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Defines register allocation classes. By looking at the effect 3 | of scheduling decisions on the number of spills added during 4 | simulated register allocation, we can evaluate the performance of the scheduler. 5 | 6 | Author: Austin Kerbow 7 | Created: Oct. 2017 8 | Last Update: Oct. 2017 9 | *******************************************************************************/ 10 | #ifndef OPTSCHED_BASIC_REG_ALLOC_H 11 | #define OPTSCHED_BASIC_REG_ALLOC_H 12 | 13 | #include "opt-sched/Scheduler/data_dep.h" 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace llvm { 20 | namespace opt_sched { 21 | 22 | using namespace std; 23 | 24 | /** 25 | * Class for performing basic top-down register allocation. 26 | */ 27 | class LocalRegAlloc { 28 | public: 29 | typedef struct RegMap { 30 | // A queue of instruction numbers that this virtual register is used in. 31 | queue nextUses; 32 | // Do we need to spill this virtual register. 33 | bool isDirty; 34 | // The physical register that this virtual register is mapped to. If this 35 | // virtual register is not mapped to a physical register, set to -1. 36 | int assignedReg; 37 | } RegMap; 38 | 39 | LocalRegAlloc(InstSchedule *instSchedule, DataDepGraph *dataDepGraph); 40 | virtual ~LocalRegAlloc(); 41 | // Try to allocate registers in the region and count the number of spills 42 | // added. 43 | virtual void AllocRegs(); 44 | // Initialize data for register allocation. 45 | virtual void SetupForRegAlloc(); 46 | // Print information about the amount of spilling in the region after register 47 | // allocation. 48 | virtual void PrintSpillInfo(const char *dagName); 49 | // Return the spill cost of region after register allocation. 50 | virtual int GetCost() const; 51 | // Return the number of loads 52 | int GetNumLoads() const { return numLoads_; } 53 | // Return the number of stores 54 | int GetNumStores() const { return numStores_; } 55 | 56 | private: 57 | InstSchedule *instSchedule_; 58 | DataDepGraph *dataDepGraph_; 59 | int numLoads_; 60 | int numStores_; 61 | int numRegTypes_; 62 | // For each register type, there is a stack that tracks free physical 63 | // registers. 64 | vector> freeRegs_; 65 | // For each virtual register, track the next use and the currently assigned 66 | // physical register. 67 | vector> regMaps_; 68 | // For each register type, we have a list of physical registers and the 69 | // current virtual register that is loaded. If the regsiter is free, set to 70 | // -1. 71 | vector> physRegs_; 72 | 73 | // Find all instructions that use each register. 74 | void ScanUses_(); 75 | void AllocateReg_(int16_t regType, int virtRegNum); 76 | // Find a candidate physical register to spill. 77 | int FindSpillCand_(std::map ®Maps, vector &physRegs); 78 | // Load live-in virtual registers. Live-in registers are defined by the 79 | // artificial entry instruction. 80 | void AddLiveIn_(SchedInstruction *artificialEntry); 81 | // Spill all dirty registers. 82 | void SpillAll_(); 83 | }; 84 | 85 | } // namespace opt_sched 86 | } // namespace llvm 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /include/opt-sched/Scheduler/utilities.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: Contains a few generic utility functions. 3 | Author: Ghassan Shobaki 4 | Created: Oct. 1997 5 | Last Update: Mar. 2017 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_GENERIC_UTILITIES_H 9 | #define OPTSCHED_GENERIC_UTILITIES_H 10 | 11 | #include "opt-sched/Scheduler/defines.h" 12 | #include 13 | 14 | namespace llvm { 15 | namespace opt_sched { 16 | 17 | namespace Utilities { 18 | // Calculates the minimum number of bits that can hold a given integer value. 19 | uint16_t clcltBitsNeededToHoldNum(uint64_t value); 20 | // Returns the time that has passed since the start of the process, in 21 | // milliseconds. 22 | Milliseconds GetProcessorTime(); 23 | // Returns a reference to an object that is supposed to initialized with the 24 | // start time of the process 25 | extern std::chrono::steady_clock::time_point startTime; 26 | 27 | // Executes the function, returning the number of milliseconds it took to do so. 28 | template Milliseconds countMillisToExecute(F &&fn) { 29 | const Milliseconds Start = GetProcessorTime(); 30 | fn(); 31 | return GetProcessorTime() - Start; 32 | } 33 | } // namespace Utilities 34 | 35 | inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) { 36 | uint16_t bitsNeeded = 0; 37 | 38 | while (value) { 39 | value >>= 1; 40 | bitsNeeded++; 41 | } 42 | return bitsNeeded; 43 | } 44 | 45 | inline Milliseconds Utilities::GetProcessorTime() { 46 | auto currentTime = std::chrono::steady_clock::now(); 47 | std::chrono::duration elapsed = currentTime - startTime; 48 | return elapsed.count(); 49 | } 50 | 51 | } // namespace opt_sched 52 | } // namespace llvm 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(OPTSCHED_SRCS 2 | Scheduler/aco.cpp 3 | Scheduler/bb_spill.cpp 4 | Scheduler/buffers.cpp 5 | Scheduler/config.cpp 6 | Scheduler/data_dep.cpp 7 | Scheduler/enumerator.cpp 8 | Scheduler/gen_sched.cpp 9 | Scheduler/graph.cpp 10 | Scheduler/graph_trans.cpp 11 | Scheduler/graph_trans_ilp.cpp 12 | Scheduler/graph_trans_ilp_occupancy_preserving.cpp 13 | Scheduler/hist_table.cpp 14 | Scheduler/list_sched.cpp 15 | Scheduler/logger.cpp 16 | Scheduler/reg_alloc.cpp 17 | Scheduler/utilities.cpp 18 | Scheduler/machine_model.cpp 19 | Scheduler/random.cpp 20 | Scheduler/ready_list.cpp 21 | Scheduler/register.cpp 22 | Scheduler/relaxed_sched.cpp 23 | Scheduler/sched_basic_data.cpp 24 | Scheduler/sched_region.cpp 25 | Scheduler/stats.cpp 26 | Wrapper/OptimizingScheduler.cpp 27 | Wrapper/OptSchedMachineWrapper.cpp 28 | Wrapper/OptSchedDDGWrapperBasic.cpp 29 | Wrapper/OptSchedGenericTarget.cpp 30 | ) 31 | 32 | set(OPTSCHED_TARGET_DEPS "") 33 | 34 | if(OPTSCHED_ENABLE_AMDGPU) 35 | list(APPEND OPTSCHED_SRCS 36 | Wrapper/AMDGPU/GCNOptSched.cpp 37 | Wrapper/AMDGPU/OptSchedGCNTarget.cpp 38 | Wrapper/AMDGPU/OptSchedDDGWrapperGCN.cpp 39 | ) 40 | if(TARGET LLVMAMDGPUCodeGen) 41 | list(APPEND OPTSCHED_TARGET_DEPS AMDGPUCommonTableGen) 42 | endif() 43 | endif() 44 | 45 | add_llvm_target(OptSched 46 | STATIC 47 | ${OPTSCHED_SRCS} 48 | ) 49 | add_dependencies(LLVMOptSched ${OPTSCHED_TARGET_DEPS}) 50 | #add_definitions(${OPTSCHED_EXTRA_DEFINITIONS}) 51 | -------------------------------------------------------------------------------- /lib/Scheduler/config.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/config.h" 2 | #include "opt-sched/Scheduler/logger.h" 3 | #include "llvm/Support/ErrorHandling.h" 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace llvm::opt_sched; 9 | 10 | using std::istringstream; 11 | 12 | template T Convert(const string &value) { 13 | istringstream ss(value); 14 | T number = 0; 15 | ss >> number; 16 | assert(!ss.fail()); 17 | return number; 18 | } 19 | 20 | template list Split(const string &value) { 21 | list values; 22 | if (value == "") 23 | return values; 24 | 25 | istringstream ss(value); 26 | while (ss) { 27 | T item; 28 | char delimiter; 29 | ss >> item; 30 | assert(!ss.fail()); 31 | ss >> delimiter; 32 | assert(ss.fail() || delimiter == ','); 33 | values.push_back(item); 34 | } 35 | 36 | return values; 37 | } 38 | 39 | void Config::Load(const string &filepath) { 40 | std::ifstream file(filepath.c_str()); 41 | Load(file); 42 | } 43 | 44 | void Config::Load(std::istream &file) { 45 | settings.clear(); 46 | while (!file.eof()) { 47 | string name, value, comment; 48 | file >> name; 49 | while (!file.fail() && name.size() && name[0] == '#') { 50 | std::getline(file, comment); 51 | file >> name; 52 | } 53 | file >> value; 54 | while (!file.fail() && value.size() && value[0] == '#') { 55 | std::getline(file, comment); 56 | file >> value; 57 | } 58 | if (file.fail() || name == "" || value == "") 59 | break; 60 | settings[name] = value; 61 | } 62 | } 63 | 64 | string Config::GetString(const string &name) const { 65 | std::map::const_iterator it = settings.find(name); 66 | if (it == settings.end()) { 67 | llvm::report_fatal_error( 68 | llvm::StringRef("No value found for setting " + name), false); 69 | return ""; 70 | } else { 71 | return it->second; 72 | } 73 | } 74 | 75 | string Config::GetString(const string &name, const string &default_) const { 76 | std::map::const_iterator it = settings.find(name); 77 | if (it == settings.end()) { 78 | return default_; 79 | } else { 80 | return it->second; 81 | } 82 | } 83 | 84 | int64_t Config::GetInt(const string &name) const { 85 | return Convert(GetString(name)); 86 | } 87 | 88 | int64_t Config::GetInt(const string &name, int64_t default_) const { 89 | if (settings.find(name) == settings.end()) { 90 | return default_; 91 | } else { 92 | return GetInt(name); 93 | } 94 | } 95 | 96 | float Config::GetFloat(const string &name) const { 97 | return Convert(GetString(name)); 98 | } 99 | 100 | float Config::GetFloat(const string &name, float default_) const { 101 | if (settings.find(name) == settings.end()) { 102 | return default_; 103 | } else { 104 | return GetFloat(name); 105 | } 106 | } 107 | 108 | bool Config::GetBool(const string &name) const { 109 | string value = GetString(name); 110 | if (value == "YES" || value == "yes" || value == "1" || value == "TRUE" || 111 | value == "true") { 112 | return true; 113 | } else { 114 | assert(value == "NO" || value == "no" || value == "0" || value == "FALSE" || 115 | value == "false"); 116 | return false; 117 | } 118 | } 119 | 120 | bool Config::GetBool(const string &name, bool default_) const { 121 | if (settings.find(name) == settings.end()) { 122 | return default_; 123 | } else { 124 | return GetBool(name); 125 | } 126 | } 127 | 128 | list Config::GetStringList(const string &name) const { 129 | list values; 130 | string line = GetString(name, ""); 131 | if (line == "") 132 | return values; 133 | 134 | istringstream ss(line); 135 | string item; 136 | 137 | while (std::getline(ss, item, ',')) { 138 | values.push_back(item); 139 | } 140 | 141 | return values; 142 | } 143 | 144 | list Config::GetIntList(const string &name) const { 145 | return Split(GetString(name, "")); 146 | } 147 | 148 | list Config::GetFloatList(const string &name) const { 149 | return Split(GetString(name, "")); 150 | } 151 | 152 | SchedulerOptions &SchedulerOptions::getInstance() { 153 | static SchedulerOptions instance; // The instance will always be destroyed. 154 | return instance; 155 | } 156 | -------------------------------------------------------------------------------- /lib/Scheduler/graph_trans_ilp_occupancy_preserving.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h" 2 | 3 | #include "opt-sched/Scheduler/graph_trans_ilp.h" 4 | #include "opt-sched/Scheduler/logger.h" 5 | #include "llvm/ADT/ArrayRef.h" 6 | #include "llvm/ADT/STLExtras.h" 7 | #include "llvm/ADT/SmallVector.h" 8 | #include 9 | #include 10 | 11 | using namespace llvm::opt_sched; 12 | 13 | // #define IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS 14 | 15 | #ifdef IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS 16 | #define DEBUG_LOG(...) Logger::Info(__VA_ARGS__) 17 | #else 18 | #define DEBUG_LOG(...) static_cast(0) 19 | #endif 20 | 21 | using ILP = StaticNodeSupILPTrans; 22 | using RP = StaticNodeSupTrans; 23 | 24 | StaticNodeSupOccupancyPreservingILPTrans:: 25 | StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *DDG) 26 | : GraphTrans(DDG) {} 27 | 28 | FUNC_RESULT StaticNodeSupOccupancyPreservingILPTrans::ApplyTrans() { 29 | Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiority"); 30 | 31 | DataDepGraph &DDG = *GetDataDepGraph_(); 32 | assert(GetNumNodesInGraph_() == DDG.GetNodeCnt()); 33 | 34 | auto Data_ = ILP::createData(DDG); 35 | ILP::Data &Data = Data_.getData(); 36 | 37 | int NumPassedILP = 0; 38 | int NumFailedRP = 0; 39 | 40 | DEBUG_LOG("Starting main algorithm"); 41 | while (!Data.SuperiorNodesList.empty()) { 42 | auto ij = Data.SuperiorNodesList.pop_back_val(); 43 | const int i = ij.first; 44 | const int j = ij.second; 45 | DEBUG_LOG("Considering adding a superior edge (%d, %d)", i, j); 46 | 47 | if (!areNodesIndependent(DDG.GetInstByIndx(i), DDG.GetInstByIndx(j))) { 48 | DEBUG_LOG("Skipping (%d, %d) because nodes are no longer independent\n", 49 | i, j); 50 | continue; 51 | } 52 | ++NumPassedILP; 53 | if (!RP::isNodeSuperior(DDG, i, j)) { 54 | DEBUG_LOG("(%d, %d) failed the occupancy-preserving conditions\n", i, j); 55 | ++NumFailedRP; 56 | continue; 57 | } 58 | 59 | ILP::addZeroLatencyEdge(Data, i, j); 60 | ILP::addNecessaryResourceEdges(Data, i, j); 61 | 62 | ILP::updateDistanceTable(Data, i, j); 63 | // ILP redundant edges are also redundant from RP point of view. 64 | // This is because ILP redundant edges are transitive edges with more 65 | // conditions met, and the RP point of view considers transitive edges to be 66 | // redundant. 67 | ILP::removeRedundantEdges(Data, i, j); 68 | 69 | DEBUG_LOG("Finished iteration for (%d, %d)\n", i, j); 70 | } 71 | 72 | Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiorityFinished", 73 | "superior_edges", Data.Stats.NumEdgesAdded, // 74 | "removed_edges", Data.Stats.NumEdgesRemoved, // 75 | "resource_edges", Data.Stats.NumResourceEdgesAdded, // 76 | "passed_ilp", NumPassedILP, // 77 | "failed_rp", NumFailedRP); 78 | 79 | return RES_SUCCESS; 80 | } 81 | -------------------------------------------------------------------------------- /lib/Scheduler/logger.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/logger.h" 2 | #include 3 | // For va_list, va_start(), va_end(). 4 | #include 5 | // For sprintf(), vsprintf(). 6 | #include 7 | // For exit(). 8 | #include 9 | // For GetProcessorTime(). 10 | #include "opt-sched/Scheduler/utilities.h" 11 | 12 | using namespace llvm::opt_sched; 13 | 14 | // An ugly macro to simplify repeated vararg-insertion. 15 | #define VPRINT(buf, frmt) \ 16 | va_list args; \ 17 | va_start(args, frmt); \ 18 | vsprintf(buf, frmt, args); \ 19 | va_end(args); 20 | 21 | // The maximum buffer size for error messages. 22 | static const int MAX_MSGSIZE = 8000; 23 | 24 | // The current output stream. 25 | static std::ostream *logStream = &std::cerr; 26 | 27 | // The periodic logging callback. 28 | static void (*periodLogCallback)() = NULL; 29 | // The minimum length of (CPU) time between two calls to the periodic logging 30 | // callback. 31 | static Milliseconds periodLogPeriod = 0; 32 | // The CPU time when the period log was last called. 33 | static Milliseconds periodLogLastTime = 0; 34 | 35 | // The main output function. Calculates the time since process start and formats 36 | // the specified message with a title and timestamp. Exits the program with exit 37 | // code = 1 on fatal errors. 38 | static void Output(Logger::LOG_LEVEL level, bool timed, const char *message) { 39 | const char *title = 0; 40 | 41 | switch (level) { 42 | case Logger::FATAL: 43 | title = "FATAL"; 44 | break; 45 | case Logger::ERROR: 46 | title = "ERROR"; 47 | break; 48 | case Logger::INFO: 49 | title = "INFO"; 50 | break; 51 | case Logger::SUMMARY: 52 | title = "SUMMARY"; 53 | break; 54 | } 55 | 56 | (*logStream) << title << ": " << message; 57 | if (timed) { 58 | (*logStream) << " (Time = " << Utilities::GetProcessorTime() << " ms)"; 59 | } 60 | (*logStream) << std::endl; 61 | 62 | if (level == Logger::FATAL) 63 | exit(1); 64 | } 65 | 66 | void Logger::SetLogStream(std::ostream &out) { logStream = &out; } 67 | 68 | std::ostream &Logger::GetLogStream() { return *logStream; } 69 | 70 | void Logger::RegisterPeriodicLogger(Milliseconds period, void (*callback)()) { 71 | periodLogLastTime = Utilities::GetProcessorTime(); 72 | periodLogCallback = callback; 73 | periodLogPeriod = period; 74 | } 75 | 76 | void Logger::PeriodicLog() { 77 | if (!periodLogCallback) { 78 | Error("Periodic log called while no callback was registered."); 79 | return; 80 | } 81 | 82 | Milliseconds now = Utilities::GetProcessorTime(); 83 | ; 84 | if (now - periodLogLastTime >= periodLogPeriod) { 85 | periodLogCallback(); 86 | periodLogLastTime = now; 87 | } 88 | } 89 | 90 | void Logger::Log(Logger::LOG_LEVEL level, bool timed, const char *format_string, 91 | ...) { 92 | char message_buffer[MAX_MSGSIZE]; 93 | VPRINT(message_buffer, format_string); 94 | Output(level, timed, message_buffer); 95 | } 96 | 97 | void Logger::Fatal(const char *format_string, ...) { 98 | char message_buffer[MAX_MSGSIZE]; 99 | VPRINT(message_buffer, format_string); 100 | Output(Logger::FATAL, true, message_buffer); 101 | exit(1); 102 | } 103 | 104 | void Logger::Error(const char *format_string, ...) { 105 | char message_buffer[MAX_MSGSIZE]; 106 | VPRINT(message_buffer, format_string); 107 | Output(Logger::ERROR, true, message_buffer); 108 | } 109 | 110 | void Logger::Info(const char *format_string, ...) { 111 | char message_buffer[MAX_MSGSIZE]; 112 | VPRINT(message_buffer, format_string); 113 | Output(Logger::INFO, true, message_buffer); 114 | } 115 | 116 | void Logger::Summary(const char *format_string, ...) { 117 | char message_buffer[MAX_MSGSIZE]; 118 | VPRINT(message_buffer, format_string); 119 | Output(Logger::SUMMARY, false, message_buffer); 120 | } 121 | 122 | using Logger::detail::EventAttrType; 123 | using Logger::detail::EventAttrValue; 124 | 125 | void Logger::detail::Event( 126 | const std::pair *attrs, size_t numAttrs) { 127 | std::ostream &out = *logStream; 128 | 129 | // We alternate using ": " and ", " as the separators. 130 | // However, we just print the separator before every attribute, meaning that 131 | // we need to special case the first element, hence the third empty string. 132 | const char *separators[] = {": ", ", ", ""}; 133 | int sepIndex = 2; 134 | 135 | out << "EVENT: {"; 136 | 137 | for (size_t index = 0; index < numAttrs; ++index, 138 | // Alternate the separator we are using. Note: !2 == 0 139 | sepIndex = !sepIndex) { 140 | const auto type = attrs[index].first; 141 | const auto val = attrs[index].second; 142 | 143 | out << separators[sepIndex]; 144 | 145 | switch (type) { 146 | case EventAttrType::Bool: 147 | out << (val.b ? "true" : "false"); 148 | break; 149 | case EventAttrType::Int64: 150 | out << val.i64; 151 | break; 152 | case EventAttrType::UInt64: 153 | out << val.u64; 154 | break; 155 | case EventAttrType::CStr: 156 | // TODO(justin): when we have C++14, use std::quoted(val.cstr), which will 157 | // escape `"`s inside the string. 158 | out << '"' << val.cstr << '"'; 159 | break; 160 | default: 161 | Logger::Fatal("Unknown event type %d. Internal error", (int)type); 162 | } 163 | } 164 | 165 | out << separators[sepIndex] << "\"time\": " << Utilities::GetProcessorTime() 166 | << "}\n" 167 | << std::flush; 168 | } 169 | -------------------------------------------------------------------------------- /lib/Scheduler/random.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/random.h" 2 | // For memcpy(). 3 | #include 4 | #include 5 | 6 | using namespace llvm::opt_sched; 7 | 8 | // Magic numbers used in the generator formula. 9 | static const uint32_t A = 0x2faf071d; // 8 * (10 ** 8 - 29) + 5 10 | static const uint32_t C = 0x3b9ac9c1; // 10 ** 9 - 63 11 | 12 | // Magic lookup table. 13 | static uint32_t Z[] = { 14 | 0x8ca0df45, 0x37334f23, 0x4a5901d2, 0xaeede075, 0xd84bd3cf, 0xa1ce3350, 15 | 0x35074a8f, 0xfd4e6da0, 0xe2c22e6f, 0x045de97e, 0x0e6d45b9, 0x201624a2, 16 | 0x01e10dca, 0x2810aef2, 0xea0be721, 0x3a3781e4, 0xa3602009, 0xd2ffcf69, 17 | 0xff7102e9, 0x36fab972, 0x5c3650ff, 0x8cd44c9c, 0x25a4a676, 0xbd6385ce, 18 | 0xcd55c306, 0xec8a31f5, 0xa87b24ce, 0x1e025786, 0x53d713c9, 0xb29d308f, 19 | 0x0dc6cf3f, 0xf11139c9, 0x3afb3780, 0x0ed6b24c, 0xef04c8fe, 0xab53d825, 20 | 0x3ca69893, 0x35460fb1, 0x058ead73, 0x0b567c59, 0xfdddca3f, 0x6317e77d, 21 | 0xaa5febe5, 0x655f73e2, 0xd42455bb, 0xe845a8bb, 0x351e4a67, 0xa36a9dfb, 22 | 0x3e0ac91d, 0xbaa0de01, 0xec60dc66, 0xdb29309e, 0xcfa52971, 0x1f3eddaf, 23 | 0xe14aae61, 24 | }; 25 | 26 | // The current generator state. Magical starting values. 27 | static long j = 23; 28 | static long k = 54; 29 | static uint32_t y[] = { 30 | 0x8ca0df45, 0x37334f23, 0x4a5901d2, 0xaeede075, 0xd84bd3cf, 0xa1ce3350, 31 | 0x35074a8f, 0xfd4e6da0, 0xe2c22e6f, 0x045de97e, 0x0e6d45b9, 0x201624a2, 32 | 0x01e10dca, 0x2810aef2, 0xea0be721, 0x3a3781e4, 0xa3602009, 0xd2ffcf69, 33 | 0xff7102e9, 0x36fab972, 0x5c3650ff, 0x8cd44c9c, 0x25a4a676, 0xbd6385ce, 34 | 0xcd55c306, 0xec8a31f5, 0xa87b24ce, 0x1e025786, 0x53d713c9, 0xb29d308f, 35 | 0x0dc6cf3f, 0xf11139c9, 0x3afb3780, 0x0ed6b24c, 0xef04c8fe, 0xab53d825, 36 | 0x3ca69893, 0x35460fb1, 0x058ead73, 0x0b567c59, 0xfdddca3f, 0x6317e77d, 37 | 0xaa5febe5, 0x655f73e2, 0xd42455bb, 0xe845a8bb, 0x351e4a67, 0xa36a9dfb, 38 | 0x3e0ac91d, 0xbaa0de01, 0xec60dc66, 0xdb29309e, 0xcfa52971, 0x1f3eddaf, 39 | 0xe14aae61, 40 | }; 41 | 42 | // The last random number. 43 | static uint32_t randNum; 44 | 45 | void GenerateNextNumber() { 46 | randNum = y[j] + y[k]; 47 | y[k] = randNum; 48 | if (--j < 0) 49 | j = 54; 50 | if (--k < 0) 51 | k = 54; 52 | randNum &= 0x7fffffff; 53 | } 54 | 55 | void RandomGen::SetSeed(int32_t iseed) { 56 | j = 23; 57 | k = 54; 58 | 59 | if (iseed == 0) { 60 | for (int32_t i = 0; i < 55; i++) { 61 | y[i] = Z[i]; 62 | } 63 | } else { 64 | y[0] = (A * iseed + C) >> 1; 65 | for (int32_t i = 1; i < 55; i++) { 66 | y[i] = (A * y[i - 1] + C) >> 1; 67 | } 68 | } 69 | } 70 | 71 | uint32_t RandomGen::GetRand32WithinRange(uint32_t min, uint32_t max) { 72 | GenerateNextNumber(); 73 | return randNum % (max - min + 1) + min; 74 | } 75 | 76 | uint32_t RandomGen::GetRand32() { 77 | GenerateNextNumber(); 78 | return randNum; 79 | } 80 | 81 | uint64_t RandomGen::GetRand64() { 82 | uint64_t rand64; 83 | 84 | GenerateNextNumber(); 85 | rand64 = randNum; 86 | rand64 <<= 32; 87 | 88 | GenerateNextNumber(); 89 | rand64 += randNum; 90 | 91 | return rand64; 92 | } 93 | 94 | void RandomGen::GetRandBits(uint16_t bitCnt, unsigned char *dest) { 95 | uint16_t bytesNeeded = (bitCnt + 7) / 8; 96 | uint16_t index = 0; 97 | 98 | while (bytesNeeded > 0) { 99 | GenerateNextNumber(); 100 | uint16_t bytesConsumed = std::min(bytesNeeded, (uint16_t)4); 101 | memcpy(dest + index, &randNum, bytesConsumed); 102 | index += bytesConsumed; 103 | bytesNeeded -= bytesConsumed; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /lib/Scheduler/utilities.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/utilities.h" 2 | #include 3 | 4 | using namespace llvm::opt_sched; 5 | 6 | std::chrono::steady_clock::time_point Utilities::startTime = 7 | std::chrono::steady_clock::now(); 8 | -------------------------------------------------------------------------------- /lib/Wrapper/AMDGPU/GCNOptSched.cpp: -------------------------------------------------------------------------------- 1 | //===- GCNOptSched.cpp - AMDGCN Combinatorial scheudler -------------------===// 2 | // 3 | // Implements a combinatorial scheduling strategy for AMDGCN. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | 7 | #include "GCNOptSched.h" 8 | #include "AMDGPUExportClustering.h" 9 | #include "AMDGPUMacroFusion.h" 10 | #include "GCNSchedStrategy.h" 11 | #include "SIMachineFunctionInfo.h" 12 | #include "llvm/Support/Debug.h" 13 | 14 | #define DEBUG_TYPE "optsched" 15 | 16 | using namespace llvm::opt_sched; 17 | 18 | // FIXME: Temporary, eliminate 19 | static cl::opt 20 | GCNLimitOccWithHints("gcn-limit-occ-with-hints", 21 | cl::desc("Limit occpancy target using perf hints."), 22 | cl::init(false), cl::Hidden); 23 | 24 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 25 | static void getRealRegionPressure(MachineBasicBlock::const_iterator Begin, 26 | MachineBasicBlock::const_iterator End, 27 | const LiveIntervals *LIS, StringRef Label) { 28 | GCNDownwardRPTracker RP(*LIS); 29 | RP.advance(Begin, End, nullptr); 30 | dbgs() << "Dumping real RP " << Label << "\n"; 31 | RP.moveMaxPressure().dump(); 32 | } 33 | #endif 34 | 35 | ScheduleDAGOptSchedGCN::ScheduleDAGOptSchedGCN( 36 | llvm::MachineSchedContext *C, std::unique_ptr S) 37 | : ScheduleDAGOptSched(C, std::move(S)) {} 38 | 39 | void ScheduleDAGOptSchedGCN::initSchedulers() { 40 | // SchedPasses.push_back(GCNMaxOcc); 41 | // Add passes in the corresponding order that they are inserted. 42 | for (const auto &Pass : PassOrder) { 43 | if (Pass == "OCC") // MinRP pass 44 | SchedPasses.push_back(OptSchedMaxOcc); 45 | else if (Pass == "ILP") // Regular ILP Pass 46 | SchedPasses.push_back(OptSchedBalanced); 47 | else if (Pass == "ILP_RL") // ILP Reduced Latency Pass 48 | SchedPasses.push_back(OptSchedReducedLatency); 49 | else 50 | llvm::report_fatal_error("Invalid value for pass order: " + Pass, false); 51 | } 52 | 53 | // Also run the sequential scheduler with regular latencies to get the 54 | // actual schedule length 55 | if (CompileTimeDataPass) 56 | SchedPasses.push_back(OptSchedSeqScheduler); 57 | } 58 | 59 | // Execute scheduling passes. 60 | // Partially copied GCNScheduleDAGMILive::finalizeSchedule 61 | void ScheduleDAGOptSchedGCN::finalizeSchedule() { 62 | if (TwoPassEnabled && OptSchedEnabled) { 63 | initSchedulers(); 64 | RescheduleRegions.resize(Regions.size()); 65 | RescheduleRegions.set(); 66 | 67 | LLVM_DEBUG(dbgs() << "Starting two pass scheduling approach\n"); 68 | TwoPassSchedulingStarted = true; 69 | for (const SchedPassStrategy &S : SchedPasses) { 70 | MachineBasicBlock *MBB = nullptr; 71 | // Reset 72 | RegionNumber = ~0u; 73 | 74 | for (auto &Region : Regions) { 75 | RegionBegin = Region.first; 76 | RegionEnd = Region.second; 77 | 78 | if (RegionBegin->getParent() != MBB) { 79 | if (MBB) 80 | finishBlock(); 81 | MBB = RegionBegin->getParent(); 82 | startBlock(MBB); 83 | } 84 | unsigned NumRegionInstrs = std::distance(begin(), end()); 85 | enterRegion(MBB, begin(), end(), NumRegionInstrs); 86 | 87 | // Skip empty scheduling regions (0 or 1 schedulable instructions). 88 | if (begin() == end() || begin() == std::prev(end())) { 89 | exitRegion(); 90 | continue; 91 | } 92 | LLVM_DEBUG( 93 | getRealRegionPressure(RegionBegin, RegionEnd, LIS, "Before")); 94 | runSchedPass(S); 95 | LLVM_DEBUG(getRealRegionPressure(RegionBegin, RegionEnd, LIS, "After")); 96 | Region = std::make_pair(RegionBegin, RegionEnd); 97 | exitRegion(); 98 | } 99 | finishBlock(); 100 | } 101 | } 102 | 103 | ScheduleDAGMILive::finalizeSchedule(); 104 | 105 | LLVM_DEBUG(if (isSimRegAllocEnabled()) { 106 | dbgs() << "*************************************\n"; 107 | dbgs() << "Function: " << MF.getName() 108 | << "\nTotal Simulated Spills: " << SimulatedSpills << "\n"; 109 | dbgs() << "*************************************\n"; 110 | }); 111 | } 112 | 113 | void ScheduleDAGOptSchedGCN::runSchedPass(SchedPassStrategy S) { 114 | switch (S) { 115 | case GCNMaxOcc: 116 | scheduleGCNMaxOcc(); 117 | break; 118 | case OptSchedMaxOcc: 119 | scheduleOptSchedMaxOcc(); 120 | Logger::Event("PassFinished", "num", 1); 121 | break; 122 | case OptSchedBalanced: 123 | RecordTimedOutRegions = true; 124 | scheduleOptSchedBalanced(); 125 | RecordTimedOutRegions = false; 126 | Logger::Event("PassFinished", "num", 2); 127 | break; 128 | case OptSchedReducedLatency: 129 | scheduleWithReducedLatencies(); 130 | Logger::Event("PassFinished", "num", 3); 131 | break; 132 | case OptSchedSeqScheduler: 133 | scheduleWithSeqScheduler(); 134 | Logger::Event("PassFinished", "num", 4); 135 | break; 136 | } 137 | } 138 | 139 | void ScheduleDAGOptSchedGCN::scheduleGCNMaxOcc() { 140 | auto &S = (GCNMaxOccupancySchedStrategy &)*SchedImpl; 141 | if (GCNLimitOccWithHints) { 142 | const auto &MFI = *MF.getInfo(); 143 | S.setTargetOccupancy(MFI.getMinAllowedOccupancy()); 144 | } 145 | 146 | ScheduleDAGMILive::schedule(); 147 | } 148 | 149 | void ScheduleDAGOptSchedGCN::scheduleOptSchedMaxOcc() { 150 | ScheduleDAGOptSched::scheduleOptSchedMinRP(); 151 | } 152 | 153 | void ScheduleDAGOptSchedGCN::scheduleOptSchedBalanced() { 154 | ScheduleDAGOptSched::scheduleOptSchedBalanced(); 155 | } 156 | -------------------------------------------------------------------------------- /lib/Wrapper/AMDGPU/GCNOptSched.h: -------------------------------------------------------------------------------- 1 | //===- GCNOptSched.h - AMDGCN Combinatorial scheudler -----------*- C++ -*-===// 2 | // 3 | // OptSched combinatorial scheduler driver targeting AMDGCN. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | 7 | #ifndef LLVM_GCN_OPT_SCHED_H 8 | #define LLVM_GCN_OPT_SCHED_H 9 | 10 | #include "GCNRegPressure.h" 11 | #include "Wrapper/OptimizingScheduler.h" 12 | 13 | namespace llvm { 14 | namespace opt_sched { 15 | 16 | class ScheduleDAGOptSchedGCN : public ScheduleDAGOptSched { 17 | private: 18 | enum SchedPassStrategy { 19 | GCNMaxOcc, 20 | OptSchedMaxOcc, 21 | OptSchedBalanced, 22 | OptSchedReducedLatency, 23 | OptSchedSeqScheduler 24 | }; 25 | 26 | // Vector of scheduling passes to execute. 27 | SmallVector SchedPasses; 28 | 29 | public: 30 | ScheduleDAGOptSchedGCN(llvm::MachineSchedContext *C, 31 | std::unique_ptr S); 32 | 33 | // After the scheduler is initialized and the scheduling regions have been 34 | // recorded, execute the actual scheduling passes here. 35 | void finalizeSchedule() override; 36 | 37 | // Setup and select schedulers. 38 | void initSchedulers() override; 39 | 40 | // TODO: After we refactor OptSched scheduler options put each scheduling 41 | // pass into its own class. 42 | 43 | // Execute a scheduling pass on the function. 44 | void runSchedPass(SchedPassStrategy S); 45 | 46 | // Run GCN max occupancy scheduler. 47 | void scheduleGCNMaxOcc(); 48 | 49 | // Run OptSched in RP only (max occupancy) configuration. 50 | void scheduleOptSchedMaxOcc(); 51 | 52 | // Run OptSched in ILP/RP balanced mode. 53 | void scheduleOptSchedBalanced() override; 54 | }; 55 | 56 | } // namespace opt_sched 57 | } // namespace llvm 58 | 59 | #endif // LLVM_GCN_OPT_SCHED_H 60 | -------------------------------------------------------------------------------- /lib/Wrapper/AMDGPU/GCNOptSchedReg.h: -------------------------------------------------------------------------------- 1 | #ifndef OPT_SCHED_REG 2 | #define OPT_SCHED_REG 3 | 4 | #include "Wrapper/AMDGPU/GCNOptSched.h" 5 | #include "Wrapper/AMDGPU/OptSchedGCNTarget.cpp" 6 | #include "llvm/CodeGen/MachineScheduler.h" 7 | #include "llvm/Support/raw_ostream.h" 8 | 9 | using namespace llvm; 10 | 11 | namespace llvm { 12 | namespace opt_sched { 13 | 14 | // Create OptSched ScheduleDAG. 15 | static ScheduleDAGInstrs *createOptSchedGCN(MachineSchedContext *C) { 16 | ScheduleDAGMILive *DAG = new ScheduleDAGOptSchedGCN( 17 | C, std::make_unique(C)); 18 | DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 19 | DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 20 | return DAG; 21 | } 22 | 23 | static MachineSchedRegistry 24 | OptSchedGCNMIRegistry("gcn-optsched", "Use the GCN OptSched scheduler.", 25 | createOptSchedGCN); 26 | 27 | } // namespace opt_sched 28 | } // namespace llvm 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /lib/Wrapper/AMDGPU/OptSchedDDGWrapperGCN.h: -------------------------------------------------------------------------------- 1 | //===-- OptSchedDDGWrapperGCN.h - GCN DDG Wrapper ---------------*- C++ -*-===// 2 | // 3 | // Conversion from LLVM ScheduleDAG to OptSched DDG for amdgcn target. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | 7 | #ifndef LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H 8 | #define LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H 9 | 10 | #include "GCNRegPressure.h" 11 | #include "Wrapper/OptSchedDDGWrapperBasic.h" 12 | #include "Wrapper/OptimizingScheduler.h" 13 | #include "opt-sched/Scheduler/sched_basic_data.h" 14 | #include "llvm/ADT/DenseMap.h" 15 | #include "llvm/CodeGen/LiveIntervals.h" 16 | 17 | namespace llvm { 18 | namespace opt_sched { 19 | 20 | class SubRegSet { 21 | private: 22 | // Index subreg lanes to OptSched register numbers. Even though we can't map 23 | // a LaneBitmask index to a specific sub-register, we can still accurately 24 | // model the correct number of live subregs using lane mask interference. 25 | SmallVector OptSchedRegMap; 26 | 27 | public: 28 | using iterator = SmallVector::iterator; 29 | // The max number of subregs for this virtual register. 30 | unsigned Size; 31 | // OptSched register type 32 | int16_t Type; 33 | 34 | iterator begin() { return OptSchedRegMap.begin(); } 35 | iterator end() { return OptSchedRegMap.end(); } 36 | 37 | SubRegSet(unsigned Size_, int16_t Type_) : Size(Size_), Type(Type_) { 38 | OptSchedRegMap.resize(Size); 39 | } 40 | ~SubRegSet() = default; 41 | }; 42 | 43 | class OptSchedDDGWrapperGCN : public OptSchedDDGWrapperBasic { 44 | private: 45 | // Map sub-registers in LLVM to a list of live subreg lanes for that register. 46 | // Each live lane represents either a VGPR32 or SGPR32. In our model each live 47 | // subreg lane is identified by a separate OptSched register. 48 | using RegsMap = DenseMap>; 49 | RegsMap RegionRegs; 50 | const std::vector &SUnits; 51 | const llvm::LiveIntervals *LIS; 52 | const llvm::MachineRegisterInfo &MRI; 53 | 54 | unsigned getRegKind(unsigned Reg) const; 55 | 56 | void addLiveSubRegsAtInstr(const MachineInstr *MI, bool After); 57 | 58 | void addSubRegDefs(SchedInstruction *Instr, unsigned Reg, 59 | const LaneBitmask &LiveMask, bool LiveIn = false); 60 | 61 | void addSubRegUses(SchedInstruction *Instr, unsigned Reg, 62 | const LaneBitmask &LiveMask, bool LiveOut = false); 63 | 64 | public: 65 | // FIXME: Track VGPR/SGPR tuples or refactor Scheduler to use LLVM/GCN RP 66 | // tracker. 67 | enum SubRegKind { SGPR32, VGPR32, TOTAL_KINDS }; 68 | 69 | OptSchedDDGWrapperGCN(llvm::MachineSchedContext *Context, 70 | ScheduleDAGOptSched *DAG, OptSchedMachineModel *MM, 71 | LATENCY_PRECISION LatencyPrecision, 72 | const std::string &RegionID); 73 | 74 | void convertRegFiles() override; 75 | }; 76 | 77 | } // end namespace opt_sched 78 | } // end namespace llvm 79 | 80 | #endif // LLVM_OPT_SCHED_DDG_WRAPPER_GCN_H 81 | -------------------------------------------------------------------------------- /lib/Wrapper/OptSchedGenericTarget.cpp: -------------------------------------------------------------------------------- 1 | //===- OptSchedGenericTarget.cpp - Generic Target -------------------------===// 2 | // 3 | // Implements a generic target stub. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | #include "OptSchedDDGWrapperBasic.h" 7 | #include "OptSchedMachineWrapper.h" 8 | #include "opt-sched/Scheduler/OptSchedTarget.h" 9 | #include "opt-sched/Scheduler/config.h" 10 | #include "opt-sched/Scheduler/defines.h" 11 | #include "opt-sched/Scheduler/machine_model.h" 12 | #include "llvm/ADT/STLExtras.h" 13 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" 14 | #include 15 | 16 | using namespace llvm; 17 | using namespace llvm::opt_sched; 18 | 19 | OptSchedRegistry 20 | OptSchedTargetRegistry::Registry; 21 | 22 | namespace { 23 | 24 | class OptSchedGenericTarget : public OptSchedTarget { 25 | public: 26 | std::unique_ptr 27 | createMachineModel(const char *ConfigPath) override { 28 | return std::make_unique(ConfigPath); 29 | } 30 | 31 | std::unique_ptr 32 | createDDGWrapper(llvm::MachineSchedContext *Context, ScheduleDAGOptSched *DAG, 33 | OptSchedMachineModel *MM, LATENCY_PRECISION LatencyPrecision, 34 | const std::string &RegionID) override { 35 | return std::make_unique( 36 | Context, DAG, MM, LatencyPrecision, RegionID); 37 | } 38 | 39 | void initRegion(llvm::ScheduleDAGInstrs *DAG, MachineModel *MM_, 40 | Config &OccFile) override { 41 | MM = MM_; 42 | } 43 | void finalizeRegion(const InstSchedule *Schedule) override {} 44 | // For generic target find total PRP. 45 | InstCount getCost(const llvm::SmallVectorImpl &PRP) const override; 46 | 47 | void SetOccupancyLimit(int OccupancyLimitParam) override { /*nothing*/ 48 | ; 49 | } 50 | void SetShouldLimitOcc(bool ShouldLimitOccParam) override { /*nothing*/ 51 | ; 52 | } 53 | void SetOccLimitSource(OCC_LIMIT_TYPE LimitTypeParam) override { /*nothing*/ 54 | ; 55 | } 56 | }; 57 | 58 | } // end anonymous namespace 59 | 60 | InstCount OptSchedGenericTarget::getCost( 61 | const llvm::SmallVectorImpl &PRP) const { 62 | Logger::Info("in generic get cost"); 63 | InstCount TotalPRP = 0; 64 | for (int16_t T = 0; T < MM->GetRegTypeCnt(); ++T) 65 | TotalPRP += PRP[T]; 66 | return TotalPRP; 67 | } 68 | 69 | namespace llvm { 70 | namespace opt_sched { 71 | 72 | std::unique_ptr createOptSchedGenericTarget() { 73 | return std::make_unique(); 74 | } 75 | 76 | OptSchedTargetRegistry 77 | OptSchedGenericTargetRegistry("generic", createOptSchedGenericTarget); 78 | 79 | } // namespace opt_sched 80 | } // namespace llvm 81 | -------------------------------------------------------------------------------- /lib/Wrapper/OptSchedMachineWrapper.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Description: A wrapper that converts an LLVM target to an OptSched 3 | MachineModel. By default machine models are read from ini files however 4 | MachineModelGenerator classes may supplement or override the information 5 | contained in those ini files. 6 | *******************************************************************************/ 7 | 8 | #ifndef OPTSCHED_MACHINE_MODEL_WRAPPER_H 9 | #define OPTSCHED_MACHINE_MODEL_WRAPPER_H 10 | 11 | #include "opt-sched/Scheduler/machine_model.h" 12 | #include "llvm/CodeGen/MachineInstr.h" 13 | #include "llvm/CodeGen/MachineScheduler.h" 14 | #include "llvm/CodeGen/RegisterClassInfo.h" 15 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" 16 | #include "llvm/CodeGen/TargetRegisterInfo.h" 17 | #include "llvm/MC/MCInstrItineraries.h" 18 | #include 19 | 20 | using namespace llvm; 21 | 22 | namespace llvm { 23 | namespace opt_sched { 24 | 25 | class MachineModelGenerator; 26 | 27 | // A wrapper for the OptSched MachineModel 28 | class OptSchedMachineModel : public MachineModel { 29 | public: 30 | // Use a config file to initialize the machine model. 31 | OptSchedMachineModel(const char *configFile); 32 | // Convert information about the target machine into the 33 | // optimal scheduler machine model 34 | void convertMachineModel(const llvm::ScheduleDAGInstrs &dag, 35 | const llvm::RegisterClassInfo *regClassInfo); 36 | MachineModelGenerator *getMMGen() { return MMGen.get(); } 37 | ~OptSchedMachineModel() = default; 38 | 39 | private: 40 | // Should a machine model be generated. 41 | bool shouldGenerateMM; 42 | // The machine model generator class. 43 | std::unique_ptr MMGen; 44 | }; 45 | 46 | // Generate a machine model for a specific chip. 47 | class MachineModelGenerator { 48 | public: 49 | // Generate instruction scheduling type for all instructions in the current 50 | // DAG that do not already have assigned instruction types. 51 | virtual InstType generateInstrType(const llvm::MachineInstr *instr) = 0; 52 | virtual bool generatesAllData() = 0; 53 | virtual void generateProcessorData(std::string *mdlName_, int *issueRate_) {} 54 | virtual ~MachineModelGenerator() = default; 55 | }; 56 | 57 | // Generate a machine model for the Cortex A7. This will only generate 58 | // instruction types. Things like issue type and issue rate must be specified 59 | // correctly in the machine_model.cfg file. Check 60 | // OptSchedCfg/arch/ARM_cortex_a7_machine_model.cfg for a template. 61 | class CortexA7MMGenerator : public MachineModelGenerator { 62 | public: 63 | CortexA7MMGenerator(const llvm::ScheduleDAGInstrs *dag, MachineModel *mm); 64 | // Generate instruction scheduling type for all instructions in the current 65 | // DAG by using LLVM itineraries. 66 | InstType generateInstrType(const llvm::MachineInstr *instr); 67 | bool generatesAllData() { return false; } 68 | virtual ~CortexA7MMGenerator() = default; 69 | 70 | private: 71 | // Functional Units 72 | enum FU : unsigned { 73 | Pipe0 = 1, // 00000001 74 | Pipe1 = 2, // 00000010 75 | LSPipe = 4, // 00000100 76 | NPipe = 8, // 00001000 77 | NLSPipe = 16 // 00010000 78 | }; 79 | const llvm::ScheduleDAGInstrs *DAG; 80 | MachineModel *MM; 81 | const llvm::InstrItineraryData *IID; 82 | 83 | // Returns true if a machine instruction should be considered fully pipelined 84 | // in the machine model. 85 | bool isMIPipelined(const llvm::MachineInstr *inst, unsigned idx) const; 86 | // Find the issue type for an instruction. 87 | IssueType generateIssueType(const llvm::InstrStage *E) const; 88 | }; 89 | 90 | class CortexA53MMGenerator : public MachineModelGenerator { 91 | public: 92 | CortexA53MMGenerator(const llvm::ScheduleDAGInstrs *dag, MachineModel *mm) 93 | : DAG(dag), MM(mm) {} 94 | InstType generateInstrType(const llvm::MachineInstr *instr); 95 | bool generatesAllData() { return true; } 96 | void generateProcessorData(std::string *mdlName_, int *issueRate_); 97 | 98 | private: 99 | std::vector ResourceIdToIssueType; 100 | const llvm::ScheduleDAGInstrs *DAG; 101 | MachineModel *MM; 102 | }; 103 | 104 | } // end namespace opt_sched 105 | } // namespace llvm 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /lib/Wrapper/OptSchedReg.h: -------------------------------------------------------------------------------- 1 | #ifndef OPT_SCHED_REG 2 | #define OPT_SCHED_REG 3 | 4 | #include "OptimizingScheduler.h" 5 | #include "llvm/CodeGen/MachineScheduler.h" 6 | 7 | using namespace llvm; 8 | 9 | namespace llvm { 10 | namespace opt_sched { 11 | 12 | // Create OptSched ScheduleDAG. 13 | static ScheduleDAGInstrs *createOptSched(MachineSchedContext *C) { 14 | ScheduleDAGMILive *DAG = 15 | new ScheduleDAGOptSched(C, std::make_unique(C)); 16 | DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); 17 | // README: if you need the x86 mutations uncomment the next line. 18 | // addMutation(createX86MacroFusionDAGMutation()); 19 | // You also need to add the next line somewhere above this function 20 | //#include "../../../../../llvm/lib/Target/X86/X86MacroFusion.h" 21 | return DAG; 22 | } 23 | 24 | // Register the machine scheduler. 25 | static MachineSchedRegistry OptSchedMIRegistry("optsched", 26 | "Use the OptSched scheduler.", 27 | createOptSched); 28 | 29 | } // namespace opt_sched 30 | } // namespace llvm -------------------------------------------------------------------------------- /patches/amdgpu/README.md: -------------------------------------------------------------------------------- 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/) 2 | 3 | # OptSched - Optimizing Scheduler 4 | This directory contains patches for ROCm 2.4. These patches cause ROCm 2.4 to properly load OptSched and also print occupancy information. 5 | 6 | ## Files 7 | 8 | `ROCm-2.4-LLVM-print-occupancy.patch` 9 | 10 | Output occupancy info. 11 | 12 | `ROCm-2.4-load-optsched.patch` 13 | 14 | Load the OptSched.so plugin. 15 | -------------------------------------------------------------------------------- /patches/amdgpu/ROCm-2.4-LLVM-print-occupancy.patch: -------------------------------------------------------------------------------- 1 | diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 2 | index d704a0fae0d..b988d540000 100644 3 | --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 4 | +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 5 | @@ -489,7 +489,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 6 | OutStreamer->EmitBytes(StringRef(Comment)); 7 | } 8 | } 9 | - 10 | + const GCNSubtarget &STI = MF.getSubtarget(); 11 | + auto OccVGPRs = STI.getOccupancyWithNumVGPRs(CurrentProgramInfo.NumVGPR); 12 | + auto OccSGPRs = STI.getOccupancyWithNumSGPRs(CurrentProgramInfo.NumSGPR); 13 | + auto OccLDS = STI.getOccupancyWithLocalMemSize(MF); 14 | + 15 | + auto Occ = std::min(OccVGPRs, std::min(OccSGPRs, OccLDS)); 16 | + dbgs() << "Final occupancy for function " << MF.getName() << ":" << Occ << "\n"; 17 | return false; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /patches/amdgpu/ROCm-2.4-load-optsched.patch: -------------------------------------------------------------------------------- 1 | From 84ad382227da24c727fc60c998c4f0c61a5afe0f Mon Sep 17 00:00:00 2001 2 | From: Austin Kerbow 3 | Date: Sun, 25 Nov 2018 20:43:22 -0800 4 | Subject: [PATCH] Load plugins in-process. 5 | 6 | --- 7 | src/driver/AmdCompiler.cpp | 15 +++++++++++++++ 8 | 1 file changed, 15 insertions(+) 9 | 10 | diff --git a/src/driver/AmdCompiler.cpp b/src/driver/AmdCompiler.cpp 11 | index 8d34644..e4a4ebd 100644 12 | --- a/src/driver/AmdCompiler.cpp 13 | +++ b/src/driver/AmdCompiler.cpp 14 | @@ -368,6 +368,7 @@ class AMDGPUCompiler : public Compiler { 15 | ArgStringList GetJobArgsFitered(const Command& job); 16 | // Parse -mllvm options 17 | bool ParseLLVMOptions(const std::vector& options); 18 | + bool LoadPlugins(const std::vector& plugins); 19 | bool PrepareCompiler(CompilerInstance& clang, const Command& job); 20 | bool PrepareAssembler(AssemblerInvocation &Opts, const Command& job); 21 | bool ExecuteCompiler(CompilerInstance& clang, BackendAction action); 22 | @@ -772,6 +773,19 @@ bool AMDGPUCompiler::ParseLLVMOptions(const std::vector& options) { 23 | return true; 24 | } 25 | 26 | +bool AMDGPUCompiler::LoadPlugins(const std::vector& plugins) { 27 | + if (plugins.empty()) { return true; } 28 | + // Load any requested plugins. 29 | + for (const auto &path : plugins) { 30 | + std::string error; 31 | + if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(path.c_str(), &error)) { 32 | + diags.Report(diag::err_fe_unable_to_load_plugin) << path << error; 33 | + return false; 34 | + } 35 | + } 36 | + return true; 37 | +} 38 | + 39 | void AMDGPUCompiler::ResetOptionsToDefault() { 40 | cl::ResetAllOptionOccurrences(); 41 | for (auto SC : cl::getRegisteredSubcommands()) { 42 | @@ -791,6 +805,7 @@ bool AMDGPUCompiler::PrepareCompiler(CompilerInstance& clang, const Command& job 43 | const_cast(args.data()), 44 | const_cast(args.data()) + args.size(), 45 | clang.getDiagnostics())) { return false; } 46 | + if (!LoadPlugins(clang.getFrontendOpts().Plugins)) { return false; } 47 | if (!ParseLLVMOptions(clang.getFrontendOpts().LLVMArgs)) { return false; } 48 | return true; 49 | } 50 | -------------------------------------------------------------------------------- /patches/llvm6.0/README.md: -------------------------------------------------------------------------------- 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/) 2 | 3 | # OptSched - Optimizing Scheduler 4 | This directory contains patches for LLVM 6.0. These patches must be applied before building LLVM 6.0 to print spilling information. 5 | 6 | ## Files 7 | 8 | `llvm6-print-spilling-info.patch` 9 | 10 | Thils file is for LLVM 6.0 located under the release/6.x branch at https://github.com/llvm/llvm-project. 11 | 12 | `flang-llvm6-print-spilling-info.patch` 13 | 14 | This file is for Flang LLVM 6.0 located under the release_60 branch at https://github.com/flang-compiler/llvm. 15 | -------------------------------------------------------------------------------- /patches/llvm7.0/README.md: -------------------------------------------------------------------------------- 1 | [![CSUS](http://www.csus.edu/Brand/assets/Logos/Core/Primary/Stacked/Primary_Stacked_3_Color_wht_hndTN.png)](http://www.csus.edu/) 2 | 3 | # OptSched - Optimizing Scheduler 4 | This directory contains patches for LLVM 7.0. These patches must be applied before building LLVM 7.0 to print spilling information. 5 | 6 | ## Files 7 | 8 | `flang-llvm7-print-spilling-info.patch` 9 | 10 | This file is for Flang LLVM 7.0 located under the release_70 branch at https://github.com/flang-compiler/llvm. 11 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test runner infrastructure for OptSched. This configures the OptSched test trees 2 | # for use by Lit, and delegates to LLVM's lit test handlers. 3 | 4 | configure_lit_site_cfg( 5 | ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in 6 | ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py 7 | MAIN_CONFIG 8 | ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py 9 | ) 10 | 11 | configure_lit_site_cfg( 12 | ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in 13 | ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py 14 | MAIN_CONFIG 15 | ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py 16 | ) 17 | 18 | list(APPEND OPTSCHED_TEST_DEPS 19 | LLVMOptSched 20 | ) 21 | 22 | set(OPTSCHED_TEST_PARAMS 23 | optsched_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg 24 | ) 25 | 26 | add_custom_target(optsched-test-depends DEPENDS ${OPTSCHED_TEST_DEPS}) 27 | set_target_properties(optsched-test-depends PROPERTIES FOLDER "OptSched tests") 28 | 29 | add_lit_testsuite(check-optsched "Running the OptSched regression tests" 30 | ${CMAKE_CURRENT_BINARY_DIR} 31 | PARAMS ${OPTSCHED_TEST_PARAMS} 32 | DEPENDS ${OPTSCHED_TEST_DEPS} 33 | ARGS ${OPTSCHED_TEST_EXTRA_ARGS} ${OPTSCHED_LIT_ARGS} 34 | ) 35 | set_target_properties(check-optsched PROPERTIES FOLDER "OptSched tests") 36 | 37 | add_lit_testsuites(OPTSCHED ${CMAKE_CURRENT_SOURCE_DIR} 38 | PARAMS ${OPTSCHED_TEST_PARAMS} 39 | DEPENDS ${OPTSCHED_TEST_DEPS} 40 | ) 41 | -------------------------------------------------------------------------------- /test/Unit/lit.cfg.py: -------------------------------------------------------------------------------- 1 | # -*- Python -*- 2 | 3 | # Configuration file for the 'lit' test runner. 4 | 5 | import os 6 | import platform 7 | import subprocess 8 | 9 | import lit.formats 10 | import lit.util 11 | 12 | # name: The name of this test suite. 13 | config.name = 'OptSched-Unit' 14 | 15 | # suffixes: A list of file extensions to treat as test files. 16 | config.suffixes = [] 17 | 18 | # test_source_root: The root path where tests are located. 19 | # test_exec_root: The root path where tests should be run. 20 | config.test_exec_root = os.path.join(config.optsched_obj_root, 'unittests') 21 | config.test_source_root = config.test_exec_root 22 | 23 | config.excludes = ['llvm-project-optsched', 'llvm_build_dirs'] 24 | 25 | # testFormat: The test format to use to interpret tests. 26 | config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, 'Tests') 27 | -------------------------------------------------------------------------------- /test/Unit/lit.site.cfg.py.in: -------------------------------------------------------------------------------- 1 | @LIT_SITE_CFG_IN_HEADER@ 2 | 3 | import sys 4 | 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@" 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@" 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@" 9 | config.llvm_build_mode = "@LLVM_BUILD_MODE@" 10 | config.optsched_obj_root = "@OPTSCHED_BINARY_DIR@" 11 | config.enable_shared = @ENABLE_SHARED@ 12 | config.shlibdir = "@SHLIBDIR@" 13 | config.target_triple = "@TARGET_TRIPLE@" 14 | 15 | # Support substitution of the tools_dir, libs_dirs, and build_mode with user 16 | # parameters. This is used when we can't determine the tool dir at 17 | # configuration time. 18 | try: 19 | config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params 20 | config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params 21 | config.llvm_build_mode = config.llvm_build_mode % lit_config.params 22 | except KeyError: 23 | e = sys.exc_info()[1] 24 | key, = e.args 25 | lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) 26 | 27 | # Let the main config do the real work. 28 | lit_config.load_config(config, "@OPTSCHED_SOURCE_DIR@/test/Unit/lit.cfg.py") 29 | -------------------------------------------------------------------------------- /test/lit.cfg.py: -------------------------------------------------------------------------------- 1 | # -*- Python -*- 2 | 3 | import os 4 | import platform 5 | import re 6 | import subprocess 7 | import tempfile 8 | 9 | import lit.formats 10 | import lit.util 11 | 12 | from lit.llvm.subst import ToolSubst 13 | from lit.llvm.subst import FindTool 14 | 15 | # Configuration file for the 'lit' test runner. 16 | 17 | # name: The name of this test suite. 18 | config.name = 'OptSched' 19 | 20 | # suffixes: A list of file extensions to treat as test files. 21 | config.suffixes = ['.c', '.cpp', '.cppm', '.m', '.mm', '.cu', 22 | '.ll', '.cl', '.s', '.S', '.modulemap', '.test', '.rs'] 23 | 24 | # excludes: A list of directories to exclude from the testsuite. The 'Inputs' 25 | # subdirectories contain auxiliary inputs for various tests in their parent 26 | # directories. 27 | config.excludes = [] 28 | 29 | # test_source_root: The root path where tests are located. 30 | config.test_source_root = os.path.dirname(__file__) 31 | 32 | # test_exec_root: The root path where tests should be run. 33 | config.test_exec_root = os.path.join(config.optsched_obj_root, 'test') 34 | 35 | config.substitutions.append(('%PATH%', config.environment['PATH'])) 36 | -------------------------------------------------------------------------------- /test/lit.site.cfg.py.in: -------------------------------------------------------------------------------- 1 | @LIT_SITE_CFG_IN_HEADER@ 2 | 3 | import sys 4 | 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@" 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@" 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@" 9 | config.llvm_shlib_dir = "@SHLIBDIR@" 10 | config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@" 11 | config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" 12 | config.optsched_obj_root = "@OPTSCHED_BINARY_DIR@" 13 | config.optsched_src_dir = "@OPTSCHED_SOURCE_DIR@" 14 | config.host_triple = "@LLVM_HOST_TRIPLE@" 15 | config.target_triple = "@TARGET_TRIPLE@" 16 | config.host_cxx = "@CMAKE_CXX_COMPILER@" 17 | config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" 18 | config.enable_shared = @ENABLE_SHARED@ 19 | config.host_arch = "@HOST_ARCH@" 20 | config.enable_abi_breaking_checks = "@LLVM_ENABLE_ABI_BREAKING_CHECKS@" 21 | config.python_executable = "@PYTHON_EXECUTABLE@" 22 | 23 | # Support substitution of the tools and libs dirs with user parameters. This is 24 | # used when we can't determine the tool dir at configuration time. 25 | try: 26 | config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params 27 | config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params 28 | config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params 29 | except KeyError: 30 | e = sys.exc_info()[1] 31 | key, = e.args 32 | lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) 33 | 34 | @LIT_SITE_CFG_IN_FOOTER@ 35 | 36 | # Let the main config do the real work. 37 | lit_config.load_config(config, "@OPTSCHED_SOURCE_DIR@/test/lit.cfg.py") 38 | -------------------------------------------------------------------------------- /unittests/Basic/ArrayRef2DTest.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/array_ref2d.h" 2 | 3 | #include 4 | 5 | #include "gtest/gtest.h" 6 | 7 | using namespace llvm::opt_sched; 8 | 9 | namespace { 10 | TEST(ArrayRef2D, CanAccessElements) { 11 | int Arr[] = { 12 | 1, 2, 3, // 13 | 4, 5, 6, 14 | }; 15 | 16 | ArrayRef2D Ref(Arr, 2, 3); 17 | EXPECT_EQ(1, (Ref[{0, 0}])); 18 | EXPECT_EQ(2, (Ref[{0, 1}])); 19 | EXPECT_EQ(3, (Ref[{0, 2}])); 20 | EXPECT_EQ(4, (Ref[{1, 0}])); 21 | EXPECT_EQ(5, (Ref[{1, 1}])); 22 | EXPECT_EQ(6, (Ref[{1, 2}])); 23 | } 24 | 25 | TEST(ArrayRef2D, CanGetRowsAndColumns) { 26 | int Arr[] = { 27 | 1, 2, 3, // 28 | 4, 5, 6, 29 | }; 30 | 31 | ArrayRef2D Ref(Arr, 2, 3); 32 | EXPECT_EQ(2, Ref.rows()); 33 | EXPECT_EQ(3, Ref.columns()); 34 | } 35 | 36 | TEST(ArrayRef2D, AccessReturnsReferenceToElements) { 37 | int Arr[] = { 38 | 1, 2, 3, // 39 | 4, 5, 6, 40 | }; 41 | 42 | ArrayRef2D Ref(Arr, 2, 3); 43 | EXPECT_EQ(&Arr[0], &(Ref[{0, 0}])); 44 | } 45 | 46 | TEST(ArrayRef2D, AccessDoesNotAllowChanges) { 47 | int Arr[] = { 48 | 1, 2, 3, // 49 | 4, 5, 6, 50 | }; 51 | 52 | ArrayRef2D Ref(Arr, 2, 3); 53 | static_assert(std::is_same::value, ""); 54 | } 55 | 56 | TEST(ArrayRef2D, RequiresRectangle) { 57 | int Arr[] = { 58 | 1, 2, 3, // 59 | 4, 5, 60 | }; 61 | 62 | EXPECT_DEBUG_DEATH(ArrayRef2D(Arr, 2, 3), ".*"); 63 | } 64 | 65 | TEST(ArrayRef2D, AccessingFailsForOutOfBounds) { 66 | int Arr[] = { 67 | 1, 2, 3, // 68 | 4, 5, 6, 69 | }; 70 | 71 | ArrayRef2D Ref(Arr, 2, 3); 72 | EXPECT_DEBUG_DEATH((Ref[{5, 10}]), ".*"); 73 | } 74 | 75 | TEST(ArrayRef2D, WorksForEmpty) { 76 | std::array Arr{}; 77 | 78 | ArrayRef2D Ref(Arr, 0, 0); 79 | EXPECT_EQ(0u, Ref.rows()); 80 | EXPECT_EQ(0u, Ref.columns()); 81 | EXPECT_EQ(0u, Ref.underlyingData().size()); 82 | } 83 | 84 | TEST(ArrayRef2D, AccessingEmptyRefFails) { 85 | std::array Arr{}; 86 | 87 | ArrayRef2D Ref(Arr, 0, 0); 88 | EXPECT_DEBUG_DEATH((Ref[{0, 0}]), ".*"); 89 | } 90 | 91 | TEST(ArrayRef2D, UnderlyingDataIsArrayRef) { 92 | int Arr[] = { 93 | 1, 2, 3, // 94 | 4, 5, 6, 95 | }; 96 | 97 | ArrayRef2D Ref(Arr, 2, 3); 98 | static_assert( 99 | std::is_same, decltype(Ref.underlyingData())>::value, 100 | ""); 101 | } 102 | 103 | TEST(MutableArrayRef2D, IsConvertibleToArrayRef2D) { 104 | static_assert( 105 | std::is_convertible, ArrayRef2D>::value, ""); 106 | } 107 | 108 | TEST(MutableArrayRef2D, UnderlyingDataIsMutableArrayRef) { 109 | int Arr[] = { 110 | 1, 2, 3, // 111 | 4, 5, 6, 112 | }; 113 | 114 | MutableArrayRef2D Ref(Arr, 2, 3); 115 | static_assert(std::is_same, 116 | decltype(Ref.underlyingData())>::value, 117 | ""); 118 | } 119 | 120 | TEST(MutableArrayRef2D, CanMutateViaAccess) { 121 | int Arr[] = { 122 | 1, 2, 3, // 123 | 4, 5, 6, 124 | }; 125 | 126 | MutableArrayRef2D Ref(Arr, 2, 3); 127 | Ref[{1, 1}] = -5; 128 | EXPECT_EQ(-5, (Ref[{1, 1}])); 129 | EXPECT_EQ(-5, Arr[4]); 130 | } 131 | } // namespace 132 | -------------------------------------------------------------------------------- /unittests/Basic/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_optsched_unittest(OptSchedBasicTests 2 | ArrayRef2DTest.cpp 3 | ConfigTest.cpp 4 | LinkedListTest.cpp 5 | LoggerTest.cpp 6 | UtilitiesTest.cpp 7 | simple_machine_model_test.cpp 8 | ) 9 | -------------------------------------------------------------------------------- /unittests/Basic/ConfigTest.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/config.h" 2 | 3 | #include 4 | 5 | #include "gtest/gtest.h" 6 | 7 | using llvm::opt_sched::Config; 8 | using llvm::opt_sched::SchedulerOptions; 9 | 10 | namespace { 11 | 12 | TEST(Config, ReadString) { 13 | Config config; 14 | std::istringstream input(R"( 15 | KEY VALUE 16 | )"); 17 | config.Load(input); 18 | 19 | EXPECT_EQ("VALUE", config.GetString("KEY")); 20 | } 21 | 22 | TEST(Config, ReadStringPath) { 23 | Config config; 24 | std::istringstream input(R"( 25 | KEY some/path/ 26 | )"); 27 | config.Load(input); 28 | 29 | EXPECT_EQ("some/path/", config.GetString("KEY")); 30 | } 31 | 32 | TEST(Config, ReadInt) { 33 | Config config; 34 | std::istringstream input(R"( 35 | KEY 1 36 | )"); 37 | config.Load(input); 38 | 39 | EXPECT_EQ(1, config.GetInt("KEY")); 40 | } 41 | 42 | TEST(Config, ReadFloat) { 43 | Config config; 44 | std::istringstream input(R"( 45 | KEY 1.3 46 | )"); 47 | config.Load(input); 48 | 49 | EXPECT_EQ(1.3f, config.GetFloat("KEY")); 50 | } 51 | 52 | class TrueBoolTest : public testing::TestWithParam {}; 53 | 54 | TEST_P(TrueBoolTest, ReadBool) { 55 | Config config; 56 | std::istringstream input("KEY " + GetParam()); 57 | config.Load(input); 58 | 59 | EXPECT_TRUE(config.GetBool("KEY")); 60 | } 61 | 62 | INSTANTIATE_TEST_CASE_P(TrueBoolStrings, TrueBoolTest, 63 | testing::Values("1", "yes", "YES", "true", "TRUE"), ); 64 | 65 | class FalseBoolTest : public testing::TestWithParam {}; 66 | 67 | TEST_P(FalseBoolTest, ReadBool) { 68 | Config config; 69 | std::istringstream input("KEY " + GetParam()); 70 | config.Load(input); 71 | 72 | EXPECT_FALSE(config.GetBool("KEY")); 73 | } 74 | 75 | INSTANTIATE_TEST_CASE_P(FalseBoolStrings, FalseBoolTest, 76 | testing::Values("0", "no", "NO", "false", "FALSE"), ); 77 | 78 | class StringListTest : public testing::TestWithParam< 79 | std::pair, std::string>> {}; 80 | 81 | TEST_P(StringListTest, ReadStrings) { 82 | Config config; 83 | std::istringstream input("KEY " + GetParam().second); 84 | config.Load(input); 85 | 86 | const auto result = config.GetStringList("KEY"); 87 | const std::vector strings(result.begin(), result.end()); 88 | 89 | const std::vector expected = GetParam().first; 90 | 91 | EXPECT_EQ(expected, strings); 92 | } 93 | 94 | INSTANTIATE_TEST_CASE_P( 95 | TestCases, StringListTest, 96 | testing::ValuesIn( 97 | std::vector, std::string>>{ 98 | {{"singleton"}, "singleton"}, 99 | {{"a", "b"}, "a,b"}, 100 | {{"a", "b", "c", "d", "E", "f", "g", "h", "i"}, 101 | "a,b,c,d,E,f,g,h,i"}, 102 | 103 | {{}, ""}, 104 | {{"singleton"}, "singleton,"}, 105 | {{"a", "b"}, "a,b,\nc,d"}, 106 | {{"a"}, "a, b"}, 107 | }), ); 108 | 109 | class IntListTest : public testing::TestWithParam< 110 | std::pair, std::string>> {}; 111 | 112 | TEST_P(IntListTest, ReadInts) { 113 | Config config; 114 | std::istringstream input("KEY " + GetParam().second); 115 | config.Load(input); 116 | 117 | const auto result = config.GetIntList("KEY"); 118 | const std::vector ints(result.begin(), result.end()); 119 | 120 | const std::vector expected = GetParam().first; 121 | 122 | EXPECT_EQ(expected, ints); 123 | } 124 | 125 | INSTANTIATE_TEST_CASE_P( 126 | TestCases, IntListTest, 127 | testing::ValuesIn(std::vector, std::string>>{ 128 | {{1}, "1"}, 129 | {{-1, 0}, "-1,0"}, 130 | 131 | {{}, ""}, 132 | {{-2, -3}, "-2,-3\n4,5"}, 133 | {{832, 43}, "832,43"}, 134 | }), ); 135 | 136 | class FloatListTest : public testing::TestWithParam< 137 | std::pair, std::string>> {}; 138 | 139 | TEST_P(FloatListTest, ReadFloats) { 140 | Config config; 141 | std::istringstream input("KEY " + GetParam().second); 142 | config.Load(input); 143 | 144 | const auto result = config.GetFloatList("KEY"); 145 | const std::vector ints(result.begin(), result.end()); 146 | 147 | const std::vector expected = GetParam().first; 148 | 149 | EXPECT_EQ(expected, ints); 150 | } 151 | 152 | INSTANTIATE_TEST_CASE_P( 153 | TestCases, FloatListTest, 154 | testing::ValuesIn(std::vector, std::string>>{ 155 | {{1.0f}, "1"}, 156 | {{-1.5f, 0.02f}, "-1.5,0.02"}, 157 | 158 | {{}, ""}, 159 | {{-0.2f, -3}, "-0.2,-3\n4,5"}, 160 | {{832.123f, 43}, "832.123,43"}, 161 | }), ); 162 | 163 | } // namespace 164 | -------------------------------------------------------------------------------- /unittests/Basic/LoggerTest.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/logger.h" 2 | 3 | #include 4 | 5 | #include "gmock/gmock-matchers.h" 6 | #include "gtest/gtest.h" 7 | 8 | using namespace llvm::opt_sched; 9 | 10 | namespace { 11 | class LoggerTest : public ::testing::Test { 12 | protected: 13 | LoggerTest() : old{Logger::GetLogStream()} { Logger::SetLogStream(log); } 14 | 15 | ~LoggerTest() override { Logger::SetLogStream(old); } 16 | 17 | std::string getLog() const { return log.str(); } 18 | 19 | private: 20 | std::ostream &old; 21 | std::ostringstream log; 22 | }; 23 | 24 | TEST_F(LoggerTest, EventWorks) { 25 | Logger::Event("SomeEventID", "key", 42, "key2", "value2", "key3", true, 26 | "key4", 123ull, "key5", -123ll); 27 | EXPECT_THAT( 28 | getLog(), 29 | ::testing::MatchesRegex( 30 | R"(EVENT: \{"event_id": "SomeEventID", "key": 42, "key2": "value2", "key3": true, "key4": 123, "key5": -123, "time": [0-9]+\})" 31 | "\n")); 32 | } 33 | 34 | TEST_F(LoggerTest, EmptyEventIncludesOnlyTime) { 35 | Logger::Event("SomeEventID"); 36 | EXPECT_THAT(getLog(), 37 | ::testing::MatchesRegex( 38 | R"(EVENT: \{"event_id": "SomeEventID", "time": [0-9]+\})" 39 | "\n")); 40 | } 41 | } // namespace 42 | -------------------------------------------------------------------------------- /unittests/Basic/UtilitiesTest.cpp: -------------------------------------------------------------------------------- 1 | #include "opt-sched/Scheduler/utilities.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | namespace utils = llvm::opt_sched::Utilities; 6 | 7 | namespace { 8 | 9 | TEST(Utilities, clcltBitsNeededToHoldNum) { 10 | EXPECT_EQ(0, utils::clcltBitsNeededToHoldNum(0)); 11 | EXPECT_EQ(1, utils::clcltBitsNeededToHoldNum(1)); 12 | EXPECT_EQ(2, utils::clcltBitsNeededToHoldNum(2)); 13 | EXPECT_EQ(2, utils::clcltBitsNeededToHoldNum(3)); 14 | EXPECT_EQ(3, utils::clcltBitsNeededToHoldNum(4)); 15 | 16 | EXPECT_EQ(16, utils::clcltBitsNeededToHoldNum(0x8000)); 17 | } 18 | 19 | } // namespace 20 | -------------------------------------------------------------------------------- /unittests/Basic/simple_machine_model.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTSCHED_SIMPLE_MACHINE_MODEL_H 2 | #define OPTSCHED_SIMPLE_MACHINE_MODEL_H 3 | 4 | #include // strdup is in the C header, but not the C++ header 5 | 6 | #include "opt-sched/Scheduler/buffers.h" 7 | #include "opt-sched/Scheduler/machine_model.h" 8 | 9 | inline llvm::opt_sched::MachineModel simpleMachineModel() { 10 | static constexpr const char SimpleModel[] = R"( 11 | MODEL_NAME: Simple 12 | 13 | # The limit on the total number of instructions that can be issued in one cycle 14 | ISSUE_RATE: 1 15 | 16 | # Each instruction must have an issue type, i.e. a function unit that the instruction uses. 17 | ISSUE_TYPE_COUNT: 1 18 | 19 | # Default issue type for LLVM instructions. 20 | Default 1 21 | 22 | DEP_LATENCY_ANTI: 0 23 | DEP_LATENCY_OUTPUT: 1 24 | DEP_LATENCY_OTHER: 1 25 | 26 | # This will not be used. Reg type info will be taken from the compiler. 27 | REG_TYPE_COUNT: 2 28 | I 1 29 | F 1 30 | 31 | # Set this to the total number of instructions 32 | INST_TYPE_COUNT: 2 33 | 34 | INST_TYPE: artificial 35 | ISSUE_TYPE: Default 36 | LATENCY: 0 37 | PIPELINED: YES 38 | BLOCKS_CYCLE: NO 39 | SUPPORTED: NO 40 | 41 | INST_TYPE: Inst 42 | ISSUE_TYPE: Default 43 | LATENCY: 1 44 | PIPELINED: YES 45 | BLOCKS_CYCLE: NO 46 | SUPPORTED: YES 47 | )"; 48 | 49 | llvm::opt_sched::SpecsBuffer Buf(strdup(SimpleModel), sizeof(SimpleModel)); 50 | llvm::opt_sched::MachineModel Model(Buf); 51 | return Model; 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /unittests/Basic/simple_machine_model_test.cpp: -------------------------------------------------------------------------------- 1 | #include "simple_machine_model.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | using llvm::opt_sched::MachineModel; 6 | 7 | namespace { 8 | TEST(SimpleMachineModel, CanBeLoaded) { 9 | MachineModel Model = simpleMachineModel(); 10 | EXPECT_EQ(1, Model.GetIssueRate()); 11 | } 12 | } // namespace 13 | -------------------------------------------------------------------------------- /unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_custom_target(OptSchedUnitTests) 2 | set_target_properties(OptSchedUnitTests PROPERTIES FOLDER "Tests") 3 | 4 | # LLVM turns this off, but we're good to have it: 5 | add_definitions(-UGTEST_HAS_TR1_TUPLE) 6 | 7 | function(add_optsched_unittest test_dirname) 8 | add_unittest(OptSchedUnitTests ${test_dirname} ${ARGN}) 9 | endfunction() 10 | 11 | # All unit test targets depend on OptSched 12 | add_llvm_library(UnitTest.OptSched STATIC $) 13 | link_libraries(UnitTest.OptSched) 14 | 15 | add_subdirectory(Basic) 16 | -------------------------------------------------------------------------------- /util/ARM/build-copy-to-A7.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Build benchmarks and copy them to the A7 machine. 3 | 4 | BENCH="401.bzip2 429.mcf 433.milc 445.gobmk 456.hmmer 458.sjeng 462.libquantum 464.h264ref 470.lbm 482.sphinx3 444.namd 447.dealII 453.povray 471.omnetpp 473.astar" 5 | 6 | # source the shrc 7 | . ./shrc 8 | 9 | # Try to scrub benchmarks. Catch unchecked error in runspec where the benchmarks are not actually cleaned if 10 | # they were built by another user or root. 11 | echo 'Cleaning benchmarks' 12 | rslt=$(runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub $BENCH 2>&1 | \ 13 | awk '/Couldn'\''t unlink/ { print "1"; exit 1 }' -) 14 | if [ ! -z $rslt ]; 15 | then 16 | echo "Error scrubbing benchmarks. Try with sudo." 17 | echo "\"sudo sh -c '. ./shrc; runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub all'\"" 18 | exit 1 19 | fi 20 | 21 | echo 'Building benchmarks' 22 | runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a build $BENCH 2>&1 > /dev/null 23 | 24 | #echo 'Creating fake run directories' 25 | #runspec --fake --loose --size test --tune base --config Intel_llvm_3.9.cfg $BENCH 26 | 27 | cd ./benchspec/CPU2006/ 28 | 29 | echo 'Creating archive' 30 | tar cJf ziped_benches.tar.xz */exe 31 | 32 | echo 'Copying to A7 machine' 33 | scp -q ziped_benches.tar.xz ghassan@99.113.71.118:~ 34 | 35 | echo 'Cleaning benchmarks again' 36 | runspec --loose -size=test -iterations=1 -config=Intel_llvm_3.9.cfg --tune=base -r 1 -I -a scrub $BENCH 2>&1 > /dev/null 37 | 38 | echo 'Done!' 39 | -------------------------------------------------------------------------------- /util/ARM/extract-run-spec-cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Run a dry run of the CPU2006 benchmarks and extract the commands necessary 3 | # for running the binaries on a different machine without runspec. 4 | # eg: When cross-compiling. 5 | 6 | # ref (reference) or test (test) input size for the benchmarks. 7 | SIZE=test 8 | 9 | runspec --fake --loose --size $SIZE --tune base --config Intel_llvm_3.9.cfg $1 | \ 10 | awk '/Benchmark invocation/ {record=1} /Benchmark verification/ {record=0} record' - | \ 11 | awk '/echo/ {split($0, res, "\""); print res[2] }' 12 | -------------------------------------------------------------------------------- /util/CPU2006/clean-compile-commands.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import re 4 | 5 | parser = argparse.ArgumentParser(description='Cleans CPU2006 logs, moving compilation commands to the appropriate location if necessary') 6 | parser.add_argument('files', nargs='+', help='The logs to clean, in place') 7 | 8 | files = parser.parse_args().files 9 | 10 | RE_AFTER_FUNCTION = re.compile(r'\*{20,}\nFunction:.*?\*{20,}\n', re.DOTALL) 11 | RE_BUGGED_COMPILE_COMMAND = re.compile( 12 | r''' 13 | # Compilation commands will always appear at the beginning of a line if things happened correctly. 14 | # We're trying to fix it when it doesn't happen correctly. 15 | ^(?: # (Non-capturing) 16 | E # Inside an EVENT: log, but possible happening mid-word (EV/usr/bin/clang++ ...) 17 | | I # Inside an INFO: log 18 | ) 19 | .*? 20 | ( 21 | # When we see any of the bugged compilation commands, 22 | (/.*?/[cf]lang.*\n) # clang, clang++, flang 23 | | (specperl\ /.*\n) # specperl commands 24 | ) # then we want to match the command and move it to the end. 25 | ''', 26 | re.VERBOSE | re.MULTILINE) 27 | 28 | for file in files: 29 | with open(file, 'r') as f: 30 | text = f.read() 31 | 32 | # Keep the file content we wish to write back as a list of strings. 33 | # We will do a join at the end. 34 | result = [] 35 | 36 | cur = 0 37 | # Iterate over the locations that we will place the bugged commands (after next fn) 38 | for next_fn_m in RE_AFTER_FUNCTION.finditer(text): 39 | # The strings we will be placing after the fn. 40 | after_fn = [] 41 | 42 | # Gather all the bugged compile commands from `cur` to the location of this next_fn_m. 43 | while True: 44 | bugged = RE_BUGGED_COMPILE_COMMAND.search(text, cur, next_fn_m.start()) 45 | 46 | if bugged: 47 | result.append(text[cur:bugged.start(1)]) 48 | after_fn.append(bugged.group(1)) 49 | cur = bugged.end(1) 50 | else: 51 | result.append(text[cur:next_fn_m.end()]) 52 | cur = next_fn_m.end() 53 | break 54 | result += after_fn 55 | 56 | # Include any remnant 57 | result.append(text[cur:]) 58 | 59 | resultstr = ''.join(result) 60 | with open(file, 'w') as f: 61 | f.write(resultstr) 62 | -------------------------------------------------------------------------------- /util/SLIL/compare-BB-fixed.py: -------------------------------------------------------------------------------- 1 | import re 2 | import mmap 3 | import optparse 4 | import os 5 | 6 | parser = optparse.OptionParser( 7 | description='Wrapper around runspec for collecting spill counts.') 8 | parser.add_option('-b', '--bruteforce', 9 | metavar='filepath', 10 | default=None, 11 | help='Log file of brute force compiler.') 12 | parser.add_option('-d', '--dynamic', 13 | metavar='filepath', 14 | default=None, 15 | help='Log file of dynamic compiler.') 16 | 17 | args = parser.parse_args()[0] 18 | 19 | bruteForceFile = args.bruteforce 20 | bbFile = args.dynamic 21 | 22 | if not os.path.isfile(bruteForceFile): 23 | raise Error("Please specify a valid brute force log file.") 24 | if not os.path.isfile(bbFile): 25 | raise Error("Please specify a valid dynamic log file.") 26 | 27 | regex = re.compile(r'Dag (.*?) (.*?) absolute cost (\d+?) time (\d+)') 28 | 29 | results = {} 30 | 31 | SUCCESS = "optimal" 32 | TIMEOUT = "timeout" 33 | FAILED = "failed" 34 | 35 | staticErrorCount = 0 36 | dynamicErrorCount = 0 37 | goodCount = 0 38 | # Gather results from log files (assumed to be just 1 log file per build) 39 | with open(bruteForceFile) as bff: 40 | bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ) 41 | 42 | for match in regex.finditer(bffm): 43 | dagResult = {} 44 | dagResult['bf'] = {} 45 | dagResult['bf']['result'] = match.group(2) 46 | dagResult['bf']['cost'] = int(match.group(3)) 47 | dagResult['bf']['time'] = int(match.group(4)) 48 | results[match.group(1)] = dagResult 49 | 50 | bffm.close() 51 | 52 | with open(bbFile) as bbf: 53 | bbfm = mmap.mmap(bbf.fileno(), 0, access=mmap.ACCESS_READ) 54 | for match in regex.finditer(bbfm): 55 | if not match.group(1) in results: 56 | results[match.group(1)] = {} 57 | results[match.group(1)]['bb'] = {} 58 | results[match.group(1)]['bb']['result'] = match.group(2) 59 | results[match.group(1)]['bb']['cost'] = int(match.group(3)) 60 | results[match.group(1)]['bb']['time'] = int(match.group(4)) 61 | bbfm.close() 62 | 63 | 64 | #analyze results 65 | for dagName in results: 66 | if not "bf" in results[dagName] or not "bb" in results[dagName]: 67 | if len(results[dagName]) > 0: 68 | staticErrorCount += 1 69 | print("StaticLBError: Found B&B results for one file but not the other") 70 | for key in results[dagName]: 71 | print(" %s: Dag %s %s cost %d time %d" % (key, dagName, results[dagName][key]['result'], results[dagName][key]['cost'], results[dagName][key]['time'])) 72 | continue 73 | bfCost = results[dagName]['bf']['cost'] 74 | bbCost = results[dagName]['bb']['cost'] 75 | bfResult = results[dagName]['bf']['result'] 76 | bbResult = results[dagName]['bb']['result'] 77 | # Case 1: both success -> must be same cost 78 | if bfResult == SUCCESS and bbResult == SUCCESS: 79 | if bbCost != bfCost: 80 | dynamicErrorCount += 1 81 | print("DynamicLBError: Dag %s: both implementations optimal, but brute force cost (%d) is different from dynamic cost (%d)" %(dagName, bfCost, bbCost)) 82 | else: 83 | goodCount += 1 84 | print("Good: Dag %s: both implementations solved optimally, and both costs match" % dagName) 85 | # Case 2: one timeout and other success -> timeout cost shouldn't be better 86 | elif bfResult == SUCCESS and bbResult == TIMEOUT: 87 | if bbCost < bfCost: 88 | dynamicErrorCount += 1 89 | print("DynamicLBError: Dag %s: brute force optimal and dynamic timed out, but brute force cost (%d) is worse than dynamic cost (%d)" % (dagName, bfCost, bbCost)) 90 | else: 91 | goodCount += 1 92 | print("Good: Dag %s: brute force optimal and dynamic timed out, and brute force cost (%d) is not worse than dynamic cost (%d)" % (dagName, bfCost, bbCost)) 93 | elif bfResult == TIMEOUT and bbResult == SUCCESS: 94 | if bbCost > bfCost: 95 | dynamicErrorCount += 1 96 | print("DynamicLBError: Dag %s: brute force timed out and dynamic optimal, but brute force cost (%d) is better than dynamic cost (%d)" % (dagName, bfCost, bbCost)) 97 | else: 98 | goodCount += 1 99 | print("Good: Dag %s: brute force timed out and dynamic optimal, and brute force cost (%d) is not better than dynamic cost (%d)" % (dagName, bfCost, bbCost)) 100 | 101 | 102 | print("Good: %d" % goodCount) 103 | print("Static LB Error: %d" % staticErrorCount) 104 | print("Dynamic LB Error: %d" % dynamicErrorCount) 105 | -------------------------------------------------------------------------------- /util/SLIL/compare-peaks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import re 4 | import mmap 5 | 6 | Regex = re.compile('DAG (.*?) PEAK (\d+)') 7 | 8 | def readPeakCosts(logFile): 9 | peakCosts = {} 10 | with open(logFile) as f: 11 | m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) 12 | for match in Regex.finditer(m): 13 | peakCosts[match.group(1)] = int(match.group(2)) 14 | m.close() 15 | return peakCosts 16 | 17 | def compareWrapperLogs(path1, path2, logFile): 18 | benchName = logFile.split(".")[0] 19 | if not (os.path.isfile(os.path.join(path1, logFile)) and os.path.isfile(os.path.join(path2, logFile))): 20 | return 21 | 22 | peakCosts1 = readPeakCosts(os.path.join(path1, logFile)) 23 | peakCosts2 = readPeakCosts(os.path.join(path2, logFile)) 24 | 25 | for key in peakCosts1: 26 | if key in peakCosts2: 27 | print("%s,%s,%d,%d" % (benchName, key, peakCosts1[key], peakCosts2[key])) 28 | 29 | if len(sys.argv) != 3: 30 | raise Exception("Invalid number of arguments") 31 | 32 | if not os.path.isdir(sys.argv[1]): 33 | raise Exception("'%s' is not a valid directory" % sys.argv[1]) 34 | 35 | if not os.path.isdir(sys.argv[2]): 36 | raise Exception("'%s' is not a valid directory" % sys.argv[2]) 37 | 38 | for subdirs, dirs, files in os.walk(sys.argv[1]): 39 | for f in files: 40 | compareWrapperLogs(sys.argv[1], sys.argv[2], f) 41 | -------------------------------------------------------------------------------- /util/SLIL/compare-static-LB.py: -------------------------------------------------------------------------------- 1 | import re 2 | import mmap 3 | import optparse 4 | import os 5 | import json 6 | 7 | parser = optparse.OptionParser( 8 | description='Wrapper around runspec for collecting spill counts.') 9 | parser.add_option('-b', '--bruteforce', 10 | metavar='filepath', 11 | default=None, 12 | help='Log file of brute force compiler.') 13 | parser.add_option('-d', '--dynamic', 14 | metavar='filepath', 15 | default=None, 16 | help='Log file of dynamic compiler.') 17 | 18 | args = parser.parse_args()[0] 19 | 20 | bruteForceFile = args.bruteforce 21 | bbFile = args.dynamic 22 | 23 | if not os.path.isfile(bruteForceFile): 24 | raise Error("Please specify a valid brute force log file.") 25 | if not os.path.isfile(bbFile): 26 | raise Error("Please specify a valid dynamic log file.") 27 | 28 | results = {} 29 | 30 | errorCount = 0 31 | equalCount = 0 32 | improvementCount = 0 33 | 34 | # Gather results from log files (assumed to be just 1 log file per build) 35 | with open(bruteForceFile) as bff: 36 | bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ) 37 | dagResults = {} 38 | for match in re.finditer(r'EVENT: ({"event_id": "StaticLowerBoundDebugInfo".*)', bffm): 39 | info = json.loads(match.group(1)) 40 | dagResults[info['name']] = int(info['spill_cost_lb']) 41 | bffm.close() 42 | results['bf'] = dagResults 43 | 44 | with open(bbFile) as bbf: 45 | bbfm = mmap.mmap(bbf.fileno(), 0, access=mmap.ACCESS_READ) 46 | dagResults = {} 47 | for match in re.finditer(r'EVENT: ({"event_id": "StaticLowerBoundDebugInfo".*)', bffm): 48 | info = json.loads(match.group(1)) 49 | dagResults[info['name']] = int(info['spill_cost_lb']) 50 | bbfm.close() 51 | results['bb'] = dagResults 52 | 53 | #analyze results 54 | # 55 | for dagName in results['bf']: 56 | bfLowerBound = results['bf'][dagName] 57 | if not dagName in results['bb']: continue 58 | bbLowerBound = results['bb'][dagName] 59 | if bfLowerBound < bbLowerBound: 60 | print("Improvement: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName)) 61 | improvementCount += 1 62 | elif bfLowerBound == bbLowerBound: 63 | print("Equal: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName)) 64 | equalCount += 1 65 | else: 66 | print("Error: oldLB %d newLB %d dag %s" % (bfLowerBound, bbLowerBound, dagName)) 67 | errorCount += 1 68 | print("Improved blocks: %d"% improvementCount) 69 | print("Equal blocks: %d"% equalCount) 70 | print("Errors: %d"% errorCount) 71 | -------------------------------------------------------------------------------- /util/SLIL/gather-SLIL-stats.py: -------------------------------------------------------------------------------- 1 | import optparse 2 | import os 3 | import mmap 4 | import re 5 | 6 | regex = re.compile("SLIL stats: DAG (.*?) static LB (\d+) gap size (\d+) enumerated (.*?) optimal (.*?) PERP higher (.*?) \(") 7 | 8 | 9 | def debugPrint(msg): 10 | #print(msg) 11 | pass 12 | 13 | def getBool(msg): 14 | if msg == "True": return True 15 | elif msg == "False": return False 16 | raise Exception("msg is %s" % msg) 17 | 18 | def getStatsFromLogFile(filename, path): 19 | # First, organize raw data before calculate aggregate stats 20 | functions = {} 21 | with open(os.path.join(path,filename)) as f: 22 | m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) 23 | 24 | for match in regex.finditer(m): 25 | debugPrint("Found match: %s" % match.group(0)) 26 | blockStats = {} 27 | dagName = match.group(1) 28 | 29 | functionName = dagName.split(':')[0] 30 | if not functionName in functions: 31 | debugPrint("Found function %s" % functionName) 32 | functions[functionName] = {} 33 | 34 | blockStats['staticLB'] = int(match.group(2)) 35 | blockStats['gapSize'] = int(match.group(3)) 36 | blockStats['isEnumerated'] = getBool(match.group(4)) 37 | blockStats['isOptimal'] = getBool(match.group(5)) 38 | blockStats['isPerpHigher'] = getBool(match.group(6)) 39 | 40 | blockName = dagName.split(':')[1] 41 | if blockName in functions[functionName]: 42 | raise Exception("Block %s already exists in function %s!" % (blockName, functionName)) 43 | functions[functionName][blockName] = blockStats 44 | 45 | m.close() 46 | 47 | # Then, calculate aggregate stats per function 48 | benchStats = {} 49 | for functionName in functions: 50 | benchStats[functionName] = {} 51 | totalGapSize = 0 52 | averageGapPercentage = 0 53 | maxGapPercentage = 0 54 | totalEnumerated = 0 55 | totalOptimal = 0 56 | totalOptimalAndEnumerated = 0 57 | totalHigherPerp = 0 58 | totalOptimalHigherPerp = 0 59 | for blockName in functions[functionName]: 60 | blockStats = functions[functionName][blockName] 61 | gapSize = blockStats['gapSize'] 62 | totalGapSize += gapSize 63 | gapPercentage = float(gapSize) / blockStats['staticLB'] 64 | averageGapPercentage += gapPercentage 65 | if gapPercentage > maxGapPercentage: maxGapPercentage = gapPercentage 66 | if blockStats['isOptimal']: totalOptimal += 1 67 | if blockStats['isEnumerated']: totalEnumerated += 1 68 | if gapSize == 0 and blockStats['isEnumerated']: totalOptimalAndEnumerated += 1 69 | if blockStats['isPerpHigher']: 70 | totalHigherPerp += 1 71 | if blockStats['isOptimal']: totalOptimalHigherPerp += 1 72 | functionStats = benchStats[functionName] 73 | functionStats['totalBlocks'] = len(functions[functionName]) 74 | functionStats['totalGapSize'] = totalGapSize 75 | functionStats['averageGapSize'] = float(totalGapSize) / len(functions[functionName]) 76 | functionStats['averageGapPercentage'] = float(averageGapPercentage) / len(functions[functionName]) 77 | functionStats['maxGapPercentage'] = maxGapPercentage 78 | functionStats['totalEnumerated'] = totalEnumerated 79 | functionStats['totalOptimal'] = totalOptimal 80 | functionStats['totalOptimalAndEnumerated'] = totalOptimalAndEnumerated 81 | functionStats['totalHigherPerp'] = totalHigherPerp 82 | functionStats['totalOptimalHigherPerp'] = totalOptimalHigherPerp 83 | 84 | return benchStats 85 | 86 | parser = optparse.OptionParser(description='Wrapper around runspec for collecting spill counts.') 87 | parser.add_option('-p', '--path', 88 | metavar='path', 89 | default=None, 90 | help='Path to log files generated by runspec wrapper.') 91 | args = parser.parse_args()[0] 92 | 93 | 94 | if not os.path.isdir(args.path): 95 | raise Exception("Input path: %s is not a folder" % args.path) 96 | 97 | stats = {} 98 | 99 | for filename in os.listdir(args.path): 100 | benchName = filename.split('.')[0] 101 | stats[benchName] = getStatsFromLogFile(filename, args.path) 102 | 103 | debugPrint(stats) 104 | 105 | 106 | """ 107 | functionStats['totalGapSize'] = totalGapSize 108 | functionStats['averageGapSize'] = float(totalGapSize) / len(functions[functionName]) 109 | functionStats['averageGapPercentage'] = float(averageGapPercentage) / len(functions[functionName]) 110 | functionStats['totalEnumerated'] = totalEnumerated 111 | functionStats['totalOptimal'] = totalOptimal 112 | functionStats['totalHigherPerp'] = totalHigherPerp 113 | functionStats['totalOptimalHigherPerp'] = totalOptimalHigherPerp 114 | 115 | """ 116 | 117 | with open("slilStats.txt", 'w') as f: 118 | for benchName in stats: 119 | f.write("====================\n") 120 | f.write("Benchmark %s\n" % benchName) 121 | f.write("====================\n") 122 | for functionName in stats[benchName]: 123 | f.write(" Function %s\n ----------------\n" % functionName) 124 | f.write(" Total blocks: %d\n" % stats[benchName][functionName]['totalBlocks']) 125 | f.write(" Total gap size: %d\n" % stats[benchName][functionName]['totalGapSize']) 126 | f.write(" Average gap size: %.02f\n" % stats[benchName][functionName]['averageGapSize']) 127 | f.write(" Average percent gap size: %.02f%%\n" % stats[benchName][functionName]['averageGapPercentage']) 128 | f.write(" Maximum percent gap size: %.02f%%\n" % stats[benchName][functionName]['maxGapPercentage']) 129 | f.write(" Enumerated: %d\n" % stats[benchName][functionName]['totalEnumerated']) 130 | f.write(" Optimal: %d\n" % stats[benchName][functionName]['totalOptimal']) 131 | f.write(" Enumerated and zero cost: %d\n" % stats[benchName][functionName]['totalOptimalAndEnumerated']) 132 | f.write(" Higher PERP: %d\n" % stats[benchName][functionName]['totalHigherPerp']) 133 | f.write(" Higher PERP and optimal: %d\n" % stats[benchName][functionName]['totalOptimalHigherPerp']) 134 | -------------------------------------------------------------------------------- /util/SLIL/run-filtered-block-tests.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This bash script will run performance tests for the SLIL cost function for the following scenarios: 4 | # 1. Take all blocks 5 | # 2. Take only optimal blocks 6 | # 3. Take only zero-cost blocks 7 | # Each scenario will run the scheduler for all functions and for hot functions. 8 | 9 | SCHED_INI_DIR="/home/chris/csc199/LLVM_DRAGONEGG/Generic/OptSchedCfg/" 10 | 11 | TEST_DIR="/home/chris/csc199/test_118/" 12 | TEST_DIR_SHARED="/home/chris/csc199/test_118_2017_10_15_chris/" 13 | 14 | CPU2006_DIR="/media/ssd0/CPU2006" 15 | CPU2006_USER_DIR="/media/ssd0/chris/spec" 16 | 17 | RUNSPEC_SCRUB="runspec --loose -size=ref -iterations=1 -config=Intel_llvm_3.9_chris.cfg --tune=base -r 1 -I -a scrub all" 18 | 19 | function clean_dirs() { 20 | echo runspec --loose -size=ref -iterations=1 -config=Intel_llvm_3.9_chris.cfg --tune=base -r 1 -I -a scrub all 21 | runspec --loose -size=ref -iterations=1 -config=Intel_llvm_3.9_chris.cfg --tune=base -r 1 -I -a scrub all 22 | echo rm -R $CPU2006_DIR/wrapper* $CPU2006_USER_DIR/result/* 23 | rm -R $CPU2006_DIR/wrapper* $CPU2006_USER_DIR/result/* 24 | } 25 | 26 | # FUNCTION ARGUMENTS: 27 | # $1: sched.ini file that contains the preconfigured settings 28 | # $2: name of test 29 | # $3: subfolder of result 30 | function run_test() { 31 | clean_dirs 32 | 33 | echo cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini" 34 | cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini" 35 | 36 | echo python runspec-wrapper-chris.py 37 | python runspec-wrapper-chris.py 38 | 39 | echo cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini" 40 | cp "$SCHED_INI_DIR/$1" "$SCHED_INI_DIR/sched.ini" 41 | 42 | RESULT_DIR="$TEST_DIR/$3" 43 | RESULT_DIR_SHARED="$TEST_DIR_SHARED/$3" 44 | if [ ! -d "$RESULT_DIR" ]; then 45 | echo mkdir "$RESULT_DIR" 46 | mkdir "$RESULT_DIR" 47 | fi 48 | if [ ! -d "$RESULT_DIR_SHARED" ]; then 49 | echo mkdir "$RESULT_DIR_SHARED" 50 | mkdir "$RESULT_DIR_SHARED" 51 | fi 52 | 53 | echo cp "$CPU2006_DIR/wrapper*" "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR" 54 | cp -R $CPU2006_DIR/wrapper* $SCHED_INI_DIR/sched.ini $RESULT_DIR 55 | 56 | echo cp "$CPU2006_DIR/wrapper*" "$SCHED_INI_DIR/sched.ini" "$RESULT_DIR" 57 | cp $CPU2006_DIR/wrapperStats/*.dat $SCHED_INI_DIR/sched.ini $RESULT_DIR_SHARED 58 | } 59 | 60 | if [ ! -d "$TEST_DIR" ]; then 61 | echo "Output folder $TEST_DIR doesn't exist. Creating it now." 62 | echo mkdir "$TEST_DIR" 63 | mkdir "$TEST_DIR" 64 | fi 65 | 66 | if [ ! -d "$TEST_DIR_SHARED" ]; then 67 | echo mkdir "$TEST_DIR_SHARED" 68 | mkdir "$TEST_DIR_SHARED" 69 | fi 70 | 71 | echo "Using $TEST_DIR to collect log files and stat files." 72 | 73 | echo cd "$CPU2006_DIR" 74 | cd "$CPU2006_DIR" 75 | echo source shrc 76 | source shrc 77 | 78 | run_test "test_cases/sched.peak.20.300.ini" "" "peak_300insts/" 79 | run_test "test_cases/sched.slil.20.300.ini" "" "slil_300insts/" 80 | 81 | run_test "test_cases/sched.peak.20.nolimit.ini" "" "peak_nolimit/" 82 | run_test "test_cases/sched.slil.20.nolimit.ini" "" "slil_nolimit/" 83 | 84 | clean_dirs 85 | -------------------------------------------------------------------------------- /util/aco_analysis/make_pheromone_pdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #************************************************************************************** 3 | #Description: Generates a pdf from the '.dot' files that are generated by the use 4 | # of the ACO_DBG_REGIONS sched.ini option 5 | #Author: Paul McHugh 6 | #Last Update: July 24, 2020 7 | #************************************************************************************** 8 | #If run with one argument which is a directory the script will generate the pdfs in 9 | #the selected directory. if no argument is present the script generates pdf in the 10 | #current directory 11 | #Current/selected must directory contain the '.dot' files you want to process 12 | #Requires write permission in the current/selected directory. 13 | 14 | #check to see if we have the proper packages installed 15 | if ! ( dot -V &> /dev/null && pdfunite -v &> /dev/null ) ; 16 | then 17 | echo -e "\e[31mYou are missing either graphviz or poppler-utils\e[m" 18 | echo "Install the necessary packages with the command:" 19 | echo -e "\e[32msudo apt install graphviz poppler-utils\e[m" 20 | exit 21 | fi 22 | 23 | if [ ! -z "$1" ] && [ -d "$1" ]; 24 | then 25 | cd "$1" 26 | fi 27 | 28 | regions=`ls *"@initial.dot" *"@iteration"*.dot | cut -d@ -f1 |uniq` 29 | for region in $regions 30 | do 31 | for file in $region*.dot 32 | do 33 | dot -Tpdf -o ${file%.dot}.pdf $file 34 | done 35 | 36 | rm -f $region.pdf 37 | pdfunite `ls -v $region*.pdf` $region.pdf 38 | done 39 | 40 | echo $regions 41 | tar -czf result_pdfs.tar.gz $(printf '%q.pdf ' $regions) 42 | -------------------------------------------------------------------------------- /util/analyze/__init__.py: -------------------------------------------------------------------------------- 1 | from ._types import Logs, Benchmark, Block 2 | from ._main import parse_args 3 | from .imports import import_cpu2006, import_plaidml, import_shoc, import_utils 4 | from ._utils import * 5 | -------------------------------------------------------------------------------- /util/analyze/_main.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import argparse 3 | import json 4 | import fnmatch 5 | from typing import Callable 6 | 7 | from .imports import * 8 | from ._types import Block, Logs 9 | 10 | 11 | def __load_file(file): 12 | ''' 13 | Load imported log file (imported via one of the import scripts) 14 | ''' 15 | return pickle.load(file) 16 | 17 | 18 | def __load_filepath(filepath): 19 | with open(filepath, 'rb') as f: 20 | return __load_file(f) 21 | 22 | 23 | def block_filter(filter: dict) -> Callable[[Block], bool]: 24 | def log_matches(log, pattern): 25 | if not isinstance(pattern, dict): 26 | if isinstance(pattern, str): 27 | return fnmatch.fnmatchcase(str(log), pattern) 28 | return log == pattern 29 | 30 | return all( 31 | k in log and log_matches(log[k], v) 32 | for k, v in pattern.items() 33 | ) 34 | 35 | def blk_filter_f(blk): 36 | return all( 37 | event in blk and all(log_matches(log, matcher) 38 | for log in blk[event]) 39 | for event, matcher in filter.items() 40 | ) 41 | 42 | return blk_filter_f 43 | 44 | 45 | def parse_args(parser: argparse.ArgumentParser, *names, args=None): 46 | ''' 47 | Parses the argument parser with additional common flags. 48 | 49 | Use parse_args(parser) instead of parser.parse_args() 50 | 51 | Params: 52 | - *names - variadic: the strings specifying which arguments should be parsed. 53 | These should be python_case, not --flag-case. 54 | - args - The argv to parse from. Defaults to parsing sys.argv 55 | ''' 56 | 57 | parser.add_argument( 58 | '--benchsuite', 59 | required=True, 60 | choices=('spec', 'plaidml', 'shoc', 'pickle'), 61 | help='Select the benchmark suite which the input satisfies.', 62 | ) 63 | parser.add_argument( 64 | '--keep-blocks-if', 65 | default='true', 66 | type=json.loads, 67 | help='Keep blocks matching (JSON format)', 68 | ) 69 | 70 | args = parser.parse_args(args) 71 | 72 | FILE_PARSERS = { 73 | 'pickle': __load_filepath, 74 | 'spec': import_cpu2006.parse, 75 | 'plaidml': import_plaidml.parse, 76 | 'shoc': import_shoc.parse, 77 | } 78 | parser = FILE_PARSERS[args.benchsuite] 79 | blk_filter = block_filter(args.keep_blocks_if) if args.keep_blocks_if is not True else True 80 | 81 | args_dict = vars(args) 82 | 83 | # Go through the logs inputs and parse them. 84 | for name in names: 85 | result = parser(args_dict[name]) 86 | if blk_filter is not True: 87 | result = result.keep_blocks_if(blk_filter) 88 | args_dict[name] = result 89 | 90 | return args 91 | -------------------------------------------------------------------------------- /util/analyze/_types.py: -------------------------------------------------------------------------------- 1 | class Logs: 2 | ''' 3 | Abstracts a log file as a collection of benchmarks 4 | 5 | Properties: 6 | - logs.benchmarks: a list of the benchmarks this Logs contains. 7 | ''' 8 | 9 | def __init__(self, benchmarks): 10 | self.benchmarks = benchmarks 11 | 12 | def merge(self, rhs): 13 | ''' 14 | Merges the logs from the rhs into this. 15 | 16 | The rhs must have different benchmarks from this Logs 17 | ''' 18 | in_both = set(self.benchmarks) & set(rhs.benchmarks) 19 | if in_both: 20 | raise ValueError( 21 | 'Cannot merge Logs which share common benchmarks', in_both) 22 | 23 | self.benchmarks += rhs.benchmarks 24 | 25 | return self 26 | 27 | def benchmark(self, name): 28 | ''' 29 | Gets the benchmark with the specified name 30 | ''' 31 | for bench in self.benchmarks: 32 | if bench.name == name: 33 | return bench 34 | 35 | raise KeyError(f'No benchmark `{name}` in this Logs') 36 | 37 | def __iter__(self): 38 | ''' 39 | Iterates over the blocks in every benchmark 40 | ''' 41 | for bench in self.benchmarks: 42 | yield from bench.blocks 43 | 44 | def __repr__(self): 45 | benchmarks = ','.join(b.name for b in self.benchmarks) 46 | return f'' 47 | 48 | def keep_blocks_if(self, p): 49 | return Logs([bench.keep_blocks_if(p) for bench in self.benchmarks]) 50 | 51 | 52 | class Benchmark: 53 | ''' 54 | Abstracts a single benchmark in the logs as a collection of blocks 55 | 56 | Properties: 57 | - bench.name: the name of this benchmark 58 | - bench.info: miscellaneous information about this benchmark 59 | - bench.blocks: the Blocks in this benchmark 60 | ''' 61 | 62 | def __init__(self, info, blocks): 63 | self.name = info['name'] 64 | self.info = info 65 | self.blocks = blocks 66 | 67 | def __iter__(self): 68 | return iter(self.blocks) 69 | 70 | @property 71 | def benchmarks(self): 72 | return (self,) 73 | 74 | def __repr__(self): 75 | return f'' 76 | 77 | def keep_blocks_if(self, p): 78 | return Benchmark(self.info, [blk for blk in self.blocks if p(blk)]) 79 | 80 | 81 | class Block: 82 | ''' 83 | Abstracts a single block in the logs as a collection of log messages 84 | 85 | Handles EVENT logs nicely. 86 | 87 | Properties: 88 | - block.name: the name of this block 89 | - block.info: miscellaneous information about this block 90 | - block.raw_log: the raw log text for this block 91 | - block.events: the events in this block 92 | ''' 93 | 94 | def __init__(self, info, raw_log, events): 95 | self.name = info['name'] 96 | self.info = info 97 | self.raw_log = raw_log 98 | self.events = events 99 | 100 | def single(self, event_name): 101 | ''' 102 | Gets an event with the specified name, requiring exactly one match 103 | 104 | raises AssertionError if there is not exactly one event with the specified name 105 | ''' 106 | result = self.events[event_name] 107 | if len(result) != 1: 108 | raise AssertionError(f'Multiple events for {event_name}') 109 | 110 | return result[0] 111 | 112 | def __getitem__(self, event_name): 113 | ''' 114 | Gets the events with the specified name 115 | ''' 116 | return self.events[event_name] 117 | 118 | def get(self, event_name, default=None): 119 | ''' 120 | Gets the events with the specified name, returning the default if the event doesn't exist 121 | ''' 122 | return self.events.get(event_name, default) 123 | 124 | def __contains__(self, event_name): 125 | return event_name in self.events 126 | 127 | def __iter__(self): 128 | return iter(self.events) 129 | 130 | def __repr__(self): 131 | return f'' 132 | 133 | def uniqueid(self): 134 | return frozenset(self.info.items()) 135 | -------------------------------------------------------------------------------- /util/analyze/_utils.py: -------------------------------------------------------------------------------- 1 | from ._types import * 2 | 3 | 4 | def sum_dicts(ds): 5 | ''' 6 | Sums ds[N]['Key'] for each key for each dict. Assumes each dict has the same keys 7 | E.g. sum_dicts({'a': 1, 'b': 2}, {'a': 2, 'b': 3}) produces {'a': 3, 'b': 5} 8 | ''' 9 | if not ds: 10 | return {} 11 | return {k: sum(d[k] for d in ds) for k in ds[0].keys()} 12 | 13 | 14 | def foreach_bench(analysis_f, *logs, combine=None): 15 | ''' 16 | Repeats `analysis_f` for each benchmark in `logs`. 17 | Also computes the analysis for the entire thing. 18 | If `combine` is given, uses the function to combine it. 19 | Otherwise, runs `analysis_f` over the entire thing (takes quite some time) 20 | 21 | Returns: 22 | A dictionary containing the per-benchmark results. 23 | The keys are the benchmark names. 24 | The run for the entire thing has a key of 'Total' 25 | ''' 26 | 27 | if combine is None: 28 | combine = lambda *args: analysis_f(*logs) 29 | 30 | benchmarks = zip(*[log.benchmarks for log in logs]) 31 | 32 | bench_stats = {bench[0].name: analysis_f(*bench) for bench in benchmarks} 33 | total = combine(bench_stats.values()) 34 | 35 | return { 36 | # Making a new dict so that the "Total" key can be first. 37 | 'Total': total, 38 | **bench_stats, 39 | } 40 | -------------------------------------------------------------------------------- /util/analyze/imports/__init__.py: -------------------------------------------------------------------------------- 1 | from . import import_cpu2006 2 | from . import import_plaidml 3 | from . import import_shoc 4 | from . import import_utils 5 | -------------------------------------------------------------------------------- /util/analyze/imports/import_cpu2006.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | 6 | from . import import_utils 7 | 8 | 9 | def parse(file): 10 | assert os.path.isfile( 11 | file), 'Only single-file CPU2006 logs supported at this time' 12 | 13 | with open(file, 'r') as f: 14 | return import_utils.parse_multi_bench_file( 15 | f.read(), 16 | benchstart=re.compile(r'Building (?P\S*)'), 17 | filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n')) 18 | 19 | 20 | if __name__ == '__main__': 21 | import_utils.import_main( 22 | parse, 23 | description='Import single-file CPU2006 logs', 24 | ) 25 | -------------------------------------------------------------------------------- /util/analyze/imports/import_plaidml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import pathlib 5 | 6 | from .._types import Logs 7 | from . import import_utils 8 | 9 | 10 | def parse(path): 11 | assert os.path.isdir(path), 'Point to the plaidbench output directory' 12 | 13 | benchmark_output_dir = pathlib.Path(path) 14 | benchmark_dirs = [x for x in benchmark_output_dir.iterdir() if x.is_dir()] 15 | benchmark_dirs = list(sorted(benchmark_dirs, key=lambda p: p.name)) 16 | 17 | result = Logs([]) 18 | 19 | for benchmark_dir in benchmark_dirs: 20 | logfiles = list(benchmark_dir.glob('*.log')) 21 | assert len(logfiles) == 1 22 | 23 | with logfiles[0].open('r') as f: 24 | benchname = benchmark_dir.stem 25 | result.merge( 26 | import_utils.parse_single_bench_file( 27 | f.read(), benchname=benchname) 28 | ) 29 | 30 | return result 31 | 32 | 33 | if __name__ == '__main__': 34 | import_utils.import_main( 35 | parse, 36 | description='Import plaidbench directories', 37 | ) 38 | -------------------------------------------------------------------------------- /util/analyze/imports/import_shoc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import pathlib 6 | 7 | from .._types import Logs 8 | from . import import_utils 9 | 10 | 11 | def parse(path): 12 | assert os.path.isdir(path), \ 13 | 'Point to the SHOC output directory (not the Logs/)' 14 | 15 | benchmark_output_dir = pathlib.Path(path) 16 | benchmark_logs_dir = benchmark_output_dir / 'Logs' 17 | assert benchmark_logs_dir.is_dir() 18 | 19 | benchmarks = list(sorted(benchmark_logs_dir.glob( 20 | 'dev?_*.err'), key=lambda p: p.name)) 21 | 22 | result = Logs([]) 23 | benchname_re = re.compile(r'dev._(.*)(\.err)?') 24 | 25 | for benchmark in benchmarks: 26 | with benchmark.open('r') as f: 27 | benchname = benchname_re.search(benchmark.stem).group(1) 28 | result.merge( 29 | import_utils.parse_single_bench_file( 30 | f.read(), benchname=benchname) 31 | ) 32 | 33 | return result 34 | 35 | 36 | if __name__ == '__main__': 37 | import_utils.import_main( 38 | parse, 39 | description='Import SHOC directories', 40 | ) 41 | -------------------------------------------------------------------------------- /util/analyze/imports/import_utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import json 3 | import itertools 4 | import re 5 | import sys 6 | from collections import namedtuple 7 | 8 | from .._types import Logs, Benchmark, Block 9 | 10 | _RE_REGION_INFO = re.compile(r'EVENT:.*ProcessDag.*"name": "(?P[^"]*)"') 11 | 12 | 13 | def import_main(parsefn, *, description): 14 | import argparse 15 | parser = argparse.ArgumentParser(description=description) 16 | parser.add_argument( 17 | '-o', '--output', required=True, help='The output file to write the imported log format to') 18 | parser.add_argument('input', help='The input logs to process') 19 | args = parser.parse_args() 20 | 21 | result = parsefn(args.input) 22 | 23 | with open(args.output, 'wb') as f: 24 | pickle.dump(result, f) 25 | 26 | 27 | def parse_multi_bench_file(logtext, *, benchstart, filename=None): 28 | benchmarks = [] 29 | for benchm, nextm in _splititer(benchstart, logtext): 30 | bench = _parse_benchmark(benchm.groupdict(), logtext, 31 | benchm.end(), nextm.start(), 32 | filenamere=filename) 33 | benchmarks.append(bench) 34 | 35 | return Logs(benchmarks) 36 | 37 | 38 | def parse_single_bench_file(logtext, *, benchname, filename=None): 39 | return Logs([ 40 | _parse_benchmark( 41 | {'name': benchname}, 42 | logtext, 0, len(logtext), 43 | filenamere=filename, 44 | ) 45 | ]) 46 | 47 | 48 | _FileInfo = namedtuple('_FileInfo', ('filename', 'from_pos')) 49 | 50 | 51 | def _each_cons(iterable, n): 52 | ''' 53 | Iterates over each consecutive n items of the iterable. 54 | 55 | _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4) 56 | ''' 57 | iters = [None] * n 58 | iters[0] = iter(iterable) 59 | for i in range(1, n): 60 | iters[i - 1], iters[i] = itertools.tee(iters[i - 1]) 61 | next(iters[i], None) 62 | return zip(*iters) 63 | 64 | 65 | class _DummyEnd: 66 | def __init__(self, length): 67 | self._end = length - 1 68 | 69 | def start(self): 70 | return self._end 71 | 72 | def end(self): 73 | return self._end 74 | 75 | 76 | def _splititer(regex, text, pos=0, endpos=None): 77 | ''' 78 | 'Splits' the string by the regular expression, using an iterable. 79 | Returns both where the regex matches and where it matched next (or the end). 80 | ''' 81 | if endpos is None: 82 | endpos = len(text) - 1 83 | 84 | return _each_cons( 85 | itertools.chain(regex.finditer(text, pos, endpos), 86 | (_DummyEnd(endpos + 1),)), 87 | 2 88 | ) 89 | 90 | 91 | def _parse_benchmark(info, logtext: str, start, end, *, filenamere): 92 | NAME = info['name'] 93 | 94 | blocks = [] 95 | 96 | if filenamere and filenamere.search(logtext, start, end): 97 | files = [ 98 | *(_FileInfo(filename=r.group(1), from_pos=r.end()) 99 | for r in filenamere.finditer(logtext, start, end)), 100 | _FileInfo(filename=None, from_pos=len(logtext)), 101 | ][::-1] 102 | else: 103 | files = [ 104 | _FileInfo(filename=None, from_pos=start), 105 | _FileInfo(filename=None, from_pos=len(logtext)), 106 | ][::-1] 107 | 108 | blocks = [] 109 | 110 | for regionm, nextm in _splititer(_RE_REGION_INFO, logtext, start, end): 111 | assert regionm.end() > files[-1].from_pos 112 | if regionm.end() > files[-2].from_pos: 113 | files.pop() 114 | 115 | try: 116 | filename = files[-1].filename 117 | except NameError: 118 | filename = None 119 | 120 | regioninfo = { 121 | 'name': regionm['name'], 122 | 'file': filename, 123 | 'benchmark': NAME, 124 | } 125 | block = _parse_block(regioninfo, logtext, 126 | regionm.start() - 1, nextm.start()) 127 | blocks.append(block) 128 | 129 | return Benchmark(info, blocks) 130 | 131 | 132 | def _parse_block(info, logtext: str, start, end): 133 | events = _parse_events(logtext, start, end) 134 | raw_log = logtext[start:end] 135 | 136 | return Block(info, raw_log, events) 137 | 138 | 139 | _RE_EVENT_LINE = re.compile(r'\nEVENT: (.*)') 140 | 141 | 142 | def _parse_events(block_log, start=0, end=None): 143 | ''' 144 | Returns a `dict[event_id --> list[event-json]]` of the events in the given log. 145 | 146 | `EVENT: {"event_id": "some_id", "value"}` 147 | becomes `{"some_id": [{"event_id": "some_id", "arg": "value"}, ...], ...}` 148 | 149 | If there is only one event of each id, pass the result through 150 | `parse_as_singular_events(...)` to unwrap the lists. 151 | ''' 152 | if end is None: 153 | end = len(block_log) 154 | 155 | event_lines = _RE_EVENT_LINE.findall(block_log, start, end) 156 | events = '[' + ',\n'.join(event_lines) + ']' 157 | 158 | try: 159 | parsed = json.loads(events) 160 | except json.JSONDecodeError: 161 | print(events, file=sys.stderr) 162 | raise 163 | 164 | result = dict() 165 | 166 | for log in parsed: 167 | result.setdefault(log['event_id'], []).append(log) 168 | 169 | return result 170 | -------------------------------------------------------------------------------- /util/analyze/lib/compile_times.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import re 5 | import argparse 6 | import sys 7 | 8 | import analyze 9 | from analyze import Block, foreach_bench 10 | 11 | 12 | def _block_time(block: Block): 13 | end = block.single('ScheduleVerifiedSuccessfully')['time'] 14 | start = block.single('ProcessDag')['time'] 15 | return end - start 16 | 17 | 18 | def instruction_scheduling_time(logs): 19 | return sum(_block_time(blk) for blk in logs) 20 | 21 | 22 | def total_compile_time_seconds(logs): 23 | last_logs = logs.benchmarks[-1].blocks[-1].raw_log 24 | m = re.search(r'(\d+) total seconds elapsed', last_logs) 25 | assert m, \ 26 | 'Logs must contain "total seconds elapsed" output by the SPEC benchmark suite' 27 | 28 | return m.group(1) 29 | 30 | 31 | if __name__ == '__main__': 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--variant', choices=('sched', 'total'), 34 | help='Which timing variant to use') 35 | parser.add_argument('logs', help='The logs to analyze') 36 | args = analyze.parse_args(parser, 'logs') 37 | 38 | fn = total_compile_time_seconds if args.variant == 'total' else instruction_scheduling_time 39 | results = foreach_bench(fn, args.logs, combine=sum) 40 | writer = csv.DictWriter(sys.stdout, fieldnames=results.keys()) 41 | writer.writeheader() 42 | writer.writerow(results) 43 | -------------------------------------------------------------------------------- /util/misc/count-boundary-spills.py: -------------------------------------------------------------------------------- 1 | # /bin/python3 2 | # Run this script with a CPU2006 logfile as the only argument. 3 | # When using RegAllocFast, find the total number of spills and the proportion of 4 | # those spills that are added at region and block boundaries. 5 | 6 | import re 7 | import sys 8 | 9 | RE_FUNC = re.compile('Function: (.*?)\n') 10 | RE_TOTAL_SPILLS = re.compile('END FAST RA: Number of spills: (\d+)\n') 11 | RE_CALL_BOUNDARY_STORES = re.compile('Call Boundary Stores in function: (\d+)\n') 12 | RE_BLOCK_BOUNDARY_STORES = re.compile('Block Boundary Stores in function: (\d+)\n') 13 | RE_LIVE_IN_LOADS = re.compile('Live-In Loads in function: (\d+)\n') 14 | 15 | totalSpills = 0 16 | totalCallBoundaryStores = 0 17 | totalBlockBoundaryStores = 0 18 | totalLiveInLoads = 0 19 | totalFuncs = 0 20 | #funcs = {} 21 | 22 | if __name__ == '__main__': 23 | with open(sys.argv[1]) as inputLog: 24 | for line in inputLog.readlines(): 25 | searchTotalSpills = RE_TOTAL_SPILLS.findall(line) 26 | searchCallBoundaryStores = RE_CALL_BOUNDARY_STORES.findall(line) 27 | searchBlockBoundaryStores = RE_BLOCK_BOUNDARY_STORES.findall(line) 28 | searchLiveInLoads = RE_LIVE_IN_LOADS.findall(line) 29 | # TDOD remove 30 | #searchFunc = RE_FUNC.findall(line) 31 | #if searchFunc != []: 32 | # if searchFunc[0] in funcs: 33 | # print(searchFunc[0] + 'Is a copy') 34 | # else: 35 | # funcs[searchFunc[0]] = 0 36 | if searchTotalSpills != []: 37 | totalSpills += int(searchTotalSpills[0]) 38 | totalFuncs+=1 39 | elif searchCallBoundaryStores != []: 40 | totalCallBoundaryStores += int(searchCallBoundaryStores[0]) 41 | elif searchBlockBoundaryStores != []: 42 | totalBlockBoundaryStores += int(searchBlockBoundaryStores[0]) 43 | elif searchLiveInLoads != []: 44 | totalLiveInLoads += int(searchLiveInLoads[0]) 45 | 46 | print("Total Spills: " + str(totalSpills)) 47 | print("Total Call Boundary Stores: " + str(totalCallBoundaryStores)) 48 | print("Total Block Boundary Stores: " + str(totalBlockBoundaryStores)) 49 | print("Total Live-In Loads: " + str(totalLiveInLoads)) 50 | print("Total funcs: " + str(totalFuncs)) 51 | -------------------------------------------------------------------------------- /util/misc/count-nodes.py: -------------------------------------------------------------------------------- 1 | import re 2 | import mmap 3 | import optparse 4 | import os 5 | import json 6 | 7 | NODE_COUNT_RE = re.compile(r'EVENT: (.*"event_id": "NodeExamineCount".*)') 8 | 9 | def getNodeCount(fileName): 10 | count = 0 11 | with open(fileName) as bff: 12 | bffm = mmap.mmap(bff.fileno(), 0, access=mmap.ACCESS_READ) 13 | 14 | for match in NODE_COUNT_RE.finditer(bffm): 15 | count += json.loads(match.group(1))['count'] 16 | 17 | bffm.close() 18 | 19 | return count 20 | 21 | parser = optparse.OptionParser( 22 | description='Wrapper around runspec for collecting spill counts.') 23 | parser.add_option('-p', '--path', 24 | metavar='path', 25 | default=None, 26 | help='Log file.') 27 | parser.add_option('--isfolder', 28 | action='store_true', 29 | help='Specify if parsing a folder.') 30 | 31 | args = parser.parse_args()[0] 32 | 33 | total = 0 34 | 35 | if args.isfolder: 36 | if not os.path.isdir(args.path): 37 | raise Error("Please specify a valid folder.") 38 | for filename in os.listdir(args.path): 39 | total += getNodeCount(os.path.join(args.path, filename)) 40 | else: 41 | if not os.path.isfile(args.path): 42 | raise Error("Please specify a valid log file.") 43 | total += getNodeCount(args.path) 44 | 45 | print(total) 46 | -------------------------------------------------------------------------------- /util/misc/ddg2dot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import sys 4 | import re 5 | 6 | parser = argparse.ArgumentParser(description='Convert data_dep WriteToFile format to a .dot file') 7 | parser.add_argument('input', help='The WriteToFile format file to convert. Input a single hyphen (-) to read from stdin') 8 | parser.add_argument('-o', '--output', help='The destination to write to. Defaults to stdout') 9 | parser.add_argument('--filter-weights', nargs='*', default=[], help='filter out weights with the respective values') 10 | parser.add_argument( 11 | '--base', help='Consider the edges from this other .ddg when layouting. Those edges will be made invisible.') 12 | 13 | args = parser.parse_args() 14 | 15 | if args.input == '-': 16 | infile = sys.stdin 17 | else: 18 | infile = open(args.input, 'r') 19 | 20 | filtered_weights = set(int(x) for x in args.filter_weights) 21 | 22 | text = infile.read() 23 | infile.close() 24 | 25 | if args.base: 26 | with open(args.base) as f: 27 | basetext = f.read() 28 | else: 29 | basetext = '' 30 | 31 | NODE_RE = re.compile(r'node (?P\d+) "(?P.*?)"(\s*"(?P.*?)")?') 32 | EDGE_RE = re.compile(r'dep (?P\d+) (?P\d+) "(?P.*?)" (?P\d+)') 33 | 34 | # Holds the resulting strings as a list of the lines. 35 | result = ['digraph G {\n'] 36 | 37 | # Create the nodes in the graph 38 | for match in NODE_RE.finditer(text): 39 | num = match['number'] 40 | name = match['name'] 41 | if name == 'artificial': # Prettify entry/exit names 42 | name = ['exit', 'entry'][match['other_name'] == '__optsched_entry'] 43 | 44 | # Add the node to the graph. Include a node to make it clear what this is 45 | result.append(f' n{num} [label="{name}:n{num}"];\n') 46 | 47 | result.append('\n') 48 | 49 | 50 | def create_edge_attrs(**attrs): 51 | if not attrs: 52 | return '' 53 | attrtext = ' '.join(f'{key}="{value}"' for key, value in attrs.items()) 54 | return f' [{attrtext}]' 55 | 56 | 57 | def create_label(filtered_weights, weight, type_): 58 | # The additional label text if we want to display the weight 59 | # (that is, if the weight is not filtered out) 60 | weight_label = '' if int(weight) in filtered_weights else ':' + weight 61 | # The actual label text 62 | return weight_label if type_ == 'data' else f'{type_}{weight_label}' 63 | 64 | 65 | def create_edge(from_, to, **attrs): 66 | return f' n{from_} -> n{to}{create_edge_attrs(**attrs)};\n' 67 | 68 | 69 | edges = set() 70 | 71 | # Create the edges in the graph 72 | for match in EDGE_RE.finditer(text): 73 | from_ = match['from'] 74 | to = match['to'] 75 | type_ = match['type'] 76 | weight = match['weight'] 77 | 78 | result.append( 79 | create_edge( 80 | from_, to, 81 | label=create_label(filtered_weights, weight, type_), 82 | ) 83 | ) 84 | edges.add((from_, to)) 85 | 86 | for match in EDGE_RE.finditer(basetext): 87 | from_ = match['from'] 88 | to = match['to'] 89 | type_ = match['type'] 90 | weight = match['weight'] 91 | 92 | if (from_, to) not in edges: 93 | result.append( 94 | create_edge( 95 | from_, to, 96 | label=create_label(filtered_weights, weight, type_), 97 | style="invis", 98 | ) 99 | ) 100 | 101 | # Graph is now finished: 102 | result.append('}\n') 103 | 104 | filecontents = ''.join(result) 105 | 106 | if args.output: 107 | with open(args.output, 'w') as f: 108 | print(filecontents, file=f) 109 | else: 110 | print(filecontents) 111 | -------------------------------------------------------------------------------- /util/misc/extract-script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os, sys 4 | import shutil 5 | import re 6 | 7 | parser = argparse.ArgumentParser( 8 | description='Extract a standalone version of an OptSched script') 9 | parser.add_argument( 10 | 'script', help='The path to script to extract a standalone version of') 11 | parser.add_argument( 12 | 'output', help='The output file to write the extracted script to') 13 | parser.add_argument('--optsched', help='The path to the OptSched directory, ' 14 | 'if this extract-script.py is not in its original location') 15 | 16 | args = parser.parse_args() 17 | 18 | OPTSCHED_ROOT = args.optsched if args.optsched else os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 19 | COMMON_FNS = os.path.join(OPTSCHED_ROOT, 'util', 'readlogs', '__init__.py') 20 | 21 | with open(args.script, 'r') as f: 22 | script = f.read() 23 | 24 | with open(COMMON_FNS, 'r') as f: 25 | readlogs = f.read() 26 | 27 | 28 | def replace_module(modulename, modulecontent, script): 29 | return re.sub( 30 | r'^(?:(?:\s*from\s+{0}\s+import.*)|(?:\s*import\s+{0}.*))$'.format(re.escape(modulename)), 31 | modulecontent, script, flags=re.MULTILINE) 32 | 33 | script = script.replace('sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n', '') 34 | script = replace_module('readlogs', readlogs, script) 35 | 36 | if 'readlogs' in script: 37 | sys.exit('Failed to make {} standalone. The "readlogs" library couldn\'t be' 38 | ' replaced.'.format(args.script)) 39 | 40 | if os.path.isdir(args.output): 41 | # Allow cp-like behavior of "copy to this directory" rather than requiring a 42 | # name for the script. 43 | output = os.path.join(args.output, os.path.basename(args.script)) 44 | else: 45 | output = args.output 46 | # Allow placing in a non-existent directory 47 | os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True) 48 | 49 | with open(output, 'w') as f: 50 | f.write(script) 51 | # Try to keep all permissions 52 | shutil.copystat(args.script, output) 53 | -------------------------------------------------------------------------------- /util/misc/findblock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser(description='Search spills.dat (from runspec-wrapper) to find the benchmark for a block') 6 | parser.add_argument('spills', help='The spills.dat file to search in. - for stdin') 7 | parser.add_argument('blocks', help='The blocks to search for. This may include the `:##` part, or it may just be the mangled function name', nargs='*') 8 | 9 | result = parser.parse_args() 10 | 11 | with open(result.spills, 'r') as f: 12 | file = f.read() 13 | 14 | fns = (block.split(':')[0] for block in result.blocks) 15 | 16 | fn_locs = [file.find(fn) for fn in fns] 17 | fn_benchmarks = [file.rfind(':', 0, fnindex) for fnindex in fn_locs] 18 | fn_benchmark_spans = [(file.rfind('\n', 0, e), e) for e in fn_benchmarks] 19 | fn_benchmarks = [file[b + 1:e] for (b, e) in fn_benchmark_spans] 20 | 21 | print('\n'.join(fn_benchmarks)) 22 | -------------------------------------------------------------------------------- /util/misc/func-stats.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | # Find the number of functions that are compiled more than once by LLVM. 3 | 4 | import sys 5 | import re 6 | import json 7 | 8 | def get_events_of_id(logs, event_id): 9 | event_start = 'EVENT: {"event_id": "{}"'.format(event_id) 10 | lines = logs.splitlines() 11 | event_lines = [line.split(' ', 1)[1] for line in lines if line.startswith(event_start)] 12 | return list(map(json.loads, event_lines)) 13 | 14 | RE_NEW_BENCH = re.compile(r'(\d+)\.(.*) base \.exe default') 15 | RE_BLOCK = re.compile(r'INFO: Processing DAG (.*) with (\d+) insts') 16 | 17 | if __name__ == "__main__": 18 | with open(sys.argv[1]) as logfile: 19 | blocks = {} 20 | bench = None 21 | totalRepeats = 0 22 | totalMismatches = 0 23 | for line in logfile.readlines(): 24 | matchBench = RE_NEW_BENCH.findall(line) 25 | matchBlock = get_events_of_id(line) 26 | 27 | if matchBench != []: 28 | if bench: 29 | print('In bench ' + bench + ' found ' + str(totalRepeats) + ' repeat blocks and ' + str(totalMismatches) + ' mismatches in length.') 30 | funcs = {} 31 | totalRepeats = 0 32 | totalMismatches = 0 33 | bench = matchBench[0][1] 34 | 35 | elif matchBlock != []: 36 | name = matchBlock[0][0] 37 | insts = matchBlock[0][1] 38 | 39 | if name in blocks: 40 | if blocks[name] != insts: 41 | totalMismatches += 1 42 | 43 | totalRepeats += 1 44 | continue 45 | else: 46 | blocks[name] = insts 47 | -------------------------------------------------------------------------------- /util/misc/rp-compare.py: -------------------------------------------------------------------------------- 1 | #/usr/bin/python3 2 | # Calculate how often OptSched's register pressure estimates match LLVM's 3 | # You must compile OptSched with IS_DEBUG_PEAK_PRESSURE flag enabled. 4 | 5 | import sys 6 | import re 7 | 8 | # The number of register types. 9 | MAX_REG_TYPES = 30 10 | 11 | RP_OPT_INFO = re.compile(r'INFO: OptSchPeakRegPres Index (\d+) Name (.+) Peak (\d+) Limit (\d+)') 12 | RP_AFT_INFO = re.compile(r'INFO: PeakRegPresAfter Index (\d+) Name (.+) Peak (\d+) Limit (\d+)') 13 | RP_DAG_NAME = re.compile(r'INFO: Processing DAG (.+) with') 14 | 15 | totalBlocks = 0 16 | totalMismatches = 0 17 | majorMismatches = 0 18 | 19 | with open(str(sys.argv[1])) as logfile: 20 | log = logfile.read() 21 | blocks = log.split("INFO: ********** Opt Scheduling **********") 22 | 23 | for block in blocks: 24 | optSchedPressures = [None]*MAX_REG_TYPES 25 | llvmPressures = [None]*MAX_REG_TYPES 26 | if (len(RP_DAG_NAME.findall(block)) == 0): 27 | continue; 28 | 29 | totalBlocks+=1 30 | blockName = RP_DAG_NAME.findall(block)[0] 31 | 32 | for matchOpt in RP_OPT_INFO.finditer(block): 33 | index = int(matchOpt.group(1)) 34 | name = matchOpt.group(2) 35 | peak = matchOpt.group(3) 36 | limit = matchOpt.group(4) 37 | optSchedPressures[index] = {} 38 | optSchedPressures[index]['name'] = name 39 | optSchedPressures[index]['peak'] = peak 40 | optSchedPressures[index]['limit'] = limit 41 | 42 | for matchLLVM in RP_AFT_INFO.finditer(block): 43 | index = int(matchLLVM.group(1)) 44 | name = matchLLVM.group(2) 45 | peak = matchLLVM.group(3) 46 | limit = matchLLVM.group(4) 47 | llvmPressures[index] = {} 48 | llvmPressures[index]['name'] = name 49 | llvmPressures[index]['peak'] = peak 50 | llvmPressures[index]['limit'] = limit 51 | 52 | for i in range(MAX_REG_TYPES): 53 | optP = optSchedPressures[i] 54 | llvmP = llvmPressures[i] 55 | 56 | if (optP['peak'] != llvmP['peak']): 57 | print('Mismatch in block ' + blockName + '.') 58 | print('Reg type with mismatch ' + optP['name'] + \ 59 | ' Limit ' + optP['limit'] + ' Peak OptSched ' + optP['peak'] + \ 60 | ' Peak LLVM ' + llvmP['peak'] + '.') 61 | totalMismatches+=1 62 | # A major mismatch occurs when peak pressure is over physical limit. 63 | if (max(int(optP['peak']), int(llvmP['peak'])) > int(optP['limit'])): 64 | print('Major mismatch!') 65 | majorMismatches+=1 66 | 67 | print('Total blocks processed ' + str(totalBlocks) + '.') 68 | print('Total mismatches ' + str(totalMismatches) + '.') 69 | print('Total major mismatches ' + str(majorMismatches) + '.') 70 | -------------------------------------------------------------------------------- /util/misc/spill-compare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # Compare two log files using the OptSched scheduler with simulate register 3 | # allocation enabled. Find instances where a reduction in cost does not 4 | # correspond with a reduction in spills. 5 | 6 | import os, sys 7 | 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | from readlogs import * 10 | 11 | regions = {} 12 | totalBlocks = 0 13 | totalMismatches = 0 14 | lowestLength = sys.maxsize 15 | smallestFoundRegion = '' 16 | foundRegion = False 17 | 18 | 19 | with open(str(sys.argv[1])) as logfile: 20 | log1 = logfile.read() 21 | blocks = [block for block in parse_blocks(log1) if 'BestResult' in block] 22 | for block in blocks: 23 | if not 'CostLowerBound' not in block: 24 | print("WARNING: Block does not have a logged lower bound. Skipping block: " + block, 25 | out=sys.stderr) 26 | continue 27 | 28 | totalBlocks += 1 29 | 30 | lowerBound = block['CostLowerBound']['cost'] 31 | bestCostInfo = block['BestResult'] 32 | regionName = bestCostInfo['name'] 33 | regionCostBest = bestCostInfo['cost'] 34 | regionLengthBest = bestCostInfo['length'] 35 | 36 | if 'BestLocalRegAllocSimulation' not in block: 37 | print(regionName) 38 | 39 | regionCostHeuristic = block['HeuristicResult']['spill_cost'] 40 | regionSpillsBest = block['BestLocalRegAllocSimulation']['num_spills'] 41 | regionSpillsHeuristic = block['HeuristicLocalRegAllocSimulation']['num_spills'] 42 | 43 | if regionCostBest < regionCostHeuristic and regionSpillsBest > regionSpillsHeuristic: 44 | totalMismatches+=1 45 | print("Found Region: " + regionName + " With Length: " + str(regionLengthBest)) 46 | print("Best Cost: " + str(regionCostBest) + " Heuristic Cost: " + str(regionCostHeuristic)) 47 | print("Best Cost (Absolute): " + (lowerBound + regionCostBest)) 48 | print("Best Spills: " + str(regionSpillsBest) + " Heurisitc Spills: " + str(regionSpillsHeuristic)) 49 | if regionLengthBest < lowestLength: 50 | foundRegion = True 51 | smallestFoundRegion = regionName 52 | lowestLength = regionLengthBest 53 | 54 | if (foundRegion): 55 | print("Smallest region with mismatch is: " + str(smallestFoundRegion) + " with length " + str(lowestLength)) 56 | 57 | print("Processed " + str(totalBlocks) + " blocks") 58 | print("Found " + str(totalMismatches) + " mismatches") 59 | -------------------------------------------------------------------------------- /util/misc/spill-count-csv.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import os 4 | 5 | BenchmarkRegex = re.compile(r'(.*?):$') 6 | FunctionRegex = re.compile(r' +(\d+) (.*?)$') 7 | DEBUG = False 8 | 9 | def debugPrint(str): 10 | if DEBUG: print(str) 11 | 12 | if len(sys.argv) != 2: 13 | raise Exception("Invalid number of arguments. Expected 1") 14 | 15 | if not os.path.isfile(sys.argv[1]): 16 | raise Exception("%s is not a file!" % sys.argv[1]) 17 | 18 | with open(sys.argv[1]) as f: 19 | benchName = "" 20 | for line in f: 21 | match = BenchmarkRegex.match(line) 22 | if not match is None: 23 | benchName = match.group(1) 24 | debugPrint("Found benchmark %s" % benchName) 25 | continue 26 | match = FunctionRegex.match(line) 27 | if not match is None: 28 | debugPrint("Found function %s with %d spills" % (match.group(2), int(match.group(1)))) 29 | sys.stdout.write("%s,%s,%d\n" % (benchName, match.group(2), int(match.group(1)))) 30 | else: 31 | debugPrint("Not a match: %s" % line) 32 | -------------------------------------------------------------------------------- /util/plaidbench/run-plaidbench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import subprocess 3 | import argparse 4 | import os 5 | 6 | #************************************************************************************** 7 | #Description: Run all plaidbench benchmarks and redirect output to a directory 8 | # that will contain the log file for each benchmark. 9 | #Author: Austin Kerbow 10 | #Modified By: Justin Bassett 11 | #Last Update: May 4, 2020 12 | #************************************************************************************** 13 | # Requires write permission in the current directory. 14 | # 15 | # OUTPUT: 16 | # 1.) Directories containing the log for each benchmark located in their own 17 | # directory. 18 | 19 | NETWORKS = ( 20 | "densenet121", 21 | "densenet169", 22 | "densenet201", 23 | "inception_resnet_v2", 24 | "inception_v3", 25 | "mobilenet", 26 | "nasnet_large", 27 | "nasnet_mobile", 28 | "resnet50", 29 | "vgg16", 30 | "vgg19", 31 | "xception", 32 | "imdb_lstm", 33 | ) 34 | 35 | EXAMPLES = 4096 36 | BATCH_SIZE = 16 37 | 38 | parser = argparse.ArgumentParser(description='Run all plaidbench benchmarks, redirecting output to a directory which contains the log file for each benchmark') 39 | parser.add_argument('-n', '--num-iterations', type=int, default=1, help='Number of iterations') 40 | parser.add_argument('output', metavar='DIR', help='The output directory base path') 41 | 42 | args = parser.parse_args() 43 | 44 | NUM_ITERATIONS = args.num_iterations 45 | DIRECTORY_BASE_PATH = args.output 46 | 47 | for i in range(NUM_ITERATIONS): 48 | DIR_NAME = DIRECTORY_BASE_PATH + '-' + str(i) 49 | 50 | for network in NETWORKS: 51 | RESULT_DIR = os.path.join(DIR_NAME, network) 52 | os.makedirs(RESULT_DIR, exist_ok=True) 53 | 54 | with open(os.path.join(RESULT_DIR, network + '.log'), 'w') as outfile: 55 | subprocess.run(['plaidbench', '--examples', str(EXAMPLES), 56 | '--batch-size', str(BATCH_SIZE), 57 | '--results', DIR_NAME, 58 | 'keras', '--no-fp16', '--no-train', network, 59 | ], check=True, stderr=subprocess.STDOUT, stdout=outfile) 60 | -------------------------------------------------------------------------------- /util/readlogs/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def split_blocks(log): 4 | ''' 5 | Splits the log into the individual blocks. 6 | ''' 7 | return log.split("INFO: ********** Opt Scheduling **********")[1:] 8 | 9 | def parse_events(block_log): 10 | ''' 11 | Returns a `dict[event_id --> list[event-json]]` of the events in the given log. 12 | 13 | `EVENT: {"event_id": "some_id", "value"}` 14 | becomes `{"some_id": [{"event_id": "some_id", "arg": "value"}, ...], ...}` 15 | 16 | If there is only one event of each id, pass the result through 17 | `parse_as_singular_events(...)` to unwrap the lists. 18 | ''' 19 | lines = block_log.splitlines() 20 | event_lines = [line.split(' ', 1)[1] for line in lines if line.startswith('EVENT:')] 21 | parsed = list(map(json.loads, event_lines)) 22 | result = dict() 23 | 24 | for log in parsed: 25 | result.setdefault(log['event_id'], []).append(log) 26 | 27 | return result 28 | 29 | def parse_blocks(log): 30 | ''' 31 | Splits the block into individual blocks and parses each block via parse_events(). 32 | ''' 33 | return [parse_events(block) for block in split_blocks(log)] 34 | 35 | def keep_only_singular_events(logs): 36 | ''' 37 | Converts a the event `dict[event_id --> list[event-json]]` to 38 | `dict[event_id --> event-json]` dropping any event which has a duplicated event_id. 39 | ''' 40 | result = dict() 41 | for k, v in logs.items(): 42 | if len(v) == 1: result[k] = v[0] 43 | return result 44 | 45 | def keep_only_first_event(logs): 46 | ''' 47 | Converts a the event `dict[event_id --> list[event-json]]` to 48 | `dict[event_id --> event-json]` keeping only the first of any event for a given event_id. 49 | ''' 50 | result = dict() 51 | for k, v in logs.items(): 52 | result[k] = v[0] 53 | return result 54 | 55 | def parse_as_singular_events(logs): 56 | ''' 57 | Converts a the event `dict[event_id --> list[event-json]]` to 58 | `dict[event_id --> event-json]` requiring exactly one event per event_id. 59 | ''' 60 | for k, v in logs.items(): 61 | if len(v) != 1: raise AssertionError('Duplicate log events for event ' + k) 62 | return {k: v[0] for k, v in logs.items()} 63 | --------------------------------------------------------------------------------