├── .flake8 ├── .github └── workflows │ ├── build_and_upload_wheels.yaml │ ├── test-for-pandas-3.yml │ └── unit-tests.yaml ├── .gitignore ├── .gitlab-ci.yml ├── .readthedocs.yml ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── build_requirements.sh ├── codecov.yml ├── docs ├── Makefile ├── _pygments │ └── style.py ├── _static │ ├── 2024_08_08_Thicket_Tutorial.pdf │ └── custom.css ├── composing_parallel_sorting_data.ipynb ├── conf.py ├── developer_guide.rst ├── environment.yml ├── extrap-with-metadata-aggregated.ipynb ├── generating_data.rst ├── getting_started.rst ├── groupby_aggregate.ipynb ├── images │ ├── Table-Tree-Revised-gradien.png │ ├── appended_statsdf.png │ ├── empty_statsdf.png │ ├── ensembleframe.png │ ├── metadataframe.png │ ├── ql-original.png │ ├── thicket-tutorial-slide-preview.png │ ├── thicket_gifs_and_source_vids │ │ ├── metadata_changing_axis.gif │ │ ├── metadata_color_encoding_and_subselecting.gif │ │ ├── metadata_selecting_data.gif │ │ ├── metadata_vis_load.gif │ │ └── topdown_analysis.gif │ ├── thicket_tutorial_34_1.png │ ├── thicket_tutorial_36_1.png │ └── thicket_tutorial_37_1.png ├── index.rst ├── modeling_parallel_sorting_data.ipynb ├── nsight_compute.ipynb ├── publications.rst ├── query_language.ipynb ├── requirements.txt ├── source │ ├── modules.rst │ ├── thicket.external.rst │ ├── thicket.rst │ ├── thicket.stats.rst │ └── thicket.vis.rst ├── stats-functions.ipynb ├── thicket_rajaperf_clustering.ipynb ├── thicket_tutorial.ipynb ├── tma_speedup_clustering.ipynb ├── tutorial_materials.rst ├── user_guide.rst └── vis_docs.rst ├── install.sh ├── license.py ├── logo-notext.png ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── setup.py └── thicket ├── __init__.py ├── ensemble.py ├── external ├── __init__.py └── console.py ├── groupby.py ├── helpers.py ├── model_extrap.py ├── ncu.py ├── query.py ├── stats ├── __init__.py ├── calc_boxplot_statistics.py ├── calc_temporal_pattern.py ├── 
check_normality.py ├── confidence_interval.py ├── correlation_nodewise.py ├── display_boxplot.py ├── display_heatmap.py ├── display_histogram.py ├── display_violinplot.py ├── distance.py ├── maximum.py ├── mean.py ├── median.py ├── minimum.py ├── percentiles.py ├── preference.py ├── scoring.py ├── stats_utils.py ├── std.py ├── sum.py ├── ttest.py └── variance.py ├── tests ├── conftest.py ├── data │ ├── caliper-ordered │ │ └── 230525-151052_1930517_eWbGeyrlBOPT.cali │ ├── example-json │ │ └── user_ensemble.json │ ├── example-timeseries │ │ ├── cxx.cali │ │ ├── mem_power_timeseries.cali │ │ ├── memory_timeseries.cali │ │ └── timeseries.cali │ ├── mpi_scaling_cali │ │ ├── 125_cores.cali │ │ ├── 216_cores.cali │ │ ├── 27_cores.cali │ │ ├── 343_cores.cali │ │ └── 64_cores.cali │ └── rajaperf │ │ ├── lassen │ │ ├── clang10.0.1_nvcc10.2.89_1048576 │ │ │ ├── 1 │ │ │ │ ├── Base_CUDA-block_128.cali │ │ │ │ └── Base_CUDA-block_256.cali │ │ │ ├── 2 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 3 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 4 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 5 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 6 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 7 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 8 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ ├── 9 │ │ │ │ └── Base_CUDA-block_128.cali │ │ │ └── 10 │ │ │ │ └── Base_CUDA-block_128.cali │ │ └── clang10.0.1_nvcc10.2.89_2097152 │ │ │ └── 1 │ │ │ ├── Base_CUDA-block_128.cali │ │ │ └── Base_CUDA-block_256.cali │ │ └── quartz │ │ ├── gcc10.3.1_1048576 │ │ └── O3 │ │ │ ├── 1 │ │ │ └── Base_Seq-default.cali │ │ │ ├── 2 │ │ │ └── Base_Seq-default.cali │ │ │ ├── 3 │ │ │ └── Base_Seq-default.cali │ │ │ └── 4 │ │ │ └── Base_Seq-default.cali │ │ └── gcc10.3.1_2097152 │ │ └── O3 │ │ └── 1 │ │ └── Base_Seq-default.cali ├── test_add_root_node.py ├── test_caliperreader.py ├── test_concat_thickets.py ├── test_copy.py ├── test_display.py ├── test_ensemble.py ├── test_filter_metadata.py ├── 
test_filter_profile.py ├── test_filter_stats.py ├── test_from_statsframes.py ├── test_get_node.py ├── test_groupby.py ├── test_intersection.py ├── test_json_interface.py ├── test_model_extrap.py ├── test_ncu.py ├── test_pickle.py ├── test_query.py ├── test_query_stats.py ├── test_reader_dispatch.py ├── test_stats.py ├── test_thicket.py ├── test_timeseries.py ├── test_tree.py └── utils.py ├── thicket.py ├── utils.py ├── version.py └── vis ├── __init__.py ├── package.json ├── scripts ├── globals.js ├── pcp │ ├── datautil.js │ ├── globals.js │ ├── pcp.js │ ├── scatter.js │ ├── stackedarea.js │ └── store.js ├── topdown │ ├── stackedbars.js │ └── topdown.js └── treetable.js ├── static ├── pcp_bundle.html ├── pcp_bundle.js ├── topdown_bundle.html ├── topdown_bundle.js ├── treetable_bundle.html └── treetable_bundle.js ├── static_fixer.py ├── templates ├── pcp.html ├── topdown.html └── treetable.html ├── visualizations.py └── webpack.config.js /.flake8: -------------------------------------------------------------------------------- 1 | # -*- conf -*- 2 | # flake8 settings for thicket 3 | # 4 | # These are the flake8 settings recommended by Black 5 | # See https://github.com/psf/black 6 | # 7 | [flake8] 8 | max-line-length = 88 9 | select = C,E,F,W,B,B950 10 | ignore = E501,W503,E203 11 | builtins = IPython 12 | exclude = 13 | .eggs, 14 | .git, 15 | .hg, 16 | .mypy_cache, 17 | .tox, 18 | .venv, 19 | _build, 20 | buck-out, 21 | build, 22 | dist 23 | -------------------------------------------------------------------------------- /.github/workflows/build_and_upload_wheels.yaml: -------------------------------------------------------------------------------- 1 | name: Build wheels for Thicket 2 | 3 | on: 4 | # Uncomment for testing through a PR 5 | # pull_request: 6 | # branches: [develop, releases/**] 7 | workflow_dispatch: 8 | release: 9 | types: 10 | - published 11 | push: 12 | tags: 13 | - 'v**' 14 | 15 | jobs: 16 | 17 | # TODO: if we ever add compiled code to Thicket 
(e.g., Cython modules), 18 | # remove the build_wheels job 19 | build_wheels: 20 | name: Build wheels for Linux 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | python-version: [3.6, 3.7, 3.8, 3.9, '3.10', 3.11] 25 | steps: 26 | - uses: actions/checkout@v3 27 | 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | 33 | - name: Get PyPA build 34 | run: python -m pip install build 35 | 36 | - name: Build wheel for Python ${{ matrix.python_version }} 37 | run: python -m build -w 38 | 39 | - uses: actions/upload-artifact@v3 40 | with: 41 | path: dist/*.whl 42 | 43 | # TODO: if we ever add compiled code to Thicket (e.g., Cython modules), 44 | # uncomment these steps to build wheel files 45 | # 46 | # build_wheels_linux_3: 47 | # name: Build wheels for Linux 48 | # runs-on: ubuntu-latest 49 | # steps: 50 | # - uses: actions/checkout@v3 51 | 52 | # - name: Set up QEMU to support non-x86 architectures 53 | # uses: docker/setup-qemu-action@v2 54 | # with: 55 | # platforms: all 56 | 57 | # - uses: pypa/cibuildwheel@v2.12.1 58 | # env: 59 | # CIBW_SKIP: pp* *musllinux* 60 | # CIBW_ARCHS_LINUX: auto aarch64 ppc64le 61 | 62 | # - uses: actions/upload-artifact@v3 63 | # with: 64 | # path: ./wheelhouse/*.whl 65 | 66 | # build_wheels_linux_27: 67 | # name: Build wheels for Python 2.7 on Linux 68 | # runs-on: ubuntu-latest 69 | # steps: 70 | # - uses: actions/checkout@v3 71 | 72 | # # Need to use cibuildwheel 1 for Python 2.7 73 | # - uses: pypa/cibuildwheel@v1.12.0 74 | # env: 75 | # CIBW_SKIP: pp* 76 | # CIBW_ARCHS_LINUX: auto 77 | # CIBW_PROJECT_REQUIRES_PYTHON: "~=2.7" 78 | 79 | # - uses: actions/upload-artifact@v3 80 | # with: 81 | # path: ./wheelhouse/*.whl 82 | 83 | # TODO: uncomment if/when we decide to build wheels for macOS 84 | # build_wheels_macos_36_37: 85 | # name: Build wheels for Python 3.6 and 3.7 on macOS 86 | # runs-on: macos-12 87 | # steps: 88 | # 
- uses: actions/checkout@v3 89 | 90 | # - uses: pypa/cibuildwheel@v2.12.1 91 | # env: 92 | # CIBW_SKIP: pp* 93 | # CIBW_ARCHS_MACOS: x86_64 94 | # CIBW_PROJECT_REQUIRES_PYTHON: ">=3.6,<3.8" 95 | 96 | # - uses: actions/upload-artifact@v3 97 | # with: 98 | # path: ./wheelhouse/*.whl 99 | 100 | # build_wheels_macos_38_plus: 101 | # name: Build wheels for Python 3.8+ on macOS 102 | # runs-on: macos-12 103 | # steps: 104 | # - uses: actions/checkout@v3 105 | 106 | # - uses: pypa/cibuildwheel@v2.12.1 107 | # env: 108 | # CIBW_SKIP: pp* 109 | # CIBW_ARCHS_MACOS: x86_64 universal2 arm64 110 | # CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8" 111 | 112 | # - uses: actions/upload-artifact@v3 113 | # with: 114 | # path: ./wheelhouse/*.whl 115 | 116 | # build_wheels_macos_27: 117 | # name: Build wheels for Python 2.7 on macOS 118 | # runs-on: macos-12 119 | # steps: 120 | # - uses: actions/checkout@v3 121 | 122 | # # Need to use cibuildwheel 1 for Python 2.7 123 | # - uses: pypa/cibuildwheel@v1.12.0 124 | # env: 125 | # CIBW_SKIP: pp* 126 | # CIBW_BUILD: cp27-macosx_x86_64 127 | 128 | # - uses: actions/upload-artifact@v3 129 | # with: 130 | # path: ./wheelhouse/*.whl 131 | 132 | build_sdist: 133 | name: Build sdist 134 | runs-on: ubuntu-latest 135 | steps: 136 | - uses: actions/checkout@v3 137 | 138 | - name: Get PyPA build 139 | run: python -m pip install build 140 | 141 | - name: Build sdist 142 | run: python -m build -s 143 | 144 | - uses: actions/upload-artifact@v3 145 | with: 146 | path: dist/*.tar.gz 147 | 148 | test_upload_to_pypi: 149 | needs: 150 | # - build_wheels_linux_3 151 | # - build_wheels_linux_27 152 | # - build_wheels_macos_36_37 153 | # - build_wheels_macos_38_plus 154 | # - build_wheels_macos_27 155 | - build_wheels 156 | - build_sdist 157 | runs-on: ubuntu-latest 158 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 159 | steps: 160 | - uses: actions/download-artifact@v3 161 | with: 162 | name: artifact 163 | path: dist 164 | 165 | - uses: 
pypa/gh-action-pypi-publish@v1.5.0 166 | with: 167 | user: __token__ 168 | password: ${{ secrets.THICKET_TEST_PYPI_API_TOKEN }} 169 | repository_url: https://test.pypi.org/legacy/ 170 | 171 | upload_to_pypi: 172 | needs: 173 | # - build_wheels_linux_3 174 | # - build_wheels_linux_27 175 | # - build_wheels_macos_36_37 176 | # - build_wheels_macos_38_plus 177 | # - build_wheels_macos_27 178 | - build_wheels 179 | - build_sdist 180 | runs-on: ubuntu-latest 181 | if: github.event_name == 'release' && github.event.action == 'published' 182 | steps: 183 | - uses: actions/download-artifact@v3 184 | with: 185 | name: artifact 186 | path: dist 187 | 188 | - uses: pypa/gh-action-pypi-publish@v1.5.0 189 | with: 190 | user: __token__ 191 | password: ${{ secrets.PYPI_API_TOKEN }} 192 | -------------------------------------------------------------------------------- /.github/workflows/test-for-pandas-3.yml: -------------------------------------------------------------------------------- 1 | name: Run unit tests with Pandas 3.0 2 | 3 | on: 4 | push: 5 | branches: [ develop ] 6 | pull_request: 7 | branches: [ develop, releases/** ] 8 | 9 | jobs: 10 | run_pandas_3_unit_tests: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Set up Python 3.11 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.11 21 | 22 | - name: Set up Node 23 | uses: actions/setup-node@v4 24 | with: 25 | node-version: node 26 | 27 | - name: Install Pandas 3.0 (i.e., Nightly) 28 | run: | 29 | python3 -m pip install --upgrade pip pytest 30 | python3 -m pip install --upgrade --force-reinstall --pre --extra-index \ 31 | https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas[pyarrow] 32 | 33 | - name: Install Python 3 dependencies 34 | run: | 35 | python3 -m pip install git+https://github.com/LLNL/hatchet.git@develop 36 | python3 -m pip install -r requirements.txt 37 | python3 -m pip install . 
38 | python3 -m pip list 39 | 40 | # From here on, I'm making use of the trick from the following 41 | # GitHub repo to not fail the full CI/CD pipeline if this fails: 42 | # https://github.com/burningmantech/ranger-ims-server/pull/1347/files 43 | - name: Basic test with PyTest 44 | id: test 45 | run: | 46 | set +e # Prevent immediate exit if pytest reports errors 47 | PYTHONPATH=. $(which pytest) 48 | status=$? 49 | if [ ${status} -ne 0 ]; then 50 | echo "==================================" 51 | echo "Unit tests with Pandas 3.0 FAILED!" 52 | echo "==================================" 53 | echo "::warning::Optional CI test with Pandas 3 failed" 54 | echo "optional_fail=true" >> "${GITHUB_OUTPUT}" 55 | echo "optional_fail_status=${status}" >> "${GITHUB_OUTPUT}" 56 | else 57 | echo "==================================" 58 | echo "Unit tests with Pandas 3.0 PASSED!" 59 | echo "==================================" 60 | fi 61 | exit 0 62 | 63 | - name: Find issue comment 64 | if: github.event_name == 'push' 65 | uses: peter-evans/find-comment@v3 66 | id: find_comment 67 | with: 68 | issue-number: 221 69 | comment-author: 'github-actions[bot]' 70 | body-includes: Pandas 3.0 Unit Tests 71 | 72 | - name: Add comment to PR if test failed 73 | if: github.event_name == 'push' && steps.test.outputs.optional_fail == 'true' 74 | uses: peter-evans/create-or-update-comment@v4 75 | with: 76 | comment-id: ${{ steps.find_comment.outputs.comment-id }} 77 | issue-number: 221 78 | body: | 79 | ### Pandas 3.0 Unit Tests Failed! 80 | Due to breaking changes in Pandas 3.0 (namely, copy-on-write), we are performing optional 81 | tests of Thicket against the nightly release of Pandas 3.0. 82 | 83 | This is not a full testing failure, and it will not prevent your PR from being merged 84 | at this time. However, as we prepare for the release of Pandas 3.0, we encourage 85 | all developers to design their code to work with Pandas 3.0, if possible. 
86 | 87 | Pytest status code: ${{ steps.test.outputs.optional_fail_status }} 88 | Action log: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} 89 | edit-mode: 'replace' 90 | 91 | - name: Add comment to PR if test passed 92 | if: github.event_name == 'push' && steps.test.outputs.optional_fail == 'false' 93 | uses: peter-evans/create-or-update-comment@v4 94 | with: 95 | comment-id: ${{ steps.find_comment.outputs.comment-id }} 96 | issue-number: 221 97 | body: | 98 | ### Pandas 3.0 Unit Tests Passed! 99 | Due to breaking changes in Pandas 3.0 (namely, copy-on-write), we are performing optional 100 | tests of Thicket against the nightly release of Pandas 3.0. 101 | 102 | This PR passed unit tests when run with Pandas 3.0! 103 | edit-mode: 'replace' -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yaml: -------------------------------------------------------------------------------- 1 | name: unit tests 2 | 3 | on: 4 | push: 5 | branches: [ develop ] 6 | pull_request: 7 | branches: [ develop, releases/** ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | # TODO: add macos-latest 16 | os: [ubuntu-latest] 17 | python-version: [3.8, 3.9, "3.10", "3.11"] 18 | exclude: 19 | - os: macos-latest 20 | python-version: [3.5, 3.6] 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Set up Node 31 | uses: actions/setup-node@v4 32 | with: 33 | # Obtain the latest cached version of npm (either local cache or action cache) 34 | node-version: node 35 | 36 | - name: Install Python3 dependencies 37 | run: | 38 | python -m pip install --upgrade pip pytest 39 | pip install -r requirements.txt 40 | python -m pip install --upgrade --force-reinstall 
git+https://github.com/LLNL/hatchet.git@develop 41 | python setup.py install 42 | python setup.py build_ext --inplace 43 | python -m pip list 44 | 45 | - name: Update Black 46 | if: ${{ matrix.python-version == 3.8 }} 47 | run: | 48 | pip install flake8-pytest-importorskip 49 | pip install --upgrade click==8.0.4 50 | pip install black==21.12b0 51 | pip install flake8==4.0.1 52 | 53 | - name: Lint and Format Check with Flake8 and Black 54 | if: ${{ matrix.python-version == 3.8 }} 55 | run: | 56 | black --diff --check . 57 | flake8 58 | 59 | - name: Check License Headers 60 | run: | 61 | python license.py verify 62 | 63 | - name: Install ExtraP 64 | if: ${{ matrix.python-version >= 3.7 }} 65 | run: | 66 | pip install extrap 67 | 68 | - name: Install coverage tools 69 | run: | 70 | pip install codecov 71 | pip install pytest-cov 72 | 73 | - name: Basic Test with pytest 74 | run: | 75 | PYTHONPATH=. $(which pytest) --cov=./ --cov-report=xml 76 | 77 | - name: Upload coverage to Codecov 78 | uses: codecov/codecov-action@v4 79 | env: 80 | CODECOV_TOKEN: ${{ secrets.THICKET_CODECOV_TOKEN }} 81 | with: 82 | directory: ./coverage/reports/ 83 | env_vars: OS,PYTHON 84 | files: /home/runner/work/thicket/thicket/coverage.xml 85 | flags: unittests 86 | verbose: true 87 | fail_ci_if_error: true 88 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Always ignored 2 | *__pycache__/ 3 | *.ipynb_checkpoints 4 | *build/ 5 | *dist/ 6 | *.egg-info/ 7 | vis/node_modules/ 8 | 9 | # Profiling 10 | *.pstats 11 | 12 | # Testing 13 | *out*.txt 14 | 15 | # Build 16 | *build/ 17 | *dist/ 18 | *.egg-info/ 19 | 20 | **/node_modules/ 21 | **/package-lock.json 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | 
PYTHON_ENVIRONMENT_PATH: .venv 3 | PIP_CACHE_DIR: .cache/pip 4 | FF_ENABLE_JOB_CLEANUP: "true" 5 | 6 | stages: 7 | - environment 8 | - format 9 | - build-and-test 10 | - cleanup 11 | 12 | # Create a python env and cache it 13 | # Note: The cache needs to be removed manually in CI if the python environment 14 | # is changed, by clicking "CI/CD - Pipelines - Clear Runner Caches". 15 | configure_python: 16 | variables: 17 | GIT_STRATEGY: none 18 | tags: 19 | - shell 20 | - catalyst 21 | stage: environment 22 | script: 23 | - /usr/tce/packages/python/python-3.7.2/bin/virtualenv -p /usr/tce/packages/python/python-3.7.2/bin/python3 ${PYTHON_ENVIRONMENT_PATH} 24 | - . ${PYTHON_ENVIRONMENT_PATH}/bin/activate 25 | - PYTHON_EXECUTABLE=$(which python3) 26 | - ${PYTHON_EXECUTABLE} -m pip install matplotlib pandas numpy glob2 27 | - ${PYTHON_EXECUTABLE} -m pip install pytest 28 | - ${PYTHON_EXECUTABLE} -m pip install click==8.0.4 29 | - ${PYTHON_EXECUTABLE} -m pip install black==21.12b0 30 | - ${PYTHON_EXECUTABLE} -m pip install flake8==4.0.1 31 | - ${PYTHON_EXECUTABLE} -m pip install cython multiprocess textX caliper-reader 32 | cache: 33 | paths: 34 | - ${PYTHON_ENVIRONMENT_PATH} 35 | - ${PIP_CACHE_DIR} 36 | 37 | .venv: 38 | cache: 39 | paths: 40 | - ${PYTHON_ENVIRONMENT_PATH} 41 | - ${PIP_CACHE_DIR} 42 | policy: pull 43 | 44 | lint-format-check: 45 | tags: 46 | - shell 47 | - catalyst 48 | extends: .venv 49 | stage: format 50 | script: 51 | - . ${PYTHON_ENVIRONMENT_PATH}/bin/activate 52 | - $(which python3) --version 53 | - $(which python3) -m flake8 --version 54 | - $(which python3) -m flake8 55 | - $(which python3) -m black --check --diff . 56 | 57 | check-license-headers: 58 | tags: 59 | - shell 60 | - catalyst 61 | extends: .venv 62 | stage: format 63 | script: 64 | - . 
${PYTHON_ENVIRONMENT_PATH}/bin/activate 65 | - $(which python3) --version 66 | - $(which python3) license.py verify 67 | 68 | build-and-test-py37: 69 | tags: 70 | - shell 71 | - catalyst 72 | stage: build-and-test 73 | extends: .venv 74 | script: 75 | - . ${PYTHON_ENVIRONMENT_PATH}/bin/activate 76 | - $(which python3) -m pip install --ignore-installed --no-deps -r requirements.txt 77 | - $(which python3) setup.py install 78 | - PYTHONPATH=. $(which python3) -m pytest 79 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | #.readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | os: "ubuntu-latest" 10 | tools: 11 | python: "3.11" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/conf.py 16 | 17 | # We recommend specifying your dependencies to enable reproducible builds: 18 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 19 | python: 20 | install: 21 | - requirements: docs/requirements.txt 22 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: Thicket 3 | message: "If you use Thicket, please cite it as below." 
4 | repository-code: https://github.com/llnl/thicket 5 | preferred-citation: 6 | type: conference-paper 7 | doi: 10.1145/3588195.3592989 8 | url: https://github.com/llnl/thicket 9 | authors: 10 | - family-names: Brink 11 | given-names: Stephanie 12 | - family-names: McKinsey 13 | given-names: Michael 14 | - family-names: Boehme 15 | given-names: David 16 | - family-names: Scully-Allison 17 | given-names: Connor 18 | - family-names: Lumsden 19 | given-names: Ian 20 | - family-names: Hawkins 21 | given-names: Daryl 22 | - family-names: Burgess 23 | given-names: Treece 24 | - family-names: Lama 25 | given-names: Vanessa 26 | - family-names: Luettgau 27 | given-names: Jakob 28 | - family-names: Isaacs 29 | given-names: Katherine E. 30 | - family-names: Taufer 31 | given-names: Michela 32 | - family-names: Pearce 33 | given-names: Olga 34 | title: "Thicket: Seeing the Performance Experiment Forest for the Individual Run Trees" 35 | conference: 36 | name: "International Symposium on High-Performance Parallel and Distributed Computing" 37 | city: "Orlando" 38 | region: "Florida" 39 | country: "USA" 40 | date-start: 2023-06-20 41 | date-end: 2023-06-23 42 | year: 2023 43 | notes: LLNL-CODE-834749 44 | publisher: 45 | name: ACM 46 | city: "New York" 47 | region: "New York" 48 | country: "USA" 49 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Thicket 2 | 3 | This document is intended for developers who want to add new features or 4 | bugfixes to Thicket. It assumes you have some familiarity with Git and GitLab. 5 | It will discuss what a good merge request looks like, and the tests that your 6 | merge request must pass before it can be merged into Thicket. 7 | 8 | ## Forking Thicket 9 | 10 | First, you should create a fork. 
This will create a copy of the Thicket 11 | repository that you own, and will ensure you can push your changes up to GitLab 12 | and create merge requests. 13 | 14 | ## Developing a New Feature 15 | 16 | New features should be based on the `develop` branch. When you want to create a 17 | new feature, first ensure you have an up-to-date copy of the `develop` branch: 18 | 19 | $ git fetch origin 20 | $ git checkout develop 21 | $ git merge --ff-only origin/develop 22 | 23 | You can now create a new branch to develop your feature on: 24 | 25 | $ git checkout -b feature/ 26 | 27 | Proceed to develop your feature on this branch, and add tests that will 28 | utilize your new code. If you are creating new methods or classes, please add 29 | code comments. 30 | 31 | Once your feature is complete and your tests are passing, you can push your 32 | branch to your fork on GitLab and create a merge request. 33 | 34 | ## Developing a Bug Fix 35 | 36 | First, check if the change you want to make has been fixed in `develop`. If so, 37 | we suggest you either start using the `develop` branch, or temporarily apply 38 | the fix to whichever version of Thicket you are using. 39 | 40 | Assuming there is an unsolved bug, first make sure you have an up-to-date copy 41 | of the develop branch: 42 | 43 | $ git fetch origin 44 | $ git checkout develop 45 | $ git merge --ff-only origin/develop 46 | 47 | Then create a new branch for your bugfix: 48 | 49 | $ git checkout -b bugfix/ 50 | 51 | First, add a test that reproduces the bug you have found. Then develop your 52 | bugfix as normal, and make sure the test shows the bugfix has been resolved. 53 | 54 | Once you are finished, you can push your branch to your fork on GitLab, then 55 | create a merge request. 56 | 57 | ## Creating a Pull Request 58 | 59 | You can create a new merge request 60 | [here](https://github.com/llnl/thicket/pulls). Ensure that 61 | your merge request base is the `develop` branch of Thicket. 
62 | 63 | Add a short, descriptive title explaining the bugfix or the feature you have 64 | added, and put a longer description of the changes you have made in the 65 | description box. 66 | 67 | Once your merge request has been created, it will be run through our automated 68 | tests and also be reviewed by Thicket developers. Providing the branch passes 69 | both the tests and reviews, it will be merged into Thicket. 70 | 71 | ## Tests 72 | 73 | Thicket uses GitLab for continuous integration tests. Our tests are 74 | automatically run against every new pull request, and passing all tests is a 75 | requirement for merging your merge request. If you are developing a bugfix or a 76 | new feature, please add a test that checks the correctness of your new code. 77 | 78 | Thicket's unit tests can be found in the `thicket/tests` directory and are split up by 79 | component. 80 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Lawrence Livermore National Security, LLC. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the 8 | Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include NOTICE 3 | include thicket/vis/package.json 4 | include thicket/vis/webpack.config.js 5 | recursive-include thicket/vis/scripts * 6 | recursive-include thicket/vis/static * 7 | recursive-include thicket/vis/templates * 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This work was produced under the auspices of the U.S. Department of 2 | Energy by Lawrence Livermore National Laboratory under Contract 3 | DE-AC52-07NA27344. 4 | 5 | This work was prepared as an account of work sponsored by an agency of 6 | the United States Government. Neither the United States Government nor 7 | Lawrence Livermore National Security, LLC, nor any of their employees 8 | makes any warranty, expressed or implied, or assumes any legal liability 9 | or responsibility for the accuracy, completeness, or usefulness of any 10 | information, apparatus, product, or process disclosed, or represents that 11 | its use would not infringe privately owned rights. 12 | 13 | Reference herein to any specific commercial product, process, or service 14 | by trade name, trademark, manufacturer, or otherwise does not necessarily 15 | constitute or imply its endorsement, recommendation, or favoring by the 16 | United States Government or Lawrence Livermore National Security, LLC. 
17 | 18 | The views and opinions of authors expressed herein do not necessarily 19 | state or reflect those of the United States Government or Lawrence 20 | Livermore National Security, LLC, and shall not be used for advertising 21 | or product endorsement purposes. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # thicket Thicket 2 | 3 | [![Build Status](https://github.com/llnl/thicket/actions/workflows/unit-tests.yaml/badge.svg)](https://github.com/llnl/thicket/actions) 4 | [![codecov.io](https://codecov.io/github/LLNL/thicket/coverage.svg?branch=develop)](https://codecov.io/github/LLNL/thicket?branch=develop) 5 | [![Read the Docs](http://readthedocs.org/projects/thicket/badge/?version=latest)](http://thicket.readthedocs.io) 6 | [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 7 | 8 | # Thicket 9 | 10 | A Python-based toolkit for Exploratory Data Analysis (EDA) of parallel performance data 11 | that enables performance optimization and understanding of applications’ performance on 12 | supercomputers. It bridges the performance tool gap between being able to consider only 13 | a single instance of a simulation run (e.g., single platform, single measurement tool, 14 | or single scale) and finding actionable insights in multi-dimensional, multi-scale, 15 | multi-architecture, and multi-tool performance datasets. You can find detailed 16 | documentation, along with tutorials of Thicket in the 17 | [ReadtheDocs](https://thicket.readthedocs.io/en/latest/). 
18 | 19 | ### Installation 20 | 21 | To use thicket, install it with pip: 22 | 23 | ``` 24 | $ pip install llnl-thicket 25 | ``` 26 | 27 | Or, if you want to develop with this repo directly, run the install script from the 28 | root directory, which will build the package and add the cloned directory to 29 | your `PYTHONPATH`: 30 | 31 | ``` 32 | $ source install.sh 33 | ``` 34 | 35 | ### Contact Us 36 | 37 | You can direct any feature requests or questions to the Lawrence Livermore National 38 | Lab's Thicket development team by emailing either Stephanie Brink (brink2@llnl.gov) 39 | or Olga Pearce (pearce8@llnl.gov). 40 | 41 | ### Contributing 42 | 43 | To contribute to Thicket, please open a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) to the `develop` branch. Your pull request must pass Thicket's unit tests, and must be [PEP 8](https://peps.python.org/pep-0008/) compliant. Please open issues for questions, feature requests, or bug reports. 44 | 45 | Authors and citations 46 | --------------------- 47 | Many thanks to Thicket's [contributors](https://github.com/llnl/thicket/graphs/contributors). 48 | 49 | Thicket was created by Olga Pearce and Stephanie Brink. 50 | 51 | To cite Thicket, please use the following citation: 52 | 53 | * Stephanie Brink, Michael McKinsey, David Boehme, Connor Scully-Allison, Ian Lumsden, Daryl Hawkins, Treece Burgess, Vanessa Lama, Jakob Lüttgau, Katherine E. Isaacs, Michela Taufer, and Olga Pearce. 2023. Thicket: Seeing the Performance Experiment Forest for the Individual Run Trees. In the 32nd International Symposium on High-Performance Parallel and Distributed Computing (HPDC'23), August 2023, Pages 281–293. [doi.org/10.1145/3588195.3592989](https://doi.org/10.1145/3588195.3592989). 54 | 55 | On GitHub, you can copy this citation in APA or BibTeX format via the "Cite this 56 | repository" button. 
Or, see [CITATION.cff](https://github.com/llnl/thicket/blob/develop/CITATION.cff) for the raw BibTeX. 57 | 58 | ### License 59 | 60 | Thicket is distributed under the terms of the MIT license. 61 | 62 | All contributions must be made under the MIT license. Copyrights in the 63 | Thicket project are retained by contributors. No copyright assignment is 64 | required to contribute to Thicket. 65 | 66 | See [LICENSE](https://github.com/llnl/thicket/blob/develop/LICENSE) and 67 | [NOTICE](https://github.com/llnl/thicket/blob/develop/NOTICE) for details. 68 | 69 | SPDX-License-Identifier: MIT 70 | 71 | LLNL-CODE-834749 72 | -------------------------------------------------------------------------------- /build_requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CURR_PY_INTERPRETER="python3" 6 | 7 | if [[ ! -z "${PYTHON_INTERPRETER}" ]]; then 8 | CURR_PY_INTERPRETER=${PYTHON_INTERPRETER} 9 | fi 10 | 11 | ${CURR_PY_INTERPRETER} setup.py egg_info > /dev/null 12 | mv ./llnl_thicket.egg-info/requires.txt . 13 | mv requires.txt requirements.txt 14 | rm -rf ./llnl_thicket.egg-info/ 15 | 16 | echo "New requirements.txt file generated!" 17 | echo "Before trying to use it, remove any 'extras' headers" 18 | echo "that might have been added." 
19 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | round: down 3 | range: "65...100" 4 | 5 | comment: 6 | layout: "header, diff, changes, tree" 7 | behavior: new 8 | require_changes: false 9 | require_base: false 10 | require_head: true 11 | after_n_builds: 4 12 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | # Minimal makefile for Sphinx documentation 7 | # 8 | 9 | # You can set these variables from the command line. 10 | SPHINXOPTS = 11 | SPHINXBUILD = sphinx-build 12 | SPHINXPROJ = thicket 13 | SOURCEDIR = . 14 | BUILDDIR = _build 15 | 16 | # Put it first so that "make" without argument is like "make help". 17 | help: 18 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 19 | 20 | .PHONY: help Makefile 21 | 22 | # Catch-all target: route all unknown targets to Sphinx using the new 23 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 24 | %: Makefile 25 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 26 | -------------------------------------------------------------------------------- /docs/_pygments/style.py: -------------------------------------------------------------------------------- 1 | # The name of the Pygments (syntax highlighting) style to use. 
2 | from pygments.styles.default import DefaultStyle 3 | from pygments.token import Generic 4 | 5 | 6 | # modifications to the default style 7 | class ThicketStyle(DefaultStyle): 8 | styles = DefaultStyle.styles.copy() 9 | background_color = "#f4f4f8" 10 | styles[Generic.Output] = "#355" 11 | styles[Generic.Prompt] = "bold #346ec9" 12 | -------------------------------------------------------------------------------- /docs/_static/2024_08_08_Thicket_Tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/_static/2024_08_08_Thicket_Tutorial.pdf -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | div.prompt.empty.docutils.container{ 2 | display: none; 3 | } 4 | 5 | .wy-nav-content { 6 | min-width: 90%; 7 | } -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | # -*- coding: utf-8 -*- 7 | 8 | # Configuration file for the Sphinx documentation builder. 9 | # 10 | # This file does only contain a selection of the most common options. For a 11 | # full list see the documentation: 12 | # http://www.sphinx-doc.org/en/master/config 13 | 14 | # -- Path setup -------------------------------------------------------------- 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | 20 | import sys 21 | 22 | sys.path.insert(0, "..") 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "thicket" 27 | copyright = "2022, Lawrence Livermore National Security, LLC" 28 | author = "LLNL Developers" 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # If your documentation needs a minimal Sphinx version, state it here. 33 | # 34 | # needs_sphinx = '1.0' 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 39 | extensions = [ 40 | "sphinx.ext.autodoc", 41 | "sphinx.ext.napoleon", 42 | "sphinx_thebe", 43 | "myst_parser", 44 | "nbsphinx", 45 | "sphinx_rtd_theme", 46 | ] 47 | 48 | thebe_config = { 49 | "repository_url": "https://github.com/LLNL/thicket-tutorial.git", 50 | } 51 | 52 | # The suffix(es) of source filenames. 53 | # You can specify multiple suffix as a list of string: 54 | # 55 | source_suffix = [".rst", ".md"] 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # 60 | # This is also used if you do content translation via gettext catalogs. 61 | # Usually you set "language" from the command line for these cases. 62 | # language = None 63 | 64 | # List of patterns, relative to source directory, that match files and 65 | # directories to ignore when looking for source files. 66 | # This pattern also affects html_static_path and html_extra_path . 67 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 68 | 69 | 70 | # -- Options for HTML output ------------------------------------------------- 71 | 72 | # The theme to use for HTML and HTML Help pages. See the documentation for 73 | # a list of builtin themes. 
74 | # 75 | html_theme = "sphinx_rtd_theme" 76 | 77 | 78 | # Theme options are theme-specific and customize the look and feel of a theme 79 | # further. For a list of options available for each theme, see the 80 | # documentation. 81 | # 82 | html_theme_options = { 83 | "canonical_url": "", 84 | "analytics_id": "", 85 | "logo_only": True, 86 | "display_version": True, 87 | "prev_next_buttons_location": "bottom", 88 | "style_external_links": False, 89 | # Toc options 90 | "collapse_navigation": True, 91 | "sticky_navigation": True, 92 | "navigation_depth": 4, 93 | "includehidden": True, 94 | "titles_only": False, 95 | } 96 | 97 | # Add any paths that contain custom static files (such as style sheets) here, 98 | # relative to this directory. They are copied after the builtin static files, 99 | # so a file named "default.css" will overwrite the builtin "default.css". 100 | html_static_path = ["_static/"] 101 | 102 | html_css_files = ["custom.css"] 103 | 104 | html_logo = "../logo-notext.png" 105 | 106 | # The name of an image file (within the static path) to use as favicon of the 107 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 108 | # pixels large. 109 | html_favicon = "" 110 | -------------------------------------------------------------------------------- /docs/developer_guide.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | ################# 8 | Developer Guide 9 | ################# 10 | 11 | ************************* 12 | Contributing to Thicket 13 | ************************* 14 | 15 | If you are interested in contributing a new data reader, a feature, or a bugfix to 16 | Thicket, please read below. 
This guide discusses the contributing workflow used in the 17 | Thicket project, and the granularity of pull requests (PRs). 18 | 19 | Branches 20 | ======== 21 | 22 | The main development branch in Thicket, which has the latest contributions, is named ``develop``. 23 | All pull requests should start from ``develop`` and target ``develop``. 24 | 25 | There is a branch for each minor release series. Release branches originate from 26 | ``develop`` and have tags for each revision release in the series. 27 | 28 | Continuous Integration 29 | ====================== 30 | 31 | Thicket uses `GitHub Actions `_ for Continuous 32 | Integration testing. This means that every time you submit a pull request, a series of 33 | tests are run to make sure you did not accidentally introduce any bugs into Thicket. 34 | Your PR will not be accepted until it passes all of these tests. 35 | 36 | Currently, we perform 2 types of tests: 37 | 38 | Unit tests 39 | ---------- 40 | 41 | Unit tests ensure that Thicket's core API is working as expected. If you add a new data 42 | reader or new functionality to the Thicket API, you should add unit tests that provide 43 | adequate coverage for your code. You should also check that your changes pass all unit 44 | tests. You can do this by typing: 45 | 46 | .. code:: console 47 | 48 | $ pytest 49 | 50 | Style tests 51 | ----------- 52 | 53 | Thicket uses `Flake8 `_ to test for `PEP 8 54 | `_ compliance. You can check for compliance 55 | using: 56 | 57 | .. code:: console 58 | 59 | $ flake8 60 | 61 | Contributing Workflow 62 | ===================== 63 | 64 | Thicket is in active development, so the ``develop`` branch in Thicket has frequent 65 | merges of new pull requests. The recommended way to contribute a pull request is to fork 66 | the Thicket repo in your own space (if you already have a fork, make sure it is 67 | up-to-date), and then create a new branch off of ``develop``. 
68 | 69 | We prefer that commits pertaining to different components of Thicket (core Thicket API, 70 | visualization tools, etc.) prefix the component name in the commit message (for example 71 | ``<component>: descriptive message``). 72 | 73 | GitHub provides a detailed `tutorial 74 | `_ 75 | on creating pull requests. 76 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: thicket-docs 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - sphinx-thebe 7 | - nbsphinx 8 | - myst-parser 9 | - llnl-hatchet 10 | - thicket 11 | - extrap 12 | - scipy 13 | -------------------------------------------------------------------------------- /docs/generating_data.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | ############################### 8 | Generating Profiling Datasets 9 | ############################### 10 | 11 | ********* 12 | Caliper 13 | ********* 14 | 15 | Caliper can be installed using `Spack `_ or manually from its `GitHub 16 | repository `__. Instructions to build Caliper manually 17 | can be found in its `documentation `__. 18 | 19 | To record performance profiles using Caliper, you need to include ``cali.h`` and call 20 | the ``cali_init()`` function in your source code. You also need to link the Caliper 21 | library in your executable or load it using ``LD_PRELOAD``. Information about basic 22 | Caliper usage can be found in the `Caliper documentation 23 | `__. 24 | 25 | To generate profiling data, you can use Caliper's `built-in profiling configurations 26 | `_ customized for thicket: 27 | ``hatchet-region-profile`` and ``spot`` or ``hatchet-sample-profile``. 
The former 28 | generates a profile based on user annotations in the code while the latter generates a 29 | call path profile (similar to HPCToolkit's output). If you want to use one of the 30 | built-in configurations, you should set the ``CALI_CONFIG`` environment variable (e.g. 31 | ``CALI_CONFIG=hatchet-sample-profile``). 32 | 33 | You can read more about Caliper services in the `Caliper documentation 34 | `__. Thicket can currently only read 35 | .cali files, which is Caliper's native output format. 36 | 37 | .. _ref-adiak: 38 | 39 | ******* 40 | Adiak 41 | ******* 42 | 43 | Adiak can be used with Caliper to record program metadata. You can use Adiak, a C/C++ 44 | library, to record environment information (user, launchdate, system name, etc.) and 45 | program configuration (input problem description, problem size, etc.). To build Caliper 46 | with Adiak support, ``-DWITH_ADIAK=On`` is required. Adiak provides built-in functions to 47 | collect common environment metadata that enables performance comparisons across 48 | different runs. Some common metadata that can be used with thicket are `launchdate` or 49 | `clustername`, where a user can use this metadata information to organize the 50 | performance data with the help of thicket's capabilities. 51 | 52 | .. 
code-block:: 53 | 54 | adiak_user(); /* user name */ 55 | adiak_uid(); /* user id */ 56 | adiak_launchdate(); /* program start time (UNIX timestamp) */ 57 | adiak_executable(); /* executable name */ 58 | adiak_executablepath(); /* full executable file path */ 59 | adiak_cmdline(); /* command line parameters */ 60 | adiak_hostname(); /* current host name */ 61 | adiak_clustername(); /* cluster name */ 62 | adiak_job_size(); /* MPI job size */ 63 | adiak_hostlist(); /* all host names in this MPI job */ 64 | adiak_walltime(); /* wall-clock job runtime */ 65 | adiak_cputime(); /* job cpu runtime */ 66 | adiak_systime(); /* job sys runtime */ 67 | 68 | ``adiak::value()`` records key:value pairs with overloads for many data types 69 | 70 | .. code-block:: 71 | 72 | #include <adiak.hpp> 73 | 74 | vector<int> ints { 1, 2, 3, 4 }; 75 | 76 | adiak::value("myvec", ints); 77 | adiak::value("myint", 42); 78 | adiak::value("mydouble", 3.14); 79 | adiak::value("mystring", "hi"); 80 | adiak::value("mypath", adiak::path("/dev/null")); 81 | adiak::value("compiler", adiak::version("gcc@8.3.0")); 82 | 83 | ``adiak_nameval()`` uses printf()-style descriptors to determine data types 84 | 85 | .. code-block:: 86 | 87 | #include <adiak.h> 88 | 89 | int ints[] = { 1, 2, 3, 4 }; 90 | 91 | adiak_nameval("myvec", adiak_general, NULL, "[%d]", ints, 4); 92 | adiak_nameval("myint", adiak_general, NULL, "%d", 42); 93 | adiak_nameval("mydouble", adiak_general, NULL, "%f", 3.14); 94 | adiak_nameval("mystring", adiak_general, NULL, "%s", "hi"); 95 | adiak_nameval("mypath", adiak_general, NULL, "%p", "/dev/null"); 96 | adiak_nameval("compiler", adiak_general, NULL, "%v", "gcc@8.3.0"); 97 | 98 | You can learn more about the Adiak library in the `Adiak documentation 99 | `__. 100 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. 
2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | ################# 8 | Getting Started 9 | ################# 10 | 11 | *************** 12 | Prerequisites 13 | *************** 14 | 15 | Thicket has the following minimum requirements, which must be installed before Thicket 16 | is run: 17 | 18 | #. Python 3 (3.6 - 3.11) 19 | #. hatchet 20 | #. pandas >= 1.1 21 | #. numpy 22 | #. matplotlib, and 23 | #. scipy 24 | 25 | **************** 26 | Other Packages 27 | **************** 28 | 29 | #. Extrap: extrap, matplotlib 30 | #. Vis: beautifulsoup4 31 | #. Plotting: seaborn 32 | 33 | For installation options for the extra packages, refer to the installation instructions 34 | below in `Install and Build Thicket`_. Thicket is available on `GitHub 35 | `_. 36 | 37 | ************** 38 | Installation 39 | ************** 40 | 41 | You can get thicket from its `GitHub repository `_ 42 | using this command: 43 | 44 | .. code:: console 45 | 46 | $ git clone https://github.com/llnl/thicket.git 47 | 48 | This will create a directory called ``thicket``. 49 | 50 | Install and Build Thicket 51 | ========================= 52 | 53 | To build thicket and update your PYTHONPATH, run the following shell script from the 54 | thicket root directory: 55 | 56 | .. code:: console 57 | 58 | $ source ./install.sh 59 | 60 | Note: The ``source`` keyword is required to update your PYTHONPATH environment variable. 61 | It is not necessary if you have already manually added the thicket directory to your 62 | PYTHONPATH. 63 | 64 | Alternatively, you can install thicket using pip: 65 | 66 | .. code:: console 67 | 68 | $ pip install llnl-thicket 69 | 70 | You can install the other packages mentioned above for additional features of thicket. 71 | Below is an example of installing thicket with extrap. 72 | 73 | .. 
code:: console 74 | 75 | $ pip install llnl-thicket[extrap] 76 | 77 | Check Installation 78 | ================== 79 | 80 | After installing thicket, you should be able to import thicket when running the Python 81 | interpreter in interactive mode: 82 | 83 | .. code:: console 84 | 85 | $ python 86 | Python 3.7.4 (default, Jul 11 2019, 01:08:00) 87 | [Clang 10.0.1 (clang-1001.0.46.4)] on darwin 88 | Type "help", "copyright", "credits" or "license" for more information. 89 | >>> 90 | 91 | Typing ``import thicket`` at the prompt should succeed without any error messages: 92 | 93 | .. code:: console 94 | 95 | >>> import thicket 96 | >>> 97 | 98 | Interactive Visualization 99 | ========================= 100 | 101 | Thicket provides an interactive visualization which can be run inside of your Jupyter 102 | notebooks. It depends on a different build mechanism, which we describe here. 103 | 104 | The software in the ``thicket/vis`` subdirectory (i.e., the ``thicket.vis`` package) 105 | requires `Node.js and the Node Package Manager (NPM) `_ 106 | for the development and building of JavaScript code. 107 | 108 | Building Visualization Code for Users 109 | ===================================== 110 | 111 | If you are just using our built-in visualizations, the visualization code will be built 112 | automatically when you access the ``thicket.vis`` module. All that users have to do is 113 | make sure they have NPM installed. If NPM is not installed, accessing the ``thicket.vis`` 114 | module will raise a ``FileNotFoundError``. 115 | 116 | Building Visualization Code for Developers 117 | ========================================== 118 | 119 | If you are developing a visualization, it is recommended that you build the visualization 120 | code manually. To manually build this code, follow the instructions below. 
121 | 122 | Installing Node Packages 123 | ======================== 124 | 125 | Once you have Node and NPM installed on your system, you can install all necessary node 126 | packages by running the following line in your terminal from the ``thicket/vis`` 127 | directory: 128 | 129 | .. code:: console 130 | 131 | >>> npm install 132 | 133 | Building Out JavaScript Code with Webpack 134 | ========================================= 135 | 136 | To build out JavaScript into the static bundles used by the Jupyter visualizations, run 137 | the following line from the ``thicket/vis`` directory in your terminal: 138 | 139 | .. code:: console 140 | 141 | >>> npm run build 142 | 143 | Alternatively, you can run the following line to force bundles to automatically update 144 | when you change the JavaScript source code: 145 | 146 | .. code:: console 147 | 148 | >>> npm run watch 149 | -------------------------------------------------------------------------------- /docs/images/Table-Tree-Revised-gradien.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/Table-Tree-Revised-gradien.png -------------------------------------------------------------------------------- /docs/images/appended_statsdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/appended_statsdf.png -------------------------------------------------------------------------------- /docs/images/empty_statsdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/empty_statsdf.png -------------------------------------------------------------------------------- /docs/images/ensembleframe.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/ensembleframe.png -------------------------------------------------------------------------------- /docs/images/metadataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/metadataframe.png -------------------------------------------------------------------------------- /docs/images/ql-original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/ql-original.png -------------------------------------------------------------------------------- /docs/images/thicket-tutorial-slide-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket-tutorial-slide-preview.png -------------------------------------------------------------------------------- /docs/images/thicket_gifs_and_source_vids/metadata_changing_axis.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_gifs_and_source_vids/metadata_changing_axis.gif -------------------------------------------------------------------------------- /docs/images/thicket_gifs_and_source_vids/metadata_color_encoding_and_subselecting.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_gifs_and_source_vids/metadata_color_encoding_and_subselecting.gif 
-------------------------------------------------------------------------------- /docs/images/thicket_gifs_and_source_vids/metadata_selecting_data.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_gifs_and_source_vids/metadata_selecting_data.gif -------------------------------------------------------------------------------- /docs/images/thicket_gifs_and_source_vids/metadata_vis_load.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_gifs_and_source_vids/metadata_vis_load.gif -------------------------------------------------------------------------------- /docs/images/thicket_gifs_and_source_vids/topdown_analysis.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_gifs_and_source_vids/topdown_analysis.gif -------------------------------------------------------------------------------- /docs/images/thicket_tutorial_34_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_tutorial_34_1.png -------------------------------------------------------------------------------- /docs/images/thicket_tutorial_36_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_tutorial_36_1.png -------------------------------------------------------------------------------- /docs/images/thicket_tutorial_37_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/docs/images/thicket_tutorial_37_1.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | .. 8 | thicket documentation master file, created by 9 | sphinx-quickstart on Tue Jun 26 08:43:21 2018. 10 | You can adapt this file completely to your liking, but it should at least 11 | contain the root `toctree` directive. 12 | 13 | ######### 14 | Thicket 15 | ######### 16 | 17 | Thicket is a python-based toolkit for Exploratory Data Analysis (EDA) of parallel 18 | performance data that enables performance optimization and understanding of 19 | applications' performance on supercomputers. It bridges the performance tool gap between 20 | being able to consider only a single instance of a simulation run (e.g., single 21 | platform, single measurement tool, or single scale) and finding actionable insights in 22 | multi-dimensional, multi-scale, multi-architecture, and multi-tool performance datasets. 23 | 24 | You can get thicket from its `GitHub repository `_: 25 | 26 | .. code:: console 27 | 28 | $ git clone https://github.com/llnl/thicket.git 29 | 30 | or install it using pip: 31 | 32 | .. code:: console 33 | 34 | $ pip install llnl-thicket 35 | 36 | If you are new to thicket and want to start using it, see :doc:`Getting Started 37 | `. 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: User Docs 42 | 43 | getting_started 44 | user_guide 45 | generating_data 46 | 47 | If you encounter bugs while using thicket, you can report them by opening an issue on 48 | `GitHub `_. 49 | 50 | .. 
toctree:: 51 | :maxdepth: 2 52 | :caption: Tutorials 53 | 54 | tutorial_materials 55 | thicket_tutorial.ipynb 56 | thicket_rajaperf_clustering.ipynb 57 | extrap-with-metadata-aggregated.ipynb 58 | stats-functions.ipynb 59 | query_language.ipynb 60 | groupby_aggregate.ipynb 61 | nsight_compute.ipynb 62 | composing_parallel_sorting_data.ipynb 63 | modeling_parallel_sorting_data.ipynb 64 | tma_speedup_clustering.ipynb 65 | vis_docs 66 | 67 | .. toctree:: 68 | :maxdepth: 2 69 | :caption: Reference 70 | 71 | publications 72 | 73 | .. toctree:: 74 | :maxdepth: 2 75 | :caption: Developer Docs 76 | 77 | developer_guide 78 | 79 | .. toctree:: 80 | :maxdepth: 2 81 | :caption: API Docs 82 | 83 | Thicket API Docs 84 | 85 | #################### 86 | Indices and tables 87 | #################### 88 | 89 | - :ref:`genindex` 90 | - :ref:`modindex` 91 | - :ref:`search` 92 | -------------------------------------------------------------------------------- /docs/publications.rst: -------------------------------------------------------------------------------- 1 | ****************************** 2 | Publications and Presentations 3 | ****************************** 4 | 5 | Publications 6 | ============ 7 | 8 | - Olga Pearce, Jason Burmark, Rich Hornung, Befikir Bogale, Ian Lumsden, Michael McKinsey, Dewi Yokelson, David Boehme, Stephanie Brink, Michela Taufer, Tom Scogland. RAJA Performance Suite: Performance Portability Analysis with Caliper and Thicket. In Proceedings of the SC'24 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis (SC-W '24), Atlanta, GA. 9 | 10 | - Stephanie Brink, Michael McKinsey, David Boehme, Connor Scully-Allison, Ian Lumsden, Daryl Hawkins, Treece Burgess, Vanessa Lama, Jakob Luettgau, Katherine E. Isaacs, Michela Taufer, Olga Pearce. Thicket: Seeing the Performance Experiment Forest for the Individual Run Trees. 
In Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing (HPDC '23), Orlando, FL. 11 | 12 | Posters 13 | ======= 14 | 15 | - Dragana Grbic. Integrating HPCToolkit with Tools for Automated Analysis. Presented at SC '24. Best Poster Candidate. 16 | 17 | - Befikir Bogale. Cluster-based Methodology for Characterizing the Performance of Portable Applications. Presented at SC '24. 18 | 19 | - Dewi Yokelson, David Boehme, Stephanie Brink, Olga Pearce, Allen Malony. Timeseries Visualization of Performance Metrics. Presented at ISC '24. 20 | 21 | - Ian Lumsden, Jakob Luettgau, Vanessa Lama, Connor Scully-Allison, Stephanie Brink, Katherine E. Isaacs, Olga Pearce, Michela Taufer. Identifying Performance Bottlenecks in Scientific Applications with Call Path Querying. Presented at the 2023 Salishan Conference. 22 | 23 | Tutorials 24 | ========= 25 | 26 | - HPCIC Tutorials: Caliper, Hatchet, and Thicket. Virtual. August 8, 2024. `YouTube `_ 27 | 28 | - RADIUSS AWS Tutorials: Caliper, Hatchet, and Thicket. Virtual. August 14, 2023. `YouTube `_ 29 | 30 | Presentations 31 | ============= 32 | 33 | - Scalable Tools Workshop: Thicket: Growth of the Heterogeneous Performance Experiment Forest. August 12, 2024. `Slides `_ 34 | 35 | - Scalable Tools Workshop: Programmatic Analysis of Large-Scale Performance Data. August 12, 2024. `Slides `_ 36 | 37 | - Scalable Tools Workshop: Thicket: Seeing the Performance Experiment Forest for the Individual Run Trees. June 20, 2023. 
`Slides `_ 38 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | pandas 3 | pydot 4 | numpy 5 | PyYAML 6 | cython 7 | multiprocess 8 | textX 9 | caliper-reader 10 | sphinx-thebe 11 | nbsphinx 12 | myst-parser 13 | sphinx==7.2.6 14 | sphinx-rtd-theme==1.3.0 15 | llnl-hatchet 16 | thicket 17 | seaborn 18 | scipy 19 | extrap 20 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | thicket 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | thicket 8 | -------------------------------------------------------------------------------- /docs/source/thicket.external.rst: -------------------------------------------------------------------------------- 1 | thicket.external package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | thicket.external.console module 8 | ------------------------------- 9 | 10 | .. automodule:: thicket.external.console 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: thicket.external 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/source/thicket.rst: -------------------------------------------------------------------------------- 1 | thicket package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | thicket.external 11 | thicket.stats 12 | thicket.vis 13 | 14 | Submodules 15 | ---------- 16 | 17 | thicket.ensemble module 18 | ----------------------- 19 | 20 | .. 
automodule:: thicket.ensemble 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | thicket.groupby module 26 | ---------------------- 27 | 28 | .. automodule:: thicket.groupby 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | thicket.helpers module 34 | ---------------------- 35 | 36 | .. automodule:: thicket.helpers 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | thicket.model\_extrap module 42 | ---------------------------- 43 | 44 | .. automodule:: thicket.model_extrap 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | thicket.ncu module 50 | ------------------ 51 | 52 | .. automodule:: thicket.ncu 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | 57 | thicket.thicket module 58 | ---------------------- 59 | 60 | .. automodule:: thicket.thicket 61 | :members: 62 | :undoc-members: 63 | :show-inheritance: 64 | 65 | thicket.utils module 66 | -------------------- 67 | 68 | .. automodule:: thicket.utils 69 | :members: 70 | :undoc-members: 71 | :show-inheritance: 72 | 73 | thicket.version module 74 | ---------------------- 75 | 76 | .. automodule:: thicket.version 77 | :members: 78 | :undoc-members: 79 | :show-inheritance: 80 | 81 | Module contents 82 | --------------- 83 | 84 | .. automodule:: thicket 85 | :members: 86 | :undoc-members: 87 | :show-inheritance: 88 | -------------------------------------------------------------------------------- /docs/source/thicket.stats.rst: -------------------------------------------------------------------------------- 1 | thicket.stats package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | thicket.stats.calc\_boxplot\_statistics module 8 | ---------------------------------------------- 9 | 10 | .. automodule:: thicket.stats.calc_boxplot_statistics 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | thicket.stats.check\_normality module 16 | ------------------------------------- 17 | 18 | .. 
automodule:: thicket.stats.check_normality 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | thicket.stats.correlation\_nodewise module 24 | ------------------------------------------ 25 | 26 | .. automodule:: thicket.stats.correlation_nodewise 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | thicket.stats.display\_boxplot module 32 | ------------------------------------- 33 | 34 | .. automodule:: thicket.stats.display_boxplot 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | thicket.stats.display\_heatmap module 40 | ------------------------------------- 41 | 42 | .. automodule:: thicket.stats.display_heatmap 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | thicket.stats.display\_histogram module 48 | --------------------------------------- 49 | 50 | .. automodule:: thicket.stats.display_histogram 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | thicket.stats.maximum module 56 | ---------------------------- 57 | 58 | .. automodule:: thicket.stats.maximum 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | thicket.stats.mean module 64 | ------------------------- 65 | 66 | .. automodule:: thicket.stats.mean 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | thicket.stats.median module 72 | --------------------------- 73 | 74 | .. automodule:: thicket.stats.median 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | thicket.stats.minimum module 80 | ---------------------------- 81 | 82 | .. automodule:: thicket.stats.minimum 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | thicket.stats.percentiles module 88 | -------------------------------- 89 | 90 | .. automodule:: thicket.stats.percentiles 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | thicket.stats.preference module 96 | ------------------------------- 97 | 98 | .. 
automodule:: thicket.stats.preference 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | thicket.stats.std module 104 | ------------------------ 105 | 106 | .. automodule:: thicket.stats.std 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | thicket.stats.ttest module 112 | -------------------------- 113 | 114 | .. automodule:: thicket.stats.ttest 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | thicket.stats.variance module 120 | ----------------------------- 121 | 122 | .. automodule:: thicket.stats.variance 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | Module contents 128 | --------------- 129 | 130 | .. automodule:: thicket.stats 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | -------------------------------------------------------------------------------- /docs/source/thicket.vis.rst: -------------------------------------------------------------------------------- 1 | thicket.vis package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | thicket.vis.static\_fixer module 8 | -------------------------------- 9 | 10 | .. automodule:: thicket.vis.static_fixer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | thicket.vis.visualizations module 16 | --------------------------------- 17 | 18 | .. automodule:: thicket.vis.visualizations 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: thicket.vis 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/tutorial_materials.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 
4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | ############################### 8 | Tutorial Materials 9 | ############################### 10 | 11 | This is an introduction to Thicket with a presentation and live demos. It was 12 | presented as a virtual event at the `2024 HPC Innovation Center Tutorial Series 13 | `_, 14 | August 8, 2024, alongside Caliper. 15 | 16 | .. image:: images/thicket-tutorial-slide-preview.png 17 | :target: _static/2024_08_08_Thicket_Tutorial.pdf 18 | :height: 72px 19 | :align: left 20 | :alt: Slide Preview 21 | 22 | :download:`Download Slides <_static/2024_08_08_Thicket_Tutorial.pdf>`. 23 | 24 | We provide scripts that take you through some of the available features in 25 | Thicket. They correspond to sections in the slides above. 26 | 27 | To run through the scripts, you can follow the instructions to build the docker 28 | image in `thicket-tutorial `_. 29 | -------------------------------------------------------------------------------- /docs/user_guide.rst: -------------------------------------------------------------------------------- 1 | *************** 2 | User Guide 3 | *************** 4 | 5 | Thicket Components 6 | ======================= 7 | A thicket object is a flexible data model that enables the structured analysis of unstructured performance data. 8 | The four components of thicket are the call tree, performance data, metadata, and 9 | aggregated statistics, as shown in the figure below. 10 | 11 | 12 | .. figure:: images/Table-Tree-Revised-gradien.png 13 | :width: 600 14 | :align: center 15 | 16 | Figure 1: The four components of the thicket object. 17 | 18 | Performance Data 19 | ======================= 20 | The performance data table is a multi-dimensional, multi-indexed structure with one or more rows of data associated 21 | with each node of the call tree. Each row associated with a node of the call tree 22 | represents a different execution of the associated call tree node. 
Below is an 23 | example of a performance data table stored in a thicket object. 24 | 25 | .. figure:: images/ensembleframe.png 26 | :width: 800 27 | :align: center 28 | 29 | Figure 2: Example performance data table in thicket. 30 | 31 | | 32 | The performance data's call tree structure can be seen below with corresponding nodes. This structure extends to both the 33 | performance data and aggregated statistics tables. 34 | 35 | .. figure:: images/ql-original.png 36 | :width: 400 37 | :align: center 38 | 39 | Figure 3: Example call tree in thicket. 40 | 41 | 42 | | 43 | Metadata 44 | ======================= 45 | 46 | During Thicket construction, the available metadata about each 47 | run is read in and composed into a metadata table. 48 | The metadata table can contain all available information about each of the 49 | application runs in the thicket, 50 | such as batch info (the time of the run, the user), 51 | machine information (OS, processor type, number of processors used), 52 | build information (compiler, optimization levels), 53 | and runtime parameters for the application. 54 | Thicket's functionality leverages the available metadata to enable 55 | dataset manipulation such as filtering on any of the metadata fields. 56 | 57 | .. note:: 58 | 59 | See the :ref:`Adiak ` section in :doc:`Generating Profiling Datasets 60 | ` for a description of how to enrich your profiling data with 61 | metadata. 62 | 63 | .. figure:: images/metadataframe.png 64 | :width: 600 65 | :align: center 66 | 67 | Figure 4: Example metadata table in thicket with information about each run. 68 | 69 | | 70 | Aggregated Statistics 71 | ======================= 72 | 73 | The aggregated statistics table supports an order-reduction mechanism and stores processed applications’ performance. 74 | Each row of the aggregated statistics table holds data aggregated across all profiles associated with a particular call tree node. 
75 | Below is an example of an empty aggregated statistics table just containing the nodes. 76 | 77 | .. figure:: images/empty_statsdf.png 78 | :width: 600 79 | :align: center 80 | 81 | Figure 5: Example of an empty aggregated statistics table in thicket. 82 | 83 | Thicket provides users with capabilities for computing common aggregated statistics on their performance data, such as mean and standard deviation. Below is an example 84 | of an aggregated statistics table with appended results from a statistical calculation. 85 | 86 | .. figure:: images/appended_statsdf.png 87 | :width: 600 88 | :align: center 89 | 90 | Figure 6: Example aggregated statistics table in thicket with mean and median 91 | calculated on a single column (e.g., Total_time) from the performance data table. 92 | 93 | | 94 | -------------------------------------------------------------------------------- /docs/vis_docs.rst: -------------------------------------------------------------------------------- 1 | .. 2 | Copyright 2022 Lawrence Livermore National Security, LLC and other 3 | Thicket Project Developers. See the top-level LICENSE file for details. 4 | 5 | SPDX-License-Identifier: MIT 6 | 7 | ##################################### 8 | Thicket Visualization Demonstration 9 | ##################################### 10 | 11 | ******************* 12 | Top-down Analysis 13 | ******************* 14 | 15 | In this gif we demonstrate the top-down analysis visualization. Each top-down 16 | metric is color-coded. The colors associated with each metric are shown by the 17 | legend at the top of the visualization. 18 | 19 | .. only:: html 20 | 21 | .. figure:: images/thicket_gifs_and_source_vids/topdown_analysis.gif 22 | 23 | This visualization shows how the distribution of topdown metrics associated 24 | with each node changes as the problem size increases. Each group represents a 25 | series of trials at a given problem size and each bar represents a single 26 | profiling run. 
27 | 28 | Near the end of the gif we see a series of runs which become more backend bound 29 | as the problem size increases, highlighting an opportunity for optimization. 30 | 31 | *************************** 32 | Parallel Coordinates Plot 33 | *************************** 34 | 35 | The visualization is initialized using the `%metadata_vis` magic command. 36 | This command takes multiple arguments: the Thicket we are visualizing, the 37 | specific metadata we are interested in, and the node metrics which the scatter 38 | plots will show. Initially the parallel coordinates are empty until we select 39 | runs from a scatterplot. 40 | 41 | .. only:: html 42 | 43 | .. figure:: images/thicket_gifs_and_source_vids/metadata_vis_load.gif 44 | 45 | In this gif, we show the loading of libraries into our notebook and the 46 | subsequent loading of data into a thicket object. 47 | 48 | The left scatterplot plots a metric against one metadata value to provide a 49 | perspective of how an independent variable may impact the measured performance 50 | across a range of runs. The right scatterplot plots two metrics relative to one 51 | another. The axes can be changed using the dropdown menus above each chart. 52 | 53 | .. only:: html 54 | 55 | .. figure:: images/thicket_gifs_and_source_vids/metadata_changing_axis.gif 56 | 57 | To populate the lines on the parallel coordinate plot, a user brushes over one 58 | of the scatterplots. Either scatterplot can be brushed over. In this example, we 59 | select all the data points but fewer can be selected at a time. 60 | 61 | .. only:: html 62 | 63 | .. figure:: images/thicket_gifs_and_source_vids/metadata_selecting_data.gif 64 | 65 | To better identify patterns of behavior linked to specific metadata across all 66 | metadata values, we provide a means to color profiles by a particular metadata 67 | field. 
A user may click on any of the crayon icons to the left of a parallel 68 | coordinate axis to color all lines and dots in the scatterplot according to 69 | that metadata field. 70 | 71 | Quantitative variables are colored on a spectrum from light to dark green. 72 | Categorical variables are colored with a discrete color map. In this case, we 73 | have only one "user" who is colored blue. 74 | 75 | At the end of this gif, we demonstrate sub-selecting a group of large 76 | parallelism and large runtime profiles. 77 | 78 | .. only:: html 79 | 80 | .. figure:: images/thicket_gifs_and_source_vids/metadata_color_encoding_and_subselecting.gif 81 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | case *"$PWD"* in 4 | "$PYTHONPATH") 5 | ;; 6 | 7 | *) 8 | PYTHONPATH=$PWD:$PYTHONPATH 9 | ;; 10 | esac 11 | 12 | python setup.py build_ext --inplace 13 | python thicket/vis/static_fixer.py 14 | -------------------------------------------------------------------------------- /logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/logo-notext.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.poetry] 6 | name = "llnl-thicket" 7 | version = "2024.2.1" 8 | description = "A Python-based toolkit for analyzing ensemble performance data." 
9 | license = "MIT" 10 | 11 | [tool.ruff] 12 | line-length = 88 13 | target-version = 'py37' 14 | include = ['\.pyi?$'] 15 | exclude = [ 16 | ".eggs", 17 | ".git", 18 | ".hg", 19 | ".mypy_cache", 20 | ".tox", 21 | ".venv", 22 | "_build", 23 | "buck-out", 24 | "build", 25 | "dist", 26 | ] 27 | 28 | [tool.black] 29 | line-length = 88 30 | target-version = ['py37'] 31 | include = '\.pyi?$' 32 | exclude = ''' 33 | /( 34 | \.eggs 35 | | \.git 36 | | \.hg 37 | | \.mypy_cache 38 | | \.tox 39 | | \.venv 40 | | _build 41 | | buck-out 42 | | build 43 | | dist 44 | )/ 45 | ''' 46 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | [pytest] 7 | addopts = --durations=20 -ra 8 | testpaths = thicket/tests 9 | python_files = test_*.py 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | numpy < 2.0.0 3 | pandas>=1.1 4 | llnl-hatchet 5 | extrap 6 | matplotlib 7 | seaborn 8 | beautifulsoup4 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
#
# SPDX-License-Identifier: MIT

from setuptools import setup
from os import path

# Directory that contains this setup.py. Both README.md and thicket/version.py
# are resolved against it so the build does not depend on the directory the
# command happens to be launched from.
here = path.abspath(path.dirname(__file__))


def readme():
    """Return the text of the README.md that sits next to this file."""
    with open(path.join(here, "README.md"), encoding="utf-8") as f:
        return f.read()


# Get the version in a safe way which does not reference thicket `__init__` file
# per python docs: https://packaging.python.org/guides/single-sourcing-package-version/
version = {}
with open(path.join(here, "thicket", "version.py"), encoding="utf-8") as fp:
    # version.py is part of this repository; executing it fills `version`
    # with the names it defines, of which only `__version__` is read below.
    exec(fp.read(), version)


setup(
    name="llnl-thicket",
    version=version["__version__"],
    license="MIT",
    description="Toolkit for exploratory data analysis of ensemble performance data",
    long_description=readme(),
    long_description_content_type="text/markdown",
    keywords="",
    project_urls={
        "Source Code": "https://github.com/LLNL/thicket",
        "Documentation": "https://thicket.readthedocs.io/",
    },
    python_requires=">=3.6.1",
    packages=[
        "thicket",
        "thicket.stats",
        "thicket.vis",
        "thicket.external",
    ],
    include_package_data=True,
    install_requires=[
        "scipy",
        "numpy < 2.0.0",
        "pandas >= 1.1",
        "llnl-hatchet",
        "tqdm",
        "more-itertools",
    ],
    extras_require={
        "extrap": ["extrap", "matplotlib"],
        "plotting": ["seaborn"],
        "vis": ["beautifulsoup4"],
    },
)
7 | # flake8: noqa: F401 8 | 9 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 10 | 11 | # Imports of subdirectories to prevent namespace package issues 12 | # Don't re-export vis so that we don't trigger NPM building on every Thicket import 13 | from . import ( 14 | external as external, 15 | stats as stats, 16 | ) 17 | 18 | from .ensemble import Ensemble 19 | from .thicket import Thicket 20 | from .thicket import InvalidFilter 21 | from .thicket import EmptyMetadataTable 22 | from .version import __version__ 23 | -------------------------------------------------------------------------------- /thicket/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLNL/thicket/83d592b86c9410bdee55de88f368f304376b0149/thicket/external/__init__.py -------------------------------------------------------------------------------- /thicket/query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | # make flake8 unused names in this file. 
# flake8: noqa: F401

from hatchet.query import (
    # New style queries
    # #################
    #
    # Core query types
    Query,
    ObjectQuery,
    StringQuery,
    parse_string_dialect,
    # Compound queries
    CompoundQuery,
    ConjunctionQuery,
    DisjunctionQuery,
    ExclusiveDisjunctionQuery,
    NegationQuery,
    # Errors
    InvalidQueryPath,
    InvalidQueryFilter,
    RedundantQueryFilterWarning,
    BadNumberNaryQueryArgs,
    #
    # Old style queries
    # #################
    AbstractQuery,
    NaryQuery,
    AndQuery,
    IntersectionQuery,
    OrQuery,
    UnionQuery,
    XorQuery,
    SymDifferenceQuery,
    NotQuery,
    QueryMatcher,
    CypherQuery,
    parse_cypher_query,
    is_hatchet_query,
)

__all__ = [
    "Query",
    "ObjectQuery",
    "StringQuery",
    "parse_string_dialect",
    "CompoundQuery",
    "ConjunctionQuery",
    "DisjunctionQuery",
    "ExclusiveDisjunctionQuery",
    "NegationQuery",
    "InvalidQueryPath",
    "InvalidQueryFilter",
    "RedundantQueryFilterWarning",
    "BadNumberNaryQueryArgs",
    "is_hatchet_query",
]


def is_new_style_query(query_obj):
    """Tell whether the type of ``query_obj`` derives from a new-style query class.

    Arguments:
        query_obj: object to classify

    Returns:
        (bool): True if ``type(query_obj)`` is a subclass of ``Query`` or of
            ``CompoundQuery``, False otherwise
    """
    # A tuple of classes is checked left to right, exactly like the chained `or`.
    return issubclass(type(query_obj), (Query, CompoundQuery))


def is_old_style_query(query_obj):
    """Tell whether the type of ``query_obj`` derives from ``AbstractQuery``.

    Arguments:
        query_obj: object to classify

    Returns:
        (bool): True if ``type(query_obj)`` is a subclass of ``AbstractQuery``,
            False otherwise
    """
    query_type = type(query_obj)
    return issubclass(query_type, AbstractQuery)
# flake8: noqa: F401

from .maximum import maximum
from .mean import mean
from .median import median
from .minimum import minimum
from .sum import sum
from .percentiles import percentiles
from .std import std
from .variance import variance
from .calc_boxplot_statistics import calc_boxplot_statistics
from .correlation_nodewise import correlation_nodewise
from .check_normality import check_normality
from .scoring import score_delta_mean_delta_stdnorm
from .scoring import score_delta_mean_delta_coefficient_of_variation
from .scoring import score_bhattacharyya
from .scoring import score_hellinger
from .preference import preference
from .calc_temporal_pattern import calc_temporal_pattern
from .distance import bhattacharyya_distance
from .distance import hellinger_distance
from .confidence_interval import confidence_interval


# The display_* helpers are only imported when seaborn can be imported; the
# rest of thicket.stats stays usable without it.
try:
    import seaborn as sns
except Exception:
    # Still best-effort: any failure while importing seaborn only disables the
    # plotting helpers. Unlike a bare `except:`, this lets KeyboardInterrupt
    # and SystemExit propagate instead of being reported as a missing package.
    print("Seaborn not found, so skipping imports of plotting in thicket.stats")
    print("To enable this plotting, install seaborn or thicket[plotting]")
else:
    from .display_boxplot import display_boxplot
    from .display_histogram import display_histogram
    from .display_heatmap import display_heatmap
    from .display_violinplot import display_violinplot_thicket
    from .display_violinplot import display_violinplot
#
# SPDX-License-Identifier: MIT

import pandas as pd
import numpy as np

from ..utils import verify_thicket_structures


def _temporal_output_name(column, suffix):
    """Return the statsframe column name for ``column`` with ``suffix`` appended.

    A plain column name gets the suffix appended directly. A tuple such as
    (column index, column name) keeps its leading elements and gets the suffix
    appended to its last element.
    """
    if isinstance(column, tuple):
        return column[:-1] + (column[-1] + suffix,)
    return column + suffix


def _temporal_pattern_label(score):
    """Map a temporal score to its pattern name using the 0.2/0.4/0.6 cut-offs."""
    if score < 0.2:
        return "constant"
    if score < 0.4:
        return "phased"
    if score < 0.6:
        return "dynamic"
    return "sporadic"


def calc_temporal_pattern(thicket, columns=None):
    """Calculate the associated temporal pattern with the passed in columns.

    Designed to take in a timeseries thicket, and append two columns to the
    aggregated statistics (statsframe) table for the temporal pattern calculated
    on each node over time.

    The two additional columns are the ``_temporal_score`` and the ``_pattern``
    associated with that score. For one node the score is
    ``1 - sum(values) / (max(values) * len(values))``; it is labelled "constant"
    below 0.2, "phased" below 0.4, "dynamic" below 0.6 and "sporadic" otherwise.
    A node with any missing value gets the pattern "none" and a NaN score. A node
    whose values are all zero gets a score of 0.0 ("constant").

    Arguments:
        thicket (thicket): timeseries Thicket object
        columns (list): List of numeric columns to calculate temporal pattern.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` is None, if "iteration" is not an index level
            of the performance data, or if a requested column is not numeric
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    if "iteration" not in thicket.dataframe.index.names:
        raise ValueError(
            "Must have a timeseries thicket with iteration as an index level"
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    # Check every column before writing anything, so a non-numeric column
    # cannot leave the statsframe half updated.
    for column in columns:
        if not pd.api.types.is_numeric_dtype(thicket.dataframe[column]):
            raise ValueError("Column data type must be numeric")

    output_column_names = []

    # One group per value of the first index level; reused for every column.
    per_node = thicket.dataframe.groupby(level=0)

    for column in columns:
        pattern_col = []
        score_col = []
        # for any node that has temporal values we can calculate the pattern per node
        for _, node_df in per_node:
            values = node_df[column]
            # if the node has any nans, pattern is none
            if values.isna().values.any():
                pattern = "none"
                score = np.nan
            else:
                total = values.sum()
                peak = values.max()
                if peak == 0 and total == 0:
                    # All values are zero: the formula would be 0/0 (NaN) and
                    # fall through to "sporadic" although nothing changes.
                    score = 0.0
                else:
                    score = 1 - (total / (peak * len(values)))
                pattern = _temporal_pattern_label(score)
            pattern_col.append(pattern)
            score_col.append(score)

        # add the new columns to the statsframe and output list
        # NOTE(review): the lists are assigned by position, which presumes the
        # statsframe rows are in the same order as the groupby keys -- confirm.
        pattern_column_name = _temporal_output_name(column, "_pattern")
        score_column_name = _temporal_output_name(column, "_temporal_score")
        thicket.statsframe.dataframe[pattern_column_name] = pattern_col
        thicket.statsframe.dataframe[score_column_name] = score_col
        output_column_names.append(pattern_column_name)
        output_column_names.append(score_column_name)

    return output_column_names
#
# SPDX-License-Identifier: MIT

import pandas as pd
from scipy import stats

from ..utils import verify_thicket_structures
from .stats_utils import cache_stats_op


def _normality_label(pvalue):
    """Map a Shapiro-Wilk p-value to the value stored in the statsframe.

    Returns the string "False" below 0.05, the string "True" above 0.05, and
    pd.NA when neither comparison holds (exactly 0.05, or NaN).
    """
    if pvalue < 0.05:
        return "False"
    if pvalue > 0.05:
        return "True"
    return pd.NA


@cache_stats_op
def check_normality(thicket, columns=None):
    """Determine if the data is normal or non-normal for each node in the performance
    data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table. For every requested column a "<column>_normality" column is
    written: the string "True" if the Shapiro-Wilk p-value is above 0.05, the
    string "False" if it is below 0.05, and pd.NA otherwise.

    For this test, the more data the better. Preferably you would want to have 20 data
    points (20 files) in a dataset to have an accurate result.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform normality test on.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` is None
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # A single column level means a regular thicket; more levels mean a
    # columnar joined thicket whose columns are (column index, column name).
    columnar_joined = thicket.dataframe.columns.nlevels != 1

    numeric = thicket.dataframe.select_dtypes(include="number")
    if columnar_joined:
        flat = numeric.reset_index(level=1)
    else:
        flat = numeric.reset_index()
    # One Shapiro-Wilk result per (node, column)
    shapiro_results = flat.groupby("node").agg(stats.shapiro)

    for column in columns:
        if columnar_joined:
            idx, name = column
            key = (idx, name)
            out_col = (idx, name + "_normality")
        else:
            key = column
            out_col = column + "_normality"
        output_column_names.append(out_col)

        # Iterate label/value pairs rather than indexing the Series by
        # position, which pandas has deprecated for non-integer indexes.
        for node, result in shapiro_results[key].items():
            thicket.statsframe.dataframe.loc[node, out_col] = _normality_label(
                result.pvalue
            )

        # check to see if exclusive metric
        if key in thicket.exc_metrics:
            thicket.statsframe.exc_metrics.append(out_col)
        # otherwise it is recorded as an inclusive metric
        else:
            thicket.statsframe.inc_metrics.append(out_col)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
32 | confidence_level (float): The confidence level (often 0.90, 0.95, or 0.99) 33 | indicates the degree of confidence that the true parameter lies within the interval. 34 | 35 | Returns: 36 | (list): returns a list of output statsframe column names 37 | 38 | Equation: 39 | .. math:: 40 | 41 | \text{CI} = \bar{x} \pm z \left( \frac{\sigma}{\sqrt{n}} \right) 42 | """ 43 | if columns is None or not isinstance(columns, list): 44 | raise ValueError("Value passed to 'columns' must be of type list.") 45 | 46 | if not isinstance(confidence_level, float): 47 | raise ValueError(r"Value passed to 'confidence_level' must be of type float.") 48 | 49 | if confidence_level >= 1 or confidence_level <= 0: 50 | raise ValueError( 51 | r"Value passed to 'confidence_level' must be in the range of (0, 1)." 52 | ) 53 | 54 | verify_thicket_structures(thicket.dataframe, columns=columns) 55 | 56 | output_column_names = [] 57 | 58 | sample_sizes = [] 59 | 60 | # Calculate mean and standard deviation 61 | mean_cols = th.stats.mean(thicket, columns=columns) 62 | std_cols = th.stats.std(thicket, columns=columns) 63 | 64 | # Convert confidence level to Z score 65 | z = scipy.stats.norm.ppf((1 + confidence_level) / 2) 66 | 67 | # Get number of profiles per node 68 | idx = pd.IndexSlice 69 | for node in thicket.dataframe.index.get_level_values(0).unique().tolist(): 70 | node_df = thicket.dataframe.loc[idx[node, :]] 71 | sample_sizes.append(len(node_df)) 72 | 73 | # Calculate confidence interval for every column 74 | for i in range(0, len(columns)): 75 | x = thicket.statsframe.dataframe[mean_cols[i]] 76 | s = thicket.statsframe.dataframe[std_cols[i]] 77 | 78 | c_p = x + (z * (s / np.sqrt(sample_sizes))) 79 | c_m = x - (z * (s / np.sqrt(sample_sizes))) 80 | 81 | out = pd.Series(list(zip(c_m, c_p)), index=thicket.statsframe.dataframe.index) 82 | 83 | if thicket.dataframe.columns.nlevels == 1: 84 | out_col = f"confidence_interval_{confidence_level}_{columns[i]}" 85 | else: 86 | out_col = ( 87 | 
columns[i][0], 88 | f"confidence_interval_{confidence_level}_{columns[i][1]}", 89 | ) 90 | 91 | output_column_names.append(out_col) 92 | thicket.statsframe.dataframe[out_col] = out 93 | 94 | thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1) 95 | 96 | return output_column_names 97 | -------------------------------------------------------------------------------- /thicket/stats/correlation_nodewise.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from scipy import stats 7 | 8 | from ..utils import verify_thicket_structures 9 | from .stats_utils import cache_stats_op 10 | 11 | 12 | @cache_stats_op 13 | def correlation_nodewise(thicket, column1=None, column2=None, correlation="pearson"): 14 | """Calculate the nodewise correlation for each node in the performance data table. 15 | 16 | Designed to take in a thicket, and append one or more columns to the aggregated 17 | statistics table for the nodewise correlation calculation for each node. 18 | 19 | Note: Resulting columns from correlation nodewise will currently not be appended 20 | to exc_metrics or inc_metrics until creating new data structure to store 21 | combined metrics (inclusive + exclusive). 22 | 23 | Arguments: 24 | thicket (thicket): Thicket object 25 | column1 (str): First comparison column. Note, if using a columnar joined thicket 26 | a tuple must be passed in with the format (column index, column name). 27 | column2 (str): Second comparison column. Note, if using a columnar joined 28 | thicket a tuple must be passed in with the format 29 | (column index, column name). 30 | correlation (str): correlation test to perform -- pearson (default), spearman, 31 | and kendall. 
32 | 33 | Returns: 34 | (list): returns a list of output statsframe column names 35 | """ 36 | if column1 is None or column2 is None: 37 | raise ValueError( 38 | "To see a list of valid columns, run 'Thicket.performance_cols'." 39 | ) 40 | 41 | verify_thicket_structures( 42 | thicket.dataframe, index=["node"], columns=[column1, column2] 43 | ) 44 | 45 | output_column_names = [] 46 | 47 | # thicket object without columnar index 48 | if thicket.dataframe.columns.nlevels == 1: 49 | df = thicket.dataframe.reset_index().groupby("node") 50 | correlated = [] 51 | for node, item in df: 52 | if correlation == "pearson": 53 | correlated.append( 54 | stats.pearsonr( 55 | df.get_group(node)[column1], 56 | df.get_group(node)[column2], 57 | )[0] 58 | ) 59 | elif correlation == "spearman": 60 | correlated.append( 61 | stats.spearmanr( 62 | df.get_group(node)[column1], 63 | df.get_group(node)[column2], 64 | )[0] 65 | ) 66 | elif correlation == "kendall": 67 | correlated.append( 68 | stats.kendalltau( 69 | df.get_group(node)[column1], 70 | df.get_group(node)[column2], 71 | )[0] 72 | ) 73 | else: 74 | raise ValueError( 75 | "Invalid correlation, options are pearson, spearman, and kendall." 
76 | ) 77 | thicket.statsframe.dataframe[ 78 | column1 + "_vs_" + column2 + " " + correlation 79 | ] = correlated 80 | output_column_names.append(column1 + "_vs_" + column2 + " " + correlation) 81 | # columnar joined thicket object 82 | else: 83 | df = thicket.dataframe.reset_index().groupby("node") 84 | correlated = [] 85 | for node, item in df: 86 | if correlation == "pearson": 87 | correlated.append( 88 | stats.pearsonr( 89 | df.get_group(node)[column1], 90 | df.get_group(node)[column2], 91 | )[0] 92 | ) 93 | elif correlation == "spearman": 94 | correlated.append( 95 | stats.spearmanr( 96 | df.get_group(node)[column1], 97 | df.get_group(node)[column2], 98 | )[0] 99 | ) 100 | elif correlation == "kendall": 101 | correlated.append( 102 | stats.kendalltau( 103 | df.get_group(node)[column1], 104 | df.get_group(node)[column2], 105 | )[0] 106 | ) 107 | else: 108 | raise ValueError( 109 | "Invalid correlation, options are pearson, spearman, and kendall." 110 | ) 111 | if column1[0] != column2[0]: 112 | column_name = ( 113 | "Union statistics", 114 | column1[1] + "_vs_" + column2[1] + " " + correlation, 115 | ) 116 | thicket.statsframe.dataframe[column_name] = correlated 117 | output_column_names.append(column_name) 118 | else: 119 | column_idx = column1[0] 120 | column_name = ( 121 | column_idx, 122 | column1[1] + "_vs_" + column2[1] + " " + correlation, 123 | ) 124 | thicket.statsframe.dataframe[column_name] = correlated 125 | output_column_names.append(column_name) 126 | 127 | # sort columns in index 128 | thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1) 129 | 130 | return output_column_names 131 | -------------------------------------------------------------------------------- /thicket/stats/display_boxplot.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. 
def display_boxplot(thicket, nodes=None, columns=None, **kwargs):
    """Display a boxplot for each user passed node(s) and column(s). The passed nodes
    and columns must be from the performance data table.

    Designed to take in a thicket, and display a plot with one or more boxplots
    depending on the number of nodes and columns passed.

    Arguments:
        thicket (thicket): Thicket object
        nodes (list): List of nodes to view on the x-axis
        columns (list): List of hardware/timing metrics to view on the y-axis. Note, if
            using a columnar joined thicket a list of tuples must be passed in with the
            format (column index, column name).
        **kwargs: Forwarded unchanged to seaborn.boxplot.

    Returns:
        (matplotlib Axes): Object for managing boxplot.

    Raises:
        ValueError: If 'nodes' or 'columns' is missing or not a list, if 'thicket' is
            not a thicket.Thicket, or if an entry of 'nodes' is not a
            hatchet.node.Node.
    """
    if columns is None or nodes is None:
        raise ValueError(
            "Both 'nodes' and 'columns' must be provided. To see a list of valid columns, run 'Thicket.performance_cols'."
        )
    if not isinstance(thicket, th.Thicket):
        raise ValueError(
            "Value passed to 'thicket' argument must be of type thicket.Thicket."
        )
    if not isinstance(nodes, list):
        raise ValueError("Value passed to 'nodes' argument must be of type list.")
    if not isinstance(columns, list):
        raise ValueError("Value passed to 'columns' argument must be of type list.")
    for node in nodes:
        if not isinstance(node, ht.node.Node):
            raise ValueError(
                "Value(s) passed to node argument must be of type hatchet.node.Node."
            )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    # thicket object without columnar index
    if thicket.dataframe.columns.nlevels == 1:
        wide_df = thicket.dataframe.reset_index()
        value_vars = columns
    # columnar joined thicket object
    else:

        def column_name_mapper(current_cols):
            # keep the id columns as plain names, stringify every other tuple
            if current_cols[0] in ["node", "name"]:
                return current_cols[0]

            return str(current_cols)

        value_vars = [str(c) for c in columns]
        wide_df = thicket.dataframe[[("name", ""), *columns]].reset_index()
        wide_df.columns = wide_df.columns.to_flat_index().map(column_name_mapper)
        wide_df["name"] = thicket.dataframe["name"].tolist()

    # long format: one row per (node, performance counter) observation
    df = pd.melt(
        wide_df,
        id_vars=["node", "name"],
        value_vars=value_vars,
        var_name="Performance counter",
        value_name=" ",
    )

    # row positions of the requested nodes, in the order the nodes were passed
    position = [pos for node in nodes for pos in df.index[df["node"] == node]]

    # rename columns such that the x-axis label is "node" and not "name", tick marks
    # will be node names
    filtered_df = df.loc[position].rename(
        columns={"node": "hatchet node", "name": "node"}
    )

    # only split by hue when there is more than one metric to tell apart
    if len(columns) > 1:
        return sns.boxplot(
            data=filtered_df, x="node", y=" ", hue="Performance counter", **kwargs
        )
    return sns.boxplot(data=filtered_df, x="node", y=" ", **kwargs)


def display_heatmap(thicket, columns=None, **kwargs):
    """Display a heatmap which contains a full list of nodes and user passed columns.
    Columns must be from the aggregated statistics table.

    The row labels of the heatmap are the string form of each statsframe index
    entry. The conversion is done on a copy, so the aggregated statistics table of
    the thicket is not modified by this function.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics from aggregated statistics table
            to display. Note: if using a column thicket, the argument must be a tuple.
        **kwargs: Forwarded unchanged to seaborn.heatmap.

    Returns:
        (matplotlib Axes): Object for managing heatmap plot.

    Raises:
        ValueError: If 'columns' is missing, not a list or empty, if 'thicket' is not
            a thicket.Thicket, or if tuple columns do not share one column index.
    """
    if columns is None:
        raise ValueError(
            "Chosen columns must be from the thicket.statsframe.dataframe."
        )
    if not isinstance(thicket, th.Thicket):
        raise ValueError(
            "Value passed to 'thicket' argument must be of type thicket.Thicket."
        )
    if not isinstance(columns, list):
        raise ValueError("Value passed to 'columns' argument must be of type list.")
    if not columns:
        raise ValueError("Value passed to 'columns' argument must not be empty.")

    verify_thicket_structures(
        thicket.statsframe.dataframe, index=["node"], columns=columns
    )

    columnar = thicket.dataframe.columns.nlevels != 1

    # thicket object without columnar index
    if not columnar:
        selected = thicket.statsframe.dataframe[columns]
    # columnar joined thicket object
    else:
        initial_idx = columns[0][0]
        cols = []
        for column in columns:
            if column[0] != initial_idx:
                raise ValueError(
                    "Columns specified as tuples must have the same column index (first element)."
                )
            cols.append(column[1])
        selected = thicket.statsframe.dataframe[initial_idx][cols]

    # stringify the node labels on a copy; the statsframe keeps its own index
    heatmap_df = selected.copy()
    heatmap_df.index = heatmap_df.index.map(str)

    ax = sns.heatmap(heatmap_df, **kwargs)

    if columnar:
        ax.set_title(initial_idx)

    return ax


def display_histogram(thicket, node=None, column=None, **kwargs):
    """Display a histogram for a user passed node and column. Node and column must come
    from the performance data table.

    Arguments:
        thicket (thicket): Thicket object
        node (node): Node object
        column (str): Column from performance data table. Note: if using a
            column thicket, the argument must be a tuple.
        **kwargs: Forwarded unchanged to pandas.DataFrame.hist.

    Returns:
        (matplotlib.AxesSubplot or numpy.ndarray of them)

    Raises:
        ValueError: If 'node' or 'column' is missing or has the wrong type, or if
            'thicket' is not a thicket.Thicket.
    """

    if column is None or node is None:
        raise ValueError(
            "Both 'node' and 'column' must be provided. To see a list of valid columns, run 'Thicket.performance_cols'."
        )
    if not isinstance(thicket, th.Thicket):
        raise ValueError(
            "Value passed to 'thicket' argument must be of type thicket.Thicket."
        )
    if not isinstance(node, ht.node.Node):
        raise ValueError(
            "Value passed to 'node' argument must be of type hatchet.node.Node."
        )
    if not isinstance(column, (str, tuple)):
        raise ValueError(
            "Value passed to column argument must be of type str (or tuple(str) for column thickets)."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=[column])

    # the same call serves both plain and columnar joined thickets
    return thicket.dataframe.loc[node].hist(column=column, **kwargs)


@cache_stats_op
def maximum(thicket, columns=None):
    """Determine the maximum for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table for the maximum value for each node.

    The maximum is the highest observation for a node and its associated profiles.

    Each output column is also recorded in the statsframe's exc_metrics when its
    source column is listed in the thicket's exc_metrics, and in inc_metrics
    otherwise; a name that is already recorded is not added a second time.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to determine maximum value for.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: If 'columns' is not provided.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # Group on the index level so only the requested columns are aggregated, and
    # use the string alias: passing the builtin max to agg is deprecated in pandas.
    df = thicket.dataframe[columns].groupby(level="node").agg("max")

    columnar = thicket.dataframe.columns.nlevels != 1

    for column in columns:
        # columnar joined thicket object: (column index, column name) tuples
        if columnar:
            idx, name = column
            out_col = (idx, name + "_max")
        # thicket object without columnar index
        else:
            out_col = column + "_max"

        output_column_names.append(out_col)
        thicket.statsframe.dataframe[out_col] = df[column]

        # check to see if exclusive metric, otherwise treat as inclusive metric
        if column in thicket.exc_metrics:
            metrics = thicket.statsframe.exc_metrics
        else:
            metrics = thicket.statsframe.inc_metrics
        if out_col not in metrics:
            metrics.append(out_col)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
@cache_stats_op
def mean(thicket, columns=None):
    """Calculate the mean for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the
    aggregated statistics table for the mean calculation for each node.

    Each output column is also recorded in the statsframe's exc_metrics when its
    source column is listed in the thicket's exc_metrics, and in inc_metrics
    otherwise; a name that is already recorded is not added a second time.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform mean calculation on.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: If 'columns' is not provided.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # Group on the index level so only the requested columns are averaged (the
    # other index levels are never turned into data columns), and use the string
    # alias: passing np.mean to agg is deprecated in pandas.
    df = thicket.dataframe[columns].groupby(level="node").agg("mean")

    columnar = thicket.dataframe.columns.nlevels != 1

    for column in columns:
        # columnar joined thicket object: (column index, column name) tuples
        if columnar:
            idx, name = column
            out_col = (idx, name + "_mean")
        # thicket object without columnar index
        else:
            out_col = column + "_mean"

        output_column_names.append(out_col)
        thicket.statsframe.dataframe[out_col] = df[column]

        # check to see if exclusive metric, otherwise treat as inclusive metric
        if column in thicket.exc_metrics:
            metrics = thicket.statsframe.exc_metrics
        else:
            metrics = thicket.statsframe.inc_metrics
        if out_col not in metrics:
            metrics.append(out_col)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
@cache_stats_op
def median(thicket, columns=None):
    """Calculate the median for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the
    aggregated statistics table for the median calculation for each node.

    Each output column is also recorded in the statsframe's exc_metrics when its
    source column is listed in the thicket's exc_metrics, and in inc_metrics
    otherwise; a name that is already recorded is not added a second time.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform median calculation
            on. Note, if using a columnar joined thicket a list of tuples must be passed
            in with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names. For a columnar
            joined thicket every name is a (column index, column name) tuple, i.e.
            the exact key of the column in the statsframe.

    Raises:
        ValueError: If 'columns' is not provided.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # Group on the index level so only the requested columns are aggregated, and
    # use the string alias: passing np.median to agg is deprecated in pandas.
    df = thicket.dataframe[columns].groupby(level="node").agg("median")

    columnar = thicket.dataframe.columns.nlevels != 1

    for column in columns:
        # columnar joined thicket object: (column index, column name) tuples
        if columnar:
            idx, name = column
            out_col = (idx, name + "_median")
        # thicket object without columnar index
        else:
            out_col = column + "_median"

        # report the real column key (a tuple when columnar), not its str() form
        output_column_names.append(out_col)
        thicket.statsframe.dataframe[out_col] = df[column]

        # check to see if exclusive metric, otherwise treat as inclusive metric
        if column in thicket.exc_metrics:
            metrics = thicket.statsframe.exc_metrics
        else:
            metrics = thicket.statsframe.inc_metrics
        if out_col not in metrics:
            metrics.append(out_col)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names


@cache_stats_op
def minimum(thicket, columns=None):
    """Determine the minimum for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table for the minimum value for each node.

    The minimum is the lowest observation for a node and its associated profiles.

    Each output column is also recorded in the statsframe's exc_metrics when its
    source column is listed in the thicket's exc_metrics, and in inc_metrics
    otherwise; a name that is already recorded is not added a second time.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to determine minimum value for.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: If 'columns' is not provided.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # Group on the index level so only the requested columns are aggregated, and
    # use the string alias: passing the builtin min to agg is deprecated in pandas.
    df = thicket.dataframe[columns].groupby(level="node").agg("min")

    columnar = thicket.dataframe.columns.nlevels != 1

    for column in columns:
        # columnar joined thicket object: (column index, column name) tuples
        if columnar:
            idx, name = column
            out_col = (idx, name + "_min")
        # thicket object without columnar index
        else:
            out_col = column + "_min"

        output_column_names.append(out_col)
        thicket.statsframe.dataframe[out_col] = df[column]

        # check to see if exclusive metric, otherwise treat as inclusive metric
        if column in thicket.exc_metrics:
            metrics = thicket.statsframe.exc_metrics
        else:
            metrics = thicket.statsframe.inc_metrics
        if out_col not in metrics:
            metrics.append(out_col)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
@cache_stats_op
def percentiles(thicket, columns=None, percentiles=None):
    """Calculate the q-th percentile for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table for the q-th percentile calculation for each node. Each percentile
    calculation is a separate column in the statistics table, where the column will
    have the format: {columnName}_percentiles_{percentile}, with {percentile} being
    the requested fraction expressed as a whole-number percentage (0.25 -> 25).

    The 25th percentile is the lower quartile, and is the value at which 25% of the
    answers lie below that value.

    The 50th percentile, is the median and half of the values lie below the median and
    half lie above the median.

    The 75th percentile is the upper quartile, and is the value at which 25% of the
    answers lie above that value and 75% of the answers lie below that value.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform percentile
            calculation on. Note if using a columnar joined thicket a list of tuples
            must be passed in with the format (column index, column name).
        percentiles (list): List of percentile values that is desired to be calculated
            for each column in columns. If no list (or an empty one) is specified,
            the default values, [0.25, 0.50, 0.75] are used for calculations

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: If a percentile lies outside [0.0, 1.0] or 'columns' is not
            provided.
    """
    if not percentiles:
        percentiles = [0.25, 0.50, 0.75]

    # Enforce that percentiles are in range of [0.0, 1.0]
    for percentile in percentiles:
        if percentile < 0.0 or percentile > 1.0:
            raise ValueError(
                "Percentile {} is out of range of [0.0, 1.0]".format(percentile)
            )

    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    # select numeric columns within thicket (.quantile) will not work without this step
    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
    df_num = thicket.dataframe.select_dtypes(include=numerics)[columns]

    # group on the index level so only the requested columns are aggregated
    grouped = df_num.groupby(level="node")

    # One (label, per-node quantile table) pair per requested percentile. The
    # product is rounded before truncating because e.g. 0.29 * 100 evaluates to
    # 28.999999999999996, which would otherwise be labelled 28.
    per_percentile = [
        (str(int(round(percentile * 100, 6))), grouped.quantile(percentile))
        for percentile in percentiles
    ]

    columnar = thicket.dataframe.columns.nlevels != 1

    for column in columns:
        # make each of the percentiles its own column
        for label, quantile_df in per_percentile:
            # columnar joined thicket object: (column index, column name) tuples
            if columnar:
                idx_level, name = column
                column_to_append = (
                    idx_level,
                    "{}_percentiles_{}".format(name, label),
                )
            # thicket object without columnar index
            else:
                column_to_append = column + "_percentiles_" + label

            output_column_names.append(column_to_append)
            # node-indexed Series, so values are matched to statsframe rows by node
            thicket.statsframe.dataframe[column_to_append] = quantile_df[column]

            # check to see if exclusive metric and that the metric is not already in
            # the metrics list
            if (
                column in thicket.exc_metrics
                and column_to_append not in thicket.statsframe.exc_metrics
            ):
                thicket.statsframe.exc_metrics.append(column_to_append)
            # check inclusive metrics
            elif (
                column in thicket.inc_metrics
                and column_to_append not in thicket.statsframe.inc_metrics
            ):
                thicket.statsframe.inc_metrics.append(column_to_append)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
def cache_stats_op(func):
    """Decorator that records a stats operation in the thicket's statsframe_ops_cache.

    After the wrapped function runs, every column name it returned is stored under
    ``thicket.statsframe_ops_cache[func]`` together with the positional and keyword
    arguments (excluding the thicket itself) that produced it.
    """

    @wraps(func)
    def wrapper(thicket, *args, **kwargs):
        output_columns = func(thicket, *args, **kwargs)
        # One sub-dictionary per undecorated function, created on first use.
        recorded = thicket.statsframe_ops_cache.setdefault(func, {})
        for column in output_columns:
            recorded[column] = (args, kwargs)
        return output_columns

    return wrapper


@cache_stats_op
def preference(thicket, columns, comparison_func, *args, test="ttest", **kwargs):
    """Determine a preference between compilers, architecture, platform, etc.

    Runs the selected statistical test on the two given columns and appends the
    results to the aggregated statistics table. With the t-test, eight columns are
    added in total: the mean and std of each of the two input columns, the
    "<col1> vs <col2>" t-value and t-statistic, and the "<col1> vs <col2>"
    ``_std_preferred`` and ``_mean_preferred`` columns. "Preferred" stands for the
    preferred choice between the two options.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to determine a preference for.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name). List should be length 2.
        comparison_func (function): User-defined python or lambda function to decide a
            preference. It is called with the two per-node means, and separately with
            the two per-node standard deviations.
        test (str): User-selected test.

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` does not hold exactly two entries or ``test`` is
            not an available test.
    """
    if len(columns) != 2:
        raise ValueError("Must specify 2 columns in columns=.")

    if test not in __statistical_tests:
        raise ValueError("Test is not available.")

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    if test == "ttest":
        tvalue, t_statistics = __statistical_tests[test](
            thicket, columns, *args, **kwargs
        )

        # Fetch the statsframe table only now: the t-test replaces it when it adds
        # the mean/std columns.
        stats_df = thicket.statsframe.dataframe

        if thicket.dataframe.columns.nlevels == 1:
            # thicket object without columnar index
            mean_cols = [col + "_mean" for col in columns]
            std_cols = [col + "_std" for col in columns]
            aggregated_cols = columns[0] + " vs " + columns[1]
            std_key = aggregated_cols + "_std_preferred"
            mean_key = aggregated_cols + "_mean_preferred"
        else:
            # columnar joined thicket object
            mean_cols = [(index, col + "_mean") for index, col in columns]
            std_cols = [(index, col + "_std") for index, col in columns]
            aggregated_cols = (
                str(columns[0]).replace("'", "")
                + " vs "
                + str(columns[1]).replace("'", "")
            )
            std_key = ("Preference", aggregated_cols + "_std_preferred")
            mean_key = ("Preference", aggregated_cols + "_mean_preferred")

        pref_mean = []
        pref_std = []
        for i, t_statistic in enumerate(t_statistics):
            # Two-tailed test: only a statistic outside [-tvalue, tvalue] counts.
            if t_statistic < -1 * tvalue or t_statistic > tvalue:
                # .iloc makes the row lookup explicitly positional; a plain
                # integer key on a node-indexed Series relies on deprecated
                # positional fallback.
                pref_mean.append(
                    comparison_func(
                        stats_df[mean_cols[0]].iloc[i],
                        stats_df[mean_cols[1]].iloc[i],
                    )
                )
                pref_std.append(
                    comparison_func(
                        stats_df[std_cols[0]].iloc[i],
                        stats_df[std_cols[1]].iloc[i],
                    )
                )
            else:
                pref_mean.append("No preference")
                pref_std.append("No preference")

        stats_df[std_key] = pref_std
        stats_df[mean_key] = pref_mean

        output_column_names.append(std_key)
        output_column_names.append(mean_key)

    return output_column_names
@cache_stats_op
def std(thicket, columns=None):
    """Calculate the standard deviation for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table for the standard deviation calculation for each node.

    Standard deviation describes how dispersed the data is in relation to the mean.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform standard deviation
            calculation on. Note, if using a columnar_joined thicket a list of tuples
            must be passed in with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` is None.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    single_level = thicket.dataframe.columns.nlevels == 1

    # The string alias pins the groupby standard deviation; passing the NumPy
    # callable relies on deprecated aliasing inside pandas.
    if single_level:
        # thicket object without columnar index
        df = thicket.dataframe[columns].reset_index().groupby("node").agg("std")
    else:
        # columnar joined thicket object
        df = thicket.dataframe[columns].reset_index(level=1).groupby("node").agg("std")

    for entry in columns:
        if single_level:
            source_key = entry
            output_key = entry + "_std"
        else:
            idx, column = entry
            source_key = (idx, column)
            output_key = (idx, column + "_std")

        output_column_names.append(output_key)
        thicket.statsframe.dataframe[output_key] = df[source_key]

        # Exclusive inputs produce exclusive outputs; everything else is filed as
        # inclusive.
        if source_key in thicket.exc_metrics:
            metric_list = thicket.statsframe.exc_metrics
        else:
            metric_list = thicket.statsframe.inc_metrics
        # Avoid duplicate entries when the operation is repeated.
        if output_key not in metric_list:
            metric_list.append(output_key)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
@cache_stats_op
def sum(thicket, columns=None):
    """Calculate the sum for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the
    aggregated statistics table for the sum calculation for each node.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform sum calculation on.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` is None.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    single_level = thicket.dataframe.columns.nlevels == 1

    # The string alias pins the groupby sum; passing the NumPy callable relies on
    # deprecated aliasing inside pandas.
    if single_level:
        # thicket object without columnar index
        df = thicket.dataframe[columns].reset_index().groupby("node").agg("sum")
    else:
        # columnar joined thicket object
        df = thicket.dataframe[columns].reset_index(level=1).groupby("node").agg("sum")

    for entry in columns:
        if single_level:
            source_key = entry
            output_key = entry + "_sum"
        else:
            idx, column = entry
            source_key = (idx, column)
            output_key = (idx, column + "_sum")

        output_column_names.append(output_key)
        thicket.statsframe.dataframe[output_key] = df[source_key]

        # Exclusive inputs produce exclusive outputs; everything else is filed as
        # inclusive.
        if source_key in thicket.exc_metrics:
            metric_list = thicket.statsframe.exc_metrics
        else:
            metric_list = thicket.statsframe.inc_metrics
        # Avoid duplicate entries when the operation is repeated.
        if output_key not in metric_list:
            metric_list.append(output_key)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names


def __ttest(thicket, columns, alpha=0.05, *args, **kwargs):
    """Perform a ttest on a user-selected thicket and columns.

    Designed to take in a thicket and two columns. For this private function a tvalue
    and list of tstatistics will be returned to preference.py. As a side effect the
    mean and std of both columns, the t-value and the per-node t-statistics are
    written to the thicket's aggregated statistics table.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to determine a preference for.
            Note, if using a columnar joined thicket a list of tuples must be passed in
            with the format (column index, column name).
        alpha (double): Threshold for statistical significance. Value must be between 0
            and 1. Default is 0.05.

    Returns:
        tvalue (double): Value to be used to determine a preference within preference.py.
        tstatistics (list): List of values to be used to determine a preference within
            preference.py.

    Raises:
        ValueError: if ``alpha`` is not strictly between 0 and 1, or ``columns`` does
            not hold exactly two entries.
    """
    # check to see if alpha value is between 0 and 1
    if alpha <= 0 or alpha >= 1:
        raise ValueError("Value for alpha argument must be between 0 and 1.")

    # check columns contain two columns
    if len(columns) != 2:
        raise ValueError("Columns must be a list of length 2.")

    # The observation counts are taken from the rows of the first node.
    first_node = pd.unique(thicket.dataframe.reset_index()["node"])[0]
    node_rows = thicket.dataframe.loc[first_node]

    # nobs for parameter one for ttest
    nobs_column1 = len(node_rows[columns[0]])
    # nobs for parameter two for ttest (previously read from columns[0] by mistake)
    nobs_column2 = len(node_rows[columns[1]])

    # subtract by len(columns) due to estimating a t-test with two parameters
    # alpha/2 is done for a two tail t-test
    tvalue = t.ppf(q=1 - alpha / 2, df=nobs_column1 + nobs_column2 - len(columns))

    th.stats.mean(thicket, columns)
    th.stats.std(thicket, columns)

    if thicket.dataframe.columns.nlevels > 1:
        # thicket object with columnar index
        mean_columns = [(idx, col + "_mean") for idx, col in columns]
        std_columns = [(idx, col + "_std") for idx, col in columns]
        aggregated_cols = (
            str(columns[0]).replace("'", "") + " vs " + str(columns[1]).replace("'", "")
        )
        tvalue_key = ("Preference", aggregated_cols + "_tvalue")
        tstatistic_key = ("Preference", aggregated_cols + "_tstatistic")
    else:
        # thicket object without columnar index
        mean_columns = [col + "_mean" for col in columns]
        std_columns = [col + "_std" for col in columns]
        aggregated_cols = columns[0] + " vs " + columns[1]
        tvalue_key = aggregated_cols + "_tvalue"
        tstatistic_key = aggregated_cols + "_tstatistic"

    # Fetch the statsframe table only after mean/std ran: they replace it.
    stats_df = thicket.statsframe.dataframe

    t_statistics = []
    for i in range(len(stats_df)):
        # .iloc makes the row lookup explicitly positional; a plain integer key on
        # a node-indexed Series relies on deprecated positional fallback.
        result = ttest_ind_from_stats(
            mean1=stats_df[mean_columns[0]].iloc[i],
            std1=stats_df[std_columns[0]].iloc[i],
            nobs1=nobs_column1,
            mean2=stats_df[mean_columns[1]].iloc[i],
            std2=stats_df[std_columns[1]].iloc[i],
            nobs2=nobs_column2,
            equal_var=False,
        )
        t_statistics.append(result.statistic)

    # store results into thicket's aggregated statistics table
    stats_df[tvalue_key] = tvalue
    stats_df[tstatistic_key] = t_statistics

    return tvalue, t_statistics


@cache_stats_op
def variance(thicket, columns=None):
    """Calculate the variance for each node in the performance data table.

    Designed to take in a thicket, and append one or more columns to the aggregated
    statistics table for the variance calculation for each node.

    Variance will allow you to see the spread of data within a node and that node's
    profiles.

    Arguments:
        thicket (thicket): Thicket object
        columns (list): List of hardware/timing metrics to perform variance calculation
            on. Note, if using a columnar_joined thicket a list of tuples must be passed
            in with the format (column index, column name).

    Returns:
        (list): returns a list of output statsframe column names

    Raises:
        ValueError: if ``columns`` is None.
    """
    if columns is None:
        raise ValueError(
            "To see a list of valid columns, run 'Thicket.performance_cols'."
        )

    verify_thicket_structures(thicket.dataframe, index=["node"], columns=columns)

    output_column_names = []

    single_level = thicket.dataframe.columns.nlevels == 1

    # The string alias pins the groupby variance; passing the NumPy callable relies
    # on deprecated aliasing inside pandas.
    if single_level:
        # thicket object without columnar index
        df = thicket.dataframe[columns].reset_index().groupby("node").agg("var")
    else:
        # columnar joined thicket object
        df = thicket.dataframe[columns].reset_index(level=1).groupby("node").agg("var")

    for entry in columns:
        if single_level:
            source_key = entry
            output_key = entry + "_var"
        else:
            idx, column = entry
            source_key = (idx, column)
            output_key = (idx, column + "_var")

        output_column_names.append(output_key)
        thicket.statsframe.dataframe[output_key] = df[source_key]

        # Exclusive inputs produce exclusive outputs; everything else is filed as
        # inclusive.
        if source_key in thicket.exc_metrics:
            metric_list = thicket.statsframe.exc_metrics
        else:
            metric_list = thicket.statsframe.inc_metrics
        # Avoid duplicate entries when the operation is repeated.
        if output_key not in metric_list:
            metric_list.append(output_key)

    # sort columns in index
    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)

    return output_column_names
output_column_names 69 | -------------------------------------------------------------------------------- /thicket/tests/data/example-timeseries/cxx.cali: -------------------------------------------------------------------------------- 1 | __rec=node,id=21,attr=10,data=1612,parent=3 2 | __rec=node,id=92,attr=8,data=caliper.config,parent=21 3 | __rec=node,id=89,attr=10,data=1612,parent=1 4 | __rec=node,id=90,attr=8,data=iterations,parent=89 5 | __rec=node,id=22,attr=8,data=cali.caliper.version,parent=21 6 | __rec=node,id=23,attr=22,data=2.10.0-dev 7 | __rec=node,id=91,attr=90,data=100,parent=23 8 | __rec=node,id=93,attr=92,data=,parent=91 9 | __rec=node,id=30,attr=10,data=85,parent=5 10 | __rec=node,id=67,attr=8,data=min#time.duration.ns,parent=30 11 | __rec=node,id=68,attr=8,data=max#time.duration.ns,parent=30 12 | __rec=node,id=69,attr=8,data=sum#time.duration.ns,parent=30 13 | __rec=node,id=70,attr=8,data=avg#time.duration.ns,parent=30 14 | __rec=node,id=32,attr=10,data=85,parent=2 15 | __rec=node,id=85,attr=8,data=count,parent=32 16 | __rec=node,id=86,attr=8,data=aggregate.slot,parent=32 17 | __rec=node,id=26,attr=10,data=2133,parent=1 18 | __rec=node,id=27,attr=8,data=loop.iterations,parent=26 19 | __rec=node,id=31,attr=8,data=timeseries.starttime,parent=30 20 | __rec=node,id=33,attr=8,data=timeseries.snapshot,parent=32 21 | __rec=ctx,ref=93,attr=67=68=69=70=85=86=27=31=33,data=448713.000000=448713.000000=448713.000000=448713.000000=1=0=0=1689118709.135139=0 22 | __rec=node,id=18,attr=10,data=276,parent=3 23 | __rec=node,id=20,attr=8,data=region,parent=18 24 | __rec=node,id=94,attr=20,data=main 25 | __rec=ctx,ref=94=93,attr=67=68=69=70=85=86=27=31=33,data=59851.000000=59851.000000=59851.000000=59851.000000=1=1=0=1689118709.135139=0 26 | __rec=node,id=95,attr=20,data=init,parent=94 27 | __rec=node,id=42,attr=8,data=min#region.count,parent=30 28 | __rec=node,id=43,attr=8,data=max#region.count,parent=30 29 | __rec=node,id=44,attr=8,data=sum#region.count,parent=30 
30 | __rec=node,id=45,attr=8,data=avg#region.count,parent=30 31 | __rec=ctx,ref=95=93,attr=42=43=44=45=67=68=69=70=85=86=27=31=33,data=1.000000=1.000000=1.000000=1.000000=12219.000000=12219.000000=12219.000000=12219.000000=1=2=0=1689118709.135139=0 32 | __rec=node,id=34,attr=10,data=2133,parent=5 33 | __rec=node,id=35,attr=8,data=timeseries.duration,parent=34 34 | __rec=ctx,ref=94=93,attr=27=31=33=35=31,data=0=1689118709.135139=0=0.001398=1689118709.136537 35 | __rec=node,id=28,attr=10,data=85,parent=1 36 | __rec=node,id=29,attr=8,data=loop.start_iteration,parent=28 37 | __rec=ctx,ref=94=93,attr=67=68=69=70=85=86=27=29=31=33,data=200739.000000=200739.000000=200739.000000=200739.000000=1=0=20=0=1689118709.136537=1 38 | __rec=node,id=19,attr=8,data=loop,parent=18 39 | __rec=node,id=103,attr=19,data=mainloop,parent=94 40 | __rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=19.000000=1.000000=3306.000000=13928.000000=282447.000000=4787.237288=59=1=20=0=1689118709.136537=1 41 | __rec=node,id=104,attr=20,data=foo,parent=103 42 | __rec=ctx,ref=104=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=141457.000000=219005.000000=3281358.000000=164067.900000=20=2=20=0=1689118709.136537=1 43 | __rec=node,id=12,attr=10,data=84,parent=6 44 | __rec=node,id=16,attr=8,data=class.iteration,parent=12 45 | __rec=node,id=96,attr=16,data=true,parent=1 46 | __rec=node,id=97,attr=10,data=21,parent=96 47 | __rec=node,id=98,attr=8,data=iteration#mainloop,parent=97 48 | __rec=ctx,ref=103=93,attr=27=29=31=33=35=31=98,data=20=0=1689118709.136537=1=0.003734=1689118709.140271=19 49 | __rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=3238.000000=83328.000000=357604.000000=5960.066667=60=0=20=20=1689118709.140271=2 50 | 
__rec=ctx,ref=104=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=159474.000000=209917.000000=3364016.000000=168200.800000=20=1=20=20=1689118709.140271=2 51 | __rec=ctx,ref=103=93,attr=27=29=31=33=35=31=98,data=20=20=1689118709.140271=2=0.003722=1689118709.143993=39 52 | __rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=3213.000000=84229.000000=402162.000000=6702.700000=60=0=20=40=1689118709.143993=3 53 | __rec=ctx,ref=104=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=159545.000000=335990.000000=3522373.000000=176118.650000=20=1=20=40=1689118709.143993=3 54 | __rec=ctx,ref=103=93,attr=27=29=31=33=35=31=98,data=20=40=1689118709.143993=3=0.003928=1689118709.147921=59 55 | __rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=3342.000000=96083.000000=348946.000000=5815.766667=60=0=20=60=1689118709.147921=4 56 | __rec=ctx,ref=104=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=136880.000000=173199.000000=3203448.000000=160172.400000=20=1=20=60=1689118709.147921=4 57 | __rec=ctx,ref=103=93,attr=27=29=31=33=35=31=98,data=20=60=1689118709.147921=4=0.003560=1689118709.151481=79 58 | __rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=3308.000000=112210.000000=407532.000000=6792.200000=60=0=20=80=1689118709.151481=5 59 | __rec=ctx,ref=104=93,attr=42=43=44=45=67=68=69=70=85=86=27=29=31=33,data=1.000000=1.000000=20.000000=1.000000=156252.000000=715191.000000=4120824.000000=206041.200000=20=1=20=80=1689118709.151481=5 60 | __rec=ctx,ref=103=93,attr=27=29=31=33=35=31=98,data=20=80=1689118709.151481=5=0.004522=1689118709.156003=99 61 | 
__rec=ctx,ref=103=93,attr=42=43=44=45=67=68=69=70=85=86=27=31=33,data=1.000000=1.000000=1.000000=1.000000=114486.000000=114486.000000=114486.000000=114486.000000=1=0=0=1689118709.156003=6 62 | __rec=ctx,ref=103=93,attr=27=31=33=35=31,data=0=1689118709.156003=6=0.000119=1689118709.156122 63 | __rec=node,id=24,attr=8,data=cali.channel,parent=21 64 | __rec=node,id=25,attr=24,data=default 65 | __rec=globals,ref=93=25 66 | -------------------------------------------------------------------------------- /thicket/tests/data/example-timeseries/mem_power_timeseries.cali: -------------------------------------------------------------------------------- 1 | __rec=node,id=26,attr=10,data=2125,parent=1 2 | __rec=node,id=27,attr=8,data=loop.iterations,parent=26 3 | __rec=node,id=30,attr=10,data=2061,parent=2 4 | __rec=node,id=31,attr=8,data=memstat.vmsize,parent=30 5 | __rec=node,id=32,attr=8,data=memstat.vmrss,parent=30 6 | __rec=node,id=33,attr=8,data=memstat.data,parent=30 7 | __rec=node,id=36,attr=10,data=2133,parent=2 8 | __rec=node,id=37,attr=8,data=variorum.val.power_node_watts,parent=36 9 | __rec=node,id=38,attr=8,data=variorum.power_node_watts,parent=36 10 | __rec=node,id=18,attr=10,data=268,parent=3 11 | __rec=node,id=20,attr=8,data=region,parent=18 12 | __rec=node,id=41,attr=20,data=main 13 | __rec=node,id=21,attr=10,data=1612,parent=3 14 | __rec=node,id=22,attr=8,data=cali.caliper.version,parent=21 15 | __rec=node,id=23,attr=22,data=2.10.0 16 | __rec=ctx,ref=41=23,attr=27=31=32=33=37=38,data=0=923=679=537=389=194 17 | __rec=node,id=28,attr=10,data=77,parent=1 18 | __rec=node,id=29,attr=8,data=loop.start_iteration,parent=28 19 | __rec=node,id=19,attr=8,data=loop,parent=18 20 | __rec=node,id=45,attr=19,data=lulesh.cycle,parent=41 21 | __rec=node,id=12,attr=10,data=76,parent=6 22 | __rec=node,id=16,attr=8,data=class.iteration,parent=12 23 | __rec=node,id=42,attr=16,data=true,parent=1 24 | __rec=node,id=43,attr=10,data=13,parent=42 25 | 
__rec=node,id=44,attr=8,data=iteration#lulesh.cycle,parent=43 26 | __rec=ctx,ref=45=23,attr=27=29=31=32=33=37=38=44,data=20=0=1000=743=614=391=390=19 27 | __rec=ctx,ref=45=23,attr=27=29=31=32=33=37=38=44,data=20=20=1000=743=614=392=391=39 28 | __rec=ctx,ref=45=23,attr=27=29=31=32=33=37=38=44,data=20=40=1000=743=614=392=392=59 29 | __rec=ctx,ref=45=23,attr=27=29=31=32=33=37=38=44,data=20=60=1000=743=614=392=392=79 30 | __rec=ctx,ref=45=23,attr=27=29=31=32=33=37=38=44,data=20=80=1000=743=614=398=395=99 31 | __rec=ctx,ref=45=23,attr=27=31=32=33=37=38,data=0=1000=743=614=398=398 32 | __rec=node,id=24,attr=8,data=cali.channel,parent=21 33 | __rec=node,id=25,attr=24,data=default 34 | __rec=globals,ref=23=25 35 | -------------------------------------------------------------------------------- /thicket/tests/test_add_root_node.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
def test_add_root_node(literal_thickets):
    """A root node added to a thicket shows up in the graph, both tables and the statsframe."""
    tk = literal_thickets[0]

    assert len(tk.graph) == 4

    # Insert the new root and look it up again by name
    new_root_attrs = {"name": "Test", "type": "function"}
    tk.add_root_node(new_root_attrs)
    root = tk.get_node("Test")

    # The node itself
    assert isinstance(root, Node)
    assert root._hatchet_nid == 3
    assert root._depth == 0

    # Both graphs grew by one node
    assert len(tk.graph) == 5
    assert len(tk.statsframe.graph) == 5

    # The node is part of both table indices
    perf_nodes = tk.dataframe.index.get_level_values("node")
    stats_nodes = tk.statsframe.dataframe.index.get_level_values("node")
    assert root in perf_nodes
    assert root in stats_nodes

    # The name column was filled in for the new rows
    assert tk.dataframe.loc[root, "name"].values[0] == "Test"
    assert tk.statsframe.dataframe.loc[root, "name"] == "Test"
def test_from_caliperreader(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata):
    """Sanity test a thicket object with known data."""
    reader_options = {
        "intersection": intersection,
        "fill_perfdata": fill_perfdata,
        "disable_tqdm": True,
    }
    tk = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], **reader_options)

    # Object type
    assert isinstance(tk, Thicket)

    # Shape of the resulting performance table
    assert tk.dataframe.shape == (74, 14)

    # One known value, taken from the first node in the index
    first_node = tk.dataframe.index.get_level_values(0)[0]
    avg_time = tk.dataframe.loc[first_node, "Avg time/rank"].values[0]
    assert avg_time == 103.47638


def test_node_ordering_from_caliper(caliper_ordered, intersection, fill_perfdata):
    """Check the order of output from the native Caliper reader by examining a known input with node order column."""
    tk = Thicket.from_caliperreader(
        caliper_ordered,
        intersection=intersection,
        fill_perfdata=fill_perfdata,
        disable_tqdm=True,
    )

    # (node name, "Total time") pairs in the order the rows are expected
    expected_rows = [
        ("main", 1.250952),
        ("lulesh.cycle", 1.229935),
        ("TimeIncrement", 0.000085),
        ("LagrangeLeapFrog", 1.229702),
        ("LagrangeNodal", 0.604766),
        ("CalcForceForNodes", 0.566399),
        ("CalcVolumeForceForElems", 0.561237),
        ("IntegrateStressForElems", 0.161196),
        ("CalcHourglassControlForElems", 0.395344),
        ("CalcFBHourglassForceForElems", 0.239849),
        ("LagrangeElements", 0.614079),
        ("CalcLagrangeElements", 0.175102),
        ("CalcKinematicsForElems", 0.168127),
        ("CalcQForElems", 0.136318),
        ("CalcMonotonicQForElems", 0.038575),
        ("ApplyMaterialPropertiesForElems", 0.299062),
        ("EvalEOSForElems", 0.293046),
        ("CalcEnergyForElems", 0.190395),
        ("CalcTimeConstraintsForElems", 0.010707),
    ]
    expected_order = [name for name, _ in expected_rows]
    expected_data_order = [total for _, total in expected_rows]

    # check if the rows are in the expected order
    row_count = tk.dataframe.shape[0]
    for row_pos in range(row_count):
        node_name = tk.dataframe.iloc[row_pos]["name"]
        assert node_name == expected_order[row_pos]
        node_data = tk.dataframe.iloc[row_pos]["Total time"]
        assert node_data == expected_data_order[row_pos]

    # check the tree ordering is correct as well
    traversal_ids = []
    for node in tk.graph.node_order_traverse():
        traversal_ids.append(node._hatchet_nid)
    tk.dataframe["hnid"] = traversal_ids

    rendered = tk.tree(metric_column="hnid", render_header=False, precision=3)
    for hnid in tk.dataframe["hnid"].tolist():
        # Each id must appear at or after the previous match in the rendered tree
        location = rendered.find(str(hnid) + ".000")
        assert location != -1
        rendered = rendered[location:]
def test_copy(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata):
    """Thicket.copy() shares the graph and the data but duplicates the containers."""
    original = Thicket.from_caliperreader(
        rajaperf_seq_O3_1M_cali[0],
        intersection=intersection,
        fill_perfdata=fill_perfdata,
        disable_tqdm=True,
    )
    original.exc_metrics.append("value")
    clone = original.copy()

    # General assertions
    assert original is not clone

    assert original.graph is clone.graph
    assert original.graph == clone.graph

    assert original.dataframe is not clone.dataframe
    assert original.dataframe.equals(clone.dataframe)

    # Plain containers: distinct objects with equal content
    for attr in ("exc_metrics", "inc_metrics"):
        assert getattr(original, attr) is not getattr(clone, attr)
        assert getattr(original, attr) == getattr(clone, attr)

    assert original.default_metric == clone.default_metric

    assert original.metadata is not clone.metadata
    assert original.metadata.equals(clone.metadata)

    for attr in ("profile", "profile_mapping"):
        assert getattr(original, attr) is not getattr(clone, attr)
        assert getattr(original, attr) == getattr(clone, attr)

    assert original.statsframe is not clone.statsframe

    # Check nodes between graph and dataframe are same obj
    original_df_node = original.dataframe.reset_index().iloc[0, 0]
    original_graph_node = next(original.graph.traverse())
    assert original_df_node is original_graph_node
    clone_df_node = clone.dataframe.reset_index().iloc[0, 0]
    clone_graph_node = next(clone.graph.traverse())
    assert clone_df_node is clone_graph_node
    # Check across original and clone
    assert original_graph_node is clone_graph_node

    # Shallow copy of graph: a change through the clone is visible in the original
    clone.graph.roots[0]._hatchet_nid += 1
    assert original.graph.roots[0]._hatchet_nid == clone.graph.roots[0]._hatchet_nid
    assert original.graph.roots[0] is clone.graph.roots[0]

    # Shallow copy of data: a write through the clone is visible in the original
    first_node = clone.dataframe.index.get_level_values("node")[0]
    first_profile = clone.dataframe.index.get_level_values(clone.profile_idx_name)[0]
    row_key = (first_node, first_profile)
    clone.dataframe.loc[row_key, "nid"] = -1
    assert clone.dataframe.loc[row_key, "nid"] == original.dataframe.loc[row_key, "nid"]

    # Deep copy of structure
    original_width = len(original.dataframe.columns) + len(original.dataframe.index[0])
    assert original_width == len(clone.dataframe.reset_index().columns)


def test_deepcopy(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata):
    """Thicket.deepcopy() duplicates the graph, the data and the containers."""
    original = Thicket.from_caliperreader(
        rajaperf_seq_O3_1M_cali[0],
        intersection=intersection,
        fill_perfdata=fill_perfdata,
        disable_tqdm=True,
    )
    original.exc_metrics.append("value")
    clone = original.deepcopy()

    # General assertions
    assert original is not clone

    assert original.graph is not clone.graph
    assert original.graph == clone.graph

    assert original.dataframe is not clone.dataframe
    assert original.dataframe.equals(clone.dataframe)

    # Plain containers: distinct objects with equal content
    for attr in ("exc_metrics", "inc_metrics"):
        assert getattr(original, attr) is not getattr(clone, attr)
        assert getattr(original, attr) == getattr(clone, attr)

    assert original.default_metric == clone.default_metric

    assert original.metadata is not clone.metadata
    assert original.metadata.equals(clone.metadata)

    for attr in ("profile", "profile_mapping"):
        assert getattr(original, attr) is not getattr(clone, attr)
        assert getattr(original, attr) == getattr(clone, attr)

    assert original.statsframe is not clone.statsframe

    # Check nodes between graph and dataframe are same obj
    original_df_node = original.dataframe.reset_index().iloc[0, 0]
    original_graph_node = next(original.graph.traverse())
    assert original_df_node is original_graph_node
    clone_df_node = clone.dataframe.reset_index().iloc[0, 0]
    clone_graph_node = next(clone.graph.traverse())
    assert clone_df_node is clone_graph_node
    # Check across original and clone
    assert original_graph_node is not clone_graph_node

    # Deep copy of graph: a change through the clone stays in the clone
    clone.graph.roots[0]._hatchet_nid += 1
    assert original.graph.roots[0]._hatchet_nid != clone.graph.roots[0]._hatchet_nid
    assert original.graph.roots[0] is not clone.graph.roots[0]

    # Deep copy of data: a write through the clone stays in the clone
    clone.dataframe.iloc[0, 0] = 0
    assert clone.dataframe.iloc[0, 0] != original.dataframe.iloc[0, 0]

    # Deep copy of structure
    original_width = len(original.dataframe.columns) + len(original.dataframe.index[0])
    assert original_width == len(clone.dataframe.reset_index().columns)

    # Check new graph is statsframe graph
    assert clone.graph is clone.statsframe.graph
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from thicket import Ensemble 7 | 8 | 9 | def test_unify(literal_thickets): 10 | tk, tk2, tk3 = literal_thickets 11 | 12 | union_graph, _thickets = Ensemble._unify([tk, tk2, tk3], disable_tqdm=True) 13 | 14 | ug_hashes = [0, 1, 2, 3, 4, 5, 6] 15 | tk_hashes = [ 16 | [0, 1, 2, 6], 17 | [0, 1, 2, 3], 18 | [3, 4, 5], 19 | ] 20 | 21 | assert [hash(n) for n in union_graph.traverse()] == ug_hashes 22 | assert [ 23 | [hash(n) for n in _thickets[i].dataframe.index.get_level_values("node")] 24 | for i in range(3) 25 | ] == tk_hashes 26 | -------------------------------------------------------------------------------- /thicket/tests/test_filter_profile.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from thicket import Thicket 7 | 8 | 9 | def test_filter_profile(rajaperf_cali_1trial): 10 | tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) 11 | 12 | # Split profile list into two halves 13 | rm_profs = tk.profile[len(tk.profile) // 2 :] 14 | keep_profs = tk.profile[: len(tk.profile) // 2] 15 | 16 | tk_filt = tk.filter_profile(keep_profs) 17 | 18 | # Check each component that uses profiles 19 | for component in [ 20 | tk_filt.profile, 21 | tk_filt.profile_mapping.keys(), 22 | tk_filt.metadata.index, 23 | tk_filt.dataframe.index.get_level_values(tk_filt.profile_idx_name), 24 | ]: 25 | assert all([prof not in component for prof in rm_profs]) 26 | assert all([prof in component for prof in keep_profs]) 27 | -------------------------------------------------------------------------------- /thicket/tests/test_filter_stats.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # 
Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from thicket import Thicket 7 | 8 | 9 | def check_filter_stats(th, columns_values): 10 | for column in columns_values: 11 | for value in columns_values[column]: 12 | # for type str column 13 | if isinstance(value, str): 14 | # expected nodes after applying filter 15 | exp_nodes = sorted( 16 | th.statsframe.dataframe.index[ 17 | th.statsframe.dataframe[column] == value 18 | ] 19 | ) 20 | new_th = th.filter_stats(lambda x: x[column] == value) 21 | # for type int column 22 | elif isinstance(value, int): 23 | exp_nodes = sorted( 24 | th.statsframe.dataframe.index[ 25 | th.statsframe.dataframe[column] < value 26 | ] 27 | ) 28 | new_th = th.filter_stats(lambda x: x[column] < value) 29 | else: 30 | # test case not implemented 31 | print("The column value type is not a supported test case") 32 | exp_nodes = [] 33 | new_th = th 34 | 35 | # check if output is a thicket object 36 | assert isinstance(new_th, Thicket) 37 | 38 | # check filtered Thicket is separate object 39 | # We can't check th.graph because of squash in filter_stats 40 | assert th.statsframe.graph is not new_th.statsframe.graph 41 | 42 | # filtered nodes in aggregated statistics table 43 | stats_nodes = sorted( 44 | new_th.statsframe.dataframe.index.drop_duplicates().tolist() 45 | ) 46 | # check filtered nodes in aggregated statistics table match exp_nodes 47 | assert stats_nodes == exp_nodes 48 | 49 | # filtered nodes in performance data table 50 | ensemble_nodes = sorted( 51 | new_th.dataframe.index.get_level_values(0).drop_duplicates().tolist() 52 | ) 53 | # check filtered nodes in performance data table match exp_nodes 54 | assert ensemble_nodes == exp_nodes 55 | 56 | 57 | def test_filter_stats(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata): 58 | # example thicket 59 | th = Thicket.from_caliperreader( 60 | rajaperf_seq_O3_1M_cali, 61 | intersection=intersection, 62 | 
fill_perfdata=fill_perfdata, 63 | disable_tqdm=True, 64 | ) 65 | # columns and corresponding values to filter by 66 | columns_values = { 67 | "test_string_column": ["less than 20"], 68 | "test_numeric_column": [4, 15], 69 | } 70 | # set string column values 71 | less_than_20 = ["less than 20"] * 21 72 | less_than_45 = ["less than 45"] * 25 73 | less_than_87 = ["less than 74"] * 28 74 | new_col = less_than_20 + less_than_45 + less_than_87 75 | th.statsframe.dataframe["test_string_column"] = new_col 76 | # set numeric column values 77 | th.statsframe.dataframe["test_numeric_column"] = range(0, 74) 78 | 79 | check_filter_stats(th, columns_values) 80 | -------------------------------------------------------------------------------- /thicket/tests/test_from_statsframes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import pytest 7 | 8 | import thicket as th 9 | from thicket.utils import DuplicateValueError 10 | 11 | 12 | def test_single_trial(mpi_scaling_cali, intersection, fill_perfdata): 13 | th_list = [] 14 | for file in mpi_scaling_cali: 15 | th_list.append( 16 | th.Thicket.from_caliperreader( 17 | file, 18 | intersection=intersection, 19 | fill_perfdata=fill_perfdata, 20 | disable_tqdm=True, 21 | ) 22 | ) 23 | 24 | # Add arbitrary value to aggregated statistics table 25 | t_val = 0 26 | for t in th_list: 27 | t.statsframe.dataframe["test"] = t_val 28 | t_val += 2 29 | 30 | tk = th.Thicket.from_statsframes(th_list, disable_tqdm=True) 31 | 32 | # Check level values 33 | assert set(tk.dataframe.index.get_level_values(tk.profile_idx_name)) == { 34 | 0, 35 | 1, 36 | 2, 37 | 3, 38 | 4, 39 | } 40 | # Check performance data table values 41 | assert set(tk.dataframe["test"]) == {0, 2, 4, 6, 8} 42 | 43 | tk_named = th.Thicket.from_statsframes( 44 | th_list, metadata_key="mpi.world.size", disable_tqdm=True 45 | ) 46 | 47 | # Check level values 48 | assert set(tk_named.dataframe.index.get_level_values("mpi.world.size")) == { 49 | 27, 50 | 64, 51 | 125, 52 | 216, 53 | 343, 54 | } 55 | # Check performance data table values 56 | assert set(tk_named.dataframe["test"]) == {0, 2, 4, 6, 8} 57 | 58 | 59 | def test_multi_trial( 60 | rajaperf_cali_alltrials, 61 | intersection, 62 | fill_perfdata, 63 | ): 64 | tk = th.Thicket.from_caliperreader( 65 | rajaperf_cali_alltrials, 66 | intersection=intersection, 67 | fill_perfdata=fill_perfdata, 68 | disable_tqdm=True, 69 | ) 70 | 71 | # Simulate multiple trial from grouping by tuning. 72 | gb = tk.groupby("tuning") 73 | 74 | # Arbitrary data in statsframe. 
75 | for _, ttk in gb.items(): 76 | ttk.statsframe.dataframe["mean"] = 1 77 | 78 | stk = th.Thicket.from_statsframes(list(gb.values()), metadata_key="tuning") 79 | 80 | # Check error thrown for simulated multi-trial 81 | with pytest.raises( 82 | DuplicateValueError, 83 | ): 84 | th.Thicket.from_statsframes( 85 | [list(gb.values())[0], list(gb.values())[0]], metadata_key="tuning" 86 | ) 87 | 88 | if intersection: 89 | assert stk.dataframe.shape == (192, 2) 90 | else: 91 | assert stk.dataframe.shape == (222, 2) 92 | -------------------------------------------------------------------------------- /thicket/tests/test_get_node.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import pytest 7 | 8 | 9 | def test_get_node(literal_thickets): 10 | tk, _, _ = literal_thickets 11 | 12 | # Check error raised 13 | with pytest.raises(KeyError): 14 | tk.get_node("Foo") 15 | 16 | # Check case which="first" 17 | qux1 = tk.get_node("Qux", which="first") 18 | assert qux1.frame["name"] == "Qux" 19 | assert qux1._hatchet_nid == 1 20 | 21 | # Check case which="last" 22 | qux2 = tk.get_node("Qux", which="last") 23 | assert qux2.frame["name"] == "Qux" 24 | assert qux2._hatchet_nid == 2 25 | 26 | # Check case which="all" 27 | qux_all = tk.get_node("Qux", which="all") 28 | assert len(qux_all) == 2 29 | -------------------------------------------------------------------------------- /thicket/tests/test_intersection.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import thicket.helpers as helpers 7 | from thicket import Thicket as th 8 | 9 | 10 | def test_intersection(rajaperf_cali_1trial, fill_perfdata): 11 | # Manually intersect 12 | tk = th.from_caliperreader( 13 | rajaperf_cali_1trial, 14 | intersection=False, 15 | fill_perfdata=fill_perfdata, 16 | disable_tqdm=True, 17 | ) 18 | intersected_tk = tk.intersection() 19 | 20 | # Use argument during reader 21 | intersected_tk_other = th.from_caliperreader( 22 | rajaperf_cali_1trial, intersection=True, disable_tqdm=True 23 | ) 24 | 25 | # Check other methodology 26 | assert len(intersected_tk.graph) == len(intersected_tk_other.graph) 27 | 28 | # Check original and intersected thickets 29 | assert len(intersected_tk.graph) < len(tk.graph) 30 | assert len(intersected_tk_other.graph) < len(tk.graph) 31 | 32 | # Check that nodes are synced between graph and dataframe 33 | assert helpers._are_synced(tk.graph, tk.dataframe) 34 | assert helpers._are_synced(intersected_tk.graph, intersected_tk.dataframe) 35 | -------------------------------------------------------------------------------- /thicket/tests/test_json_interface.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from thicket import Thicket 7 | 8 | 9 | def test_read_json(example_json): 10 | jgs = "" 11 | with open(example_json, "r") as f: 12 | jgs = f.read() 13 | gf = Thicket.from_json(jgs) 14 | 15 | assert len(gf.dataframe) == 3278 16 | assert len(gf.graph) == 29 17 | assert gf.metadata is not None 18 | assert gf.statsframe is not None 19 | assert gf.graph is gf.statsframe.graph 20 | 21 | 22 | def test_write_json(example_json): 23 | jgs = "" 24 | with open(example_json, "r") as f: 25 | jgs = f.read() 26 | gf = Thicket.from_json(jgs) 27 | json_out = gf.to_json() 28 | 29 | assert "".join(sorted("".join(sorted(jgs.split())))) == "".join( 30 | sorted("".join(json_out.split())) 31 | ) 32 | -------------------------------------------------------------------------------- /thicket/tests/test_model_extrap.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | # Make flake8 ignore unused names in this file 7 | # flake8: noqa: F401 8 | 9 | import sys 10 | 11 | import pytest 12 | 13 | from thicket import Thicket 14 | 15 | extrap_avail = True 16 | try: 17 | import extrap.entities as xent 18 | from extrap.entities.experiment import ( 19 | Experiment, 20 | ) # For some reason it errors if "Experiment" is not explicitly imported 21 | from extrap.fileio import io_helper 22 | from extrap.modelers.model_generator import ModelGenerator 23 | except ImportError: 24 | extrap_avail = False 25 | 26 | if sys.version_info < (3, 8): 27 | pytest.skip( 28 | "requires python3.8 or greater to use extrap module", allow_module_level=True 29 | ) 30 | 31 | if not extrap_avail: 32 | pytest.skip("Extra-P package not available", allow_module_level=True) 33 | 34 | 35 | def test_model_extrap(mpi_scaling_cali, intersection, fill_perfdata): 36 | from thicket.model_extrap import Modeling 37 | 38 | t_ens = Thicket.from_caliperreader( 39 | mpi_scaling_cali, 40 | intersection=intersection, 41 | fill_perfdata=fill_perfdata, 42 | disable_tqdm=True, 43 | ) 44 | 45 | # Method 1: Model created using metadata column 46 | mdl = Modeling( 47 | t_ens, 48 | "jobsize", 49 | chosen_metrics=[ 50 | "Avg time/rank", 51 | ], 52 | ) 53 | mdl.produce_models() 54 | 55 | # Method 2: Model created using manually-input core counts for each file 56 | core_list = { 57 | mpi_scaling_cali[0]: 27, 58 | mpi_scaling_cali[1]: 64, 59 | mpi_scaling_cali[2]: 125, 60 | mpi_scaling_cali[3]: 216, 61 | mpi_scaling_cali[4]: 343, 62 | } 63 | mdl2 = Modeling( 64 | t_ens, 65 | "cores", 66 | core_list, 67 | chosen_metrics=[ 68 | "Avg time/rank", 69 | ], 70 | ) 71 | mdl2.produce_models() 72 | 73 | # Check that model structure is being created properly 74 | assert mdl.tht.statsframe.dataframe.shape == mdl2.tht.statsframe.dataframe.shape 75 | # Check model values between the two methods 76 | assert mdl.tht.statsframe.dataframe.applymap(str).equals( 77 | 
mdl2.tht.statsframe.dataframe.applymap(str) 78 | ) 79 | 80 | 81 | def test_componentize_functions(mpi_scaling_cali, intersection, fill_perfdata): 82 | from thicket.model_extrap import Modeling 83 | 84 | t_ens = Thicket.from_caliperreader( 85 | mpi_scaling_cali, 86 | intersection=intersection, 87 | fill_perfdata=fill_perfdata, 88 | disable_tqdm=True, 89 | ) 90 | 91 | mdl = Modeling( 92 | t_ens, 93 | "jobsize", 94 | chosen_metrics=[ 95 | "Avg time/rank", 96 | "Max time/rank", 97 | ], 98 | ) 99 | mdl.produce_models(add_stats=False) 100 | 101 | original_shape = t_ens.statsframe.dataframe.shape 102 | 103 | mdl.componentize_statsframe() 104 | 105 | xp_comp_df = t_ens.statsframe.dataframe 106 | 107 | # Check shape. Assert columns were added. 108 | assert xp_comp_df.shape[1] > original_shape[1] 109 | 110 | # Check that each component column produced at least one value. 111 | for column in xp_comp_df.columns: 112 | assert not xp_comp_df[column].isnull().all() 113 | -------------------------------------------------------------------------------- /thicket/tests/test_pickle.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import thicket as th 7 | 8 | 9 | def test_pickle(rajaperf_cali_1trial, tmpdir, intersection, fill_perfdata): 10 | """Test pickling and unpickling of Thicket object.""" 11 | 12 | # Create thicket 13 | tk = th.Thicket.from_caliperreader( 14 | rajaperf_cali_1trial, 15 | intersection=intersection, 16 | fill_perfdata=fill_perfdata, 17 | disable_tqdm=True, 18 | ) 19 | 20 | # Create temporary pickle file and write to it 21 | pkl_file = tmpdir.join("tk.pkl") 22 | tk.to_pickle(pkl_file) 23 | 24 | # Read from pickle file 25 | ptk = th.Thicket.from_pickle(pkl_file) 26 | 27 | # Compare original and pickled thicket 28 | assert tk == ptk 29 | -------------------------------------------------------------------------------- /thicket/tests/test_query.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import re 7 | 8 | import hatchet as ht 9 | import pandas as pd 10 | 11 | from thicket import Thicket 12 | from utils import check_identity 13 | 14 | 15 | def check_query(th, hnids, query): 16 | """Check query function for Thicket object. 17 | 18 | Arguments: 19 | th (Thicket): Thicket object to test. 20 | hnids (list): List to match nodes based of hatchet nid. 21 | query (ht.QueryMatcher()): match nodes from hatchet query. 
22 | """ 23 | node_name, profile_name = th.dataframe.index.names[0:2] 24 | 25 | # Get profiles 26 | th_df_profiles = th.dataframe.index.get_level_values(profile_name) 27 | # Match first 8 nodes 28 | match = [node for node in th.graph.traverse() if node._hatchet_nid in hnids] 29 | match_frames = [node.frame for node in match] 30 | match_names = [frame["name"] for frame in match_frames] 31 | # Match all nodes using query 32 | filt_th = th.query(query) 33 | filt_nodes = list(filt_th.graph.traverse()) 34 | 35 | # MultiIndex check 36 | if isinstance(th.statsframe.dataframe.columns, pd.MultiIndex): 37 | assert isinstance(filt_th.statsframe.dataframe.columns, pd.MultiIndex) 38 | 39 | # Get filtered nodes and profiles 40 | filt_th_df_nodes = filt_th.dataframe.index.get_level_values(node_name).to_list() 41 | filt_th_df_profiles = filt_th.dataframe.index.get_level_values(profile_name) 42 | 43 | assert len(filt_nodes) == len(match) 44 | assert all([n.frame in match_frames for n in filt_nodes]) 45 | assert all([n.frame["name"] in match_names for n in filt_nodes]) 46 | assert all([n in filt_th_df_nodes for n in filt_nodes]) 47 | assert sorted(filt_th_df_profiles.unique().to_list()) == sorted( 48 | th_df_profiles.unique().to_list() 49 | ) 50 | 51 | check_identity(th, filt_th, ["default_metric", "profile_idx_name"]) 52 | 53 | 54 | def test_query(rajaperf_cuda_block128_1M_cali, intersection, fill_perfdata): 55 | # test thicket 56 | th = Thicket.from_caliperreader( 57 | rajaperf_cuda_block128_1M_cali, 58 | intersection=intersection, 59 | fill_perfdata=fill_perfdata, 60 | disable_tqdm=True, 61 | ) 62 | # test arguments 63 | hnids = [0, 1, 2, 3, 4] # 5, 6, 7 have Nones 64 | query = ( 65 | ht.QueryMatcher() 66 | .match("*") 67 | .rel( 68 | ".", 69 | lambda row: row["name"] 70 | .apply(lambda x: re.match(r"Algorithm*", x) is not None) 71 | .all(), 72 | ) 73 | ) 74 | 75 | check_query(th, hnids, query) 76 | 77 | 78 | def test_object_dialect_column_multi_index( 79 | 
rajaperf_seq_O3_1M_cali, intersection, fill_perfdata 80 | ): 81 | th1 = Thicket.from_caliperreader( 82 | rajaperf_seq_O3_1M_cali[0], 83 | intersection=intersection, 84 | fill_perfdata=fill_perfdata, 85 | disable_tqdm=True, 86 | ) 87 | th2 = Thicket.from_caliperreader( 88 | rajaperf_seq_O3_1M_cali[1], 89 | intersection=intersection, 90 | fill_perfdata=fill_perfdata, 91 | disable_tqdm=True, 92 | ) 93 | th_cj = Thicket.concat_thickets([th1, th2], axis="columns") 94 | 95 | query = [ 96 | ("+", {(0, "Avg time/rank"): "> 10.0", (1, "Avg time/rank"): "> 10.0"}), 97 | ] 98 | 99 | match = list( 100 | set( 101 | [ 102 | th_cj.get_node("RAJAPerf"), 103 | th_cj.get_node("Basic"), 104 | th_cj.get_node("Lcals"), 105 | th_cj.get_node("Lcals_DIFF_PREDICT"), 106 | th_cj.get_node("Polybench"), 107 | th_cj.get_node("Apps"), 108 | ] 109 | ) 110 | ) 111 | 112 | new_th = th_cj.query(query, multi_index_mode="all") 113 | queried_nodes = list(new_th.graph.traverse()) 114 | 115 | match_frames = list(sorted([n.frame for n in match])) 116 | queried_frames = list(sorted([n.frame for n in queried_nodes])) 117 | 118 | assert len(queried_nodes) == len(match) 119 | assert all(m == q for m, q in zip(match_frames, queried_frames)) 120 | idx = pd.IndexSlice 121 | assert ( 122 | (new_th.dataframe.loc[idx[queried_nodes, :], (0, "Avg time/rank")] > 10.0) 123 | & (new_th.dataframe.loc[idx[queried_nodes, :], (1, "Avg time/rank")] > 10.0) 124 | ).all() 125 | 126 | 127 | def test_string_dialect_column_multi_index( 128 | rajaperf_seq_O3_1M_cali, intersection, fill_perfdata 129 | ): 130 | th1 = Thicket.from_caliperreader( 131 | rajaperf_seq_O3_1M_cali[0], 132 | intersection=intersection, 133 | fill_perfdata=fill_perfdata, 134 | disable_tqdm=True, 135 | ) 136 | th2 = Thicket.from_caliperreader( 137 | rajaperf_seq_O3_1M_cali[1], 138 | intersection=intersection, 139 | fill_perfdata=fill_perfdata, 140 | disable_tqdm=True, 141 | ) 142 | th_cj = Thicket.concat_thickets([th1, th2], axis="columns") 143 | 144 | query 
= """MATCH ("+", p) 145 | WHERE p.(0, "Avg time/rank") > 10.0 AND p.(1, "Avg time/rank") > 10.0 146 | """ 147 | 148 | match = list( 149 | set( 150 | [ 151 | th_cj.get_node("RAJAPerf"), 152 | th_cj.get_node("Basic"), 153 | th_cj.get_node("Lcals"), 154 | th_cj.get_node("Lcals_DIFF_PREDICT"), 155 | th_cj.get_node("Polybench"), 156 | th_cj.get_node("Apps"), 157 | ] 158 | ) 159 | ) 160 | 161 | new_th = th_cj.query(query, multi_index_mode="all") 162 | queried_nodes = list(new_th.graph.traverse()) 163 | 164 | match_frames = list(sorted([n.frame for n in match])) 165 | queried_frames = list(sorted([n.frame for n in queried_nodes])) 166 | 167 | assert len(queried_nodes) == len(match) 168 | assert all(m == q for m, q in zip(match_frames, queried_frames)) 169 | idx = pd.IndexSlice 170 | assert ( 171 | (new_th.dataframe.loc[idx[queried_nodes, :], (0, "Avg time/rank")] > 10.0) 172 | & (new_th.dataframe.loc[idx[queried_nodes, :], (1, "Avg time/rank")] > 10.0) 173 | ).all() 174 | -------------------------------------------------------------------------------- /thicket/tests/test_reader_dispatch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import os 7 | import pytest 8 | 9 | from hatchet import GraphFrame 10 | from thicket import Thicket 11 | 12 | 13 | def test_empty_iterable(): 14 | with pytest.raises(ValueError, match="Iterable must contain at least one file"): 15 | Thicket.reader_dispatch( 16 | GraphFrame.from_caliperreader, 17 | False, 18 | True, 19 | True, 20 | [], 21 | ) 22 | 23 | with pytest.raises(ValueError, match="Iterable must contain at least one file"): 24 | Thicket.reader_dispatch( 25 | GraphFrame.from_caliperreader, 26 | False, 27 | True, 28 | True, 29 | tuple([]), 30 | ) 31 | 32 | 33 | def test_file_not_found(): 34 | with pytest.raises(ValueError, match="Path 'blah' not found"): 35 | Thicket.reader_dispatch( 36 | GraphFrame.from_caliperreader, 37 | False, 38 | True, 39 | True, 40 | "blah", 41 | ) 42 | 43 | with pytest.raises(FileNotFoundError, match="File 'blah' not found"): 44 | Thicket.reader_dispatch( 45 | GraphFrame.from_caliperreader, 46 | False, 47 | True, 48 | True, 49 | ["blah"], 50 | ) 51 | 52 | 53 | def test_valid_type(): 54 | with pytest.raises(TypeError, match="'int' is not a valid type to be read from"): 55 | Thicket.reader_dispatch( 56 | GraphFrame.from_caliperreader, 57 | False, 58 | True, 59 | True, 60 | -1, 61 | ) 62 | 63 | 64 | def test_valid_inputs(rajaperf_cali_1trial, data_dir): 65 | 66 | # Works with list 67 | Thicket.reader_dispatch( 68 | GraphFrame.from_caliperreader, 69 | False, 70 | True, 71 | True, 72 | rajaperf_cali_1trial, 73 | ) 74 | 75 | # Works with single file 76 | Thicket.reader_dispatch( 77 | GraphFrame.from_caliperreader, 78 | False, 79 | True, 80 | True, 81 | rajaperf_cali_1trial[0], 82 | ) 83 | 84 | # Works with directory 85 | Thicket.reader_dispatch( 86 | GraphFrame.from_caliperreader, 87 | False, 88 | True, 89 | True, 90 | f"{data_dir}/rajaperf/lassen/clang10.0.1_nvcc10.2.89_1048576/1/", 91 | ) 92 | 93 | 94 | def test_error_file(mpi_scaling_cali, data_dir): 95 | 96 | # Create a temporarily empty 
file 97 | empty_file_path = os.path.join(f"{data_dir}/mpi_scaling_cali", "empty.cali") 98 | with open(empty_file_path, "w"): 99 | pass # This creates an empty file 100 | 101 | # list 102 | with pytest.raises(Exception, match="Failed to read file"): 103 | Thicket.reader_dispatch( 104 | GraphFrame.from_caliperreader, 105 | False, 106 | True, 107 | True, 108 | mpi_scaling_cali + [empty_file_path], 109 | ) 110 | 111 | # directory 112 | with pytest.raises(Exception, match="Failed to read file"): 113 | Thicket.reader_dispatch( 114 | GraphFrame.from_caliperreader, 115 | False, 116 | True, 117 | True, 118 | f"{data_dir}/mpi_scaling_cali/", 119 | ) 120 | 121 | # Remove the file 122 | os.remove(empty_file_path) 123 | -------------------------------------------------------------------------------- /thicket/tests/test_timeseries.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import re 7 | 8 | import hatchet as ht 9 | 10 | import thicket as tt 11 | 12 | 13 | def test_from_timeseries_cxx(example_timeseries_cxx): 14 | """Sanity test a thicket timeseries object""" 15 | th = tt.Thicket.from_timeseries(example_timeseries_cxx) 16 | 17 | # Check the object type 18 | assert isinstance(th, tt.Thicket) 19 | 20 | # Check the resulting dataframe shape 21 | assert th.dataframe.shape == (20, 17) 22 | assert "loop.start_iteration" not in th.dataframe.columns 23 | 24 | # Check a value in the dataframe 25 | assert ( 26 | th.dataframe.loc[ 27 | th.dataframe.index.get_level_values(0)[0], "avg#time.duration.ns" 28 | ].values[0] 29 | == 59851.0 30 | ) 31 | 32 | 33 | def test_from_timeseries_lulesh(example_timeseries): 34 | """Sanity test a thicket timeseries object""" 35 | th = tt.Thicket.from_timeseries(example_timeseries) 36 | 37 | # Check the object type 38 | assert isinstance(th, tt.Thicket) 39 | 40 | # Check the resulting dataframe shape 41 | assert th.dataframe.shape == (950, 18) 42 | assert "loop.start_iteration" not in th.dataframe.columns 43 | 44 | # Check a value in the dataframe 45 | assert ( 46 | th.dataframe.loc[ 47 | th.dataframe.index.get_level_values(0)[0], "alloc.region.highwatermark" 48 | ].values[0] 49 | == 25824351.0 50 | ) 51 | 52 | 53 | def test_timeseries_statsframe(example_timeseries): 54 | """Test the creation of a statsframe with timeseries thicket""" 55 | 56 | th = tt.Thicket.from_timeseries(example_timeseries) 57 | 58 | # Check that the aggregated statistics table is a Hatchet GraphFrame. 59 | assert isinstance(th.statsframe, ht.GraphFrame) 60 | # Check that 'name' column is in dataframe. If not, tree() will not work. 61 | assert "name" in th.statsframe.dataframe 62 | # Check length of graph is the same as the dataframe. 
63 | assert len(th.statsframe.graph) == len(th.statsframe.dataframe) 64 | 65 | tt.stats.mean(th, columns=["alloc.region.highwatermark"]) 66 | 67 | assert "alloc.region.highwatermark_mean" in th.statsframe.dataframe.columns 68 | assert ( 69 | "alloc.region.highwatermark_mean" 70 | in th.statsframe.exc_metrics + th.statsframe.inc_metrics 71 | ) 72 | assert "alloc.region.highwatermark_mean" in th.statsframe.show_metric_columns() 73 | 74 | # Expected tree output 75 | tree_output = th.statsframe.tree(metric_column="alloc.region.highwatermark_mean") 76 | 77 | # Check if tree output is correct. 78 | assert bool(re.search("63732320.000.*lulesh.cycle", tree_output)) 79 | 80 | 81 | def test_timeseries_temporal_pattern(mem_power_timeseries): 82 | """ 83 | Test the stats temporal pattern calculation with timeseries thicket, should add new columns to stats frame and score appropriately 84 | """ 85 | 86 | th = tt.Thicket.from_timeseries(mem_power_timeseries) 87 | 88 | returned_cols = tt.stats.calc_temporal_pattern( 89 | th, columns=["memstat.vmrss", "variorum.val.power_node_watts"] 90 | ) 91 | 92 | # Check that the aggregated statistics table is a Hatchet GraphFrame. 93 | assert isinstance(th.statsframe, ht.GraphFrame) 94 | 95 | expected_cols = [ 96 | "memstat.vmrss_pattern", 97 | "memstat.vmrss_temporal_score", 98 | "variorum.val.power_node_watts_pattern", 99 | "variorum.val.power_node_watts_temporal_score", 100 | ] 101 | for col in expected_cols: 102 | # Check that expected columns are in statsframe dataframe. 103 | assert col in th.statsframe.dataframe 104 | # Check the returned columns from calc_temporal_pattern. 
105 | assert col in returned_cols 106 | 107 | # Check some values in the memory pattern column 108 | assert th.statsframe.dataframe["memstat.vmrss_pattern"].iloc[0] == "none" 109 | assert th.statsframe.dataframe["memstat.vmrss_pattern"].iloc[1] == "constant" 110 | assert ( 111 | th.statsframe.dataframe["variorum.val.power_node_watts_pattern"].iloc[0] 112 | == "none" 113 | ) 114 | assert ( 115 | th.statsframe.dataframe["variorum.val.power_node_watts_pattern"].iloc[1] 116 | == "constant" 117 | ) 118 | 119 | mem_tree = th.statsframe.tree( 120 | metric_column="memstat.vmrss_temporal_score", 121 | annotation_column="memstat.vmrss_pattern", 122 | ) 123 | pow_tree = th.statsframe.tree( 124 | metric_column="variorum.val.power_node_watts_temporal_score", 125 | annotation_column="variorum.val.power_node_watts_pattern", 126 | ) 127 | 128 | # Check if tree output is correct. 129 | assert bool(re.search("0.000", mem_tree)) 130 | assert bool(re.search("0.013", pow_tree)) 131 | -------------------------------------------------------------------------------- /thicket/tests/test_tree.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import pytest 7 | 8 | import thicket as th 9 | 10 | 11 | def test_indices(rajaperf_unique_tunings, intersection, fill_perfdata): 12 | tk = th.Thicket.from_caliperreader( 13 | rajaperf_unique_tunings, 14 | intersection=intersection, 15 | fill_perfdata=fill_perfdata, 16 | disable_tqdm=True, 17 | ) 18 | 19 | # No error 20 | tk.tree(metric_column="Avg time/rank", indices=tk.profile[0]) 21 | 22 | tk.metadata_columns_to_perfdata(["variant", "tuning"]) 23 | 24 | # Error because there are duplicate variants. We need to add the tuning to the index as well. 
25 | tk.dataframe = ( 26 | tk.dataframe.reset_index().set_index(["node", "variant"]).sort_index() 27 | ) 28 | with pytest.raises( 29 | KeyError, 30 | match=r"Either dataframe cannot be represented as a single index or provided slice,*", 31 | ): 32 | tk.tree(metric_column="Avg time/rank") 33 | 34 | # Add tuning to the index to avoid the error. 35 | tk.dataframe = ( 36 | tk.dataframe.reset_index().set_index(["node", "variant", "tuning"]).sort_index() 37 | ) 38 | # No error 39 | tk.tree(metric_column="Avg time/rank") 40 | 41 | # No error 42 | tk.tree(metric_column="Avg time/rank", indices=["Base_Seq", "default"]) 43 | 44 | with pytest.raises( 45 | KeyError, 46 | match=r"The indices, \{\'tuning\': \'hi\'\}, do not exist in the index \'self.dataframe.index\'", 47 | ): 48 | tk.tree(metric_column="Avg time/rank", indices=["Base_Seq", "hi"]) 49 | 50 | 51 | def test_tree_column_multiindex(thicket_axis_columns): 52 | _, _, combined_th = thicket_axis_columns 53 | 54 | # No error 55 | combined_th.tree( 56 | metric_column=("block_128", "Avg time/rank"), name_column=("name", "") 57 | ) 58 | 59 | # No error 60 | combined_th.tree(metric_column=("block_128", "Avg time/rank")) 61 | -------------------------------------------------------------------------------- /thicket/tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | 7 | def check_identity( 8 | obj1, 9 | obj2, 10 | ignore_keys=[], 11 | equal=False, 12 | ): 13 | if equal: 14 | assert obj1 is obj2, "Both objects should have the same identity" 15 | else: 16 | assert obj1 is not obj2, "Both objects should not have the same identity" 17 | for key in obj1.__dict__.keys(): 18 | if key not in ignore_keys: 19 | if equal: 20 | assert ( 21 | obj1.__dict__[key] is obj2.__dict__[key] 22 | ), "{} should have the same identity".format(key) 23 | else: 24 | assert ( 25 | obj1.__dict__[key] is not obj2.__dict__[key] 26 | ), "{} should not have the same identity".format(key) 27 | -------------------------------------------------------------------------------- /thicket/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | __version_info__ = ("2024", "2", "1") 7 | __version__ = ".".join(__version_info__) 8 | -------------------------------------------------------------------------------- /thicket/vis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import os 7 | import sys 8 | import subprocess 9 | 10 | _filenotfound_errmsg = """ 11 | Cannot find NPM! 12 | This is required to use thicket.vis! 13 | Please install NPM and try again to import thicket.vis! 
14 | """ 15 | 16 | 17 | def check_npm(): 18 | if sys.version_info[0] == 2: 19 | from disutils.spawn import find_executable 20 | 21 | return find_executable("npm") is not None 22 | from shutil import which 23 | 24 | return which("npm") is not None 25 | 26 | 27 | def npm_build(vis_directory): 28 | print("Building thicket.vis using NPM!") 29 | subprocess.check_output(["npm", "install", "-y"], cwd=vis_directory) 30 | subprocess.check_output(["npm", "run", "build"], cwd=vis_directory) 31 | 32 | 33 | # Get the absolute path to this __init__.py file 34 | # Store in curr_dir to minimize the number of variables 35 | curr_dir = os.path.realpath(os.path.expanduser(__file__)) 36 | # Get the thicket/vis directory from the path to this __init__.py file 37 | curr_dir = os.path.abspath(os.path.join(curr_dir, os.pardir)) 38 | # Get the path to thicket/vis/package-lock.json 39 | pkg_lock_file = os.path.abspath(os.path.join(curr_dir, "package-lock.json")) 40 | 41 | if not os.path.isfile(pkg_lock_file): 42 | if not check_npm(): 43 | raise FileNotFoundError(_filenotfound_errmsg) 44 | else: 45 | npm_build(curr_dir) 46 | -------------------------------------------------------------------------------- /thicket/vis/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ens_vis", 3 | "version": "0.1.0", 4 | "description": "Javascript code for the interactive ensemble visualizations for the thicket library.", 5 | "main": "webpack.config.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "build": "npx webpack build", 9 | "watch": "npx webpack build --watch" 10 | }, 11 | "keywords": [ 12 | "ensemble", 13 | "visualization", 14 | "roundtrip" 15 | ], 16 | "author": "Connor Scully-Allison", 17 | "license": "MIT", 18 | "devDependencies": { 19 | "@babel/core": "^7.18.6", 20 | "@babel/preset-env": "^7.18.6", 21 | "babel-loader": "^8.2.5", 22 | "html-webpack-plugin": "^5.5.0", 23 | "webpack-cli": "^4.10.0" 24 
| }, 25 | "dependencies": { 26 | "@reduxjs/toolkit": "^1.8.3", 27 | "d3": "^7.6.1", 28 | "redux": "^4.2.0", 29 | "vega-embed": "^6.21.0", 30 | "vega-lite": "^5.6.0" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /thicket/vis/scripts/globals.js: -------------------------------------------------------------------------------- 1 | export let RT = window.Roundtrip 2 | 3 | export const initialState = { 4 | activeProf: {}, 5 | overviewMetrics: [], 6 | categoricalMetric: "", 7 | scatterPlotAxes: {}, 8 | currentNode: 0, 9 | highlightedProfiles: [] 10 | } 11 | 12 | export const layout = { 13 | max_width: 0, 14 | max_height: 0, 15 | margins: { 16 | left: 50, 17 | right: 50, 18 | top: 30, 19 | bottom: 15 20 | } 21 | } -------------------------------------------------------------------------------- /thicket/vis/scripts/pcp/datautil.js: -------------------------------------------------------------------------------- 1 | 2 | export function getCategoricalDomain(data, id){ 3 | let domain = []; 4 | for(const k in data){ 5 | let d = data[k]; 6 | if(!(domain.includes(d[id]))){ 7 | domain.push(d[id]); 8 | } 9 | } 10 | return domain 11 | } 12 | 13 | export function getNumericalDomain(data, id){ 14 | let domain = [Infinity, -Infinity]; 15 | for(const k in data){ 16 | let d = data[k]; 17 | domain[0] = Math.min(d[id],domain[0]); 18 | domain[1] = Math.max(d[id],domain[1]); 19 | } 20 | return domain; 21 | } 22 | 23 | export function getAggregate(op, data, grouping_key, grouping_id, data_id){ 24 | let sum = 0; 25 | let cnt = 0; 26 | for(let d of data){ 27 | if(d[grouping_key] == grouping_id){ 28 | sum += d[data_id]; 29 | cnt += 1; 30 | } 31 | } 32 | 33 | if(op == 'sum') 34 | return sum 35 | 36 | return sum/cnt 37 | } 38 | 39 | function getGraphRoot(graph){ 40 | let root = null; 41 | for(const nid of Object.keys(graph)){ 42 | const n = graph[nid]; 43 | if(n.parents.length == 0 && n.children.length > 0){ 44 | root = nid; 45 | break; 46 | } 
47 | } 48 | return root; 49 | } 50 | 51 | 52 | export function getTopLevelInclusiveMetric(data, prof_id, data_id){ 53 | //find root 54 | let root = getGraphRoot(data.graph[0]); 55 | 56 | //use get inclusive time 57 | return getInclusiveMetricForNode(root, data.dataframe, prof_id, data_id); 58 | 59 | } 60 | 61 | export function getInclusiveMetricForNode(nid, data, prof_id, data_id){ 62 | for(const d of data){ 63 | if(d['profile'] == prof_id && d['node'] == nid){ 64 | return d[data_id]; 65 | } 66 | } 67 | } 68 | 69 | export function makeOrdinalMapping(metadata, key){ 70 | let test = getCategoricalDomain(metadata, key); 71 | 72 | if(isNaN(test[0])){ 73 | metadata.sort((a,b) => { 74 | if(a[key] > b[key]){ 75 | return 1; 76 | } 77 | if(a[key] == b[key]){ 78 | return 0; 79 | } 80 | return -1 81 | }); 82 | } 83 | else{ 84 | metadata.sort((a,b) => { 85 | if(parseFloat(a[key]) > parseFloat(b[key])){ 86 | return 1; 87 | } 88 | if(parseFloat(a[key]) == parseFloat(b[key])){ 89 | return 0; 90 | } 91 | return -1 92 | }); 93 | } 94 | 95 | let ordinal_mapping = {}; 96 | let i = -1 97 | for(const mr of metadata){ 98 | if(!Object.keys(ordinal_mapping).includes(mr[key])){ 99 | i += 1; 100 | } 101 | ordinal_mapping[mr[key]] = i; 102 | } 103 | 104 | ordinal_mapping.domain = [0, i]; 105 | 106 | return ordinal_mapping; 107 | } 108 | 109 | export function inverseMapping(obj){ 110 | //from here: https://www.geeksforgeeks.org/how-to-invert-key-value-in-javascript-object/ 111 | var retobj = {}; 112 | for(var key in obj){ 113 | retobj[obj[key]] = key; 114 | } 115 | return retobj; 116 | } 117 | -------------------------------------------------------------------------------- /thicket/vis/scripts/pcp/globals.js: -------------------------------------------------------------------------------- 1 | export let RT = window.Roundtrip 2 | 3 | export const initialState = { 4 | activeProf: {}, 5 | overviewMetrics: [], 6 | categoricalMetric: "", 7 | scatterPlotAxes: {}, 8 | currentNode: 0, 9 | 
highlightedProfiles: [] 10 | } 11 | 12 | export const layout = { 13 | max_width: 0, 14 | max_height: 0, 15 | margins: { 16 | left: 50, 17 | right: 50, 18 | top: 30, 19 | bottom: 15 20 | } 21 | } -------------------------------------------------------------------------------- /thicket/vis/scripts/pcp/stackedarea.js: -------------------------------------------------------------------------------- 1 | import { layout } from './globals'; 2 | import { store, actions } from './store'; 3 | import { getNumericalDomain, getTopLevelInclusiveMetric } from './datautil'; 4 | import * as d3 from 'd3'; 5 | 6 | export default class StackedAreaPlot{ 7 | constructor(tag, width, height, data){ 8 | this.svg = tag.append('svg').attr('width', width).attr("height", height); 9 | this.width = width; 10 | this.height = height; 11 | this.data = data; 12 | 13 | this.prof_reductions = []; 14 | this.metrics = []; 15 | 16 | this.x = "launchdate"; 17 | 18 | let cali_excludes = ['node', 'nid', 'profile', 'annotation', 'name', 'spot.channel', 'mpi.function']; 19 | for(let md of data.metadata){ 20 | let top_record = {}; 21 | for(const key of Object.keys(data.dataframe[0])){ 22 | if(!cali_excludes.includes(key)){ 23 | top_record[key] = getTopLevelInclusiveMetric(data, md.profile, key); 24 | this.metrics.push(key); 25 | } 26 | } 27 | 28 | top_record["profile"] = md.profile; 29 | 30 | //adding the .x metadata 31 | // launchdate by default 32 | if(this.x == "launchdate"){ 33 | top_record['pivot_ordinal'] = new Date(parseInt(md[this.x])*1000); 34 | }else{ 35 | top_record['pivot_ordinal'] = md[this.x]; 36 | } 37 | this.prof_reductions.push(top_record); 38 | } 39 | 40 | this.prof_reductions.sort((a,b)=>(a['pivot_ordinal'] > b['pivot_ordinal'])) 41 | 42 | let ordinal_surrogate = 0; 43 | for(let r of this.prof_reductions){ 44 | r[this.x] = ordinal_surrogate; 45 | ordinal_surrogate += 1; 46 | } 47 | 48 | this.y = this.metrics[0]; 49 | 50 | this.setup(); 51 | } 52 | 53 | setup(){ 54 | //setup 55 | let y_domain 
= getNumericalDomain(this.prof_reductions, this.y); 56 | let x_domain = getNumericalDomain(this.prof_reductions, this.x); 57 | this.x_scale = null; 58 | this.y_scale = null; 59 | 60 | // if(this.x == "launchdate"){ 61 | // x_domain = x_domain.map(t => new Date(t*1000)) 62 | // this.x_scale = d3.scaleTime().domain(x_domain).range([layout.margins.left*2, this.width-layout.margins.right]); 63 | // } 64 | 65 | console.log(x_domain); 66 | this.x_scale = d3.scaleLinear().domain(x_domain).range([layout.margins.left*2, this.width-layout.margins.right]); 67 | this.y_scale = d3.scaleLinear().domain(y_domain).range([this.height-layout.margins.top, layout.margins.bottom]); 68 | 69 | this.svg.append('g') 70 | .attr('class', 'left-axis') 71 | .attr('transform',`translate(${layout.margins.left*2},${0})`) 72 | .call(d3.axisLeft().scale(this.y_scale)) 73 | .append('text') 74 | .attr('class', 'label-left') 75 | .attr('transform', 'rotate(270)') 76 | .attr('y', -layout.margins.left) 77 | .attr('x', -this.height/2) 78 | .style("text-anchor", "middle") 79 | .style("fill", "black") 80 | .text(this.y); 81 | 82 | this.svg.append('g') 83 | .attr('class', 'bottom-axis') 84 | .attr('transform',`translate(${0},${this.height - layout.margins.bottom*2})`) 85 | .call(d3.axisBottom().scale(this.x_scale)) 86 | .append('text') 87 | .attr('class', 'label-bottom') 88 | .text(this.x) 89 | .style("text-anchor", "middle") 90 | .style("fill", "black") 91 | .attr('x', this.width/2) 92 | .attr('y', layout.margins.bottom*2); 93 | 94 | //setting up layers 95 | // so things are drawn in the right order 96 | this.svg.append('g') 97 | .attr('class', 'area-chart'); 98 | 99 | this.svg.append('g') 100 | .attr('class', 'brush-layer'); 101 | 102 | this.brush = d3.brushX() 103 | .extent([[layout.margins.left*2, 0],[this.width-layout.margins.right, this.height - layout.margins.bottom*2]]) 104 | .on("start end", (e)=>{ 105 | let new_actives = []; 106 | if(e.selection){ 107 | for(const record of this.prof_reductions){ 
108 | if((record[this.x] >= this.x_scale.invert(e.selection[0])) && 109 | (record[this.x] <= this.x_scale.invert(e.selection[1])) 110 | ){ 111 | new_actives.push(record['profile']); 112 | } 113 | } 114 | } 115 | store.dispatch(actions.updateActiveProfiles(new_actives)); 116 | }) 117 | 118 | } 119 | 120 | render(){ 121 | 122 | var area_func = d3.area() 123 | .x(d => {return this.x_scale(d[this.x])}) 124 | .y0(this.height - layout.margins.bottom*2) 125 | .y1(d => {return this.y_scale(d[this.y])}); 126 | 127 | this.svg.select('.area-chart') 128 | .append('path') 129 | .attr('d', area_func(this.prof_reductions)) 130 | .attr('stroke', 'black') 131 | .attr('fill', '#0032A0'); 132 | 133 | 134 | this.svg.select('.brush-layer').call(this.brush); 135 | 136 | 137 | } 138 | 139 | 140 | } 141 | -------------------------------------------------------------------------------- /thicket/vis/scripts/pcp/store.js: -------------------------------------------------------------------------------- 1 | import { initialState } from './globals'; 2 | import { createSlice, configureStore } from '@reduxjs/toolkit' 3 | 4 | const reduce = { 5 | toggleProfActive: (state, action) => { 6 | if(!(action.payload in state.activeProf)){ 7 | state.activeProf[action.payload] = false; 8 | } 9 | 10 | state.activeProf[action.payload] = !state.activeProf[action.payload]; 11 | }, 12 | updateActiveProfiles: (state, action)=>{ 13 | for(const prf in state.activeProf){ 14 | state.activeProf[prf] = false; 15 | } 16 | 17 | for(const prf of action.payload){ 18 | if(!(prf in state.activeProf)){ 19 | state.activeProf[prf] = false; 20 | } 21 | 22 | state.activeProf[prf] = !state.activeProf[prf]; 23 | } 24 | }, 25 | updateCategoricalMetric: (state, action)=>{ 26 | state.categoricalMetric = action.payload; 27 | }, 28 | updateActiveDimensions: (state, action)=>{ 29 | state.activeDimensions = action.payload; 30 | }, 31 | setAxesForScatterPlot: (state, action)=>{ 32 | // Payload: { 33 | // axes: {x:"",y:""}, 34 | // sid: "" 
35 | // } 36 | state.scatterPlotAxes[action.payload.sid] = action.payload.axes; 37 | }, 38 | setCurrentNode: (state, action) => { 39 | state.currentNode = action.payload; 40 | }, 41 | setHighlightedProfiles: (state, action) => { 42 | //payload a list of profile id 43 | state.highlightedProfiles = action.payload; 44 | } 45 | 46 | } 47 | 48 | const counterSlice = createSlice({ 49 | name: 'counter', 50 | initialState: initialState, 51 | reducers: reduce 52 | }) 53 | 54 | export const actions = counterSlice.actions 55 | 56 | export const store = configureStore({ 57 | reducer: counterSlice.reducer 58 | }) 59 | 60 | -------------------------------------------------------------------------------- /thicket/vis/scripts/topdown/topdown.js: -------------------------------------------------------------------------------- 1 | import * as d3 from 'd3'; 2 | import {TreeModel, TreeTable} from '../treetable.js'; 3 | 4 | const RT = window.Roundtrip; 5 | 6 | let data = JSON.parse(RT['topdown_data']); 7 | let tree_model = new TreeModel(data.graph[0]); 8 | let tree_max_w = element.offsetWidth; 9 | let tree_max_h = window.innerHeight*.9; 10 | let tree_div = d3.select("#plot-area"); 11 | 12 | 13 | let TB = new TreeTable(tree_div, tree_max_w, tree_max_h, tree_model, data) 14 | TB.render(); 15 | 16 | 17 | -------------------------------------------------------------------------------- /thicket/vis/static/pcp_bundle.html: -------------------------------------------------------------------------------- 1 | Parallel Coordinate Plot Test
Click and drag over either scatter plot to make lines appear on the parallel coordinate plot.
Left X:
Left Y:
Right X:
Right Y:
Highlighted Profile(s):
-------------------------------------------------------------------------------- /thicket/vis/static/topdown_bundle.html: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /thicket/vis/static/treetable_bundle.html: -------------------------------------------------------------------------------- 1 | Treetable Prototype

Treetable Prototype

-------------------------------------------------------------------------------- /thicket/vis/static_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Lawrence Livermore National Security, LLC and other 2 | # Thicket Project Developers. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from os import walk, path 7 | 8 | from bs4 import BeautifulSoup 9 | 10 | static_filepath = path.abspath("static/") 11 | 12 | for (pt, dirs, files) in walk(static_filepath): 13 | for file in files: 14 | if ".html" in file: 15 | with open(path.join(static_filepath, file), "r") as f: 16 | html = f.read() 17 | soup = BeautifulSoup(html) 18 | soup.script["src"] = path.join(static_filepath, file[0:-5] + ".js") 19 | with open(path.join(static_filepath, file), "w") as writer: 20 | writer.write(soup.prettify()) 21 | -------------------------------------------------------------------------------- /thicket/vis/templates/pcp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Parallel Coordinate Plot Test 5 | 6 | 7 | 13 | 14 | 15 |
Click and drag over either scatter plot to make lines appear on the parallel coordinate plot.
16 |
17 |
Left X:
Left Y:
18 |
Right X:
Right Y:
19 |
20 | 21 |
22 |
23 |
Highlighted Profile(s):
24 | 25 | -------------------------------------------------------------------------------- /thicket/vis/templates/topdown.html: -------------------------------------------------------------------------------- 1 |
-------------------------------------------------------------------------------- /thicket/vis/templates/treetable.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Treetable Prototype 5 | 6 | 7 | 13 | 14 | 15 |

Treetable Prototype

16 | 17 |
18 | 19 | -------------------------------------------------------------------------------- /thicket/vis/visualizations.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from os.path import dirname 3 | 4 | from IPython.core.magic import Magics, magics_class, line_magic 5 | from hatchet.external import Roundtrip as RT 6 | 7 | 8 | def _thicket_to_json(data): 9 | return data.to_json() 10 | 11 | 12 | def _df_to_json(data): 13 | return data.to_json(orient="records") 14 | 15 | 16 | def _basic_to_json(data): 17 | import json 18 | 19 | return json.dumps(data) 20 | 21 | 22 | vis_dir = dirname(path.abspath(__file__)) 23 | 24 | 25 | @magics_class 26 | class EnsembleVis(Magics): 27 | def __init__(self, shell): 28 | super(EnsembleVis, self).__init__(shell) 29 | self.vis_dist = path.join(vis_dir, "static") 30 | 31 | @line_magic 32 | def metadata_vis(self, line): 33 | args = line.split(" ") 34 | RT.load_webpack(path.join(self.vis_dist, "pcp_bundle.html"), cache=False) 35 | RT.var_to_js( 36 | args[0], "thicket_ensemble", watch=False, to_js_converter=_thicket_to_json 37 | ) 38 | 39 | if len(args) > 1: 40 | RT.var_to_js( 41 | args[1], "metadata_dims", watch=False, to_js_converter=_basic_to_json 42 | ) 43 | 44 | if len(args) > 2: 45 | RT.var_to_js( 46 | args[2], "focus_node", watch=False, to_js_converter=_basic_to_json 47 | ) 48 | 49 | RT.initialize() 50 | 51 | @line_magic 52 | def topdown_analysis(self, line): 53 | args = line.split(" ") 54 | RT.load_webpack(path.join(self.vis_dist, "topdown_bundle.html"), cache=False) 55 | RT.var_to_js( 56 | args[0], "topdown_data", watch=False, to_js_converter=_thicket_to_json 57 | ) 58 | 59 | RT.initialize() 60 | 61 | 62 | def load_ipython_extension(ipython): 63 | ipython.register_magics(EnsembleVis) 64 | -------------------------------------------------------------------------------- /thicket/vis/webpack.config.js: 
-------------------------------------------------------------------------------- 1 | const HtmlWebpackPlugin = require('html-webpack-plugin'); 2 | const path = require('path'); 3 | 4 | module.exports = { 5 | module:{ 6 | rules:[ 7 | { 8 | test: /\.css$/i, 9 | use: ["style-loader", "css-loader"] 10 | }, 11 | { 12 | test: /\.(js|jsx)$/, 13 | exclude: /node_modules/, 14 | loader: 'babel-loader', 15 | options:{ 16 | cwd: path.resolve(__dirname), 17 | presets:["@babel/preset-env"] 18 | } 19 | } 20 | ] 21 | }, 22 | entry: { 23 | pcp: [path.resolve(__dirname,'scripts/pcp/pcp.js')], 24 | topdown: [path.resolve(__dirname,'scripts/topdown/topdown.js')] 25 | }, 26 | output: { 27 | publicPath: path.resolve(__dirname, 'static/'), 28 | filename: '[name]_bundle.js', 29 | path: path.resolve(__dirname, 'static/') 30 | // filename: '[name]_bundle.js', 31 | // path: path.resolve(__dirname, 'static') 32 | }, 33 | optimization: { 34 | minimize: false 35 | }, 36 | plugins:[ 37 | new HtmlWebpackPlugin({ 38 | template: 'templates/pcp.html', 39 | chunks: ['pcp'], 40 | filename: 'pcp_bundle.html' 41 | }), 42 | new HtmlWebpackPlugin({ 43 | template: 'templates/topdown.html', 44 | chunks: ['topdown'], 45 | filename: 'topdown_bundle.html' 46 | }) 47 | ], 48 | mode: 'production' 49 | } --------------------------------------------------------------------------------