├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yaml └── workflows │ ├── [flake8] │ ├── drishti-darshan-3.4.0.yml │ ├── drishti-darshan-3.4.1.yml │ └── drishti-darshan-3.4.2.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── drishti ├── __init__.py ├── handlers │ ├── __init__.py │ ├── handle_darshan.py │ └── handle_recorder.py ├── includes │ ├── __init__.py │ ├── config.py │ ├── module.py │ ├── parser.py │ └── snippets │ │ ├── __init__.py │ │ ├── hdf5-alignment.c │ │ ├── hdf5-cache.c │ │ ├── hdf5-collective-metadata.c │ │ ├── hdf5-vol-async-read.c │ │ ├── hdf5-vol-async-write.c │ │ ├── lustre-striping.bash │ │ ├── mpi-io-collective-read.c │ │ ├── mpi-io-collective-write.c │ │ ├── mpi-io-hints.bash │ │ ├── mpi-io-iread.c │ │ ├── mpi-io-iwrite.c │ │ └── pnetcdf-hdf5-no-fill.c └── reporter.py ├── images ├── drishti-logo.png ├── sample-io-insights-issues.svg ├── sample-io-insights-verbose.svg └── sample-io-insights.svg ├── requirements.txt ├── sample ├── jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan └── jlbez_8a_benchmark_write_parallel_id1322696_8-21-14519-8141979180909667175_12.darshan └── setup.py /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | 6 | assignees: 7 | - octocat 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | Thanks for taking the time to fill out this bug report! 13 | 14 | - type: textarea 15 | id: what-happened 16 | attributes: 17 | label: What happened? 18 | description: Please, describe the issue you are facing. 19 | placeholder: 20 | value: 21 | validations: 22 | required: true 23 | 24 | - type: textarea 25 | id: what-is-expected 26 | attributes: 27 | label: What did you expect to happen? 28 | description: Please, describe what you expected to happen. 29 | placeholder: 30 | value: 31 | validations: 32 | required: true 33 | 34 | - type: dropdown 35 | id: version 36 | attributes: 37 | label: Version 38 | description: What version of Drishti are you running? 39 | options: 40 | - 0.4 (Default) 41 | - 0.3 42 | validations: 43 | required: true 44 | 45 | - type: textarea 46 | id: logs 47 | attributes: 48 | label: Relevant log output 49 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 50 | render: shell -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Request a new feature 3 | title: "[Feature]: " 4 | labels: ["enhancement", "triage"] 5 | 6 | assignees: 7 | - octocat 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | Thanks for taking the time to fill out this feature request! 13 | 14 | - type: textarea 15 | id: what-feature 16 | attributes: 17 | label: What feature do you want? 18 | description: Please, describe the feature you are requesting. 
19 | placeholder: 20 | value: 21 | validations: 22 | required: true 23 | -------------------------------------------------------------------------------- /.github/workflows/[flake8]: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 160 3 | exclude = .git,__pycache__,docs/source/conf.py,build,dist -------------------------------------------------------------------------------- /.github/workflows/drishti-darshan-3.4.0.yml: -------------------------------------------------------------------------------- 1 | name: Drishti (Darshan 3.4.0) 2 | 3 | on: 4 | pull_request: 5 | push: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | drishti: 10 | runs-on: ubuntu-latest 11 | container: jlbez/dxt-explorer-ubuntu-20.04 12 | timeout-minutes: 60 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | submodules: true 18 | 19 | - name: Dependencies 20 | run: | 21 | sudo apt-get update 22 | sudo apt-get install cmake gcc git libtool python3 python3-pip libcurl4-openssl-dev software-properties-common -y 23 | 24 | # Darshan 25 | git clone https://github.com/darshan-hpc/darshan.git darshan-3.4.0 26 | cd darshan-3.4.0 27 | git checkout darshan-3.4.0 28 | 29 | pip install --upgrade pip 30 | 31 | - name: Build Darshan 32 | run: | 33 | export DARSHAN_DIR=/opt/darshan 34 | 35 | mkdir $DARSHAN_DIR 36 | cd darshan-3.4.0 37 | bash prepare.sh 38 | cd darshan-util 39 | ./configure --prefix=$DARSHAN_DIR 40 | make 41 | make install 42 | 43 | - name: Install py-darshan 44 | run: | 45 | pip install darshan==3.4.0.1 46 | 47 | - name: Install Drishti 48 | run: | 49 | pip install -r requirements.txt 50 | pip install . 51 | 52 | - name: Run Drishti (--help) 53 | run: | 54 | export PATH=/opt/darshan/bin:$PATH 55 | 56 | drishti -h 57 | 58 | - name: Run Drishti (--issues) 59 | run: | 60 | export PATH=/opt/darshan/bin:$PATH 61 | 62 | drishti --issues sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 63 | 64 | - name: Run Drishti (--code) 65 | run: | 66 | export PATH=/opt/darshan/bin:$PATH 67 | 68 | drishti --code sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 69 | 70 | - name: Run DXT Explorer (--verbose) 71 | run: | 72 | export PATH=/opt/darshan/bin:$PATH 73 | 74 | drishti --verbose sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 75 | 76 | - name: Run DXT Explorer (--path) 77 | run: | 78 | export PATH=/opt/darshan/bin:$PATH 79 | 80 | drishti --path sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 81 | 82 | - name: Run DXT Explorer (--html) 83 | run: | 84 | export PATH=/opt/darshan/bin:$PATH 85 | 86 | drishti --html sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 87 | 88 | - name: Run DXT Explorer (--svg) 89 | run: | 90 | export PATH=/opt/darshan/bin:$PATH 91 | 92 | drishti --svg sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 93 | 94 | - name: Upload Artifact 95 | if: always() 96 | uses: actions/upload-artifact@v4 97 | with: 98 | name: tests 99 | path: sample/** 100 | retention-days: 1 -------------------------------------------------------------------------------- /.github/workflows/drishti-darshan-3.4.1.yml: -------------------------------------------------------------------------------- 1 | name: Drishti (Darshan 3.4.1) 2 | 3 | on: 4 | pull_request: 5 | push: 6 | workflow_dispatch: 7 | 8 | 
jobs: 9 | drishti: 10 | runs-on: ubuntu-latest 11 | container: jlbez/dxt-explorer-ubuntu-20.04 12 | timeout-minutes: 60 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | submodules: true 18 | 19 | - name: Dependencies 20 | run: | 21 | sudo apt-get update 22 | sudo apt-get install cmake gcc git libtool python3 python3-pip libcurl4-openssl-dev software-properties-common -y 23 | 24 | # Darshan 25 | git clone https://github.com/darshan-hpc/darshan.git darshan-3.4.1 26 | cd darshan-3.4.1 27 | git checkout darshan-3.4.1 28 | 29 | pip install --upgrade pip 30 | 31 | - name: Build Darshan 32 | run: | 33 | export DARSHAN_DIR=/opt/darshan 34 | 35 | mkdir $DARSHAN_DIR 36 | cd darshan-3.4.1 37 | bash prepare.sh 38 | cd darshan-util 39 | ./configure --prefix=$DARSHAN_DIR 40 | make 41 | make install 42 | 43 | - name: Install py-darshan 44 | run: | 45 | pip install darshan==3.4.1 46 | 47 | - name: Install Drishti 48 | run: | 49 | pip install -r requirements.txt 50 | pip install . 51 | 52 | - name: Run Drishti (--help) 53 | run: | 54 | export PATH=/opt/darshan/bin:$PATH 55 | 56 | drishti -h 57 | 58 | - name: Run Drishti (--issues) 59 | run: | 60 | export PATH=/opt/darshan/bin:$PATH 61 | 62 | drishti --issues sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 63 | 64 | - name: Run Drishti (--code) 65 | run: | 66 | export PATH=/opt/darshan/bin:$PATH 67 | 68 | drishti --code sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 69 | 70 | - name: Run DXT Explorer (--verbose) 71 | run: | 72 | export PATH=/opt/darshan/bin:$PATH 73 | 74 | drishti --verbose sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 75 | 76 | - name: Run DXT Explorer (--path) 77 | run: | 78 | export PATH=/opt/darshan/bin:$PATH 79 | 80 | drishti --path sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 81 | 82 | - name: Run DXT Explorer (--html) 83 | run: | 84 | export PATH=/opt/darshan/bin:$PATH 85 | 86 | drishti --html sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 87 | 88 | - name: Run DXT Explorer (--svg) 89 | run: | 90 | export PATH=/opt/darshan/bin:$PATH 91 | 92 | drishti --svg sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 93 | 94 | - name: Upload Artifact 95 | if: always() 96 | uses: actions/upload-artifact@v4 97 | with: 98 | name: tests 99 | path: sample/** 100 | retention-days: 1 -------------------------------------------------------------------------------- /.github/workflows/drishti-darshan-3.4.2.yml: -------------------------------------------------------------------------------- 1 | name: Drishti (Darshan 3.4.2) 2 | 3 | on: 4 | pull_request: 5 | push: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | drishti: 10 | runs-on: ubuntu-latest 11 | container: jlbez/dxt-explorer-ubuntu-20.04 12 | timeout-minutes: 60 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | submodules: true 18 | 19 | - name: Dependencies 20 | run: | 21 | sudo apt-get update 22 | sudo apt-get install cmake gcc git libtool python3 python3-pip libcurl4-openssl-dev software-properties-common -y 23 | 24 | # Darshan 25 | git clone https://github.com/darshan-hpc/darshan.git darshan-3.4.2 26 | cd darshan-3.4.2 27 | git checkout darshan-3.4.2 28 | 29 | pip install --upgrade pip 30 | 31 | - name: Build Darshan 32 | run: | 33 | export DARSHAN_DIR=/opt/darshan 34 | 35 | mkdir $DARSHAN_DIR 
36 | cd darshan-3.4.2 37 | bash prepare.sh 38 | cd darshan-util 39 | ./configure --prefix=$DARSHAN_DIR 40 | make 41 | make install 42 | 43 | - name: Install py-darshan 44 | run: | 45 | pip install darshan==3.4.2 46 | 47 | - name: Install Drishti 48 | run: | 49 | pip install -r requirements.txt 50 | pip install . 51 | 52 | - name: Run Drishti (--help) 53 | run: | 54 | export PATH=/opt/darshan/bin:$PATH 55 | 56 | drishti -h 57 | 58 | - name: Run Drishti (--issues) 59 | run: | 60 | export PATH=/opt/darshan/bin:$PATH 61 | 62 | drishti --issues sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 63 | 64 | - name: Run Drishti (--code) 65 | run: | 66 | export PATH=/opt/darshan/bin:$PATH 67 | 68 | drishti --code sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 69 | 70 | - name: Run DXT Explorer (--verbose) 71 | run: | 72 | export PATH=/opt/darshan/bin:$PATH 73 | 74 | drishti --verbose sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 75 | 76 | - name: Run DXT Explorer (--path) 77 | run: | 78 | export PATH=/opt/darshan/bin:$PATH 79 | 80 | drishti --path sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 81 | 82 | - name: Run DXT Explorer (--html) 83 | run: | 84 | export PATH=/opt/darshan/bin:$PATH 85 | 86 | drishti --html sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 87 | 88 | - name: Run DXT Explorer (--svg) 89 | run: | 90 | export PATH=/opt/darshan/bin:$PATH 91 | 92 | drishti --svg sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 93 | 94 | - name: Upload Artifact 95 | if: always() 96 | uses: actions/upload-artifact@v4 97 | with: 98 | name: tests 99 | path: sample/** 100 | retention-days: 1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,pycharm,visualstudiocode 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,pycharm,visualstudiocode 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | ### macOS ### 20 | # General 21 | .DS_Store 22 | .AppleDouble 23 | .LSOverride 24 | 25 | # Icon must end with two \r 26 | Icon 27 | 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | ### macOS Patch ### 49 | # iCloud generated files 50 | *.icloud 51 | 52 | ### PyCharm ### 53 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 54 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 55 | 56 | # 
User-specific stuff 57 | .idea/**/workspace.xml 58 | .idea/**/tasks.xml 59 | .idea/**/usage.statistics.xml 60 | .idea/**/dictionaries 61 | .idea/**/shelf 62 | 63 | # AWS User-specific 64 | .idea/**/aws.xml 65 | 66 | # Generated files 67 | .idea/**/contentModel.xml 68 | 69 | # Sensitive or high-churn files 70 | .idea/**/dataSources/ 71 | .idea/**/dataSources.ids 72 | .idea/**/dataSources.local.xml 73 | .idea/**/sqlDataSources.xml 74 | .idea/**/dynamic.xml 75 | .idea/**/uiDesigner.xml 76 | .idea/**/dbnavigator.xml 77 | 78 | # Gradle 79 | .idea/**/gradle.xml 80 | .idea/**/libraries 81 | 82 | # Gradle and Maven with auto-import 83 | # When using Gradle or Maven with auto-import, you should exclude module files, 84 | # since they will be recreated, and may cause churn. Uncomment if using 85 | # auto-import. 86 | # .idea/artifacts 87 | # .idea/compiler.xml 88 | # .idea/jarRepositories.xml 89 | # .idea/modules.xml 90 | # .idea/*.iml 91 | # .idea/modules 92 | # *.iml 93 | # *.ipr 94 | 95 | # CMake 96 | cmake-build-*/ 97 | 98 | # Mongo Explorer plugin 99 | .idea/**/mongoSettings.xml 100 | 101 | # File-based project format 102 | *.iws 103 | 104 | # IntelliJ 105 | out/ 106 | 107 | # mpeltonen/sbt-idea plugin 108 | .idea_modules/ 109 | 110 | # JIRA plugin 111 | atlassian-ide-plugin.xml 112 | 113 | # Cursive Clojure plugin 114 | .idea/replstate.xml 115 | 116 | # SonarLint plugin 117 | .idea/sonarlint/ 118 | 119 | # Crashlytics plugin (for Android Studio and IntelliJ) 120 | com_crashlytics_export_strings.xml 121 | crashlytics.properties 122 | crashlytics-build.properties 123 | fabric.properties 124 | 125 | # Editor-based Rest Client 126 | .idea/httpRequests 127 | 128 | # Android studio 3.1+ serialized cache file 129 | .idea/caches/build_file_checksums.ser 130 | 131 | ### PyCharm Patch ### 132 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 133 | 134 | # *.iml 135 | # modules.xml 136 | # .idea/misc.xml 137 | # *.ipr 138 | 139 | # Sonarlint plugin 140 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 141 | .idea/**/sonarlint/ 142 | 143 | # SonarQube Plugin 144 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 145 | .idea/**/sonarIssues.xml 146 | 147 | # Markdown Navigator plugin 148 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 149 | .idea/**/markdown-navigator.xml 150 | .idea/**/markdown-navigator-enh.xml 151 | .idea/**/markdown-navigator/ 152 | 153 | # Cache file creation bug 154 | # See https://youtrack.jetbrains.com/issue/JBR-2257 155 | .idea/$CACHE_FILE$ 156 | 157 | # CodeStream plugin 158 | # https://plugins.jetbrains.com/plugin/12206-codestream 159 | .idea/codestream.xml 160 | 161 | # Azure Toolkit for IntelliJ plugin 162 | # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij 163 | .idea/**/azureSettings.xml 164 | 165 | ### Python ### 166 | # Byte-compiled / optimized / DLL files 167 | __pycache__/ 168 | *.py[cod] 169 | *$py.class 170 | 171 | # C extensions 172 | *.so 173 | 174 | # Distribution / packaging 175 | .Python 176 | build/ 177 | develop-eggs/ 178 | dist/ 179 | downloads/ 180 | eggs/ 181 | .eggs/ 182 | lib/ 183 | lib64/ 184 | parts/ 185 | sdist/ 186 | var/ 187 | wheels/ 188 | share/python-wheels/ 189 | *.egg-info/ 190 | .installed.cfg 191 | *.egg 192 | MANIFEST 193 | 194 | # PyInstaller 195 | # Usually these files are written by a python script from a template 196 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
197 | *.manifest 198 | *.spec 199 | 200 | # Installer logs 201 | pip-log.txt 202 | pip-delete-this-directory.txt 203 | 204 | # Unit test / coverage reports 205 | htmlcov/ 206 | .tox/ 207 | .nox/ 208 | .coverage 209 | .coverage.* 210 | .cache 211 | nosetests.xml 212 | coverage.xml 213 | *.cover 214 | *.py,cover 215 | .hypothesis/ 216 | .pytest_cache/ 217 | cover/ 218 | 219 | # Translations 220 | *.mo 221 | *.pot 222 | 223 | # Django stuff: 224 | *.log 225 | local_settings.py 226 | db.sqlite3 227 | db.sqlite3-journal 228 | 229 | # Flask stuff: 230 | instance/ 231 | .webassets-cache 232 | 233 | # Scrapy stuff: 234 | .scrapy 235 | 236 | # Sphinx documentation 237 | docs/_build/ 238 | 239 | # PyBuilder 240 | .pybuilder/ 241 | target/ 242 | 243 | # Jupyter Notebook 244 | .ipynb_checkpoints 245 | 246 | # IPython 247 | profile_default/ 248 | ipython_config.py 249 | 250 | # pyenv 251 | # For a library or package, you might want to ignore these files since the code is 252 | # intended to run in multiple environments; otherwise, check them in: 253 | # .python-version 254 | 255 | # pipenv 256 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 257 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 258 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 259 | # install all needed dependencies. 260 | #Pipfile.lock 261 | 262 | # poetry 263 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 264 | # This is especially recommended for binary packages to ensure reproducibility, and is more 265 | # commonly ignored for libraries. 266 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 267 | #poetry.lock 268 | 269 | # pdm 270 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 271 | #pdm.lock 272 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 273 | # in version control. 274 | # https://pdm.fming.dev/#use-with-ide 275 | .pdm.toml 276 | 277 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 278 | __pypackages__/ 279 | 280 | # Celery stuff 281 | celerybeat-schedule 282 | celerybeat.pid 283 | 284 | # SageMath parsed files 285 | *.sage.py 286 | 287 | # Environments 288 | .env 289 | .venv 290 | env/ 291 | venv/ 292 | ENV/ 293 | env.bak/ 294 | venv.bak/ 295 | 296 | # Spyder project settings 297 | .spyderproject 298 | .spyproject 299 | 300 | # Rope project settings 301 | .ropeproject 302 | 303 | # mkdocs documentation 304 | /site 305 | 306 | # mypy 307 | .mypy_cache/ 308 | .dmypy.json 309 | dmypy.json 310 | 311 | # Pyre type checker 312 | .pyre/ 313 | 314 | # pytype static type analyzer 315 | .pytype/ 316 | 317 | # Cython debug symbols 318 | cython_debug/ 319 | 320 | # PyCharm 321 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 322 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 323 | # and can be added to the global gitignore or merged into this file. For a more nuclear 324 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
325 | #.idea/ 326 | 327 | ### Python Patch ### 328 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 329 | poetry.toml 330 | 331 | # ruff 332 | .ruff_cache/ 333 | 334 | # LSP config files 335 | pyrightconfig.json 336 | 337 | ### VisualStudioCode ### 338 | .vscode/* 339 | !.vscode/settings.json 340 | !.vscode/tasks.json 341 | !.vscode/launch.json 342 | !.vscode/extensions.json 343 | !.vscode/*.code-snippets 344 | 345 | # Local History for Visual Studio Code 346 | .history/ 347 | 348 | # Built Visual Studio Code Extensions 349 | *.vsix 350 | 351 | ### VisualStudioCode Patch ### 352 | # Ignore all local history of files 353 | .history 354 | .ionide 355 | 356 | ### Windows ### 357 | # Windows thumbnail cache files 358 | Thumbs.db 359 | Thumbs.db:encryptable 360 | ehthumbs.db 361 | ehthumbs_vista.db 362 | 363 | # Dump file 364 | *.stackdump 365 | 366 | # Folder config file 367 | [Dd]esktop.ini 368 | 369 | # Recycle Bin used on file shares 370 | $RECYCLE.BIN/ 371 | 372 | # Windows Installer files 373 | *.cab 374 | *.msi 375 | *.msix 376 | *.msm 377 | *.msp 378 | 379 | # Windows shortcuts 380 | *.lnk 381 | 382 | # End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,pycharm,visualstudiocode 383 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | 3 | RUN apt-get update 4 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ 5 | python3 \ 6 | pip \ 7 | make \ 8 | git \ 9 | wget \ 10 | libssl-dev \ 11 | libcurl4-openssl-dev \ 12 | libtool \ 13 | autoconf \ 14 | automake 15 | RUN rm -rf /var/lib/apt/lists/* 16 | 17 | RUN wget https://ftp.mcs.anl.gov/pub/darshan/releases/darshan-3.4.4.tar.gz 18 | RUN tar zxvf darshan-3.4.4.tar.gz 19 | 20 | WORKDIR /darshan-3.4.4/ 21 | 22 | RUN bash prepare.sh 23 | 24 | WORKDIR /darshan-3.4.4/darshan-util/ 25 | 26 | RUN ./configure --prefix=/opt/darshan && make && make install 27 | 28 | ENV PATH=/opt/darshan/bin:$PATH 29 | ENV LD_LIBRARY_PATH=/opt/darshan/lib:$LD_LIBRARY_PATH 30 | 31 | WORKDIR / 32 | 33 | RUN git clone https://github.com/hpc-io/drishti-io 34 | 35 | WORKDIR /drishti-io 36 | 37 | RUN pip install --upgrade pip 38 | RUN pip install -r requirements.txt 39 | RUN pip install . 40 | 41 | ENTRYPOINT ["drishti"] 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | *** License Agreement *** 2 | 3 | Drishti Copyright (c) 2022, The Regents of the University of California, 4 | through Lawrence Berkeley National Laboratory (subject to receipt of 5 | any required approvals from the U.S. Dept. of Energy). All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | (1) Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | (2) Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | (3) Neither the name of the University of California, Lawrence Berkeley 18 | National Laboratory, U.S. Dept. 
of Energy nor the names of its contributors 19 | may be used to endorse or promote products derived from this software 20 | without specific prior written permission. 21 | 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | POSSIBILITY OF SUCH DAMAGE. 34 | 35 | You are under no obligation whatsoever to provide any bug fixes, patches, 36 | or upgrades to the features, functionality or performance of the source 37 | code ("Enhancements") to anyone; however, if you choose to make your 38 | Enhancements available either publicly, or directly to Lawrence Berkeley 39 | National Laboratory, without imposing a separate written license agreement 40 | for such Enhancements, then you hereby grant the following license: a 41 | non-exclusive, royalty-free perpetual license to install, use, modify, 42 | prepare derivative works, incorporate into other computer software, 43 | distribute, and sublicense such enhancements or derivative works thereof, 44 | in binary and source code form. 45 | 46 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include drishti/includes/snippets/* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Drishti I/O: I/O Insights for All 2 | 3 | Drishti I/O is a command-line tool to guide end-users in optimizing I/O in their applications by detecting typical I/O performance pitfalls and providing a set of recommendations. You can get Drishti directly from pip: 4 | 5 | ``` 6 | pip install drishti-io 7 | ``` 8 | 9 | To install Drishti from scratch, make sure you have Python 3 and first install the dependencies: 10 | 11 | ``` 12 | pip install -r requirements.txt 13 | pip install . 
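# Optional (assumption, not part of the upstream instructions): while developing
# Drishti itself, an editable install can be used instead of the line above:
# pip install -e .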
14 | ``` 15 | 16 | You can then run Drishti with the following options: 17 | 18 | ``` 19 | usage: drishti.py [-h] [--issues] [--html] [--svg] [--verbose] [--code] darshan 20 | 21 | Drishti: 22 | 23 | positional arguments: 24 | darshan Input .darshan file 25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | --issues Only displays the detected issues and hides the recommendations 29 | --html Export the report as an HTML page 30 | --svg Export the report as an SVG image 31 | --verbose Display extended details for the recommendations 32 | --code Display insights identification code 33 | ``` 34 | 35 | You can also use our Docker image (built from the `Dockerfile` in this repository): 36 | 37 | ``` 38 | docker run --rm --mount type=bind,source="$(pwd)",target=/drishti drishti sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan 39 | ``` 40 | 41 | 42 | Alternatively, you can pull a pre-configured image with all dependencies from Docker Hub: 43 | 44 | ``` 45 | docker pull hpcio/drishti 46 | ``` 47 | 48 | Since Drishti needs a Darshan log file as input, mount your current directory into the container and remove the container after use. You can pass the same arguments described above after the container name (drishti). 49 | 50 | ``` 51 | docker run --rm --mount \ 52 | type=bind,source="$(pwd)",target="/drishti" \ 53 | drishti <FILE>.darshan 54 | ``` 55 | 56 | By default, Drishti generates an overview report in the console with recommendations: 57 | 58 | <p align="center"> 59 | <img alt="Drishti" src="images/sample-io-insights.svg"> 60 | </p>
61 | 62 | You can also only list the issues detected by Drishti with `--issues`: 63 | 64 | <p align="center"> 65 | <img alt="Drishti" src="images/sample-io-insights-issues.svg"> 66 | </p>
67 | 68 | You can also enable the verbose mode with `--verbose` to visualize solution snippets: 69 | 70 | <p align="center"> 71 | <img alt="Drishti" src="images/sample-io-insights-verbose.svg"> 72 | </p>
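These flags can also be applied to any other log file. For instance, the HTML, SVG, and insight-code reports for the bundled sample log can be generated with the invocations below (the same invocations are exercised by the repository's CI workflows):

```
drishti --html sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan
drishti --svg sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan
drishti --code sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan
```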
73 | 74 | --- 75 | 76 | ### Copyright Notice 77 | 78 | Drishti Copyright (c) 2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 79 | 80 | If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Intellectual Property Office at IPO@lbl.gov. 81 | 82 | NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit others to do so. 83 | 84 | -------------------------------------------------------------------------------- /drishti/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/drishti/__init__.py -------------------------------------------------------------------------------- /drishti/handlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/drishti/handlers/__init__.py -------------------------------------------------------------------------------- /drishti/handlers/handle_darshan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import io 4 | import sys 5 | import time 6 | import shlex 7 | import shutil 8 | import subprocess 9 | import pandas as pd 10 | import darshan 11 | import darshan.backend.cffi_backend as darshanll 12 | 13 | from rich import print 14 | from packaging import version 15 | from drishti.includes.module import * 16 | 17 | 18 | def is_available(name): 19 | """Check whether `name` is on PATH and marked as executable.""" 20 | 21 | return shutil.which(name) is not None 22 | 23 | 24 | def check_log_version(console, file, log_version, library_version): 25 | use_file = file 26 | 27 | if version.parse(log_version) < version.parse('3.4.0'): 28 | # Check if darshan-convert is installed and available in the PATH 29 | if not is_available('darshan-convert'): 30 | console.print( 31 | Panel( 32 | Padding( 33 | 'Darshan file is using an old format and darshan-convert is not available in the PATH.', 34 | (1, 1) 35 | ), 36 | title='{}WARNING'.format('[orange1]'), 37 | title_align='left' 38 | ) 39 | ) 40 | 41 | sys.exit(os.EX_DATAERR) 42 | 43 | use_file = os.path.basename(file.replace('.darshan', '.converted.darshan')) 44 | 45 | console.print( 46 | Panel( 47 | Padding( 48 | 'Converting .darshan log from {}: format: saving output file "{}" in the current working directory.'.format( 49 | log_version, 50 | use_file 51 | ), 52 | (1, 1) 53 | ), 54 | title='{}WARNING'.format('[orange1]'), 55 | title_align='left' 56 | ) 57 | ) 58 | 59 | if not os.path.isfile(use_file): 60 | ret = os.system( 61 | 'darshan-convert {} {}'.format( 62 | file, 63 | use_file 64 | ) 65 | ) 66 | 67 | if ret != 0: 68 | print('Unable to convert .darshan file to version {}'.format(library_version)) 69 | 70 | return use_file 71 | 72 | 73 | def handler(): 74 | console = init_console() 75 | 76 | insights_start_time = time.time() 77 | 
78 | log = darshanll.log_open(args.log_path) 79 | 80 | modules = darshanll.log_get_modules(log) 81 | 82 | information = darshanll.log_get_job(log) 83 | 84 | if 'log_ver' in information: 85 | log_version = information['log_ver'] 86 | else: 87 | log_version = information['metadata']['lib_ver'] 88 | library_version = darshanll.get_lib_version() 89 | 90 | # Make sure log format is of the same version 91 | filename = args.log_path 92 | # check_log_version(console, args.log_path, log_version, library_version) 93 | 94 | darshanll.log_close(log) 95 | 96 | darshan.enable_experimental() 97 | 98 | report = darshan.DarshanReport(filename) 99 | 100 | job = report.metadata 101 | 102 | ######################################################################################################################################################################### 103 | 104 | # Check usage of STDIO, POSIX, and MPI-IO per file 105 | 106 | if 'STDIO' in report.records: 107 | df_stdio = report.records['STDIO'].to_df() 108 | 109 | if df_stdio: 110 | total_write_size_stdio = df_stdio['counters']['STDIO_BYTES_WRITTEN'].sum() 111 | total_read_size_stdio = df_stdio['counters']['STDIO_BYTES_READ'].sum() 112 | 113 | total_size_stdio = total_write_size_stdio + total_read_size_stdio 114 | else: 115 | total_size_stdio = 0 116 | else: 117 | df_stdio = None 118 | 119 | total_size_stdio = 0 120 | 121 | if 'POSIX' in report.records: 122 | df_posix = report.records['POSIX'].to_df() 123 | 124 | if df_posix: 125 | total_write_size_posix = df_posix['counters']['POSIX_BYTES_WRITTEN'].sum() 126 | total_read_size_posix = df_posix['counters']['POSIX_BYTES_READ'].sum() 127 | 128 | total_size_posix = total_write_size_posix + total_read_size_posix 129 | else: 130 | total_size_posix = 0 131 | else: 132 | df_posix = None 133 | 134 | total_size_posix = 0 135 | 136 | if 'MPI-IO' in report.records: 137 | df_mpiio = report.records['MPI-IO'].to_df() 138 | 139 | if df_mpiio: 140 | total_write_size_mpiio = df_mpiio['counters']['MPIIO_BYTES_WRITTEN'].sum() 141 | total_read_size_mpiio = df_mpiio['counters']['MPIIO_BYTES_READ'].sum() 142 | 143 | total_size_mpiio = total_write_size_mpiio + total_read_size_mpiio 144 | else: 145 | total_size_mpiio = 0 146 | else: 147 | df_mpiio = None 148 | 149 | total_size_mpiio = 0 150 | 151 | dxt_posix = None 152 | dxt_posix_read_data = None 153 | dxt_posix_write_data = None 154 | dxt_mpiio = None 155 | 156 | df_lustre = None 157 | if "LUSTRE" in report.records: 158 | df_lustre = report.records['LUSTRE'].to_df() 159 | 160 | if args.backtrace: 161 | if "DXT_POSIX" in report.records: 162 | dxt_posix = report.records["DXT_POSIX"].to_df() 163 | dxt_posix = pd.DataFrame(dxt_posix) 164 | if "address_line_mapping" not in dxt_posix: 165 | args.backtrace = False 166 | else: 167 | read_id = [] 168 | read_rank = [] 169 | read_length = [] 170 | read_offsets = [] 171 | read_end_time = [] 172 | read_start_time = [] 173 | read_operation = [] 174 | 175 | write_id = [] 176 | write_rank = [] 177 | write_length = [] 178 | write_offsets = [] 179 | write_end_time = [] 180 | write_start_time = [] 181 | write_operation = [] 182 | 183 | for r in zip(dxt_posix['rank'], dxt_posix['read_segments'], dxt_posix['write_segments'], dxt_posix['id']): 184 | if not r[1].empty: 185 | read_id.append([r[3]] * len((r[1]['length'].to_list()))) 186 | read_rank.append([r[0]] * len((r[1]['length'].to_list()))) 187 | read_length.append(r[1]['length'].to_list()) 188 | read_end_time.append(r[1]['end_time'].to_list()) 189 | 
read_start_time.append(r[1]['start_time'].to_list()) 190 | read_operation.append(['read'] * len((r[1]['length'].to_list()))) 191 | read_offsets.append(r[1]['offset'].to_list()) 192 | 193 | if not r[2].empty: 194 | write_id.append([r[3]] * len((r[2]['length'].to_list()))) 195 | write_rank.append([r[0]] * len((r[2]['length'].to_list()))) 196 | write_length.append(r[2]['length'].to_list()) 197 | write_end_time.append(r[2]['end_time'].to_list()) 198 | write_start_time.append(r[2]['start_time'].to_list()) 199 | write_operation.append(['write'] * len((r[2]['length'].to_list()))) 200 | write_offsets.append(r[2]['offset'].to_list()) 201 | 202 | read_id = [element for nestedlist in read_id for element in nestedlist] 203 | read_rank = [element for nestedlist in read_rank for element in nestedlist] 204 | read_length = [element for nestedlist in read_length for element in nestedlist] 205 | read_offsets = [element for nestedlist in read_offsets for element in nestedlist] 206 | read_end_time = [element for nestedlist in read_end_time for element in nestedlist] 207 | read_operation = [element for nestedlist in read_operation for element in nestedlist] 208 | read_start_time = [element for nestedlist in read_start_time for element in nestedlist] 209 | 210 | write_id = [element for nestedlist in write_id for element in nestedlist] 211 | write_rank = [element for nestedlist in write_rank for element in nestedlist] 212 | write_length = [element for nestedlist in write_length for element in nestedlist] 213 | write_offsets = [element for nestedlist in write_offsets for element in nestedlist] 214 | write_end_time = [element for nestedlist in write_end_time for element in nestedlist] 215 | write_operation = [element for nestedlist in write_operation for element in nestedlist] 216 | write_start_time = [element for nestedlist in write_start_time for element in nestedlist] 217 | 218 | dxt_posix_read_data = pd.DataFrame( 219 | { 220 | 'id': read_id, 221 | 'rank': read_rank, 222 | 'length': read_length, 223 | 'end_time': read_end_time, 224 | 'start_time': read_start_time, 225 | 'operation': read_operation, 226 | 'offsets': read_offsets, 227 | }) 228 | 229 | dxt_posix_write_data = pd.DataFrame( 230 | { 231 | 'id': write_id, 232 | 'rank': write_rank, 233 | 'length': write_length, 234 | 'end_time': write_end_time, 235 | 'start_time': write_start_time, 236 | 'operation': write_operation, 237 | 'offsets': write_offsets, 238 | }) 239 | 240 | if "DXT_MPIIO" in report.records: 241 | dxt_mpiio = report.records["DXT_MPIIO"].to_df() 242 | dxt_mpiio = pd.DataFrame(dxt_mpiio) 243 | 244 | 245 | # Since POSIX will capture both POSIX-only accesses and those comming from MPI-IO, we can subtract those 246 | if total_size_posix > 0 and total_size_posix >= total_size_mpiio: 247 | total_size_posix -= total_size_mpiio 248 | 249 | total_size = total_size_stdio + total_size_posix + total_size_mpiio 250 | 251 | assert(total_size_stdio >= 0) 252 | assert(total_size_posix >= 0) 253 | assert(total_size_mpiio >= 0) 254 | 255 | files = {} 256 | 257 | # Check interface usage for each file 258 | file_map = report.name_records 259 | 260 | total_files = len(file_map) 261 | 262 | total_files_stdio = 0 263 | total_files_posix = 0 264 | total_files_mpiio = 0 265 | 266 | for id, path in file_map.items(): 267 | if df_stdio: 268 | uses_stdio = len(df_stdio['counters'][(df_stdio['counters']['id'] == id)]) > 0 269 | else: 270 | uses_stdio = 0 271 | 272 | if df_posix: 273 | uses_posix = len(df_posix['counters'][(df_posix['counters']['id'] == id)]) > 0 274 | 
else: 275 | uses_posix = 0 276 | 277 | if df_mpiio: 278 | uses_mpiio = len(df_mpiio['counters'][(df_mpiio['counters']['id'] == id)]) > 0 279 | else: 280 | uses_mpiio = 0 281 | 282 | total_files_stdio += uses_stdio 283 | total_files_posix += uses_posix 284 | total_files_mpiio += uses_mpiio 285 | 286 | files[id] = { 287 | 'path': path, 288 | 'stdio': uses_stdio, 289 | 'posix': uses_posix, 290 | 'mpiio': uses_mpiio 291 | } 292 | 293 | check_stdio(total_size, total_size_stdio) 294 | check_mpiio(modules) 295 | 296 | ######################################################################################################################################################################### 297 | 298 | if 'POSIX' in report.records: 299 | df = report.records['POSIX'].to_df() 300 | 301 | ######################################################################################################################################################################### 302 | 303 | # Get number of write/read operations 304 | total_reads = df['counters']['POSIX_READS'].sum() 305 | total_writes = df['counters']['POSIX_WRITES'].sum() 306 | 307 | # Get total number of I/O operations 308 | total_operations = total_writes + total_reads 309 | 310 | # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance 311 | check_operation_intensive(total_operations, total_reads, total_writes) 312 | 313 | total_read_size = df['counters']['POSIX_BYTES_READ'].sum() 314 | total_written_size = df['counters']['POSIX_BYTES_WRITTEN'].sum() 315 | 316 | total_size = total_written_size + total_read_size 317 | 318 | check_size_intensive(total_size, total_read_size, total_written_size) 319 | 320 | ######################################################################################################################################################################### 321 | 322 | # Get the number of small I/O operations (less than 1 MB) 323 | total_reads_small = ( 324 | df['counters']['POSIX_SIZE_READ_0_100'].sum() + 325 | df['counters']['POSIX_SIZE_READ_100_1K'].sum() + 326 | df['counters']['POSIX_SIZE_READ_1K_10K'].sum() + 327 | df['counters']['POSIX_SIZE_READ_10K_100K'].sum() + 328 | df['counters']['POSIX_SIZE_READ_100K_1M'].sum() 329 | ) 330 | 331 | total_writes_small = ( 332 | df['counters']['POSIX_SIZE_WRITE_0_100'].sum() + 333 | df['counters']['POSIX_SIZE_WRITE_100_1K'].sum() + 334 | df['counters']['POSIX_SIZE_WRITE_1K_10K'].sum() + 335 | df['counters']['POSIX_SIZE_WRITE_10K_100K'].sum() + 336 | df['counters']['POSIX_SIZE_WRITE_100K_1M'].sum() 337 | ) 338 | 339 | # Get the files responsible for more than half of these accesses 340 | files = [] 341 | 342 | df['counters']['INSIGHTS_POSIX_SMALL_READ'] = ( 343 | df['counters']['POSIX_SIZE_READ_0_100'] + 344 | df['counters']['POSIX_SIZE_READ_100_1K'] + 345 | df['counters']['POSIX_SIZE_READ_1K_10K'] + 346 | df['counters']['POSIX_SIZE_READ_10K_100K'] + 347 | df['counters']['POSIX_SIZE_READ_100K_1M'] 348 | ) 349 | 350 | df['counters']['INSIGHTS_POSIX_SMALL_WRITE'] = ( 351 | df['counters']['POSIX_SIZE_WRITE_0_100'] + 352 | df['counters']['POSIX_SIZE_WRITE_100_1K'] + 353 | df['counters']['POSIX_SIZE_WRITE_1K_10K'] + 354 | df['counters']['POSIX_SIZE_WRITE_10K_100K'] + 355 | df['counters']['POSIX_SIZE_WRITE_100K_1M'] 356 | ) 357 | 358 | detected_files = pd.DataFrame(df['counters'].groupby('id')[['INSIGHTS_POSIX_SMALL_READ', 
'INSIGHTS_POSIX_SMALL_WRITE']].sum()).reset_index() 359 | detected_files.columns = ['id', 'total_reads', 'total_writes'] 360 | detected_files.loc[:, 'id'] = detected_files.loc[:, 'id'].astype(str) 361 | 362 | check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map, dxt_posix, dxt_posix_read_data, dxt_posix_write_data) 363 | 364 | ######################################################################################################################################################################### 365 | 366 | # How many requests are misaligned? 367 | 368 | total_mem_not_aligned = df['counters']['POSIX_MEM_NOT_ALIGNED'].sum() 369 | total_file_not_aligned = df['counters']['POSIX_FILE_NOT_ALIGNED'].sum() 370 | 371 | check_misaligned(total_operations, total_mem_not_aligned, total_file_not_aligned, modules, file_map, df_lustre, dxt_posix, dxt_posix_read_data) 372 | 373 | ######################################################################################################################################################################### 374 | 375 | # Redundant read-traffic (based on Phill) 376 | # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) 377 | max_read_offset = df['counters']['POSIX_MAX_BYTE_READ'].max() 378 | max_write_offset = df['counters']['POSIX_MAX_BYTE_WRITTEN'].max() 379 | 380 | check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size, dxt_posix, dxt_posix_read_data, dxt_posix_write_data) 381 | 382 | ######################################################################################################################################################################### 383 | 384 | # Check for a lot of random operations 385 | 386 | read_consecutive = df['counters']['POSIX_CONSEC_READS'].sum() 387 | #print('READ Consecutive: {} ({:.2f}%)'.format(read_consecutive, read_consecutive / total_reads * 100)) 388 | 389 | read_sequential = df['counters']['POSIX_SEQ_READS'].sum() 390 | read_sequential -= read_consecutive 391 | #print('READ Sequential: {} ({:.2f}%)'.format(read_sequential, read_sequential / total_reads * 100)) 392 | 393 | read_random = total_reads - read_consecutive - read_sequential 394 | #print('READ Random: {} ({:.2f}%)'.format(read_random, read_random / total_reads * 100)) 395 | 396 | 397 | write_consecutive = df['counters']['POSIX_CONSEC_WRITES'].sum() 398 | 399 | write_sequential = df['counters']['POSIX_SEQ_WRITES'].sum() 400 | write_sequential -= write_consecutive 401 | 402 | write_random = total_writes - write_consecutive - write_sequential 403 | #print('WRITE Random: {} ({:.2f}%)'.format(write_random, write_random / total_writes * 100)) 404 | 405 | check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes, dxt_posix, dxt_posix_read_data, dxt_posix_write_data) 406 | 407 | ######################################################################################################################################################################### 408 | 409 | # Shared file with small operations 410 | 411 | shared_files = df['counters'].loc[(df['counters']['rank'] == -1)] 412 | 413 | shared_files = shared_files.assign(id=lambda d: d['id'].astype(str)) 414 | 415 | if not shared_files.empty: 416 | total_shared_reads = shared_files['POSIX_READS'].sum() 417 | total_shared_reads_small = ( 418 | shared_files['POSIX_SIZE_READ_0_100'].sum() + 419 | shared_files['POSIX_SIZE_READ_100_1K'].sum() 
+ 420 | shared_files['POSIX_SIZE_READ_1K_10K'].sum() + 421 | shared_files['POSIX_SIZE_READ_10K_100K'].sum() + 422 | shared_files['POSIX_SIZE_READ_100K_1M'].sum() 423 | ) 424 | 425 | shared_files['INSIGHTS_POSIX_SMALL_READS'] = ( 426 | shared_files['POSIX_SIZE_READ_0_100'] + 427 | shared_files['POSIX_SIZE_READ_100_1K'] + 428 | shared_files['POSIX_SIZE_READ_1K_10K'] + 429 | shared_files['POSIX_SIZE_READ_10K_100K'] + 430 | shared_files['POSIX_SIZE_READ_100K_1M'] 431 | ) 432 | 433 | 434 | total_shared_writes = shared_files['POSIX_WRITES'].sum() 435 | total_shared_writes_small = ( 436 | shared_files['POSIX_SIZE_WRITE_0_100'].sum() + 437 | shared_files['POSIX_SIZE_WRITE_100_1K'].sum() + 438 | shared_files['POSIX_SIZE_WRITE_1K_10K'].sum() + 439 | shared_files['POSIX_SIZE_WRITE_10K_100K'].sum() + 440 | shared_files['POSIX_SIZE_WRITE_100K_1M'].sum() 441 | ) 442 | 443 | shared_files['INSIGHTS_POSIX_SMALL_WRITES'] = ( 444 | shared_files['POSIX_SIZE_WRITE_0_100'] + 445 | shared_files['POSIX_SIZE_WRITE_100_1K'] + 446 | shared_files['POSIX_SIZE_WRITE_1K_10K'] + 447 | shared_files['POSIX_SIZE_WRITE_10K_100K'] + 448 | shared_files['POSIX_SIZE_WRITE_100K_1M'] 449 | ) 450 | 451 | check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, shared_files, file_map) 452 | 453 | ######################################################################################################################################################################### 454 | 455 | count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > thresholds['metadata_time_rank'][0])]) 456 | 457 | check_long_metadata(count_long_metadata, modules) 458 | 459 | # We already have a single line for each shared-file access 460 | # To check for stragglers, we can check the difference between the 461 | 462 | # POSIX_FASTEST_RANK_BYTES 463 | # POSIX_SLOWEST_RANK_BYTES 464 | # POSIX_F_VARIANCE_RANK_BYTES 465 | 466 | stragglers_count = 0 467 | 468 | shared_files = shared_files.assign(id=lambda d: d['id'].astype(str)) 469 | 470 | # Get the files responsible 471 | detected_files = [] 472 | 473 | for index, row in shared_files.iterrows(): 474 | total_transfer_size = row['POSIX_BYTES_WRITTEN'] + row['POSIX_BYTES_READ'] 475 | 476 | if total_transfer_size and abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size > thresholds['imbalance_stragglers'][0]: 477 | stragglers_count += 1 478 | 479 | detected_files.append([ 480 | row['id'], abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size * 100 481 | ]) 482 | 483 | column_names = ['id', 'data_imbalance'] 484 | detected_files = pd.DataFrame(detected_files, columns=column_names) 485 | check_shared_data_imblance(stragglers_count, detected_files, file_map, dxt_posix, dxt_posix_read_data, dxt_posix_write_data) 486 | 487 | # POSIX_F_FASTEST_RANK_TIME 488 | # POSIX_F_SLOWEST_RANK_TIME 489 | # POSIX_F_VARIANCE_RANK_TIME 490 | 491 | shared_files_times = df['fcounters'].loc[(df['fcounters']['rank'] == -1)] 492 | 493 | # Get the files responsible 494 | detected_files = [] 495 | 496 | stragglers_count = 0 497 | stragglers_imbalance = {} 498 | 499 | shared_files_times = shared_files_times.assign(id=lambda d: d['id'].astype(str)) 500 | 501 | for index, row in shared_files_times.iterrows(): 502 | total_transfer_time = row['POSIX_F_WRITE_TIME'] + row['POSIX_F_READ_TIME'] + row['POSIX_F_META_TIME'] 503 | 504 | if total_transfer_time and 
abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time > thresholds['imbalance_stragglers'][0]: 505 | stragglers_count += 1 506 | 507 | detected_files.append([ 508 | row['id'], abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time * 100 509 | ]) 510 | 511 | column_names = ['id', 'time_imbalance'] 512 | detected_files = pd.DataFrame(detected_files, columns=column_names) 513 | check_shared_time_imbalance(stragglers_count, detected_files, file_map) 514 | 515 | aggregated = df['counters'].loc[(df['counters']['rank'] != -1)][ 516 | ['rank', 'id', 'POSIX_BYTES_WRITTEN', 'POSIX_BYTES_READ'] 517 | ].groupby('id', as_index=False).agg({ 518 | 'rank': 'nunique', 519 | 'POSIX_BYTES_WRITTEN': ['sum', 'min', 'max'], 520 | 'POSIX_BYTES_READ': ['sum', 'min', 'max'] 521 | }) 522 | 523 | aggregated.columns = list(map('_'.join, aggregated.columns.values)) 524 | 525 | aggregated = aggregated.assign(id=lambda d: d['id_'].astype(str)) 526 | 527 | # Get the files responsible 528 | imbalance_count = 0 529 | 530 | detected_files = [] 531 | 532 | for index, row in aggregated.iterrows(): 533 | if row['POSIX_BYTES_WRITTEN_max'] and abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] > thresholds['imbalance_size'][0]: 534 | imbalance_count += 1 535 | 536 | detected_files.append([ 537 | row['id'], abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] * 100 538 | ]) 539 | 540 | column_names = ['id', 'write_imbalance'] 541 | detected_files = pd.DataFrame(detected_files, columns=column_names) 542 | check_individual_write_imbalance(imbalance_count, detected_files, file_map, dxt_posix, dxt_posix_write_data) 543 | 544 | imbalance_count = 0 545 | 546 | detected_files = [] 547 | 548 | for index, row in aggregated.iterrows(): 549 | if row['POSIX_BYTES_READ_max'] and abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] > thresholds['imbalance_size'][0]: 550 | imbalance_count += 1 551 | 552 | detected_files.append([ 553 | row['id'], abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] * 100 554 | ]) 555 | 556 | column_names = ['id', 'read_imbalance'] 557 | detected_files = pd.DataFrame(detected_files, columns=column_names) 558 | check_individual_read_imbalance(imbalance_count, detected_files, file_map, dxt_posix, dxt_posix_read_data) 559 | 560 | ######################################################################################################################################################################### 561 | 562 | if 'MPI-IO' in report.records: 563 | # Check if application uses MPI-IO and collective operations 564 | df_mpiio = report.records['MPI-IO'].to_df() 565 | 566 | df_mpiio['counters'] = df_mpiio['counters'].assign(id=lambda d: d['id'].astype(str)) 567 | 568 | # Get the files responsible 569 | detected_files = [] 570 | 571 | df_mpiio_collective_reads = df_mpiio['counters'] #.loc[(df_mpiio['counters']['MPIIO_COLL_READS'] > 0)] 572 | 573 | total_mpiio_read_operations = df_mpiio['counters']['MPIIO_INDEP_READS'].sum() + df_mpiio['counters']['MPIIO_COLL_READS'].sum() 574 | 575 | mpiio_coll_reads = df_mpiio['counters']['MPIIO_COLL_READS'].sum() 576 | mpiio_indep_reads = df_mpiio['counters']['MPIIO_INDEP_READS'].sum() 577 | 578 | detected_files = [] 579 | if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > 
thresholds['collective_operations_absolute'][0]: 580 | files = pd.DataFrame(df_mpiio_collective_reads.groupby('id').sum()).reset_index() 581 | for index, row in df_mpiio_collective_reads.iterrows(): 582 | if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and 583 | row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations'][0] and 584 | (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations_absolute'][0]): 585 | 586 | detected_files.append([ 587 | row['id'], row['MPIIO_INDEP_READS'], row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 588 | ]) 589 | 590 | column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] 591 | detected_files = pd.DataFrame(detected_files, columns=column_names) 592 | 593 | check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map, dxt_mpiio) 594 | 595 | df_mpiio_collective_writes = df_mpiio['counters'] #.loc[(df_mpiio['counters']['MPIIO_COLL_WRITES'] > 0)] 596 | 597 | total_mpiio_write_operations = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() + df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() 598 | 599 | mpiio_coll_writes = df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() 600 | mpiio_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() 601 | 602 | detected_files = [] 603 | if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > thresholds['collective_operations_absolute'][0]: 604 | files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() 605 | 606 | for index, row in df_mpiio_collective_writes.iterrows(): 607 | if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and 608 | row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations'][0] and 609 | (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations_absolute'][0]): 610 | 611 | detected_files.append([ 612 | row['id'], row['MPIIO_INDEP_WRITES'], row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 613 | ]) 614 | 615 | column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] 616 | detected_files = pd.DataFrame(detected_files, columns=column_names) 617 | 618 | check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map, dxt_mpiio) 619 | 620 | ######################################################################################################################################################################### 621 | 622 | # Look for usage of non-block operations 623 | 624 | # Look for HDF5 file extension 625 | 626 | has_hdf5_extension = False 627 | 628 | for index, row in df_mpiio['counters'].iterrows(): 629 | if file_map[int(row['id'])].endswith('.h5') or file_map[int(row['id'])].endswith('.hdf5'): 630 | has_hdf5_extension = True 631 | 632 | mpiio_nb_reads = df_mpiio['counters']['MPIIO_NB_READS'].sum() 633 | mpiio_nb_writes = df_mpiio['counters']['MPIIO_NB_WRITES'].sum() 634 | 635 | check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules) 636 | 637 | ######################################################################################################################################################################### 638 | 639 | # Nodes and MPI-IO aggregators 640 | # If the 
application uses collective reads or collective writes, look for the number of aggregators 641 | hints = '' 642 | 643 | if 'h' in job['job']['metadata']: 644 | hints = job['job']['metadata']['h'] 645 | 646 | if hints: 647 | hints = hints.split(';') 648 | 649 | # print('Hints: ', hints) 650 | 651 | NUMBER_OF_COMPUTE_NODES = 0 652 | 653 | if 'MPI-IO' in modules: 654 | cb_nodes = None 655 | 656 | for hint in hints: 657 | if hint != 'no': 658 | (key, value) = hint.split('=') 659 | 660 | if key == 'cb_nodes': 661 | cb_nodes = value 662 | 663 | # Try to get the number of compute nodes from SLURM, if not found, set as information 664 | command = 'sacct --job {} --format=JobID,JobIDRaw,NNodes,NCPUs --parsable2 --delimiter ","'.format( 665 | job['job']['jobid'] 666 | ) 667 | 668 | arguments = shlex.split(command) 669 | 670 | try: 671 | result = subprocess.run(arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 672 | 673 | if result.returncode == 0: 674 | # We have successfully fetched the information from SLURM 675 | db = csv.DictReader(io.StringIO(result.stdout.decode('utf-8'))) 676 | 677 | try: 678 | first = next(db) 679 | 680 | if 'NNodes' in first: 681 | NUMBER_OF_COMPUTE_NODES = first['NNodes'] 682 | 683 | # Do we have one MPI-IO aggregator per node? 684 | check_mpi_aggregator(cb_nodes, NUMBER_OF_COMPUTE_NODES) 685 | except StopIteration: 686 | pass 687 | except FileNotFoundError: 688 | pass 689 | 690 | ######################################################################################################################################################################### 691 | 692 | insights_end_time = time.time() 693 | 694 | # Version 3.4.1 of py-darshan changed the contents on what is reported in 'job' 695 | if 'start_time' in job['job']: 696 | job_start = datetime.datetime.fromtimestamp(job['job']['start_time'], datetime.timezone.utc) 697 | job_end = datetime.datetime.fromtimestamp(job['job']['end_time'], datetime.timezone.utc) 698 | else: 699 | job_start = datetime.datetime.fromtimestamp(job['job']['start_time_sec'], datetime.timezone.utc) 700 | job_end = datetime.datetime.fromtimestamp(job['job']['end_time_sec'], datetime.timezone.utc) 701 | 702 | console.print() 703 | 704 | console.print( 705 | Panel( 706 | '\n'.join([ 707 | ' [b]JOB[/b]: [white]{}[/white]'.format( 708 | job['job']['jobid'] 709 | ), 710 | ' [b]EXECUTABLE[/b]: [white]{}[/white]'.format( 711 | job['exe'].split()[0] 712 | ), 713 | ' [b]DARSHAN[/b]: [white]{}[/white]'.format( 714 | os.path.basename(args.log_path) 715 | ), 716 | ' [b]EXECUTION TIME[/b]: [white]{} to {} ({:.2f} hours)[/white]'.format( 717 | job_start, 718 | job_end, 719 | (job_end - job_start).total_seconds() / 3600 720 | ), 721 | ' [b]FILES[/b]: [white]{} files ({} use STDIO, {} use POSIX, {} use MPI-IO)[/white]'.format( 722 | total_files, 723 | total_files_stdio, 724 | total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count 725 | total_files_mpiio 726 | ), 727 | ' [b]COMPUTE NODES[/b] [white]{}[/white]'.format( 728 | NUMBER_OF_COMPUTE_NODES 729 | ), 730 | ' [b]PROCESSES[/b] [white]{}[/white]'.format( 731 | job['job']['nprocs'] 732 | ), 733 | ' [b]HINTS[/b]: [white]{}[/white]'.format( 734 | ' '.join(hints) 735 | ) 736 | ]), 737 | title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', 738 | title_align='left', 739 | subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( 740 | insights_total[HIGH], 741 | 
insights_total[WARN], 742 | insights_total[RECOMMENDATIONS], 743 | ), 744 | subtitle_align='left', 745 | padding=1 746 | ) 747 | ) 748 | 749 | console.print() 750 | 751 | display_content(console) 752 | display_thresholds(console) 753 | display_footer(console, insights_start_time, insights_end_time) 754 | 755 | # Export to HTML, SVG, and CSV 756 | trace_name = os.path.basename(args.log_path).replace('.darshan', '') 757 | out_dir = args.export_dir if args.export_dir != "" else os.getcwd() 758 | 759 | export_html(console, out_dir, trace_name) 760 | export_svg(console, out_dir, trace_name) 761 | export_csv(out_dir, trace_name, job['job']['jobid']) 762 | -------------------------------------------------------------------------------- /drishti/handlers/handle_recorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import time 5 | 6 | import pandas as pd 7 | from recorder_utils import RecorderReader 8 | from recorder_utils.build_offset_intervals import build_offset_intervals 9 | 10 | from drishti.includes.module import * 11 | 12 | 13 | def get_accessed_files(reader): 14 | ranks = reader.GM.total_ranks 15 | file_map = {} 16 | for rank in range(ranks): 17 | file_map.update(reader.LMs[rank].filemap) 18 | return file_map 19 | 20 | 21 | def init_df_posix_recordes(reader): 22 | func_list = reader.funcs 23 | ranks = reader.GM.total_ranks 24 | records = [] 25 | for rank in range(ranks): 26 | for i in range(reader.LMs[rank].total_records): 27 | record = reader.records[rank][i] 28 | func_name = func_list[record.func_id] 29 | 30 | if 'MPI' not in func_name and 'H5' not in func_name: 31 | filename = None 32 | if "open" in func_name or "close" in func_name or "creat" in func_name \ 33 | or "seek" in func_name or "sync" in func_name: 34 | fstr = record.args[0] 35 | filename = fstr if type(fstr)==str else fstr.decode('utf-8') 36 | filename = filename.replace('./', '') 37 | 38 | records.append( [filename, rank, func_name, record.tstart, record.tend] ) 39 | 40 | head = ['fname', 'rank', 'function', 'start', 'end'] 41 | df_posix_records = pd.DataFrame(records, columns=head) 42 | return df_posix_records 43 | 44 | 45 | def handler(): 46 | df_intervals = None 47 | df_posix_records = None 48 | df_file_map = None 49 | file_map = None 50 | 51 | if os.path.exists(args.log_path + '.intervals.csv') and os.path.exists(args.log_path + '.records.csv') and os.path.exists(args.log_path + '.filemap.csv'): 52 | print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.intervals.csv'))) 53 | print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.records.csv'))) 54 | print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.filemap.csv'))) 55 | df_intervals = pd.read_csv(args.log_path + '.intervals.csv') 56 | df_posix_records = pd.read_csv(args.log_path + '.records.csv') 57 | df_file_map = pd.read_csv(args.log_path + '.filemap.csv') 58 | file_map = {} 59 | for index, row in df_file_map.iterrows(): 60 | file_map[row['file_id']] = row['file_name'] 61 | else: 62 | reader = RecorderReader(args.log_path) 63 | df_intervals = build_offset_intervals(reader) 64 | df_posix_records = init_df_posix_recordes(reader) 65 | 66 | file_map = get_accessed_files(reader) 67 | 68 | def add_api(row): 69 | if 'MPI' in row['function']: 70 | return 'MPI-IO' 71 | elif 'H5' in row['function']: 72 | return 'H5F' 73 | else: 74 | return 'POSIX' 75 | 76 | def add_duration(row): 77 | return row['end'] - row['start'] 78 | 79 | 
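        # Classify each interval by API layer and compute per-record durations below, then
        # cache the parsed data as CSV files next to the log so that subsequent runs can
        # skip the (expensive) RecorderReader parsing step above.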
df_intervals['api'] = df_intervals.apply(add_api, axis=1) 80 | df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) 81 | df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) 82 | 83 | df_intervals.to_csv(args.log_path + '.intervals.csv', mode='w', index=False, header=True) 84 | df_posix_records.to_csv(args.log_path + '.records.csv', mode='w', index=False, header=True) 85 | 86 | df_file_map = pd.DataFrame(list(file_map.items()), columns=['file_id', 'file_name']) 87 | df_file_map.to_csv(args.log_path + '.filemap.csv', mode='w', index=False, header=True) 88 | 89 | if args.split_files: 90 | for fid in file_map: 91 | process_helper(file_map, df_intervals[(df_intervals['file_id'] == fid)], 92 | df_posix_records[(df_posix_records['fname'] == file_map[fid])], fid) 93 | else: 94 | process_helper(file_map, df_intervals, df_posix_records) 95 | 96 | 97 | def process_helper(file_map, df_intervals, df_posix_records, fid=None): 98 | if not len(df_intervals): return 99 | 100 | insights_start_time = time.time() 101 | 102 | console = init_console() 103 | 104 | modules = set(df_intervals['api'].unique()) 105 | # Check usage of POSIX, and MPI-IO per file 106 | total_size_stdio = 0 107 | total_size_posix = 0 108 | total_size_mpiio = 0 109 | total_size = 0 110 | 111 | total_files = len(file_map) 112 | total_files_stdio = 0 113 | total_files_posix = 0 114 | total_files_mpiio = 0 115 | 116 | if args.split_files: 117 | total_size_stdio = df_intervals[(df_intervals['api'] == 'STDIO')]['size'].sum() 118 | total_size_posix = df_intervals[(df_intervals['api'] == 'POSIX')]['size'].sum() 119 | total_size_mpiio = df_intervals[(df_intervals['api'] == 'MPI-IO')]['size'].sum() 120 | else: 121 | for id in file_map.keys(): 122 | df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == id)] 123 | df_stdio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'STDIO')] 124 | df_posix_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'POSIX')] 125 | df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPI-IO')] 126 | 127 | if len(df_stdio_intervals_in_one_file): 128 | total_files_stdio += 1 129 | total_size_stdio += df_stdio_intervals_in_one_file['size'].sum() 130 | 131 | if len(df_posix_intervals_in_one_file): 132 | total_files_posix += 1 133 | total_size_posix += df_posix_intervals_in_one_file['size'].sum() 134 | 135 | if len(df_mpiio_intervals_in_one_file): 136 | total_files_mpiio += 1 137 | total_size_mpiio += df_mpiio_intervals_in_one_file['size'].sum() 138 | 139 | 140 | # Since POSIX will capture both POSIX-only accesses and those comming from MPI-IO, we can subtract those 141 | if total_size_posix > 0 and total_size_posix >= total_size_mpiio: 142 | total_size_posix -= total_size_mpiio 143 | 144 | total_size = total_size_stdio + total_size_posix + total_size_mpiio 145 | 146 | assert(total_size_stdio >= 0) 147 | assert(total_size_posix >= 0) 148 | assert(total_size_mpiio >= 0) 149 | 150 | check_stdio(total_size, total_size_stdio) 151 | check_mpiio(modules) 152 | 153 | ######################################################################################################################################################################### 154 | 155 | if df_intervals['api'].eq('POSIX').any(): 156 | df_posix = df_intervals[(df_intervals['api'] == 'POSIX')] 157 | 158 | 
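        # Note: in the checks below an operation counts as a read when its function name
        # contains 'read'; every other data operation counts as a write. Both the POSIX
        # and the MPI-IO sections rely on this convention.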
######################################################################################################################################################################### 159 | 160 | # Get number of write/read operations 161 | total_reads = len(df_posix[(df_posix['function'].str.contains('read'))]) 162 | total_writes = len(df_posix[~(df_posix['function'].str.contains('read'))]) 163 | 164 | # Get total number of I/O operations 165 | total_operations = total_writes + total_reads 166 | 167 | # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance 168 | check_operation_intensive(total_operations, total_reads, total_writes) 169 | 170 | total_read_size = df_posix[(df_posix['function'].str.contains('read'))]['size'].sum() 171 | total_written_size = df_posix[~(df_posix['function'].str.contains('read'))]['size'].sum() 172 | 173 | total_size = total_written_size + total_read_size 174 | 175 | check_size_intensive(total_size, total_read_size, total_written_size) 176 | 177 | ######################################################################################################################################################################### 178 | 179 | # Get the number of small I/O operations (less than 1 MB) 180 | 181 | total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) 182 | total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) 183 | 184 | if args.split_files: 185 | detected_files = pd.DataFrame() 186 | else: 187 | detected_files = [] 188 | for id in file_map.keys(): 189 | read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) 190 | write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) 191 | detected_files.append([id, read_cnt, write_cnt]) 192 | 193 | column_names = ['id', 'total_reads', 'total_writes'] 194 | detected_files = pd.DataFrame(detected_files, columns=column_names) 195 | 196 | check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map) 197 | 198 | ######################################################################################################################################################################### 199 | 200 | # How many requests are misaligned? 
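        # One possible way to fill in the TODO below, sketched here as a comment only.
        # The 4096-byte boundary is a hypothetical value (the real alignment would have
        # to come from the target file system), and the reporting helper for
        # INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE is not part of this handler yet:
        #
        #   ALIGNMENT = 4096  # hypothetical file-system boundary, in bytes
        #   misaligned = len(df_posix[(df_posix['offset'] % ALIGNMENT != 0) | (df_posix['size'] % ALIGNMENT != 0)])
        #   if total_operations and misaligned / total_operations > thresholds['misaligned_requests'][0]:
        #       pass  # flag INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE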
201 | # TODO: 202 | 203 | ######################################################################################################################################################################### 204 | 205 | # Redundant read-traffic (based on Phill) 206 | # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) 207 | max_read_offset = df_posix[(df_posix['function'].str.contains('read'))]['offset'].max() 208 | max_write_offset = df_posix[~(df_posix['function'].str.contains('read'))]['offset'].max() 209 | 210 | check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size) 211 | 212 | ######################################################################################################################################################################### 213 | 214 | # Check for a lot of random operations 215 | 216 | grp_posix_by_id = df_posix.groupby('file_id') 217 | 218 | read_consecutive = 0 219 | read_sequential = 0 220 | read_random = 0 221 | 222 | for id, df_filtered in grp_posix_by_id: 223 | df_filtered = df_filtered[(df_filtered['function'].str.contains('read'))].sort_values('start') 224 | 225 | for i in range(len(df_filtered) - 1): 226 | curr_interval = df_filtered.iloc[i] 227 | next_interval = df_filtered.iloc[i + 1] 228 | if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: 229 | read_consecutive += 1 230 | elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: 231 | read_sequential += 1 232 | else: 233 | read_random += 1 234 | 235 | write_consecutive = 0 236 | write_sequential = 0 237 | write_random = 0 238 | 239 | for id, df_filtered in grp_posix_by_id: 240 | df_filtered = df_filtered[~(df_filtered['function'].str.contains('read'))].sort_values('start') 241 | 242 | for i in range(len(df_filtered) - 1): 243 | curr_interval = df_filtered.iloc[i] 244 | next_interval = df_filtered.iloc[i + 1] 245 | if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: 246 | write_consecutive += 1 247 | elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: 248 | write_sequential += 1 249 | else: 250 | write_random += 1 251 | 252 | check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes) 253 | 254 | ######################################################################################################################################################################### 255 | 256 | # Shared file with small operations 257 | 258 | # A file is shared if it's been read/written by more than 1 rank 259 | detected_files = grp_posix_by_id['rank'].nunique() 260 | shared_files = set(detected_files[detected_files > 1].index) 261 | 262 | total_shared_reads = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read'))]) 263 | total_shared_reads_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) 264 | & (df_posix['function'].str.contains('read')) 265 | & (df_posix['size'] < thresholds['small_bytes'][0])]) 266 | 267 | total_shared_writes = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read'))]) 268 | total_shared_writes_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) 269 | & ~(df_posix['function'].str.contains('read')) 270 | & (df_posix['size'] < thresholds['small_bytes'][0])]) 271 | 272 | if args.split_files: 273 | detected_files = pd.DataFrame() 274 | else: 275 | detected_files = [] 276 | 
for id in shared_files: 277 | read_cnt = len(df_posix[(df_posix['file_id'] == id) 278 | & (df_posix['function'].str.contains('read')) 279 | & (df_posix['size'] < thresholds['small_bytes'][0])]) 280 | write_cnt = len(df_posix[(df_posix['file_id'] == id) 281 | & ~(df_posix['function'].str.contains('read')) 282 | & (df_posix['size'] < thresholds['small_bytes'][0])]) 283 | detected_files.append([id, read_cnt, write_cnt]) 284 | 285 | column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] 286 | detected_files = pd.DataFrame(detected_files, columns=column_names) 287 | 288 | check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, detected_files, file_map) 289 | 290 | ######################################################################################################################################################################### 291 | 292 | # TODO: Assumed metadata operations: open, close, sync, create, seek 293 | df_detected = df_posix_records.groupby('rank')['duration'].sum().reset_index() 294 | count_long_metadata = len(df_detected[(df_detected['duration'] > thresholds['metadata_time_rank'][0])]) 295 | 296 | check_long_metadata(count_long_metadata, modules) 297 | 298 | # We already have a single line for each shared-file access 299 | # To check for stragglers, we can check the difference between the 300 | 301 | # POSIX_FASTEST_RANK_BYTES 302 | # POSIX_SLOWEST_RANK_BYTES 303 | # POSIX_VARIANCE_RANK_BYTES 304 | if args.split_files: 305 | if df_posix['rank'].nunique() > 1: 306 | total_transfer_size = df_posix['size'].sum() 307 | 308 | df_detected = df_posix.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() 309 | slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] 310 | fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] 311 | 312 | check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, total_transfer_size) 313 | else: 314 | stragglers_count = 0 315 | 316 | detected_files = [] 317 | for id in shared_files: 318 | df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] 319 | total_transfer_size = df_posix_in_one_file['size'].sum() 320 | 321 | df_detected = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() 322 | slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] 323 | fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] 324 | 325 | if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > thresholds['imbalance_stragglers'][0]: 326 | stragglers_count += 1 327 | 328 | detected_files.append([ 329 | id, abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 330 | ]) 331 | 332 | column_names = ['id', 'data_imbalance'] 333 | detected_files = pd.DataFrame(detected_files, columns=column_names) 334 | 335 | check_shared_data_imblance(stragglers_count, detected_files, file_map) 336 | 337 | # POSIX_F_FASTEST_RANK_TIME 338 | # POSIX_F_SLOWEST_RANK_TIME 339 | # POSIX_F_VARIANCE_RANK_TIME 340 | if args.split_files: 341 | if df_posix['rank'].nunique() > 1: 342 | total_transfer_time = df_posix['duration'].sum() 343 | 344 | df_detected = df_posix.groupby('rank')['duration'].sum().reset_index() 345 | 346 | slowest_rank_time = df_detected['duration'].max() 347 | fastest_rank_time = df_detected['duration'].min() 348 | 349 | check_shared_time_imbalance_split(slowest_rank_time, 
fastest_rank_time, total_transfer_time) 350 | else: 351 | stragglers_count = 0 352 | 353 | detected_files = [] 354 | for id in shared_files: 355 | df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] 356 | total_transfer_time = df_posix_in_one_file['duration'].sum() 357 | 358 | df_detected = df_posix_in_one_file.groupby('rank')['duration'].sum().reset_index() 359 | 360 | slowest_rank_time = df_detected['duration'].max() 361 | fastest_rank_time = df_detected['duration'].min() 362 | 363 | if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > thresholds['imbalance_stragglers'][0]: 364 | stragglers_count += 1 365 | 366 | detected_files.append([ 367 | id, abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 368 | ]) 369 | 370 | column_names = ['id', 'time_imbalance'] 371 | detected_files = pd.DataFrame(detected_files, columns=column_names) 372 | 373 | check_shared_time_imbalance(stragglers_count, detected_files, file_map) 374 | 375 | # Get the individual files responsible for imbalance 376 | if args.split_files: 377 | if df_posix['rank'].nunique() == 1: 378 | df_detected = df_posix[~(df_posix['function'].str.contains('read'))] 379 | 380 | max_bytes_written = df_detected['size'].max() 381 | min_bytes_written = df_detected['size'].min() 382 | 383 | check_individual_write_imbalance_split(max_bytes_written, min_bytes_written) 384 | 385 | if df_posix['rank'].nunique() == 1: 386 | df_detected = df_posix[(df_posix['function'].str.contains('read'))] 387 | 388 | max_bytes_read = df_detected['size'].max() 389 | min_bytes_read = df_detected['size'].min() 390 | 391 | check_individual_read_imbalance_split(max_bytes_read, min_bytes_read) 392 | else: 393 | imbalance_count = 0 394 | 395 | detected_files = [] 396 | for id in file_map.keys(): 397 | if id in shared_files: continue 398 | df_detected = df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))] 399 | 400 | max_bytes_written = df_detected['size'].max() 401 | min_bytes_written = df_detected['size'].min() 402 | 403 | if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > thresholds['imbalance_size'][0]: 404 | imbalance_count += 1 405 | 406 | detected_files.append([ 407 | id, abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 408 | ]) 409 | 410 | column_names = ['id', 'write_imbalance'] 411 | detected_files = pd.DataFrame(detected_files, columns=column_names) 412 | 413 | check_individual_write_imbalance(imbalance_count, detected_files, file_map) 414 | 415 | imbalance_count = 0 416 | 417 | detected_files = [] 418 | for id in shared_files: 419 | df_detected = df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))] 420 | 421 | max_bytes_read = df_detected['size'].max() 422 | min_bytes_read = df_detected['size'].min() 423 | 424 | if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > thresholds['imbalance_size'][0]: 425 | imbalance_count += 1 426 | 427 | detected_files.append([ 428 | id, abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 429 | ]) 430 | 431 | column_names = ['id', 'read_imbalance'] 432 | detected_files = pd.DataFrame(detected_files, columns=column_names) 433 | 434 | check_individual_read_imbalance(imbalance_count, detected_files, file_map) 435 | 436 | ######################################################################################################################################################################### 437 | 438 | if 
df_intervals['api'].eq('MPI-IO').any(): 439 | df_mpiio = df_intervals[(df_intervals['api'] == 'MPI-IO')] 440 | 441 | df_mpiio_reads = df_mpiio[(df_mpiio['function'].str.contains('read'))] 442 | mpiio_indep_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) 443 | mpiio_coll_reads = len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('_all'))]) 444 | total_mpiio_read_operations = mpiio_indep_reads + mpiio_coll_reads 445 | 446 | df_mpiio_writes = df_mpiio[~(df_mpiio['function'].str.contains('read'))] 447 | mpiio_indep_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) 448 | mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) 449 | total_mpiio_write_operations = mpiio_indep_writes + mpiio_coll_writes 450 | 451 | if args.split_files: 452 | detected_files = pd.DataFrame() 453 | else: 454 | detected_files = [] 455 | if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > thresholds['collective_operations_absolute'][0]: 456 | for id in file_map.keys(): 457 | indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] 458 | indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] 459 | indep_total_count = indep_read_count + indep_write_count 460 | 461 | if (indep_total_count > thresholds['collective_operations_absolute'][0] and indep_read_count / indep_total_count > thresholds['collective_operations'][0]): 462 | detected_files.append([ 463 | id, indep_read_count, indep_read_count / indep_total_count * 100 464 | ]) 465 | 466 | column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] 467 | detected_files = pd.DataFrame(detected_files, columns=column_names) 468 | 469 | check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map) 470 | 471 | if args.split_files: 472 | detected_files = pd.DataFrame() 473 | else: 474 | detected_files = [] 475 | if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > thresholds['collective_operations_absolute'][0]: 476 | for id in file_map.keys(): 477 | indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] 478 | indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] 479 | indep_total_count = indep_read_count + indep_write_count 480 | 481 | if (indep_total_count > thresholds['collective_operations_absolute'][0] and indep_write_count / indep_total_count > thresholds['collective_operations'][0]): 482 | detected_files.append([ 483 | id, indep_write_count, indep_write_count / indep_total_count * 100 484 | ]) 485 | 486 | column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] 487 | detected_files = pd.DataFrame(detected_files, columns=column_names) 488 | 489 | check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map) 490 | 491 | ######################################################################################################################################################################### 492 | 493 | # Look for usage of non-block operations 494 | 495 | # Look for HDF5 file extension 496 | 497 | has_hdf5_extension = False 498 | 499 | for id in file_map.keys(): 
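            # Mark the run as HDF5-based when any accessed file uses a .h5 or .hdf5
            # extension; the flag is passed to check_mpi_none_block_operation() below.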
500 | fname = file_map[id] 501 | if fname.endswith('.h5') or fname.endswith('.hdf5'): 502 | has_hdf5_extension = True 503 | 504 | mpiio_nb_reads = len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) 505 | mpiio_nb_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) 506 | 507 | check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules) 508 | 509 | ######################################################################################################################################################################### 510 | 511 | # Nodes and MPI-IO aggregators 512 | # If the application uses collective reads or collective writes, look for the number of aggregators 513 | # TODO: 514 | 515 | ######################################################################################################################################################################### 516 | 517 | insights_end_time = time.time() 518 | 519 | console.print() 520 | 521 | if args.split_files: 522 | console.print( 523 | Panel( 524 | '\n'.join([ 525 | ' [b]RECORDER[/b]: [white]{}[/white]'.format( 526 | os.path.basename(args.log_path) 527 | ), 528 | ' [b]FILE[/b]: [white]{} ({})[/white]'.format( 529 | file_map[fid], 530 | fid, 531 | ), 532 | ' [b]PROCESSES[/b] [white]{}[/white]'.format( 533 | df_intervals['rank'].nunique() 534 | ), 535 | ]), 536 | title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', 537 | title_align='left', 538 | subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( 539 | insights_total[HIGH], 540 | insights_total[WARN], 541 | insights_total[RECOMMENDATIONS], 542 | ), 543 | subtitle_align='left', 544 | padding=1 545 | ) 546 | ) 547 | else: 548 | console.print( 549 | Panel( 550 | '\n'.join([ 551 | ' [b]RECORDER[/b]: [white]{}[/white]'.format( 552 | os.path.basename(args.log_path) 553 | ), 554 | ' [b]FILES[/b]: [white]{} files ({} use STDIO, {} use POSIX, {} use MPI-IO)[/white]'.format( 555 | total_files, 556 | total_files_stdio, 557 | total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count 558 | total_files_mpiio 559 | ), 560 | ' [b]PROCESSES[/b] [white]{}[/white]'.format( 561 | df_intervals['rank'].nunique() 562 | ), 563 | ]), 564 | title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', 565 | title_align='left', 566 | subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( 567 | insights_total[HIGH], 568 | insights_total[WARN], 569 | insights_total[RECOMMENDATIONS], 570 | ), 571 | subtitle_align='left', 572 | padding=1 573 | ) 574 | ) 575 | 576 | console.print() 577 | 578 | display_content(console) 579 | display_thresholds(console) 580 | display_footer(console, insights_start_time, insights_end_time) 581 | 582 | # Export to HTML, SVG, and CSV 583 | trace_name = os.path.basename(os.path.dirname(args.log_path)) 584 | if args.split_files: 585 | trace_name = f"{trace_name}.{fid}" 586 | out_dir = args.export_dir if args.export_dir != "" else os.getcwd() 587 | 588 | export_html(console, out_dir, trace_name) 589 | export_svg(console, out_dir, trace_name) 590 | export_csv(out_dir, trace_name) 591 | -------------------------------------------------------------------------------- /drishti/includes/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/drishti/includes/__init__.py -------------------------------------------------------------------------------- /drishti/includes/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import json 5 | 6 | from rich.console import Console, Group 7 | from rich.padding import Padding 8 | from rich.panel import Panel 9 | from rich.terminal_theme import TerminalTheme 10 | from rich.terminal_theme import MONOKAI 11 | 12 | from drishti.includes.parser import * 13 | 14 | 15 | RECOMMENDATIONS = 0 16 | HIGH = 1 17 | WARN = 2 18 | INFO = 3 19 | OK = 4 20 | 21 | ROOT = os.path.abspath(os.path.dirname(__file__)) 22 | 23 | TARGET_USER = 1 24 | TARGET_DEVELOPER = 2 25 | TARGET_SYSTEM = 3 26 | 27 | insights_operation = [] 28 | insights_metadata = [] 29 | insights_dxt = [] 30 | 31 | insights_total = dict() 32 | 33 | insights_total[HIGH] = 0 34 | insights_total[WARN] = 0 35 | insights_total[RECOMMENDATIONS] = 0 36 | 37 | thresholds = { 38 | 'imbalance_operations': [0.1, False], 39 | 'small_bytes': [1048576, False], 40 | 'small_requests': [0.1, False], 41 | 'small_requests_absolute': [1000, False], 42 | 'misaligned_requests': [0.1, False], 43 | 'metadata_time_rank': [30, False], 44 | 'random_operations': [0.2, False], 45 | 'random_operations_absolute': [1000, False], 46 | 'imbalance_stragglers': [0.15, False], 47 | 'imbalance_size': [0.3, False], 48 | 'interface_stdio': [0.1, False], 49 | 'collective_operations': [0.5, False], 50 | 'collective_operations_absolute': [1000, False], 51 | 'backtrace': [2, False] 52 | } 53 | 54 | INSIGHTS_STDIO_HIGH_USAGE = 'S01' 55 | INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' 56 | INSIGHTS_POSIX_READ_COUNT_INTENSIVE = 'P02' 57 | INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE = 'P03' 58 | INSIGHTS_POSIX_READ_SIZE_INTENSIVE = 'P04' 59 | INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE = 'P05' 60 | INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE = 'P06' 61 | INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE = 'P07' 62 | INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE = 'P08' 63 | INSIGHTS_POSIX_REDUNDANT_READ_USAGE = 'P09' 64 | INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE = 'P10' 65 | INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE = 'P11' 66 | INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE = 'P12' 67 | INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE = 'P13' 68 | INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE = 'P14' 69 | INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE = 'P15' 70 | INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE = 'P16' 71 | INSIGHTS_POSIX_HIGH_METADATA_TIME = 'P17' 72 | INSIGHTS_POSIX_SIZE_IMBALANCE = 'P18' 73 | INSIGHTS_POSIX_TIME_IMBALANCE = 'P19' 74 | INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE = 'P21' 75 | INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE = 'P22' 76 | INSIGHTS_MPI_IO_NO_USAGE = 'M01' 77 | INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE = 'M02' 78 | INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE = 'M03' 79 | INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE = 'M04' 80 | INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE = 'M05' 81 | INSIGHTS_MPI_IO_BLOCKING_READ_USAGE = 'M06' 82 | INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE = 'M07' 83 | INSIGHTS_MPI_IO_AGGREGATORS_INTRA = 'M08' 84 | INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' 85 | INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' 86 | 87 | DETAILS_MAX_SIZE = 10 88 | 89 | csv_report = [] 90 | codes = [] 91 | 92 | 93 | def init_console(): 
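    # Create a recording Console (so the report can later be exported to HTML/SVG),
    # honor the --size console width when one is given, and reset the per-report state:
    # the collected insights, the issue counters, and the flag kept alongside each threshold.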
94 | console = Console(record=True) 95 | 96 | if args.export_size: 97 | console.width = int(args.export_size) 98 | 99 | insights_operation.clear() 100 | insights_metadata.clear() 101 | 102 | insights_total[HIGH] = 0 103 | insights_total[WARN] = 0 104 | insights_total[RECOMMENDATIONS] = 0 105 | 106 | for name in thresholds: 107 | thresholds[name][1] = False 108 | 109 | return console 110 | 111 | 112 | def set_export_theme(): 113 | if args.export_theme_light: 114 | export_theme = TerminalTheme( 115 | (255, 255, 255), 116 | (0, 0, 0), 117 | [ 118 | (26, 26, 26), 119 | (244, 0, 95), 120 | (152, 224, 36), 121 | (253, 151, 31), 122 | (157, 101, 255), 123 | (244, 0, 95), 124 | (88, 209, 235), 125 | (120, 120, 120), 126 | (98, 94, 76), 127 | ], 128 | [ 129 | (244, 0, 95), 130 | (152, 224, 36), 131 | (224, 213, 97), 132 | (157, 101, 255), 133 | (244, 0, 95), 134 | (88, 209, 235), 135 | (246, 246, 239), 136 | ], 137 | ) 138 | else: 139 | export_theme = MONOKAI 140 | return export_theme 141 | 142 | 143 | def load_json(): 144 | codes = [] 145 | if not args.split_files: 146 | if args.json: 147 | f = open(args.json) 148 | data = json.load(f) 149 | 150 | for key, values in data.items(): 151 | for value in values: 152 | code = value['code'] 153 | codes.append(code) 154 | 155 | level = value['level'] 156 | issue = value['issue'] 157 | recommendation = [] 158 | for rec in value['recommendations']: 159 | new_message = {'message': rec} 160 | recommendation.append(new_message) 161 | 162 | insights_dxt.append( 163 | message(code, TARGET_DEVELOPER, level, issue, recommendation) 164 | ) 165 | 166 | 167 | def validate_thresholds(): 168 | """ 169 | Validate thresholds defined by the user. 170 | """ 171 | if args.config: 172 | f = open(args.config) 173 | data = json.load(f) 174 | 175 | for category, thresholds_spec in data.items(): 176 | for threshold_name, threshold_value in thresholds_spec.items(): 177 | thresholds[category + '_' + threshold_name][0] = threshold_value 178 | 179 | assert(thresholds['imbalance_operations'][0] >= 0.0 and thresholds['imbalance_operations'][0] <= 1.0) 180 | assert(thresholds['small_requests'][0] >= 0.0 and thresholds['small_requests'][0] <= 1.0) 181 | assert(thresholds['misaligned_requests'][0] >= 0.0 and thresholds['misaligned_requests'][0] <= 1.0) 182 | assert(thresholds['random_operations'][0] >= 0.0 and thresholds['random_operations'][0] <= 1.0) 183 | 184 | assert(thresholds['metadata_time_rank'][0] >= 0.0) 185 | 186 | 187 | def convert_bytes(bytes_number): 188 | """ 189 | Convert bytes into formatted string. 190 | """ 191 | tags = [ 192 | 'bytes', 193 | 'KB', 194 | 'MB', 195 | 'GB', 196 | 'TB', 197 | 'PB', 198 | 'EB' 199 | ] 200 | 201 | i = 0 202 | double_bytes = bytes_number 203 | 204 | while (i < len(tags) and bytes_number >= 1024): 205 | double_bytes = bytes_number / 1024.0 206 | i = i + 1 207 | bytes_number = bytes_number / 1024 208 | 209 | return str(round(double_bytes, 2)) + ' ' + tags[i] 210 | 211 | 212 | def message(code, target, level, issue, recommendations=None, details=None): 213 | """ 214 | Display the message on the screen with level, issue, and recommendation. 
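    Arguments, as used below: code is the insight identifier (e.g. 'P05') printed when
    --code is passed; target is the intended audience (TARGET_USER, TARGET_DEVELOPER, or
    TARGET_SYSTEM); level is one of RECOMMENDATIONS, HIGH, WARN, INFO, or OK and selects
    the color and which counters are incremented; issue is the message text;
    recommendations is an optional list of dicts with a 'message' and an optional
    'sample' snippet (rendered with --verbose); details is an optional list of dicts
    with a 'message', truncated to DETAILS_MAX_SIZE entries.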
215 | """ 216 | icon = ':arrow_forward:' 217 | 218 | if level in (HIGH, WARN): 219 | insights_total[level] += 1 220 | 221 | if level == HIGH: 222 | color = '[red]' 223 | elif level == WARN: 224 | color = '[orange1]' 225 | elif level == OK: 226 | color = '[green]' 227 | else: 228 | color = '' 229 | 230 | messages = [ 231 | '{}{}{} {}'.format( 232 | color, 233 | icon, 234 | ' [' + code + ']' if args.code else '', 235 | issue 236 | ) 237 | ] 238 | 239 | if args.export_csv: 240 | csv_report.append(code) 241 | 242 | if details: 243 | for detail in details[:DETAILS_MAX_SIZE]: 244 | messages.append(' {}:left_arrow_curving_right: {}'.format( 245 | color, 246 | detail['message'] 247 | ) 248 | ) 249 | 250 | if recommendations: 251 | if not args.only_issues: 252 | messages.append(' [white]:left_arrow_curving_right: [b]Recommendations:[/b]') 253 | 254 | for recommendation in recommendations: 255 | messages.append(' :left_arrow_curving_right: {}'.format(recommendation['message'])) 256 | 257 | if args.verbose and 'sample' in recommendation: 258 | messages.append( 259 | Padding( 260 | Panel( 261 | recommendation['sample'], 262 | title='Solution Example Snippet', 263 | title_align='left', 264 | padding=(1, 2) 265 | ), 266 | (1, 0, 1, 7) 267 | ) 268 | ) 269 | 270 | insights_total[RECOMMENDATIONS] += len(recommendations) 271 | 272 | return Group( 273 | *messages 274 | ) 275 | 276 | 277 | ''' 278 | Pre-load 279 | ''' 280 | load_json() 281 | validate_thresholds() 282 | -------------------------------------------------------------------------------- /drishti/includes/parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | parser = argparse.ArgumentParser( 4 | description='Drishti: ' 5 | ) 6 | 7 | parser.add_argument( 8 | 'log_path', 9 | help='Input .darshan file or recorder folder' 10 | ) 11 | 12 | parser.add_argument( 13 | '--issues', 14 | default=False, 15 | action='store_true', 16 | dest='only_issues', 17 | help='Only displays the detected issues and hides the recommendations' 18 | ) 19 | 20 | parser.add_argument( 21 | '--html', 22 | default=False, 23 | action='store_true', 24 | dest='export_html', 25 | help='Export the report as an HTML page' 26 | ) 27 | 28 | parser.add_argument( 29 | '--svg', 30 | default=False, 31 | action='store_true', 32 | dest='export_svg', 33 | help='Export the report as an SVG image' 34 | ) 35 | 36 | parser.add_argument( 37 | '--light', 38 | default=False, 39 | action='store_true', 40 | dest='export_theme_light', 41 | help='Use a light theme for the report when generating files' 42 | ) 43 | 44 | parser.add_argument( 45 | '--size', 46 | default=False, 47 | dest='export_size', 48 | help='Console width used for the report and generated files' 49 | ) 50 | 51 | parser.add_argument( 52 | '--verbose', 53 | default=False, 54 | action='store_true', 55 | dest='verbose', 56 | help='Display extended details for the recommendations' 57 | ) 58 | 59 | parser.add_argument( 60 | '--threshold', 61 | default=False, 62 | action='store_true', 63 | dest='thold', 64 | help='Display all thresholds used for the report' 65 | ) 66 | 67 | parser.add_argument( 68 | '--code', 69 | default=False, 70 | action='store_true', 71 | dest='code', 72 | help='Display insights identification code' 73 | ) 74 | 75 | parser.add_argument( 76 | '--backtrace', 77 | default=False, 78 | action='store_true', 79 | dest='backtrace', 80 | help='Enable DXT insights and backtrace' 81 | ) 82 | 83 | parser.add_argument( 84 | '--path', 85 | default=False, 86 | 
action='store_true', 87 | dest='full_path', 88 | help='Display the full file path for the files that triggered the issue' 89 | ) 90 | 91 | parser.add_argument( 92 | '--csv', 93 | default=False, 94 | action='store_true', 95 | dest='export_csv', 96 | help='Export a CSV with the code of all issues that were triggered' 97 | ) 98 | 99 | parser.add_argument( 100 | '--export_dir', 101 | default="", 102 | dest='export_dir', 103 | help='Specify the directory prefix for the output files (if any)' 104 | ) 105 | 106 | parser.add_argument( 107 | '--json', 108 | default=False, 109 | dest='json', 110 | help=argparse.SUPPRESS 111 | ) 112 | 113 | parser.add_argument( 114 | '--split', 115 | default=False, 116 | action='store_true', 117 | dest='split_files', 118 | help='Split the files and generate report for each file' 119 | ) 120 | 121 | parser.add_argument( 122 | '--config', 123 | default=False, 124 | dest='config', 125 | help='Enable thresholds read from json file' 126 | ) 127 | 128 | args = parser.parse_args() 129 | -------------------------------------------------------------------------------- /drishti/includes/snippets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/drishti/includes/snippets/__init__.py -------------------------------------------------------------------------------- /drishti/includes/snippets/hdf5-alignment.c: -------------------------------------------------------------------------------- 1 | hid_t fileAccessProperty = H5Pcreate(H5P_FILE_ACCESS); 2 | ... 3 | H5Pset_alignment(fileAccessProperty, threshold, bytes); -------------------------------------------------------------------------------- /drishti/includes/snippets/hdf5-cache.c: -------------------------------------------------------------------------------- 1 | hid_t fileAccessProperty = H5Pcreate(H5P_FILE_ACCESS); 2 | ... 3 | H5AC_cache_config_t cache_config; 4 | cache_config.version = H5AC__CURR_CACHE_CONFIG_VERSION; 5 | H5Pget_mdc_config(m_fileAccessProperty, &cache_config); 6 | cache_config.set_initial_size = 1; 7 | cache_config.initial_size = meta_size; 8 | cache_config.evictions_enabled = 0; 9 | cache_config.incr_mode = H5C_incr__off; 10 | cache_config.flash_incr_mode = H5C_flash_incr__off; 11 | cache_config.decr_mode = H5C_decr__off; 12 | H5Pset_mdc_config(fileAccessProperty, &cache_config); -------------------------------------------------------------------------------- /drishti/includes/snippets/hdf5-collective-metadata.c: -------------------------------------------------------------------------------- 1 | hid_t fileAccessProperty = H5Pcreate(H5P_FILE_ACCESS); 2 | ... 
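/* Ask HDF5 to perform metadata I/O collectively: the first call below makes metadata
   reads collective, the second makes metadata writes collective. The property list must
   then be passed to H5Fcreate()/H5Fopen() for the setting to take effect. */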
3 | H5Pset_all_coll_metadata_ops(fileAccessProperty, true); 4 | H5Pset_coll_metadata_write(fileAccessProperty, true); -------------------------------------------------------------------------------- /drishti/includes/snippets/hdf5-vol-async-read.c: -------------------------------------------------------------------------------- 1 | hid_t es_id, fid, gid, did; 2 | 3 | MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &provided); 4 | 5 | es_id = H5EScreate(); // Create event set for tracking async operations 6 | fid = H5Fopen_async(..., es_id); // Asynchronous, can start immediately 7 | gid = H5Gopen_async(fid, ..., es_id); // Asynchronous, starts when H5Fopen completes 8 | did = H5Dopen_async(gid, ..., es_id); // Asynchronous, starts when H5Gopen completes 9 | 10 | status = H5Dread_async(did, ..., es_id); // Asynchronous, starts when H5Dopen completes 11 | 12 | H5ESwait(es_id, H5ES_WAIT_FOREVER, &num_in_progress, &op_failed); 13 | 14 | H5ESclose(es_id); // Close the event set (must wait first) -------------------------------------------------------------------------------- /drishti/includes/snippets/hdf5-vol-async-write.c: -------------------------------------------------------------------------------- 1 | hid_t es_id, fid, gid, did; 2 | 3 | MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &provided); 4 | 5 | es_id = H5EScreate(); // Create event set for tracking async operations 6 | fid = H5Fopen_async(..., es_id); // Asynchronous, can start immediately 7 | gid = H5Gopen_async(fid, ..., es_id); // Asynchronous, starts when H5Fopen completes 8 | did = H5Dopen_async(gid, ..., es_id); // Asynchronous, starts when H5Gopen completes 9 | 10 | status = H5Dwrite_async(did, ..., es_id); // Asynchronous, starts when H5Dopen completes 11 | 12 | // Wait for operations in event set to complete, buffers used for H5Dwrite must only be changed after 13 | H5ESwait(es_id, H5ES_WAIT_FOREVER, &num_in_progress, &op_failed); 14 | 15 | H5ESclose(es_id); // Close the event set (must wait first) -------------------------------------------------------------------------------- /drishti/includes/snippets/lustre-striping.bash: -------------------------------------------------------------------------------- 1 | lfs setstripe -S 4M -c 64 /path/to/your/directory/or/file 2 | 3 | # -S defines the stripe size (i.e., the size in which the file will be broken down into) 4 | # -c defines the stripe count (i.e., how many servers will be used to distribute stripes of the file) -------------------------------------------------------------------------------- /drishti/includes/snippets/mpi-io-collective-read.c: -------------------------------------------------------------------------------- 1 | MPI_File_open(MPI_COMM_WORLD, "output-example.txt", MPI_MODE_CREATE|MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); 2 | ... 3 | MPI_File_read_all(fh, &buffer, size, MPI_INT, &s); -------------------------------------------------------------------------------- /drishti/includes/snippets/mpi-io-collective-write.c: -------------------------------------------------------------------------------- 1 | MPI_File_open(MPI_COMM_WORLD, "output-example.txt", MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); 2 | ... 
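/* MPI_File_write_all() is the collective counterpart of MPI_File_write(): every rank in
   the communicator used to open the file must call it, which lets the MPI-IO layer
   aggregate the per-rank requests into fewer, larger ones. */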
3 | MPI_File_write_all(fh, &buffer, size, MPI_CHAR, &s); -------------------------------------------------------------------------------- /drishti/includes/snippets/mpi-io-hints.bash: -------------------------------------------------------------------------------- 1 | # ------------------------------- # 2 | # MPICH # 3 | # ------------------------------- # 4 | export MPICH_MPIIO_HINTS="*:cb_nodes=16:cb_buffer_size=16777216:romio_cb_write=enable:romio_ds_write=disable:romio_cb_read=enable:romio_ds_read=disable" 5 | 6 | # * means it will apply the hints to any file opened with MPI-IO 7 | # cb_nodes ---> number of aggregator nodes, defaults to stripe count 8 | # cb_buffer_size ---> controls the buffer size used for collective buffering 9 | # romio_cb_write ---> controls collective buffering for writes 10 | # romio_cb_read ---> controls collective buffering for reads 11 | # romio_ds_write ---> controls data sieving for writes 12 | # romio_ds_read ---> controls data sieving for reads 13 | 14 | # to visualize the used hints for a given job 15 | export MPICH_MPIIO_HINTS_DISPLAY=1 16 | 17 | # ------------------------------- # 18 | # OpenMPI / SpectrumMPI (Summit) # 19 | # ------------------------------- # 20 | export OMPI_MCA_io=romio321 21 | export ROMIO_HINTS=./my-romio-hints 22 | 23 | # the my-romio-hints file content is as follows: 24 | cat $ROMIO_HINTS 25 | 26 | romio_cb_write enable 27 | romio_cb_read enable 28 | romio_ds_write disable 29 | romio_ds_read disable 30 | cb_buffer_size 16777216 31 | cb_nodes 8 -------------------------------------------------------------------------------- /drishti/includes/snippets/mpi-io-iread.c: -------------------------------------------------------------------------------- 1 | MPI_File fh; 2 | MPI_Status s; 3 | MPI_Request r; 4 | ... 5 | MPI_File_open(MPI_COMM_WORLD, "output-example.txt", MPI_MODE_CREATE|MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); 6 | ... 7 | MPI_File_iread(fh, &buffer, BUFFER_SIZE, n, MPI_CHAR, &r); 8 | ... 9 | // compute something 10 | ... 11 | MPI_Test(&r, &completed, &s); 12 | ... 13 | if (!completed) { 14 | // compute something 15 | 16 | MPI_Wait(&r, &s); 17 | } -------------------------------------------------------------------------------- /drishti/includes/snippets/mpi-io-iwrite.c: -------------------------------------------------------------------------------- 1 | MPI_File fh; 2 | MPI_Status s; 3 | MPI_Request r; 4 | ... 5 | MPI_File_open(MPI_COMM_WORLD, "output-example.txt", MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); 6 | ... 7 | MPI_File_iwrite(fh, &buffer, BUFFER_SIZE, MPI_CHAR, &r); 8 | ... 9 | // compute something 10 | ... 11 | MPI_Test(&r, &completed, &s); 12 | ... 
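/* If the non-blocking write has not completed yet, overlap more computation with the
   I/O and only then block in MPI_Wait(), which guarantees completion before the buffer
   is reused. */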
13 | if (!completed) { 14 | // compute something 15 | 16 | MPI_Wait(&r, &s); 17 | } -------------------------------------------------------------------------------- /drishti/includes/snippets/pnetcdf-hdf5-no-fill.c: -------------------------------------------------------------------------------- 1 | status = nc_def_var (ncid, "A", NC_DOUBLE, 3, cube_dim, &cube1_id); 2 | nc_def_var_fill(ncid, cube1_id, NC_NOFILL, NULL); 3 | 4 | status = nc_def_var (ncid, "B", NC_DOUBLE, 3, cube_dim, &cube2_id); 5 | nc_def_var_fill(ncid, cube1_id, NC_NOFILL, NULL); 6 | 7 | status = nc_def_var (ncid, "C", NC_DOUBLE, 3, cube_dim, &cube3_id); 8 | nc_def_var_fill(ncid, cube1_id, NC_NOFILL, NULL); -------------------------------------------------------------------------------- /drishti/reporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | from subprocess import call 6 | from drishti.includes.parser import * 7 | 8 | 9 | ''' 10 | |- handler_darshan -| 11 | | | 12 | reporter -> /handlers -> |- handler_recorder -| -| 13 | | | | 14 | |- handler_xxx ... -| | 15 | ________________________________________________| 16 | | 17 | |-----> /includes -> module -> config -> parser 18 | ''' 19 | 20 | 21 | LOG_TYPE_DARSHAN = 0 22 | LOG_TYPE_RECORDER = 1 23 | 24 | 25 | def clear(): 26 | """ 27 | Clear the screen with the comment call based on the operating system. 28 | """ 29 | _ = call('clear' if os.name == 'posix' else 'cls') 30 | 31 | 32 | def check_log_type(path): 33 | if path.endswith('.darshan'): 34 | if not os.path.isfile(path): 35 | print('Unable to open .darshan file.') 36 | sys.exit(os.EX_NOINPUT) 37 | else: return LOG_TYPE_DARSHAN 38 | else: # check whether is a valid recorder log 39 | if not os.path.isdir(path): 40 | print('Unable to open recorder folder.') 41 | sys.exit(os.EX_NOINPUT) 42 | else: return LOG_TYPE_RECORDER 43 | 44 | 45 | def main(): 46 | log_type = check_log_type(args.log_path) 47 | 48 | if log_type == LOG_TYPE_DARSHAN: 49 | from drishti.handlers.handle_darshan import handler 50 | 51 | elif log_type == LOG_TYPE_RECORDER: 52 | from drishti.handlers.handle_recorder import handler 53 | 54 | handler() 55 | 56 | -------------------------------------------------------------------------------- /images/drishti-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/images/drishti-logo.png -------------------------------------------------------------------------------- /images/sample-io-insights-issues.svg: -------------------------------------------------------------------------------- 1 | Drishti╭─DRISHTIv.0.3──────────────────────────────────────────────────────────────────────────────────────────────────────╮JOB:1190243EXECUTABLE:bin/8_benchmark_parallelDARSHAN:jlbez_8_benchmark_parallel_id1190243_7-23-45631-11755726114084236527_1.darshanEXECUTION DATE:2021-07-23 16:40:31+00:00 to 2021-07-23 16:40:32+00:00 (0.00 hours)FILES:6 files (1 use STDIO, 2 use POSIX, 1 use MPI-IO)PROCESSES64HINTS:romio_no_indep_rw=true cb_nodes=4╰─1 critical issues,5 warnings, and5 recommendations───────────────────────────────────────────────────────────────╯╭─METADATA ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮▶ Application is read operation intensive (6.34% writes vs. 
93.66% reads)▶ Application might have redundant read traffic (more data was read than the highest read offset)▶ Application might have redundant write traffic (more data was written than the highest write offset)╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯╭─OPERATIONS ─────────────────────────────────────────────────────────────────────────────────────────────────────────╮▶ Application issues a high number (285) of small read requests (i.e., < 1MB) which represents 37.11% of allread/write requests↪ 284 (36.98%) small read requests are to "benchmark.h5"▶ Application mostly uses consecutive (2.73%) and sequential (90.62%) read requests▶ Application mostly uses consecutive (19.23%) and sequential (76.92%) write requests▶ Application uses MPI-IO and read data using 640 (83.55%) collective operations▶ Application uses MPI-IO and write data using 768 (100.00%) collective operations▶ Application could benefit from non-blocking (asynchronous) reads▶ Application could benefit from non-blocking (asynchronous) writes▶ Application is using inter-node aggregators (which require network communication)╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯2022 |LBL|Drishti report generated at 2022-08-05 13:19:59.787458 in0.955 seconds -------------------------------------------------------------------------------- /images/sample-io-insights.svg: -------------------------------------------------------------------------------- 1 | Drishti╭─DRISHTIv.0.3──────────────────────────────────────────────────────────────────────────────────────────────────────╮JOB:1190243EXECUTABLE:bin/8_benchmark_parallelDARSHAN:jlbez_8_benchmark_parallel_id1190243_7-23-45631-11755726114084236527_1.darshanEXECUTION DATE:2021-07-23 16:40:31+00:00 to 2021-07-23 16:40:32+00:00 (0.00 hours)FILES:6 files (1 use STDIO, 2 use POSIX, 1 use MPI-IO)PROCESSES64HINTS:romio_no_indep_rw=true cb_nodes=4╰─1 critical issues,5 warnings, and5 recommendations───────────────────────────────────────────────────────────────╯╭─METADATA ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮▶ Application is read operation intensive (6.34% writes vs. 93.66% reads)▶ Application might have redundant read traffic (more data was read than the highest read offset)▶ Application might have redundant write traffic (more data was written than the highest write offset)╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯╭─OPERATIONS ─────────────────────────────────────────────────────────────────────────────────────────────────────────╮▶ Application issues a high number (285) of small read requests (i.e., < 1MB) which represents 37.11% of allread/write requests↪ 284 (36.98%) small read requests are to "benchmark.h5"Recommendations:↪ Consider buffering read operations into larger more contiguous ones↪ Since the appplication already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_read_all() orMPI_File_read_at_all()) to aggregate requests into larger ones▶ Application mostly uses consecutive (2.73%) and sequential (90.62%) read requests▶ Application mostly uses consecutive (19.23%) and sequential (76.92%) write requests▶ Application uses MPI-IO and read data using 640 (83.55%) collective operations▶ Application uses MPI-IO and write data using 768 (100.00%) collective operations▶ Application could benefit from non-blocking (asynchronous) readsRecommendations:↪ Since you use MPI-IO, consider non-blocking/asynchronous I/O operations (e.g., MPI_File_iread(),MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())▶ Application could benefit from non-blocking (asynchronous) writesRecommendations:↪ Since you use MPI-IO, consider non-blocking/asynchronous I/O operations (e.g., MPI_File_iwrite(),MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())▶ Application is using inter-node aggregators (which require network communication)Recommendations:↪ Set the MPI hints for the number of aggregators as one per compute node (e.g., cb_nodes=32)╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯2022 |LBL|Drishti report generated at 2022-08-05 13:20:19.715639 in0.996 seconds -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse 2 | darshan>=3.4.4.0 3 | pandas 4 | rich==12.5.1 5 | recorder-utils 6 | -------------------------------------------------------------------------------- /sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/sample/jlbez_8a_benchmark_write_parallel_id1321662_8-21-5892-15802854900629188750_106.darshan -------------------------------------------------------------------------------- /sample/jlbez_8a_benchmark_write_parallel_id1322696_8-21-14519-8141979180909667175_12.darshan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc-io/drishti-io/1d0eedffe5dc68d801093eb7ef13f480d293ee7b/sample/jlbez_8a_benchmark_write_parallel_id1322696_8-21-14519-8141979180909667175_12.darshan -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", "r") as f: 4 | long_description = f.read() 5 | 6 | with open("requirements.txt") as f: 7 | requirements = f.readlines() 8 | 9 | setup( 10 | name="drishti-io", 11 | keywords="drishti", 12 | version="0.6", 13 | author="Jean Luca Bez, Suren Byna", 14 | author_email="jlbez@lbl.gov, sbyna@lbl.gov", 15 | description="", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | url="https://github.com/hpc-io/drishti", 19 | install_requires=[ 20 | 'argparse', 21 | 'pandas', 22 | 'darshan>=3.4.4.0', 23 | 'rich==12.5.1', 24 | 'recorder-utils', 25 | ], 26 | packages=find_packages(), 27 | package_data={ 28 | 'drishti.includes': [ 29 | 'drishti/includes/snippets/*' 30 | ], 31 | }, 32 | include_package_data=True, 33 | entry_points={ 34 | "console_scripts": [ 35 | "drishti=drishti.reporter:main" 36 | ] 37 | 
}, 38 | classifiers=[ 39 | "Development Status :: 4 - Beta", 40 | "Environment :: Console", 41 | "Intended Audience :: Developers", 42 | "Intended Audience :: Science/Research", 43 | "License :: Other/Proprietary License", 44 | "Programming Language :: Python :: 3 :: Only" 45 | ], 46 | python_requires='>=3.6', 47 | ) 48 | --------------------------------------------------------------------------------