├── .gitattributes ├── .github └── workflows │ └── build-and-publish-new-version.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── RELEASE.md ├── assets └── deepview.png ├── deepview_profile ├── __init__.py ├── __main__.py ├── analysis │ ├── __init__.py │ ├── request_manager.py │ ├── runner.py │ ├── session.py │ └── static.py ├── commands │ ├── __init__.py │ ├── analysis.py │ ├── interactive.py │ ├── measurements.py │ ├── memory.py │ ├── prediction_models.py │ └── time.py ├── config │ └── __init__.py ├── data │ ├── __init__.py │ └── hints.yml ├── db │ ├── __init__.py │ └── database.py ├── energy │ ├── __init__.py │ └── measurer.py ├── error_printing.py ├── evaluate.py ├── exceptions.py ├── export_converter.py ├── initialization.py ├── io │ ├── __init__.py │ ├── connection.py │ ├── connection_acceptor.py │ ├── connection_manager.py │ └── sentinel.py ├── lru_cache.py ├── models │ ├── __init__.py │ ├── analysis.py │ └── source_map.py ├── nvml.py ├── pl │ ├── deepview_callback.py │ └── deepview_interface.py ├── profiler │ ├── __init__.py │ ├── autograd.py │ ├── backward.py │ ├── ddp.py │ ├── iteration.py │ ├── operation.py │ └── utilization.py ├── protocol │ ├── __init__.py │ ├── message_handler.py │ └── message_sender.py ├── protocol_gen │ ├── __init__.py │ └── innpv_pb2.py ├── pytorch_profiler_log_reader.py ├── server.py ├── skyline.py ├── tests │ ├── __init__.py │ └── test_lru_cache.py ├── tracking │ ├── __init__.py │ ├── backward_interceptor.py │ ├── base.py │ ├── breakdown.py │ ├── call_stack.py │ ├── callable_tracker.py │ ├── hook_manager.py │ ├── memory │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── report.py │ │ ├── report_queries.py │ │ └── weights.py │ ├── time │ │ ├── __init__.py │ │ ├── operation.py │ │ ├── report.py │ │ └── report_queries.py │ ├── tracker.py │ └── utils.py ├── user_code_utils.py ├── util_weak.py ├── utils.py └── version_utils.py ├── docs ├── memory-report.md ├── providers.md ├── remote.md └── run-time-report.md ├── examples ├── densenet │ ├── LICENSE │ ├── densenet.py │ └── entry_point.py ├── gnmt │ ├── README.md │ ├── entry_point.py │ └── seq2seq │ │ ├── LICENSE │ │ ├── data │ │ ├── config.py │ │ ├── dataset.py │ │ ├── sampler.py │ │ └── tokenizer.py │ │ ├── inference │ │ ├── beam_search.py │ │ └── inference.py │ │ ├── models │ │ ├── attention.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── gnmt.py │ │ └── seq2seq_base.py │ │ ├── train │ │ ├── fp_optimizers.py │ │ ├── lr_scheduler.py │ │ ├── smoothing.py │ │ └── trainer.py │ │ └── utils.py ├── huggingface │ └── entry_point.py ├── legacy │ ├── lenet.py │ ├── testnet2.py │ └── vgg11.py ├── nanogpt │ ├── entry_point.py │ └── model.py ├── pytorch_lightning │ └── example.py ├── resnet │ ├── LICENSE │ ├── entry_point.py │ ├── entry_point_resnext.py │ └── resnet.py ├── testnet │ ├── entry_point.py │ └── testnet1.py ├── transformer │ ├── entry_point.py │ └── transformer │ │ ├── Beam.py │ │ ├── Constants.py │ │ ├── LICENSE │ │ ├── Layers.py │ │ ├── Models.py │ │ ├── Modules.py │ │ ├── Optim.py │ │ ├── README.md │ │ ├── SubLayers.py │ │ ├── Translator.py │ │ └── __init__.py └── vgg │ ├── LICENSE │ ├── entry_point.py │ └── vgg.py ├── protocol ├── Makefile └── innpv.proto ├── pyproject.toml ├── setup.cfg ├── test ├── .gitignore ├── TESTING.md ├── config_params.py ├── test_database.py ├── test_driver.py └── utils.py └── tools ├── common.sh └── prepare-release.sh /.gitattributes: 
-------------------------------------------------------------------------------- 1 | assets/* linguist-documentation 2 | website/* linguist-documentation 3 | -------------------------------------------------------------------------------- /.github/workflows/build-and-publish-new-version.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish a new version 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tag: 7 | description: 'Release Tag. This is in the form x.y.z' 8 | required: true 9 | 10 | jobs: 11 | build-and-publish: 12 | name: Build DeepView.Profile 13 | runs-on: ubuntu-latest 14 | env: 15 | CI_COMMIT_AUTHOR: CentML 16 | CI_COMMIT_EMAIL: centml-machine-user@users.noreply.github.com 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v3 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: '3.8' 25 | 26 | - name: Install and configure Poetry 27 | uses: snok/install-poetry@v1 28 | with: 29 | virtualenvs-in-project: true 30 | 31 | - name: Create release branch 32 | run: | 33 | git checkout -b release/${{ github.event.inputs.tag }} 34 | git fetch 35 | git branch --set-upstream-to=origin/main release/${{ github.event.inputs.tag }} 36 | 37 | - name: Update version number 38 | run: | 39 | poetry version ${{ github.event.inputs.tag }} 40 | 41 | - name: Commit updated version number and tag it 42 | run: | 43 | git config --global user.name "${{ env.CI_COMMIT_AUTHOR }}" 44 | git config --global user.email "${{ env.CI_COMMIT_EMAIL }}" 45 | git commit -am "Release version ${{ github.event.inputs.tag }}" 46 | git push origin release/${{ github.event.inputs.tag }} 47 | git tag ${{ github.event.inputs.tag }} 48 | 49 | - name: Build Python artifacts 50 | run: | 51 | poetry build 52 | 53 | - name: Upload Artifacts 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: ${{ github.event.inputs.tag }} 57 | path: dist/*${{ github.event.inputs.tag }}* 58 | 59 | - name: Publish a release 60 | run: | 61 | RELEASE_NOTES="$(git log $(git describe --abbrev=0 --tags).. 
--merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ')" 62 | echo "Autogenerated Release Notes:" 63 | echo "$RELEASE_NOTES" 64 | RELEASE_ARTIFACTS=$(find ./dist -name "*${{ github.event.inputs.tag }}*" -type f | paste -s -d ' ' - ) 65 | VERSION_TAG="v${{ github.event.inputs.tag }}" 66 | gh auth login --with-token <<< "${{ secrets.GH_TOKEN }}" 67 | gh release create "$VERSION_TAG" \ 68 | --title "$VERSION_TAG" \ 69 | --notes "$RELEASE_NOTES" \ 70 | --target "$GITHUB_SHA" \ 71 | $RELEASE_ARTIFACTS 72 | gh pr create --title "Release $VERSION_TAG" --body "$RELEASE_NOTES" 73 | 74 | publish-to-test-pypi: 75 | name: Publish to Test PyPI 76 | needs: build-and-publish 77 | runs-on: ubuntu-latest 78 | environment: Test 79 | concurrency: Test 80 | permissions: 81 | id-token: write 82 | 83 | steps: 84 | - name: Download artifact 85 | uses: actions/download-artifact@v4 86 | with: 87 | name: ${{ github.event.inputs.tag }} 88 | path: dist 89 | 90 | - name: Publish to PyPI 91 | uses: pypa/gh-action-pypi-publish@release/v1 92 | with: 93 | repository_url: https://test.pypi.org/legacy/ 94 | 95 | publish-to-pypi: 96 | name: Publish to PyPI 97 | needs: publish-to-test-pypi 98 | runs-on: ubuntu-latest 99 | environment: Production 100 | concurrency: Production 101 | permissions: 102 | id-token: write 103 | 104 | 105 | steps: 106 | - name: Download artifacts 107 | uses: actions/download-artifact@v4 108 | with: 109 | name: ${{ github.event.inputs.tag }} 110 | path: dist 111 | 112 | - name: Publish to PyPI 113 | uses: pypa/gh-action-pypi-publish@release/v1 114 | with: 115 | repository_url: https://upload.pypi.org/legacy/ 116 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/charliermarsh/ruff-pre-commit 5 | rev: 'v0.0.267' 6 | hooks: 7 | - id: ruff 8 | args: [ --fix, --exit-non-zero-on-fix ] 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v3.2.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-ast 15 | - id: check-yaml 16 | - id: check-added-large-files 17 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.black-formatter" 4 | }, 5 | "python.formatting.provider": "none" 6 | } 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | TBD -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | ======================================== 2 | Copyright Notice for the Skyline Project 3 | ======================================== 4 | 5 | Copyright 2020 Geoffrey X. Yu 6 | Copyright 2020 Tovi Grossman 7 | Copyright 2020 Gennady Pekhimenko 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this project except in compliance with the License. 
11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | 21 | 22 | ===================================== 23 | Copyright Notice for the Code Samples 24 | ===================================== 25 | 26 | Portions of code inside the "samples" directory were written by 27 | third party developers. These code files carry their own open 28 | source licenses and copyright notices. Please see the README.md 29 | and LICENSE files inside those directories for more information. 30 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # How to release a new version of DeepView.Profile 2 | - Go to the GitHub repo and run the action `build-and-publish-new-version`. You will be prompted to specify the version number. 3 | 4 | - This runs a GitHub Action that takes the following steps: 5 | 1. Fetches the repo and its dependencies 6 | 2. Creates a release branch 7 | 3. Updates the version number to the user-specified version by updating the pyproject.toml 8 | 4. Commits the changes and tags the commit with the version number 9 | 5. Builds the Python artifacts 10 | 6. Publishes a release to GitHub 11 | 7. Creates a PR to merge back into main 12 | 8. Publishes to Test PyPI 13 | 9. Publishes to PyPI 14 | 15 | - The action `build-and-publish-new-version` is defined under `.github/workflows/build-and-publish-new-version.yml` 16 | 17 | - This release process follows the approach outlined in [OneFlow](https://www.endoflineblog.com/oneflow-a-git-branching-model-and-workflow). -------------------------------------------------------------------------------- /assets/deepview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/assets/deepview.png -------------------------------------------------------------------------------- /deepview_profile/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from importlib.metadata import version, PackageNotFoundError 3 | 4 | try: 5 | package_name = "deepview_profile" 6 | __name__ = package_name 7 | __version__ = version(package_name) 8 | __description__ = "Interactive performance profiling and debugging tool for PyTorch neural networks."
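# version() reads the installed distribution's metadata, so a source checkout that has not been installed raises PackageNotFoundError and falls through to the "unknown" defaults below.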
9 | 10 | except PackageNotFoundError: 11 | __version__ = "unknown" 12 | __description__ = "unknown" 13 | 14 | from .__main__ import main 15 | -------------------------------------------------------------------------------- /deepview_profile/__main__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore", message="'deepview_profile.__main__' found in sys.modules after import of package 'deepview_profile', but prior to execution of 'deepview_profile.__main__'; this may result in unpredictable behaviour") 3 | 4 | import argparse 5 | import sys 6 | 7 | import deepview_profile 8 | import deepview_profile.commands.interactive 9 | import deepview_profile.commands.memory 10 | import deepview_profile.commands.time 11 | import deepview_profile.commands.analysis 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser( 16 | prog="DeepView", 17 | description="DeepView: Interactive Neural Network Performance " 18 | "Profiler, Visualizer, and Debugger for PyTorch", 19 | ) 20 | parser.add_argument( 21 | "-v", "--version", 22 | action="store_true", 23 | help="Print the version and exit.", 24 | ) 25 | subparsers = parser.add_subparsers(title="Commands") 26 | deepview_profile.commands.interactive.register_command(subparsers) 27 | deepview_profile.commands.memory.register_command(subparsers) 28 | deepview_profile.commands.time.register_command(subparsers) 29 | deepview_profile.commands.analysis.register_command(subparsers) 30 | args = parser.parse_args() 31 | 32 | if args.version: 33 | print('DeepView Command Line Interface', 'v' + deepview_profile.__version__) 34 | return 35 | 36 | if 'func' not in args: 37 | parser.print_help() 38 | sys.exit(1) 39 | 40 | # Run the specified command 41 | args.func(args) 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /deepview_profile/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/analysis/__init__.py -------------------------------------------------------------------------------- /deepview_profile/analysis/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | from deepview_profile.analysis.session import AnalysisSession 5 | from deepview_profile.nvml import NVML 6 | from deepview_profile.utils import release_memory 7 | import weakref 8 | 9 | def analyze_project(project_root, entry_point, nvml, enable_ddp_analysis): 10 | session = AnalysisSession.new_from(project_root, entry_point) 11 | 12 | release_memory() 13 | print("analyze_project: running measure_breakdown()") 14 | yield session.measure_breakdown(nvml) 15 | 16 | release_memory() 17 | print("analyze_project: running measure_throughput()") 18 | yield session.measure_throughput() 19 | 20 | release_memory() 21 | print("analyze_project: running deepview_predict()") 22 | yield session.habitat_predict() 23 | 24 | release_memory() 25 | print("analyze_project: running measure_utilization()") 26 | yield session.measure_utilization() 27 | 28 | release_memory() 29 | print("analyze_project: running energy_compute()") 30 | yield session.energy_compute() 31 | 32 | if enable_ddp_analysis: 33 | release_memory() 34 | print("analyze_project: running ddp_computation()") 35 | yield 
session.ddp_computation() 36 | 37 | # release object session (less gpu memory consumption) 38 | release_memory() 39 | weakref.finalize(session, print, "session object destroyed") 40 | del session 41 | yield None 42 | 43 | def main(): 44 | # This is used for development and debugging purposes 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("entry_point", type=str) 47 | args = parser.parse_args() 48 | 49 | project_root = os.getcwd() 50 | with NVML() as nvml: 51 | analyzer = analyze_project(project_root, args.entry_point, nvml, enable_ddp_analysis=False) 52 | breakdown = next(analyzer) 53 | throughput = next(analyzer) 54 | 55 | print('Peak usage: ', breakdown.peak_usage_bytes, 'bytes') 56 | print('Max. capacity:', breakdown.memory_capacity_bytes, 'bytes') 57 | print('No. of weight breakdown nodes: ', len(breakdown.weight_tree)) 58 | print('No. of operation breakdown nodes:', len(breakdown.operation_tree)) 59 | print('Throughput:', throughput.samples_per_second, 'samples/s') 60 | 61 | 62 | if __name__ == "__main__": 63 | kwargs = { 64 | "format": "%(asctime)s %(levelname)-8s %(message)s", 65 | "datefmt": "%Y-%m-%d %H:%M", 66 | "level": logging.DEBUG, 67 | } 68 | logging.basicConfig(**kwargs) 69 | main() 70 | -------------------------------------------------------------------------------- /deepview_profile/analysis/static.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | 4 | END_OF_FUNCTION = re.compile(r'\):\s*$') 5 | 6 | 7 | class StaticAnalyzer: 8 | def __init__(self, source_code, source_tree): 9 | self._ast = source_tree 10 | self._code_by_line = source_code.splitlines() 11 | 12 | def batch_size_location(self): 13 | """ 14 | Locates the line of the 'batch_size' argument in the 15 | 'deepview_input_provider' function and determines if the provider's 16 | definition can be mutated using our heuristics.
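Returns a (line_number, can_mutate) tuple when the argument is found, or None if no suitable provider function exists.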
17 | """ 18 | extractor = _InputProviderExtractor() 19 | extractor.visit(self._ast) 20 | function = extractor.function_node 21 | 22 | if (function is None or 23 | len(function.args.args) == 0 or 24 | function.args.args[0].arg != 'batch_size'): 25 | return None 26 | 27 | batch_size_line_number = function.args.args[0].lineno 28 | match = END_OF_FUNCTION.search(self._code_by_line[function.lineno - 1]) 29 | can_mutate = match is not None 30 | 31 | return batch_size_line_number, can_mutate 32 | 33 | 34 | class _InputProviderExtractor(ast.NodeVisitor): 35 | def __init__(self): 36 | self.function_node = None 37 | 38 | def visit_FunctionDef(self, node): 39 | if self.function_node is not None: 40 | # Return early if we've already found the provider 41 | return 42 | if node.name != 'deepview_input_provider': 43 | return 44 | self.function_node = node 45 | -------------------------------------------------------------------------------- /deepview_profile/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/commands/__init__.py -------------------------------------------------------------------------------- /deepview_profile/commands/interactive.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import signal 3 | import threading 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | initialize_skyline, 8 | ) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def register_command(subparsers): 14 | parser = subparsers.add_parser( 15 | "interactive", 16 | help="Start a new DeepView interactive profiling session.", 17 | ) 18 | parser.add_argument( 19 | "--host", 20 | default="", 21 | help="The host address to bind to.", 22 | ) 23 | parser.add_argument( 24 | "--port", 25 | default=60120, 26 | type=int, 27 | help="The port to listen on.", 28 | ) 29 | parser.add_argument( 30 | "--hints-file", 31 | help="Path to the performance hints configuration YAML file.", 32 | ) 33 | parser.add_argument( 34 | "--measure-for", 35 | help="Number of iterations to measure when determining throughput.", 36 | ) 37 | parser.add_argument( 38 | "--warm-up", 39 | help="Number of warm up iterations when determining throughput.", 40 | ) 41 | parser.add_argument( 42 | "--log-file", 43 | help="The location of the log file.", 44 | ) 45 | parser.add_argument( 46 | "--debug", action="store_true", help="Log debug messages.") 47 | parser.set_defaults(func=main) 48 | 49 | def actual_main(args): 50 | from deepview_profile.server import SkylineServer 51 | 52 | should_shutdown = threading.Event() 53 | 54 | def signal_handler(signal, frame): 55 | should_shutdown.set() 56 | 57 | signal.signal(signal.SIGINT, signal_handler) 58 | signal.signal(signal.SIGTERM, signal_handler) 59 | 60 | 61 | with SkylineServer(args.host, args.port) as server: 62 | _, port = server.listening_on 63 | logger.info( 64 | "DeepView interactive profiling session started! 
" 65 | "Listening on port %d.", 66 | port, 67 | ) 68 | 69 | # Run the server until asked to terminate 70 | should_shutdown.wait() 71 | 72 | 73 | def main(args): 74 | check_skyline_preconditions(args) 75 | initialize_skyline(args) 76 | actual_main(args) 77 | -------------------------------------------------------------------------------- /deepview_profile/commands/measurements.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import csv 5 | 6 | from deepview_profile.initialization import ( 7 | check_skyline_preconditions, 8 | initialize_skyline, 9 | ) 10 | from deepview_profile.error_printing import print_analysis_error 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def register_command(subparsers): 16 | parser = subparsers.add_parser( 17 | "measure-batches", 18 | help="Make throughput and memory measurements for given batch sizes.", 19 | ) 20 | parser.add_argument( 21 | "entry_point", 22 | help="The entry point file in this project that contains the DeepView " 23 | "provider functions.", 24 | ) 25 | parser.add_argument( 26 | "-b", "--batch-sizes", 27 | help="The batch sizes to consider.", 28 | type=int, 29 | nargs="+", 30 | required=True, 31 | ) 32 | parser.add_argument( 33 | "-t", "--trials", 34 | help="Number of trials to run when making measurements.", 35 | type=int, 36 | required=True, 37 | default=5, 38 | ) 39 | parser.add_argument( 40 | "-o", "--output", 41 | help="The location where the evaluation output should be stored.", 42 | required=True, 43 | ) 44 | parser.add_argument( 45 | "--log-file", 46 | help="The location of the log file.", 47 | ) 48 | parser.add_argument( 49 | "--debug", action="store_true", help="Log debug messages.") 50 | parser.set_defaults(func=main) 51 | 52 | 53 | def make_measurements(session, batch_size): 54 | # This is a HACK 55 | session._batch_size = batch_size 56 | peak_usage_bytes = session.measure_peak_usage_bytes() 57 | thpt_msg = session.measure_throughput() 58 | return thpt_msg.samples_per_second, peak_usage_bytes 59 | 60 | 61 | def actual_main(args): 62 | from deepview_profile.analysis.session import AnalysisSession 63 | from deepview_profile.exceptions import AnalysisError 64 | 65 | if os.path.exists(args.output): 66 | print( 67 | "ERROR: The specified output file already exists.", 68 | file=sys.stderr, 69 | ) 70 | sys.exit(1) 71 | 72 | try: 73 | with open(args.output, 'w') as f: 74 | writer = csv.writer(f) 75 | writer.writerow([ 76 | 'batch_size', 77 | 'trial', 78 | 'samples_per_second', 79 | 'memory_usage_bytes', 80 | ]) 81 | project_root = os.getcwd() 82 | for batch_size in args.batch_sizes: 83 | for trial in range(args.trials): 84 | session = AnalysisSession.new_from( 85 | project_root, args.entry_point) 86 | samples_per_second, memory_usage_bytes = make_measurements( 87 | session, batch_size) 88 | writer.writerow([ 89 | batch_size, 90 | trial, 91 | samples_per_second, 92 | memory_usage_bytes, 93 | ]) 94 | 95 | except AnalysisError as ex: 96 | print_analysis_error(ex) 97 | sys.exit(1) 98 | 99 | 100 | def main(args): 101 | check_skyline_preconditions(args) 102 | initialize_skyline(args) 103 | actual_main(args) 104 | -------------------------------------------------------------------------------- /deepview_profile/commands/memory.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | 
initialize_skyline, 8 | ) 9 | from deepview_profile.error_printing import print_analysis_error 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def register_command(subparsers): 15 | parser = subparsers.add_parser( 16 | "memory", 17 | help="Generate a memory usage report.", 18 | ) 19 | parser.add_argument( 20 | "entry_point", 21 | help="The entry point file in this project that contains the DeepView " 22 | "provider functions.", 23 | ) 24 | parser.add_argument( 25 | "-o", "--output", 26 | help="The location where the memory report should be stored.", 27 | required=True, 28 | ) 29 | parser.add_argument( 30 | "--log-file", 31 | help="The location of the log file.", 32 | ) 33 | parser.add_argument( 34 | "--debug", action="store_true", help="Log debug messages.") 35 | parser.set_defaults(func=main) 36 | 37 | 38 | def actual_main(args): 39 | from deepview_profile.analysis.session import AnalysisSession 40 | from deepview_profile.exceptions import AnalysisError 41 | 42 | if os.path.exists(args.output): 43 | print( 44 | "ERROR: The specified output file already exists.", 45 | file=sys.stderr, 46 | ) 47 | sys.exit(1) 48 | 49 | try: 50 | project_root = os.getcwd() 51 | session = AnalysisSession.new_from( 52 | project_root, args.entry_point) 53 | session.generate_memory_usage_report( 54 | save_report_to=args.output, 55 | ) 56 | except AnalysisError as ex: 57 | print_analysis_error(ex) 58 | sys.exit(1) 59 | 60 | 61 | def main(args): 62 | check_skyline_preconditions(args) 63 | initialize_skyline(args) 64 | actual_main(args) 65 | -------------------------------------------------------------------------------- /deepview_profile/commands/prediction_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import csv 5 | 6 | from deepview_profile.initialization import ( 7 | check_skyline_preconditions, 8 | initialize_skyline, 9 | ) 10 | from deepview_profile.error_printing import print_analysis_error 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def register_command(subparsers): 16 | parser = subparsers.add_parser( 17 | "prediction-models", 18 | help="Evaluate DeepView's prediction accuracy.", 19 | ) 20 | parser.add_argument( 21 | "entry_point", 22 | help="The entry point file in this project that contains the DeepView " 23 | "provider functions.", 24 | ) 25 | parser.add_argument( 26 | "-b", "--batch-sizes", 27 | help="The starting batch sizes to build models from.", 28 | type=int, 29 | nargs="+", 30 | required=True, 31 | ) 32 | parser.add_argument( 33 | "-o", "--output", 34 | help="The location where the evaluation output should be stored.", 35 | required=True, 36 | ) 37 | parser.add_argument( 38 | "--log-file", 39 | help="The location of the log file.", 40 | ) 41 | parser.add_argument( 42 | "--debug", action="store_true", help="Log debug messages.") 43 | parser.set_defaults(func=main) 44 | 45 | 46 | def get_model(session, batch_size): 47 | # This is a HACK 48 | session._batch_size = batch_size 49 | thpt_msg = session.measure_throughput() 50 | return ( 51 | (thpt_msg.peak_usage_bytes.slope, thpt_msg.peak_usage_bytes.bias), 52 | (thpt_msg.run_time_ms.slope, thpt_msg.run_time_ms.bias), 53 | ) 54 | 55 | 56 | def actual_main(args): 57 | from deepview_profile.analysis.session import AnalysisSession 58 | from deepview_profile.exceptions import AnalysisError 59 | 60 | if os.path.exists(args.output): 61 | print( 62 | "ERROR: The specified output file already exists.", 63 | file=sys.stderr, 64 | ) 65 
| sys.exit(1) 66 | 67 | try: 68 | with open(args.output, 'w') as f: 69 | writer = csv.writer(f) 70 | writer.writerow([ 71 | 'batch_size', 72 | 'run_time_ms_slope', 73 | 'run_time_ms_bias', 74 | 'memory_usage_bytes_slope', 75 | 'memory_usage_bytes_bias', 76 | ]) 77 | project_root = os.getcwd() 78 | for batch_size in args.batch_sizes: 79 | session = AnalysisSession.new_from( 80 | project_root, args.entry_point) 81 | memory_model, run_time_model = get_model( 82 | session, batch_size) 83 | writer.writerow([ 84 | batch_size, *run_time_model, *memory_model, 85 | ]) 86 | 87 | except AnalysisError as ex: 88 | print_analysis_error(ex) 89 | sys.exit(1) 90 | 91 | 92 | def main(args): 93 | check_skyline_preconditions(args) 94 | initialize_skyline(args) 95 | actual_main(args) 96 | -------------------------------------------------------------------------------- /deepview_profile/commands/time.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | initialize_skyline, 8 | ) 9 | from deepview_profile.error_printing import print_analysis_error 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def register_command(subparsers): 15 | parser = subparsers.add_parser( 16 | "time", 17 | help="Generate an iteration run time breakdown report.", 18 | ) 19 | parser.add_argument( 20 | "entry_point", 21 | help="The entry point file in this project that contains the DeepView " 22 | "provider functions.", 23 | ) 24 | parser.add_argument( 25 | "-o", "--output", 26 | help="The location where the iteration run time breakdown report " 27 | "should be stored.", 28 | required=True, 29 | ) 30 | parser.add_argument( 31 | "--log-file", 32 | help="The location of the log file.", 33 | ) 34 | parser.add_argument( 35 | "--debug", action="store_true", help="Log debug messages.") 36 | parser.set_defaults(func=main) 37 | 38 | 39 | def actual_main(args): 40 | from deepview_profile.analysis.session import AnalysisSession 41 | from deepview_profile.exceptions import AnalysisError 42 | 43 | if os.path.exists(args.output): 44 | print( 45 | "ERROR: The specified output file already exists.", 46 | file=sys.stderr, 47 | ) 48 | sys.exit(1) 49 | 50 | try: 51 | project_root = os.getcwd() 52 | session = AnalysisSession.new_from( 53 | project_root, args.entry_point) 54 | session.generate_run_time_breakdown_report( 55 | save_report_to=args.output, 56 | ) 57 | except AnalysisError as ex: 58 | print_analysis_error(ex) 59 | sys.exit(1) 60 | 61 | 62 | def main(args): 63 | check_skyline_preconditions(args) 64 | initialize_skyline(args) 65 | actual_main(args) 66 | -------------------------------------------------------------------------------- /deepview_profile/config/__init__.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | import deepview_profile.data 4 | 5 | 6 | class _Config: 7 | def __init__(self): 8 | self.Hints = None 9 | 10 | self.warm_up = 100 11 | self.measure_for = 10 12 | 13 | def initialize_hints_config(self, hints_file): 14 | if hints_file is None: 15 | file_to_open = deepview_profile.data.get_absolute_path('hints.yml') 16 | else: 17 | file_to_open = hints_file 18 | 19 | with open(file_to_open, 'r') as f: 20 | self.Hints = yaml.load(f, Loader=yaml.Loader) 21 | 22 | def parse_args(self, args): 23 | if 'hints_file' not in args: 24 | args.hints_file = None 25 | self.initialize_hints_config(args.hints_file) 26 
| 27 | if 'warm_up' in args and args.warm_up is not None: 28 | self.warm_up = args.warm_up 29 | if 'measure_for' in args and args.measure_for is not None: 30 | self.measure_for = args.measure_for 31 | 32 | 33 | Config = _Config() 34 | -------------------------------------------------------------------------------- /deepview_profile/data/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | _DATA_PATH = os.path.abspath(os.path.dirname(__file__)) 4 | 5 | 6 | def get_absolute_path(data_file): 7 | return os.path.join(_DATA_PATH, data_file) 8 | -------------------------------------------------------------------------------- /deepview_profile/data/hints.yml: -------------------------------------------------------------------------------- 1 | Conv2d: 2 | in_channels: 3 | effectiveness: 'high' 4 | natural_direction: true 5 | 6 | out_channels: 7 | effectiveness: 'high' 8 | natural_direction: true 9 | 10 | kernel_size: 11 | effectiveness: 'low' 12 | natural_direction: true 13 | 14 | 15 | Linear: 16 | in_features: 17 | effectiveness: 'high' 18 | natural_direction: true 19 | 20 | out_features: 21 | effectiveness: 'high' 22 | natural_direction: true 23 | 24 | 25 | MaxPool2d: 26 | kernel_size: 27 | effectiveness: 'low' 28 | natural_direction: true 29 | 30 | stride: 31 | effectiveness: 'low' 32 | natural_direction: false 33 | 34 | 35 | BatchNorm2d: 36 | num_features: 37 | effectiveness: 'high' 38 | natural_direction: true 39 | -------------------------------------------------------------------------------- /deepview_profile/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/db/__init__.py -------------------------------------------------------------------------------- /deepview_profile/db/database.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import sqlite3 4 | 5 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 6 | DB_PATH = os.path.join(BASE_DIR, "deepview.sqlite3") 7 | 8 | class DatabaseInterface: 9 | def __init__(self, database_name=DB_PATH) -> None: 10 | self.connection = sqlite3.connect( 11 | database_name, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES 12 | ) 13 | self.create_energy_table() 14 | 15 | def create_energy_table(self) -> None: 16 | self.connection.cursor().execute("CREATE TABLE IF NOT EXISTS ENERGY ( \ 17 | entry_point TEXT, \ 18 | cpu_component REAL, \ 19 | gpu_component REAL, \ 20 | batch_size INT, \ 21 | ts TIMESTAMP \ 22 | );") 23 | 24 | 25 | class EnergyTableInterface: 26 | def __init__(self, database_connection: sqlite3.Connection): 27 | self.database_connection: sqlite3.Connection = database_connection 28 | 29 | @staticmethod 30 | def is_valid_entry(entry: list) -> bool: 31 | ''' 32 | Validates an entry in the Energy table by testing if the length is 4, 33 | and the types match the columns. Note that timestamp is not part of the entry. 34 | Returns True if it is valid, else False 35 | ''' 36 | return len(entry) == 4 and type(entry[0]) == str and type(entry[1]) == float \ 37 | and type(entry[2]) == float and type(entry[3]) == int 38 | 39 | @staticmethod 40 | def is_valid_entry_with_timestamp(entry: list) -> bool: 41 | ''' 42 | Validates an entry in the Energy table by testing if the length is 5, 43 | and the types match the columns.
Returns True if it is valid, else False 44 | ''' 45 | return len(entry) == 5 and type(entry[0]) == str and type(entry[1]) == float \ 46 | and type(entry[2]) == float and type(entry[3]) == int \ 47 | and type(entry[4]) == datetime.datetime 48 | 49 | def add_entry(self, entry: list) -> bool: 50 | ''' 51 | Validates an entry and then adds that entry into the Energy table. Note that 52 | current timestamp is added by this function. Returns False if the entry is 53 | not a valid format, or if the insertion failed. Else returns True 54 | ''' 55 | if self.is_valid_entry(entry): 56 | try: 57 | entry.append(datetime.datetime.now()) 58 | cursor = self.database_connection.cursor() 59 | cursor.execute("INSERT INTO ENERGY VALUES(?, ?, ?, ?, ?)", entry) 60 | self.database_connection.commit() 61 | return True 62 | except sqlite3.IntegrityError as e: 63 | print(e) 64 | return False 65 | else: 66 | return False 67 | 68 | def get_latest_n_entries_of_entry_point(self, n: int, entry_point: str) -> list: 69 | ''' 70 | Gets the n latest entries of a given entry point 71 | ''' 72 | params = [entry_point, n] 73 | cursor = self.database_connection.cursor() 74 | results = cursor.execute( 75 | "SELECT * FROM ENERGY WHERE entry_point=? ORDER BY ts DESC LIMIT ?;", 76 | params 77 | ).fetchall() 78 | return results 79 | -------------------------------------------------------------------------------- /deepview_profile/energy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/energy/__init__.py -------------------------------------------------------------------------------- /deepview_profile/energy/measurer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from threading import Thread 3 | import numpy as np 4 | 5 | import pynvml as N 6 | from pyRAPL import Sensor 7 | 8 | class CPUMeasurer: 9 | def __init__(self, interval): 10 | self.interval = interval 11 | self.power = [] 12 | self.last_cpu = None 13 | self.last_dram = None 14 | 15 | def measurer_init(self): 16 | self.sensor = None 17 | try: 18 | self.sensor = Sensor() 19 | energy = self.sensor.energy() 20 | self.last_cpu = np.array(energy[0::2]) 21 | self.last_dram = np.array(energy[1::2]) 22 | except Exception: 23 | print("Warning. Failed to get CPU energy. \ 24 | You need to set the right permissions for pyRAPL") 25 | print("eg. 
$ sudo chmod -R a+r /sys/class/powercap/intel-rapl") 26 | 27 | def measurer_measure(self): 28 | # Get energy consumed so far (since last CPU reset) 29 | if self.sensor is None: 30 | return 31 | 32 | energy = self.sensor.energy() 33 | cpu = np.array(energy[0::2]) 34 | dram = np.array(energy[1::2]) 35 | 36 | # Compare against last measurement to determine energy since last measure 37 | diff_cpu = cpu - self.last_cpu 38 | # (The DRAM delta relative to self.last_dram is tracked below but is not yet folded into the reported power.) 39 | 40 | # 1J = 10^6 uJ 41 | # The cpu used this much since the last measurement 42 | # We have mW = 1000*J/s = 1000*(uJ/10^6)/s 43 | cpu_total = np.sum(diff_cpu) 44 | cpu_mW = 1000 * (cpu_total / 1e6) / self.interval 45 | self.power.append(cpu_mW) 46 | 47 | self.last_cpu = cpu 48 | self.last_dram = dram 49 | 50 | def measurer_deallocate(self): 51 | pass 52 | 53 | def total_energy(self): 54 | if len(self.power) == 0: 55 | return None 56 | 57 | # J = W * s, 1W = 1000 mW 58 | energy = self.interval * sum(self.power) / 1000.0 59 | return energy 60 | 61 | class GPUMeasurer: 62 | def __init__(self, interval): 63 | self.interval = interval 64 | self.power = [] 65 | 66 | def measurer_init(self): 67 | N.nvmlInit() 68 | self.device_handle = N.nvmlDeviceGetHandleByIndex(0) 69 | 70 | def measurer_measure(self): 71 | power = N.nvmlDeviceGetPowerUsage(self.device_handle) 72 | self.power.append(power) 73 | 74 | def measurer_deallocate(self): 75 | N.nvmlShutdown() 76 | 77 | def total_energy(self): 78 | # J = W * s, 1W = 1000 mW 79 | energy = self.interval * sum(self.power) / 1000.0 80 | return energy 81 | 82 | class EnergyMeasurer: 83 | def __init__(self): 84 | self.sleep_interval = 0.1 85 | self.measuring = False 86 | self.measure_thread = None 87 | 88 | self.measurers = { 89 | "cpu": CPUMeasurer(self.sleep_interval), 90 | "gpu": GPUMeasurer(self.sleep_interval), 91 | } 92 | 93 | def run_measure(self): 94 | # Initialize 95 | for m in self.measurers: 96 | self.measurers[m].measurer_init() 97 | 98 | # Run measurement loop 99 | while self.measuring: 100 | for m in self.measurers: 101 | self.measurers[m].measurer_measure() 102 | time.sleep(self.sleep_interval) 103 | 104 | # Cleanup 105 | for m in self.measurers: 106 | self.measurers[m].measurer_deallocate() 107 | 108 | def begin_measurement(self): 109 | assert(self.measure_thread is None) 110 | self.measure_thread = Thread(target=self.run_measure) 111 | self.measuring = True 112 | self.measure_thread.start() 113 | 114 | def end_measurement(self): 115 | self.measuring = False 116 | self.measure_thread.join() 117 | self.measure_thread = None 118 | 119 | def total_energy(self): 120 | total_energy = 0.
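# Each component measurer already reports joules (its mW samples times the sleep interval, divided by 1000), so the machine-wide figure is the sum of whichever components produced a reading.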
121 | for m in self.measurers: 122 | e = self.measurers[m].total_energy() 123 | if e is not None: 124 | total_energy += e 125 | return total_energy 126 | 127 | def cpu_energy(self): 128 | return self.measurers["cpu"].total_energy() 129 | 130 | def gpu_energy(self): 131 | return self.measurers["gpu"].total_energy() 132 | -------------------------------------------------------------------------------- /deepview_profile/error_printing.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def print_analysis_error(error, file=sys.stderr): 5 | print( 6 | "DeepView encountered an error when profiling your model:", 7 | file=file, 8 | ) 9 | print("->", str(error), file=file) 10 | 11 | if error.file_context is not None: 12 | if error.file_context.line_number is not None: 13 | message = ( 14 | "This error occurred on line {} when processing {}.".format( 15 | error.file_context.line_number, 16 | error.file_context.file_path, 17 | ) 18 | ) 19 | else: 20 | message = "This error occurred when processing {}.".format( 21 | error.file_context.file_path, 22 | ) 23 | print("->", message, file=file) 24 | -------------------------------------------------------------------------------- /deepview_profile/evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import deepview_profile.commands.measurements 5 | import deepview_profile.commands.prediction_models 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser( 10 | prog="deepview-evaluate", 11 | description="DeepView Evaluation Tool", 12 | ) 13 | subparsers = parser.add_subparsers(title="Commands") 14 | deepview_profile.commands.measurements.register_command(subparsers) 15 | deepview_profile.commands.prediction_models.register_command(subparsers) 16 | args = parser.parse_args() 17 | 18 | if 'func' not in args: 19 | parser.print_help() 20 | sys.exit(1) 21 | 22 | # Run the specified command 23 | args.func(args) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /deepview_profile/exceptions.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import contextlib 3 | import os 4 | import traceback 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | FileContext = collections.namedtuple( 10 | 'FileContext', 11 | ['file_path', 'line_number'], 12 | ) 13 | 14 | 15 | class AnalysisError(RuntimeError): 16 | def __init__(self, message, exception_type=None): 17 | if exception_type is None: 18 | super(AnalysisError, self).__init__(message) 19 | else: 20 | super(AnalysisError, self).__init__( 21 | '{}: {}'.format(exception_type.__name__, message)) 22 | 23 | self.file_context = None 24 | 25 | def with_file_context(self, file_path, line_number=None): 26 | self.file_context = FileContext( 27 | file_path=file_path, 28 | line_number=line_number, 29 | ) 30 | return self 31 | 32 | 33 | class NoConnectionError(Exception): 34 | def __init__(self, message): 35 | super().__init__(message) 36 | 37 | 38 | class _SuspendExecution(Exception): 39 | # This exception is used internally by the BackwardInterceptor to return 40 | # early from the user's code. 
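# Raising it unwinds the user's call stack; exceptions_as_analysis_errors() below swallows it rather than reporting a failure.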
41 | pass 42 | 43 | 44 | @contextlib.contextmanager 45 | def exceptions_as_analysis_errors(project_root): 46 | try: 47 | yield 48 | except _SuspendExecution: 49 | # A _SuspendExecution exception does not indicate an error - we use it 50 | # to return early from the user's code. 51 | pass 52 | except AnalysisError: 53 | # The user's code may raise an AnalysisError (e.g., from the wrapped 54 | # providers). If this happens, we should pass the exception through. 55 | raise 56 | except Exception as ex: 57 | logger.debug( 58 | "An error occurred during analysis (could be a problem with the " 59 | "user's code):", 60 | exc_info=ex, 61 | ) 62 | if isinstance(ex, SyntaxError): 63 | error = AnalysisError( 64 | 'DeepView encountered a syntax error while profiling your ' 65 | 'model.' 66 | ) 67 | else: 68 | error = AnalysisError(str(ex), type(ex)) 69 | 70 | # Extract the relevant file context, if it is available, starting by 71 | # inspecting the exception itself. 72 | if hasattr(ex, 'filename') and ex.filename.startswith(project_root): 73 | _add_context_to_error( 74 | error, project_root, ex.filename, getattr(ex, 'lineno', None)) 75 | else: 76 | stack = traceback.extract_tb(ex.__traceback__) 77 | for frame in reversed(stack): 78 | if frame.filename.startswith(project_root): 79 | _add_context_to_error( 80 | error, project_root, frame.filename, frame.lineno) 81 | break 82 | 83 | # Special case: Add a more detailed error message when there's an 84 | # input number mismatch. 85 | if (error.file_context is None and 86 | str(error).startswith("TypeError: forward() takes")): 87 | error = AnalysisError( 88 | "{}. This error could be due to a mismatch between the number " 89 | "of inputs that your model expects and the number of inputs " 90 | "that your input provider returns.".format(str(error)) 91 | ) 92 | 93 | raise error 94 | 95 | 96 | def _add_context_to_error(error, project_root, file_path, line_number): 97 | error.with_file_context( 98 | file_path=os.path.relpath(file_path, start=project_root), 99 | line_number=line_number, 100 | ) 101 | -------------------------------------------------------------------------------- /deepview_profile/initialization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def check_skyline_preconditions(args): 8 | """ 9 | This is the first function that should run before importing any other 10 | DeepView code. 11 | """ 12 | _configure_logging(args) 13 | if not _validate_dependencies(): 14 | sys.exit(1) 15 | if not _validate_gpu(): 16 | sys.exit(1) 17 | 18 | 19 | def initialize_skyline(args): 20 | """ 21 | Performs common initialization tasks. 22 | """ 23 | from deepview_profile.config import Config 24 | 25 | Config.parse_args(args) 26 | 27 | def _configure_logging(args): 28 | kwargs = { 29 | "format": "%(asctime)s %(levelname)-8s %(message)s", 30 | "datefmt": "%Y-%m-%d %H:%M", 31 | "level": logging.DEBUG if args.debug else logging.INFO, 32 | } 33 | 34 | if args.log_file is not None: 35 | kwargs["filename"] = args.log_file 36 | 37 | logging.basicConfig(**kwargs) 38 | 39 | 40 | def _validate_dependencies(): 41 | # NOTE: If you make a change here, make sure to update the dependency 42 | # list in pyproject.toml as well.
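# Importing each module is enough to confirm it is installed; the PyPI package names (which differ from some module names) are noted inline below.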
43 | try: 44 | import yaml # pyyaml on PyPI # noqa: F401 45 | import pynvml # nvidia-ml-py3 on PyPI # noqa: F401 46 | import google.protobuf # protobuf on PyPI # noqa: F401 47 | import numpy # noqa: F401 48 | import torch # noqa: F401 49 | return True 50 | except ImportError as ex: 51 | logger.error( 52 | "DeepView could not find the '%s' module, which is a required " 53 | "dependency. Please make sure all the required dependencies are " 54 | "installed before launching DeepView. If you use a package " 55 | "manager, these dependencies will be automatically installed for " 56 | "you.", 57 | ex.name, 58 | ) 59 | return False 60 | 61 | 62 | def _validate_gpu(): 63 | import torch 64 | if not torch.cuda.is_available(): 65 | logger.error( 66 | "DeepView did not detect a GPU on this machine. DeepView only " 67 | "profiles deep learning workloads on GPUs." 68 | ) 69 | return False 70 | return True 71 | -------------------------------------------------------------------------------- /deepview_profile/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/io/__init__.py -------------------------------------------------------------------------------- /deepview_profile/io/connection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import select 3 | import struct 4 | from threading import Thread 5 | 6 | from deepview_profile.io.sentinel import Sentinel 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class Connection: 12 | """ 13 | Manages an open connection to a client. 14 | 15 | This class must be constructed with an already-connected 16 | socket. Upon receipt of a message on the socket, the 17 | handler_function will be called with the raw message. 18 | 19 | Socket communication is performed using length-prefixed 20 | binary protobuf messages. 21 | 22 | The stop function must be called to close the connection. 
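As an illustrative sketch (not additional API): sending the 5-byte payload b'hello' puts struct.pack('!I', 5) + b'hello' on the wire, which is exactly the framing that send_bytes below implements.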
23 | """ 24 | def __init__(self, socket, address, handler_function, closed_handler): 25 | self.address = address 26 | self._socket = socket 27 | self._reader = Thread(target=self._socket_read) 28 | self._handler_function = handler_function 29 | self._closed_handler = closed_handler 30 | self._sentinel = Sentinel() 31 | self._project_root = "" 32 | self._entry_point = "" 33 | 34 | def start(self): 35 | self._sentinel.start() 36 | self._reader.start() 37 | 38 | def stop(self): 39 | self._sentinel.signal_exit() 40 | self._reader.join() 41 | self._socket.close() 42 | self._sentinel.stop() 43 | 44 | def send_bytes(self, raw_bytes): 45 | self._socket.sendall(struct.pack('!I', len(raw_bytes))) 46 | self._socket.sendall(raw_bytes) 47 | 48 | def _socket_read(self): 49 | buffer = b'' 50 | message_length = -1 51 | 52 | try: 53 | while True: 54 | read_ready, _, _ = select.select([ 55 | self._socket, self._sentinel.read_pipe], [], []) 56 | if self._sentinel.should_exit(read_ready): 57 | logger.debug( 58 | "Connection (%s:%d) is being closed.", 59 | *self.address, 60 | ) 61 | self._sentinel.consume_exit_signal() 62 | break 63 | 64 | data = self._socket.recv(4096) 65 | if len(data) == 0: 66 | logger.debug( 67 | "Connection (%s:%d) has been closed by the client.", 68 | *self.address, 69 | ) 70 | self._closed_handler(self.address) 71 | break 72 | 73 | buffer += data 74 | 75 | # Process all messages that exist in the buffer 76 | while True: 77 | if message_length <= 0: 78 | if len(buffer) < 4: 79 | break 80 | # Network byte order 32-bit unsigned integer 81 | message_length = struct.unpack('!I', buffer[:4])[0] 82 | buffer = buffer[4:] 83 | 84 | if len(buffer) < message_length: 85 | break 86 | 87 | try: 88 | self._handler_function( 89 | buffer[:message_length], self.address) 90 | finally: 91 | buffer = buffer[message_length:] 92 | message_length = -1 93 | 94 | except Exception: 95 | logger.exception("Connection unexpectedly stopping...") 96 | 97 | @property 98 | def project_root(self): 99 | return self._project_root 100 | 101 | @property 102 | def entry_point(self): 103 | return self._entry_point 104 | 105 | def set_project_paths(self, project_root, entry_point): 106 | self._project_root = project_root 107 | self._entry_point = entry_point 108 | 109 | class ConnectionState: 110 | def __init__(self): 111 | # NOTE: This counter is modified by a thread in the main executor, but 112 | # will be read by other threads. No R/W lock is needed because of 113 | # the Python GIL. 114 | # 115 | # NOTE: The sequence number from the client must be non-negative 116 | self.sequence_number = -1 117 | 118 | # Connections have two states: uninitialized and "ready" (initialized) 119 | # As a result for simplicity, we use a boolean to represent the state. 120 | self.initialized = False 121 | 122 | # The plugin may disconnect from us while we are processing a request. 123 | # We use this flag to indicate whether the connection still "exists" 124 | # to allow requests to abort early. 
125 | self.connected = True 126 | 127 | def update_sequence(self, request): 128 | if request.sequence_number <= self.sequence_number: 129 | return 130 | self.sequence_number = request.sequence_number 131 | 132 | def is_request_current(self, request): 133 | return request.sequence_number >= self.sequence_number 134 | -------------------------------------------------------------------------------- /deepview_profile/io/connection_acceptor.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import select 3 | import socket 4 | from threading import Thread 5 | 6 | from deepview_profile.io.sentinel import Sentinel 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ConnectionAcceptor: 12 | """ 13 | Manages the "server socket" for the agent, allowing it to accept 14 | connection requests from other agents. 15 | 16 | Each time a connection is received, the handler_function is called 17 | with the new socket and address. 18 | """ 19 | def __init__(self, host, port, handler_function): 20 | self._host = host 21 | self._port = port 22 | self._server_socket = socket.socket( 23 | socket.AF_INET, 24 | socket.SOCK_STREAM, 25 | ) 26 | self._server_socket.setsockopt( 27 | socket.SOL_SOCKET, 28 | socket.SO_REUSEADDR, 29 | 1, 30 | ) 31 | self._handler_function = handler_function 32 | self._acceptor = Thread(target=self._accept_connections) 33 | self._sentinel = Sentinel() 34 | 35 | def start(self): 36 | self._server_socket.bind((self._host, self._port)) 37 | self._port = self._server_socket.getsockname()[1] 38 | self._server_socket.listen() 39 | self._sentinel.start() 40 | self._acceptor.start() 41 | logger.debug( 42 | "DeepView is listening for connections on (%s:%d).", 43 | self._host, 44 | self._port, 45 | ) 46 | 47 | def stop(self): 48 | self._sentinel.signal_exit() 49 | self._acceptor.join() 50 | self._server_socket.close() 51 | self._sentinel.stop() 52 | logging.debug( 53 | "DeepView has stopped listening for connections on (%s:%d).", 54 | self._host, 55 | self._port, 56 | ) 57 | 58 | @property 59 | def host(self): 60 | return self._host 61 | 62 | @property 63 | def port(self): 64 | return self._port 65 | 66 | def _accept_connections(self): 67 | try: 68 | while True: 69 | read_ready, _, _ = select.select( 70 | [self._server_socket, self._sentinel.read_pipe], [], []) 71 | 72 | if self._sentinel.should_exit(read_ready): 73 | self._sentinel.consume_exit_signal() 74 | break 75 | 76 | socket, address = self._server_socket.accept() 77 | host, port = address 78 | logger.debug("Accepted a connection to (%s:%d).", host, port) 79 | self._handler_function(socket, address) 80 | except Exception: 81 | logging.exception( 82 | "DeepView has unexpectedly stopped accepting connections.") 83 | -------------------------------------------------------------------------------- /deepview_profile/io/connection_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import socket 3 | 4 | from deepview_profile.io.connection import Connection, ConnectionState 5 | from deepview_profile.exceptions import NoConnectionError 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class ConnectionManager: 11 | def __init__(self, message_handler, closed_handler): 12 | self._connections = {} 13 | self._message_handler = message_handler 14 | self._closed_handler = closed_handler 15 | 16 | def register_connection(self, opened_socket, address): 17 | self._connections[address] = ( 18 | Connection( 19 | 
opened_socket, 20 | address, 21 | self._message_handler, 22 | self._closed_handler, 23 | ), 24 | ConnectionState(), 25 | ) 26 | self._connections[address][0].start() 27 | 28 | def remove_connection(self, address): 29 | connection, state = self.get_connection_tuple(address) 30 | connection.stop() 31 | state.connected = False 32 | del self._connections[address] 33 | logger.debug("Removed connection to (%s:%d).", *address) 34 | 35 | def get_connection(self, address): 36 | return self.get_connection_tuple(address)[0] 37 | 38 | def get_connection_state(self, address): 39 | return self.get_connection_tuple(address)[1] 40 | 41 | def get_connection_tuple(self, address): 42 | if address not in self._connections: 43 | host, port = address 44 | raise NoConnectionError( 45 | "Connection to ({}:{}) does not exist.".format(host, port)) 46 | return self._connections[address] 47 | 48 | def broadcast(self, raw_message): 49 | for _, (connection, _) in self._connections.items(): 50 | connection.send_bytes(raw_message) 51 | 52 | def connect_to(self, host, port): 53 | new_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 54 | new_socket.connect((host, port)) 55 | self.register_connection(new_socket, (host, port)) 56 | 57 | def stop(self): 58 | for _, (connection, state) in self._connections.items(): 59 | connection.stop() 60 | state.connected = False 61 | self._connections.clear() 62 | -------------------------------------------------------------------------------- /deepview_profile/io/sentinel.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Sentinel: 5 | def __init__(self): 6 | self._read_pipe = None 7 | self._write_pipe = None 8 | 9 | def start(self): 10 | self._read_pipe, self._write_pipe = os.pipe() 11 | 12 | def stop(self): 13 | os.close(self._write_pipe) 14 | os.close(self._read_pipe) 15 | self._read_pipe = None 16 | self._write_pipe = None 17 | 18 | @property 19 | def read_pipe(self): 20 | return self._read_pipe 21 | 22 | def should_exit(self, ready_descriptors): 23 | return self._read_pipe in ready_descriptors 24 | 25 | def signal_exit(self): 26 | os.write(self._write_pipe, b'\0') 27 | 28 | def consume_exit_signal(self): 29 | # This should only be called after signal_exit(), 30 | # otherwise the calling thread will block.
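# signal_exit() writes exactly one byte, so a single one-byte read consumes one pending signal.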
31 | os.read(self._read_pipe, 1) 32 | -------------------------------------------------------------------------------- /deepview_profile/lru_cache.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class LRUCache: 4 | def __init__(self, max_size=128): 5 | self._max_size = max_size 6 | self._cache_by_key = {} 7 | self._cache_by_use = _LRUCacheList() 8 | 9 | def query(self, key): 10 | if key not in self._cache_by_key: 11 | return None 12 | node = self._cache_by_key[key] 13 | self._cache_by_use.move_to_front(node) 14 | return node.value 15 | 16 | def add(self, key, value): 17 | if self._cache_by_use.size >= self._max_size: 18 | removed = self._cache_by_use.remove_back() 19 | del self._cache_by_key[removed.key] 20 | node = self._cache_by_use.add_to_front(key, value) 21 | self._cache_by_key[key] = node 22 | 23 | 24 | class _LRUCacheList: 25 | def __init__(self): 26 | # Front of the list: most recently used 27 | self.front = None 28 | self.back = None 29 | self.size = 0 30 | 31 | def add_to_front(self, key, value): 32 | node = _LRUCacheNode(key, value) 33 | self._add_to_front(node) 34 | self.size += 1 35 | return node 36 | 37 | def _add_to_front(self, node): 38 | if self.size == 0: 39 | self.front = node 40 | self.back = node 41 | else: 42 | node.next = self.front 43 | self.front.prev = node 44 | self.front = node 45 | 46 | def move_to_front(self, node): 47 | if self.front == node: 48 | # Nothing needs to be done if the node is already at the front of 49 | # the list 50 | return 51 | 52 | if node.next is None: 53 | # Back of the list 54 | node.prev.next = None 55 | self.back = node.prev 56 | node.prev = None 57 | else: 58 | # Middle of the list 59 | node.prev.next = node.next 60 | node.next.prev = node.prev 61 | node.next = None 62 | node.prev = None 63 | 64 | self._add_to_front(node) 65 | 66 | def remove_back(self): 67 | if self.size == 0: 68 | return None 69 | 70 | node = self.back 71 | 72 | if self.size == 1: 73 | self.front = None 74 | self.back = None 75 | else: 76 | node.prev.next = None 77 | self.back = node.prev 78 | 79 | self.size -= 1 80 | return node 81 | 82 | 83 | class _LRUCacheNode: 84 | def __init__(self, key, value): 85 | self.key = key 86 | self.value = value 87 | self.prev = None 88 | self.next = None 89 | -------------------------------------------------------------------------------- /deepview_profile/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/models/__init__.py -------------------------------------------------------------------------------- /deepview_profile/models/analysis.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class OperationInfo: 4 | def __init__(self, bound_name, op_name, ast_node, position, perf_hints): 5 | self.bound_name = bound_name 6 | self.op_name = op_name 7 | self.ast_node = ast_node 8 | self.position = position 9 | self.perf_hints = perf_hints 10 | self.usages = [] 11 | self.runtime_us = 0 12 | 13 | def set_usages(self, usages): 14 | self.usages = usages 15 | 16 | def add_to_runtime_us(self, runtime_us): 17 | self.runtime_us += runtime_us 18 | 19 | 20 | class OperationInfoMap: 21 | def __init__(self): 22 | self.operations = {} 23 | 24 | def add_operation_info(self, operation): 25 | self.operations[operation.bound_name] = operation 26 | 27 | def get_operation_info_by_bound_name(self, 
bound_name): 28 | if bound_name not in self.operations: 29 | return None 30 | return self.operations[bound_name] 31 | 32 | def get_operations(self): 33 | return self.operations.values() 34 | 35 | def set_runtimes_from_cache(self, cached_info_map): 36 | """ 37 | Used to set the runtimes from cache for when the parsed code has not 38 | changed. 39 | """ 40 | for bound_name, op_info in self.operations.items(): 41 | cached_op_info = cached_info_map.get_operation_info_by_bound_name( 42 | bound_name) 43 | op_info.runtime_us = cached_op_info.runtime_us 44 | 45 | 46 | class AnnotationInfo: 47 | def __init__(self, input_size, start_position, end_position): 48 | self.input_size = input_size 49 | self.start_position = start_position 50 | self.end_position = end_position 51 | 52 | 53 | class PerformanceHint: 54 | def __init__(self, keyword, position, effectiveness, natural_direction): 55 | self.keyword = keyword 56 | self.position = position 57 | self.effectiveness = effectiveness 58 | self.natural_direction = natural_direction 59 | 60 | 61 | class LinearModel: 62 | def __init__(self, coefficient, bias): 63 | self.coefficient = coefficient 64 | self.bias = bias 65 | 66 | def __repr__(self): 67 | return 'LinearModel(coefficient={:.4f}, bias={:.4f})'.format( 68 | self.coefficient, self.bias) 69 | 70 | def evaluate(self, x): 71 | return self.coefficient * x + self.bias 72 | 73 | def inverse(self, y): 74 | return (y - self.bias) / self.coefficient 75 | 76 | 77 | class MemoryInfo: 78 | def __init__(self, usage_model_mb, usage_mb, max_capacity_mb): 79 | self.usage_model_mb = usage_model_mb 80 | self.usage_mb = usage_mb 81 | self.max_capacity_mb = max_capacity_mb 82 | 83 | def __repr__(self): 84 | return 'MemoryInfo(model={}, usage_mb={}, capacity_mb={})'.format( 85 | self.usage_model_mb, self.usage_mb, self.max_capacity_mb) 86 | 87 | 88 | class ThroughputInfo: 89 | def __init__( 90 | self, 91 | throughput, 92 | max_throughput, 93 | runtime_model_ms 94 | ): 95 | self.throughput = throughput 96 | self.max_throughput = max_throughput 97 | self.runtime_model_ms = runtime_model_ms 98 | 99 | def __repr__(self): 100 | return ( 101 | 'ThroughputInfo(thpt={}, max_thpt={}, model={})' 102 | .format( 103 | self.throughput, 104 | self.max_throughput, 105 | self.runtime_model_ms, 106 | ) 107 | ) 108 | 109 | def batch_from_throughput(self, throughput): 110 | # Thpt = batch / runtime_model 111 | throughput_ms = throughput / 1000 112 | return ( 113 | (throughput_ms * self.runtime_model_ms.bias) / 114 | (1 - throughput_ms * self.runtime_model_ms.coefficient) 115 | ) 116 | 117 | 118 | class PerformanceLimits: 119 | def __init__(self, max_batch_size, throughput_limit): 120 | self.max_batch_size = max_batch_size 121 | self.throughput_limit = throughput_limit 122 | 123 | def __repr__(self): 124 | return ( 125 | 'PerformanceLimits(max_batch={:.2f}, thpt_limit={:.2f})'.format( 126 | self.max_batch_size, 127 | self.throughput_limit, 128 | ) 129 | ) 130 | -------------------------------------------------------------------------------- /deepview_profile/models/source_map.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Position: 4 | def __init__(self, line, column): 5 | self.line = line 6 | self.column = column 7 | 8 | def offset(self, length): 9 | return Position(self.line, self.column + length) 10 | 11 | 12 | class SourceMap: 13 | def __init__(self, source_code): 14 | self._source_by_line = source_code.splitlines() 15 | 16 | def find_position(self, snippet, line_offset=0): 17 | 
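Backing up to models/analysis.py: batch_from_throughput() simply inverts Thpt(b) = b / (coef * b + bias). A quick self-contained check of that algebra, using illustrative (not measured) coefficients:

runtime = LinearModel(coefficient=0.5, bias=10.0)    # runtime_ms(b) = 0.5*b + 10
info = ThroughputInfo(throughput=None, max_throughput=None, runtime_model_ms=runtime)
batch = info.batch_from_throughput(1500.0)           # 1500 samples/s -> batch = 60
assert abs(batch / runtime.evaluate(batch) * 1000 - 1500.0) < 1e-6   # round-trips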
for offset, line in enumerate(self._source_by_line[line_offset:]): 18 | index = line.find(snippet) 19 | if index == -1: 20 | continue 21 | # NOTE: We don't add 1 here to make the line number 0-based 22 | return Position(line_offset + offset, index) 23 | 24 | return None 25 | 26 | def find_position_on_line(self, snippet, offset_position): 27 | if offset_position.line >= len(self._source_by_line): 28 | return None 29 | index = self._source_by_line[offset_position.line].find( 30 | snippet, offset_position.column) 31 | 32 | if index == -1: 33 | return None 34 | else: 35 | return Position(offset_position.line, index) 36 | -------------------------------------------------------------------------------- /deepview_profile/nvml.py: -------------------------------------------------------------------------------- 1 | import pynvml 2 | 3 | 4 | class NVML: 5 | def __enter__(self): 6 | self.start() 7 | return self 8 | 9 | def __exit__(self, exc_type, exc_value, traceback): 10 | self.stop() 11 | 12 | def start(self): 13 | pynvml.nvmlInit() 14 | 15 | def stop(self): 16 | pynvml.nvmlShutdown() 17 | 18 | def get_memory_capacity(self): 19 | # TODO: Support multiple devices 20 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 21 | return pynvml.nvmlDeviceGetMemoryInfo(handle) 22 | 23 | def get_device_names(self): 24 | device_names = [] 25 | for i in range(pynvml.nvmlDeviceGetCount()): 26 | handle = pynvml.nvmlDeviceGetHandleByIndex(i) 27 | device_name = pynvml.nvmlDeviceGetName(handle).decode("utf-8") 28 | device_names.append(device_name) 29 | return device_names 30 | 31 | -------------------------------------------------------------------------------- /deepview_profile/pl/deepview_callback.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple 2 | 3 | import time 4 | import os 5 | import json 6 | import torch 7 | import sys 8 | 9 | try: 10 | import pytorch_lightning as pl 11 | except ImportError: 12 | sys.exit("Please install pytorch-lightning:\nuse: pip install lightning\nExiting...") 13 | 14 | from termcolor import colored 15 | from deepview_profile.pl.deepview_interface import trigger_profiling 16 | 17 | 18 | class DeepViewProfilerCallback(pl.Callback): 19 | def __init__(self, profile_name: str): 20 | super().__init__() 21 | self.profiling_triggered = False 22 | self.output_filename = f"{profile_name}_{int(time.time())}.json" 23 | 24 | def on_train_batch_end( 25 | self, 26 | trainer: pl.Trainer, 27 | pl_module: pl.LightningModule, 28 | outputs, 29 | batch, 30 | batch_idx, 31 | ): 32 | 33 | # only do this once 34 | if self.profiling_triggered: 35 | return 36 | 37 | print(colored("DeepViewProfiler: Running profiling.", "green")) 38 | 39 | """ 40 | need 3 things: 41 | 42 | input_provider: just return batch 43 | model_provider: just return pl_module 44 | iteration_provider: a lambda function that (a) calls pl_module.forward_step and (b) calls loss.backward 45 | """ 46 | initial_batch_size = batch[0].shape[0] 47 | 48 | def input_provider(batch_size: int = initial_batch_size) -> Tuple: 49 | model_inputs = list() 50 | for elem in batch: 51 | # we assume the first dimension is the batch dimension 52 | model_inputs.append( 53 | elem[:1].repeat([batch_size] + [1 for _ in elem.shape[1:]]) 54 | ) 55 | return (tuple(model_inputs), 0) 56 | 57 | model_provider = lambda: pl_module 58 | 59 | def iteration_provider(module: torch.nn.Module) -> Callable: 60 | def iteration(*args, **kwargs): 61 | loss = module.training_step(*args, **kwargs) 62 | 
loss.backward() 63 | 64 | return iteration 65 | 66 | project_root = os.getcwd() 67 | 68 | output = trigger_profiling( 69 | project_root, 70 | "entry_point.py", 71 | initial_batch_size, 72 | input_provider, 73 | model_provider, 74 | iteration_provider, 75 | ) 76 | 77 | with open(self.output_filename, "w") as fp: 78 | json.dump(output, fp, indent=4) 79 | 80 | print( 81 | colored( 82 | f"DeepViewProfiler: Profiling complete! Report written to ", "green" 83 | ) 84 | + colored(self.output_filename, "green", attrs=["bold"]) 85 | ) 86 | print( 87 | colored( 88 | f"DeepViewProfiler: View your report at https://deepview.centml.ai", 89 | "green", 90 | ) 91 | ) 92 | self.profiling_triggered = True 93 | -------------------------------------------------------------------------------- /deepview_profile/pl/deepview_interface.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Callable 3 | import platform 4 | 5 | from deepview_profile.analysis.session import AnalysisSession 6 | from deepview_profile.exceptions import AnalysisError 7 | from deepview_profile.nvml import NVML 8 | 9 | # from deepview_profile.utils import release_memory, next_message_to_dict, files_encoded_unique 10 | from deepview_profile.utils import release_memory, files_encoded_unique 11 | from deepview_profile.error_printing import print_analysis_error 12 | 13 | from google.protobuf.json_format import MessageToDict 14 | 15 | 16 | def measure_breakdown(session, nvml): 17 | print("analysis: running measure_breakdown()") 18 | yield session.measure_breakdown(nvml) 19 | release_memory() 20 | 21 | 22 | def measure_throughput(session): 23 | print("analysis: running measure_throughput()") 24 | yield session.measure_throughput() 25 | release_memory() 26 | 27 | 28 | def habitat_predict(session): 29 | print("analysis: running deepview_predict()") 30 | yield session.habitat_predict() 31 | release_memory() 32 | 33 | 34 | def measure_utilization(session): 35 | print("analysis: running measure_utilization()") 36 | yield session.measure_utilization() 37 | release_memory() 38 | 39 | 40 | def energy_compute(session): 41 | print("analysis: running energy_compute()") 42 | yield session.energy_compute() 43 | release_memory() 44 | 45 | 46 | def ddp_analysis(session): 47 | print("analysis: running ddp_computation()") 48 | yield session.ddp_computation() 49 | release_memory() 50 | 51 | 52 | def hardware_information(nvml): 53 | hardware_info = { 54 | "hostname": platform.node(), 55 | "os": " ".join(list(platform.uname())), 56 | "gpus": nvml.get_device_names(), 57 | } 58 | return hardware_info 59 | 60 | 61 | class DummyStaticAnalyzer: 62 | def batch_size_location(self): 63 | return None 64 | 65 | 66 | def next_message_to_dict(a): 67 | message = next(a) 68 | return MessageToDict(message, preserving_proto_field_name=True) 69 | 70 | 71 | def trigger_profiling( 72 | project_root: str, 73 | entry_point: str, 74 | initial_batch_size: int, 75 | input_provider: Callable, 76 | model_provider: Callable, 77 | iteration_provider: Callable, 78 | ): 79 | try: 80 | data = { 81 | "analysis": { 82 | "message_type": "analysis", 83 | "project_root": project_root, 84 | "project_entry_point": entry_point, 85 | "hardware_info": {}, 86 | "throughput": {}, 87 | "breakdown": {}, 88 | "habitat": {}, 89 | "additionalProviders": "", 90 | "energy": {}, 91 | "utilization": {}, 92 | "ddp": {}, 93 | }, 94 | "epochs": 50, 95 | "iterations": 1000, 96 | "encodedFiles": [], 97 | } 98 | 99 | session = AnalysisSession( 100 | 
project_root, 101 | entry_point, 102 | project_root, 103 | model_provider, 104 | input_provider, 105 | iteration_provider, 106 | initial_batch_size, 107 | DummyStaticAnalyzer(), 108 | ) 109 | release_memory() 110 | 111 | exclude_source = False 112 | 113 | with NVML() as nvml: 114 | data["analysis"]["hardware_info"] = hardware_information(nvml) 115 | data["analysis"]["breakdown"] = next_message_to_dict( 116 | measure_breakdown(session, nvml) 117 | ) 118 | 119 | operation_tree = data["analysis"]["breakdown"]["operation_tree"] 120 | if not exclude_source and operation_tree is not None: 121 | data["encodedFiles"] = files_encoded_unique(operation_tree) 122 | 123 | data["analysis"]["throughput"] = next_message_to_dict( 124 | measure_throughput(session) 125 | ) 126 | data["analysis"]["habitat"] = next_message_to_dict(habitat_predict(session)) 127 | data["analysis"]["utilization"] = next_message_to_dict( 128 | measure_utilization(session) 129 | ) 130 | data["analysis"]["energy"] = next_message_to_dict(energy_compute(session)) 131 | # data['analysis']['ddp'] = next_message_to_dict(ddp_analysis(session)) 132 | 133 | from deepview_profile.export_converter import convert 134 | 135 | data["analysis"] = convert(data["analysis"]) 136 | 137 | return data 138 | 139 | except AnalysisError as ex: 140 | print_analysis_error(ex) 141 | sys.exit(1) 142 | -------------------------------------------------------------------------------- /deepview_profile/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from deepview_profile.exceptions import AnalysisError 4 | from deepview_profile.models.analysis import PerformanceLimits 5 | 6 | 7 | def to_trainable_model(parse_tree, class_name): 8 | try: 9 | executable = compile(parse_tree, '', 'exec') 10 | scope = {} 11 | exec(executable, scope, scope) 12 | model = scope[class_name]().to(torch.device('cuda')) 13 | model.train() 14 | return model 15 | except Exception as ex: 16 | raise AnalysisError(str(ex), type(ex)) 17 | 18 | 19 | def get_performance_limits(memory_info, throughput_info): 20 | max_capacity_batch_size = memory_info.usage_model_mb.inverse( 21 | memory_info.max_capacity_mb) 22 | max_capacity_throughput = ( 23 | max_capacity_batch_size / 24 | throughput_info.runtime_model_ms.evaluate(max_capacity_batch_size) * 25 | 1000 26 | ) 27 | max_throughput_batch_size = throughput_info.batch_from_throughput( 28 | throughput_info.max_throughput) 29 | 30 | thpt_limits = (max_throughput_batch_size, throughput_info.max_throughput) 31 | mem_limits = (max_capacity_batch_size, max_capacity_throughput) 32 | 33 | limits = min(thpt_limits, mem_limits, key=lambda tup: tup[0]) 34 | 35 | return PerformanceLimits( 36 | max_batch_size=limits[0], 37 | throughput_limit=limits[1], 38 | ) 39 | -------------------------------------------------------------------------------- /deepview_profile/profiler/autograd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from deepview_profile.profiler.backward import get_grad_fn, flatten_operation_output 4 | 5 | 6 | class AutogradEngine: 7 | """ 8 | Emulates the backward pass for a given model output, for timing purposes. 
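    An illustrative use, assuming `output` is a CUDA tensor with a grad_fn:

        output = model(inputs)                   # forward pass, on GPU
        engine = AutogradEngine.new_from(output)
        engine.run_backward()                    # replays the grad_fns; gradients
                                                 # are timed, not accumulated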
9 | """ 10 | def __init__(self, grad_fn_ordering, input_map, initial_inputs): 11 | self._grad_fn_ordering = grad_fn_ordering 12 | self._input_holder = { 13 | fn: [None] * size for fn, size in input_map.items() 14 | } 15 | self._input_holder[self._grad_fn_ordering[0]] = initial_inputs 16 | 17 | @classmethod 18 | def new_from(cls, operation_output, exclude_accumulate_grad=True): 19 | # Traverse the autograd graph, build input map for each grad_fn and 20 | # create a topological ordering 21 | _, initial_grad_fn = get_grad_fn(operation_output) 22 | if initial_grad_fn is None: 23 | raise ValueError('No grad_fn available on the operation output.') 24 | 25 | ordering = [] 26 | input_map = {} 27 | initial_inputs = [ 28 | tensor.detach() 29 | for tensor in flatten_operation_output(operation_output) 30 | ] 31 | input_map[initial_grad_fn] = len(initial_inputs) 32 | 33 | stack = [(initial_grad_fn, 0)] 34 | visited = {initial_grad_fn} 35 | 36 | # Build a topological ordering 37 | while len(stack) > 0: 38 | grad_fn, visit_count = stack.pop() 39 | if visit_count != 0: 40 | ordering.append(grad_fn) 41 | continue 42 | 43 | stack.append((grad_fn, 1)) 44 | for next_fn, input_idx in grad_fn.next_functions: 45 | if next_fn is None: 46 | continue 47 | 48 | if (exclude_accumulate_grad and 49 | next_fn.name() == 'torch::autograd::AccumulateGrad'): 50 | continue 51 | 52 | # Keep track of the inputs to each grad_fn 53 | if next_fn not in input_map: 54 | input_map[next_fn] = 1 55 | input_map[next_fn] = max(input_map[next_fn], input_idx + 1) 56 | 57 | # Determine whether to visit this grad_fn 58 | if next_fn in visited: 59 | continue 60 | 61 | visited.add(next_fn) 62 | stack.append((next_fn, 0)) 63 | 64 | ordering.reverse() 65 | return cls(ordering, input_map, initial_inputs) 66 | 67 | def run_backward(self): 68 | for grad_fn in self._grad_fn_ordering: 69 | # 1. Run the backward function 70 | outputs = grad_fn(*(self._input_holder[grad_fn])) 71 | 72 | # 2. Store its outputs for the next backward function(s) 73 | if isinstance(outputs, torch.Tensor): 74 | outputs = [outputs] 75 | for (output, (next_fn, input_idx)) in zip( 76 | outputs, grad_fn.next_functions): 77 | if next_fn is None or next_fn not in self._input_holder: 78 | continue 79 | # NOTE: If implementing to actually calculate the gradient, we 80 | # need to sum gradients that "flow" into the same grad function 81 | # input. 
82 | self._input_holder[next_fn][input_idx] = output 83 | -------------------------------------------------------------------------------- /deepview_profile/profiler/backward.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BackwardHelper: 5 | def __init__(self, backward_runnable, ag_dict): 6 | self.run_backward = backward_runnable 7 | self._ag_dict = ag_dict 8 | 9 | @classmethod 10 | def new_from(cls, operation_outputs): 11 | retval, initial_grad_fn = get_grad_fn(operation_outputs) 12 | if initial_grad_fn is None: 13 | raise ValueError('No grad_fn available on the operation output.') 14 | 15 | grads = torch.ones_like(retval) 16 | def backward_runnable(): 17 | torch.autograd.backward(retval, grads, retain_graph=True) 18 | 19 | size_dict = get_accumulate_grad_inputs( 20 | initial_grad_fn, 21 | backward_runnable, 22 | ) 23 | 24 | ag_dict = { 25 | grad_fn: torch.randn(size, device=torch.device('cuda')) 26 | for grad_fn, size in size_dict.items() 27 | } 28 | 29 | return cls(backward_runnable, ag_dict) 30 | 31 | def run_accumulate_grad(self): 32 | for grad_fn, grad in self._ag_dict.items(): 33 | grad_fn(grad) 34 | 35 | 36 | def backward_available(operation_output): 37 | return get_grad_fn(operation_output)[1] is not None 38 | 39 | 40 | def flatten_operation_output(operation_output): 41 | if isinstance(operation_output, torch.Tensor): 42 | return [operation_output] 43 | elif (not isinstance(operation_output, tuple) and 44 | not isinstance(operation_output, list)): 45 | return [] 46 | 47 | flattened = [] 48 | for value in operation_output: 49 | flattened.extend(flatten_operation_output(value)) 50 | return flattened 51 | 52 | 53 | def get_grad_fn(retval): 54 | if isinstance(retval, torch.Tensor) and retval.grad_fn is not None: 55 | return retval, retval.grad_fn 56 | elif isinstance(retval, tuple) or isinstance(retval, list): 57 | for inner_value in retval: 58 | inner_retval, grad_fn = get_grad_fn(inner_value) 59 | if grad_fn is not None: 60 | return inner_retval, grad_fn 61 | 62 | return None, None 63 | 64 | 65 | def get_accumulate_grad_inputs(initial_grad_fn, backward_runnable): 66 | input_dict = {} 67 | hook_handles = [] 68 | def get_hook(grad_fn): 69 | def hook(arg1, arg2): 70 | if not isinstance(arg2[0], torch.Tensor): 71 | return 72 | input_dict[grad_fn] = arg2[0].size() 73 | return hook 74 | 75 | # Traverse the graph to identify all AccumulateGrad functions 76 | stack = [initial_grad_fn] 77 | visited = {initial_grad_fn} 78 | 79 | while len(stack) > 0: 80 | grad_fn = stack.pop() 81 | 82 | if grad_fn.name() == 'torch::autograd::AccumulateGrad': 83 | hook_handles.append(grad_fn.register_hook(get_hook(grad_fn))) 84 | 85 | for next_grad_fn, _ in grad_fn.next_functions: 86 | if next_grad_fn is None or next_grad_fn in visited: 87 | continue 88 | stack.append(next_grad_fn) 89 | visited.add(next_grad_fn) 90 | 91 | # Run a backward pass to get accumulate grad sizes 92 | backward_runnable() 93 | torch.cuda.synchronize() 94 | 95 | # Clear hooks 96 | for handle in hook_handles: 97 | handle.remove() 98 | 99 | return input_dict 100 | -------------------------------------------------------------------------------- /deepview_profile/profiler/ddp.py: -------------------------------------------------------------------------------- 1 | from scipy.stats import gaussian_kde 2 | import numpy as np 3 | import os 4 | import logging 5 | from deepview_profile.pytorch_profiler_log_reader import ( 6 | get_first_last_step, 7 | get_bucket_sizes, 8 | 
get_ddp_forward_backward_times, 9 | ) 10 | import time 11 | from torch.profiler import profile, schedule, ProfilerActivity 12 | import torch.distributed as dist 13 | from torch.nn.parallel import DistributedDataParallel as DDP 14 | import subprocess 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | FILENAME = "pytorch_profiler.json" 19 | RANK = 0 20 | WORLD_SIZE = 1 21 | DEFAULT_BUCKET_SIZE = 25 22 | 23 | 24 | def setup(rank, world_size): 25 | os.environ["MASTER_ADDR"] = "localhost" 26 | os.environ["MASTER_PORT"] = "12345" 27 | dist.init_process_group("nccl", rank=rank, world_size=world_size) 28 | 29 | 30 | def cleanup(): 31 | dist.destroy_process_group() 32 | 33 | 34 | def _bucket_estimate_max_expected(bucket_times, ngpu): 35 | m = 1000 36 | 37 | np_samples = np.array(bucket_times) 38 | 39 | kde_samples = gaussian_kde(np_samples) 40 | 41 | z_arr = [] 42 | for _ in range(m): 43 | num_resamples = kde_samples.resample(ngpu) 44 | 45 | z_arr.append(np.max(num_resamples)) 46 | 47 | expected_max = np.mean(z_arr) 48 | 49 | return expected_max 50 | 51 | 52 | def _bucket_comp_times(path_to_file): 53 | data_matrix = [] 54 | first_step, last_step = get_first_last_step(path_to_file) 55 | NUM_STEPS = 25 56 | forward_time_acc = 0 57 | for step in range(first_step + 1, first_step + NUM_STEPS + 1): 58 | fw_time, bucket_comp_times = get_ddp_forward_backward_times(path_to_file, step) 59 | forward_time_acc += fw_time 60 | """ 61 | storing as: 62 | [bucket_0 time1, bucket_1 time1, ... , bucket_n time1] 63 | [bucket_0 time2, bucket_1 time2, ... , bucket_n time2] 64 | ... 65 | """ 66 | data_matrix.append(bucket_comp_times) 67 | # convert to numpy and transpose 68 | data_numpy = np.array(data_matrix) 69 | """ 70 | store as : 71 | [bucket_0 time1, bucket_0 time2, ...., bucket_0 time n] 72 | [bucket_1 time1, bucket_1 time2, ...., bucket_1 time n] 73 | """ 74 | data_transpose = np.transpose(data_numpy) 75 | 76 | return forward_time_acc / NUM_STEPS, data_transpose 77 | 78 | 79 | def _bucket_expected_max(bucket_times, ngpus): 80 | expected_max_arr = [] 81 | for samples in bucket_times: 82 | expected_max = _bucket_estimate_max_expected(samples, ngpus) 83 | expected_max_arr.append(expected_max) 84 | 85 | return expected_max_arr 86 | 87 | 88 | def _trace_handler(p): 89 | p.export_chrome_trace(FILENAME) 90 | 91 | 92 | def run_profiler(model_provider, input_provider, iteration_provider): 93 | setup(RANK, WORLD_SIZE) 94 | 95 | model = model_provider() 96 | inputs = input_provider() 97 | ddp_model = DDP(model, device_ids=[RANK], bucket_cap_mb=DEFAULT_BUCKET_SIZE) 98 | iteration = iteration_provider(ddp_model) 99 | # warmup for 30 secs 100 | start = time.time() 101 | elapsed = 0 102 | 103 | while elapsed < 30: 104 | for _ in range(100): 105 | iteration(*inputs) 106 | elapsed = time.time() - start 107 | 108 | skip_first = 10 109 | wait = 5 110 | warmup = 10 111 | active = 30 112 | totalIterations = skip_first + wait + warmup + active 113 | deepviewSchedule = schedule( 114 | skip_first=skip_first, wait=wait, warmup=warmup, active=active, repeat=1 115 | ) 116 | 117 | with profile( 118 | activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], 119 | schedule=deepviewSchedule, 120 | on_trace_ready=_trace_handler, 121 | ) as p: 122 | for _ in range(totalIterations): 123 | iteration(*inputs) 124 | p.step() 125 | 126 | cleanup() 127 | 128 | 129 | def ddp_analysis(model_provider, input_provider, iteration_provider): 130 | run_profiler(model_provider, input_provider, iteration_provider) 131 | 132 | path_to_file = 
os.path.join(os.getcwd(), FILENAME) 133 | 134 | fw_avg_msec, bucket_comp_times = _bucket_comp_times(path_to_file) 135 | bucket_sizes_arr = get_bucket_sizes(model_provider(), DEFAULT_BUCKET_SIZE) 136 | 137 | expected_max_2gpus = _bucket_expected_max(bucket_comp_times, 2) 138 | expected_max_4gpus = _bucket_expected_max(bucket_comp_times, 4) 139 | 140 | jsonFormat = { 141 | "forward_time_ms": fw_avg_msec, 142 | "bucket_sizes": bucket_sizes_arr, 143 | "expected_computation_times": [ 144 | {"ngpus": 2, "expected_max_times": expected_max_2gpus}, 145 | {"ngpus": 4, "expected_max_times": expected_max_4gpus}, 146 | ], 147 | } 148 | 149 | subprocess.run(["rm", "-f", os.path.join(os.getcwd(), FILENAME)]) 150 | return jsonFormat 151 | -------------------------------------------------------------------------------- /deepview_profile/protocol/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/protocol/__init__.py -------------------------------------------------------------------------------- /deepview_profile/protocol/message_sender.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import pynvml 4 | import platform 5 | 6 | from deepview_profile.exceptions import NoConnectionError 7 | 8 | import deepview_profile.protocol_gen.innpv_pb2 as pm 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class MessageSender: 14 | def __init__(self, connection_manager): 15 | self._connection_manager = connection_manager 16 | 17 | def send_initialize_response(self, context): 18 | message = pm.InitializeResponse() 19 | connection = self._connection_manager.get_connection(context.address) 20 | message.server_project_root = connection.project_root 21 | message.entry_point.components.extend(connection.entry_point.split(os.sep)) 22 | 23 | # Populate hardware info 24 | message.hardware.hostname = platform.node() 25 | message.hardware.os = " ".join(list(platform.uname())) 26 | pynvml.nvmlInit() 27 | for i in range(pynvml.nvmlDeviceGetCount()): 28 | handle = pynvml.nvmlDeviceGetHandleByIndex(i) 29 | device_name = pynvml.nvmlDeviceGetName(handle).decode("utf-8") 30 | message.hardware.gpus.append(device_name) 31 | pynvml.nvmlShutdown() 32 | 33 | self._send_message(message, 'initialize', context) 34 | 35 | def send_protocol_error(self, error_code, context): 36 | message = pm.ProtocolError() 37 | message.error_code = error_code 38 | self._send_message(message, 'error', context) 39 | 40 | def send_breakdown_response(self, breakdown, context): 41 | # Ideally, MessageSender users should not need to know about the INNPV 42 | # protocol messages. However, to avoid extraneous copies, sometimes 43 | # callers will pass in constructed messages for sending. 
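Returning to _bucket_estimate_max_expected() in ddp.py above: it fits a kernel density estimate to one bucket's observed computation times, then Monte-Carlo-samples the expected maximum across ngpu simulated workers (the straggler effect that gates gradient synchronization). A standalone sketch with made-up timings:

import numpy as np
from scipy.stats import gaussian_kde

bucket_times_ms = [1.9, 2.1, 2.0, 2.3, 1.8, 2.2]     # illustrative samples
kde = gaussian_kde(np.array(bucket_times_ms))
ngpu, m = 4, 1000
expected_max = np.mean([np.max(kde.resample(ngpu)) for _ in range(m)])
print(f"expected straggler time across {ngpu} GPUs: {expected_max:.2f} ms")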
44 | self._send_message(breakdown, 'breakdown', context) 45 | 46 | def send_analysis_error(self, analysis_error, context): 47 | message = pm.AnalysisError() 48 | message.error_message = str(analysis_error) 49 | if analysis_error.file_context is not None: 50 | message.file_context.file_path.components.extend( 51 | analysis_error.file_context.file_path.split(os.sep) 52 | ) 53 | message.file_context.line_number = ( 54 | analysis_error.file_context.line_number 55 | if analysis_error.file_context.line_number is not None 56 | else 0 57 | ) 58 | self._send_message(message, 'analysis_error', context) 59 | 60 | def send_throughput_response(self, throughput, context): 61 | self._send_message(throughput, 'throughput', context) 62 | 63 | def send_habitat_response(self, habitat_resp, context): 64 | self._send_message(habitat_resp, 'habitat', context) 65 | 66 | def send_energy_response(self, energy_resp, context): 67 | self._send_message(energy_resp, 'energy', context) 68 | 69 | def send_utilization_response(self, utilization_resp, context): 70 | self._send_message(utilization_resp, 'utilization', context) 71 | 72 | def send_ddp_response(self, ddp_resp, context): 73 | self._send_message(ddp_resp, 'ddp', context) 74 | 75 | def _send_message(self, message, payload_name, context): 76 | try: 77 | connection = self._connection_manager.get_connection( 78 | context.address) 79 | enclosing_message = pm.FromServer() 80 | getattr(enclosing_message, payload_name).CopyFrom(message) 81 | enclosing_message.sequence_number = context.sequence_number 82 | connection.send_bytes(enclosing_message.SerializeToString()) 83 | except NoConnectionError: 84 | logger.debug( 85 | 'Not sending message to (%s:%d) because it is no longer ' 86 | 'connected.', 87 | *context.address, 88 | ) 89 | -------------------------------------------------------------------------------- /deepview_profile/protocol_gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/protocol_gen/__init__.py -------------------------------------------------------------------------------- /deepview_profile/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | from deepview_profile.analysis.request_manager import AnalysisRequestManager 5 | from deepview_profile.io.connection_acceptor import ConnectionAcceptor 6 | from deepview_profile.io.connection_manager import ConnectionManager 7 | from deepview_profile.protocol.message_handler import MessageHandler 8 | from deepview_profile.protocol.message_sender import MessageSender 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class SkylineServer: 14 | def __init__(self, host, port): 15 | self._requested_host = host 16 | # This is the port the user specified on the command line (it can be 0) 17 | self._requested_port = port 18 | self._connection_acceptor = ConnectionAcceptor( 19 | self._requested_host, 20 | self._requested_port, 21 | self._on_new_connection, 22 | ) 23 | self._connection_manager = ConnectionManager( 24 | self._on_message, 25 | self._on_connection_closed, 26 | ) 27 | self._message_sender = MessageSender(self._connection_manager) 28 | self._analysis_request_manager = AnalysisRequestManager( 29 | self._submit_work, 30 | self._message_sender, 31 | self._connection_manager, 32 | ) 33 | self._message_handler = MessageHandler( 34 
| self._connection_manager, 35 | self._message_sender, 36 | self._analysis_request_manager, 37 | ) 38 | self._main_executor = ThreadPoolExecutor(max_workers=1) 39 | 40 | def __enter__(self): 41 | self.start() 42 | return self 43 | 44 | def __exit__(self, exc_type, exc_value, traceback): 45 | self.stop() 46 | 47 | def start(self): 48 | self._analysis_request_manager.start() 49 | self._connection_acceptor.start() 50 | logger.debug("DeepView server has started.") 51 | 52 | def stop(self): 53 | def shutdown(): 54 | self._connection_acceptor.stop() 55 | self._connection_manager.stop() 56 | 57 | self._analysis_request_manager.stop() 58 | self._main_executor.submit(shutdown).result() 59 | self._main_executor.shutdown() 60 | logger.debug("DeepView server has shut down.") 61 | 62 | @property 63 | def listening_on(self): 64 | return (self._connection_acceptor.host, self._connection_acceptor.port) 65 | 66 | def _on_message(self, data, address): 67 | print("on_message:", data, address) 68 | # Do not call directly - called by a connection 69 | self._main_executor.submit( 70 | self._message_handler.handle_message, 71 | data, 72 | address, 73 | ) 74 | 75 | def _on_new_connection(self, socket, address): 76 | print("on_new_connection", socket, address) 77 | # Do not call directly - called by _connection_acceptor 78 | self._main_executor.submit( 79 | self._connection_manager.register_connection, 80 | socket, 81 | address, 82 | ) 83 | 84 | def _on_connection_closed(self, address): 85 | # Do not call directly - called by a connection when it is closed 86 | self._main_executor.submit( 87 | self._connection_manager.remove_connection, 88 | address, 89 | ) 90 | 91 | def _submit_work(self, func, *args, **kwargs): 92 | print("submit_work", func) 93 | # print("submit_work args:", args) 94 | logger.debug("submit_work args:", args) 95 | print("submit_work kwargs:", kwargs) 96 | # Do not call directly - called by another thread to submit work 97 | # onto the main executor 98 | self._main_executor.submit(func, *args, **kwargs) 99 | -------------------------------------------------------------------------------- /deepview_profile/skyline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | 5 | import deepview_profile 6 | import deepview_profile.commands.interactive 7 | import deepview_profile.commands.memory 8 | import deepview_profile.commands.time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser( 13 | prog = deepview_profile.__name__, 14 | description = deepview_profile.__description__ 15 | ) 16 | parser.add_argument( 17 | "-v", "--version", 18 | action="store_true", 19 | help="Print the version and exit.", 20 | ) 21 | subparsers = parser.add_subparsers(title="Commands") 22 | deepview_profile.commands.interactive.register_command(subparsers) 23 | deepview_profile.commands.memory.register_command(subparsers) 24 | deepview_profile.commands.time.register_command(subparsers) 25 | args = parser.parse_args() 26 | 27 | if args.version: 28 | print('DeepView Command Line Interface', 'v' + deepview_profile.__version__,) 29 | return 30 | 31 | if 'func' not in args: 32 | parser.print_help() 33 | sys.exit(1) 34 | 35 | # Run the specified command 36 | args.func(args) 37 | 38 | 39 | if __name__ == '__main__': 40 | main() 41 | -------------------------------------------------------------------------------- /deepview_profile/tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tests/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/backward_interceptor.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import torch 3 | 4 | from deepview_profile.exceptions import _SuspendExecution 5 | from deepview_profile.tracking.hook_manager import HookManager 6 | 7 | 8 | class BackwardInterceptor: 9 | def __init__(self): 10 | self._backward_hooks = HookManager() 11 | self.backward_root = None 12 | 13 | @contextlib.contextmanager 14 | def intercept(self): 15 | self._backward_hooks.attach_hook( 16 | torch.Tensor, 17 | 'backward', 18 | self._hook_creator, 19 | ) 20 | try: 21 | yield 22 | except _SuspendExecution: 23 | pass 24 | finally: 25 | self._backward_hooks.remove_hooks() 26 | 27 | def _hook_creator(self, fn): 28 | def hook(*args): 29 | self.backward_root = args[0] 30 | raise _SuspendExecution 31 | return hook 32 | -------------------------------------------------------------------------------- /deepview_profile/tracking/base.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sqlite3 3 | 4 | 5 | class TrackerBase: 6 | def __init__(self): 7 | self._is_tracking = False 8 | 9 | @contextlib.contextmanager 10 | def track(self): 11 | self.start_tracking() 12 | try: 13 | yield self 14 | finally: 15 | self.stop_tracking() 16 | 17 | def start_tracking(self): 18 | self._is_tracking = True 19 | 20 | def stop_tracking(self): 21 | self._is_tracking = False 22 | 23 | def populate_report(self, builder): 24 | raise NotImplementedError 25 | 26 | 27 | class ReportBase: 28 | def __init__(self, connection): 29 | self._connection = connection 30 | 31 | def __del__(self): 32 | self._connection.close() 33 | 34 | 35 | class ReportBuilderBase: 36 | def __init__(self, file=None): 37 | database_file = file if file is not None else ':memory:' 38 | self._connection = sqlite3.connect(database_file, check_same_thread=False) 39 | self._create_report_tables() 40 | 41 | def process_tracker(self, tracker): 42 | tracker.populate_report(self) 43 | return self 44 | 45 | def build(self): 46 | raise NotImplementedError 47 | 48 | def _create_report_tables(self): 49 | raise NotImplementedError 50 | -------------------------------------------------------------------------------- /deepview_profile/tracking/call_stack.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import inspect 3 | import os 4 | import re 5 | import torch 6 | from deepview_profile.utils import model_location_patterns 7 | 8 | SourceLocation = collections.namedtuple( 9 | "SourceLocation", ["file_path", "line_number", "module_id"] 10 | ) 11 | 12 | def find_pattern_match(filename): 13 | pattern_list = model_location_patterns() 14 | return any(re.search(pattern, filename) for pattern in pattern_list) 15 | 16 | class CallStack: 17 | def __init__(self, frames): 18 | self.frames = frames 19 | 20 | @staticmethod 21 | def from_here(project_root, 
start_from=1): 22 | """ 23 | Returns the current call stack when invoked. 24 | """ 25 | stack = inspect.stack() 26 | context = [] 27 | try: 28 | for frame_info in stack[start_from:]: 29 | # Only track source locations that are within the project model (or transformers, diffusers, etc) 30 | # that are within a torch.nn.Module. Note that we assume the 31 | # user uses "self" to refer to the current class instance. 32 | 33 | if not ( 34 | frame_info.filename.startswith(project_root) 35 | or find_pattern_match(frame_info.filename) 36 | ): 37 | continue 38 | if "self" not in frame_info.frame.f_locals: 39 | continue 40 | if not isinstance(frame_info.frame.f_locals["self"], torch.nn.Module): 41 | continue 42 | context.append( 43 | SourceLocation( 44 | file_path=os.path.relpath( 45 | frame_info.filename, start=project_root 46 | ), 47 | line_number=frame_info.lineno, 48 | module_id=id(frame_info.frame.f_locals["self"]), 49 | ) 50 | ) 51 | return CallStack(context) 52 | finally: 53 | del stack 54 | -------------------------------------------------------------------------------- /deepview_profile/tracking/callable_tracker.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | 5 | from deepview_profile.tracking.base import TrackerBase 6 | from deepview_profile.tracking.hook_manager import HookManager 7 | from deepview_profile.version_utils import Version 8 | 9 | OLD_VF_PATH_VERSION = Version.parse_semantic_version('1.4.0') 10 | 11 | 12 | class CallableTracker(TrackerBase): 13 | def __init__(self, hook_creator): 14 | super().__init__() 15 | self._hook_manager = HookManager() 16 | self._hook_creator = hook_creator 17 | self._torch_version = Version.parse_semantic_version(torch.__version__) 18 | 19 | def start_tracking(self): 20 | super().start_tracking() 21 | self._hook_manager.attach_hooks_on_module( 22 | torch, 23 | lambda fn: _is_callable_and_public(fn) and \ 24 | fn.__name__ not in BLACKLISTED_TORCH_METHODS, 25 | self._hook_creator, 26 | ) 27 | self._hook_manager.attach_hooks_on_module( 28 | torch.Tensor, 29 | lambda fn: _is_callable_and_public(fn) and \ 30 | fn.__name__ != 'backward' and \ 31 | fn.__name__ not in BLACKLISTED_TENSOR_METHODS, 32 | self._hook_creator, 33 | ) 34 | self._hook_manager.attach_hooks_on_module( 35 | torch.Tensor, 36 | _is_callable_dunder, 37 | self._hook_creator, 38 | ) 39 | self._hook_manager.attach_hooks_on_module( 40 | torch.nn.functional, 41 | _is_callable_and_public, 42 | self._hook_creator, 43 | ) 44 | 45 | # The _VF module was moved to the torch module after version 1.4.0. 46 | # This is an unfortunate hack because we need to monkey patch certain 47 | # internal PyTorch functions to be able to identify all the operations 48 | # properly. The _VF module contains recurrent operations (e.g., lstm). 49 | vf_module = ( 50 | torch._VF if self._torch_version is None or 51 | self._torch_version > OLD_VF_PATH_VERSION 52 | else torch.nn._VF 53 | ) 54 | self._hook_manager.attach_hooks_on_module_using( 55 | vf_module, 56 | torch._C._VariableFunctions, 57 | _is_callable_and_public, 58 | self._hook_creator, 59 | ) 60 | 61 | def stop_tracking(self): 62 | super().stop_tracking() 63 | self._hook_manager.remove_hooks() 64 | 65 | 66 | def _is_callable_and_public(maybe_fn): 67 | # By convention, _ prefixed functions in Python should not be 68 | # called by users (i.e. 
they are "private" functions) 69 | return _is_callable(maybe_fn) and maybe_fn.__name__[0] != '_' 70 | 71 | # Original source of these blacklists: 72 | # https://github.com/NVIDIA/apex/blob/master/apex/pyprof/nvtx/nvmarker.py 73 | BLACKLISTED_DUNDERS = { 74 | '__all__', 75 | '__array__', 76 | '__array_priority__', 77 | '__array_wrap__', 78 | '__bool__', 79 | '__builtins__', 80 | '__cached__', 81 | '__class__', 82 | '__deepcopy__', 83 | '__delattr__', 84 | '__delitem__', 85 | '__dict__', 86 | '__dir__', 87 | '__doc__', 88 | '__file__', 89 | '__format__', 90 | '__getattribute__', 91 | '__getitem__', 92 | '__hash__', 93 | '__index__', 94 | '__init__', 95 | '__init_subclass__', 96 | '__iter__', 97 | '__len__', 98 | '__loader__', 99 | '__module__', 100 | '__name__', 101 | '__new__', 102 | '__nonzero__', 103 | '__package__', 104 | '__path__', 105 | '__reduce__', 106 | '__reduce_ex__', 107 | '__repr__', 108 | '__reversed__', 109 | '__setattr__', 110 | '__setitem__', 111 | '__setstate__', 112 | '__sizeof__', 113 | '__spec__', 114 | '__str__', 115 | '__subclasshook__', 116 | '__version__', 117 | '__weakref__', 118 | } 119 | 120 | BLACKLISTED_TENSOR_METHODS = { 121 | 'size', 'dim', 'item', 'tolist', 122 | } 123 | 124 | BLACKLISTED_TORCH_METHODS = { 125 | 'is_storage', 126 | } 127 | 128 | 129 | def _is_callable_dunder(maybe_fn): 130 | """ 131 | Returns True if maybe_fn is a callable dunder (callable named with double 132 | underscores) (e.g., __add__) 133 | """ 134 | return ( 135 | _is_callable(maybe_fn) and 136 | len(maybe_fn.__name__) > 4 and 137 | maybe_fn.__name__[:2] == '__' and 138 | maybe_fn.__name__[-2:] == '__' and 139 | maybe_fn.__name__ not in BLACKLISTED_DUNDERS 140 | ) 141 | 142 | 143 | def _is_callable(maybe_fn): 144 | return ( 145 | inspect.isfunction(maybe_fn) or 146 | inspect.ismethod(maybe_fn) or 147 | inspect.isbuiltin(maybe_fn) or 148 | inspect.isroutine(maybe_fn) 149 | ) 150 | -------------------------------------------------------------------------------- /deepview_profile/tracking/hook_manager.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class HookManager: 4 | def __init__(self): 5 | self._original_callables = {} 6 | 7 | def attach_hooks_on_module(self, module, predicate, hook_creator): 8 | self.attach_hooks_on_module_using( 9 | module, module, predicate, hook_creator) 10 | 11 | def attach_hooks_on_module_using( 12 | self, module, using_module, predicate, hook_creator): 13 | """ 14 | Attach hooks onto functions in the provided module. Use the 15 | `using_module` to discover the existing functions. 16 | """ 17 | for prop in dir(using_module): 18 | if not predicate(getattr(module, prop)): 19 | continue 20 | self.attach_hook(module, prop, hook_creator) 21 | 22 | def attach_hook(self, module, prop, hook_creator): 23 | target = getattr(module, prop) 24 | self._maybe_store_callable(module, prop, target) 25 | setattr(module, prop, hook_creator(target)) 26 | 27 | def remove_hooks(self): 28 | for module, callable_pairs in self._original_callables.items(): 29 | for prop, original_callable in callable_pairs.items(): 30 | setattr(module, prop, original_callable) 31 | self._original_callables.clear() 32 | 33 | def _maybe_store_callable(self, module, prop, original_callable): 34 | """ 35 | Store the original callable (to be able to restore it) only when it is 36 | the first time we are encountering the given callable. 
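    For instance (math.sqrt being an illustrative target, not one the
    profiler actually patches):

        import math
        manager = HookManager()
        manager.attach_hook(math, 'sqrt', lambda fn: fn)   # stores math.sqrt once
        manager.attach_hook(math, 'sqrt', lambda fn: fn)   # already stored: skipped
        manager.remove_hooks()                             # restores the original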
37 | """ 38 | if module not in self._original_callables: 39 | self._original_callables[module] = {} 40 | 41 | if prop in self._original_callables[module]: 42 | return 43 | 44 | self._original_callables[module][prop] = original_callable 45 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/memory/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/report.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import enum 3 | 4 | from deepview_profile.tracking.base import ReportBase, ReportBuilderBase 5 | import deepview_profile.tracking.memory.report_queries as queries 6 | 7 | 8 | WeightEntry = collections.namedtuple( 9 | 'WeightEntry', 10 | ['weight_name', 11 | 'size_bytes', 12 | 'grad_size_bytes', 13 | 'file_path', 14 | 'line_number'], 15 | ) 16 | 17 | 18 | ActivationEntry = collections.namedtuple( 19 | 'ActivationEntry', 20 | ['operation_name', 'size_bytes', 'file_path', 'line_number'], 21 | ) 22 | 23 | 24 | class MiscSizeType(enum.Enum): 25 | PeakUsageBytes = 'peak_usage_bytes' 26 | 27 | 28 | class MemoryReport(ReportBase): 29 | def __init__(self, connection): 30 | super().__init__(connection) 31 | 32 | def get_weight_entries(self, path_prefix=None): 33 | cursor = self._connection.cursor() 34 | return map( 35 | lambda row: WeightEntry(*row), 36 | cursor.execute(queries.get_weight_entries_with_context), 37 | ) 38 | 39 | def get_activation_entries(self, path_prefix=None): 40 | cursor = self._connection.cursor() 41 | return map( 42 | lambda row: ActivationEntry(*row), 43 | cursor.execute(queries.get_activation_entries_with_context), 44 | ) 45 | 46 | def get_misc_entry(self, misc_size_type: MiscSizeType): 47 | cursor = self._connection.cursor() 48 | cursor.execute(queries.get_misc_entry, (misc_size_type.value,)) 49 | return cursor.fetchone()[0] 50 | 51 | 52 | class MemoryReportBuilder(ReportBuilderBase): 53 | # This is the memory tracking report file format version that will be 54 | # created by this builder. When changes are made to the file format, this 55 | # integer should be increased monotonically. 56 | # 57 | # We need to version these tracking reports to protect us from future 58 | # changes to the file format. 
59 | Version = 1 60 | 61 | def __init__(self, file=None): 62 | super().__init__(file) 63 | 64 | def add_weight_entry( 65 | self, weight_name, size_bytes, grad_size_bytes, stack_context): 66 | cursor = self._connection.cursor() 67 | cursor.execute( 68 | queries.add_weight_entry, 69 | (weight_name, size_bytes, grad_size_bytes), 70 | ) 71 | self._add_stack_frames( 72 | cursor=cursor, 73 | entry_id=cursor.lastrowid, 74 | entry_type=queries.EntryType.Weight, 75 | stack_context=stack_context, 76 | ) 77 | return self 78 | 79 | def add_activation_entry(self, operation_name, size_bytes, stack_context): 80 | cursor = self._connection.cursor() 81 | cursor.execute( 82 | queries.add_activation_entry, (operation_name, size_bytes)) 83 | self._add_stack_frames( 84 | cursor=cursor, 85 | entry_id=cursor.lastrowid, 86 | entry_type=queries.EntryType.Activation, 87 | stack_context=stack_context, 88 | ) 89 | return self 90 | 91 | def add_misc_entry(self, size_type: MiscSizeType, size_bytes): 92 | cursor = self._connection.cursor() 93 | cursor.execute(queries.add_misc_entry, (size_type.value, size_bytes)) 94 | return self 95 | 96 | def build(self): 97 | self._connection.commit() 98 | return MemoryReport(self._connection) 99 | 100 | def _create_report_tables(self): 101 | cursor = self._connection.cursor() 102 | cursor.execute(queries.set_report_format_version.format( 103 | version=MemoryReportBuilder.Version)) 104 | for creation_query in queries.create_report_tables.values(): 105 | cursor.execute(creation_query) 106 | cursor.executemany( 107 | queries.add_entry_type, 108 | map(lambda entry: (entry.value, entry.name), queries.EntryType), 109 | ) 110 | self._connection.commit() 111 | 112 | def _add_stack_frames( 113 | self, 114 | cursor, 115 | entry_id, 116 | entry_type: queries.EntryType, 117 | stack_context, 118 | ): 119 | cursor.execute( 120 | queries.add_correlation_entry, (entry_id, entry_type.value)) 121 | correlation_id = cursor.lastrowid 122 | 123 | def stack_frame_generator(): 124 | for idx, frame in enumerate(stack_context.frames): 125 | yield (correlation_id, idx, frame.file_path, frame.line_number) 126 | 127 | cursor.executemany(queries.add_stack_frame, stack_frame_generator()) 128 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/report_queries.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class EntryType(enum.Enum): 5 | Weight = 1 6 | Activation = 2 7 | 8 | 9 | create_report_tables = { 10 | 'weight_entries': """ 11 | CREATE TABLE IF NOT EXISTS weight_entries ( 12 | id INTEGER PRIMARY KEY, 13 | name TEXT NOT NULL, 14 | size_bytes INTEGER NOT NULL, 15 | grad_size_bytes INTEGER NOT NULL 16 | ) 17 | """, 18 | 'activation_entries': """ 19 | CREATE TABLE IF NOT EXISTS activation_entries ( 20 | id INTEGER PRIMARY KEY, 21 | operation_name TEXT NOT NULL, 22 | size_bytes INTEGER NOT NULL 23 | ) 24 | """, 25 | 'correlation': """ 26 | CREATE TABLE IF NOT EXISTS stack_correlation ( 27 | correlation_id INTEGER PRIMARY KEY, 28 | entry_id INTEGER NOT NULL, 29 | entry_type INTEGER NOT NULL, 30 | UNIQUE (correlation_id, entry_id) 31 | ) 32 | """, 33 | 'correlation_index': """ 34 | CREATE UNIQUE INDEX IF NOT EXISTS entry_type_and_id 35 | ON stack_correlation(entry_type, entry_id) 36 | """, 37 | 'stack_frames': """ 38 | CREATE TABLE IF NOT EXISTS stack_frames ( 39 | correlation_id INTEGER NOT NULL, 40 | ordering INTEGER NOT NULL, 41 | file_path TEXT NOT NULL, 42 | line_number INTEGER NOT 
NULL, 43 | PRIMARY KEY (correlation_id, ordering) 44 | ) 45 | """, 46 | 'entry_types': """ 47 | CREATE TABLE IF NOT EXISTS entry_types ( 48 | entry_type INTEGER PRIMARY KEY, 49 | name TEXT NOT NULL 50 | ) 51 | """, 52 | 'misc_sizes': """ 53 | CREATE TABLE IF NOT EXISTS misc_sizes ( 54 | key TEXT PRIMARY KEY, 55 | size_bytes INT NOT NULL 56 | ) 57 | """, 58 | } 59 | 60 | set_report_format_version = 'PRAGMA user_version = {version:d}' 61 | 62 | add_entry_type = """ 63 | INSERT INTO entry_types (entry_type, name) VALUES (?, ?) 64 | """ 65 | 66 | add_weight_entry = """ 67 | INSERT INTO weight_entries (id, name, size_bytes, grad_size_bytes) 68 | VALUES (NULL, ?, ?, ?) 69 | """ 70 | 71 | add_activation_entry = """ 72 | INSERT INTO activation_entries (id, operation_name, size_bytes) 73 | VALUES (NULL, ?, ?) 74 | """ 75 | 76 | add_correlation_entry = """ 77 | INSERT INTO stack_correlation (correlation_id, entry_id, entry_type) 78 | VALUES (NULL, ?, ?) 79 | """ 80 | 81 | add_stack_frame = """ 82 | INSERT INTO stack_frames (correlation_id, ordering, file_path, line_number) 83 | VALUES (?, ?, ?, ?) 84 | """ 85 | 86 | add_misc_entry = "INSERT INTO misc_sizes (key, size_bytes) VALUES (?, ?)" 87 | 88 | get_misc_entry = "SELECT size_bytes FROM misc_sizes WHERE key = ?" 89 | 90 | get_code_context_subquery = """ 91 | WITH code_contexts AS ( 92 | SELECT c.entry_id, s.file_path, s.line_number 93 | FROM stack_frames AS s JOIN stack_correlation AS c 94 | ON s.correlation_id == c.correlation_id 95 | WHERE 96 | c.entry_type = {:d} 97 | GROUP BY s.correlation_id HAVING s.ordering == MIN(s.ordering) 98 | ) 99 | """ 100 | 101 | get_weight_entries_with_context = ( 102 | get_code_context_subquery.format(EntryType.Weight.value) + 103 | """ 104 | SELECT 105 | w.name, w.size_bytes, w.grad_size_bytes, c.file_path, c.line_number 106 | FROM weight_entries AS w 107 | LEFT JOIN code_contexts AS c 108 | ON w.id == c.entry_id 109 | WHERE w.size_bytes > 0 110 | ORDER BY c.file_path ASC, c.line_number ASC 111 | """ 112 | ) 113 | 114 | get_activation_entries_with_context = ( 115 | get_code_context_subquery.format(EntryType.Activation.value) + 116 | """ 117 | SELECT a.operation_name, a.size_bytes, c.file_path, c.line_number 118 | FROM activation_entries AS a 119 | LEFT JOIN code_contexts AS c 120 | ON a.id == c.entry_id 121 | WHERE a.size_bytes > 0 122 | ORDER BY c.file_path ASC, c.line_number ASC 123 | """ 124 | ) 125 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/weights.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import inspect 3 | 4 | from deepview_profile.tracking.base import TrackerBase 5 | from deepview_profile.tracking.call_stack import CallStack 6 | from deepview_profile.tracking.hook_manager import HookManager 7 | from deepview_profile.tracking.utils import tensor_size_bytes 8 | from deepview_profile.util_weak import WeakTensorKeyDictionary 9 | 10 | class WeightsTracker(TrackerBase): 11 | def __init__(self, project_root): 12 | super().__init__() 13 | self._hook_manager = HookManager() 14 | self._module_parameters = WeakTensorKeyDictionary() 15 | self._project_root = project_root 16 | 17 | def start_tracking(self): 18 | super().start_tracking() 19 | self._hook_manager.attach_hook( 20 | torch.nn.Module, 21 | 'register_parameter', 22 | self._register_parameter_hook_creator, 23 | ) 24 | 25 | def stop_tracking(self): 26 | super().stop_tracking() 27 | self._hook_manager.remove_hooks() 28 | 29 | 
def populate_report(self, builder): 30 | for param, (name, stack) in self._module_parameters.items(): 31 | if not param.is_cuda: 32 | continue 33 | builder.add_weight_entry( 34 | weight_name=name, 35 | size_bytes=tensor_size_bytes(param), 36 | grad_size_bytes=tensor_size_bytes(param.grad), 37 | stack_context=stack, 38 | ) 39 | 40 | def populate_breakdown(self, builder): 41 | # The HierarchicalBreakdownBuilder uses the same API as the 42 | # MemoryReportBuilder. 43 | self.populate_report(builder) 44 | 45 | def _register_parameter_hook_creator(self, func): 46 | def hook(*args, **kwargs): 47 | name = args[1] 48 | parameter = args[2] 49 | retval = func(*args, **kwargs) 50 | if parameter is not None and parameter not in self._module_parameters: 51 | self._module_parameters[parameter] = ( 52 | name, 53 | CallStack.from_here(self._project_root, start_from=2), 54 | ) 55 | return retval 56 | return hook 57 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/time/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/time/operation.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | from deepview_profile.tracking.call_stack import CallStack 5 | from deepview_profile.tracking.base import TrackerBase 6 | from deepview_profile.tracking.callable_tracker import CallableTracker 7 | from deepview_profile.tracking.utils import remove_dunder 8 | from deepview_profile.profiler.operation import OperationProfiler 9 | 10 | OperationInfo = collections.namedtuple( 11 | 'OperationInfo', ['operation_name', 'stack', 'forward_ms', 'backward_ms']) 12 | 13 | 14 | class OperationRunTimeTracker(TrackerBase): 15 | def __init__(self, project_root): 16 | super().__init__() 17 | self._callable_tracker = CallableTracker(self._hook_creator) 18 | self._profiler = OperationProfiler() 19 | self._project_root = project_root 20 | self._processing_hook = False 21 | 22 | self.operations = [] 23 | 24 | def start_tracking(self): 25 | super().start_tracking() 26 | self._callable_tracker.start_tracking() 27 | 28 | def stop_tracking(self): 29 | super().stop_tracking() 30 | self._callable_tracker.stop_tracking() 31 | 32 | def populate_report(self, builder): 33 | for op_info in self.operations: 34 | builder.add_run_time_entry( 35 | operation_name=remove_dunder(op_info.operation_name), 36 | forward_ms=op_info.forward_ms, 37 | backward_ms=op_info.backward_ms, 38 | stack_context=op_info.stack, 39 | ) 40 | 41 | def populate_breakdown(self, builder): 42 | # The HierarchicalBreakdownBuilder uses the same run time entry API as 43 | # the OperationRunTimeReportBuilder. 44 | self.populate_report(builder) 45 | 46 | def _hook_creator(self, func): 47 | def hook(*args, **kwargs): 48 | # NOTE: We use self._processing_hook to handle cases where we have 49 | # hooks on nested function calls. 
50 | if self._processing_hook: 51 | return func(*args, **kwargs) 52 | 53 | self._processing_hook = True 54 | try: 55 | stack = CallStack.from_here(self._project_root, start_from=2) 56 | if len(stack.frames) == 0: 57 | return func(*args, **kwargs) 58 | 59 | forward_ms, backward_ms = self._profiler.measure_operation_ms( 60 | func, args, kwargs) 61 | self.operations.append(OperationInfo( 62 | operation_name=func.__name__, 63 | stack=stack, 64 | forward_ms=forward_ms, 65 | backward_ms=backward_ms, 66 | )) 67 | 68 | # Actually run the hooked function 69 | return func(*args, **kwargs) 70 | finally: 71 | self._processing_hook = False 72 | 73 | return hook 74 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/report.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from deepview_profile.tracking.base import ReportBase, ReportBuilderBase 4 | import deepview_profile.tracking.time.report_queries as queries 5 | 6 | RunTimeEntry = collections.namedtuple( 7 | 'RunTimeEntry', 8 | ['operation_name', 9 | 'forward_ms', 10 | 'backward_ms', 11 | 'file_path', 12 | 'line_number'], 13 | ) 14 | 15 | 16 | class OperationRunTimeReport(ReportBase): 17 | def __init__(self, connection): 18 | super().__init__(connection) 19 | 20 | def get_run_time_entries(self, path_prefix=None): 21 | cursor = self._connection.cursor() 22 | return map( 23 | lambda row: RunTimeEntry(*row), 24 | cursor.execute(queries.get_run_time_entries_with_context), 25 | ) 26 | 27 | 28 | class OperationRunTimeReportBuilder(ReportBuilderBase): 29 | # This is the operation run time tracking report file format version that 30 | # will be created by this builder. When changes are made to the file 31 | # format, this integer should be increased monotonically. 32 | # 33 | # We need to version these tracking reports to protect us from future 34 | # changes to the file format. 
35 | Version = 1 36 | 37 | def __init__(self, file=None): 38 | super().__init__(file) 39 | 40 | def add_run_time_entry( 41 | self, operation_name, forward_ms, backward_ms, stack_context): 42 | cursor = self._connection.cursor() 43 | cursor.execute(queries.add_run_time_entry, ( 44 | operation_name, 45 | forward_ms, 46 | backward_ms, 47 | )) 48 | entry_id = cursor.lastrowid 49 | 50 | def stack_frame_generator(): 51 | for idx, frame in enumerate(stack_context.frames): 52 | yield (idx, frame.file_path, frame.line_number, entry_id) 53 | 54 | cursor.executemany(queries.add_stack_frame, stack_frame_generator()) 55 | 56 | def build(self): 57 | self._connection.commit() 58 | return OperationRunTimeReport(self._connection) 59 | 60 | def _create_report_tables(self): 61 | cursor = self._connection.cursor() 62 | cursor.execute(queries.set_report_format_version.format( 63 | version=OperationRunTimeReportBuilder.Version)) 64 | for creation_query in queries.create_report_tables.values(): 65 | cursor.execute(creation_query) 66 | self._connection.commit() 67 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/report_queries.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | create_report_tables = { 4 | 'run_time_entries': """ 5 | CREATE TABLE IF NOT EXISTS run_time_entries ( 6 | id INTEGER PRIMARY KEY, 7 | operation_name TEXT NOT NULL, 8 | forward_ms REAL NOT NULL, 9 | backward_ms REAL 10 | ) 11 | """, 12 | 'stack_frames': """ 13 | CREATE TABLE IF NOT EXISTS stack_frames ( 14 | ordering INTEGER NOT NULL, 15 | file_path TEXT NOT NULL, 16 | line_number INTEGER NOT NULL, 17 | entry_id INTEGER NOT NULL, 18 | PRIMARY KEY (entry_id, ordering) 19 | ) 20 | """, 21 | } 22 | 23 | set_report_format_version = 'PRAGMA user_version = {version:d}' 24 | 25 | add_stack_frame = """ 26 | INSERT INTO stack_frames (ordering, file_path, line_number, entry_id) 27 | VALUES (?, ?, ?, ?) 28 | """ 29 | 30 | add_run_time_entry = """ 31 | INSERT INTO run_time_entries (operation_name, forward_ms, backward_ms) 32 | VALUES (?, ?, ?) 
33 | """ 34 | 35 | get_run_time_entries_with_context = """ 36 | WITH code_contexts AS ( 37 | SELECT entry_id, file_path, line_number FROM stack_frames 38 | GROUP BY entry_id HAVING ordering == MIN(ordering) 39 | ) 40 | SELECT 41 | e.operation_name, 42 | e.forward_ms, 43 | e.backward_ms, 44 | c.file_path, 45 | c.line_number 46 | FROM 47 | run_time_entries AS e LEFT JOIN code_contexts AS c 48 | ON e.id == c.entry_id 49 | ORDER BY c.file_path ASC, c.line_number ASC 50 | """ 51 | -------------------------------------------------------------------------------- /deepview_profile/tracking/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | DUNDER_REGEX = re.compile('__(?P.+)__') 5 | 6 | 7 | def tensor_size_bytes(tensor): 8 | if tensor is None or not tensor.is_cuda: 9 | return 0 10 | return tensor.numel() * tensor.element_size() 11 | 12 | 13 | def remove_dunder(fn_name): 14 | match = DUNDER_REGEX.match(fn_name) 15 | if match is None: 16 | return fn_name 17 | return match.group('name') 18 | -------------------------------------------------------------------------------- /deepview_profile/user_code_utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | 4 | from deepview_profile.exceptions import exceptions_as_analysis_errors 5 | 6 | 7 | @contextlib.contextmanager 8 | def user_code_environment(script_root_path, project_root): 9 | """ 10 | A combined context manager that activates all relevant context managers 11 | used when running user code. 12 | """ 13 | with sys_path_root(script_root_path): 14 | # with prevent_module_caching(): 15 | with exceptions_as_analysis_errors(project_root): 16 | yield 17 | 18 | 19 | @contextlib.contextmanager 20 | def sys_path_root(script_root_path): 21 | """ 22 | A context manager that sets sys.path[0] to the specified path on entry and 23 | then restores it after exiting the context manager. 24 | """ 25 | # As per the Python documentation, sys.path[0] always stores the path to 26 | # the directory containing the Python script that was used to start the 27 | # Python interpreter. The contents of sys.path are used to resolve module 28 | # imports. 29 | # 30 | # When we run user code (e.g., the user's entry point file), we want to run 31 | # it as if it was being directly executed by the user from the shell. For 32 | # example: 33 | # 34 | # $ python3 entry_point.py 35 | # 36 | # For this to work, we need to ensure that sys.path[0] is the path to the 37 | # directory containing the entry_point.py file. However if we use exec(), 38 | # sys.path[0] is set to the path of DeepView's command line executable. 39 | # 40 | # To fix this problem, we set sys.path[0] to the correct root path before 41 | # running the user's code and restore it to DeepView's script path after the 42 | # execution completes. Doing this is **very important** as it ensures that 43 | # imports work as expected inside the user's code. This context manager 44 | # should be used each time we execute user code because imports can exist 45 | # inside user-defined functions. 46 | # 47 | # Setting and then restoring sys.path[0] is better than just appending the 48 | # user's path to sys.path because we want to avoid accidentally importing 49 | # anything from the user's codebase. 
50 | skyline_script_root = sys.path[0] 51 | try: 52 | sys.path[0] = script_root_path 53 | yield 54 | finally: 55 | sys.path[0] = skyline_script_root 56 | 57 | 58 | @contextlib.contextmanager 59 | def prevent_module_caching(): 60 | """ 61 | A context manager that prevents any imported modules from being cached 62 | after exiting. 63 | """ 64 | try: 65 | original_modules = sys.modules.copy() 66 | yield 67 | finally: 68 | newly_added = { 69 | module_name for module_name in sys.modules.keys() 70 | if module_name not in original_modules 71 | } 72 | for module_name in newly_added: 73 | del sys.modules[module_name] 74 | -------------------------------------------------------------------------------- /deepview_profile/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | import gc 4 | import os 5 | import base64 6 | from google.protobuf.json_format import MessageToDict 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | def release_memory(): 11 | logger.debug("Emptying cache") 12 | gc.collect() 13 | torch.cuda.empty_cache() 14 | 15 | def next_message_to_dict(object): 16 | message = next(object) 17 | return MessageToDict(message) 18 | 19 | def files_encoded_unique(operation_tree): 20 | encoded_files = [] 21 | 22 | for analysis in operation_tree: 23 | context_info_map = analysis["operation"].get("contextInfoMap", None) 24 | if context_info_map is not None and len(context_info_map) > 0: 25 | filename = list( 26 | context_info_map[0]["context"]["filePath"]["components"] 27 | ).pop() 28 | 29 | already_in_list = next( 30 | (item for item in encoded_files if item["name"] == filename), None 31 | ) 32 | if not already_in_list: 33 | file_path = os.path.join( 34 | "", *list(context_info_map[0]["context"]["filePath"]["components"]) 35 | ) 36 | 37 | encoded_file = encode_file("", file_path) 38 | encoded_files.append(encoded_file) 39 | 40 | return encoded_files 41 | 42 | def encode_file(root, file): 43 | file_dict = None 44 | if os.path.splitext(file)[1] == ".py" and file != "entry_point.py": 45 | file_dict = {"name": file, "content": ""} 46 | 47 | filename = os.path.join(root, file) 48 | 49 | with open(filename, "r") as f: 50 | file_content = f.read() 51 | file_dict["content"] = base64.b64encode( 52 | file_content.encode("utf-8") 53 | ).decode("utf-8") 54 | 55 | return file_dict 56 | 57 | def model_location_patterns(): 58 | return [ 59 | r"./transformers/models[/\w+/]+\w+.py", 60 | r"./transformers/integrations[/\w+/]+\w+.py", 61 | r"./diffusers/models[/\w+/]+\w+.py", 62 | ] -------------------------------------------------------------------------------- /deepview_profile/version_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import re 3 | 4 | Version = collections.namedtuple('Version', ['major', 'minor', 'patch']) 5 | 6 | VERSION_REGEX = re.compile(r'^\d+\.\d+\.\d+$') 7 | 8 | 9 | class Version: 10 | def __init__(self, major, minor, patch): 11 | self._major = major 12 | self._minor = minor 13 | self._patch = patch 14 | 15 | @property 16 | def major(self): 17 | return self._major 18 | 19 | @property 20 | def minor(self): 21 | return self._minor 22 | 23 | @property 24 | def patch(self): 25 | return self._patch 26 | 27 | def __repr__(self): 28 | return ''.join([ 29 | 'Version(major=', 30 | str(self.major), 31 | ', minor=', 32 | str(self.minor), 33 | ', patch=', 34 | str(self.patch), 35 | ')' 36 | ]) 37 | 38 | def __eq__(self, other): 39 | return ( 40 | self.major ==
other.major and 41 | self.minor == other.minor and 42 | self.patch == other.patch 43 | ) 44 | 45 | def __gt__(self, other): 46 | self_nums = [self.major, self.minor, self.patch] 47 | other_nums = [other.major, other.minor, other.patch] 48 | 49 | for self_ver, other_ver in zip(self_nums, other_nums): 50 | if self_ver > other_ver: 51 | return True 52 | elif self_ver < other_ver: 53 | return False 54 | 55 | return False 56 | 57 | def __ne__(self, other): 58 | return not (self == other) 59 | 60 | def __ge__(self, other): 61 | return self == other or self > other 62 | 63 | def __lt__(self, other): 64 | return not (self >= other) 65 | 66 | def __le__(self, other): 67 | return not (self > other) 68 | 69 | @classmethod 70 | def parse_semantic_version(cls, version_str): 71 | if VERSION_REGEX.match(version_str) is None: 72 | return None 73 | version_nums = list(map(int, version_str.split('.'))) 74 | return cls( 75 | major=version_nums[0], 76 | minor=version_nums[1], 77 | patch=version_nums[2], 78 | ) 79 | -------------------------------------------------------------------------------- /docs/memory-report.md: -------------------------------------------------------------------------------- 1 | This page describes the database schema of the memory report that is generated by DeepView.Profile's `memory` subcommand. Recall that DeepView.Profile's reports (memory and run time) are [SQLite database files](https://www.sqlite.org/). 2 | 3 | **NOTE:** DeepView.Profile's memory profiling is for GPU memory only. 4 | 5 | ## Overview 6 | 7 | DeepView.Profile tracks the memory usage associated with a model's *weights* and *activations*. DeepView.Profile will also report the peak amount of memory allocated during a training iteration. 8 | 9 | Just like the run time report, DeepView.Profile also includes the stack trace associated with each activation or weight in the report. DeepView.Profile only includes the stack frames associated with files inside your project (i.e. files under your project's root directory). 10 | 11 | ## Tables 12 | 13 | ### `weight_entries` 14 | 15 | ```sql title="Schema" 16 | CREATE TABLE weight_entries ( 17 | id INTEGER PRIMARY KEY, 18 | name TEXT NOT NULL, 19 | size_bytes INTEGER NOT NULL, 20 | grad_size_bytes INTEGER NOT NULL 21 | ); 22 | ``` 23 | This table holds the memory used by the model's weights. The `size_bytes` column is the number of bytes used by the weight and `grad_size_bytes` is the number of bytes used by the weight's gradient tensor. The `name` column holds 24 | the weight's name, which is assigned by PyTorch. 25 | 26 | ### `activation_entries` 27 | 28 | ```sql title="Schema" 29 | CREATE TABLE activation_entries ( 30 | id INTEGER PRIMARY KEY, 31 | operation_name TEXT NOT NULL, 32 | size_bytes INTEGER NOT NULL 33 | ); 34 | ``` 35 | This table holds the memory used by the model's activations in one training iteration. The `size_bytes` column is the number of bytes used by the activation. The `operation_name` column is the name of the operation that generated the activation. 36 | 37 | ### `entry_types` 38 | 39 | ```sql title="Schema" 40 | CREATE TABLE entry_types ( 41 | entry_type INTEGER PRIMARY KEY, 42 | name TEXT NOT NULL 43 | ); 44 | ``` 45 | This is a table that stores mappings of DeepView.Profile's memory entry types (activations, weights) to numeric identifiers. DeepView.Profile maps weights to an entry type of `1`, and activations to an entry type of `2`. 
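For reference, the report builder populates this table once, when the report file is created, so querying it in any memory report should return exactly these two rows (the values come from the `EntryType` enum in `report_queries.py`):

```sql
SELECT entry_type, name FROM entry_types;
-- Expected output (sqlite3 CLI formatting):
-- 1|Weight
-- 2|Activation
```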
46 | 47 | ### `stack_correlation` 48 | 49 | ```sql title="Schema" 50 | CREATE TABLE stack_correlation ( 51 | correlation_id INTEGER PRIMARY KEY, 52 | entry_id INTEGER NOT NULL, 53 | entry_type INTEGER NOT NULL, 54 | UNIQUE (correlation_id, entry_id) 55 | ); 56 | CREATE UNIQUE INDEX entry_type_and_id 57 | ON stack_correlation(entry_type, entry_id); 58 | ``` 59 | This table maps entries to a `correlation_id`, which can be used to look up a memory entry's relevant stack frames in the `stack_frames` table. The `entry_type` column contains either `1` or `2`, which correspond to weights and activations, respectively. 60 | 61 | For all rows where `entry_type == 1`, the `entry_id` column will act as a foreign key for the `id` column in the `weight_entries` table. Similarly, for all rows where `entry_type == 2`, the `entry_id` column will act as a foreign key for the `id` column in the `activation_entries` table. 62 | 63 | ### `stack_frames` 64 | 65 | ```sql title="Schema" 66 | CREATE TABLE stack_frames ( 67 | correlation_id INTEGER NOT NULL, 68 | ordering INTEGER NOT NULL, 69 | file_path TEXT NOT NULL, 70 | line_number INTEGER NOT NULL, 71 | PRIMARY KEY (correlation_id, ordering) 72 | ); 73 | ``` 74 | This table holds the stack frames associated with a memory usage entry (both weights and activations). The `correlation_id` column is a foreign key that references the `correlation_id` in the `stack_correlation` table. File paths stored in the `file_path` column will be relative to the project's root directory, and line numbers are 1-based. 75 | 76 | **NOTE:** DeepView.Profile does not add an explicit foreign key constraint to the `correlation_id` column. 77 | 78 | **Ordering.** 79 | There may be multiple stack frames associated with any given memory entry (i.e. any given `correlation_id`). The `ordering` column is used to keep track of the ordering among stack frames that share the same `correlation_id`. When sorted in ascending order by the `ordering` column, the stack frames will be ordered from most-specific (i.e. *closest* to the weight or operation responsible for the activation) to least-specific (i.e. *farthest* from the weight or operation responsible for the activation). 80 | 81 | **Connecting to Entries.** 82 | To get the stack frames for a given entry, you first need to query the `stack_correlation` table to find the `correlation_id` associated with your `entry_id` and `entry_type` combination. Then you can use that `correlation_id` to look up the associated stack frames in this table. 83 | 84 | ### `misc_sizes` 85 | 86 | ```sql title="Schema" 87 | CREATE TABLE misc_sizes ( 88 | key TEXT PRIMARY KEY, 89 | size_bytes INT NOT NULL 90 | ); 91 | ``` 92 | 93 | This table holds any miscellaneous memory usage information that is reported by DeepView.Profile. Currently, DeepView.Profile only uses this table to report the peak memory usage during one training iteration. This memory usage is reported using the `peak_usage_bytes` key. 94 | -------------------------------------------------------------------------------- /docs/providers.md: -------------------------------------------------------------------------------- 1 | ### Model Provider 2 | 3 | ```python 4 | def deepview_model_provider() -> torch.nn.Module: 5 | pass 6 | ``` 7 | 8 | The model provider must take no arguments and return an instance of your model (a `torch.nn.Module`) that is on the GPU (i.e. you need to call `.cuda()` on the module before returning it).
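For example, a minimal provider could look like the sketch below. `MyModel` and its import path are placeholders for your own module:

```python
import torch

from my_project.model import MyModel  # hypothetical import path


def deepview_model_provider() -> torch.nn.Module:
    # Construct the model and move it to the GPU before returning it.
    return MyModel().cuda()
```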
9 | 10 | ### Input Provider 11 | 12 | ```python 13 | def deepview_input_provider(batch_size: int = 32) -> Tuple: 14 | pass 15 | ``` 16 | 17 | The input provider must take a single `batch_size` argument that has a default value (the batch size you want to profile with). It must return an iterable (does not *have* to be a `tuple`) that contains the arguments that you would normally pass to your model's `forward` method. Any `Tensor`s in the returned iterable must be on the GPU (i.e. you need to call `.cuda()` on them before returning them). 18 | 19 | 20 | ### Iteration Provider 21 | 22 | ```python 23 | def deepview_iteration_provider(model: torch.nn.Module) -> Callable: 24 | pass 25 | ``` 26 | 27 | The iteration provider must take a single `model` argument, which will be an instance of your model. This provider must return a callable (e.g., a function) that, when invoked, runs a single training iteration. 28 | 29 | ### Example 30 | 31 | Suppose that your project code is kept under a `my_project` directory: 32 | 33 | ```zsh 34 | my_project 35 | ├── __init__.py 36 | └── model.py 37 | ``` 38 | and your model is defined in `model.py`: 39 | 40 | ```python 41 | import torch.nn as nn 42 | 43 | 44 | class Model(nn.Module): 45 | def __init__(self): 46 | super().__init__() 47 | self.conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3) 48 | self.linear = nn.Linear(in_features=387096, out_features=10) 49 | 50 | def forward(self, input): 51 | out = self.conv(input) 52 | return self.linear(out.view(-1, 387096)) 53 | ``` 54 | 55 | One way to write the `entry_point.py` file would be: 56 | 57 | ```python 58 | import torch 59 | import torch.nn as nn 60 | 61 | from my_project.model import Model 62 | 63 | 64 | class ModelWithLoss(nn.Module): 65 | def __init__(self): 66 | super().__init__() 67 | self.model = Model() 68 | self.loss_fn = nn.CrossEntropyLoss() 69 | 70 | def forward(self, input, target): 71 | output = self.model(input) 72 | return self.loss_fn(output, target) 73 | 74 | 75 | def deepview_model_provider(): 76 | # Return a GPU-based instance of our model (that returns a loss) 77 | return ModelWithLoss().cuda() 78 | 79 | 80 | def deepview_input_provider(batch_size=32): 81 | # Return GPU-based inputs for our model 82 | return ( 83 | torch.randn((batch_size, 3, 256, 256)).cuda(), 84 | torch.randint(low=0, high=10, size=(batch_size,)).cuda(), 85 | ) 86 | 87 | 88 | def deepview_iteration_provider(model): 89 | # Return a function that executes one training iteration 90 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 91 | def iteration(*inputs): 92 | optimizer.zero_grad() 93 | out = model(*inputs) 94 | out.backward() 95 | optimizer.step() 96 | return iteration 97 | ``` 98 | One important thing to highlight is our use of a wrapper `ModelWithLoss` module. DeepView.Profile only provides breakdowns for operations that run inside the module returned by the model provider. We included the loss function in this wrapper module to have DeepView.Profile include it in the breakdown. We could have also placed the loss function call in the `iteration` function. 99 | 100 | You can place these provider functions either in a new file or directly in `model.py`. Whichever file contains the providers will be your project's entry point file. In this example, we defined the providers in a separate file called `entry_point.py` inside `my_project`.
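With the entry point file in place, you can start an interactive profiling session against it. For example, assuming a Poetry-managed install (mirroring the commands used elsewhere in these docs), run the following from your project root:

```zsh
poetry run deepview interactive entry_point.py
```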
101 | -------------------------------------------------------------------------------- /docs/remote.md: -------------------------------------------------------------------------------- 1 | # Remote Profiling 2 | 3 | ## Terminology 4 | - **Client:** The local machine where you run the DeepView.Explore plugin. 5 | - **Server:** The remote machine where you want to run DeepView.Profile. 6 | 7 | ## Prerequisites 8 | **SSH Access.** 9 | At a minimum, you need SSH access to a server that allows SSH tunnelling. If the server machine exposes ports, then it does not need to support SSH tunnelling. 10 | 11 | **DeepView.Profile and DeepView.Predict.** 12 | Install DeepView.Profile and, optionally, DeepView.Predict on your server to enable remote profiling. 13 | 14 | **[VSCode Remote - SSH extension.](https://code.visualstudio.com/docs/remote/ssh)** 15 | This extension allows users to connect to a remote machine and run extensions remotely. The extension handles most of the heavy lifting, which makes it easy to use DeepView.Explore on remote machines. 16 | 17 | **Installing DeepView.Explore on the Server** 18 | To install the DeepView.Explore plugin on the server, take the following steps. 19 | 1. Connect to your server via SSH. 20 | 2. Get the VSIX file by following the installation instructions. Take note of the path to the VSIX file. 21 | 3. Open VSCode on your client and connect to your server. 22 | 4. Click the Extensions tab (Ctrl-Shift-X on Linux/Windows, ⌘-Shift-X on macOS) and click the `...` button. Click `Install from VSIX` and then specify the path to the VSIX file on your server. 23 | 5. Restart VSCode to enable your changes. 24 | 25 | ## Starting a Remote Profiling Session 26 | 27 | ### Starting DeepView.Profile 28 | DeepView.Profile needs to be running on the server to enable the plugin. You can connect to the server via SSH and start DeepView.Profile by running the `deepview interactive` command as usual. 29 | 30 | ```zsh 31 | poetry run deepview interactive 32 | ``` 33 | 34 | If you want to use a different port, pass the `--port` flag to tell the profiler which port to listen on. 35 | 36 | ```zsh 37 | poetry run deepview interactive --port portNumber 38 | ``` 39 | 40 | ### Starting DeepView.Explore 41 | Launch VSCode and open DeepView.Explore by running the `deepview` command in the command palette (Ctrl-Shift-P on Linux/Windows, ⌘-Shift-P on macOS). Select your project root and begin profiling. 42 | -------------------------------------------------------------------------------- /docs/run-time-report.md: -------------------------------------------------------------------------------- 1 | This page describes the database schema of the run time report that is generated by DeepView.Profile's `time` subcommand. Recall 2 | that DeepView.Profile's reports (memory and run time) are [SQLite database files](https://www.sqlite.org/). 3 | 4 | ## Overview 5 | 6 | DeepView.Profile's run time report contains a breakdown of the run times of each operation that runs in one training iteration. DeepView.Profile only tracks the 7 | operations that execute as part of either the forward or backward pass. 8 | 9 | For each tracked operation, DeepView.Profile also includes the stack trace leading to that operation. DeepView.Profile only includes the stack frames associated with files inside your project (i.e. files under your project's root directory).
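Because the report is an ordinary SQLite file, a few lines of Python are enough to inspect it. The sketch below assumes the report was saved as `run_time_report.sqlite` (a placeholder; use whatever output path you gave the `time` subcommand) and reads from the `run_time_entries` table described below:

```python
import sqlite3

connection = sqlite3.connect("run_time_report.sqlite")
query = "SELECT operation_name, forward_ms, backward_ms FROM run_time_entries"
for operation_name, forward_ms, backward_ms in connection.execute(query):
    # backward_ms is None for operations that are not in the backward pass.
    print(operation_name, forward_ms, backward_ms)
connection.close()
```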
10 | 11 | ## Tables 12 | 13 | ### `run_time_entries` 14 | 15 | ```sql title="Schema" 16 | CREATE TABLE run_time_entries ( 17 | id INTEGER PRIMARY KEY, 18 | operation_name TEXT NOT NULL, 19 | forward_ms REAL NOT NULL, 20 | backward_ms REAL 21 | ); 22 | ``` 23 | 24 | This table holds the measured run time(s) of each tracked operation. Each entry in this table represents one operation *instance* (i.e. one invocation of an operation). The columns in this table are self-explanatory. 25 | 26 | **NOTE:** DeepView.Profile reports run times in milliseconds. 27 | 28 | **Backward Pass.** 29 | Note that not every operation is necessarily involved in the backward pass. When an operation is not in the backward pass, `backward_ms` will be `NULL`. 30 | 31 | 32 | ### `stack_frames` 33 | 34 | ```sql title="Schema" 35 | CREATE TABLE stack_frames ( 36 | ordering INTEGER NOT NULL, 37 | file_path TEXT NOT NULL, 38 | line_number INTEGER NOT NULL, 39 | entry_id INTEGER NOT NULL, 40 | PRIMARY KEY (entry_id, ordering) 41 | ); 42 | ``` 43 | 44 | This table holds the stack frames associated with each tracked operation. The `entry_id` column is a foreign key that references the `id` column in `run_time_entries`. 45 | 46 | **NOTE:** DeepView.Profile does not add an explicit foreign key constraint to the `entry_id` column. 47 | 48 | **Ordering.** 49 | There may be multiple stack frames associated with any given tracked operation (i.e. any given `entry_id`). The `ordering` column is used to keep track of the ordering among stack frames that share the same `entry_id`. When sorted in ascending order by the `ordering` column, the stack frames will be ordered from most-specific (i.e. *closest* to the operation's call site) to least-specific (i.e. *farthest* from the operation's call site). 50 | -------------------------------------------------------------------------------- /examples/densenet/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer apply only to the "densenet.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/densenet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import densenet 4 | 5 | 6 | def deepview_model_provider(): 7 | return densenet.densenet121().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | def iteration(*inputs): 20 | optimizer.zero_grad() 21 | out = model(*inputs) 22 | out.backward() 23 | optimizer.step() 24 | return iteration 25 | -------------------------------------------------------------------------------- /examples/gnmt/README.md: -------------------------------------------------------------------------------- 1 | # GNMT (Google Neural Machine Translation) Model 2 | 3 | This directory contains an implementation of GNMT that was adapted from the 4 | code found in the [MLPerf training repository](https://github.com/mlperf/training/tree/master/rnn_translator). 5 | 6 | To launch an interactive DeepView.Profile session for GNMT, run 7 | ``` 8 | deepview interactive entry_point.py 9 | ``` 10 | 11 | 12 | ## License 13 | 14 | This code, with the exception of the `deepview_` prefixed functions in 15 | `entry_point.py`, was adapted from the MLPerf training benchmarks and therefore 16 | shares the same license. The unmodified license can be found in the `LICENSE` 17 | file in the `seq2seq` directory. 18 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Elad Hoffer 4 | Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/data/config.py: -------------------------------------------------------------------------------- 1 | PAD_TOKEN = '<pad>' 2 | UNK_TOKEN = '<unk>' 3 | BOS_TOKEN = '<s>' 4 | EOS_TOKEN = '<\s>' 5 | 6 | # special PAD, UNKNOWN, BEGIN-OF-STRING, END-OF-STRING tokens 7 | PAD, UNK, BOS, EOS = [0, 1, 2, 3] 8 | 9 | # path to the BPE vocabulary file, relative to the data directory, it should 10 | # point to file generated by subword-nmt/get_vocab.py 11 | VOCAB_FNAME = 'vocab.bpe.32000' 12 | 13 | # paths to source and target training files, relative to the data directory, it 14 | # should point to BPE-encoded files, generated by subword-nmt/apply_bpe.py 15 | SRC_TRAIN_FNAME = 'train.tok.clean.bpe.32000.en' 16 | TGT_TRAIN_FNAME = 'train.tok.clean.bpe.32000.de' 17 | 18 | # paths to source and target validation files, relative to the data directory, 19 | # it should point to BPE-encoded files, generated by subword-nmt/apply_bpe.py 20 | SRC_VAL_FNAME = 'newstest_dev.tok.clean.bpe.32000.en' 21 | TGT_VAL_FNAME = 'newstest_dev.tok.clean.bpe.32000.de' 22 | 23 | # path to the test source file, relative to the data directory, it should point 24 | # to BPE-encoded file, generated by subword-nmt/apply_bpe.py 25 | SRC_TEST_FNAME = 'newstest2014.tok.bpe.32000.en' 26 | 27 | # path to the test target file, relative to the data directory, it should point 28 | # to plaintext file, tokenization is performed by the sacrebleu package 29 | TGT_TEST_TARGET_FNAME = 'newstest2014.de' 30 | 31 | # path to the moses detokenizer, relative to the data directory 32 | DETOKENIZER = 'mosesdecoder/scripts/tokenizer/detokenizer.perl' 33 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/data/tokenizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import defaultdict 3 | from functools import partial 4 | 5 | import seq2seq.data.config as config 6 | 7 | 8 | class Tokenizer: 9 | """ 10 | Tokenizer class. 11 | """ 12 | def __init__(self, vocab_fname=None, pad=1, separator='@@'): 13 | """ 14 | Constructor for the Tokenizer class. 15 | 16 | :param vocab_fname: path to the file with vocabulary 17 | :param pad: pads vocabulary to a multiple of 'pad' tokens 18 | :param separator: tokenization separator 19 | """ 20 | if vocab_fname: 21 | self.separator = separator 22 | 23 | logging.info(f'Building vocabulary from {vocab_fname}') 24 | vocab = [config.PAD_TOKEN, config.UNK_TOKEN, 25 | config.BOS_TOKEN, config.EOS_TOKEN] 26 | 27 | with open(vocab_fname) as vfile: 28 | for line in vfile: 29 | vocab.append(line.strip()) 30 | 31 | self.pad_vocabulary(vocab, pad) 32 | 33 | self.vocab_size = len(vocab) 34 | logging.info(f'Size of vocabulary: {self.vocab_size}') 35 | 36 | self.tok2idx = defaultdict(partial(int, config.UNK)) 37 | for idx, token in enumerate(vocab): 38 | self.tok2idx[token] = idx 39 | 40 | self.idx2tok = {} 41 | for key, value in self.tok2idx.items(): 42 | self.idx2tok[value] = key 43 | 44 | def pad_vocabulary(self, vocab, pad): 45 | """ 46 | Pads vocabulary to a multiple of 'pad' tokens.
47 | 48 | :param vocab: list with vocabulary 49 | :param pad: integer 50 | """ 51 | vocab_size = len(vocab) 52 | padded_vocab_size = (vocab_size + pad - 1) // pad * pad 53 | for i in range(0, padded_vocab_size - vocab_size): 54 | token = f'madeupword{i:04d}' 55 | vocab.append(token) 56 | assert len(vocab) % pad == 0 57 | 58 | def get_state(self): 59 | logging.info('Saving state of the tokenizer') 60 | state = { 61 | 'separator': self.separator, 62 | 'vocab_size': self.vocab_size, 63 | 'tok2idx': self.tok2idx, 64 | 'idx2tok': self.idx2tok, 65 | } 66 | return state 67 | 68 | def set_state(self, state): 69 | logging.info('Restoring state of the tokenizer') 70 | self.separator = state['separator'] 71 | self.vocab_size = state['vocab_size'] 72 | self.tok2idx = state['tok2idx'] 73 | self.idx2tok = state['idx2tok'] 74 | 75 | def segment(self, line): 76 | """ 77 | Tokenizes single sentence and adds special BOS and EOS tokens. 78 | 79 | :param line: sentence 80 | 81 | returns: list representing tokenized sentence 82 | """ 83 | line = line.strip().split() 84 | entry = [self.tok2idx[i] for i in line] 85 | entry = [config.BOS] + entry + [config.EOS] 86 | return entry 87 | 88 | def detokenize(self, inputs, delim=' '): 89 | """ 90 | Detokenizes single sentence and removes token separator characters. 91 | 92 | :param inputs: sequence of tokens 93 | :param delim: tokenization delimiter 94 | 95 | returns: string representing detokenized sentence 96 | """ 97 | detok = delim.join([self.idx2tok[idx] for idx in inputs]) 98 | detok = detok.replace(self.separator + ' ', '') 99 | detok = detok.replace(self.separator, '') 100 | 101 | detok = detok.replace(config.BOS_TOKEN, '') 102 | detok = detok.replace(config.EOS_TOKEN, '') 103 | detok = detok.replace(config.PAD_TOKEN, '') 104 | detok = detok.strip() 105 | return detok 106 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/encoder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn.utils.rnn import pack_padded_sequence 3 | from torch.nn.utils.rnn import pad_packed_sequence 4 | 5 | import seq2seq.data.config as config 6 | from seq2seq.utils import init_lstm_ 7 | 8 | 9 | class ResidualRecurrentEncoder(nn.Module): 10 | """ 11 | Encoder with Embedding, LSTM layers, residual connections and optional 12 | dropout. 13 | 14 | The first LSTM layer is bidirectional and uses variable sequence length 15 | API, the remaining (num_layers-1) layers are unidirectional. Residual 16 | connections are enabled after third LSTM layer, dropout is applied on 17 | inputs to LSTM layers. 18 | """ 19 | def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2, 20 | batch_first=False, embedder=None, init_weight=0.1): 21 | """ 22 | Constructor for the ResidualRecurrentEncoder. 
23 | 24 | :param vocab_size: size of vocabulary 25 | :param hidden_size: hidden size for LSTM layers 26 | :param num_layers: number of LSTM layers, 1st layer is bidirectional 27 | :param dropout: probability of dropout (on input to LSTM layers) 28 | :param batch_first: if True the model uses (batch,seq,feature) tensors, 29 | if false the model uses (seq, batch, feature) 30 | :param embedder: instance of nn.Embedding, if None constructor will 31 | create new embedding layer 32 | :param init_weight: range for the uniform initializer 33 | """ 34 | super(ResidualRecurrentEncoder, self).__init__() 35 | self.batch_first = batch_first 36 | self.rnn_layers = nn.ModuleList() 37 | # 1st LSTM layer, bidirectional 38 | self.rnn_layers.append( 39 | nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True, 40 | batch_first=batch_first, bidirectional=True)) 41 | 42 | # 2nd LSTM layer, with 2x larger input_size 43 | self.rnn_layers.append( 44 | nn.LSTM((2 * hidden_size), hidden_size, num_layers=1, bias=True, 45 | batch_first=batch_first)) 46 | 47 | # Remaining LSTM layers 48 | for _ in range(num_layers - 2): 49 | self.rnn_layers.append( 50 | nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True, 51 | batch_first=batch_first)) 52 | 53 | for lstm in self.rnn_layers: 54 | init_lstm_(lstm, init_weight) 55 | 56 | self.dropout = nn.Dropout(p=dropout) 57 | 58 | if embedder is not None: 59 | self.embedder = embedder 60 | else: 61 | self.embedder = nn.Embedding(vocab_size, hidden_size, 62 | padding_idx=config.PAD) 63 | nn.init.uniform_(self.embedder.weight.data, -init_weight, init_weight) 64 | 65 | def forward(self, inputs, lengths): 66 | """ 67 | Execute the encoder. 68 | 69 | :param inputs: tensor with indices from the vocabulary 70 | :param lengths: vector with sequence lengths (excluding padding) 71 | 72 | returns: tensor with encoded sequences 73 | """ 74 | x = self.embedder(inputs) 75 | 76 | # bidirectional layer 77 | x = self.dropout(x) 78 | x = pack_padded_sequence(x, lengths.cpu().numpy(), 79 | batch_first=self.batch_first) 80 | x, _ = self.rnn_layers[0](x) 81 | x, _ = pad_packed_sequence(x, batch_first=self.batch_first) 82 | 83 | # 1st unidirectional layer 84 | x = self.dropout(x) 85 | x, _ = self.rnn_layers[1](x) 86 | 87 | # the rest of unidirectional layers, 88 | # with residual connections starting from 3rd layer 89 | for i in range(2, len(self.rnn_layers)): 90 | residual = x 91 | x = self.dropout(x) 92 | x, _ = self.rnn_layers[i](x) 93 | x = x + residual 94 | 95 | return x 96 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/gnmt.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | import seq2seq.data.config as config 4 | from seq2seq.models.decoder import ResidualRecurrentDecoder 5 | from seq2seq.models.encoder import ResidualRecurrentEncoder 6 | from seq2seq.models.seq2seq_base import Seq2Seq 7 | 8 | 9 | class GNMT(Seq2Seq): 10 | """ 11 | GNMT v2 model 12 | """ 13 | def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2, 14 | batch_first=False, share_embedding=True): 15 | """ 16 | Constructor for the GNMT v2 model. 
17 | 18 | :param vocab_size: size of vocabulary (number of tokens) 19 | :param hidden_size: internal hidden size of the model 20 | :param num_layers: number of layers, applies to both encoder and 21 | decoder 22 | :param dropout: probability of dropout (in encoder and decoder) 23 | :param batch_first: if True the model uses (batch,seq,feature) tensors, 24 | if false the model uses (seq, batch, feature) 25 | :param share_embedding: if True embeddings are shared between encoder 26 | and decoder 27 | """ 28 | 29 | super(GNMT, self).__init__(batch_first=batch_first) 30 | 31 | if share_embedding: 32 | embedder = nn.Embedding(vocab_size, hidden_size, 33 | padding_idx=config.PAD) 34 | nn.init.uniform_(embedder.weight.data, -0.1, 0.1) 35 | else: 36 | embedder = None 37 | 38 | self.encoder = ResidualRecurrentEncoder(vocab_size, hidden_size, 39 | num_layers, dropout, 40 | batch_first, embedder) 41 | 42 | self.decoder = ResidualRecurrentDecoder(vocab_size, hidden_size, 43 | num_layers, dropout, 44 | batch_first, embedder) 45 | 46 | def forward(self, input_encoder, input_enc_len, input_decoder): 47 | context = self.encode(input_encoder, input_enc_len) 48 | context = (context, input_enc_len, None) 49 | output, _, _ = self.decode(input_decoder, context) 50 | 51 | return output 52 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/seq2seq_base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn.functional import log_softmax 3 | 4 | 5 | class Seq2Seq(nn.Module): 6 | """ 7 | Generic Seq2Seq module, with an encoder and a decoder. 8 | """ 9 | def __init__(self, encoder=None, decoder=None, batch_first=False): 10 | """ 11 | Constructor for the Seq2Seq module. 12 | 13 | :param encoder: encoder module 14 | :param decoder: decoder module 15 | :param batch_first: if True the model uses (batch, seq, feature) 16 | tensors, if false the model uses (seq, batch, feature) tensors 17 | """ 18 | super(Seq2Seq, self).__init__() 19 | self.encoder = encoder 20 | self.decoder = decoder 21 | self.batch_first = batch_first 22 | 23 | def encode(self, inputs, lengths): 24 | """ 25 | Applies the encoder to inputs with a given input sequence lengths. 26 | 27 | :param inputs: tensor with inputs (batch, seq_len) if 'batch_first' 28 | else (seq_len, batch) 29 | :param lengths: vector with sequence lengths (excluding padding) 30 | """ 31 | return self.encoder(inputs, lengths) 32 | 33 | def decode(self, inputs, context, inference=False): 34 | """ 35 | Applies the decoder to inputs, given the context from the encoder. 36 | 37 | :param inputs: tensor with inputs (batch, seq_len) if 'batch_first' 38 | else (seq_len, batch) 39 | :param context: context from the encoder 40 | :param inference: if True inference mode, if False training mode 41 | """ 42 | return self.decoder(inputs, context, inference) 43 | 44 | def generate(self, inputs, context, beam_size): 45 | """ 46 | Autoregressive generator, works with SequenceGenerator class. 47 | Executes decoder (in inference mode), applies log_softmax and topK for 48 | inference with beam search decoding. 
49 | 50 | :param inputs: tensor with inputs to the decoder 51 | :param context: context from the encoder 52 | :param beam_size: beam size for the generator 53 | 54 | returns: (words, logprobs, scores, new_context) 55 | words: indices of topK tokens 56 | logprobs: log probabilities of topK tokens 57 | scores: scores from the attention module (for coverage penalty) 58 | new_context: new decoder context, includes new hidden states for 59 | decoder RNN cells 60 | """ 61 | logits, scores, new_context = self.decode(inputs, context, True) 62 | logprobs = log_softmax(logits, dim=-1) 63 | logprobs, words = logprobs.topk(beam_size, dim=-1) 64 | return words, logprobs, scores, new_context 65 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/train/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import torch 5 | 6 | 7 | 8 | def perhaps_convert_float(param, total): 9 | if isinstance(param, float): 10 | param = int(param * total) 11 | return param 12 | 13 | 14 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 15 | """ 16 | Learning rate scheduler with exponential warmup and step decay. 17 | """ 18 | def __init__(self, optimizer, iterations, warmup_steps=0, 19 | remain_steps=1.0, decay_interval=None, decay_steps=4, 20 | decay_factor=0.5, last_epoch=-1): 21 | """ 22 | Constructor of WarmupMultiStepLR. 23 | 24 | Parameters: warmup_steps, remain_steps and decay_interval accept both 25 | integers and floats as input. Integer input is interpreted as 26 | absolute index of iteration, float input is interpreted as a fraction 27 | of total training iterations (epochs * steps_per_epoch). 28 | 29 | If decay_interval is None, then the decay will happen at regularly spaced 30 | intervals ('decay_steps' decays between iteration indices 31 | 'remain_steps' and 'iterations').
32 | 33 | :param optimizer: instance of optimizer 34 | :param iterations: total number of training iterations 35 | :param warmup_steps: number of warmup iterations 36 | :param remain_steps: start decay at 'remain_steps' iteration 37 | :param decay_interval: interval between LR decay steps 38 | :param decay_steps: max number of decay steps 39 | :param decay_factor: decay factor 40 | :param last_epoch: the index of last iteration 41 | """ 42 | 43 | # iterations before learning rate reaches base LR 44 | self.warmup_steps = perhaps_convert_float(warmup_steps, iterations) 45 | 46 | # iteration at which decay starts 47 | self.remain_steps = perhaps_convert_float(remain_steps, iterations) 48 | 49 | # number of steps between each decay 50 | if decay_interval is None: 51 | # decay at regularly spaced intervals 52 | decay_iterations = iterations - self.remain_steps 53 | self.decay_interval = decay_iterations // decay_steps 54 | self.decay_interval = max(self.decay_interval, 1) 55 | else: 56 | self.decay_interval = perhaps_convert_float(decay_interval, 57 | iterations) 58 | 59 | # multiplicative decay factor 60 | self.decay_factor = decay_factor 61 | 62 | # max number of decay steps 63 | self.decay_steps = decay_steps 64 | 65 | if self.warmup_steps > self.remain_steps: 66 | logging.warning('warmup_steps should not be larger than ' 67 | 'remain_steps, setting warmup_steps=remain_steps') 68 | self.warmup_steps = self.remain_steps 69 | 70 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 71 | 72 | def get_lr(self): 73 | if self.last_epoch <= self.warmup_steps: 74 | # exponential lr warmup 75 | if self.warmup_steps != 0: 76 | warmup_factor = math.exp(math.log(0.01) / self.warmup_steps) 77 | else: 78 | warmup_factor = 1.0 79 | inv_decay = warmup_factor ** (self.warmup_steps - self.last_epoch) 80 | lr = [base_lr * inv_decay for base_lr in self.base_lrs] 81 | 82 | elif self.last_epoch >= self.remain_steps: 83 | # step decay 84 | decay_iter = self.last_epoch - self.remain_steps 85 | num_decay_steps = decay_iter // self.decay_interval + 1 86 | num_decay_steps = min(num_decay_steps, self.decay_steps) 87 | lr = [ 88 | base_lr * (self.decay_factor ** num_decay_steps) 89 | for base_lr in self.base_lrs 90 | ] 91 | else: 92 | # base lr 93 | lr = [base_lr for base_lr in self.base_lrs] 94 | return lr 95 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/train/smoothing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothing(nn.Module): 6 | """ 7 | NLL loss with label smoothing. 8 | """ 9 | def __init__(self, padding_idx, smoothing=0.0): 10 | """ 11 | Constructor for the LabelSmoothing module.
12 | 13 | :param padding_idx: index of the PAD token 14 | :param smoothing: label smoothing factor 15 | """ 16 | super(LabelSmoothing, self).__init__() 17 | self.padding_idx = padding_idx 18 | self.confidence = 1.0 - smoothing 19 | self.smoothing = smoothing 20 | 21 | def forward(self, x, target): 22 | logprobs = torch.nn.functional.log_softmax(x, dim=-1, 23 | dtype=torch.float32) 24 | 25 | non_pad_mask = (target != self.padding_idx) 26 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 27 | nll_loss = nll_loss.squeeze(1)[non_pad_mask] 28 | smooth_loss = -logprobs.mean(dim=-1)[non_pad_mask] 29 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 30 | return loss.sum() 31 | -------------------------------------------------------------------------------- /examples/huggingface/entry_point.py: -------------------------------------------------------------------------------- 1 | from transformers import ( 2 | get_linear_schedule_with_warmup, 3 | AutoModelForCausalLM, 4 | Trainer, 5 | ) 6 | import torch 7 | import torch.optim as optim 8 | 9 | model_id = "roberta-base" 10 | 11 | 12 | def deepview_model_provider(): 13 | return AutoModelForCausalLM.from_pretrained(model_id, is_decoder=True).cuda() 14 | 15 | 16 | def deepview_input_provider(batch_size=2): 17 | vocab_size = 30522 18 | src_seq_len = 512 19 | tgt_seq_len = 512 20 | 21 | device = torch.device("cuda") 22 | 23 | source = torch.randint( 24 | low=0, 25 | high=vocab_size, 26 | size=(batch_size, src_seq_len), 27 | dtype=torch.int64, 28 | device=device, 29 | ) 30 | target = torch.randint( 31 | low=0, 32 | high=vocab_size, 33 | size=(batch_size, tgt_seq_len), 34 | dtype=torch.int64, 35 | device=device, 36 | ) 37 | return (source, target) 38 | 39 | 40 | def deepview_iteration_provider(model): 41 | model.parameters() 42 | optimizer = optim.AdamW( 43 | params=model.parameters(), 44 | betas=(0.9, 0.999), 45 | eps=1e-6, 46 | weight_decay=0.01, 47 | lr=1e-4, 48 | ) 49 | scheduler = get_linear_schedule_with_warmup(optimizer, 10000, 500000) 50 | trainer = Trainer(model=model, optimizers=(optimizer, scheduler)) 51 | 52 | def iteration(source, label): 53 | trainer.training_step(model, {"input_ids": source, "labels": label}) 54 | 55 | return iteration -------------------------------------------------------------------------------- /examples/legacy/lenet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class LeNet(nn.Module): 5 | def __init__(self): 6 | super(LeNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5) 8 | self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5) 9 | self.dense1 = nn.Linear(in_features=1250, out_features=500) 10 | self.dense2 = nn.Linear(in_features=500, out_features=10) 11 | self.tanh = nn.Tanh() 12 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 13 | self.softmax = nn.LogSoftmax(dim=1) 14 | 15 | def forward(self, input): 16 | """ 17 | LeNet for CIFAR-10 18 | @innpv size (512, 3, 32, 32) 19 | """ 20 | output = self.conv1(input) 21 | output = self.tanh(output) 22 | output = self.pool(output) 23 | 24 | output = self.conv2(output) 25 | output = self.tanh(output) 26 | output = self.pool(output) 27 | 28 | output = output.view(-1, 1250) 29 | 30 | output = self.dense1(output) 31 | output = self.tanh(output) 32 | output = self.dense2(output) 33 | output = self.softmax(output) 34 | 35 | return output 36 | 
-------------------------------------------------------------------------------- /examples/legacy/testnet2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TestNet(nn.Module): 5 | def __init__(self): 6 | super(TestNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3) 8 | self.bn1 = nn.BatchNorm2d(num_features=64) 9 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3) 10 | self.bn2 = nn.BatchNorm2d(num_features=128) 11 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3) 12 | self.bn3 = nn.BatchNorm2d(num_features=256) 13 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3) 14 | self.bn4 = nn.BatchNorm2d(num_features=512) 15 | 16 | self.linear = nn.Linear(in_features=4608, out_features=1000) 17 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 18 | self.max_pool2 = nn.MaxPool2d(kernel_size=4, stride=4) 19 | self.relu = nn.ReLU() 20 | 21 | def forward(self, input): 22 | """ 23 | @innpv size (16, 3, 128, 128) 24 | """ 25 | output = self.conv1(input) 26 | output = self.bn1(output) 27 | output = self.relu(output) 28 | output = self.max_pool(output) 29 | 30 | output = self.conv2(output) 31 | output = self.bn2(output) 32 | output = self.relu(output) 33 | output = self.max_pool(output) 34 | 35 | output = self.conv3(output) 36 | output = self.bn3(output) 37 | output = self.relu(output) 38 | output = self.max_pool(output) 39 | 40 | output = self.conv4(output) 41 | output = self.bn4(output) 42 | output = self.relu(output) 43 | output = self.max_pool2(output) 44 | 45 | output = output.view(output.size(0), -1) 46 | output = self.linear(output) 47 | 48 | return output 49 | -------------------------------------------------------------------------------- /examples/legacy/vgg11.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class VGG11(nn.Module): 5 | def __init__(self): 6 | super(VGG11, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1) 8 | self.bn1 = nn.BatchNorm2d(64) 9 | 10 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1) 11 | self.bn2 = nn.BatchNorm2d(128) 12 | 13 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1) 14 | self.bn3 = nn.BatchNorm2d(256) 15 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1) 16 | self.bn4 = nn.BatchNorm2d(256) 17 | 18 | self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1) 19 | self.bn5 = nn.BatchNorm2d(512) 20 | self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 21 | self.bn6 = nn.BatchNorm2d(512) 22 | 23 | self.conv7 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 24 | self.bn7 = nn.BatchNorm2d(512) 25 | self.conv8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 26 | self.bn8 = nn.BatchNorm2d(512) 27 | 28 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 29 | self.relu = nn.ReLU() 30 | self.linear = nn.Linear(in_features=512, out_features=10) 31 | 32 | def forward(self, input): 33 | """ 34 | VGG-11 for CIFAR-10 35 | @innpv size (32, 3, 32, 32) 36 | """ 37 | output = self.conv1(input) 38 | output = self.bn1(output) 39 | output = self.relu(output) 40 | output = self.max_pool(output) 41 | 42 | output = self.conv2(output) 43 | output = 
self.bn2(output) 44 | output = self.relu(output) 45 | output = self.max_pool(output) 46 | 47 | output = self.conv3(output) 48 | output = self.bn3(output) 49 | output = self.relu(output) 50 | output = self.conv4(output) 51 | output = self.bn4(output) 52 | output = self.relu(output) 53 | output = self.max_pool(output) 54 | 55 | output = self.conv5(output) 56 | output = self.bn5(output) 57 | output = self.relu(output) 58 | output = self.conv6(output) 59 | output = self.bn6(output) 60 | output = self.relu(output) 61 | output = self.max_pool(output) 62 | 63 | output = self.conv7(output) 64 | output = self.bn7(output) 65 | output = self.relu(output) 66 | output = self.conv8(output) 67 | output = self.bn8(output) 68 | output = self.relu(output) 69 | output = self.max_pool(output) 70 | 71 | output = output.view(-1, 512) 72 | output = self.linear(output) 73 | 74 | return output 75 | -------------------------------------------------------------------------------- /examples/nanogpt/entry_point.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | from model import GPTConfig, GPT 6 | 7 | # Context window length (tokens per training sequence). 8 | block_size = 32 9 | device = "cuda" if torch.cuda.is_available() else "cpu" 10 | 11 | # model 12 | n_layer = 16 13 | n_head = 16 14 | n_embd = 512 15 | dropout = 0.0 16 | vocab_size = 65 17 | bias = False 18 | 19 | # AdamW optimizer 20 | learning_rate = 6e-4 21 | weight_decay = 1e-1 22 | beta1 = 0.9 23 | beta2 = 0.95 24 | 25 | 26 | # optimizer 27 | def configure_optimizer(model, weight_decay, learning_rate, betas): 28 | param_dict = {pn: p for pn, p in model.named_parameters()} 29 | param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad} 30 | decay_params = [p for n, p in param_dict.items() if p.dim() >= 2] 31 | nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2] 32 | optim_groups = [ 33 | {"params": decay_params, "weight_decay": weight_decay}, 34 | {"params": nodecay_params, "weight_decay": 0.0}, 35 | ] 36 | optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas) 37 | 38 | return optimizer 39 | 40 | def deepview_model_provider(): 41 | # model init 42 | # --------------------------------------------- 43 | # Enable flash attention 44 | enable_flash_attention = False 45 | model_args = dict( 46 | n_layer=n_layer, 47 | n_head=n_head, 48 | n_embd=n_embd, 49 | block_size=block_size, 50 | bias=bias, 51 | vocab_size=vocab_size, 52 | dropout=dropout, 53 | enable_flash_attention=enable_flash_attention, 54 | ) 55 | gptconf = GPTConfig(**model_args) 56 | model = GPT(gptconf) 57 | return model.to(device) 58 | 59 | 60 | def deepview_input_provider(batch_size=48): 61 | data = np.random.randint(vocab_size, size=(batch_size, block_size + 1)) 62 | x = torch.stack( 63 | [torch.from_numpy((data[i, :-1]).astype(np.int64)) for i in range(batch_size)] 64 | ) 65 | y = torch.stack( 66 | [torch.from_numpy((data[i, 1:]).astype(np.int64)) for i in range(batch_size)] 67 | ) 68 | 69 | return (x.to(device), y.to(device)) 70 | 71 | 72 | def deepview_iteration_provider(model): 73 | criterion = nn.CrossEntropyLoss() 74 | optimizer = torch.optim.AdamW( 75 | model.parameters(), lr=learning_rate, betas=(beta1, beta2) 76 | ) 77 | 78 | def iteration(inputs, targets): 79 | optimizer.zero_grad() 80 | outputs = model(inputs) 81 | loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1)) 82 | loss.backward() 83 | optimizer.step() 84 | 85 | return iteration
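
# Before profiling an entry point, the three providers can be composed by hand
# as a quick smoke test. This block is an illustrative sketch, not part of the
# profiler API: it assumes the calling convention the examples imply (the input
# tuple is splatted into the iteration function) and a small batch size.
if __name__ == "__main__":
    smoke_model = deepview_model_provider()
    smoke_iteration = deepview_iteration_provider(smoke_model)
    smoke_inputs = deepview_input_provider(batch_size=4)
    smoke_iteration(*smoke_inputs)
    print("completed one training iteration")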
-------------------------------------------------------------------------------- /examples/pytorch_lightning/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import datasets, transforms, models 5 | import pytorch_lightning as pl 6 | 7 | from deepview_profile.pl.deepview_callback import DeepViewProfilerCallback 8 | 9 | class ResNetModel(pl.LightningModule): 10 | def __init__(self, num_classes=10, learning_rate=1e-3): 11 | super(ResNetModel, self).__init__() 12 | self.model = models.resnet18(pretrained=True) 13 | self.model.conv1 = nn.Conv2d( 14 | 1, 64, kernel_size=7, stride=2, padding=3, bias=False 15 | ) 16 | self.model.fc = nn.Linear(self.model.fc.in_features, num_classes) 17 | self.learning_rate = learning_rate 18 | self.criterion = nn.CrossEntropyLoss() 19 | 20 | def forward(self, x): 21 | return self.model(x) 22 | 23 | def training_step(self, batch, batch_idx): 24 | x, y = batch 25 | y_hat = self(x) 26 | loss = torch.nn.functional.cross_entropy(y_hat, y) 27 | return loss 28 | 29 | def validation_step(self, batch, batch_idx): 30 | x, y = batch 31 | y_hat = self(x) 32 | loss = torch.nn.functional.cross_entropy(y_hat, y) 33 | acc = (y_hat.argmax(dim=1) == y).float().mean() 34 | self.log('val_loss', loss) 35 | self.log('val_acc', acc) 36 | 37 | def configure_optimizers(self): 38 | optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) 39 | return optimizer 40 | 41 | def mnist_dataloader(batch_size=32): 42 | transform = transforms.Compose([transforms.Resize(224), 43 | transforms.ToTensor(), 44 | transforms.Normalize((0.1307,), (0.3081,))]) 45 | 46 | mnist_train = datasets.MNIST(root='mnist_data', train=True, download=True, transform=transform) 47 | mnist_val = datasets.MNIST(root='mnist_data', train=False, download=True, transform=transform) 48 | 49 | train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True) 50 | val_loader = DataLoader(mnist_val, batch_size=batch_size) 51 | 52 | return train_loader, val_loader 53 | 54 | if __name__ == '__main__': 55 | train_loader, val_loader = mnist_dataloader(batch_size=16) 56 | model = ResNetModel() 57 | 58 | dv_callback = DeepViewProfilerCallback("example") 59 | 60 | trainer = pl.Trainer( 61 | max_epochs=2, accelerator='gpu', devices=1, 62 | callbacks=[dv_callback] 63 | ) 64 | trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader) -------------------------------------------------------------------------------- /examples/resnet/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer applies only to the "resnet.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 
18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/resnet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import resnet 4 | 5 | 6 | def deepview_model_provider(): 7 | return resnet.resnet50().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /examples/resnet/entry_point_resnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import resnet 4 | 5 | 6 | def deepview_model_provider(): 7 | return resnet.resnext50_32x4d().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /examples/testnet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | import testnet1 5 | 6 | 7 | class TestNetWithLoss(nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | self.testnet = testnet1.TestNet() 11 | 12 | def forward(self, input): 13 | return self.testnet(input).sum() 14 | 15 | 16 | def deepview_model_provider(): 17 | return TestNetWithLoss().cuda() 18 | 19 | 20 | def deepview_input_provider(batch_size=32): 21 | return (torch.randn((batch_size, 3, 128, 128)).cuda(),) 22 | 23 | 24 | def deepview_iteration_provider(model): 25 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 26 | def iteration(*inputs): 27 | optimizer.zero_grad() 28 | out = model(*inputs) 29 | 
out.backward() 30 | optimizer.step() 31 | return iteration 32 | -------------------------------------------------------------------------------- /examples/testnet/testnet1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TestNet(nn.Module): 5 | def __init__(self): 6 | super(TestNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3) 8 | self.bn1 = nn.BatchNorm2d(num_features=64) 9 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3) 10 | self.bn2 = nn.BatchNorm2d(num_features=128) 11 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3) 12 | self.bn3 = nn.BatchNorm2d(num_features=256) 13 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3) 14 | self.bn4 = nn.BatchNorm2d(num_features=512) 15 | self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3) 16 | self.bn5 = nn.BatchNorm2d(num_features=1024) 17 | 18 | self.linear = nn.Linear(in_features=4096, out_features=1000) 19 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 20 | self.relu = nn.ReLU() 21 | 22 | def forward(self, input): 23 | output = self.conv1(input) 24 | output = self.bn1(output) 25 | output = self.relu(output) 26 | output = self.max_pool(output) 27 | 28 | output = self.conv2(output) 29 | output = self.bn2(output) 30 | output = self.relu(output) 31 | output = self.max_pool(output) 32 | 33 | output = self.conv3(output) 34 | output = self.bn3(output) 35 | output = self.relu(output) 36 | output = self.max_pool(output) 37 | 38 | output = self.conv4(output) 39 | output = self.bn4(output) 40 | output = self.relu(output) 41 | output = self.max_pool(output) 42 | 43 | output = self.conv5(output) 44 | output = self.bn5(output) 45 | output = self.relu(output) 46 | output = self.max_pool(output) 47 | 48 | output = output.view(output.size(0), -1) 49 | output = self.linear(output) 50 | 51 | return output 52 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Beam.py: -------------------------------------------------------------------------------- 1 | """ Manage beam search info structure. 2 | 3 | Heavily borrowed from OpenNMT-py. 4 | For code in OpenNMT-py, please check the following link: 5 | https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py 6 | """ 7 | 8 | import torch 9 | import transformer.Constants as Constants 10 | 11 | class Beam(): 12 | ''' Beam search ''' 13 | 14 | def __init__(self, size, device=False): 15 | 16 | self.size = size 17 | self._done = False 18 | 19 | # The score for each translation on the beam. 20 | self.scores = torch.zeros((size,), dtype=torch.float, device=device) 21 | self.all_scores = [] 22 | 23 | # The backpointers at each time-step. 24 | self.prev_ks = [] 25 | 26 | # The outputs at each time-step. 27 | self.next_ys = [torch.full((size,), Constants.PAD, dtype=torch.long, device=device)] 28 | self.next_ys[0][0] = Constants.BOS 29 | 30 | def get_current_state(self): 31 | "Get the outputs for the current timestep." 32 | return self.get_tentative_hypothesis() 33 | 34 | def get_current_origin(self): 35 | "Get the backpointers for the current timestep." 36 | return self.prev_ks[-1] 37 | 38 | @property 39 | def done(self): 40 | return self._done 41 | 42 | def advance(self, word_prob): 43 | "Update beam status and check if finished or not." 44 | num_words = word_prob.size(1) 45 | 46 | # Sum the previous scores. 
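        # On the first step only the initial BOS hypothesis is live, so scores
        # are read from row 0 of word_prob alone. On later steps, each beam's
        # accumulated log-probability (self.scores) is broadcast onto its row,
        # so flattening and taking topk below ranks every (beam, word)
        # continuation jointly.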
47 | if len(self.prev_ks) > 0: 48 | beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) 49 | else: 50 | beam_lk = word_prob[0] 51 | 52 | flat_beam_lk = beam_lk.view(-1) 53 | 54 | best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) 55 | 56 | 57 | self.all_scores.append(self.scores) 58 | self.scores = best_scores 59 | 60 | # bestScoresId is flattened as a (beam x word) array, 61 | # so we need to calculate which word and beam each score came from 62 | prev_k = best_scores_id // num_words # floor division keeps the beam index integral 63 | self.prev_ks.append(prev_k) 64 | self.next_ys.append(best_scores_id - prev_k * num_words) 65 | 66 | # End condition is when top-of-beam is EOS. 67 | if self.next_ys[-1][0].item() == Constants.EOS: 68 | self._done = True 69 | self.all_scores.append(self.scores) 70 | 71 | return self._done 72 | 73 | def sort_scores(self): 74 | "Sort the scores." 75 | return torch.sort(self.scores, 0, True) 76 | 77 | def get_the_best_score_and_idx(self): 78 | "Get the score of the best in the beam." 79 | scores, ids = self.sort_scores() 80 | return scores[1], ids[1] 81 | 82 | def get_tentative_hypothesis(self): 83 | "Get the decoded sequence for the current timestep." 84 | 85 | if len(self.next_ys) == 1: 86 | dec_seq = self.next_ys[0].unsqueeze(1) 87 | else: 88 | _, keys = self.sort_scores() 89 | hyps = [self.get_hypothesis(k) for k in keys] 90 | hyps = [[Constants.BOS] + h for h in hyps] 91 | dec_seq = torch.LongTensor(hyps) 92 | 93 | return dec_seq 94 | 95 | def get_hypothesis(self, k): 96 | """ Walk back to construct the full hypothesis. """ 97 | hyp = [] 98 | for j in range(len(self.prev_ks) - 1, -1, -1): 99 | hyp.append(self.next_ys[j+1][k]) 100 | k = self.prev_ks[j][k] 101 | 102 | return list(map(lambda x: x.item(), hyp[::-1])) 103 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Constants.py: -------------------------------------------------------------------------------- 1 | 2 | PAD = 0 3 | UNK = 1 4 | BOS = 2 5 | EOS = 3 6 | 7 | PAD_WORD = '<blank>' 8 | UNK_WORD = '<unk>' 9 | BOS_WORD = '<s>' 10 | EOS_WORD = '</s>' 11 | -------------------------------------------------------------------------------- /examples/transformer/transformer/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Victor Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Layers.py: -------------------------------------------------------------------------------- 1 | ''' Define the Layers ''' 2 | import torch.nn as nn 3 | from transformer.SubLayers import MultiHeadAttention, PositionwiseFeedForward 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | 8 | class EncoderLayer(nn.Module): 9 | ''' Compose with two layers ''' 10 | 11 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 12 | super(EncoderLayer, self).__init__() 13 | self.slf_attn = MultiHeadAttention( 14 | n_head, d_model, d_k, d_v, dropout=dropout) 15 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 16 | 17 | def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None): 18 | enc_output, enc_slf_attn = self.slf_attn( 19 | enc_input, enc_input, enc_input, mask=slf_attn_mask) 20 | enc_output *= non_pad_mask 21 | 22 | enc_output = self.pos_ffn(enc_output) 23 | enc_output *= non_pad_mask 24 | 25 | return enc_output, enc_slf_attn 26 | 27 | 28 | class DecoderLayer(nn.Module): 29 | ''' Compose with three layers ''' 30 | 31 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 32 | super(DecoderLayer, self).__init__() 33 | self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 34 | self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 35 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 36 | 37 | def forward(self, dec_input, enc_output, non_pad_mask=None, slf_attn_mask=None, dec_enc_attn_mask=None): 38 | dec_output, dec_slf_attn = self.slf_attn( 39 | dec_input, dec_input, dec_input, mask=slf_attn_mask) 40 | dec_output *= non_pad_mask 41 | 42 | dec_output, dec_enc_attn = self.enc_attn( 43 | dec_output, enc_output, enc_output, mask=dec_enc_attn_mask) 44 | dec_output *= non_pad_mask 45 | 46 | dec_output = self.pos_ffn(dec_output) 47 | dec_output *= non_pad_mask 48 | 49 | return dec_output, dec_slf_attn, dec_enc_attn 50 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | ''' Scaled Dot-Product Attention ''' 9 | 10 | def __init__(self, temperature, attn_dropout=0.1): 11 | super().__init__() 12 | self.temperature = temperature 13 | self.dropout = nn.Dropout(attn_dropout) 14 | self.softmax = nn.Softmax(dim=2) 15 | 16 | def forward(self, q, k, v, mask=None): 17 | 18 | attn = torch.bmm(q, k.transpose(1, 2)) 19 | attn = attn / self.temperature 20 | 21 | if mask is not None: 22 | attn = attn.masked_fill(mask, -np.inf) 23 | 24 | attn = self.softmax(attn) 25 | attn = self.dropout(attn) 26 | output = torch.bmm(attn, v) 27 | 28 | return output, attn 29 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Optim.py: -------------------------------------------------------------------------------- 1 | '''A wrapper class 
for optimizer ''' 2 | import numpy as np 3 | 4 | class ScheduledOptim(): 5 | '''A simple wrapper class for learning rate scheduling''' 6 | 7 | def __init__(self, optimizer, d_model, n_warmup_steps): 8 | self._optimizer = optimizer 9 | self.n_warmup_steps = n_warmup_steps 10 | self.n_current_steps = 0 11 | self.init_lr = np.power(d_model, -0.5) 12 | 13 | def step_and_update_lr(self): 14 | "Step with the inner optimizer" 15 | self._update_learning_rate() 16 | self._optimizer.step() 17 | 18 | def zero_grad(self): 19 | "Zero out the gradients by the inner optimizer" 20 | self._optimizer.zero_grad() 21 | 22 | def _get_lr_scale(self): 23 | return np.min([ 24 | np.power(self.n_current_steps, -0.5), 25 | np.power(self.n_warmup_steps, -1.5) * self.n_current_steps]) 26 | 27 | def _update_learning_rate(self): 28 | ''' Learning rate scheduling per step ''' 29 | 30 | self.n_current_steps += 1 31 | lr = self.init_lr * self._get_lr_scale() 32 | 33 | for param_group in self._optimizer.param_groups: 34 | param_group['lr'] = lr 35 | 36 | -------------------------------------------------------------------------------- /examples/transformer/transformer/README.md: -------------------------------------------------------------------------------- 1 | Transformer Model (Attention is All You Need) 2 | ============================================= 3 | This directory contains a PyTorch implementation of the Transformer model 4 | described in the "[Attention is All You Need](https://arxiv.org/abs/1706.03762)" 5 | paper. This code was adapted from Yu-Hsiang Huang's implementation found in 6 | [jadore801120/attention-is-all-you-need-pytorch](https://github.com/jadore801120/attention-is-all-you-need-pytorch). 7 | 8 | License 9 | ------- 10 | The code inside this directory is adapted from Yu-Hsiang Huang's implementation 11 | and therefore shares the same license. The unmodified license can be found in 12 | the `LICENSE` file. 
13 | -------------------------------------------------------------------------------- /examples/transformer/transformer/SubLayers.py: -------------------------------------------------------------------------------- 1 | ''' Define the sublayers in encoder/decoder layer ''' 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from transformer.Modules import ScaledDotProductAttention 6 | 7 | __author__ = "Yu-Hsiang Huang" 8 | 9 | class MultiHeadAttention(nn.Module): 10 | ''' Multi-Head Attention module ''' 11 | 12 | def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1): 13 | super().__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = nn.LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | 32 | self.dropout = nn.Dropout(dropout) 33 | 34 | 35 | def forward(self, q, k, v, mask=None): 36 | 37 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 38 | 39 | sz_b, len_q, _ = q.size() 40 | sz_b, len_k, _ = k.size() 41 | sz_b, len_v, _ = v.size() 42 | 43 | residual = q 44 | 45 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 46 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 47 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 48 | 49 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 50 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 51 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 52 | 53 | mask = mask.repeat(n_head, 1, 1) if mask is not None else None # (n*b) x .. x ..
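        # q, k and v are now laid out as (n_head * batch) x seq_len x depth, so
        # the single batched matmul inside ScaledDotProductAttention computes
        # all heads in parallel; the mask (when provided) is tiled the same way
        # above so every head sees its batch element's attention mask.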
54 | output, attn = self.attention(q, k, v, mask=mask) 55 | 56 | output = output.view(n_head, sz_b, len_q, d_v) 57 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 58 | 59 | output = self.dropout(self.fc(output)) 60 | output = self.layer_norm(output + residual) 61 | 62 | return output, attn 63 | 64 | class PositionwiseFeedForward(nn.Module): 65 | ''' A two-feed-forward-layer module ''' 66 | 67 | def __init__(self, d_in, d_hid, dropout=0.1): 68 | super().__init__() 69 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 70 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 71 | self.layer_norm = nn.LayerNorm(d_in) 72 | self.dropout = nn.Dropout(dropout) 73 | 74 | def forward(self, x): 75 | residual = x 76 | output = x.transpose(1, 2) 77 | output = self.w_2(F.relu(self.w_1(output))) 78 | output = output.transpose(1, 2) 79 | output = self.dropout(output) 80 | output = self.layer_norm(output + residual) 81 | return output 82 | -------------------------------------------------------------------------------- /examples/transformer/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | import transformer.Constants 2 | import transformer.Modules 3 | import transformer.Layers 4 | import transformer.SubLayers 5 | import transformer.Models 6 | import transformer.Translator 7 | import transformer.Beam 8 | import transformer.Optim 9 | 10 | __all__ = [ 11 | transformer.Constants, transformer.Modules, transformer.Layers, 12 | transformer.SubLayers, transformer.Models, transformer.Optim, 13 | transformer.Translator, transformer.Beam] 14 | -------------------------------------------------------------------------------- /examples/vgg/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer applies only to the "vgg.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/vgg/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import vgg 4 | 5 | 6 | def deepview_model_provider(): 7 | return vgg.vgg11().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /protocol/Makefile: -------------------------------------------------------------------------------- 1 | GEN_DIR = protocol_gen 2 | 3 | PLUGIN_GEN = ../plugin/lib/$(GEN_DIR) 4 | PLUGIN_GEN_FILE = $(PLUGIN_GEN)/innpv_pb.js 5 | 6 | SERVER_GEN = ../deepview_profile/$(GEN_DIR) 7 | SERVER_GEN_FILE = $(SERVER_GEN)/innpv_pb2.py 8 | 9 | PROTO_FILE = innpv.proto 10 | 11 | .PHONY: all clean 12 | 13 | all: $(PLUGIN_GEN_FILE) $(SERVER_GEN_FILE) 14 | 15 | $(PLUGIN_GEN_FILE): $(PROTO_FILE) 16 | mkdir -p $(PLUGIN_GEN) 17 | protoc --js_out=import_style=commonjs,binary:$(PLUGIN_GEN) $^ 18 | 19 | $(SERVER_GEN_FILE): $(PROTO_FILE) 20 | mkdir -p $(SERVER_GEN) 21 | protoc --python_out=$(SERVER_GEN) $^ 22 | 23 | clean: 24 | rm -rf $(PLUGIN_GEN) $(SERVER_GEN) 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deepview-profile" 3 | version = "0.14.5" 4 | description = "Interactive performance profiling and debugging tool for PyTorch neural networks."
5 | authors = ["CentML "] 6 | license = "Apache-2.0" 7 | readme = "README.md" 8 | repository = "https://github.com/CentML/DeepView.Profile" 9 | keywords = ["pytorch", "neural networks", "debugger", "profiler"] 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: Apache Software License", 14 | "Programming Language :: Python :: 3 :: Only", 15 | "Topic :: Software Development :: Debuggers", 16 | ] 17 | packages = [ 18 | { include = "deepview_profile" }, 19 | ] 20 | 21 | include = [ "pyproject.toml" ] 22 | 23 | [tool.poetry.scripts] 24 | deepview = "deepview_profile:main" 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.9" 28 | pyyaml = "*" 29 | protobuf = "3.19.6" 30 | numpy = "^1.15.2" 31 | torch = ">=2.1.0" 32 | nvidia-ml-py3 = "*" 33 | toml = "^0.10.2" 34 | pyRAPL = "^0.2.3" 35 | deepview-predict = "*" 36 | perfetto = "*" 37 | orjson = "*" 38 | torch-tb-profiler = "*" 39 | pymongo = "*" 40 | scipy = "*" 41 | termcolor = "*" 42 | 43 | [tool.poetry.dev-dependencies] 44 | 45 | [tool.poetry.group.dev.dependencies] 46 | ruff = "^0.0.267" 47 | pre-commit = "2.21.0" 48 | 49 | [build-system] 50 | requires = ["poetry-core>=1.0.0"] 51 | build-backend = "poetry.core.masonry.api" 52 | 53 | [tool.ruff] 54 | extend-exclude = ["examples", "deepview_profile/protocol_gen/"] 55 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/setup.cfg -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /test/TESTING.md: -------------------------------------------------------------------------------- 1 | To run tests: 2 | * Install the `pytest` module. 3 | * Run `pytest` in this directory. 4 | 5 | ``` 6 | $ pytest 7 | ============================= test session starts ============================== 8 | platform linux -- Python 3.9.13, pytest-7.1.3, pluggy-1.0.0 9 | rootdir: /home/ubuntu/habitat-a100/centml/deepview_profile/test 10 | collected 2 items 11 | 12 | test_driver.py .. 
[100%] 13 | 14 | ============================== 2 passed in 27.20s ============================== 15 | ``` 16 | -------------------------------------------------------------------------------- /test/config_params.py: -------------------------------------------------------------------------------- 1 | def TestConfig(): 2 | config = dict() 3 | config["model_names_from_examples"] = ["resnet", "nanogpt"] 4 | 5 | return config 6 | -------------------------------------------------------------------------------- /test/test_database.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import deepview_profile.db.database as database 4 | 5 | LOWER_BOUND_RAND_INT = 1 6 | UPPER_BOUND_RAND_INT = 10 7 | class MockDatabaseInterface(database.DatabaseInterface): 8 | def __del__(self): 9 | if os.path.exists("test.sqlite"): 10 | os.remove("test.sqlite") 11 | 12 | 13 | class TestSkylineDatabase: 14 | test_database: MockDatabaseInterface = MockDatabaseInterface("test.sqlite") 15 | energy_table_interface: database.EnergyTableInterface = ( 16 | database.EnergyTableInterface(test_database.connection) 17 | ) 18 | 19 | # Test if energy table is created 20 | def test_energy_table_is_created(self): 21 | query_result = self.test_database.connection.execute( 22 | "SELECT name from sqlite_schema WHERE type='table' and name ='ENERGY';" 23 | ) 24 | query_result_list = query_result.fetchall() 25 | assert len(query_result_list) > 0 26 | 27 | # Validate malformed entries and check that they are rejected 28 | def test_invalid_entry_too_short(self): 29 | assert self.energy_table_interface.is_valid_entry([]) is False 30 | 31 | def test_invalid_entry_too_long(self): 32 | assert self.energy_table_interface.is_valid_entry([1, 2, 3, 4, 5]) is False 33 | 34 | def test_invalid_entry_wrong_types(self): 35 | assert ( 36 | self.energy_table_interface.is_valid_entry([None, None, None, None, None]) 37 | is False 38 | ) 39 | 40 | def test_adding_valid_entry(self): 41 | params = [ 42 | "entry_point", 43 | random.random(), 44 | random.random(), 45 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 46 | ] 47 | self.energy_table_interface.add_entry(params) 48 | query_result = self.test_database.connection.execute( 49 | "SELECT * FROM ENERGY ORDER BY ts DESC;" 50 | ).fetchone() 51 | # params is passed by reference, so it now has the timestamp appended to it 52 | assert query_result == tuple(params) 53 | 54 | # add 10 valid entries and get top 3 55 | def test_get_latest_n_entries_of_entry_point(self): 56 | for _ in range(10): 57 | params = [ 58 | "entry_point", 59 | random.random(), 60 | random.random(), 61 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 62 | ] 63 | self.energy_table_interface.add_entry(params) 64 | for _ in range(20): 65 | params = [ 66 | "other_entry_point", 67 | random.random(), 68 | random.random(), 69 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 70 | ] 71 | self.energy_table_interface.add_entry(params) 72 | entries = [] 73 | for _ in range(3): 74 | params = [ 75 | "entry_point", 76 | random.random(), 77 | random.random(), 78 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 79 | ] 80 | entries.insert(0, params) 81 | self.energy_table_interface.add_entry(params) 82 | latest_n_entries = ( 83 | self.energy_table_interface.get_latest_n_entries_of_entry_point( 84 | 3, "entry_point" 85 | ) 86 | ) 87 | entries = [tuple(entry) for entry in entries] 88 | assert entries == latest_n_entries
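
# The tests above document the EnergyTableInterface contract: an entry is a
# four-element list (entry point name, two float measurements, and an integer
# count), a timestamp is appended on insert, and the newest N rows for a given
# entry point can be read back. A minimal standalone sketch, assuming the same
# constructor and methods the tests use (the numeric values are placeholders):

import deepview_profile.db.database as database

db = database.DatabaseInterface("energy.sqlite")
energy_table = database.EnergyTableInterface(db.connection)

entry = ["my_entry_point", 1.25, 3.75, 16]
if energy_table.is_valid_entry(entry):
    energy_table.add_entry(entry)  # appends a timestamp to `entry` in place

latest = energy_table.get_latest_n_entries_of_entry_point(1, "my_entry_point")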
-------------------------------------------------------------------------------- /test/test_driver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pickle 3 | from utils import DeepviewSession, BackendContext 4 | from google.protobuf.json_format import MessageToDict 5 | from config_params import TestConfig 6 | import os 7 | 8 | REPS = 2 9 | NUM_EXPECTED_MESSAGES = 6 10 | 11 | 12 | def get_config_name(): 13 | import pkg_resources 14 | 15 | package_versions = {p.key: p.version for p in pkg_resources.working_set} 16 | return package_versions 17 | 18 | 19 | config = TestConfig() 20 | 21 | tests = list() 22 | for model_name in config["model_names_from_examples"]: 23 | dir_path = os.path.join( 24 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 25 | "examples", 26 | model_name, 27 | ) 28 | tests.append((model_name, dir_path)) 29 | 30 | 31 | @pytest.mark.parametrize("test_name, entry_point", tests) 32 | def test_entry_point(test_name, entry_point): 33 | print(f"Testing {entry_point}") 34 | 35 | # create new folder 36 | folder = ( 37 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/tests_results" 38 | ) 39 | os.makedirs(folder, exist_ok=True) 40 | 41 | stdout_fd = open(os.path.join(folder, f"{test_name}_interactive_output.log"), "w") 42 | stderr_fd = open(os.path.join(folder, f"{test_name}_interactive_w_debug_output.log"), "w") 43 | context = BackendContext(entry_point, stdout_fd=stdout_fd, stderr_fd=stderr_fd) 44 | context.spawn_process() 45 | 46 | analysis_messages = list() 47 | 48 | for reps in range(REPS): 49 | sess = DeepviewSession() 50 | while context.state == 0: 51 | pass 52 | sess.connect("localhost", 60120) 53 | sess.send_initialize_request(entry_point) 54 | sess.send_analysis_request() 55 | while ( 56 | context.alive() 57 | and sess.alive() 58 | and len(sess.received_messages) < NUM_EXPECTED_MESSAGES 59 | ): 60 | pass 61 | 62 | sess.cleanup() 63 | analysis_messages.extend(sess.received_messages) 64 | 65 | assert len(sess.received_messages) == NUM_EXPECTED_MESSAGES, ( 66 | f"Run {reps}: Expected to receive {NUM_EXPECTED_MESSAGES} got " 67 | f"{len(sess.received_messages)} (did the process terminate prematurely?)" 68 | ) 69 | 70 | context.terminate() 71 | # create folder to store files 72 | # flush contents to files 73 | with open(os.path.join(folder, f"{test_name}_analysis.pkl"), "wb") as fp: 74 | pickle.dump(list(map(MessageToDict, analysis_messages)), fp) 75 | # write package versions 76 | package_dict = get_config_name() 77 | with open(os.path.join(folder, "package-list.txt"), "w") as f: 78 | for k, v in package_dict.items(): 79 | f.write(f"{k}={v}\n") 80 | stdout_fd.close() 81 | stderr_fd.close() 82 | -------------------------------------------------------------------------------- /tools/common.sh: -------------------------------------------------------------------------------- 1 | COLOR_RED="\033[0;31m" 2 | COLOR_GREEN="\033[0;32m" 3 | COLOR_YELLOW="\033[0;33m" 4 | COLOR_BLUE="\033[0;36m" 5 | COLOR_NC="\033[0m" 6 | 7 | function echo_colored() { 8 | echo -e "${1}${2}${COLOR_NC}" 9 | } 10 | 11 | function echo_green() { 12 | echo_colored "$COLOR_GREEN" "$1" 13 | } 14 | 15 | function echo_red() { 16 | echo_colored "$COLOR_RED" "$1" 17 | } 18 | 19 | function echo_yellow() { 20 | echo_colored "$COLOR_YELLOW" "$1" 21 | } 22 | 23 | function echo_blue() { 24 | echo_colored "$COLOR_BLUE" "$1" 25 | } 26 | 27 | function prompt_yn() { 28 | echo -en "${COLOR_YELLOW}$1${COLOR_NC}" 29 | read -r 30 | if 
[[ ! $REPLY =~ ^[Yy]$ ]] 31 | then 32 | exit 1 33 | fi 34 | } 35 | 36 | function get_repo_hash() { 37 | echo "$(git rev-parse HEAD)" 38 | } 39 | 40 | function check_repo() { 41 | # Make sure no unstaged changes 42 | echo_yellow "> Check for uncommitted changes" 43 | if [[ ! -z $(git status --porcelain) ]]; 44 | then 45 | echo_red "ERROR: There are uncommitted changes. Please commit before releasing." 46 | exit 1 47 | fi 48 | 49 | # Make sure we're on main 50 | echo_yellow "> Check the current branch" 51 | INNPV_MAIN_HASH=$(git rev-parse main) 52 | INNPV_HASH=$(git rev-parse HEAD) 53 | 54 | if [[ $INNPV_MAIN_HASH != $INNPV_HASH ]]; then 55 | echo_red "ERROR: You must be on main when releasing." 56 | exit 1 57 | fi 58 | 59 | INNPV_SHORT_HASH=$(git rev-parse --short HEAD) 60 | 61 | echo_green "✓ Repository OK" 62 | } 63 | 64 | function check_tools() { 65 | echo_yellow "> Check tools" 66 | if [ -z "$(which poetry)" ]; then 67 | echo_red "ERROR: Poetry must be installed." 68 | exit 1 69 | fi 70 | 71 | if [ -z "$(which gh)" ]; then 72 | echo_red "ERROR: GitHub CLI must be installed." 73 | exit 1 74 | fi 75 | 76 | echo "" 77 | echo_yellow "> Tooling versions:" 78 | echo "$(poetry --version)" 79 | echo "$(poetry run python3 --version)" 80 | echo "$(gh --version)" 81 | echo_green "✓ Release tooling OK" 82 | } 83 | 84 | function build_release() { 85 | echo_yellow "> Building wheels..." 86 | rm -rf ../dist/* 87 | cp ../pyproject.toml ../deepview_profile/ 88 | poetry build 89 | echo_green "✓ Wheels successfully built" 90 | } 91 | 92 | function publish_to_pypi() { 93 | case $1 in 94 | prod) 95 | poetry publish -r pypi;; 96 | *) 97 | poetry publish -r test-pypi;; 98 | esac 99 | echo_green "✓ New release uploaded to PyPI" 100 | } 101 | -------------------------------------------------------------------------------- /tools/prepare-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to release a new version of the DeepView CLI. 4 | # Release steps: 5 | # 1. Create release branch 6 | # 2. Increment package version in pyproject.toml 7 | # 3. Prepare change log since the last version 8 | # 4. Commit the change log 9 | # 5. Create a draft GitHub release 10 | # 6. Optionally publish to Test PyPI 11 | 12 | 13 | set -e 14 | 15 | RELEASE_SCRIPT_PATH=$(cd $(dirname $0) && pwd -P) 16 | cd $RELEASE_SCRIPT_PATH 17 | source common.sh 18 | 19 | echo "" 20 | echo_blue "DeepView.Profile Release Preparation Tool" 21 | echo_blue "=========================================" 22 | 23 | echo "" 24 | check_repo 25 | 26 | echo "" 27 | check_tools 28 | 29 | CURR_CLI_VERSION=$(poetry version --short) 30 | echo -en "${COLOR_YELLOW}Release increment: [patch], minor, major ${COLOR_NC}" 31 | read -r 32 | case $REPLY in 33 | major) 34 | poetry version major;; 35 | minor) 36 | poetry version minor;; 37 | *) 38 | poetry version patch;; 39 | esac 40 | NEXT_CLI_VERSION=$(poetry version --short) 41 | VERSION_TAG="v$NEXT_CLI_VERSION" 42 | 43 | echo "" 44 | echo_yellow "> The next CLI version will be '$VERSION_TAG'." 45 | prompt_yn "> Is this correct? (y/N) " 46 | git checkout -b "release-$VERSION_TAG" 47 | git commit -am "Bump version to $VERSION_TAG" 48 | git push origin "release-$VERSION_TAG" 49 | REPO_HASH=$(get_repo_hash) 50 | RELEASE_NOTES=$(git log $(git describe --abbrev=0 --tags)..
--merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ') 51 | echo "" 52 | echo "Release Notes:" 53 | echo "$RELEASE_NOTES" 54 | gh pr create --title "Release $VERSION_TAG" --body "$RELEASE_NOTES" 55 | 56 | 57 | # echo "" 58 | # build_release 59 | 60 | # RELEASE_NOTES=$(git log $(git describe --abbrev=0 --tags).. --merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ') 61 | # echo "" 62 | # echo "Release Notes:" 63 | # echo "$RELEASE_NOTES" 64 | 65 | # RELEASE_ARTIFACTS=$(find ../dist -name "*$NEXT_CLI_VERSION*" -type f | paste -s -d ' ' - ) 66 | 67 | # GH_TOKEN=$UOFT_ECOSYSTEM_GH_TOKEN 68 | # echo "" 69 | # prompt_yn "> Create a draft release on Github? (y/N) " 70 | # gh release create "v$VERSION_TAG" --draft \ 71 | # --title "$VERSION_TAG" \ 72 | # --notes "$RELEASE_NOTES" \ 73 | # --target "$REPO_HASH" \ 74 | # $RELEASE_ARTIFACTS 75 | # echo -en "${COLOR_YELLOW}Ready to publish? [dryrun], test-pypi, pypi${COLOR_NC}" 76 | # read -r 77 | # echo "" 78 | # case $REPLY in 79 | # test-pypi) 80 | # echo_yellow "> Releasing $VERSION_TAG of the CLI..." 81 | # publish_to_pypi;; 82 | # pypi) 83 | # echo_yellow "> Releasing $VERSION_TAG of the CLI..." 84 | # publish_to_pypi "prod";; 85 | # *) 86 | # echo_yellow "Skipping the upload to PyPI";; 87 | # esac 88 | 89 | echo_green "✓ Done!" 90 | 91 | --------------------------------------------------------------------------------