├── .gitattributes ├── .github └── workflows │ └── build-and-publish-new-version.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── RELEASE.md ├── assets └── deepview.png ├── deepview_profile ├── __init__.py ├── __main__.py ├── analysis │ ├── __init__.py │ ├── request_manager.py │ ├── runner.py │ ├── session.py │ └── static.py ├── commands │ ├── __init__.py │ ├── analysis.py │ ├── interactive.py │ ├── measurements.py │ ├── memory.py │ ├── prediction_models.py │ └── time.py ├── config │ └── __init__.py ├── data │ ├── __init__.py │ └── hints.yml ├── db │ ├── __init__.py │ └── database.py ├── energy │ ├── __init__.py │ └── measurer.py ├── error_printing.py ├── evaluate.py ├── exceptions.py ├── export_converter.py ├── initialization.py ├── io │ ├── __init__.py │ ├── connection.py │ ├── connection_acceptor.py │ ├── connection_manager.py │ └── sentinel.py ├── lru_cache.py ├── models │ ├── __init__.py │ ├── analysis.py │ └── source_map.py ├── nvml.py ├── pl │ ├── deepview_callback.py │ └── deepview_interface.py ├── profiler │ ├── __init__.py │ ├── autograd.py │ ├── backward.py │ ├── ddp.py │ ├── iteration.py │ ├── operation.py │ └── utilization.py ├── protocol │ ├── __init__.py │ ├── message_handler.py │ └── message_sender.py ├── protocol_gen │ ├── __init__.py │ └── innpv_pb2.py ├── pytorch_profiler_log_reader.py ├── server.py ├── skyline.py ├── tests │ ├── __init__.py │ └── test_lru_cache.py ├── tracking │ ├── __init__.py │ ├── backward_interceptor.py │ ├── base.py │ ├── breakdown.py │ ├── call_stack.py │ ├── callable_tracker.py │ ├── hook_manager.py │ ├── memory │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── report.py │ │ ├── report_queries.py │ │ └── weights.py │ ├── time │ │ ├── __init__.py │ │ ├── operation.py │ │ ├── report.py │ │ └── report_queries.py │ ├── tracker.py │ └── utils.py ├── user_code_utils.py ├── util_weak.py ├── utils.py └── version_utils.py ├── docs ├── memory-report.md ├── providers.md ├── remote.md └── run-time-report.md ├── examples ├── densenet │ ├── LICENSE │ ├── densenet.py │ └── entry_point.py ├── gnmt │ ├── README.md │ ├── entry_point.py │ └── seq2seq │ │ ├── LICENSE │ │ ├── data │ │ ├── config.py │ │ ├── dataset.py │ │ ├── sampler.py │ │ └── tokenizer.py │ │ ├── inference │ │ ├── beam_search.py │ │ └── inference.py │ │ ├── models │ │ ├── attention.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── gnmt.py │ │ └── seq2seq_base.py │ │ ├── train │ │ ├── fp_optimizers.py │ │ ├── lr_scheduler.py │ │ ├── smoothing.py │ │ └── trainer.py │ │ └── utils.py ├── huggingface │ └── entry_point.py ├── legacy │ ├── lenet.py │ ├── testnet2.py │ └── vgg11.py ├── nanogpt │ ├── entry_point.py │ └── model.py ├── pytorch_lightning │ └── example.py ├── resnet │ ├── LICENSE │ ├── entry_point.py │ ├── entry_point_resnext.py │ └── resnet.py ├── testnet │ ├── entry_point.py │ └── testnet1.py ├── transformer │ ├── entry_point.py │ └── transformer │ │ ├── Beam.py │ │ ├── Constants.py │ │ ├── LICENSE │ │ ├── Layers.py │ │ ├── Models.py │ │ ├── Modules.py │ │ ├── Optim.py │ │ ├── README.md │ │ ├── SubLayers.py │ │ ├── Translator.py │ │ └── __init__.py └── vgg │ ├── LICENSE │ ├── entry_point.py │ └── vgg.py ├── protocol ├── Makefile └── innpv.proto ├── pyproject.toml ├── setup.cfg ├── test ├── .gitignore ├── TESTING.md ├── config_params.py ├── test_database.py ├── test_driver.py └── utils.py └── tools ├── common.sh └── prepare-release.sh /.gitattributes: 
-------------------------------------------------------------------------------- 1 | assets/* linguist-documentation 2 | website/* linguist-documentation 3 | -------------------------------------------------------------------------------- /.github/workflows/build-and-publish-new-version.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish a new version 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tag: 7 | description: 'Release Tag. This is in the form x.y.z' 8 | required: true 9 | 10 | jobs: 11 | build-and-publish: 12 | name: Build DeepView.Profile 13 | runs-on: ubuntu-latest 14 | env: 15 | CI_COMMIT_AUTHOR: CentML 16 | CI_COMMIT_EMAIL: centml-machine-user@users.noreply.github.com 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v3 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: '3.8' 25 | 26 | - name: Install and configure Poetry 27 | uses: snok/install-poetry@v1 28 | with: 29 | virtualenvs-in-project: true 30 | 31 | - name: Create release branch 32 | run: | 33 | git checkout -b release/${{ github.event.inputs.tag }} 34 | git fetch 35 | git branch --set-upstream-to=origin/main release/${{ github.event.inputs.tag }} 36 | 37 | - name: Update version number 38 | run: | 39 | poetry version ${{ github.event.inputs.tag }} 40 | 41 | - name: Commit updated version number and tag it 42 | run: | 43 | git config --global user.name "${{ env.CI_COMMIT_AUTHOR }}" 44 | git config --global user.email "${{ env.CI_COMMIT_EMAIL }}" 45 | git commit -am "Release version ${{ github.event.inputs.tag }}" 46 | git push origin release/${{ github.event.inputs.tag }} 47 | git tag ${{ github.event.inputs.tag }} 48 | 49 | - name: Build Python artifacts 50 | run: | 51 | poetry build 52 | 53 | - name: Upload Artifacts 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: ${{ github.event.inputs.tag }} 57 | path: dist/*${{ github.event.inputs.tag }}* 58 | 59 | - name: Publish a release 60 | run: | 61 | RELEASE_NOTES="$(git log $(git describe --abbrev=0 --tags).. 
--merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ')" 62 | echo "Autogenerated Release Notes:" 63 | echo "$RELEASE_NOTES" 64 | RELEASE_ARTIFACTS=$(find ./dist -name "*${{ github.event.inputs.tag }}*" -type f | paste -s -d ' ' - ) 65 | VERSION_TAG="v${{ github.event.inputs.tag }}" 66 | gh auth login --with-token <<< "${{ secrets.GH_TOKEN }}" 67 | gh release create "$VERSION_TAG" \ 68 | --title "$VERSION_TAG" \ 69 | --notes "$RELEASE_NOTES" \ 70 | --target "$GITHUB_SHA" \ 71 | $RELEASE_ARTIFACTS 72 | gh pr create --title "Release $VERSION_TAG" --body "$RELEASE_NOTES" 73 | 74 | publish-to-test-pypi: 75 | name: Publish to Test PyPI 76 | needs: build-and-publish 77 | runs-on: ubuntu-latest 78 | environment: Test 79 | concurrency: Test 80 | permissions: 81 | id-token: write 82 | 83 | steps: 84 | - name: Download artifact 85 | uses: actions/download-artifact@v4 86 | with: 87 | name: ${{ github.event.inputs.tag }} 88 | path: dist 89 | 90 | - name: Publish to PyPI 91 | uses: pypa/gh-action-pypi-publish@release/v1 92 | with: 93 | repository_url: https://test.pypi.org/legacy/ 94 | 95 | publish-to-pypi: 96 | name: Publish to PyPI 97 | needs: publish-to-test-pypi 98 | runs-on: ubuntu-latest 99 | environment: Production 100 | concurrency: Production 101 | permissions: 102 | id-token: write 103 | 104 | 105 | steps: 106 | - name: Download artifacts 107 | uses: actions/download-artifact@v4 108 | with: 109 | name: ${{ github.event.inputs.tag }} 110 | path: dist 111 | 112 | - name: Publish to PyPI 113 | uses: pypa/gh-action-pypi-publish@release/v1 114 | with: 115 | repository_url: https://upload.pypi.org/legacy/ 116 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/charliermarsh/ruff-pre-commit 5 | rev: 'v0.0.267' 6 | hooks: 7 | - id: ruff 8 | args: [ --fix, --exit-non-zero-on-fix ] 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v3.2.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | - id: check-ast 15 | - id: check-yaml 16 | - id: check-added-large-files 17 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.black-formatter" 4 | }, 5 | "python.formatting.provider": "none" 6 | } 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | TBD -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | ======================================== 2 | Copyright Notice for the Skyline Project 3 | ======================================== 4 | 5 | Copyright 2020 Geoffrey X. Yu 6 | Copyright 2020 Tovi Grossman 7 | Copyright 2020 Gennady Pekhimenko 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this project except in compliance with the License. 
11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | 21 | 22 | ===================================== 23 | Copyright Notice for the Code Samples 24 | ===================================== 25 | 26 | Portions of code inside the "samples" directory were written by 27 | third party developers. These code files carry their own open 28 | source licenses and copyright notices. Please see the README.md 29 | and LICENSE files inside those directories for more information. 30 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # How to release a new version of DeepView.Profile 2 | - Go to the GitHub repo and run the action `build-and-publish-new-version`. You will be prompted to specify the version number. 3 | 4 | - This runs a GitHub Action that takes the following steps: 5 | 1. Fetches the repo and its dependencies 6 | 2. Creates a release branch 7 | 3. Updates the version number to the user-specified version by updating the pyproject.toml 8 | 4. Commits the changes and tags the commit with the version number 9 | 5. Builds the Python artifacts 10 | 6. Publishes a release to GitHub 11 | 7. Creates a PR to merge back into main 12 | 8. Publishes to Test PyPI 13 | 9. Publishes to PyPI 14 | 15 | - The action `build-and-publish-new-version` is defined under `.github/workflows/build-and-publish-new-version.yml` 16 | 17 | - This release process follows the approach outlined in [OneFlow](https://www.endoflineblog.com/oneflow-a-git-branching-model-and-workflow). -------------------------------------------------------------------------------- /assets/deepview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/assets/deepview.png -------------------------------------------------------------------------------- /deepview_profile/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from importlib.metadata import version, PackageNotFoundError 3 | 4 | try: 5 | package_name = "deepview_profile" 6 | __name__ = package_name 7 | __version__ = version(package_name) 8 | __description__ = "Interactive performance profiling and debugging tool for PyTorch neural networks."
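# version() reads the installed distribution's metadata, so a source checkout that has not been installed raises PackageNotFoundError and falls through to the "unknown" defaults below.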
9 | 10 | except PackageNotFoundError: 11 | __version__ = "unknown" 12 | __description__ = "unknown" 13 | 14 | from .__main__ import main 15 | -------------------------------------------------------------------------------- /deepview_profile/__main__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore", message="'deepview_profile.__main__' found in sys.modules after import of package 'deepview_profile', but prior to execution of 'deepview_profile.__main__'; this may result in unpredictable behaviour") 3 | 4 | import argparse 5 | import sys 6 | 7 | import deepview_profile 8 | import deepview_profile.commands.interactive 9 | import deepview_profile.commands.memory 10 | import deepview_profile.commands.time 11 | import deepview_profile.commands.analysis 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser( 16 | prog="DeepView", 17 | description="DeepView: Interactive Neural Network Performance " 18 | "Profiler, Visualizer, and Debugger for PyTorch", 19 | ) 20 | parser.add_argument( 21 | "-v", "--version", 22 | action="store_true", 23 | help="Print the version and exit.", 24 | ) 25 | subparsers = parser.add_subparsers(title="Commands") 26 | deepview_profile.commands.interactive.register_command(subparsers) 27 | deepview_profile.commands.memory.register_command(subparsers) 28 | deepview_profile.commands.time.register_command(subparsers) 29 | deepview_profile.commands.analysis.register_command(subparsers) 30 | args = parser.parse_args() 31 | 32 | if args.version: 33 | print('DeepView Command Line Interface', 'v' + deepview_profile.__version__) 34 | return 35 | 36 | if 'func' not in args: 37 | parser.print_help() 38 | sys.exit(1) 39 | 40 | # Run the specified command 41 | args.func(args) 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /deepview_profile/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/analysis/__init__.py -------------------------------------------------------------------------------- /deepview_profile/analysis/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | from deepview_profile.analysis.session import AnalysisSession 5 | from deepview_profile.nvml import NVML 6 | from deepview_profile.utils import release_memory 7 | import weakref 8 | 9 | def analyze_project(project_root, entry_point, nvml, enable_ddp_analysis): 10 | session = AnalysisSession.new_from(project_root, entry_point) 11 | 12 | release_memory() 13 | print("analyze_project: running measure_breakdown()") 14 | yield session.measure_breakdown(nvml) 15 | 16 | release_memory() 17 | print("analyze_project: running measure_throughput()") 18 | yield session.measure_throughput() 19 | 20 | release_memory() 21 | print("analyze_project: running deepview_predict()") 22 | yield session.habitat_predict() 23 | 24 | release_memory() 25 | print("analyze_project: running measure_utilization()") 26 | yield session.measure_utilization() 27 | 28 | release_memory() 29 | print("analyze_project: running energy_compute()") 30 | yield session.energy_compute() 31 | 32 | if enable_ddp_analysis: 33 | release_memory() 34 | print("analyze_project: running ddp_computation()") 35 | yield 
session.ddp_computation() 36 | 37 | # release object session (less gpu memory consumption) 38 | release_memory() 39 | weakref.finalize(session, print, "session object destroyed") 40 | del session 41 | yield None 42 | 43 | def main(): 44 | # This is used for development and debugging purposes 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("entry_point", type=str) 47 | args = parser.parse_args() 48 | 49 | project_root = os.getcwd() 50 | with NVML() as nvml: 51 | analyzer = analyze_project(project_root, args.entry_point, nvml, enable_ddp_analysis=False) 52 | breakdown = next(analyzer) 53 | throughput = next(analyzer) 54 | 55 | print('Peak usage: ', breakdown.peak_usage_bytes, 'bytes') 56 | print('Max. capacity:', breakdown.memory_capacity_bytes, 'bytes') 57 | print('No. of weight breakdown nodes: ', len(breakdown.weight_tree)) 58 | print('No. of operation breakdown nodes:', len(breakdown.operation_tree)) 59 | print('Throughput:', throughput.samples_per_second, 'samples/s') 60 | 61 | 62 | if __name__ == "__main__": 63 | kwargs = { 64 | "format": "%(asctime)s %(levelname)-8s %(message)s", 65 | "datefmt": "%Y-%m-%d %H:%M", 66 | "level": logging.DEBUG, 67 | } 68 | logging.basicConfig(**kwargs) 69 | main() 70 | -------------------------------------------------------------------------------- /deepview_profile/analysis/static.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | 4 | END_OF_FUNCTION = re.compile(r'\):\s*$') 5 | 6 | 7 | class StaticAnalyzer: 8 | def __init__(self, source_code, source_tree): 9 | self._ast = source_tree 10 | self._code_by_line = source_code.splitlines() 11 | 12 | def batch_size_location(self): 13 | """ 14 | Locates the line of the 'batch_size' argument in the 15 | 'deepview_input_provider' function and determines if the provider's 16 | definition can be mutated using our heuristics.
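Returns a (line_number, can_mutate) tuple when the argument is found, or None if no suitable provider function exists.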
17 | """ 18 | extractor = _InputProviderExtractor() 19 | extractor.visit(self._ast) 20 | function = extractor.function_node 21 | 22 | if (function is None or 23 | len(function.args.args) == 0 or 24 | function.args.args[0].arg != 'batch_size'): 25 | return None 26 | 27 | batch_size_line_number = function.args.args[0].lineno 28 | match = END_OF_FUNCTION.search(self._code_by_line[function.lineno - 1]) 29 | can_mutate = match is not None 30 | 31 | return batch_size_line_number, can_mutate 32 | 33 | 34 | class _InputProviderExtractor(ast.NodeVisitor): 35 | def __init__(self): 36 | self.function_node = None 37 | 38 | def visit_FunctionDef(self, node): 39 | if self.function_node is not None: 40 | # Return early if we've already found the provider 41 | return 42 | if node.name != 'deepview_input_provider': 43 | return 44 | self.function_node = node 45 | -------------------------------------------------------------------------------- /deepview_profile/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/commands/__init__.py -------------------------------------------------------------------------------- /deepview_profile/commands/interactive.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import signal 3 | import threading 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | initialize_skyline, 8 | ) 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def register_command(subparsers): 14 | parser = subparsers.add_parser( 15 | "interactive", 16 | help="Start a new DeepView interactive profiling session.", 17 | ) 18 | parser.add_argument( 19 | "--host", 20 | default="", 21 | help="The host address to bind to.", 22 | ) 23 | parser.add_argument( 24 | "--port", 25 | default=60120, 26 | type=int, 27 | help="The port to listen on.", 28 | ) 29 | parser.add_argument( 30 | "--hints-file", 31 | help="Path to the performance hints configuration YAML file.", 32 | ) 33 | parser.add_argument( 34 | "--measure-for", 35 | help="Number of iterations to measure when determining throughput.", 36 | ) 37 | parser.add_argument( 38 | "--warm-up", 39 | help="Number of warm up iterations when determining throughput.", 40 | ) 41 | parser.add_argument( 42 | "--log-file", 43 | help="The location of the log file.", 44 | ) 45 | parser.add_argument( 46 | "--debug", action="store_true", help="Log debug messages.") 47 | parser.set_defaults(func=main) 48 | 49 | def actual_main(args): 50 | from deepview_profile.server import SkylineServer 51 | 52 | should_shutdown = threading.Event() 53 | 54 | def signal_handler(signal, frame): 55 | should_shutdown.set() 56 | 57 | signal.signal(signal.SIGINT, signal_handler) 58 | signal.signal(signal.SIGTERM, signal_handler) 59 | 60 | 61 | with SkylineServer(args.host, args.port) as server: 62 | _, port = server.listening_on 63 | logger.info( 64 | "DeepView interactive profiling session started! 
" 65 | "Listening on port %d.", 66 | port, 67 | ) 68 | 69 | # Run the server until asked to terminate 70 | should_shutdown.wait() 71 | 72 | 73 | def main(args): 74 | check_skyline_preconditions(args) 75 | initialize_skyline(args) 76 | actual_main(args) 77 | -------------------------------------------------------------------------------- /deepview_profile/commands/measurements.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import csv 5 | 6 | from deepview_profile.initialization import ( 7 | check_skyline_preconditions, 8 | initialize_skyline, 9 | ) 10 | from deepview_profile.error_printing import print_analysis_error 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def register_command(subparsers): 16 | parser = subparsers.add_parser( 17 | "measure-batches", 18 | help="Make throughput and memory measurements for given batch sizes.", 19 | ) 20 | parser.add_argument( 21 | "entry_point", 22 | help="The entry point file in this project that contains the DeepView " 23 | "provider functions.", 24 | ) 25 | parser.add_argument( 26 | "-b", "--batch-sizes", 27 | help="The batch sizes to consider.", 28 | type=int, 29 | nargs="+", 30 | required=True, 31 | ) 32 | parser.add_argument( 33 | "-t", "--trials", 34 | help="Number of trials to run when making measurements.", 35 | type=int, 36 | required=True, 37 | default=5, 38 | ) 39 | parser.add_argument( 40 | "-o", "--output", 41 | help="The location where the evaluation output should be stored.", 42 | required=True, 43 | ) 44 | parser.add_argument( 45 | "--log-file", 46 | help="The location of the log file.", 47 | ) 48 | parser.add_argument( 49 | "--debug", action="store_true", help="Log debug messages.") 50 | parser.set_defaults(func=main) 51 | 52 | 53 | def make_measurements(session, batch_size): 54 | # This is a HACK 55 | session._batch_size = batch_size 56 | peak_usage_bytes = session.measure_peak_usage_bytes() 57 | thpt_msg = session.measure_throughput() 58 | return thpt_msg.samples_per_second, peak_usage_bytes 59 | 60 | 61 | def actual_main(args): 62 | from deepview_profile.analysis.session import AnalysisSession 63 | from deepview_profile.exceptions import AnalysisError 64 | 65 | if os.path.exists(args.output): 66 | print( 67 | "ERROR: The specified output file already exists.", 68 | file=sys.stderr, 69 | ) 70 | sys.exit(1) 71 | 72 | try: 73 | with open(args.output, 'w') as f: 74 | writer = csv.writer(f) 75 | writer.writerow([ 76 | 'batch_size', 77 | 'trial', 78 | 'samples_per_second', 79 | 'memory_usage_bytes', 80 | ]) 81 | project_root = os.getcwd() 82 | for batch_size in args.batch_sizes: 83 | for trial in range(args.trials): 84 | session = AnalysisSession.new_from( 85 | project_root, args.entry_point) 86 | samples_per_second, memory_usage_bytes = make_measurements( 87 | session, batch_size) 88 | writer.writerow([ 89 | batch_size, 90 | trial, 91 | samples_per_second, 92 | memory_usage_bytes, 93 | ]) 94 | 95 | except AnalysisError as ex: 96 | print_analysis_error(ex) 97 | sys.exit(1) 98 | 99 | 100 | def main(args): 101 | check_skyline_preconditions(args) 102 | initialize_skyline(args) 103 | actual_main(args) 104 | -------------------------------------------------------------------------------- /deepview_profile/commands/memory.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | 
initialize_skyline, 8 | ) 9 | from deepview_profile.error_printing import print_analysis_error 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def register_command(subparsers): 15 | parser = subparsers.add_parser( 16 | "memory", 17 | help="Generate a memory usage report.", 18 | ) 19 | parser.add_argument( 20 | "entry_point", 21 | help="The entry point file in this project that contains the DeepView " 22 | "provider functions.", 23 | ) 24 | parser.add_argument( 25 | "-o", "--output", 26 | help="The location where the memory report should be stored.", 27 | required=True, 28 | ) 29 | parser.add_argument( 30 | "--log-file", 31 | help="The location of the log file.", 32 | ) 33 | parser.add_argument( 34 | "--debug", action="store_true", help="Log debug messages.") 35 | parser.set_defaults(func=main) 36 | 37 | 38 | def actual_main(args): 39 | from deepview_profile.analysis.session import AnalysisSession 40 | from deepview_profile.exceptions import AnalysisError 41 | 42 | if os.path.exists(args.output): 43 | print( 44 | "ERROR: The specified output file already exists.", 45 | file=sys.stderr, 46 | ) 47 | sys.exit(1) 48 | 49 | try: 50 | project_root = os.getcwd() 51 | session = AnalysisSession.new_from( 52 | project_root, args.entry_point) 53 | session.generate_memory_usage_report( 54 | save_report_to=args.output, 55 | ) 56 | except AnalysisError as ex: 57 | print_analysis_error(ex) 58 | sys.exit(1) 59 | 60 | 61 | def main(args): 62 | check_skyline_preconditions(args) 63 | initialize_skyline(args) 64 | actual_main(args) 65 | -------------------------------------------------------------------------------- /deepview_profile/commands/prediction_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import csv 5 | 6 | from deepview_profile.initialization import ( 7 | check_skyline_preconditions, 8 | initialize_skyline, 9 | ) 10 | from deepview_profile.error_printing import print_analysis_error 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def register_command(subparsers): 16 | parser = subparsers.add_parser( 17 | "prediction-models", 18 | help="Evaluate DeepView's prediction accuracy.", 19 | ) 20 | parser.add_argument( 21 | "entry_point", 22 | help="The entry point file in this project that contains the DeepView " 23 | "provider functions.", 24 | ) 25 | parser.add_argument( 26 | "-b", "--batch-sizes", 27 | help="The starting batch sizes to build models from.", 28 | type=int, 29 | nargs="+", 30 | required=True, 31 | ) 32 | parser.add_argument( 33 | "-o", "--output", 34 | help="The location where the evaluation output should be stored.", 35 | required=True, 36 | ) 37 | parser.add_argument( 38 | "--log-file", 39 | help="The location of the log file.", 40 | ) 41 | parser.add_argument( 42 | "--debug", action="store_true", help="Log debug messages.") 43 | parser.set_defaults(func=main) 44 | 45 | 46 | def get_model(session, batch_size): 47 | # This is a HACK 48 | session._batch_size = batch_size 49 | thpt_msg = session.measure_throughput() 50 | return ( 51 | (thpt_msg.peak_usage_bytes.slope, thpt_msg.peak_usage_bytes.bias), 52 | (thpt_msg.run_time_ms.slope, thpt_msg.run_time_ms.bias), 53 | ) 54 | 55 | 56 | def actual_main(args): 57 | from deepview_profile.analysis.session import AnalysisSession 58 | from deepview_profile.exceptions import AnalysisError 59 | 60 | if os.path.exists(args.output): 61 | print( 62 | "ERROR: The specified output file already exists.", 63 | file=sys.stderr, 64 | ) 65 
| sys.exit(1) 66 | 67 | try: 68 | with open(args.output, 'w') as f: 69 | writer = csv.writer(f) 70 | writer.writerow([ 71 | 'batch_size', 72 | 'run_time_ms_slope', 73 | 'run_time_ms_bias', 74 | 'memory_usage_bytes_slope', 75 | 'memory_usage_bytes_bias', 76 | ]) 77 | project_root = os.getcwd() 78 | for batch_size in args.batch_sizes: 79 | session = AnalysisSession.new_from( 80 | project_root, args.entry_point) 81 | memory_model, run_time_model = get_model( 82 | session, batch_size) 83 | writer.writerow([ 84 | batch_size, *run_time_model, *memory_model, 85 | ]) 86 | 87 | except AnalysisError as ex: 88 | print_analysis_error(ex) 89 | sys.exit(1) 90 | 91 | 92 | def main(args): 93 | check_skyline_preconditions(args) 94 | initialize_skyline(args) 95 | actual_main(args) 96 | -------------------------------------------------------------------------------- /deepview_profile/commands/time.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | from deepview_profile.initialization import ( 6 | check_skyline_preconditions, 7 | initialize_skyline, 8 | ) 9 | from deepview_profile.error_printing import print_analysis_error 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def register_command(subparsers): 15 | parser = subparsers.add_parser( 16 | "time", 17 | help="Generate an iteration run time breakdown report.", 18 | ) 19 | parser.add_argument( 20 | "entry_point", 21 | help="The entry point file in this project that contains the DeepView " 22 | "provider functions.", 23 | ) 24 | parser.add_argument( 25 | "-o", "--output", 26 | help="The location where the iteration run time breakdown report " 27 | "should be stored.", 28 | required=True, 29 | ) 30 | parser.add_argument( 31 | "--log-file", 32 | help="The location of the log file.", 33 | ) 34 | parser.add_argument( 35 | "--debug", action="store_true", help="Log debug messages.") 36 | parser.set_defaults(func=main) 37 | 38 | 39 | def actual_main(args): 40 | from deepview_profile.analysis.session import AnalysisSession 41 | from deepview_profile.exceptions import AnalysisError 42 | 43 | if os.path.exists(args.output): 44 | print( 45 | "ERROR: The specified output file already exists.", 46 | file=sys.stderr, 47 | ) 48 | sys.exit(1) 49 | 50 | try: 51 | project_root = os.getcwd() 52 | session = AnalysisSession.new_from( 53 | project_root, args.entry_point) 54 | session.generate_run_time_breakdown_report( 55 | save_report_to=args.output, 56 | ) 57 | except AnalysisError as ex: 58 | print_analysis_error(ex) 59 | sys.exit(1) 60 | 61 | 62 | def main(args): 63 | check_skyline_preconditions(args) 64 | initialize_skyline(args) 65 | actual_main(args) 66 | -------------------------------------------------------------------------------- /deepview_profile/config/__init__.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | import deepview_profile.data 4 | 5 | 6 | class _Config: 7 | def __init__(self): 8 | self.Hints = None 9 | 10 | self.warm_up = 100 11 | self.measure_for = 10 12 | 13 | def initialize_hints_config(self, hints_file): 14 | if hints_file is None: 15 | file_to_open = deepview_profile.data.get_absolute_path('hints.yml') 16 | else: 17 | file_to_open = hints_file 18 | 19 | with open(file_to_open, 'r') as f: 20 | self.Hints = yaml.load(f, Loader=yaml.Loader) 21 | 22 | def parse_args(self, args): 23 | if 'hints_file' not in args: 24 | args.hints_file = None 25 | self.initialize_hints_config(args.hints_file) 26 
| 27 | if 'warm_up' in args and args.warm_up is not None: 28 | self.warm_up = args.warm_up 29 | if 'measure_for' in args and args.measure_for is not None: 30 | self.measure_for = args.measure_for 31 | 32 | 33 | Config = _Config() 34 | -------------------------------------------------------------------------------- /deepview_profile/data/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | _DATA_PATH = os.path.abspath(os.path.dirname(__file__)) 4 | 5 | 6 | def get_absolute_path(data_file): 7 | return os.path.join(_DATA_PATH, data_file) 8 | -------------------------------------------------------------------------------- /deepview_profile/data/hints.yml: -------------------------------------------------------------------------------- 1 | Conv2d: 2 | in_channels: 3 | effectiveness: 'high' 4 | natural_direction: true 5 | 6 | out_channels: 7 | effectiveness: 'high' 8 | natural_direction: true 9 | 10 | kernel_size: 11 | effectiveness: 'low' 12 | natural_direction: true 13 | 14 | 15 | Linear: 16 | in_features: 17 | effectiveness: 'high' 18 | natural_direction: true 19 | 20 | out_features: 21 | effectiveness: 'high' 22 | natural_direction: true 23 | 24 | 25 | MaxPool2d: 26 | kernel_size: 27 | effectiveness: 'low' 28 | natural_direction: true 29 | 30 | stride: 31 | effectiveness: 'low' 32 | natural_direction: false 33 | 34 | 35 | BatchNorm2d: 36 | num_features: 37 | effectiveness: 'high' 38 | natural_direction: true 39 | -------------------------------------------------------------------------------- /deepview_profile/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/db/__init__.py -------------------------------------------------------------------------------- /deepview_profile/db/database.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import sqlite3 4 | 5 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 6 | DB_PATH = os.path.join(BASE_DIR, "deepview.sqlite3") 7 | 8 | class DatabaseInterface: 9 | def __init__(self, database_name=DB_PATH) -> None: 10 | self.connection = sqlite3.connect( 11 | database_name, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES 12 | ) 13 | self.create_energy_table() 14 | 15 | def create_energy_table(self) -> None: 16 | self.connection.cursor().execute("CREATE TABLE IF NOT EXISTS ENERGY ( \ 17 | entry_point TEXT, \ 18 | cpu_component REAL, \ 19 | gpu_component REAL, \ 20 | batch_size INT, \ 21 | ts TIMESTAMP \ 22 | );") 23 | 24 | 25 | class EnergyTableInterface: 26 | def __init__(self, database_connection: sqlite3.Connection): 27 | self.database_connection: sqlite3.Connection = database_connection 28 | 29 | @staticmethod 30 | def is_valid_entry(entry: list) -> bool: 31 | ''' 32 | Validates an entry in the Energy table by testing if the length is 4, 33 | and the types match the columns. Note that timestamp is not part of the entry. 34 | Returns True if it is valid, else False 35 | ''' 36 | return len(entry) == 4 and type(entry[0]) == str and type(entry[1]) == float \ 37 | and type(entry[2]) == float and type(entry[3]) == int 38 | 39 | @staticmethod 40 | def is_valid_entry_with_timestamp(entry: list) -> bool: 41 | ''' 42 | Validates an entry in the Energy table by testing if the length is 5, 43 | and the types match the columns.
Returns True if it is valid, else False 44 | ''' 45 | return len(entry) == 5 and type(entry[0]) == str and type(entry[1]) == float \ 46 | and type(entry[2]) == float and type(entry[3]) == int \ 47 | and type(entry[4]) == datetime.datetime 48 | 49 | def add_entry(self, entry: list) -> bool: 50 | ''' 51 | Validates an entry and then adds that entry into the Energy table. Note that 52 | current timestamp is added by this function. Returns False if the entry is 53 | not a valid format, or if the insertion failed. Else returns True 54 | ''' 55 | if self.is_valid_entry(entry): 56 | try: 57 | entry.append(datetime.datetime.now()) 58 | cursor = self.database_connection.cursor() 59 | cursor.execute("INSERT INTO ENERGY VALUES(?, ?, ?, ?, ?)", entry) 60 | self.database_connection.commit() 61 | return True 62 | except sqlite3.IntegrityError as e: 63 | print(e) 64 | return False 65 | else: 66 | return False 67 | 68 | def get_latest_n_entries_of_entry_point(self, n: int, entry_point: str) -> list: 69 | ''' 70 | Gets the n latest entries of a given entry point 71 | ''' 72 | params = [entry_point, n] 73 | cursor = self.database_connection.cursor() 74 | results = cursor.execute( 75 | "SELECT * FROM ENERGY WHERE entry_point=? ORDER BY ts DESC LIMIT ?;", 76 | params 77 | ).fetchall() 78 | return results 79 | -------------------------------------------------------------------------------- /deepview_profile/energy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/energy/__init__.py -------------------------------------------------------------------------------- /deepview_profile/energy/measurer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from threading import Thread 3 | import numpy as np 4 | 5 | import pynvml as N 6 | from pyRAPL import Sensor 7 | 8 | class CPUMeasurer: 9 | def __init__(self, interval): 10 | self.interval = interval 11 | self.power = [] 12 | self.last_cpu = None 13 | self.last_dram = None 14 | 15 | def measurer_init(self): 16 | self.sensor = None 17 | try: 18 | self.sensor = Sensor() 19 | energy = self.sensor.energy() 20 | self.last_cpu = np.array(energy[0::2]) 21 | self.last_dram = np.array(energy[1::2]) 22 | except Exception: 23 | print("Warning. Failed to get CPU energy. \ 24 | You need to set the right permissions for pyRAPL") 25 | print("eg. 
$ sudo chmod -R a+r /sys/class/powercap/intel-rapl") 26 | 27 | def measurer_measure(self): 28 | # Get energy consumed so far (since last CPU reset) 29 | if self.sensor is None: 30 | return 31 | 32 | energy = self.sensor.energy() 33 | cpu = np.array(energy[0::2]) 34 | dram = np.array(energy[1::2]) 35 | 36 | # Compare against last measurement to determine energy since last measure 37 | diff_cpu = cpu - self.last_cpu 38 | # (The DRAM delta relative to self.last_dram is tracked below but is not yet folded into the reported power.) 39 | 40 | # 1J = 10^6 uJ 41 | # The cpu used this much since the last measurement 42 | # We have mW = 1000*J/s = 1000*(uJ/10^6)/s 43 | cpu_total = np.sum(diff_cpu) 44 | cpu_mW = 1000 * (cpu_total / 1e6) / self.interval 45 | self.power.append(cpu_mW) 46 | 47 | self.last_cpu = cpu 48 | self.last_dram = dram 49 | 50 | def measurer_deallocate(self): 51 | pass 52 | 53 | def total_energy(self): 54 | if len(self.power) == 0: 55 | return None 56 | 57 | # J = W * s, 1W = 1000 mW 58 | energy = self.interval * sum(self.power) / 1000.0 59 | return energy 60 | 61 | class GPUMeasurer: 62 | def __init__(self, interval): 63 | self.interval = interval 64 | self.power = [] 65 | 66 | def measurer_init(self): 67 | N.nvmlInit() 68 | self.device_handle = N.nvmlDeviceGetHandleByIndex(0) 69 | 70 | def measurer_measure(self): 71 | power = N.nvmlDeviceGetPowerUsage(self.device_handle) 72 | self.power.append(power) 73 | 74 | def measurer_deallocate(self): 75 | N.nvmlShutdown() 76 | 77 | def total_energy(self): 78 | # J = W * s, 1W = 1000 mW 79 | energy = self.interval * sum(self.power) / 1000.0 80 | return energy 81 | 82 | class EnergyMeasurer: 83 | def __init__(self): 84 | self.sleep_interval = 0.1 85 | self.measuring = False 86 | self.measure_thread = None 87 | 88 | self.measurers = { 89 | "cpu": CPUMeasurer(self.sleep_interval), 90 | "gpu": GPUMeasurer(self.sleep_interval), 91 | } 92 | 93 | def run_measure(self): 94 | # Initialize 95 | for m in self.measurers: 96 | self.measurers[m].measurer_init() 97 | 98 | # Run measurement loop 99 | while self.measuring: 100 | for m in self.measurers: 101 | self.measurers[m].measurer_measure() 102 | time.sleep(self.sleep_interval) 103 | 104 | # Cleanup 105 | for m in self.measurers: 106 | self.measurers[m].measurer_deallocate() 107 | 108 | def begin_measurement(self): 109 | assert(self.measure_thread is None) 110 | self.measure_thread = Thread(target=self.run_measure) 111 | self.measuring = True 112 | self.measure_thread.start() 113 | 114 | def end_measurement(self): 115 | self.measuring = False 116 | self.measure_thread.join() 117 | self.measure_thread = None 118 | 119 | def total_energy(self): 120 | total_energy = 0.
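# Each component measurer already reports joules (its mW samples times the sleep interval, divided by 1000), so the machine-wide figure is the sum of whichever components produced a reading.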
121 | for m in self.measurers: 122 | e = self.measurers[m].total_energy() 123 | if e is not None: 124 | total_energy += e 125 | return total_energy 126 | 127 | def cpu_energy(self): 128 | return self.measurers["cpu"].total_energy() 129 | 130 | def gpu_energy(self): 131 | return self.measurers["gpu"].total_energy() 132 | -------------------------------------------------------------------------------- /deepview_profile/error_printing.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def print_analysis_error(error, file=sys.stderr): 5 | print( 6 | "DeepView encountered an error when profiling your model:", 7 | file=file, 8 | ) 9 | print("->", str(error), file=file) 10 | 11 | if error.file_context is not None: 12 | if error.file_context.line_number is not None: 13 | message = ( 14 | "This error occurred on line {} when processing {}.".format( 15 | error.file_context.line_number, 16 | error.file_context.file_path, 17 | ) 18 | ) 19 | else: 20 | message = "This error occurred when processing {}.".format( 21 | error.file_context.file_path, 22 | ) 23 | print("->", message, file=file) 24 | -------------------------------------------------------------------------------- /deepview_profile/evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import deepview_profile.commands.measurements 5 | import deepview_profile.commands.prediction_models 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser( 10 | prog="deepview-evaluate", 11 | description="DeepView Evaluation Tool", 12 | ) 13 | subparsers = parser.add_subparsers(title="Commands") 14 | deepview_profile.commands.measurements.register_command(subparsers) 15 | deepview_profile.commands.prediction_models.register_command(subparsers) 16 | args = parser.parse_args() 17 | 18 | if 'func' not in args: 19 | parser.print_help() 20 | sys.exit(1) 21 | 22 | # Run the specified command 23 | args.func(args) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /deepview_profile/exceptions.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import contextlib 3 | import os 4 | import traceback 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | FileContext = collections.namedtuple( 10 | 'FileContext', 11 | ['file_path', 'line_number'], 12 | ) 13 | 14 | 15 | class AnalysisError(RuntimeError): 16 | def __init__(self, message, exception_type=None): 17 | if exception_type is None: 18 | super(AnalysisError, self).__init__(message) 19 | else: 20 | super(AnalysisError, self).__init__( 21 | '{}: {}'.format(exception_type.__name__, message)) 22 | 23 | self.file_context = None 24 | 25 | def with_file_context(self, file_path, line_number=None): 26 | self.file_context = FileContext( 27 | file_path=file_path, 28 | line_number=line_number, 29 | ) 30 | return self 31 | 32 | 33 | class NoConnectionError(Exception): 34 | def __init__(self, message): 35 | super().__init__(message) 36 | 37 | 38 | class _SuspendExecution(Exception): 39 | # This exception is used internally by the BackwardInterceptor to return 40 | # early from the user's code. 
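# Raising it unwinds the user's call stack; exceptions_as_analysis_errors() below swallows it rather than reporting a failure.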
41 | pass 42 | 43 | 44 | @contextlib.contextmanager 45 | def exceptions_as_analysis_errors(project_root): 46 | try: 47 | yield 48 | except _SuspendExecution: 49 | # A _SuspendExecution exception does not indicate an error - we use it 50 | # to return early from the user's code. 51 | pass 52 | except AnalysisError: 53 | # The user's code may raise an AnalysisError (e.g., from the wrapped 54 | # providers). If this happens, we should pass the exception through. 55 | raise 56 | except Exception as ex: 57 | logger.debug( 58 | "An error occurred during analysis (could be a problem with the " 59 | "user's code):", 60 | exc_info=ex, 61 | ) 62 | if isinstance(ex, SyntaxError): 63 | error = AnalysisError( 64 | 'DeepView encountered a syntax error while profiling your ' 65 | 'model.' 66 | ) 67 | else: 68 | error = AnalysisError(str(ex), type(ex)) 69 | 70 | # Extract the relevant file context, if it is available, starting by 71 | # inspecting the exception itself. 72 | if hasattr(ex, 'filename') and ex.filename.startswith(project_root): 73 | _add_context_to_error( 74 | error, project_root, ex.filename, getattr(ex, 'lineno', None)) 75 | else: 76 | stack = traceback.extract_tb(ex.__traceback__) 77 | for frame in reversed(stack): 78 | if frame.filename.startswith(project_root): 79 | _add_context_to_error( 80 | error, project_root, frame.filename, frame.lineno) 81 | break 82 | 83 | # Special case: Add a more detailed error message when there's an 84 | # input number mismatch. 85 | if (error.file_context is None and 86 | str(error).startswith("TypeError: forward() takes")): 87 | error = AnalysisError( 88 | "{}. This error could be due to a mismatch between the number " 89 | "of inputs that your model expects and the number of inputs " 90 | "that your input provider returns.".format(str(error)) 91 | ) 92 | 93 | raise error 94 | 95 | 96 | def _add_context_to_error(error, project_root, file_path, line_number): 97 | error.with_file_context( 98 | file_path=os.path.relpath(file_path, start=project_root), 99 | line_number=line_number, 100 | ) 101 | -------------------------------------------------------------------------------- /deepview_profile/initialization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def check_skyline_preconditions(args): 8 | """ 9 | This is the first function that should run before importing any other 10 | DeepView code. 11 | """ 12 | _configure_logging(args) 13 | if not _validate_dependencies(): 14 | sys.exit(1) 15 | if not _validate_gpu(): 16 | sys.exit(1) 17 | 18 | 19 | def initialize_skyline(args): 20 | """ 21 | Performs common initialization tasks. 22 | """ 23 | from deepview_profile.config import Config 24 | 25 | Config.parse_args(args) 26 | 27 | def _configure_logging(args): 28 | kwargs = { 29 | "format": "%(asctime)s %(levelname)-8s %(message)s", 30 | "datefmt": "%Y-%m-%d %H:%M", 31 | "level": logging.DEBUG if args.debug else logging.INFO, 32 | } 33 | 34 | if args.log_file is not None: 35 | kwargs["filename"] = args.log_file 36 | 37 | logging.basicConfig(**kwargs) 38 | 39 | 40 | def _validate_dependencies(): 41 | # NOTE: If you make a change here, make sure to update the dependency 42 | # list in pyproject.toml as well.
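# Importing each module is enough to confirm it is installed; the PyPI package names (which differ from some module names) are noted inline below.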
43 | try: 44 | import yaml # pyyaml on PyPI # noqa: F401 45 | import pynvml # nvidia-ml-py3 on PyPI # noqa: F401 46 | import google.protobuf # protobuf on PyPI # noqa: F401 47 | import numpy # noqa: F401 48 | import torch # noqa: F401 49 | return True 50 | except ImportError as ex: 51 | logger.error( 52 | "DeepView could not find the '%s' module, which is a required " 53 | "dependency. Please make sure all the required dependencies are " 54 | "installed before launching DeepView. If you use a package " 55 | "manager, these dependencies will be automatically installed for " 56 | "you.", 57 | ex.name, 58 | ) 59 | return False 60 | 61 | 62 | def _validate_gpu(): 63 | import torch 64 | if not torch.cuda.is_available(): 65 | logger.error( 66 | "DeepView did not detect a GPU on this machine. DeepView only " 67 | "profiles deep learning workloads on GPUs." 68 | ) 69 | return False 70 | return True 71 | -------------------------------------------------------------------------------- /deepview_profile/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/io/__init__.py -------------------------------------------------------------------------------- /deepview_profile/io/connection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import select 3 | import struct 4 | from threading import Thread 5 | 6 | from deepview_profile.io.sentinel import Sentinel 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class Connection: 12 | """ 13 | Manages an open connection to a client. 14 | 15 | This class must be constructed with an already-connected 16 | socket. Upon receipt of a message on the socket, the 17 | handler_function will be called with the raw message. 18 | 19 | Socket communication is performed using length-prefixed 20 | binary protobuf messages. 21 | 22 | The stop function must be called to close the connection. 
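As an illustrative sketch (not additional API): sending the 5-byte payload b'hello' puts struct.pack('!I', 5) + b'hello' on the wire, which is exactly the framing that send_bytes below implements.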
23 | """ 24 | def __init__(self, socket, address, handler_function, closed_handler): 25 | self.address = address 26 | self._socket = socket 27 | self._reader = Thread(target=self._socket_read) 28 | self._handler_function = handler_function 29 | self._closed_handler = closed_handler 30 | self._sentinel = Sentinel() 31 | self._project_root = "" 32 | self._entry_point = "" 33 | 34 | def start(self): 35 | self._sentinel.start() 36 | self._reader.start() 37 | 38 | def stop(self): 39 | self._sentinel.signal_exit() 40 | self._reader.join() 41 | self._socket.close() 42 | self._sentinel.stop() 43 | 44 | def send_bytes(self, raw_bytes): 45 | self._socket.sendall(struct.pack('!I', len(raw_bytes))) 46 | self._socket.sendall(raw_bytes) 47 | 48 | def _socket_read(self): 49 | buffer = b'' 50 | message_length = -1 51 | 52 | try: 53 | while True: 54 | read_ready, _, _ = select.select([ 55 | self._socket, self._sentinel.read_pipe], [], []) 56 | if self._sentinel.should_exit(read_ready): 57 | logger.debug( 58 | "Connection (%s:%d) is being closed.", 59 | *self.address, 60 | ) 61 | self._sentinel.consume_exit_signal() 62 | break 63 | 64 | data = self._socket.recv(4096) 65 | if len(data) == 0: 66 | logger.debug( 67 | "Connection (%s:%d) has been closed by the client.", 68 | *self.address, 69 | ) 70 | self._closed_handler(self.address) 71 | break 72 | 73 | buffer += data 74 | 75 | # Process all messages that exist in the buffer 76 | while True: 77 | if message_length <= 0: 78 | if len(buffer) < 4: 79 | break 80 | # Network byte order 32-bit unsigned integer 81 | message_length = struct.unpack('!I', buffer[:4])[0] 82 | buffer = buffer[4:] 83 | 84 | if len(buffer) < message_length: 85 | break 86 | 87 | try: 88 | self._handler_function( 89 | buffer[:message_length], self.address) 90 | finally: 91 | buffer = buffer[message_length:] 92 | message_length = -1 93 | 94 | except Exception: 95 | logger.exception("Connection unexpectedly stopping...") 96 | 97 | @property 98 | def project_root(self): 99 | return self._project_root 100 | 101 | @property 102 | def entry_point(self): 103 | return self._entry_point 104 | 105 | def set_project_paths(self, project_root, entry_point): 106 | self._project_root = project_root 107 | self._entry_point = entry_point 108 | 109 | class ConnectionState: 110 | def __init__(self): 111 | # NOTE: This counter is modified by a thread in the main executor, but 112 | # will be read by other threads. No R/W lock is needed because of 113 | # the Python GIL. 114 | # 115 | # NOTE: The sequence number from the client must be non-negative 116 | self.sequence_number = -1 117 | 118 | # Connections have two states: uninitialized and "ready" (initialized) 119 | # As a result for simplicity, we use a boolean to represent the state. 120 | self.initialized = False 121 | 122 | # The plugin may disconnect from us while we are processing a request. 123 | # We use this flag to indicate whether the connection still "exists" 124 | # to allow requests to abort early. 
125 | self.connected = True 126 | 127 | def update_sequence(self, request): 128 | if request.sequence_number <= self.sequence_number: 129 | return 130 | self.sequence_number = request.sequence_number 131 | 132 | def is_request_current(self, request): 133 | return request.sequence_number >= self.sequence_number 134 | -------------------------------------------------------------------------------- /deepview_profile/io/connection_acceptor.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import select 3 | import socket 4 | from threading import Thread 5 | 6 | from deepview_profile.io.sentinel import Sentinel 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ConnectionAcceptor: 12 | """ 13 | Manages the "server socket" for the agent, allowing it to accept 14 | connection requests from other agents. 15 | 16 | Each time a connection is received, the handler_function is called 17 | with the new socket and address. 18 | """ 19 | def __init__(self, host, port, handler_function): 20 | self._host = host 21 | self._port = port 22 | self._server_socket = socket.socket( 23 | socket.AF_INET, 24 | socket.SOCK_STREAM, 25 | ) 26 | self._server_socket.setsockopt( 27 | socket.SOL_SOCKET, 28 | socket.SO_REUSEADDR, 29 | 1, 30 | ) 31 | self._handler_function = handler_function 32 | self._acceptor = Thread(target=self._accept_connections) 33 | self._sentinel = Sentinel() 34 | 35 | def start(self): 36 | self._server_socket.bind((self._host, self._port)) 37 | self._port = self._server_socket.getsockname()[1] 38 | self._server_socket.listen() 39 | self._sentinel.start() 40 | self._acceptor.start() 41 | logger.debug( 42 | "DeepView is listening for connections on (%s:%d).", 43 | self._host, 44 | self._port, 45 | ) 46 | 47 | def stop(self): 48 | self._sentinel.signal_exit() 49 | self._acceptor.join() 50 | self._server_socket.close() 51 | self._sentinel.stop() 52 | logging.debug( 53 | "DeepView has stopped listening for connections on (%s:%d).", 54 | self._host, 55 | self._port, 56 | ) 57 | 58 | @property 59 | def host(self): 60 | return self._host 61 | 62 | @property 63 | def port(self): 64 | return self._port 65 | 66 | def _accept_connections(self): 67 | try: 68 | while True: 69 | read_ready, _, _ = select.select( 70 | [self._server_socket, self._sentinel.read_pipe], [], []) 71 | 72 | if self._sentinel.should_exit(read_ready): 73 | self._sentinel.consume_exit_signal() 74 | break 75 | 76 | socket, address = self._server_socket.accept() 77 | host, port = address 78 | logger.debug("Accepted a connection to (%s:%d).", host, port) 79 | self._handler_function(socket, address) 80 | except Exception: 81 | logging.exception( 82 | "DeepView has unexpectedly stopped accepting connections.") 83 | -------------------------------------------------------------------------------- /deepview_profile/io/connection_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import socket 3 | 4 | from deepview_profile.io.connection import Connection, ConnectionState 5 | from deepview_profile.exceptions import NoConnectionError 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class ConnectionManager: 11 | def __init__(self, message_handler, closed_handler): 12 | self._connections = {} 13 | self._message_handler = message_handler 14 | self._closed_handler = closed_handler 15 | 16 | def register_connection(self, opened_socket, address): 17 | self._connections[address] = ( 18 | Connection( 19 | 
opened_socket, 20 | address, 21 | self._message_handler, 22 | self._closed_handler, 23 | ), 24 | ConnectionState(), 25 | ) 26 | self._connections[address][0].start() 27 | 28 | def remove_connection(self, address): 29 | connection, state = self.get_connection_tuple(address) 30 | connection.stop() 31 | state.connected = False 32 | del self._connections[address] 33 | logger.debug("Removed connection to (%s:%d).", *address) 34 | 35 | def get_connection(self, address): 36 | return self.get_connection_tuple(address)[0] 37 | 38 | def get_connection_state(self, address): 39 | return self.get_connection_tuple(address)[1] 40 | 41 | def get_connection_tuple(self, address): 42 | if address not in self._connections: 43 | host, port = address 44 | raise NoConnectionError( 45 | "Connection to ({}:{}) does not exist.".format(host, port)) 46 | return self._connections[address] 47 | 48 | def broadcast(self, raw_message): 49 | for _, (connection, _) in self._connections.items(): 50 | connection.send_bytes(raw_message) 51 | 52 | def connect_to(self, host, port): 53 | new_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 54 | new_socket.connect((host, port)) 55 | self.register_connection(new_socket, (host, port)) 56 | 57 | def stop(self): 58 | for _, (connection, state) in self._connections.items(): 59 | connection.stop() 60 | state.connected = False 61 | self._connections.clear() 62 | -------------------------------------------------------------------------------- /deepview_profile/io/sentinel.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Sentinel: 5 | def __init__(self): 6 | self._read_pipe = None 7 | self._write_pipe = None 8 | 9 | def start(self): 10 | self._read_pipe, self._write_pipe = os.pipe() 11 | 12 | def stop(self): 13 | os.close(self._write_pipe) 14 | os.close(self._read_pipe) 15 | self._read_pipe = None 16 | self._write_pipe = None 17 | 18 | @property 19 | def read_pipe(self): 20 | return self._read_pipe 21 | 22 | def should_exit(self, ready_descriptors): 23 | return self._read_pipe in ready_descriptors 24 | 25 | def signal_exit(self): 26 | os.write(self._write_pipe, b'\0') 27 | 28 | def consume_exit_signal(self): 29 | # This should only be called after signal_exit(), 30 | # otherwise the calling thread will block.
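# signal_exit() writes exactly one byte, so a single one-byte read consumes one pending signal.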
31 | os.read(self._read_pipe, 1) 32 | -------------------------------------------------------------------------------- /deepview_profile/lru_cache.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class LRUCache: 4 | def __init__(self, max_size=128): 5 | self._max_size = max_size 6 | self._cache_by_key = {} 7 | self._cache_by_use = _LRUCacheList() 8 | 9 | def query(self, key): 10 | if key not in self._cache_by_key: 11 | return None 12 | node = self._cache_by_key[key] 13 | self._cache_by_use.move_to_front(node) 14 | return node.value 15 | 16 | def add(self, key, value): 17 | if self._cache_by_use.size >= self._max_size: 18 | removed = self._cache_by_use.remove_back() 19 | del self._cache_by_key[removed.key] 20 | node = self._cache_by_use.add_to_front(key, value) 21 | self._cache_by_key[key] = node 22 | 23 | 24 | class _LRUCacheList: 25 | def __init__(self): 26 | # Front of the list: most recently used 27 | self.front = None 28 | self.back = None 29 | self.size = 0 30 | 31 | def add_to_front(self, key, value): 32 | node = _LRUCacheNode(key, value) 33 | self._add_to_front(node) 34 | self.size += 1 35 | return node 36 | 37 | def _add_to_front(self, node): 38 | if self.size == 0: 39 | self.front = node 40 | self.back = node 41 | else: 42 | node.next = self.front 43 | self.front.prev = node 44 | self.front = node 45 | 46 | def move_to_front(self, node): 47 | if self.front == node: 48 | # Nothing needs to be done if the node is already at the front of 49 | # the list 50 | return 51 | 52 | if node.next is None: 53 | # Back of the list 54 | node.prev.next = None 55 | self.back = node.prev 56 | node.prev = None 57 | else: 58 | # Middle of the list 59 | node.prev.next = node.next 60 | node.next.prev = node.prev 61 | node.next = None 62 | node.prev = None 63 | 64 | self._add_to_front(node) 65 | 66 | def remove_back(self): 67 | if self.size == 0: 68 | return None 69 | 70 | node = self.back 71 | 72 | if self.size == 1: 73 | self.front = None 74 | self.back = None 75 | else: 76 | node.prev.next = None 77 | self.back = node.prev 78 | 79 | self.size -= 1 80 | return node 81 | 82 | 83 | class _LRUCacheNode: 84 | def __init__(self, key, value): 85 | self.key = key 86 | self.value = value 87 | self.prev = None 88 | self.next = None 89 | -------------------------------------------------------------------------------- /deepview_profile/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/models/__init__.py -------------------------------------------------------------------------------- /deepview_profile/models/analysis.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class OperationInfo: 4 | def __init__(self, bound_name, op_name, ast_node, position, perf_hints): 5 | self.bound_name = bound_name 6 | self.op_name = op_name 7 | self.ast_node = ast_node 8 | self.position = position 9 | self.perf_hints = perf_hints 10 | self.usages = [] 11 | self.runtime_us = 0 12 | 13 | def set_usages(self, usages): 14 | self.usages = usages 15 | 16 | def add_to_runtime_us(self, runtime_us): 17 | self.runtime_us += runtime_us 18 | 19 | 20 | class OperationInfoMap: 21 | def __init__(self): 22 | self.operations = {} 23 | 24 | def add_operation_info(self, operation): 25 | self.operations[operation.bound_name] = operation 26 | 27 | def get_operation_info_by_bound_name(self, 
bound_name): 28 | if bound_name not in self.operations: 29 | return None 30 | return self.operations[bound_name] 31 | 32 | def get_operations(self): 33 | return self.operations.values() 34 | 35 | def set_runtimes_from_cache(self, cached_info_map): 36 | """ 37 | Used to set the runtimes from cache for when the parsed code has not 38 | changed. 39 | """ 40 | for bound_name, op_info in self.operations.items(): 41 | cached_op_info = cached_info_map.get_operation_info_by_bound_name( 42 | bound_name) 43 | op_info.runtime_us = cached_op_info.runtime_us 44 | 45 | 46 | class AnnotationInfo: 47 | def __init__(self, input_size, start_position, end_position): 48 | self.input_size = input_size 49 | self.start_position = start_position 50 | self.end_position = end_position 51 | 52 | 53 | class PerformanceHint: 54 | def __init__(self, keyword, position, effectiveness, natural_direction): 55 | self.keyword = keyword 56 | self.position = position 57 | self.effectiveness = effectiveness 58 | self.natural_direction = natural_direction 59 | 60 | 61 | class LinearModel: 62 | def __init__(self, coefficient, bias): 63 | self.coefficient = coefficient 64 | self.bias = bias 65 | 66 | def __repr__(self): 67 | return 'LinearModel(coefficient={:.4f}, bias={:.4f})'.format( 68 | self.coefficient, self.bias) 69 | 70 | def evaluate(self, x): 71 | return self.coefficient * x + self.bias 72 | 73 | def inverse(self, y): 74 | return (y - self.bias) / self.coefficient 75 | 76 | 77 | class MemoryInfo: 78 | def __init__(self, usage_model_mb, usage_mb, max_capacity_mb): 79 | self.usage_model_mb = usage_model_mb 80 | self.usage_mb = usage_mb 81 | self.max_capacity_mb = max_capacity_mb 82 | 83 | def __repr__(self): 84 | return 'MemoryInfo(model={}, usage_mb={}, capacity_mb={})'.format( 85 | self.usage_model_mb, self.usage_mb, self.max_capacity_mb) 86 | 87 | 88 | class ThroughputInfo: 89 | def __init__( 90 | self, 91 | throughput, 92 | max_throughput, 93 | runtime_model_ms 94 | ): 95 | self.throughput = throughput 96 | self.max_throughput = max_throughput 97 | self.runtime_model_ms = runtime_model_ms 98 | 99 | def __repr__(self): 100 | return ( 101 | 'ThroughputInfo(thpt={}, max_thpt={}, model={})' 102 | .format( 103 | self.throughput, 104 | self.max_throughput, 105 | self.runtime_model_ms, 106 | ) 107 | ) 108 | 109 | def batch_from_throughput(self, throughput): 110 | # Thpt = batch / runtime_model 111 | throughput_ms = throughput / 1000 112 | return ( 113 | (throughput_ms * self.runtime_model_ms.bias) / 114 | (1 - throughput_ms * self.runtime_model_ms.coefficient) 115 | ) 116 | 117 | 118 | class PerformanceLimits: 119 | def __init__(self, max_batch_size, throughput_limit): 120 | self.max_batch_size = max_batch_size 121 | self.throughput_limit = throughput_limit 122 | 123 | def __repr__(self): 124 | return ( 125 | 'PerformanceLimits(max_batch={:.2f}, thpt_limit={:.2f})'.format( 126 | self.max_batch_size, 127 | self.throughput_limit, 128 | ) 129 | ) 130 | -------------------------------------------------------------------------------- /deepview_profile/models/source_map.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Position: 4 | def __init__(self, line, column): 5 | self.line = line 6 | self.column = column 7 | 8 | def offset(self, length): 9 | return Position(self.line, self.column + length) 10 | 11 | 12 | class SourceMap: 13 | def __init__(self, source_code): 14 | self._source_by_line = source_code.splitlines() 15 | 16 | def find_position(self, snippet, line_offset=0): 17 | 
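Backing up to models/analysis.py: batch_from_throughput() simply inverts Thpt(b) = b / (coef * b + bias). A quick self-contained check of that algebra, using illustrative (not measured) coefficients:

runtime = LinearModel(coefficient=0.5, bias=10.0)    # runtime_ms(b) = 0.5*b + 10
info = ThroughputInfo(throughput=None, max_throughput=None, runtime_model_ms=runtime)
batch = info.batch_from_throughput(1500.0)           # 1500 samples/s -> batch = 60
assert abs(batch / runtime.evaluate(batch) * 1000 - 1500.0) < 1e-6   # round-trips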
for offset, line in enumerate(self._source_by_line[line_offset:]): 18 | index = line.find(snippet) 19 | if index == -1: 20 | continue 21 | # NOTE: We don't add 1 here to make the line number 0-based 22 | return Position(line_offset + offset, index) 23 | 24 | return None 25 | 26 | def find_position_on_line(self, snippet, offset_position): 27 | if offset_position.line >= len(self._source_by_line): 28 | return None 29 | index = self._source_by_line[offset_position.line].find( 30 | snippet, offset_position.column) 31 | 32 | if index == -1: 33 | return None 34 | else: 35 | return Position(offset_position.line, index) 36 | -------------------------------------------------------------------------------- /deepview_profile/nvml.py: -------------------------------------------------------------------------------- 1 | import pynvml 2 | 3 | 4 | class NVML: 5 | def __enter__(self): 6 | self.start() 7 | return self 8 | 9 | def __exit__(self, exc_type, exc_value, traceback): 10 | self.stop() 11 | 12 | def start(self): 13 | pynvml.nvmlInit() 14 | 15 | def stop(self): 16 | pynvml.nvmlShutdown() 17 | 18 | def get_memory_capacity(self): 19 | # TODO: Support multiple devices 20 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 21 | return pynvml.nvmlDeviceGetMemoryInfo(handle) 22 | 23 | def get_device_names(self): 24 | device_names = [] 25 | for i in range(pynvml.nvmlDeviceGetCount()): 26 | handle = pynvml.nvmlDeviceGetHandleByIndex(i) 27 | device_name = pynvml.nvmlDeviceGetName(handle).decode("utf-8") 28 | device_names.append(device_name) 29 | return device_names 30 | 31 | -------------------------------------------------------------------------------- /deepview_profile/pl/deepview_callback.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Tuple 2 | 3 | import time 4 | import os 5 | import json 6 | import torch 7 | import sys 8 | 9 | try: 10 | import pytorch_lightning as pl 11 | except ImportError: 12 | sys.exit("Please install pytorch-lightning:\nuse: pip install lightning\nExiting...") 13 | 14 | from termcolor import colored 15 | from deepview_profile.pl.deepview_interface import trigger_profiling 16 | 17 | 18 | class DeepViewProfilerCallback(pl.Callback): 19 | def __init__(self, profile_name: str): 20 | super().__init__() 21 | self.profiling_triggered = False 22 | self.output_filename = f"{profile_name}_{int(time.time())}.json" 23 | 24 | def on_train_batch_end( 25 | self, 26 | trainer: pl.Trainer, 27 | pl_module: pl.LightningModule, 28 | outputs, 29 | batch, 30 | batch_idx, 31 | ): 32 | 33 | # only do this once 34 | if self.profiling_triggered: 35 | return 36 | 37 | print(colored("DeepViewProfiler: Running profiling.", "green")) 38 | 39 | """ 40 | need 3 things: 41 | 42 | input_provider: just return batch 43 | model_provider: just return pl_module 44 | iteration_provider: a lambda function that (a) calls pl_module.forward_step and (b) calls loss.backward 45 | """ 46 | initial_batch_size = batch[0].shape[0] 47 | 48 | def input_provider(batch_size: int = initial_batch_size) -> Tuple: 49 | model_inputs = list() 50 | for elem in batch: 51 | # we assume the first dimension is the batch dimension 52 | model_inputs.append( 53 | elem[:1].repeat([batch_size] + [1 for _ in elem.shape[1:]]) 54 | ) 55 | return (tuple(model_inputs), 0) 56 | 57 | model_provider = lambda: pl_module 58 | 59 | def iteration_provider(module: torch.nn.Module) -> Callable: 60 | def iteration(*args, **kwargs): 61 | loss = module.training_step(*args, **kwargs) 62 | 
loss.backward() 63 | 64 | return iteration 65 | 66 | project_root = os.getcwd() 67 | 68 | output = trigger_profiling( 69 | project_root, 70 | "entry_point.py", 71 | initial_batch_size, 72 | input_provider, 73 | model_provider, 74 | iteration_provider, 75 | ) 76 | 77 | with open(self.output_filename, "w") as fp: 78 | json.dump(output, fp, indent=4) 79 | 80 | print( 81 | colored( 82 | f"DeepViewProfiler: Profiling complete! Report written to ", "green" 83 | ) 84 | + colored(self.output_filename, "green", attrs=["bold"]) 85 | ) 86 | print( 87 | colored( 88 | f"DeepViewProfiler: View your report at https://deepview.centml.ai", 89 | "green", 90 | ) 91 | ) 92 | self.profiling_triggered = True 93 | -------------------------------------------------------------------------------- /deepview_profile/pl/deepview_interface.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from typing import Callable 3 | import platform 4 | 5 | from deepview_profile.analysis.session import AnalysisSession 6 | from deepview_profile.exceptions import AnalysisError 7 | from deepview_profile.nvml import NVML 8 | 9 | # from deepview_profile.utils import release_memory, next_message_to_dict, files_encoded_unique 10 | from deepview_profile.utils import release_memory, files_encoded_unique 11 | from deepview_profile.error_printing import print_analysis_error 12 | 13 | from google.protobuf.json_format import MessageToDict 14 | 15 | 16 | def measure_breakdown(session, nvml): 17 | print("analysis: running measure_breakdown()") 18 | yield session.measure_breakdown(nvml) 19 | release_memory() 20 | 21 | 22 | def measure_throughput(session): 23 | print("analysis: running measure_throughput()") 24 | yield session.measure_throughput() 25 | release_memory() 26 | 27 | 28 | def habitat_predict(session): 29 | print("analysis: running deepview_predict()") 30 | yield session.habitat_predict() 31 | release_memory() 32 | 33 | 34 | def measure_utilization(session): 35 | print("analysis: running measure_utilization()") 36 | yield session.measure_utilization() 37 | release_memory() 38 | 39 | 40 | def energy_compute(session): 41 | print("analysis: running energy_compute()") 42 | yield session.energy_compute() 43 | release_memory() 44 | 45 | 46 | def ddp_analysis(session): 47 | print("analysis: running ddp_computation()") 48 | yield session.ddp_computation() 49 | release_memory() 50 | 51 | 52 | def hardware_information(nvml): 53 | hardware_info = { 54 | "hostname": platform.node(), 55 | "os": " ".join(list(platform.uname())), 56 | "gpus": nvml.get_device_names(), 57 | } 58 | return hardware_info 59 | 60 | 61 | class DummyStaticAnalyzer: 62 | def batch_size_location(self): 63 | return None 64 | 65 | 66 | def next_message_to_dict(a): 67 | message = next(a) 68 | return MessageToDict(message, preserving_proto_field_name=True) 69 | 70 | 71 | def trigger_profiling( 72 | project_root: str, 73 | entry_point: str, 74 | initial_batch_size: int, 75 | input_provider: Callable, 76 | model_provider: Callable, 77 | iteration_provider: Callable, 78 | ): 79 | try: 80 | data = { 81 | "analysis": { 82 | "message_type": "analysis", 83 | "project_root": project_root, 84 | "project_entry_point": entry_point, 85 | "hardware_info": {}, 86 | "throughput": {}, 87 | "breakdown": {}, 88 | "habitat": {}, 89 | "additionalProviders": "", 90 | "energy": {}, 91 | "utilization": {}, 92 | "ddp": {}, 93 | }, 94 | "epochs": 50, 95 | "iterations": 1000, 96 | "encodedFiles": [], 97 | } 98 | 99 | session = AnalysisSession( 100 | 
project_root, 101 | entry_point, 102 | project_root, 103 | model_provider, 104 | input_provider, 105 | iteration_provider, 106 | initial_batch_size, 107 | DummyStaticAnalyzer(), 108 | ) 109 | release_memory() 110 | 111 | exclude_source = False 112 | 113 | with NVML() as nvml: 114 | data["analysis"]["hardware_info"] = hardware_information(nvml) 115 | data["analysis"]["breakdown"] = next_message_to_dict( 116 | measure_breakdown(session, nvml) 117 | ) 118 | 119 | operation_tree = data["analysis"]["breakdown"]["operation_tree"] 120 | if not exclude_source and operation_tree is not None: 121 | data["encodedFiles"] = files_encoded_unique(operation_tree) 122 | 123 | data["analysis"]["throughput"] = next_message_to_dict( 124 | measure_throughput(session) 125 | ) 126 | data["analysis"]["habitat"] = next_message_to_dict(habitat_predict(session)) 127 | data["analysis"]["utilization"] = next_message_to_dict( 128 | measure_utilization(session) 129 | ) 130 | data["analysis"]["energy"] = next_message_to_dict(energy_compute(session)) 131 | # data['analysis']['ddp'] = next_message_to_dict(ddp_analysis(session)) 132 | 133 | from deepview_profile.export_converter import convert 134 | 135 | data["analysis"] = convert(data["analysis"]) 136 | 137 | return data 138 | 139 | except AnalysisError as ex: 140 | print_analysis_error(ex) 141 | sys.exit(1) 142 | -------------------------------------------------------------------------------- /deepview_profile/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from deepview_profile.exceptions import AnalysisError 4 | from deepview_profile.models.analysis import PerformanceLimits 5 | 6 | 7 | def to_trainable_model(parse_tree, class_name): 8 | try: 9 | executable = compile(parse_tree, '', 'exec') 10 | scope = {} 11 | exec(executable, scope, scope) 12 | model = scope[class_name]().to(torch.device('cuda')) 13 | model.train() 14 | return model 15 | except Exception as ex: 16 | raise AnalysisError(str(ex), type(ex)) 17 | 18 | 19 | def get_performance_limits(memory_info, throughput_info): 20 | max_capacity_batch_size = memory_info.usage_model_mb.inverse( 21 | memory_info.max_capacity_mb) 22 | max_capacity_throughput = ( 23 | max_capacity_batch_size / 24 | throughput_info.runtime_model_ms.evaluate(max_capacity_batch_size) * 25 | 1000 26 | ) 27 | max_throughput_batch_size = throughput_info.batch_from_throughput( 28 | throughput_info.max_throughput) 29 | 30 | thpt_limits = (max_throughput_batch_size, throughput_info.max_throughput) 31 | mem_limits = (max_capacity_batch_size, max_capacity_throughput) 32 | 33 | limits = min(thpt_limits, mem_limits, key=lambda tup: tup[0]) 34 | 35 | return PerformanceLimits( 36 | max_batch_size=limits[0], 37 | throughput_limit=limits[1], 38 | ) 39 | -------------------------------------------------------------------------------- /deepview_profile/profiler/autograd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from deepview_profile.profiler.backward import get_grad_fn, flatten_operation_output 4 | 5 | 6 | class AutogradEngine: 7 | """ 8 | Emulates the backward pass for a given model output, for timing purposes. 
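    An illustrative use, assuming `output` is a CUDA tensor with a grad_fn:

        output = model(inputs)                   # forward pass, on GPU
        engine = AutogradEngine.new_from(output)
        engine.run_backward()                    # replays the grad_fns; gradients
                                                 # are timed, not accumulated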
9 | """ 10 | def __init__(self, grad_fn_ordering, input_map, initial_inputs): 11 | self._grad_fn_ordering = grad_fn_ordering 12 | self._input_holder = { 13 | fn: [None] * size for fn, size in input_map.items() 14 | } 15 | self._input_holder[self._grad_fn_ordering[0]] = initial_inputs 16 | 17 | @classmethod 18 | def new_from(cls, operation_output, exclude_accumulate_grad=True): 19 | # Traverse the autograd graph, build input map for each grad_fn and 20 | # create a topological ordering 21 | _, initial_grad_fn = get_grad_fn(operation_output) 22 | if initial_grad_fn is None: 23 | raise ValueError('No grad_fn available on the operation output.') 24 | 25 | ordering = [] 26 | input_map = {} 27 | initial_inputs = [ 28 | tensor.detach() 29 | for tensor in flatten_operation_output(operation_output) 30 | ] 31 | input_map[initial_grad_fn] = len(initial_inputs) 32 | 33 | stack = [(initial_grad_fn, 0)] 34 | visited = {initial_grad_fn} 35 | 36 | # Build a topological ordering 37 | while len(stack) > 0: 38 | grad_fn, visit_count = stack.pop() 39 | if visit_count != 0: 40 | ordering.append(grad_fn) 41 | continue 42 | 43 | stack.append((grad_fn, 1)) 44 | for next_fn, input_idx in grad_fn.next_functions: 45 | if next_fn is None: 46 | continue 47 | 48 | if (exclude_accumulate_grad and 49 | next_fn.name() == 'torch::autograd::AccumulateGrad'): 50 | continue 51 | 52 | # Keep track of the inputs to each grad_fn 53 | if next_fn not in input_map: 54 | input_map[next_fn] = 1 55 | input_map[next_fn] = max(input_map[next_fn], input_idx + 1) 56 | 57 | # Determine whether to visit this grad_fn 58 | if next_fn in visited: 59 | continue 60 | 61 | visited.add(next_fn) 62 | stack.append((next_fn, 0)) 63 | 64 | ordering.reverse() 65 | return cls(ordering, input_map, initial_inputs) 66 | 67 | def run_backward(self): 68 | for grad_fn in self._grad_fn_ordering: 69 | # 1. Run the backward function 70 | outputs = grad_fn(*(self._input_holder[grad_fn])) 71 | 72 | # 2. Store its outputs for the next backward function(s) 73 | if isinstance(outputs, torch.Tensor): 74 | outputs = [outputs] 75 | for (output, (next_fn, input_idx)) in zip( 76 | outputs, grad_fn.next_functions): 77 | if next_fn is None or next_fn not in self._input_holder: 78 | continue 79 | # NOTE: If implementing to actually calculate the gradient, we 80 | # need to sum gradients that "flow" into the same grad function 81 | # input. 
82 | self._input_holder[next_fn][input_idx] = output 83 | -------------------------------------------------------------------------------- /deepview_profile/profiler/backward.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BackwardHelper: 5 | def __init__(self, backward_runnable, ag_dict): 6 | self.run_backward = backward_runnable 7 | self._ag_dict = ag_dict 8 | 9 | @classmethod 10 | def new_from(cls, operation_outputs): 11 | retval, initial_grad_fn = get_grad_fn(operation_outputs) 12 | if initial_grad_fn is None: 13 | raise ValueError('No grad_fn available on the operation output.') 14 | 15 | grads = torch.ones_like(retval) 16 | def backward_runnable(): 17 | torch.autograd.backward(retval, grads, retain_graph=True) 18 | 19 | size_dict = get_accumulate_grad_inputs( 20 | initial_grad_fn, 21 | backward_runnable, 22 | ) 23 | 24 | ag_dict = { 25 | grad_fn: torch.randn(size, device=torch.device('cuda')) 26 | for grad_fn, size in size_dict.items() 27 | } 28 | 29 | return cls(backward_runnable, ag_dict) 30 | 31 | def run_accumulate_grad(self): 32 | for grad_fn, grad in self._ag_dict.items(): 33 | grad_fn(grad) 34 | 35 | 36 | def backward_available(operation_output): 37 | return get_grad_fn(operation_output)[1] is not None 38 | 39 | 40 | def flatten_operation_output(operation_output): 41 | if isinstance(operation_output, torch.Tensor): 42 | return [operation_output] 43 | elif (not isinstance(operation_output, tuple) and 44 | not isinstance(operation_output, list)): 45 | return [] 46 | 47 | flattened = [] 48 | for value in operation_output: 49 | flattened.extend(flatten_operation_output(value)) 50 | return flattened 51 | 52 | 53 | def get_grad_fn(retval): 54 | if isinstance(retval, torch.Tensor) and retval.grad_fn is not None: 55 | return retval, retval.grad_fn 56 | elif isinstance(retval, tuple) or isinstance(retval, list): 57 | for inner_value in retval: 58 | inner_retval, grad_fn = get_grad_fn(inner_value) 59 | if grad_fn is not None: 60 | return inner_retval, grad_fn 61 | 62 | return None, None 63 | 64 | 65 | def get_accumulate_grad_inputs(initial_grad_fn, backward_runnable): 66 | input_dict = {} 67 | hook_handles = [] 68 | def get_hook(grad_fn): 69 | def hook(arg1, arg2): 70 | if not isinstance(arg2[0], torch.Tensor): 71 | return 72 | input_dict[grad_fn] = arg2[0].size() 73 | return hook 74 | 75 | # Traverse the graph to identify all AccumulateGrad functions 76 | stack = [initial_grad_fn] 77 | visited = {initial_grad_fn} 78 | 79 | while len(stack) > 0: 80 | grad_fn = stack.pop() 81 | 82 | if grad_fn.name() == 'torch::autograd::AccumulateGrad': 83 | hook_handles.append(grad_fn.register_hook(get_hook(grad_fn))) 84 | 85 | for next_grad_fn, _ in grad_fn.next_functions: 86 | if next_grad_fn is None or next_grad_fn in visited: 87 | continue 88 | stack.append(next_grad_fn) 89 | visited.add(next_grad_fn) 90 | 91 | # Run a backward pass to get accumulate grad sizes 92 | backward_runnable() 93 | torch.cuda.synchronize() 94 | 95 | # Clear hooks 96 | for handle in hook_handles: 97 | handle.remove() 98 | 99 | return input_dict 100 | -------------------------------------------------------------------------------- /deepview_profile/profiler/ddp.py: -------------------------------------------------------------------------------- 1 | from scipy.stats import gaussian_kde 2 | import numpy as np 3 | import os 4 | import logging 5 | from deepview_profile.pytorch_profiler_log_reader import ( 6 | get_first_last_step, 7 | get_bucket_sizes, 8 | 
get_ddp_forward_backward_times, 9 | ) 10 | import time 11 | from torch.profiler import profile, schedule, ProfilerActivity 12 | import torch.distributed as dist 13 | from torch.nn.parallel import DistributedDataParallel as DDP 14 | import subprocess 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | FILENAME = "pytorch_profiler.json" 19 | RANK = 0 20 | WORLD_SIZE = 1 21 | DEFAULT_BUCKET_SIZE = 25 22 | 23 | 24 | def setup(rank, world_size): 25 | os.environ["MASTER_ADDR"] = "localhost" 26 | os.environ["MASTER_PORT"] = "12345" 27 | dist.init_process_group("nccl", rank=rank, world_size=world_size) 28 | 29 | 30 | def cleanup(): 31 | dist.destroy_process_group() 32 | 33 | 34 | def _bucket_estimate_max_expected(bucket_times, ngpu): 35 | m = 1000 36 | 37 | np_samples = np.array(bucket_times) 38 | 39 | kde_samples = gaussian_kde(np_samples) 40 | 41 | z_arr = [] 42 | for _ in range(m): 43 | num_resamples = kde_samples.resample(ngpu) 44 | 45 | z_arr.append(np.max(num_resamples)) 46 | 47 | expected_max = np.mean(z_arr) 48 | 49 | return expected_max 50 | 51 | 52 | def _bucket_comp_times(path_to_file): 53 | data_matrix = [] 54 | first_step, last_step = get_first_last_step(path_to_file) 55 | NUM_STEPS = 25 56 | forward_time_acc = 0 57 | for step in range(first_step + 1, first_step + NUM_STEPS + 1): 58 | fw_time, bucket_comp_times = get_ddp_forward_backward_times(path_to_file, step) 59 | forward_time_acc += fw_time 60 | """ 61 | storing as: 62 | [bucket_0 time1, bucket_1 time1, ... , bucket_n time1] 63 | [bucket_0 time2, bucket_1 time2, ... , bucket_n time2] 64 | ... 65 | """ 66 | data_matrix.append(bucket_comp_times) 67 | # convert to numpy and transpose 68 | data_numpy = np.array(data_matrix) 69 | """ 70 | store as : 71 | [bucket_0 time1, bucket_0 time2, ...., bucket_0 time n] 72 | [bucket_1 time1, bucket_1 time2, ...., bucket_1 time n] 73 | """ 74 | data_transpose = np.transpose(data_numpy) 75 | 76 | return forward_time_acc / NUM_STEPS, data_transpose 77 | 78 | 79 | def _bucket_expected_max(bucket_times, ngpus): 80 | expected_max_arr = [] 81 | for samples in bucket_times: 82 | expected_max = _bucket_estimate_max_expected(samples, ngpus) 83 | expected_max_arr.append(expected_max) 84 | 85 | return expected_max_arr 86 | 87 | 88 | def _trace_handler(p): 89 | p.export_chrome_trace(FILENAME) 90 | 91 | 92 | def run_profiler(model_provider, input_provider, iteration_provider): 93 | setup(RANK, WORLD_SIZE) 94 | 95 | model = model_provider() 96 | inputs = input_provider() 97 | ddp_model = DDP(model, device_ids=[RANK], bucket_cap_mb=DEFAULT_BUCKET_SIZE) 98 | iteration = iteration_provider(ddp_model) 99 | # warmup for 30 secs 100 | start = time.time() 101 | elapsed = 0 102 | 103 | while elapsed < 30: 104 | for _ in range(100): 105 | iteration(*inputs) 106 | elapsed = time.time() - start 107 | 108 | skip_first = 10 109 | wait = 5 110 | warmup = 10 111 | active = 30 112 | totalIterations = skip_first + wait + warmup + active 113 | deepviewSchedule = schedule( 114 | skip_first=skip_first, wait=wait, warmup=warmup, active=active, repeat=1 115 | ) 116 | 117 | with profile( 118 | activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], 119 | schedule=deepviewSchedule, 120 | on_trace_ready=_trace_handler, 121 | ) as p: 122 | for _ in range(totalIterations): 123 | iteration(*inputs) 124 | p.step() 125 | 126 | cleanup() 127 | 128 | 129 | def ddp_analysis(model_provider, input_provider, iteration_provider): 130 | run_profiler(model_provider, input_provider, iteration_provider) 131 | 132 | path_to_file = 
os.path.join(os.getcwd(), FILENAME) 133 | 134 | fw_avg_msec, bucket_comp_times = _bucket_comp_times(path_to_file) 135 | bucket_sizes_arr = get_bucket_sizes(model_provider(), DEFAULT_BUCKET_SIZE) 136 | 137 | expected_max_2gpus = _bucket_expected_max(bucket_comp_times, 2) 138 | expected_max_4gpus = _bucket_expected_max(bucket_comp_times, 4) 139 | 140 | jsonFormat = { 141 | "forward_time_ms": fw_avg_msec, 142 | "bucket_sizes": bucket_sizes_arr, 143 | "expected_computation_times": [ 144 | {"ngpus": 2, "expected_max_times": expected_max_2gpus}, 145 | {"ngpus": 4, "expected_max_times": expected_max_4gpus}, 146 | ], 147 | } 148 | 149 | subprocess.run(["rm", "-f", os.path.join(os.getcwd(), FILENAME)]) 150 | return jsonFormat 151 | -------------------------------------------------------------------------------- /deepview_profile/protocol/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/protocol/__init__.py -------------------------------------------------------------------------------- /deepview_profile/protocol/message_sender.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import pynvml 4 | import platform 5 | 6 | from deepview_profile.exceptions import NoConnectionError 7 | 8 | import deepview_profile.protocol_gen.innpv_pb2 as pm 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class MessageSender: 14 | def __init__(self, connection_manager): 15 | self._connection_manager = connection_manager 16 | 17 | def send_initialize_response(self, context): 18 | message = pm.InitializeResponse() 19 | connection = self._connection_manager.get_connection(context.address) 20 | message.server_project_root = connection.project_root 21 | message.entry_point.components.extend(connection.entry_point.split(os.sep)) 22 | 23 | # Populate hardware info 24 | message.hardware.hostname = platform.node() 25 | message.hardware.os = " ".join(list(platform.uname())) 26 | pynvml.nvmlInit() 27 | for i in range(pynvml.nvmlDeviceGetCount()): 28 | handle = pynvml.nvmlDeviceGetHandleByIndex(i) 29 | device_name = pynvml.nvmlDeviceGetName(handle).decode("utf-8") 30 | message.hardware.gpus.append(device_name) 31 | pynvml.nvmlShutdown() 32 | 33 | self._send_message(message, 'initialize', context) 34 | 35 | def send_protocol_error(self, error_code, context): 36 | message = pm.ProtocolError() 37 | message.error_code = error_code 38 | self._send_message(message, 'error', context) 39 | 40 | def send_breakdown_response(self, breakdown, context): 41 | # Ideally, MessageSender users should not need to know about the INNPV 42 | # protocol messages. However, to avoid extraneous copies, sometimes 43 | # callers will pass in constructed messages for sending. 
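Returning to _bucket_estimate_max_expected() in ddp.py above: it fits a kernel density estimate to one bucket's observed computation times, then Monte-Carlo-samples the expected maximum across ngpu simulated workers (the straggler effect that gates gradient synchronization). A standalone sketch with made-up timings:

import numpy as np
from scipy.stats import gaussian_kde

bucket_times_ms = [1.9, 2.1, 2.0, 2.3, 1.8, 2.2]     # illustrative samples
kde = gaussian_kde(np.array(bucket_times_ms))
ngpu, m = 4, 1000
expected_max = np.mean([np.max(kde.resample(ngpu)) for _ in range(m)])
print(f"expected straggler time across {ngpu} GPUs: {expected_max:.2f} ms")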
44 | self._send_message(breakdown, 'breakdown', context) 45 | 46 | def send_analysis_error(self, analysis_error, context): 47 | message = pm.AnalysisError() 48 | message.error_message = str(analysis_error) 49 | if analysis_error.file_context is not None: 50 | message.file_context.file_path.components.extend( 51 | analysis_error.file_context.file_path.split(os.sep) 52 | ) 53 | message.file_context.line_number = ( 54 | analysis_error.file_context.line_number 55 | if analysis_error.file_context.line_number is not None 56 | else 0 57 | ) 58 | self._send_message(message, 'analysis_error', context) 59 | 60 | def send_throughput_response(self, throughput, context): 61 | self._send_message(throughput, 'throughput', context) 62 | 63 | def send_habitat_response(self, habitat_resp, context): 64 | self._send_message(habitat_resp, 'habitat', context) 65 | 66 | def send_energy_response(self, energy_resp, context): 67 | self._send_message(energy_resp, 'energy', context) 68 | 69 | def send_utilization_response(self, utilization_resp, context): 70 | self._send_message(utilization_resp, 'utilization', context) 71 | 72 | def send_ddp_response(self, ddp_resp, context): 73 | self._send_message(ddp_resp, 'ddp', context) 74 | 75 | def _send_message(self, message, payload_name, context): 76 | try: 77 | connection = self._connection_manager.get_connection( 78 | context.address) 79 | enclosing_message = pm.FromServer() 80 | getattr(enclosing_message, payload_name).CopyFrom(message) 81 | enclosing_message.sequence_number = context.sequence_number 82 | connection.send_bytes(enclosing_message.SerializeToString()) 83 | except NoConnectionError: 84 | logger.debug( 85 | 'Not sending message to (%s:%d) because it is no longer ' 86 | 'connected.', 87 | *context.address, 88 | ) 89 | -------------------------------------------------------------------------------- /deepview_profile/protocol_gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/protocol_gen/__init__.py -------------------------------------------------------------------------------- /deepview_profile/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | from deepview_profile.analysis.request_manager import AnalysisRequestManager 5 | from deepview_profile.io.connection_acceptor import ConnectionAcceptor 6 | from deepview_profile.io.connection_manager import ConnectionManager 7 | from deepview_profile.protocol.message_handler import MessageHandler 8 | from deepview_profile.protocol.message_sender import MessageSender 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class SkylineServer: 14 | def __init__(self, host, port): 15 | self._requested_host = host 16 | # This is the port the user specified on the command line (it can be 0) 17 | self._requested_port = port 18 | self._connection_acceptor = ConnectionAcceptor( 19 | self._requested_host, 20 | self._requested_port, 21 | self._on_new_connection, 22 | ) 23 | self._connection_manager = ConnectionManager( 24 | self._on_message, 25 | self._on_connection_closed, 26 | ) 27 | self._message_sender = MessageSender(self._connection_manager) 28 | self._analysis_request_manager = AnalysisRequestManager( 29 | self._submit_work, 30 | self._message_sender, 31 | self._connection_manager, 32 | ) 33 | self._message_handler = MessageHandler( 34 
| self._connection_manager, 35 | self._message_sender, 36 | self._analysis_request_manager, 37 | ) 38 | self._main_executor = ThreadPoolExecutor(max_workers=1) 39 | 40 | def __enter__(self): 41 | self.start() 42 | return self 43 | 44 | def __exit__(self, exc_type, exc_value, traceback): 45 | self.stop() 46 | 47 | def start(self): 48 | self._analysis_request_manager.start() 49 | self._connection_acceptor.start() 50 | logger.debug("DeepView server has started.") 51 | 52 | def stop(self): 53 | def shutdown(): 54 | self._connection_acceptor.stop() 55 | self._connection_manager.stop() 56 | 57 | self._analysis_request_manager.stop() 58 | self._main_executor.submit(shutdown).result() 59 | self._main_executor.shutdown() 60 | logger.debug("DeepView server has shut down.") 61 | 62 | @property 63 | def listening_on(self): 64 | return (self._connection_acceptor.host, self._connection_acceptor.port) 65 | 66 | def _on_message(self, data, address): 67 | print("on_message:", data, address) 68 | # Do not call directly - called by a connection 69 | self._main_executor.submit( 70 | self._message_handler.handle_message, 71 | data, 72 | address, 73 | ) 74 | 75 | def _on_new_connection(self, socket, address): 76 | print("on_new_connection", socket, address) 77 | # Do not call directly - called by _connection_acceptor 78 | self._main_executor.submit( 79 | self._connection_manager.register_connection, 80 | socket, 81 | address, 82 | ) 83 | 84 | def _on_connection_closed(self, address): 85 | # Do not call directly - called by a connection when it is closed 86 | self._main_executor.submit( 87 | self._connection_manager.remove_connection, 88 | address, 89 | ) 90 | 91 | def _submit_work(self, func, *args, **kwargs): 92 | print("submit_work", func) 93 | # print("submit_work args:", args) 94 | logger.debug("submit_work args:", args) 95 | print("submit_work kwargs:", kwargs) 96 | # Do not call directly - called by another thread to submit work 97 | # onto the main executor 98 | self._main_executor.submit(func, *args, **kwargs) 99 | -------------------------------------------------------------------------------- /deepview_profile/skyline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | 5 | import deepview_profile 6 | import deepview_profile.commands.interactive 7 | import deepview_profile.commands.memory 8 | import deepview_profile.commands.time 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser( 13 | prog = deepview_profile.__name__, 14 | description = deepview_profile.__description__ 15 | ) 16 | parser.add_argument( 17 | "-v", "--version", 18 | action="store_true", 19 | help="Print the version and exit.", 20 | ) 21 | subparsers = parser.add_subparsers(title="Commands") 22 | deepview_profile.commands.interactive.register_command(subparsers) 23 | deepview_profile.commands.memory.register_command(subparsers) 24 | deepview_profile.commands.time.register_command(subparsers) 25 | args = parser.parse_args() 26 | 27 | if args.version: 28 | print('DeepView Command Line Interface', 'v' + deepview_profile.__version__,) 29 | return 30 | 31 | if 'func' not in args: 32 | parser.print_help() 33 | sys.exit(1) 34 | 35 | # Run the specified command 36 | args.func(args) 37 | 38 | 39 | if __name__ == '__main__': 40 | main() 41 | -------------------------------------------------------------------------------- /deepview_profile/tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tests/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/backward_interceptor.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import torch 3 | 4 | from deepview_profile.exceptions import _SuspendExecution 5 | from deepview_profile.tracking.hook_manager import HookManager 6 | 7 | 8 | class BackwardInterceptor: 9 | def __init__(self): 10 | self._backward_hooks = HookManager() 11 | self.backward_root = None 12 | 13 | @contextlib.contextmanager 14 | def intercept(self): 15 | self._backward_hooks.attach_hook( 16 | torch.Tensor, 17 | 'backward', 18 | self._hook_creator, 19 | ) 20 | try: 21 | yield 22 | except _SuspendExecution: 23 | pass 24 | finally: 25 | self._backward_hooks.remove_hooks() 26 | 27 | def _hook_creator(self, fn): 28 | def hook(*args): 29 | self.backward_root = args[0] 30 | raise _SuspendExecution 31 | return hook 32 | -------------------------------------------------------------------------------- /deepview_profile/tracking/base.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sqlite3 3 | 4 | 5 | class TrackerBase: 6 | def __init__(self): 7 | self._is_tracking = False 8 | 9 | @contextlib.contextmanager 10 | def track(self): 11 | self.start_tracking() 12 | try: 13 | yield self 14 | finally: 15 | self.stop_tracking() 16 | 17 | def start_tracking(self): 18 | self._is_tracking = True 19 | 20 | def stop_tracking(self): 21 | self._is_tracking = False 22 | 23 | def populate_report(self, builder): 24 | raise NotImplementedError 25 | 26 | 27 | class ReportBase: 28 | def __init__(self, connection): 29 | self._connection = connection 30 | 31 | def __del__(self): 32 | self._connection.close() 33 | 34 | 35 | class ReportBuilderBase: 36 | def __init__(self, file=None): 37 | database_file = file if file is not None else ':memory:' 38 | self._connection = sqlite3.connect(database_file, check_same_thread=False) 39 | self._create_report_tables() 40 | 41 | def process_tracker(self, tracker): 42 | tracker.populate_report(self) 43 | return self 44 | 45 | def build(self): 46 | raise NotImplementedError 47 | 48 | def _create_report_tables(self): 49 | raise NotImplementedError 50 | -------------------------------------------------------------------------------- /deepview_profile/tracking/call_stack.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import inspect 3 | import os 4 | import re 5 | import torch 6 | from deepview_profile.utils import model_location_patterns 7 | 8 | SourceLocation = collections.namedtuple( 9 | "SourceLocation", ["file_path", "line_number", "module_id"] 10 | ) 11 | 12 | def find_pattern_match(filename): 13 | pattern_list = model_location_patterns() 14 | return any(re.search(pattern, filename) for pattern in pattern_list) 15 | 16 | class CallStack: 17 | def __init__(self, frames): 18 | self.frames = frames 19 | 20 | @staticmethod 21 | def from_here(project_root, 
start_from=1): 22 | """ 23 | Returns the current call stack when invoked. 24 | """ 25 | stack = inspect.stack() 26 | context = [] 27 | try: 28 | for frame_info in stack[start_from:]: 29 | # Only track source locations that are within the project model (or transformers, diffusers, etc) 30 | # that are within a torch.nn.Module. Note that we assume the 31 | # user uses "self" to refer to the current class instance. 32 | 33 | if not ( 34 | frame_info.filename.startswith(project_root) 35 | or find_pattern_match(frame_info.filename) 36 | ): 37 | continue 38 | if "self" not in frame_info.frame.f_locals: 39 | continue 40 | if not isinstance(frame_info.frame.f_locals["self"], torch.nn.Module): 41 | continue 42 | context.append( 43 | SourceLocation( 44 | file_path=os.path.relpath( 45 | frame_info.filename, start=project_root 46 | ), 47 | line_number=frame_info.lineno, 48 | module_id=id(frame_info.frame.f_locals["self"]), 49 | ) 50 | ) 51 | return CallStack(context) 52 | finally: 53 | del stack 54 | -------------------------------------------------------------------------------- /deepview_profile/tracking/callable_tracker.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | 5 | from deepview_profile.tracking.base import TrackerBase 6 | from deepview_profile.tracking.hook_manager import HookManager 7 | from deepview_profile.version_utils import Version 8 | 9 | OLD_VF_PATH_VERSION = Version.parse_semantic_version('1.4.0') 10 | 11 | 12 | class CallableTracker(TrackerBase): 13 | def __init__(self, hook_creator): 14 | super().__init__() 15 | self._hook_manager = HookManager() 16 | self._hook_creator = hook_creator 17 | self._torch_version = Version.parse_semantic_version(torch.__version__) 18 | 19 | def start_tracking(self): 20 | super().start_tracking() 21 | self._hook_manager.attach_hooks_on_module( 22 | torch, 23 | lambda fn: _is_callable_and_public(fn) and \ 24 | fn.__name__ not in BLACKLISTED_TORCH_METHODS, 25 | self._hook_creator, 26 | ) 27 | self._hook_manager.attach_hooks_on_module( 28 | torch.Tensor, 29 | lambda fn: _is_callable_and_public(fn) and \ 30 | fn.__name__ != 'backward' and \ 31 | fn.__name__ not in BLACKLISTED_TENSOR_METHODS, 32 | self._hook_creator, 33 | ) 34 | self._hook_manager.attach_hooks_on_module( 35 | torch.Tensor, 36 | _is_callable_dunder, 37 | self._hook_creator, 38 | ) 39 | self._hook_manager.attach_hooks_on_module( 40 | torch.nn.functional, 41 | _is_callable_and_public, 42 | self._hook_creator, 43 | ) 44 | 45 | # The _VF module was moved to the torch module after version 1.4.0. 46 | # This is an unfortunate hack because we need to monkey patch certain 47 | # internal PyTorch functions to be able to identify all the operations 48 | # properly. The _VF module contains recurrent operations (e.g., lstm). 49 | vf_module = ( 50 | torch._VF if self._torch_version is None or 51 | self._torch_version > OLD_VF_PATH_VERSION 52 | else torch.nn._VF 53 | ) 54 | self._hook_manager.attach_hooks_on_module_using( 55 | vf_module, 56 | torch._C._VariableFunctions, 57 | _is_callable_and_public, 58 | self._hook_creator, 59 | ) 60 | 61 | def stop_tracking(self): 62 | super().stop_tracking() 63 | self._hook_manager.remove_hooks() 64 | 65 | 66 | def _is_callable_and_public(maybe_fn): 67 | # By convention, _ prefixed functions in Python should not be 68 | # called by users (i.e. 
they are "private" functions) 69 | return _is_callable(maybe_fn) and maybe_fn.__name__[0] != '_' 70 | 71 | # Original source of these blacklists: 72 | # https://github.com/NVIDIA/apex/blob/master/apex/pyprof/nvtx/nvmarker.py 73 | BLACKLISTED_DUNDERS = { 74 | '__all__', 75 | '__array__', 76 | '__array_priority__', 77 | '__array_wrap__', 78 | '__bool__', 79 | '__builtins__', 80 | '__cached__', 81 | '__class__', 82 | '__deepcopy__', 83 | '__delattr__', 84 | '__delitem__', 85 | '__dict__', 86 | '__dir__', 87 | '__doc__', 88 | '__file__', 89 | '__format__', 90 | '__getattribute__', 91 | '__getitem__', 92 | '__hash__', 93 | '__index__', 94 | '__init__', 95 | '__init_subclass__', 96 | '__iter__', 97 | '__len__', 98 | '__loader__', 99 | '__module__', 100 | '__name__', 101 | '__new__', 102 | '__nonzero__', 103 | '__package__', 104 | '__path__', 105 | '__reduce__', 106 | '__reduce_ex__', 107 | '__repr__', 108 | '__reversed__', 109 | '__setattr__', 110 | '__setitem__', 111 | '__setstate__', 112 | '__sizeof__', 113 | '__spec__', 114 | '__str__', 115 | '__subclasshook__', 116 | '__version__', 117 | '__weakref__', 118 | } 119 | 120 | BLACKLISTED_TENSOR_METHODS = { 121 | 'size', 'dim', 'item', 'tolist', 122 | } 123 | 124 | BLACKLISTED_TORCH_METHODS = { 125 | 'is_storage', 126 | } 127 | 128 | 129 | def _is_callable_dunder(maybe_fn): 130 | """ 131 | Returns True if maybe_fn is a callable dunder (callable named with double 132 | underscores) (e.g., __add__) 133 | """ 134 | return ( 135 | _is_callable(maybe_fn) and 136 | len(maybe_fn.__name__) > 4 and 137 | maybe_fn.__name__[:2] == '__' and 138 | maybe_fn.__name__[-2:] == '__' and 139 | maybe_fn.__name__ not in BLACKLISTED_DUNDERS 140 | ) 141 | 142 | 143 | def _is_callable(maybe_fn): 144 | return ( 145 | inspect.isfunction(maybe_fn) or 146 | inspect.ismethod(maybe_fn) or 147 | inspect.isbuiltin(maybe_fn) or 148 | inspect.isroutine(maybe_fn) 149 | ) 150 | -------------------------------------------------------------------------------- /deepview_profile/tracking/hook_manager.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class HookManager: 4 | def __init__(self): 5 | self._original_callables = {} 6 | 7 | def attach_hooks_on_module(self, module, predicate, hook_creator): 8 | self.attach_hooks_on_module_using( 9 | module, module, predicate, hook_creator) 10 | 11 | def attach_hooks_on_module_using( 12 | self, module, using_module, predicate, hook_creator): 13 | """ 14 | Attach hooks onto functions in the provided module. Use the 15 | `using_module` to discover the existing functions. 16 | """ 17 | for prop in dir(using_module): 18 | if not predicate(getattr(module, prop)): 19 | continue 20 | self.attach_hook(module, prop, hook_creator) 21 | 22 | def attach_hook(self, module, prop, hook_creator): 23 | target = getattr(module, prop) 24 | self._maybe_store_callable(module, prop, target) 25 | setattr(module, prop, hook_creator(target)) 26 | 27 | def remove_hooks(self): 28 | for module, callable_pairs in self._original_callables.items(): 29 | for prop, original_callable in callable_pairs.items(): 30 | setattr(module, prop, original_callable) 31 | self._original_callables.clear() 32 | 33 | def _maybe_store_callable(self, module, prop, original_callable): 34 | """ 35 | Store the original callable (to be able to restore it) only when it is 36 | the first time we are encountering the given callable. 
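    For instance (math.sqrt being an illustrative target, not one the
    profiler actually patches):

        import math
        manager = HookManager()
        manager.attach_hook(math, 'sqrt', lambda fn: fn)   # stores math.sqrt once
        manager.attach_hook(math, 'sqrt', lambda fn: fn)   # already stored: skipped
        manager.remove_hooks()                             # restores the original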
37 | """ 38 | if module not in self._original_callables: 39 | self._original_callables[module] = {} 40 | 41 | if prop in self._original_callables[module]: 42 | return 43 | 44 | self._original_callables[module][prop] = original_callable 45 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/memory/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/report.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import enum 3 | 4 | from deepview_profile.tracking.base import ReportBase, ReportBuilderBase 5 | import deepview_profile.tracking.memory.report_queries as queries 6 | 7 | 8 | WeightEntry = collections.namedtuple( 9 | 'WeightEntry', 10 | ['weight_name', 11 | 'size_bytes', 12 | 'grad_size_bytes', 13 | 'file_path', 14 | 'line_number'], 15 | ) 16 | 17 | 18 | ActivationEntry = collections.namedtuple( 19 | 'ActivationEntry', 20 | ['operation_name', 'size_bytes', 'file_path', 'line_number'], 21 | ) 22 | 23 | 24 | class MiscSizeType(enum.Enum): 25 | PeakUsageBytes = 'peak_usage_bytes' 26 | 27 | 28 | class MemoryReport(ReportBase): 29 | def __init__(self, connection): 30 | super().__init__(connection) 31 | 32 | def get_weight_entries(self, path_prefix=None): 33 | cursor = self._connection.cursor() 34 | return map( 35 | lambda row: WeightEntry(*row), 36 | cursor.execute(queries.get_weight_entries_with_context), 37 | ) 38 | 39 | def get_activation_entries(self, path_prefix=None): 40 | cursor = self._connection.cursor() 41 | return map( 42 | lambda row: ActivationEntry(*row), 43 | cursor.execute(queries.get_activation_entries_with_context), 44 | ) 45 | 46 | def get_misc_entry(self, misc_size_type: MiscSizeType): 47 | cursor = self._connection.cursor() 48 | cursor.execute(queries.get_misc_entry, (misc_size_type.value,)) 49 | return cursor.fetchone()[0] 50 | 51 | 52 | class MemoryReportBuilder(ReportBuilderBase): 53 | # This is the memory tracking report file format version that will be 54 | # created by this builder. When changes are made to the file format, this 55 | # integer should be increased monotonically. 56 | # 57 | # We need to version these tracking reports to protect us from future 58 | # changes to the file format. 
59 | Version = 1 60 | 61 | def __init__(self, file=None): 62 | super().__init__(file) 63 | 64 | def add_weight_entry( 65 | self, weight_name, size_bytes, grad_size_bytes, stack_context): 66 | cursor = self._connection.cursor() 67 | cursor.execute( 68 | queries.add_weight_entry, 69 | (weight_name, size_bytes, grad_size_bytes), 70 | ) 71 | self._add_stack_frames( 72 | cursor=cursor, 73 | entry_id=cursor.lastrowid, 74 | entry_type=queries.EntryType.Weight, 75 | stack_context=stack_context, 76 | ) 77 | return self 78 | 79 | def add_activation_entry(self, operation_name, size_bytes, stack_context): 80 | cursor = self._connection.cursor() 81 | cursor.execute( 82 | queries.add_activation_entry, (operation_name, size_bytes)) 83 | self._add_stack_frames( 84 | cursor=cursor, 85 | entry_id=cursor.lastrowid, 86 | entry_type=queries.EntryType.Activation, 87 | stack_context=stack_context, 88 | ) 89 | return self 90 | 91 | def add_misc_entry(self, size_type: MiscSizeType, size_bytes): 92 | cursor = self._connection.cursor() 93 | cursor.execute(queries.add_misc_entry, (size_type.value, size_bytes)) 94 | return self 95 | 96 | def build(self): 97 | self._connection.commit() 98 | return MemoryReport(self._connection) 99 | 100 | def _create_report_tables(self): 101 | cursor = self._connection.cursor() 102 | cursor.execute(queries.set_report_format_version.format( 103 | version=MemoryReportBuilder.Version)) 104 | for creation_query in queries.create_report_tables.values(): 105 | cursor.execute(creation_query) 106 | cursor.executemany( 107 | queries.add_entry_type, 108 | map(lambda entry: (entry.value, entry.name), queries.EntryType), 109 | ) 110 | self._connection.commit() 111 | 112 | def _add_stack_frames( 113 | self, 114 | cursor, 115 | entry_id, 116 | entry_type: queries.EntryType, 117 | stack_context, 118 | ): 119 | cursor.execute( 120 | queries.add_correlation_entry, (entry_id, entry_type.value)) 121 | correlation_id = cursor.lastrowid 122 | 123 | def stack_frame_generator(): 124 | for idx, frame in enumerate(stack_context.frames): 125 | yield (correlation_id, idx, frame.file_path, frame.line_number) 126 | 127 | cursor.executemany(queries.add_stack_frame, stack_frame_generator()) 128 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/report_queries.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class EntryType(enum.Enum): 5 | Weight = 1 6 | Activation = 2 7 | 8 | 9 | create_report_tables = { 10 | 'weight_entries': """ 11 | CREATE TABLE IF NOT EXISTS weight_entries ( 12 | id INTEGER PRIMARY KEY, 13 | name TEXT NOT NULL, 14 | size_bytes INTEGER NOT NULL, 15 | grad_size_bytes INTEGER NOT NULL 16 | ) 17 | """, 18 | 'activation_entries': """ 19 | CREATE TABLE IF NOT EXISTS activation_entries ( 20 | id INTEGER PRIMARY KEY, 21 | operation_name TEXT NOT NULL, 22 | size_bytes INTEGER NOT NULL 23 | ) 24 | """, 25 | 'correlation': """ 26 | CREATE TABLE IF NOT EXISTS stack_correlation ( 27 | correlation_id INTEGER PRIMARY KEY, 28 | entry_id INTEGER NOT NULL, 29 | entry_type INTEGER NOT NULL, 30 | UNIQUE (correlation_id, entry_id) 31 | ) 32 | """, 33 | 'correlation_index': """ 34 | CREATE UNIQUE INDEX IF NOT EXISTS entry_type_and_id 35 | ON stack_correlation(entry_type, entry_id) 36 | """, 37 | 'stack_frames': """ 38 | CREATE TABLE IF NOT EXISTS stack_frames ( 39 | correlation_id INTEGER NOT NULL, 40 | ordering INTEGER NOT NULL, 41 | file_path TEXT NOT NULL, 42 | line_number INTEGER NOT 
NULL, 43 | PRIMARY KEY (correlation_id, ordering) 44 | ) 45 | """, 46 | 'entry_types': """ 47 | CREATE TABLE IF NOT EXISTS entry_types ( 48 | entry_type INTEGER PRIMARY KEY, 49 | name TEXT NOT NULL 50 | ) 51 | """, 52 | 'misc_sizes': """ 53 | CREATE TABLE IF NOT EXISTS misc_sizes ( 54 | key TEXT PRIMARY KEY, 55 | size_bytes INT NOT NULL 56 | ) 57 | """, 58 | } 59 | 60 | set_report_format_version = 'PRAGMA user_version = {version:d}' 61 | 62 | add_entry_type = """ 63 | INSERT INTO entry_types (entry_type, name) VALUES (?, ?) 64 | """ 65 | 66 | add_weight_entry = """ 67 | INSERT INTO weight_entries (id, name, size_bytes, grad_size_bytes) 68 | VALUES (NULL, ?, ?, ?) 69 | """ 70 | 71 | add_activation_entry = """ 72 | INSERT INTO activation_entries (id, operation_name, size_bytes) 73 | VALUES (NULL, ?, ?) 74 | """ 75 | 76 | add_correlation_entry = """ 77 | INSERT INTO stack_correlation (correlation_id, entry_id, entry_type) 78 | VALUES (NULL, ?, ?) 79 | """ 80 | 81 | add_stack_frame = """ 82 | INSERT INTO stack_frames (correlation_id, ordering, file_path, line_number) 83 | VALUES (?, ?, ?, ?) 84 | """ 85 | 86 | add_misc_entry = "INSERT INTO misc_sizes (key, size_bytes) VALUES (?, ?)" 87 | 88 | get_misc_entry = "SELECT size_bytes FROM misc_sizes WHERE key = ?" 89 | 90 | get_code_context_subquery = """ 91 | WITH code_contexts AS ( 92 | SELECT c.entry_id, s.file_path, s.line_number 93 | FROM stack_frames AS s JOIN stack_correlation AS c 94 | ON s.correlation_id == c.correlation_id 95 | WHERE 96 | c.entry_type = {:d} 97 | GROUP BY s.correlation_id HAVING s.ordering == MIN(s.ordering) 98 | ) 99 | """ 100 | 101 | get_weight_entries_with_context = ( 102 | get_code_context_subquery.format(EntryType.Weight.value) + 103 | """ 104 | SELECT 105 | w.name, w.size_bytes, w.grad_size_bytes, c.file_path, c.line_number 106 | FROM weight_entries AS w 107 | LEFT JOIN code_contexts AS c 108 | ON w.id == c.entry_id 109 | WHERE w.size_bytes > 0 110 | ORDER BY c.file_path ASC, c.line_number ASC 111 | """ 112 | ) 113 | 114 | get_activation_entries_with_context = ( 115 | get_code_context_subquery.format(EntryType.Activation.value) + 116 | """ 117 | SELECT a.operation_name, a.size_bytes, c.file_path, c.line_number 118 | FROM activation_entries AS a 119 | LEFT JOIN code_contexts AS c 120 | ON a.id == c.entry_id 121 | WHERE a.size_bytes > 0 122 | ORDER BY c.file_path ASC, c.line_number ASC 123 | """ 124 | ) 125 | -------------------------------------------------------------------------------- /deepview_profile/tracking/memory/weights.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import inspect 3 | 4 | from deepview_profile.tracking.base import TrackerBase 5 | from deepview_profile.tracking.call_stack import CallStack 6 | from deepview_profile.tracking.hook_manager import HookManager 7 | from deepview_profile.tracking.utils import tensor_size_bytes 8 | from deepview_profile.util_weak import WeakTensorKeyDictionary 9 | 10 | class WeightsTracker(TrackerBase): 11 | def __init__(self, project_root): 12 | super().__init__() 13 | self._hook_manager = HookManager() 14 | self._module_parameters = WeakTensorKeyDictionary() 15 | self._project_root = project_root 16 | 17 | def start_tracking(self): 18 | super().start_tracking() 19 | self._hook_manager.attach_hook( 20 | torch.nn.Module, 21 | 'register_parameter', 22 | self._register_parameter_hook_creator, 23 | ) 24 | 25 | def stop_tracking(self): 26 | super().stop_tracking() 27 | self._hook_manager.remove_hooks() 28 | 29 | 
def populate_report(self, builder): 30 | for param, (name, stack) in self._module_parameters.items(): 31 | if not param.is_cuda: 32 | continue 33 | builder.add_weight_entry( 34 | weight_name=name, 35 | size_bytes=tensor_size_bytes(param), 36 | grad_size_bytes=tensor_size_bytes(param.grad), 37 | stack_context=stack, 38 | ) 39 | 40 | def populate_breakdown(self, builder): 41 | # The HierarchicalBreakdownBuilder uses the same API as the 42 | # MemoryReportBuilder. 43 | self.populate_report(builder) 44 | 45 | def _register_parameter_hook_creator(self, func): 46 | def hook(*args, **kwargs): 47 | name = args[1] 48 | parameter = args[2] 49 | retval = func(*args, **kwargs) 50 | if parameter is not None and parameter not in self._module_parameters: 51 | self._module_parameters[parameter] = ( 52 | name, 53 | CallStack.from_here(self._project_root, start_from=2), 54 | ) 55 | return retval 56 | return hook 57 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/deepview_profile/tracking/time/__init__.py -------------------------------------------------------------------------------- /deepview_profile/tracking/time/operation.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | from deepview_profile.tracking.call_stack import CallStack 5 | from deepview_profile.tracking.base import TrackerBase 6 | from deepview_profile.tracking.callable_tracker import CallableTracker 7 | from deepview_profile.tracking.utils import remove_dunder 8 | from deepview_profile.profiler.operation import OperationProfiler 9 | 10 | OperationInfo = collections.namedtuple( 11 | 'OperationInfo', ['operation_name', 'stack', 'forward_ms', 'backward_ms']) 12 | 13 | 14 | class OperationRunTimeTracker(TrackerBase): 15 | def __init__(self, project_root): 16 | super().__init__() 17 | self._callable_tracker = CallableTracker(self._hook_creator) 18 | self._profiler = OperationProfiler() 19 | self._project_root = project_root 20 | self._processing_hook = False 21 | 22 | self.operations = [] 23 | 24 | def start_tracking(self): 25 | super().start_tracking() 26 | self._callable_tracker.start_tracking() 27 | 28 | def stop_tracking(self): 29 | super().stop_tracking() 30 | self._callable_tracker.stop_tracking() 31 | 32 | def populate_report(self, builder): 33 | for op_info in self.operations: 34 | builder.add_run_time_entry( 35 | operation_name=remove_dunder(op_info.operation_name), 36 | forward_ms=op_info.forward_ms, 37 | backward_ms=op_info.backward_ms, 38 | stack_context=op_info.stack, 39 | ) 40 | 41 | def populate_breakdown(self, builder): 42 | # The HierarchicalBreakdownBuilder uses the same run time entry API as 43 | # the OperationRunTimeReportBuilder. 44 | self.populate_report(builder) 45 | 46 | def _hook_creator(self, func): 47 | def hook(*args, **kwargs): 48 | # NOTE: We use self._processing_hook to handle cases where we have 49 | # hooks on nested function calls. 
50 | if self._processing_hook: 51 | return func(*args, **kwargs) 52 | 53 | self._processing_hook = True 54 | try: 55 | stack = CallStack.from_here(self._project_root, start_from=2) 56 | if len(stack.frames) == 0: 57 | return func(*args, **kwargs) 58 | 59 | forward_ms, backward_ms = self._profiler.measure_operation_ms( 60 | func, args, kwargs) 61 | self.operations.append(OperationInfo( 62 | operation_name=func.__name__, 63 | stack=stack, 64 | forward_ms=forward_ms, 65 | backward_ms=backward_ms, 66 | )) 67 | 68 | # Actually run the hooked function 69 | return func(*args, **kwargs) 70 | finally: 71 | self._processing_hook = False 72 | 73 | return hook 74 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/report.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from deepview_profile.tracking.base import ReportBase, ReportBuilderBase 4 | import deepview_profile.tracking.time.report_queries as queries 5 | 6 | RunTimeEntry = collections.namedtuple( 7 | 'RunTimeEntry', 8 | ['operation_name', 9 | 'forward_ms', 10 | 'backward_ms', 11 | 'file_path', 12 | 'line_number'], 13 | ) 14 | 15 | 16 | class OperationRunTimeReport(ReportBase): 17 | def __init__(self, connection): 18 | super().__init__(connection) 19 | 20 | def get_run_time_entries(self, path_prefix=None): 21 | cursor = self._connection.cursor() 22 | return map( 23 | lambda row: RunTimeEntry(*row), 24 | cursor.execute(queries.get_run_time_entries_with_context), 25 | ) 26 | 27 | 28 | class OperationRunTimeReportBuilder(ReportBuilderBase): 29 | # This is the operation run time tracking report file format version that 30 | # will be created by this builder. When changes are made to the file 31 | # format, this integer should be increased monotonically. 32 | # 33 | # We need to version these tracking reports to protect us from future 34 | # changes to the file format. 
35 | Version = 1 36 | 37 | def __init__(self, file=None): 38 | super().__init__(file) 39 | 40 | def add_run_time_entry( 41 | self, operation_name, forward_ms, backward_ms, stack_context): 42 | cursor = self._connection.cursor() 43 | cursor.execute(queries.add_run_time_entry, ( 44 | operation_name, 45 | forward_ms, 46 | backward_ms, 47 | )) 48 | entry_id = cursor.lastrowid 49 | 50 | def stack_frame_generator(): 51 | for idx, frame in enumerate(stack_context.frames): 52 | yield (idx, frame.file_path, frame.line_number, entry_id) 53 | 54 | cursor.executemany(queries.add_stack_frame, stack_frame_generator()) 55 | 56 | def build(self): 57 | self._connection.commit() 58 | return OperationRunTimeReport(self._connection) 59 | 60 | def _create_report_tables(self): 61 | cursor = self._connection.cursor() 62 | cursor.execute(queries.set_report_format_version.format( 63 | version=OperationRunTimeReportBuilder.Version)) 64 | for creation_query in queries.create_report_tables.values(): 65 | cursor.execute(creation_query) 66 | self._connection.commit() 67 | -------------------------------------------------------------------------------- /deepview_profile/tracking/time/report_queries.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | create_report_tables = { 4 | 'run_time_entries': """ 5 | CREATE TABLE IF NOT EXISTS run_time_entries ( 6 | id INTEGER PRIMARY KEY, 7 | operation_name TEXT NOT NULL, 8 | forward_ms REAL NOT NULL, 9 | backward_ms REAL 10 | ) 11 | """, 12 | 'stack_frames': """ 13 | CREATE TABLE IF NOT EXISTS stack_frames ( 14 | ordering INTEGER NOT NULL, 15 | file_path TEXT NOT NULL, 16 | line_number INTEGER NOT NULL, 17 | entry_id INTEGER NOT NULL, 18 | PRIMARY KEY (entry_id, ordering) 19 | ) 20 | """, 21 | } 22 | 23 | set_report_format_version = 'PRAGMA user_version = {version:d}' 24 | 25 | add_stack_frame = """ 26 | INSERT INTO stack_frames (ordering, file_path, line_number, entry_id) 27 | VALUES (?, ?, ?, ?) 28 | """ 29 | 30 | add_run_time_entry = """ 31 | INSERT INTO run_time_entries (operation_name, forward_ms, backward_ms) 32 | VALUES (?, ?, ?) 
33 | """ 34 | 35 | get_run_time_entries_with_context = """ 36 | WITH code_contexts AS ( 37 | SELECT entry_id, file_path, line_number FROM stack_frames 38 | GROUP BY entry_id HAVING ordering == MIN(ordering) 39 | ) 40 | SELECT 41 | e.operation_name, 42 | e.forward_ms, 43 | e.backward_ms, 44 | c.file_path, 45 | c.line_number 46 | FROM 47 | run_time_entries AS e LEFT JOIN code_contexts AS c 48 | ON e.id == c.entry_id 49 | ORDER BY c.file_path ASC, c.line_number ASC 50 | """ 51 | -------------------------------------------------------------------------------- /deepview_profile/tracking/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | DUNDER_REGEX = re.compile('__(?P.+)__') 5 | 6 | 7 | def tensor_size_bytes(tensor): 8 | if tensor is None or not tensor.is_cuda: 9 | return 0 10 | return tensor.numel() * tensor.element_size() 11 | 12 | 13 | def remove_dunder(fn_name): 14 | match = DUNDER_REGEX.match(fn_name) 15 | if match is None: 16 | return fn_name 17 | return match.group('name') 18 | -------------------------------------------------------------------------------- /deepview_profile/user_code_utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | 4 | from deepview_profile.exceptions import exceptions_as_analysis_errors 5 | 6 | 7 | @contextlib.contextmanager 8 | def user_code_environment(script_root_path, project_root): 9 | """ 10 | A combined context manager that activates all relevant context managers 11 | used when running user code. 12 | """ 13 | with sys_path_root(script_root_path): 14 | # with prevent_module_caching(): 15 | with exceptions_as_analysis_errors(project_root): 16 | yield 17 | 18 | 19 | @contextlib.contextmanager 20 | def sys_path_root(script_root_path): 21 | """ 22 | A context manager that sets sys.path[0] to the specified path on entry and 23 | then restores it after exiting the context manager. 24 | """ 25 | # As per the Python documentation, sys.path[0] always stores the path to 26 | # the directory containing the Python script that was used to start the 27 | # Python interpreter. The contents of sys.path are used to resolve module 28 | # imports. 29 | # 30 | # When we run user code (e.g., the user's entry point file), we want to run 31 | # it as if it was being directly executed by the user from the shell. For 32 | # example: 33 | # 34 | # $ python3 entry_point.py 35 | # 36 | # For this to work, we need to ensure that sys.path[0] is the path to the 37 | # directory containing the entry_point.py file. However if we use exec(), 38 | # sys.path[0] is set to the path of DeepView's command line executable. 39 | # 40 | # To fix this problem, we set sys.path[0] to the correct root path before 41 | # running the user's code and restore it to DeepView's script path after the 42 | # execution completes. Doing this is **very important** as it ensures that 43 | # imports work as expected inside the user's code. This context manager 44 | # should be used each time we execute user code because imports can exist 45 | # inside user-defined functions. 46 | # 47 | # Setting and then restoring sys.path[0] is better than just appending the 48 | # user's path to sys.path because we want to avoid accidentally importing 49 | # anything from the user's codebase. 
50 | skyline_script_root = sys.path[0] 51 | try: 52 | sys.path[0] = script_root_path 53 | yield 54 | finally: 55 | sys.path[0] = skyline_script_root 56 | 57 | 58 | @contextlib.contextmanager 59 | def prevent_module_caching(): 60 | """ 61 | A context manager that prevents any imported modules from being cached 62 | after exiting. 63 | """ 64 | try: 65 | original_modules = sys.modules.copy() 66 | yield 67 | finally: 68 | newly_added = { 69 | module_name for module_name in sys.modules.keys() 70 | if module_name not in original_modules 71 | } 72 | for module_name in newly_added: 73 | del sys.modules[module_name] 74 | -------------------------------------------------------------------------------- /deepview_profile/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | import gc 4 | import os 5 | import base64 6 | from google.protobuf.json_format import MessageToDict 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | def release_memory(): 11 | logger.debug("Emptying cache") 12 | gc.collect() 13 | torch.cuda.empty_cache() 14 | 15 | def next_message_to_dict(object): 16 | message = next(object) 17 | return MessageToDict(message) 18 | 19 | def files_encoded_unique(operation_tree): 20 | encoded_files = [] 21 | 22 | for analysis in operation_tree: 23 | context_info_map = analysis["operation"].get("contextInfoMap", None) 24 | if context_info_map is not None and len(context_info_map) > 0: 25 | filename = list( 26 | context_info_map[0]["context"]["filePath"]["components"] 27 | ).pop() 28 | 29 | already_in_list = next( 30 | (item for item in encoded_files if item["name"] == filename), None 31 | ) 32 | if not already_in_list: 33 | file_path = os.path.join( 34 | "", *list(context_info_map[0]["context"]["filePath"]["components"]) 35 | ) 36 | 37 | encoded_file = encode_file("", file_path) 38 | encoded_files.append(encoded_file) 39 | 40 | return encoded_files 41 | 42 | def encode_file(root, file): 43 | file_dict = None 44 | if os.path.splitext(file)[1] == ".py" and file != "entry_point.py": 45 | file_dict = {"name": file, "content": ""} 46 | 47 | filename = os.path.join(root, file) 48 | 49 | with open(filename, "r") as f: 50 | file_content = f.read() 51 | file_dict["content"] = base64.b64encode( 52 | file_content.encode("utf-8") 53 | ).decode("utf-8") 54 | 55 | return file_dict 56 | 57 | def model_location_patterns(): 58 | return [ 59 | r"./transformers/models[/\w+/]+\w+.py", 60 | r"./transformers/integrations[/\w+/]+\w+.py", 61 | r"./diffusers/models[/\w+/]+\w+.py", 62 | ] -------------------------------------------------------------------------------- /deepview_profile/version_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import re 3 | 4 | Version = collections.namedtuple('Version', ['major', 'minor', 'patch']) 5 | 6 | VERSION_REGEX = re.compile(r'^\d+\.\d+\.\d+$') 7 | 8 | 9 | class Version: 10 | def __init__(self, major, minor, patch): 11 | self._major = major 12 | self._minor = minor 13 | self._patch = patch 14 | 15 | @property 16 | def major(self): 17 | return self._major 18 | 19 | @property 20 | def minor(self): 21 | return self._minor 22 | 23 | @property 24 | def patch(self): 25 | return self._patch 26 | 27 | def __repr__(self): 28 | return ''.join([ 29 | 'Version(major=', 30 | str(self.major), 31 | ', minor=', 32 | str(self.minor), 33 | ', patch=', 34 | str(self.patch), 35 | ')' 36 | ]) 37 | 38 | def __eq__(self, other): 39 | return ( 40 | self.major ==
other.major and 41 | self.minor == other.minor and 42 | self.patch == other.patch 43 | ) 44 | 45 | def __gt__(self, other): 46 | self_nums = [self.major, self.minor, self.patch] 47 | other_nums = [other.major, other.minor, other.patch] 48 | 49 | for self_ver, other_ver in zip(self_nums, other_nums): 50 | if self_ver > other_ver: 51 | return True 52 | elif self_ver < other_ver: 53 | return False 54 | 55 | return False 56 | 57 | def __ne__(self, other): 58 | return not (self == other) 59 | 60 | def __ge__(self, other): 61 | return self == other or self > other 62 | 63 | def __lt__(self, other): 64 | return not (self >= other) 65 | 66 | def __le__(self, other): 67 | return not (self > other) 68 | 69 | @classmethod 70 | def parse_semantic_version(cls, version_str): 71 | if VERSION_REGEX.match(version_str) is None: 72 | return None 73 | version_nums = list(map(int, version_str.split('.'))) 74 | return cls( 75 | major=version_nums[0], 76 | minor=version_nums[1], 77 | patch=version_nums[2], 78 | ) 79 | -------------------------------------------------------------------------------- /docs/memory-report.md: -------------------------------------------------------------------------------- 1 | This page describes the database schema of the memory report that is generated by DeepView.Profile's `memory` subcommand. Recall that DeepView.Profile's reports (memory and run time) are [SQLite database files](https://www.sqlite.org/). 2 | 3 | **NOTE:** DeepView.Profile's memory profiling is for GPU memory only. 4 | 5 | ## Overview 6 | 7 | DeepView.Profile tracks the memory usage associated with a model's *weights* and *activations*. DeepView.Profile will also report the peak amount of memory allocated during a training iteration. 8 | 9 | Just like the run time report, DeepView.Profile also includes the stack trace associated with each activation or weight in the report. DeepView.Profile only includes the stack frames associated with files inside your project (i.e. files under your project's root directory). 10 | 11 | ## Tables 12 | 13 | ### `weight_entries` 14 | 15 | ```sql title="Schema" 16 | CREATE TABLE weight_entries ( 17 | id INTEGER PRIMARY KEY, 18 | name TEXT NOT NULL, 19 | size_bytes INTEGER NOT NULL, 20 | grad_size_bytes INTEGER NOT NULL 21 | ); 22 | ``` 23 | This table holds the memory used by the model's weights. The `size_bytes` column is the number of bytes used by the weight and `grad_size_bytes` is the number of bytes used by the weight's gradient tensor. The `name` column holds 24 | the weight's name, which is assigned by PyTorch. 25 | 26 | ### `activation_entries` 27 | 28 | ```sql title="Schema" 29 | CREATE TABLE activation_entries ( 30 | id INTEGER PRIMARY KEY, 31 | operation_name TEXT NOT NULL, 32 | size_bytes INTEGER NOT NULL 33 | ); 34 | ``` 35 | This table holds the memory used by the model's activations in one training iteration. The `size_bytes` column is the number of bytes used by the activation. The `operation_name` column is the name of the operation that generated the activation. 36 | 37 | ### `entry_types` 38 | 39 | ```sql title="Schema" 40 | CREATE TABLE entry_types ( 41 | entry_type INTEGER PRIMARY KEY, 42 | name TEXT NOT NULL 43 | ); 44 | ``` 45 | This is a table that stores mappings of DeepView.Profile's memory entry types (activations, weights) to numeric identifiers. DeepView.Profile maps weights to an entry type of `1`, and activations to an entry type of `2`. 
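For reference, the report builder populates this table once, when the report file is created, so querying it in any memory report should return exactly these two rows (the values come from the `EntryType` enum in `report_queries.py`):

```sql
SELECT entry_type, name FROM entry_types;
-- Expected output (sqlite3 CLI formatting):
-- 1|Weight
-- 2|Activation
```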
46 | 47 | ### `stack_correlation` 48 | 49 | ```sql title="Schema" 50 | CREATE TABLE stack_correlation ( 51 | correlation_id INTEGER PRIMARY KEY, 52 | entry_id INTEGER NOT NULL, 53 | entry_type INTEGER NOT NULL, 54 | UNIQUE (correlation_id, entry_id) 55 | ); 56 | CREATE UNIQUE INDEX entry_type_and_id 57 | ON stack_correlation(entry_type, entry_id); 58 | ``` 59 | This table maps entries to a `correlation_id`, which can be used to look up a memory entry's relevant stack frames in the `stack_frames` table. The `entry_type` column contains either `1` or `2`, which correspond to weights and activations, respectively. 60 | 61 | For all rows where `entry_type == 1`, the `entry_id` column will act as a foreign key for the `id` column in the `weight_entries` table. Similarly, for all rows where `entry_type == 2`, the `entry_id` column will act as a foreign key for the `id` column in the `activation_entries` table. 62 | 63 | ### `stack_frames` 64 | 65 | ```sql title="Schema" 66 | CREATE TABLE stack_frames ( 67 | correlation_id INTEGER NOT NULL, 68 | ordering INTEGER NOT NULL, 69 | file_path TEXT NOT NULL, 70 | line_number INTEGER NOT NULL, 71 | PRIMARY KEY (correlation_id, ordering) 72 | ); 73 | ``` 74 | This table holds the stack frames associated with a memory usage entry (both weights and activations). The `correlation_id` column is a foreign key that references the `correlation_id` in the `stack_correlation` table. File paths stored in the `file_path` column will be relative to the project's root directory, and line numbers are 1-based. 75 | 76 | **NOTE:** DeepView.Profile does not add an explicit foreign key constraint to the `correlation_id` column. 77 | 78 | **Ordering.** 79 | There may be multiple stack frames associated with any given memory entry (i.e. any given `correlation_id`). The `ordering` column is used to keep track of the ordering among stack frames that share the same `correlation_id`. When sorted in ascending order by the `ordering` column, the stack frames will be ordered from most-specific (i.e. *closest* to the weight or operation responsible for the activation) to least-specific (i.e. *farthest* from the weight or operation responsible for the activation). 80 | 81 | **Connecting to Entries.** 82 | To get the stack frames for a given entry, you first need to query the `stack_correlation` table to find the `correlation_id` associated with your `entry_id` and `entry_type` combination. Then you can use that `correlation_id` to look up the associated stack frames in this table. 83 | 84 | ### `misc_sizes` 85 | 86 | ```sql title="Schema" 87 | CREATE TABLE misc_sizes ( 88 | key TEXT PRIMARY KEY, 89 | size_bytes INT NOT NULL 90 | ); 91 | ``` 92 | 93 | This table holds any miscellaneous memory usage information that is reported by DeepView.Profile. Currently, DeepView.Profile only uses this table to report the peak memory usage during one training iteration. This memory usage is reported using the `peak_usage_bytes` key. 94 | -------------------------------------------------------------------------------- /docs/providers.md: -------------------------------------------------------------------------------- 1 | ### Model Provider 2 | 3 | ```python 4 | def deepview_model_provider() -> torch.nn.Module: 5 | pass 6 | ``` 7 | 8 | The model provider must take no arguments and return an instance of your model (a `torch.nn.Module`) that is on the GPU (i.e. you need to call `.cuda()` on the module before returning it).
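For example, a minimal provider could look like the sketch below. `MyModel` and its import path are placeholders for your own module:

```python
import torch

from my_project.model import MyModel  # hypothetical import path


def deepview_model_provider() -> torch.nn.Module:
    # Construct the model and move it to the GPU before returning it.
    return MyModel().cuda()
```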
9 | 10 | ### Input Provider 11 | 12 | ```python 13 | def deepview_input_provider(batch_size: int = 32) -> Tuple: 14 | pass 15 | ``` 16 | 17 | The input provider must take a single `batch_size` argument that has a default value (the batch size you want to profile with). It must return an iterable (does not *have* to be a `tuple`) that contains the arguments that you would normally pass to your model's `forward` method. Any `Tensor`s in the returned iterable must be on the GPU (i.e. you need to call `.cuda()` on them before returning them). 18 | 19 | 20 | ### Iteration Provider 21 | 22 | ```python 23 | def deepview_iteration_provider(model: torch.nn.Module) -> Callable: 24 | pass 25 | ``` 26 | 27 | The iteration provider must take a single `model` argument, which will be an instance of your model. This provider must return a callable (e.g., a function) that, when invoked, runs a single training iteration. 28 | 29 | ### Example 30 | 31 | Suppose that your project code is kept under a `my_project` directory: 32 | 33 | ```zsh 34 | my_project 35 | ├── __init__.py 36 | └── model.py 37 | ``` 38 | and your model is defined in `model.py`: 39 | 40 | ```python 41 | import torch.nn as nn 42 | 43 | 44 | class Model(nn.Module): 45 | def __init__(self): 46 | super().__init__() 47 | self.conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3) 48 | self.linear = nn.Linear(in_features=387096, out_features=10) 49 | 50 | def forward(self, input): 51 | out = self.conv(input) 52 | return self.linear(out.view(-1, 387096)) 53 | ``` 54 | 55 | One way to write the `entry_point.py` file would be: 56 | 57 | ```python 58 | import torch 59 | import torch.nn as nn 60 | 61 | from my_project.model import Model 62 | 63 | 64 | class ModelWithLoss(nn.Module): 65 | def __init__(self): 66 | super().__init__() 67 | self.model = Model() 68 | self.loss_fn = nn.CrossEntropyLoss() 69 | 70 | def forward(self, input, target): 71 | output = self.model(input) 72 | return self.loss_fn(output, target) 73 | 74 | 75 | def deepview_model_provider(): 76 | # Return a GPU-based instance of our model (that returns a loss) 77 | return ModelWithLoss().cuda() 78 | 79 | 80 | def deepview_input_provider(batch_size=32): 81 | # Return GPU-based inputs for our model 82 | return ( 83 | torch.randn((batch_size, 3, 256, 256)).cuda(), 84 | torch.randint(low=0, high=10, size=(batch_size,)).cuda(), 85 | ) 86 | 87 | 88 | def deepview_iteration_provider(model): 89 | # Return a function that executes one training iteration 90 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 91 | def iteration(*inputs): 92 | optimizer.zero_grad() 93 | out = model(*inputs) 94 | out.backward() 95 | optimizer.step() 96 | return iteration 97 | ``` 98 | One important thing to highlight is our use of a wrapper `ModelWithLoss` module. DeepView.Profile only provides breakdowns for operations that run inside the module returned by the model provider. We included the loss function in this wrapper module to have DeepView.Profile include it in the breakdown. We could have also placed the loss function call in the `iteration` function. 99 | 100 | You can place these provider functions either in a new file or directly in `model.py`. Whichever file contains the providers will be your project's entry point file. In this example, we defined the providers in a separate file called `entry_point.py` inside `my_project`.
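With the entry point file in place, you can start an interactive profiling session against it. For example, assuming a Poetry-managed install (mirroring the commands used elsewhere in these docs), run the following from your project root:

```zsh
poetry run deepview interactive entry_point.py
```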
101 | -------------------------------------------------------------------------------- /docs/remote.md: -------------------------------------------------------------------------------- 1 | # Remote Profiling 2 | 3 | ## Terminology 4 | - **Client:** The local machine where you run the DeepView.Explore plugin. 5 | - **Server:** The remote machine where you want to run DeepView.Profile. 6 | 7 | ## Prerequisites 8 | **SSH Access.** 9 | At a minimum, you need SSH access to a server that allows SSH tunnelling. If the server machine exposes ports, then it does not need to support SSH tunnelling. 10 | 11 | **DeepView.Profile and DeepView.Predict.** 12 | Install DeepView.Profile and, optionally, DeepView.Predict on your server to enable remote profiling. 13 | 14 | **[VSCode Remote - SSH extension.](https://code.visualstudio.com/docs/remote/ssh)** 15 | This extension allows users to connect to a remote machine and run extensions remotely. The extension handles most of the heavy lifting, which makes it easy to use DeepView.Explore on remote machines. 16 | 17 | **Installing DeepView.Explore on the Server** 18 | To install the DeepView.Explore plugin on the server, take the following steps. 19 | 1. Connect to your server via SSH. 20 | 2. Get the VSIX file by following the installation instructions. Take note of the path to the VSIX file. 21 | 3. Open VSCode on your client and connect to your server. 22 | 4. Click the Extensions tab (Ctrl-Shift-X on Linux/Windows, ⌘-Shift-X on macOS) and click the `...` button. Click `Install from VSIX` and then specify the path to the VSIX file on your server. 23 | 5. Restart VSCode to enable your changes. 24 | 25 | ## Starting a Remote Profiling Session 26 | 27 | ### Starting DeepView.Profile 28 | DeepView.Profile needs to be running on the server to enable the plugin. You can connect to the server via SSH and start DeepView.Profile by running the `deepview interactive` command as usual. 29 | 30 | ```zsh 31 | poetry run deepview interactive 32 | ``` 33 | 34 | If you want to use a different port, pass the `--port` flag to tell the profiler which port to listen on. 35 | 36 | ```zsh 37 | poetry run deepview interactive --port portNumber 38 | ``` 39 | 40 | ### Starting DeepView.Explore 41 | Launch VSCode and open DeepView.Explore by running the `deepview` command in the command palette (Ctrl-Shift-P on Linux/Windows, ⌘-Shift-P on macOS). Select your project root and begin profiling. 42 | -------------------------------------------------------------------------------- /docs/run-time-report.md: -------------------------------------------------------------------------------- 1 | This page describes the database schema of the run time report that is generated by DeepView.Profile's `time` subcommand. Recall 2 | that DeepView.Profile's reports (memory and run time) are [SQLite database files](https://www.sqlite.org/). 3 | 4 | ## Overview 5 | 6 | DeepView.Profile's run time report contains a breakdown of the run times of each operation that runs in one training iteration. DeepView.Profile only tracks the 7 | operations that execute as part of either the forward or backward pass. 8 | 9 | For each tracked operation, DeepView.Profile also includes the stack trace leading to that operation. DeepView.Profile only includes the stack frames associated with files inside your project (i.e. files under your project's root directory).
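Because the report is an ordinary SQLite file, a few lines of Python are enough to inspect it. The sketch below assumes the report was saved as `run_time_report.sqlite` (a placeholder; use whatever output path you gave the `time` subcommand) and reads from the `run_time_entries` table described below:

```python
import sqlite3

connection = sqlite3.connect("run_time_report.sqlite")
query = "SELECT operation_name, forward_ms, backward_ms FROM run_time_entries"
for operation_name, forward_ms, backward_ms in connection.execute(query):
    # backward_ms is None for operations that are not in the backward pass.
    print(operation_name, forward_ms, backward_ms)
connection.close()
```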
10 | 11 | ## Tables 12 | 13 | ### `run_time_entries` 14 | 15 | ```sql title="Schema" 16 | CREATE TABLE run_time_entries ( 17 | id INTEGER PRIMARY KEY, 18 | operation_name TEXT NOT NULL, 19 | forward_ms REAL NOT NULL, 20 | backward_ms REAL 21 | ); 22 | ``` 23 | 24 | This table holds the measured run time(s) of each tracked operation. Each entry in this table represents one operation *instance* (i.e. one invocation of an operation). The columns in this table are self-explanatory. 25 | 26 | **NOTE:** DeepView.Profile reports run times in milliseconds. 27 | 28 | **Backward Pass.** 29 | Note that not every operation is necessarily involved in the backward pass. When an operation is not in the backward pass, `backward_ms` will be `NULL`. 30 | 31 | 32 | ### `stack_frames` 33 | 34 | ```sql title="Schema" 35 | CREATE TABLE stack_frames ( 36 | ordering INTEGER NOT NULL, 37 | file_path TEXT NOT NULL, 38 | line_number INTEGER NOT NULL, 39 | entry_id INTEGER NOT NULL, 40 | PRIMARY KEY (entry_id, ordering) 41 | ); 42 | ``` 43 | 44 | This table holds the stack frames associated with each tracked operation. The `entry_id` column is a foreign key that references the `id` column in `run_time_entries`. 45 | 46 | **NOTE:** DeepView.Profile does not add an explicit foreign key constraint to the `entry_id` column. 47 | 48 | **Ordering.** 49 | There may be multiple stack frames associated with any given tracked operation (i.e. any given `entry_id`). The `ordering` column is used to keep track of the ordering among stack frames that share the same `entry_id`. When sorted in ascending order by the `ordering` column, the stack frames will be ordered from most-specific (i.e. *closest* to the operation's call site) to least-specific (i.e. *farthest* from the operation's call site). 50 | -------------------------------------------------------------------------------- /examples/densenet/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer apply only to the "densenet.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/densenet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import densenet 4 | 5 | 6 | def deepview_model_provider(): 7 | return densenet.densenet121().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | def iteration(*inputs): 20 | optimizer.zero_grad() 21 | out = model(*inputs) 22 | out.backward() 23 | optimizer.step() 24 | return iteration 25 | -------------------------------------------------------------------------------- /examples/gnmt/README.md: -------------------------------------------------------------------------------- 1 | # GNMT (Google Neural Machine Translation) Model 2 | 3 | This directory contains an implementation of GNMT that was adapted from the 4 | code found in the [MLPerf training repository](https://github.com/mlperf/training/tree/master/rnn_translator). 5 | 6 | To launch an interactive DeepView.Profile session for GNMT, run 7 | ``` 8 | deepview interactive entry_point.py 9 | ``` 10 | 11 | 12 | ## License 13 | 14 | This code, with the exception of the `deepview_` prefixed functions in 15 | `entry_point.py`, was adapted from the MLPerf training benchmarks and therefore 16 | shares the same license. The unmodified license can be found in the `LICENSE` 17 | file in the `seq2seq` directory. 18 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Elad Hoffer 4 | Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/data/config.py: -------------------------------------------------------------------------------- 1 | PAD_TOKEN = '<pad>' 2 | UNK_TOKEN = '<unk>' 3 | BOS_TOKEN = '<s>' 4 | EOS_TOKEN = '<\s>' 5 | 6 | # special PAD, UNKNOWN, BEGIN-OF-STRING, END-OF-STRING tokens 7 | PAD, UNK, BOS, EOS = [0, 1, 2, 3] 8 | 9 | # path to the BPE vocabulary file, relative to the data directory, it should 10 | # point to file generated by subword-nmt/get_vocab.py 11 | VOCAB_FNAME = 'vocab.bpe.32000' 12 | 13 | # paths to source and target training files, relative to the data directory, it 14 | # should point to BPE-encoded files, generated by subword-nmt/apply_bpe.py 15 | SRC_TRAIN_FNAME = 'train.tok.clean.bpe.32000.en' 16 | TGT_TRAIN_FNAME = 'train.tok.clean.bpe.32000.de' 17 | 18 | # paths to source and target validation files, relative to the data directory, 19 | # it should point to BPE-encoded files, generated by subword-nmt/apply_bpe.py 20 | SRC_VAL_FNAME = 'newstest_dev.tok.clean.bpe.32000.en' 21 | TGT_VAL_FNAME = 'newstest_dev.tok.clean.bpe.32000.de' 22 | 23 | # path to the test source file, relative to the data directory, it should point 24 | # to BPE-encoded file, generated by subword-nmt/apply_bpe.py 25 | SRC_TEST_FNAME = 'newstest2014.tok.bpe.32000.en' 26 | 27 | # path to the test target file, relative to the data directory, it should point 28 | # to plaintext file, tokenization is performed by the sacrebleu package 29 | TGT_TEST_TARGET_FNAME = 'newstest2014.de' 30 | 31 | # path to the moses detokenizer, relative to the data directory 32 | DETOKENIZER = 'mosesdecoder/scripts/tokenizer/detokenizer.perl' 33 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/data/tokenizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import defaultdict 3 | from functools import partial 4 | 5 | import seq2seq.data.config as config 6 | 7 | 8 | class Tokenizer: 9 | """ 10 | Tokenizer class. 11 | """ 12 | def __init__(self, vocab_fname=None, pad=1, separator='@@'): 13 | """ 14 | Constructor for the Tokenizer class. 15 | 16 | :param vocab_fname: path to the file with vocabulary 17 | :param pad: pads vocabulary to a multiple of 'pad' tokens 18 | :param separator: tokenization separator 19 | """ 20 | if vocab_fname: 21 | self.separator = separator 22 | 23 | logging.info(f'Building vocabulary from {vocab_fname}') 24 | vocab = [config.PAD_TOKEN, config.UNK_TOKEN, 25 | config.BOS_TOKEN, config.EOS_TOKEN] 26 | 27 | with open(vocab_fname) as vfile: 28 | for line in vfile: 29 | vocab.append(line.strip()) 30 | 31 | self.pad_vocabulary(vocab, pad) 32 | 33 | self.vocab_size = len(vocab) 34 | logging.info(f'Size of vocabulary: {self.vocab_size}') 35 | 36 | self.tok2idx = defaultdict(partial(int, config.UNK)) 37 | for idx, token in enumerate(vocab): 38 | self.tok2idx[token] = idx 39 | 40 | self.idx2tok = {} 41 | for key, value in self.tok2idx.items(): 42 | self.idx2tok[value] = key 43 | 44 | def pad_vocabulary(self, vocab, pad): 45 | """ 46 | Pads vocabulary to a multiple of 'pad' tokens.
47 | 48 | :param vocab: list with vocabulary 49 | :param pad: integer 50 | """ 51 | vocab_size = len(vocab) 52 | padded_vocab_size = (vocab_size + pad - 1) // pad * pad 53 | for i in range(0, padded_vocab_size - vocab_size): 54 | token = f'madeupword{i:04d}' 55 | vocab.append(token) 56 | assert len(vocab) % pad == 0 57 | 58 | def get_state(self): 59 | logging.info('Saving state of the tokenizer') 60 | state = { 61 | 'separator': self.separator, 62 | 'vocab_size': self.vocab_size, 63 | 'tok2idx': self.tok2idx, 64 | 'idx2tok': self.idx2tok, 65 | } 66 | return state 67 | 68 | def set_state(self, state): 69 | logging.info('Restoring state of the tokenizer') 70 | self.separator = state['separator'] 71 | self.vocab_size = state['vocab_size'] 72 | self.tok2idx = state['tok2idx'] 73 | self.idx2tok = state['idx2tok'] 74 | 75 | def segment(self, line): 76 | """ 77 | Tokenizes single sentence and adds special BOS and EOS tokens. 78 | 79 | :param line: sentence 80 | 81 | returns: list representing tokenized sentence 82 | """ 83 | line = line.strip().split() 84 | entry = [self.tok2idx[i] for i in line] 85 | entry = [config.BOS] + entry + [config.EOS] 86 | return entry 87 | 88 | def detokenize(self, inputs, delim=' '): 89 | """ 90 | Detokenizes single sentence and removes token separator characters. 91 | 92 | :param inputs: sequence of tokens 93 | :param delim: tokenization delimiter 94 | 95 | returns: string representing detokenized sentence 96 | """ 97 | detok = delim.join([self.idx2tok[idx] for idx in inputs]) 98 | detok = detok.replace(self.separator + ' ', '') 99 | detok = detok.replace(self.separator, '') 100 | 101 | detok = detok.replace(config.BOS_TOKEN, '') 102 | detok = detok.replace(config.EOS_TOKEN, '') 103 | detok = detok.replace(config.PAD_TOKEN, '') 104 | detok = detok.strip() 105 | return detok 106 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/encoder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn.utils.rnn import pack_padded_sequence 3 | from torch.nn.utils.rnn import pad_packed_sequence 4 | 5 | import seq2seq.data.config as config 6 | from seq2seq.utils import init_lstm_ 7 | 8 | 9 | class ResidualRecurrentEncoder(nn.Module): 10 | """ 11 | Encoder with Embedding, LSTM layers, residual connections and optional 12 | dropout. 13 | 14 | The first LSTM layer is bidirectional and uses variable sequence length 15 | API, the remaining (num_layers-1) layers are unidirectional. Residual 16 | connections are enabled after third LSTM layer, dropout is applied on 17 | inputs to LSTM layers. 18 | """ 19 | def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2, 20 | batch_first=False, embedder=None, init_weight=0.1): 21 | """ 22 | Constructor for the ResidualRecurrentEncoder. 
23 | 24 | :param vocab_size: size of vocabulary 25 | :param hidden_size: hidden size for LSTM layers 26 | :param num_layers: number of LSTM layers, 1st layer is bidirectional 27 | :param dropout: probability of dropout (on input to LSTM layers) 28 | :param batch_first: if True the model uses (batch,seq,feature) tensors, 29 | if false the model uses (seq, batch, feature) 30 | :param embedder: instance of nn.Embedding, if None constructor will 31 | create new embedding layer 32 | :param init_weight: range for the uniform initializer 33 | """ 34 | super(ResidualRecurrentEncoder, self).__init__() 35 | self.batch_first = batch_first 36 | self.rnn_layers = nn.ModuleList() 37 | # 1st LSTM layer, bidirectional 38 | self.rnn_layers.append( 39 | nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True, 40 | batch_first=batch_first, bidirectional=True)) 41 | 42 | # 2nd LSTM layer, with 2x larger input_size 43 | self.rnn_layers.append( 44 | nn.LSTM((2 * hidden_size), hidden_size, num_layers=1, bias=True, 45 | batch_first=batch_first)) 46 | 47 | # Remaining LSTM layers 48 | for _ in range(num_layers - 2): 49 | self.rnn_layers.append( 50 | nn.LSTM(hidden_size, hidden_size, num_layers=1, bias=True, 51 | batch_first=batch_first)) 52 | 53 | for lstm in self.rnn_layers: 54 | init_lstm_(lstm, init_weight) 55 | 56 | self.dropout = nn.Dropout(p=dropout) 57 | 58 | if embedder is not None: 59 | self.embedder = embedder 60 | else: 61 | self.embedder = nn.Embedding(vocab_size, hidden_size, 62 | padding_idx=config.PAD) 63 | nn.init.uniform_(self.embedder.weight.data, -init_weight, init_weight) 64 | 65 | def forward(self, inputs, lengths): 66 | """ 67 | Execute the encoder. 68 | 69 | :param inputs: tensor with indices from the vocabulary 70 | :param lengths: vector with sequence lengths (excluding padding) 71 | 72 | returns: tensor with encoded sequences 73 | """ 74 | x = self.embedder(inputs) 75 | 76 | # bidirectional layer 77 | x = self.dropout(x) 78 | x = pack_padded_sequence(x, lengths.cpu().numpy(), 79 | batch_first=self.batch_first) 80 | x, _ = self.rnn_layers[0](x) 81 | x, _ = pad_packed_sequence(x, batch_first=self.batch_first) 82 | 83 | # 1st unidirectional layer 84 | x = self.dropout(x) 85 | x, _ = self.rnn_layers[1](x) 86 | 87 | # the rest of unidirectional layers, 88 | # with residual connections starting from 3rd layer 89 | for i in range(2, len(self.rnn_layers)): 90 | residual = x 91 | x = self.dropout(x) 92 | x, _ = self.rnn_layers[i](x) 93 | x = x + residual 94 | 95 | return x 96 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/gnmt.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | import seq2seq.data.config as config 4 | from seq2seq.models.decoder import ResidualRecurrentDecoder 5 | from seq2seq.models.encoder import ResidualRecurrentEncoder 6 | from seq2seq.models.seq2seq_base import Seq2Seq 7 | 8 | 9 | class GNMT(Seq2Seq): 10 | """ 11 | GNMT v2 model 12 | """ 13 | def __init__(self, vocab_size, hidden_size=1024, num_layers=4, dropout=0.2, 14 | batch_first=False, share_embedding=True): 15 | """ 16 | Constructor for the GNMT v2 model. 
17 | 18 | :param vocab_size: size of vocabulary (number of tokens) 19 | :param hidden_size: internal hidden size of the model 20 | :param num_layers: number of layers, applies to both encoder and 21 | decoder 22 | :param dropout: probability of dropout (in encoder and decoder) 23 | :param batch_first: if True the model uses (batch,seq,feature) tensors, 24 | if false the model uses (seq, batch, feature) 25 | :param share_embedding: if True embeddings are shared between encoder 26 | and decoder 27 | """ 28 | 29 | super(GNMT, self).__init__(batch_first=batch_first) 30 | 31 | if share_embedding: 32 | embedder = nn.Embedding(vocab_size, hidden_size, 33 | padding_idx=config.PAD) 34 | nn.init.uniform_(embedder.weight.data, -0.1, 0.1) 35 | else: 36 | embedder = None 37 | 38 | self.encoder = ResidualRecurrentEncoder(vocab_size, hidden_size, 39 | num_layers, dropout, 40 | batch_first, embedder) 41 | 42 | self.decoder = ResidualRecurrentDecoder(vocab_size, hidden_size, 43 | num_layers, dropout, 44 | batch_first, embedder) 45 | 46 | def forward(self, input_encoder, input_enc_len, input_decoder): 47 | context = self.encode(input_encoder, input_enc_len) 48 | context = (context, input_enc_len, None) 49 | output, _, _ = self.decode(input_decoder, context) 50 | 51 | return output 52 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/models/seq2seq_base.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn.functional import log_softmax 3 | 4 | 5 | class Seq2Seq(nn.Module): 6 | """ 7 | Generic Seq2Seq module, with an encoder and a decoder. 8 | """ 9 | def __init__(self, encoder=None, decoder=None, batch_first=False): 10 | """ 11 | Constructor for the Seq2Seq module. 12 | 13 | :param encoder: encoder module 14 | :param decoder: decoder module 15 | :param batch_first: if True the model uses (batch, seq, feature) 16 | tensors, if false the model uses (seq, batch, feature) tensors 17 | """ 18 | super(Seq2Seq, self).__init__() 19 | self.encoder = encoder 20 | self.decoder = decoder 21 | self.batch_first = batch_first 22 | 23 | def encode(self, inputs, lengths): 24 | """ 25 | Applies the encoder to inputs with a given input sequence lengths. 26 | 27 | :param inputs: tensor with inputs (batch, seq_len) if 'batch_first' 28 | else (seq_len, batch) 29 | :param lengths: vector with sequence lengths (excluding padding) 30 | """ 31 | return self.encoder(inputs, lengths) 32 | 33 | def decode(self, inputs, context, inference=False): 34 | """ 35 | Applies the decoder to inputs, given the context from the encoder. 36 | 37 | :param inputs: tensor with inputs (batch, seq_len) if 'batch_first' 38 | else (seq_len, batch) 39 | :param context: context from the encoder 40 | :param inference: if True inference mode, if False training mode 41 | """ 42 | return self.decoder(inputs, context, inference) 43 | 44 | def generate(self, inputs, context, beam_size): 45 | """ 46 | Autoregressive generator, works with SequenceGenerator class. 47 | Executes decoder (in inference mode), applies log_softmax and topK for 48 | inference with beam search decoding. 
49 | 50 | :param inputs: tensor with inputs to the decoder 51 | :param context: context from the encoder 52 | :param beam_size: beam size for the generator 53 | 54 | returns: (words, logprobs, scores, new_context) 55 | words: indices of topK tokens 56 | logprobs: log probabilities of topK tokens 57 | scores: scores from the attention module (for coverage penalty) 58 | new_context: new decoder context, includes new hidden states for 59 | decoder RNN cells 60 | """ 61 | logits, scores, new_context = self.decode(inputs, context, True) 62 | logprobs = log_softmax(logits, dim=-1) 63 | logprobs, words = logprobs.topk(beam_size, dim=-1) 64 | return words, logprobs, scores, new_context 65 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/train/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import torch 5 | 6 | 7 | 8 | def perhaps_convert_float(param, total): 9 | if isinstance(param, float): 10 | param = int(param * total) 11 | return param 12 | 13 | 14 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 15 | """ 16 | Learning rate scheduler with exponential warmup and step decay. 17 | """ 18 | def __init__(self, optimizer, iterations, warmup_steps=0, 19 | remain_steps=1.0, decay_interval=None, decay_steps=4, 20 | decay_factor=0.5, last_epoch=-1): 21 | """ 22 | Constructor of WarmupMultiStepLR. 23 | 24 | Parameters: warmup_steps, remain_steps and decay_interval accept both 25 | integers and floats as input. Integer input is interpreted as 26 | absolute index of iteration, float input is interpreted as a fraction 27 | of total training iterations (epochs * steps_per_epoch). 28 | 29 | If decay_interval is None, then the decay will happen at regularly spaced 30 | intervals ('decay_steps' decays between iteration indices 31 | 'remain_steps' and 'iterations').
32 | 33 | :param optimizer: instance of optimizer 34 | :param iterations: total number of training iterations 35 | :param warmup_steps: number of warmup iterations 36 | :param remain_steps: start decay at 'remain_steps' iteration 37 | :param decay_interval: interval between LR decay steps 38 | :param decay_steps: max number of decay steps 39 | :param decay_factor: decay factor 40 | :param last_epoch: the index of last iteration 41 | """ 42 | 43 | # iterations before learning rate reaches base LR 44 | self.warmup_steps = perhaps_convert_float(warmup_steps, iterations) 45 | 46 | # iteration at which decay starts 47 | self.remain_steps = perhaps_convert_float(remain_steps, iterations) 48 | 49 | # number of steps between each decay 50 | if decay_interval is None: 51 | # decay at regularly spaced intervals 52 | decay_iterations = iterations - self.remain_steps 53 | self.decay_interval = decay_iterations // decay_steps 54 | self.decay_interval = max(self.decay_interval, 1) 55 | else: 56 | self.decay_interval = perhaps_convert_float(decay_interval, 57 | iterations) 58 | 59 | # multiplicative decay factor 60 | self.decay_factor = decay_factor 61 | 62 | # max number of decay steps 63 | self.decay_steps = decay_steps 64 | 65 | if self.warmup_steps > self.remain_steps: 66 | logging.warning('warmup_steps should not be larger than ' 67 | 'remain_steps, setting warmup_steps=remain_steps') 68 | self.warmup_steps = self.remain_steps 69 | 70 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 71 | 72 | def get_lr(self): 73 | if self.last_epoch <= self.warmup_steps: 74 | # exponential lr warmup 75 | if self.warmup_steps != 0: 76 | warmup_factor = math.exp(math.log(0.01) / self.warmup_steps) 77 | else: 78 | warmup_factor = 1.0 79 | inv_decay = warmup_factor ** (self.warmup_steps - self.last_epoch) 80 | lr = [base_lr * inv_decay for base_lr in self.base_lrs] 81 | 82 | elif self.last_epoch >= self.remain_steps: 83 | # step decay 84 | decay_iter = self.last_epoch - self.remain_steps 85 | num_decay_steps = decay_iter // self.decay_interval + 1 86 | num_decay_steps = min(num_decay_steps, self.decay_steps) 87 | lr = [ 88 | base_lr * (self.decay_factor ** num_decay_steps) 89 | for base_lr in self.base_lrs 90 | ] 91 | else: 92 | # base lr 93 | lr = [base_lr for base_lr in self.base_lrs] 94 | return lr 95 | -------------------------------------------------------------------------------- /examples/gnmt/seq2seq/train/smoothing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothing(nn.Module): 6 | """ 7 | NLL loss with label smoothing. 8 | """ 9 | def __init__(self, padding_idx, smoothing=0.0): 10 | """ 11 | Constructor for the LabelSmoothing module.
12 | 13 | :param padding_idx: index of the PAD token 14 | :param smoothing: label smoothing factor 15 | """ 16 | super(LabelSmoothing, self).__init__() 17 | self.padding_idx = padding_idx 18 | self.confidence = 1.0 - smoothing 19 | self.smoothing = smoothing 20 | 21 | def forward(self, x, target): 22 | logprobs = torch.nn.functional.log_softmax(x, dim=-1, 23 | dtype=torch.float32) 24 | 25 | non_pad_mask = (target != self.padding_idx) 26 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 27 | nll_loss = nll_loss.squeeze(1)[non_pad_mask] 28 | smooth_loss = -logprobs.mean(dim=-1)[non_pad_mask] 29 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 30 | return loss.sum() 31 | -------------------------------------------------------------------------------- /examples/huggingface/entry_point.py: -------------------------------------------------------------------------------- 1 | from transformers import ( 2 | get_linear_schedule_with_warmup, 3 | AutoModelForCausalLM, 4 | Trainer, 5 | ) 6 | import torch 7 | import torch.optim as optim 8 | 9 | model_id = "roberta-base" 10 | 11 | 12 | def deepview_model_provider(): 13 | return AutoModelForCausalLM.from_pretrained(model_id, is_decoder=True).cuda() 14 | 15 | 16 | def deepview_input_provider(batch_size=2): 17 | vocab_size = 30522 18 | src_seq_len = 512 19 | tgt_seq_len = 512 20 | 21 | device = torch.device("cuda") 22 | 23 | source = torch.randint( 24 | low=0, 25 | high=vocab_size, 26 | size=(batch_size, src_seq_len), 27 | dtype=torch.int64, 28 | device=device, 29 | ) 30 | target = torch.randint( 31 | low=0, 32 | high=vocab_size, 33 | size=(batch_size, tgt_seq_len), 34 | dtype=torch.int64, 35 | device=device, 36 | ) 37 | return (source, target) 38 | 39 | 40 | def deepview_iteration_provider(model): 41 | model.parameters() 42 | optimizer = optim.AdamW( 43 | params=model.parameters(), 44 | betas=(0.9, 0.999), 45 | eps=1e-6, 46 | weight_decay=0.01, 47 | lr=1e-4, 48 | ) 49 | scheduler = get_linear_schedule_with_warmup(optimizer, 10000, 500000) 50 | trainer = Trainer(model=model, optimizers=(optimizer, scheduler)) 51 | 52 | def iteration(source, label): 53 | trainer.training_step(model, {"input_ids": source, "labels": label}) 54 | 55 | return iteration -------------------------------------------------------------------------------- /examples/legacy/lenet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class LeNet(nn.Module): 5 | def __init__(self): 6 | super(LeNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5) 8 | self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5) 9 | self.dense1 = nn.Linear(in_features=1250, out_features=500) 10 | self.dense2 = nn.Linear(in_features=500, out_features=10) 11 | self.tanh = nn.Tanh() 12 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 13 | self.softmax = nn.LogSoftmax(dim=1) 14 | 15 | def forward(self, input): 16 | """ 17 | LeNet for CIFAR-10 18 | @innpv size (512, 3, 32, 32) 19 | """ 20 | output = self.conv1(input) 21 | output = self.tanh(output) 22 | output = self.pool(output) 23 | 24 | output = self.conv2(output) 25 | output = self.tanh(output) 26 | output = self.pool(output) 27 | 28 | output = output.view(-1, 1250) 29 | 30 | output = self.dense1(output) 31 | output = self.tanh(output) 32 | output = self.dense2(output) 33 | output = self.softmax(output) 34 | 35 | return output 36 | 
-------------------------------------------------------------------------------- /examples/legacy/testnet2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TestNet(nn.Module): 5 | def __init__(self): 6 | super(TestNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3) 8 | self.bn1 = nn.BatchNorm2d(num_features=64) 9 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3) 10 | self.bn2 = nn.BatchNorm2d(num_features=128) 11 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3) 12 | self.bn3 = nn.BatchNorm2d(num_features=256) 13 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3) 14 | self.bn4 = nn.BatchNorm2d(num_features=512) 15 | 16 | self.linear = nn.Linear(in_features=4608, out_features=1000) 17 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 18 | self.max_pool2 = nn.MaxPool2d(kernel_size=4, stride=4) 19 | self.relu = nn.ReLU() 20 | 21 | def forward(self, input): 22 | """ 23 | @innpv size (16, 3, 128, 128) 24 | """ 25 | output = self.conv1(input) 26 | output = self.bn1(output) 27 | output = self.relu(output) 28 | output = self.max_pool(output) 29 | 30 | output = self.conv2(output) 31 | output = self.bn2(output) 32 | output = self.relu(output) 33 | output = self.max_pool(output) 34 | 35 | output = self.conv3(output) 36 | output = self.bn3(output) 37 | output = self.relu(output) 38 | output = self.max_pool(output) 39 | 40 | output = self.conv4(output) 41 | output = self.bn4(output) 42 | output = self.relu(output) 43 | output = self.max_pool2(output) 44 | 45 | output = output.view(output.size(0), -1) 46 | output = self.linear(output) 47 | 48 | return output 49 | -------------------------------------------------------------------------------- /examples/legacy/vgg11.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class VGG11(nn.Module): 5 | def __init__(self): 6 | super(VGG11, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1) 8 | self.bn1 = nn.BatchNorm2d(64) 9 | 10 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1) 11 | self.bn2 = nn.BatchNorm2d(128) 12 | 13 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1) 14 | self.bn3 = nn.BatchNorm2d(256) 15 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1) 16 | self.bn4 = nn.BatchNorm2d(256) 17 | 18 | self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1) 19 | self.bn5 = nn.BatchNorm2d(512) 20 | self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 21 | self.bn6 = nn.BatchNorm2d(512) 22 | 23 | self.conv7 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 24 | self.bn7 = nn.BatchNorm2d(512) 25 | self.conv8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1) 26 | self.bn8 = nn.BatchNorm2d(512) 27 | 28 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 29 | self.relu = nn.ReLU() 30 | self.linear = nn.Linear(in_features=512, out_features=10) 31 | 32 | def forward(self, input): 33 | """ 34 | VGG-11 for CIFAR-10 35 | @innpv size (32, 3, 32, 32) 36 | """ 37 | output = self.conv1(input) 38 | output = self.bn1(output) 39 | output = self.relu(output) 40 | output = self.max_pool(output) 41 | 42 | output = self.conv2(output) 43 | output = 
self.bn2(output) 44 | output = self.relu(output) 45 | output = self.max_pool(output) 46 | 47 | output = self.conv3(output) 48 | output = self.bn3(output) 49 | output = self.relu(output) 50 | output = self.conv4(output) 51 | output = self.bn4(output) 52 | output = self.relu(output) 53 | output = self.max_pool(output) 54 | 55 | output = self.conv5(output) 56 | output = self.bn5(output) 57 | output = self.relu(output) 58 | output = self.conv6(output) 59 | output = self.bn6(output) 60 | output = self.relu(output) 61 | output = self.max_pool(output) 62 | 63 | output = self.conv7(output) 64 | output = self.bn7(output) 65 | output = self.relu(output) 66 | output = self.conv8(output) 67 | output = self.bn8(output) 68 | output = self.relu(output) 69 | output = self.max_pool(output) 70 | 71 | output = output.view(-1, 512) 72 | output = self.linear(output) 73 | 74 | return output 75 | -------------------------------------------------------------------------------- /examples/nanogpt/entry_point.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | from model import GPTConfig, GPT 6 | 7 | # Context window length (tokens per training sequence). 8 | block_size = 32 9 | device = "cuda" if torch.cuda.is_available() else "cpu" 10 | 11 | # model 12 | n_layer = 16 13 | n_head = 16 14 | n_embd = 512 15 | dropout = 0.0 16 | vocab_size = 65 17 | bias = False 18 | 19 | # AdamW optimizer 20 | learning_rate = 6e-4 21 | weight_decay = 1e-1 22 | beta1 = 0.9 23 | beta2 = 0.95 24 | 25 | 26 | # optimizer 27 | def configure_optimizer(model, weight_decay, learning_rate, betas): 28 | param_dict = {pn: p for pn, p in model.named_parameters()} 29 | param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad} 30 | decay_params = [p for n, p in param_dict.items() if p.dim() >= 2] 31 | nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2] 32 | optim_groups = [ 33 | {"params": decay_params, "weight_decay": weight_decay}, 34 | {"params": nodecay_params, "weight_decay": 0.0}, 35 | ] 36 | optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas) 37 | 38 | return optimizer 39 | 40 | def deepview_model_provider(): 41 | # model init 42 | # --------------------------------------------- 43 | # Enable flash attention 44 | enable_flash_attention = False 45 | model_args = dict( 46 | n_layer=n_layer, 47 | n_head=n_head, 48 | n_embd=n_embd, 49 | block_size=block_size, 50 | bias=bias, 51 | vocab_size=vocab_size, 52 | dropout=dropout, 53 | enable_flash_attention=enable_flash_attention, 54 | ) 55 | gptconf = GPTConfig(**model_args) 56 | model = GPT(gptconf) 57 | return model.to(device) 58 | 59 | 60 | def deepview_input_provider(batch_size=48): 61 | data = np.random.randint(vocab_size, size=(batch_size, block_size + 1)) 62 | x = torch.stack( 63 | [torch.from_numpy((data[i, :-1]).astype(np.int64)) for i in range(batch_size)] 64 | ) 65 | y = torch.stack( 66 | [torch.from_numpy((data[i, 1:]).astype(np.int64)) for i in range(batch_size)] 67 | ) 68 | 69 | return (x.to(device), y.to(device)) 70 | 71 | 72 | def deepview_iteration_provider(model): 73 | criterion = nn.CrossEntropyLoss() 74 | optimizer = torch.optim.AdamW( 75 | model.parameters(), lr=learning_rate, betas=(beta1, beta2) 76 | ) 77 | 78 | def iteration(inputs, targets): 79 | optimizer.zero_grad() 80 | outputs = model(inputs) 81 | loss = criterion(outputs.view(-1, outputs.size(-1)), targets.view(-1)) 82 | loss.backward() 83 | optimizer.step() 84 | 85 | return iteration
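
# Before profiling an entry point, the three providers can be composed by hand
# as a quick smoke test. This block is an illustrative sketch, not part of the
# profiler API: it assumes the calling convention the examples imply (the input
# tuple is splatted into the iteration function) and a small batch size.
if __name__ == "__main__":
    smoke_model = deepview_model_provider()
    smoke_iteration = deepview_iteration_provider(smoke_model)
    smoke_inputs = deepview_input_provider(batch_size=4)
    smoke_iteration(*smoke_inputs)
    print("completed one training iteration")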
-------------------------------------------------------------------------------- /examples/pytorch_lightning/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import datasets, transforms, models 5 | import pytorch_lightning as pl 6 | 7 | from deepview_profile.pl.deepview_callback import DeepViewProfilerCallback 8 | 9 | class ResNetModel(pl.LightningModule): 10 | def __init__(self, num_classes=10, learning_rate=1e-3): 11 | super(ResNetModel, self).__init__() 12 | self.model = models.resnet18(pretrained=True) 13 | self.model.conv1 = nn.Conv2d( 14 | 1, 64, kernel_size=7, stride=2, padding=3, bias=False 15 | ) 16 | self.model.fc = nn.Linear(self.model.fc.in_features, num_classes) 17 | self.learning_rate = learning_rate 18 | self.criterion = nn.CrossEntropyLoss() 19 | 20 | def forward(self, x): 21 | return self.model(x) 22 | 23 | def training_step(self, batch, batch_idx): 24 | x, y = batch 25 | y_hat = self(x) 26 | loss = torch.nn.functional.cross_entropy(y_hat, y) 27 | return loss 28 | 29 | def validation_step(self, batch, batch_idx): 30 | x, y = batch 31 | y_hat = self(x) 32 | loss = torch.nn.functional.cross_entropy(y_hat, y) 33 | acc = (y_hat.argmax(dim=1) == y).float().mean() 34 | self.log('val_loss', loss) 35 | self.log('val_acc', acc) 36 | 37 | def configure_optimizers(self): 38 | optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) 39 | return optimizer 40 | 41 | def mnist_dataloader(batch_size=32): 42 | transform = transforms.Compose([transforms.Resize(224), 43 | transforms.ToTensor(), 44 | transforms.Normalize((0.1307,), (0.3081,))]) 45 | 46 | mnist_train = datasets.MNIST(root='mnist_data', train=True, download=True, transform=transform) 47 | mnist_val = datasets.MNIST(root='mnist_data', train=False, download=True, transform=transform) 48 | 49 | train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True) 50 | val_loader = DataLoader(mnist_val, batch_size=batch_size) 51 | 52 | return train_loader, val_loader 53 | 54 | if __name__ == '__main__': 55 | train_loader, val_loader = mnist_dataloader(batch_size=16) 56 | model = ResNetModel() 57 | 58 | dv_callback = DeepViewProfilerCallback("example") 59 | 60 | trainer = pl.Trainer( 61 | max_epochs=2, accelerator='gpu', devices=1, 62 | callbacks=[dv_callback] 63 | ) 64 | trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader) -------------------------------------------------------------------------------- /examples/resnet/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer applies only to the "resnet.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 
18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/resnet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import resnet 4 | 5 | 6 | def deepview_model_provider(): 7 | return resnet.resnet50().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /examples/resnet/entry_point_resnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import resnet 4 | 5 | 6 | def deepview_model_provider(): 7 | return resnet.resnext50_32x4d().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /examples/testnet/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | import testnet1 5 | 6 | 7 | class TestNetWithLoss(nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | self.testnet = testnet1.TestNet() 11 | 12 | def forward(self, input): 13 | return self.testnet(input).sum() 14 | 15 | 16 | def deepview_model_provider(): 17 | return TestNetWithLoss().cuda() 18 | 19 | 20 | def deepview_input_provider(batch_size=32): 21 | return (torch.randn((batch_size, 3, 128, 128)).cuda(),) 22 | 23 | 24 | def deepview_iteration_provider(model): 25 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 26 | def iteration(*inputs): 27 | optimizer.zero_grad() 28 | out = model(*inputs) 29 | 
out.backward() 30 | optimizer.step() 31 | return iteration 32 | -------------------------------------------------------------------------------- /examples/testnet/testnet1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TestNet(nn.Module): 5 | def __init__(self): 6 | super(TestNet, self).__init__() 7 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3) 8 | self.bn1 = nn.BatchNorm2d(num_features=64) 9 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3) 10 | self.bn2 = nn.BatchNorm2d(num_features=128) 11 | self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3) 12 | self.bn3 = nn.BatchNorm2d(num_features=256) 13 | self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3) 14 | self.bn4 = nn.BatchNorm2d(num_features=512) 15 | self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3) 16 | self.bn5 = nn.BatchNorm2d(num_features=1024) 17 | 18 | self.linear = nn.Linear(in_features=4096, out_features=1000) 19 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 20 | self.relu = nn.ReLU() 21 | 22 | def forward(self, input): 23 | output = self.conv1(input) 24 | output = self.bn1(output) 25 | output = self.relu(output) 26 | output = self.max_pool(output) 27 | 28 | output = self.conv2(output) 29 | output = self.bn2(output) 30 | output = self.relu(output) 31 | output = self.max_pool(output) 32 | 33 | output = self.conv3(output) 34 | output = self.bn3(output) 35 | output = self.relu(output) 36 | output = self.max_pool(output) 37 | 38 | output = self.conv4(output) 39 | output = self.bn4(output) 40 | output = self.relu(output) 41 | output = self.max_pool(output) 42 | 43 | output = self.conv5(output) 44 | output = self.bn5(output) 45 | output = self.relu(output) 46 | output = self.max_pool(output) 47 | 48 | output = output.view(output.size(0), -1) 49 | output = self.linear(output) 50 | 51 | return output 52 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Beam.py: -------------------------------------------------------------------------------- 1 | """ Manage beam search info structure. 2 | 3 | Heavily borrowed from OpenNMT-py. 4 | For code in OpenNMT-py, please check the following link: 5 | https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py 6 | """ 7 | 8 | import torch 9 | import transformer.Constants as Constants 10 | 11 | class Beam(): 12 | ''' Beam search ''' 13 | 14 | def __init__(self, size, device=False): 15 | 16 | self.size = size 17 | self._done = False 18 | 19 | # The score for each translation on the beam. 20 | self.scores = torch.zeros((size,), dtype=torch.float, device=device) 21 | self.all_scores = [] 22 | 23 | # The backpointers at each time-step. 24 | self.prev_ks = [] 25 | 26 | # The outputs at each time-step. 27 | self.next_ys = [torch.full((size,), Constants.PAD, dtype=torch.long, device=device)] 28 | self.next_ys[0][0] = Constants.BOS 29 | 30 | def get_current_state(self): 31 | "Get the outputs for the current timestep." 32 | return self.get_tentative_hypothesis() 33 | 34 | def get_current_origin(self): 35 | "Get the backpointers for the current timestep." 36 | return self.prev_ks[-1] 37 | 38 | @property 39 | def done(self): 40 | return self._done 41 | 42 | def advance(self, word_prob): 43 | "Update beam status and check if finished or not." 44 | num_words = word_prob.size(1) 45 | 46 | # Sum the previous scores. 
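        # On the first step only the initial BOS hypothesis is live, so scores
        # are read from row 0 of word_prob alone. On later steps, each beam's
        # accumulated log-probability (self.scores) is broadcast onto its row,
        # so flattening and taking topk below ranks every (beam, word)
        # continuation jointly.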
47 | if len(self.prev_ks) > 0: 48 | beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) 49 | else: 50 | beam_lk = word_prob[0] 51 | 52 | flat_beam_lk = beam_lk.view(-1) 53 | 54 | best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) 55 | 56 | 57 | self.all_scores.append(self.scores) 58 | self.scores = best_scores 59 | 60 | # bestScoresId is flattened as a (beam x word) array, 61 | # so we need to calculate which word and beam each score came from 62 | prev_k = best_scores_id // num_words # floor division keeps the beam index integral 63 | self.prev_ks.append(prev_k) 64 | self.next_ys.append(best_scores_id - prev_k * num_words) 65 | 66 | # End condition is when top-of-beam is EOS. 67 | if self.next_ys[-1][0].item() == Constants.EOS: 68 | self._done = True 69 | self.all_scores.append(self.scores) 70 | 71 | return self._done 72 | 73 | def sort_scores(self): 74 | "Sort the scores." 75 | return torch.sort(self.scores, 0, True) 76 | 77 | def get_the_best_score_and_idx(self): 78 | "Get the score of the best in the beam." 79 | scores, ids = self.sort_scores() 80 | return scores[1], ids[1] 81 | 82 | def get_tentative_hypothesis(self): 83 | "Get the decoded sequence for the current timestep." 84 | 85 | if len(self.next_ys) == 1: 86 | dec_seq = self.next_ys[0].unsqueeze(1) 87 | else: 88 | _, keys = self.sort_scores() 89 | hyps = [self.get_hypothesis(k) for k in keys] 90 | hyps = [[Constants.BOS] + h for h in hyps] 91 | dec_seq = torch.LongTensor(hyps) 92 | 93 | return dec_seq 94 | 95 | def get_hypothesis(self, k): 96 | """ Walk back to construct the full hypothesis. """ 97 | hyp = [] 98 | for j in range(len(self.prev_ks) - 1, -1, -1): 99 | hyp.append(self.next_ys[j+1][k]) 100 | k = self.prev_ks[j][k] 101 | 102 | return list(map(lambda x: x.item(), hyp[::-1])) 103 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Constants.py: -------------------------------------------------------------------------------- 1 | 2 | PAD = 0 3 | UNK = 1 4 | BOS = 2 5 | EOS = 3 6 | 7 | PAD_WORD = '<blank>' 8 | UNK_WORD = '<unk>' 9 | BOS_WORD = '<s>' 10 | EOS_WORD = '</s>' 11 | -------------------------------------------------------------------------------- /examples/transformer/transformer/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Victor Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Layers.py: -------------------------------------------------------------------------------- 1 | ''' Define the Layers ''' 2 | import torch.nn as nn 3 | from transformer.SubLayers import MultiHeadAttention, PositionwiseFeedForward 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | 8 | class EncoderLayer(nn.Module): 9 | ''' Compose with two layers ''' 10 | 11 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 12 | super(EncoderLayer, self).__init__() 13 | self.slf_attn = MultiHeadAttention( 14 | n_head, d_model, d_k, d_v, dropout=dropout) 15 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 16 | 17 | def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None): 18 | enc_output, enc_slf_attn = self.slf_attn( 19 | enc_input, enc_input, enc_input, mask=slf_attn_mask) 20 | enc_output *= non_pad_mask 21 | 22 | enc_output = self.pos_ffn(enc_output) 23 | enc_output *= non_pad_mask 24 | 25 | return enc_output, enc_slf_attn 26 | 27 | 28 | class DecoderLayer(nn.Module): 29 | ''' Compose with three layers ''' 30 | 31 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 32 | super(DecoderLayer, self).__init__() 33 | self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 34 | self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 35 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 36 | 37 | def forward(self, dec_input, enc_output, non_pad_mask=None, slf_attn_mask=None, dec_enc_attn_mask=None): 38 | dec_output, dec_slf_attn = self.slf_attn( 39 | dec_input, dec_input, dec_input, mask=slf_attn_mask) 40 | dec_output *= non_pad_mask 41 | 42 | dec_output, dec_enc_attn = self.enc_attn( 43 | dec_output, enc_output, enc_output, mask=dec_enc_attn_mask) 44 | dec_output *= non_pad_mask 45 | 46 | dec_output = self.pos_ffn(dec_output) 47 | dec_output *= non_pad_mask 48 | 49 | return dec_output, dec_slf_attn, dec_enc_attn 50 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | ''' Scaled Dot-Product Attention ''' 9 | 10 | def __init__(self, temperature, attn_dropout=0.1): 11 | super().__init__() 12 | self.temperature = temperature 13 | self.dropout = nn.Dropout(attn_dropout) 14 | self.softmax = nn.Softmax(dim=2) 15 | 16 | def forward(self, q, k, v, mask=None): 17 | 18 | attn = torch.bmm(q, k.transpose(1, 2)) 19 | attn = attn / self.temperature 20 | 21 | if mask is not None: 22 | attn = attn.masked_fill(mask, -np.inf) 23 | 24 | attn = self.softmax(attn) 25 | attn = self.dropout(attn) 26 | output = torch.bmm(attn, v) 27 | 28 | return output, attn 29 | -------------------------------------------------------------------------------- /examples/transformer/transformer/Optim.py: -------------------------------------------------------------------------------- 1 | '''A wrapper class 
for optimizer ''' 2 | import numpy as np 3 | 4 | class ScheduledOptim(): 5 | '''A simple wrapper class for learning rate scheduling''' 6 | 7 | def __init__(self, optimizer, d_model, n_warmup_steps): 8 | self._optimizer = optimizer 9 | self.n_warmup_steps = n_warmup_steps 10 | self.n_current_steps = 0 11 | self.init_lr = np.power(d_model, -0.5) 12 | 13 | def step_and_update_lr(self): 14 | "Step with the inner optimizer" 15 | self._update_learning_rate() 16 | self._optimizer.step() 17 | 18 | def zero_grad(self): 19 | "Zero out the gradients by the inner optimizer" 20 | self._optimizer.zero_grad() 21 | 22 | def _get_lr_scale(self): 23 | return np.min([ 24 | np.power(self.n_current_steps, -0.5), 25 | np.power(self.n_warmup_steps, -1.5) * self.n_current_steps]) 26 | 27 | def _update_learning_rate(self): 28 | ''' Learning rate scheduling per step ''' 29 | 30 | self.n_current_steps += 1 31 | lr = self.init_lr * self._get_lr_scale() 32 | 33 | for param_group in self._optimizer.param_groups: 34 | param_group['lr'] = lr 35 | 36 | -------------------------------------------------------------------------------- /examples/transformer/transformer/README.md: -------------------------------------------------------------------------------- 1 | Transformer Model (Attention is All You Need) 2 | ============================================= 3 | This directory contains a PyTorch implementation of the Transformer model 4 | described in the "[Attention is All You Need](https://arxiv.org/abs/1706.03762)" 5 | paper. This code was adapted from Yu-Hsiang Huang's implementation found in 6 | [jadore801120/attention-is-all-you-need-pytorch](https://github.com/jadore801120/attention-is-all-you-need-pytorch). 7 | 8 | License 9 | ------- 10 | The code inside this directory is adapted from Yu-Hsiang Huang's implementation 11 | and therefore shares the same license. The unmodified license can be found in 12 | the `LICENSE` file. 
13 | -------------------------------------------------------------------------------- /examples/transformer/transformer/SubLayers.py: -------------------------------------------------------------------------------- 1 | ''' Define the sublayers in encoder/decoder layer ''' 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from transformer.Modules import ScaledDotProductAttention 6 | 7 | __author__ = "Yu-Hsiang Huang" 8 | 9 | class MultiHeadAttention(nn.Module): 10 | ''' Multi-Head Attention module ''' 11 | 12 | def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1): 13 | super().__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = nn.LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | 32 | self.dropout = nn.Dropout(dropout) 33 | 34 | 35 | def forward(self, q, k, v, mask=None): 36 | 37 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 38 | 39 | sz_b, len_q, _ = q.size() 40 | sz_b, len_k, _ = k.size() 41 | sz_b, len_v, _ = v.size() 42 | 43 | residual = q 44 | 45 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 46 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 47 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 48 | 49 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 50 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 51 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 52 | 53 | mask = mask.repeat(n_head, 1, 1) if mask is not None else None # (n*b) x .. x ..
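        # q, k and v are now laid out as (n_head * batch) x seq_len x depth, so
        # the single batched matmul inside ScaledDotProductAttention computes
        # all heads in parallel; the mask (when provided) is tiled the same way
        # above so every head sees its batch element's attention mask.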
54 | output, attn = self.attention(q, k, v, mask=mask) 55 | 56 | output = output.view(n_head, sz_b, len_q, d_v) 57 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 58 | 59 | output = self.dropout(self.fc(output)) 60 | output = self.layer_norm(output + residual) 61 | 62 | return output, attn 63 | 64 | class PositionwiseFeedForward(nn.Module): 65 | ''' A two-feed-forward-layer module ''' 66 | 67 | def __init__(self, d_in, d_hid, dropout=0.1): 68 | super().__init__() 69 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 70 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 71 | self.layer_norm = nn.LayerNorm(d_in) 72 | self.dropout = nn.Dropout(dropout) 73 | 74 | def forward(self, x): 75 | residual = x 76 | output = x.transpose(1, 2) 77 | output = self.w_2(F.relu(self.w_1(output))) 78 | output = output.transpose(1, 2) 79 | output = self.dropout(output) 80 | output = self.layer_norm(output + residual) 81 | return output 82 | -------------------------------------------------------------------------------- /examples/transformer/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | import transformer.Constants 2 | import transformer.Modules 3 | import transformer.Layers 4 | import transformer.SubLayers 5 | import transformer.Models 6 | import transformer.Translator 7 | import transformer.Beam 8 | import transformer.Optim 9 | 10 | __all__ = [ 11 | transformer.Constants, transformer.Modules, transformer.Layers, 12 | transformer.SubLayers, transformer.Models, transformer.Optim, 13 | transformer.Translator, transformer.Beam] 14 | -------------------------------------------------------------------------------- /examples/vgg/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license and disclaimer applies only to the "vgg.py" file in this 2 | directory. 3 | 4 | BSD 3-Clause License 5 | 6 | Copyright (c) Soumith Chintala 2016, 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, this 13 | list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright notice, 16 | this list of conditions and the following disclaimer in the documentation 17 | and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | -------------------------------------------------------------------------------- /examples/vgg/entry_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import vgg 4 | 5 | 6 | def deepview_model_provider(): 7 | return vgg.vgg11().cuda() 8 | 9 | 10 | def deepview_input_provider(batch_size=16): 11 | return ( 12 | torch.randn((batch_size, 3, 224, 224)).cuda(), 13 | torch.randint(low=0, high=1000, size=(batch_size,)).cuda(), 14 | ) 15 | 16 | 17 | def deepview_iteration_provider(model): 18 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 19 | loss_fn = torch.nn.CrossEntropyLoss() 20 | def iteration(inputs, targets): 21 | optimizer.zero_grad() 22 | out = model(inputs) 23 | loss = loss_fn(out, targets) 24 | loss.backward() 25 | optimizer.step() 26 | return iteration 27 | -------------------------------------------------------------------------------- /protocol/Makefile: -------------------------------------------------------------------------------- 1 | GEN_DIR = protocol_gen 2 | 3 | PLUGIN_GEN = ../plugin/lib/$(GEN_DIR) 4 | PLUGIN_GEN_FILE = $(PLUGIN_GEN)/innpv_pb.js 5 | 6 | SERVER_GEN = ../deepview_profile/$(GEN_DIR) 7 | SERVER_GEN_FILE = $(SERVER_GEN)/innpv_pb2.py 8 | 9 | PROTO_FILE = innpv.proto 10 | 11 | .PHONY: all clean 12 | 13 | all: $(PLUGIN_GEN_FILE) $(SERVER_GEN_FILE) 14 | 15 | $(PLUGIN_GEN_FILE): $(PROTO_FILE) 16 | mkdir -p $(PLUGIN_GEN) 17 | protoc --js_out=import_style=commonjs,binary:$(PLUGIN_GEN) $^ 18 | 19 | $(SERVER_GEN_FILE): $(PROTO_FILE) 20 | mkdir -p $(SERVER_GEN) 21 | protoc --python_out=$(SERVER_GEN) $^ 22 | 23 | clean: 24 | rm -rf $(PLUGIN_GEN) $(SERVER_GEN) 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deepview-profile" 3 | version = "0.14.5" 4 | description = "Interactive performance profiling and debugging tool for PyTorch neural networks."
5 | authors = ["CentML "] 6 | license = "Apache-2.0" 7 | readme = "README.md" 8 | repository = "https://github.com/CentML/DeepView.Profile" 9 | keywords = ["pytorch", "neural networks", "debugger", "profiler"] 10 | classifiers = [ 11 | "Development Status :: 4 - Beta", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: Apache Software License", 14 | "Programming Language :: Python :: 3 :: Only", 15 | "Topic :: Software Development :: Debuggers", 16 | ] 17 | packages = [ 18 | { include = "deepview_profile" }, 19 | ] 20 | 21 | include = [ "pyproject.toml" ] 22 | 23 | [tool.poetry.scripts] 24 | deepview = "deepview_profile:main" 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.9" 28 | pyyaml = "*" 29 | protobuf = "3.19.6" 30 | numpy = "^1.15.2" 31 | torch = ">=2.1.0" 32 | nvidia-ml-py3 = "*" 33 | toml = "^0.10.2" 34 | pyRAPL = "^0.2.3" 35 | deepview-predict = "*" 36 | perfetto = "*" 37 | orjson = "*" 38 | torch-tb-profiler = "*" 39 | pymongo = "*" 40 | scipy = "*" 41 | termcolor = "*" 42 | 43 | [tool.poetry.dev-dependencies] 44 | 45 | [tool.poetry.group.dev.dependencies] 46 | ruff = "^0.0.267" 47 | pre-commit = "2.21.0" 48 | 49 | [build-system] 50 | requires = ["poetry-core>=1.0.0"] 51 | build-backend = "poetry.core.masonry.api" 52 | 53 | [tool.ruff] 54 | extend-exclude = ["examples", "deepview_profile/protocol_gen/"] 55 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CentML/DeepView.Profile/e42b632a0fd3225bf90d7241fb73968600731355/setup.cfg -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /test/TESTING.md: -------------------------------------------------------------------------------- 1 | To run tests: 2 | * Install the `pytest` module. 3 | * Run `pytest` in this directory. 4 | 5 | ``` 6 | $ pytest 7 | ============================= test session starts ============================== 8 | platform linux -- Python 3.9.13, pytest-7.1.3, pluggy-1.0.0 9 | rootdir: /home/ubuntu/habitat-a100/centml/deepview_profile/test 10 | collected 2 items 11 | 12 | test_driver.py .. 
[100%] 13 | 14 | ============================== 2 passed in 27.20s ============================== 15 | ``` 16 | -------------------------------------------------------------------------------- /test/config_params.py: -------------------------------------------------------------------------------- 1 | def TestConfig(): 2 | config = dict() 3 | config["model_names_from_examples"] = ["resnet", "nanogpt"] 4 | 5 | return config 6 | -------------------------------------------------------------------------------- /test/test_database.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import deepview_profile.db.database as database 4 | 5 | LOWER_BOUND_RAND_INT = 1 6 | UPPER_BOUND_RAND_INT = 10 7 | class MockDatabaseInterface(database.DatabaseInterface): 8 | def __del__(self): 9 | if os.path.exists("test.sqlite"): 10 | os.remove("test.sqlite") 11 | 12 | 13 | class TestSkylineDatabase: 14 | test_database: MockDatabaseInterface = MockDatabaseInterface("test.sqlite") 15 | energy_table_interface: database.EnergyTableInterface = ( 16 | database.EnergyTableInterface(test_database.connection) 17 | ) 18 | 19 | # Test if energy table is created 20 | def test_energy_table_is_created(self): 21 | query_result = self.test_database.connection.execute( 22 | "SELECT name from sqlite_schema WHERE type='table' and name ='ENERGY';" 23 | ) 24 | query_result_list = query_result.fetchall() 25 | assert len(query_result_list) > 0 26 | 27 | # Validate malformed entries and check that they are rejected 28 | def test_invalid_entry_too_short(self): 29 | assert self.energy_table_interface.is_valid_entry([]) is False 30 | 31 | def test_invalid_entry_too_long(self): 32 | assert self.energy_table_interface.is_valid_entry([1, 2, 3, 4, 5]) is False 33 | 34 | def test_invalid_entry_wrong_types(self): 35 | assert ( 36 | self.energy_table_interface.is_valid_entry([None, None, None, None, None]) 37 | is False 38 | ) 39 | 40 | def test_adding_valid_entry(self): 41 | params = [ 42 | "entry_point", 43 | random.random(), 44 | random.random(), 45 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 46 | ] 47 | self.energy_table_interface.add_entry(params) 48 | query_result = self.test_database.connection.execute( 49 | "SELECT * FROM ENERGY ORDER BY ts DESC;" 50 | ).fetchone() 51 | # params is passed by reference, so it now has the timestamp appended to it 52 | assert query_result == tuple(params) 53 | 54 | # add 10 valid entries and get top 3 55 | def test_get_latest_n_entries_of_entry_point(self): 56 | for _ in range(10): 57 | params = [ 58 | "entry_point", 59 | random.random(), 60 | random.random(), 61 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 62 | ] 63 | self.energy_table_interface.add_entry(params) 64 | for _ in range(20): 65 | params = [ 66 | "other_entry_point", 67 | random.random(), 68 | random.random(), 69 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 70 | ] 71 | self.energy_table_interface.add_entry(params) 72 | entries = [] 73 | for _ in range(3): 74 | params = [ 75 | "entry_point", 76 | random.random(), 77 | random.random(), 78 | random.randint(LOWER_BOUND_RAND_INT, UPPER_BOUND_RAND_INT), 79 | ] 80 | entries.insert(0, params) 81 | self.energy_table_interface.add_entry(params) 82 | latest_n_entries = ( 83 | self.energy_table_interface.get_latest_n_entries_of_entry_point( 84 | 3, "entry_point" 85 | ) 86 | ) 87 | entries = [tuple(entry) for entry in entries] 88 | assert entries == latest_n_entries
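
# The tests above document the EnergyTableInterface contract: an entry is a
# four-element list (entry point name, two float measurements, and an integer
# count), a timestamp is appended on insert, and the newest N rows for a given
# entry point can be read back. A minimal standalone sketch, assuming the same
# constructor and methods the tests use (the numeric values are placeholders):

import deepview_profile.db.database as database

db = database.DatabaseInterface("energy.sqlite")
energy_table = database.EnergyTableInterface(db.connection)

entry = ["my_entry_point", 1.25, 3.75, 16]
if energy_table.is_valid_entry(entry):
    energy_table.add_entry(entry)  # appends a timestamp to `entry` in place

latest = energy_table.get_latest_n_entries_of_entry_point(1, "my_entry_point")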
-------------------------------------------------------------------------------- /test/test_driver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pickle 3 | from utils import DeepviewSession, BackendContext 4 | from google.protobuf.json_format import MessageToDict 5 | from config_params import TestConfig 6 | import os 7 | 8 | REPS = 2 9 | NUM_EXPECTED_MESSAGES = 6 10 | 11 | 12 | def get_config_name(): 13 | import pkg_resources 14 | 15 | package_versions = {p.key: p.version for p in pkg_resources.working_set} 16 | return package_versions 17 | 18 | 19 | config = TestConfig() 20 | 21 | tests = list() 22 | for model_name in config["model_names_from_examples"]: 23 | dir_path = os.path.join( 24 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 25 | "examples", 26 | model_name, 27 | ) 28 | tests.append((model_name, dir_path)) 29 | 30 | 31 | @pytest.mark.parametrize("test_name, entry_point", tests) 32 | def test_entry_point(test_name, entry_point): 33 | print(f"Testing {entry_point}") 34 | 35 | # create new folder 36 | folder = ( 37 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/tests_results" 38 | ) 39 | os.makedirs(folder, exist_ok=True) 40 | 41 | stdout_fd = open(os.path.join(folder, f"{test_name}_interactive_output.log"), "w") 42 | stderr_fd = open(os.path.join(folder, f"{test_name}_interactive_w_debug_output.log"), "w") 43 | context = BackendContext(entry_point, stdout_fd=stdout_fd, stderr_fd=stderr_fd) 44 | context.spawn_process() 45 | 46 | analysis_messages = list() 47 | 48 | for reps in range(REPS): 49 | sess = DeepviewSession() 50 | while context.state == 0: 51 | pass 52 | sess.connect("localhost", 60120) 53 | sess.send_initialize_request(entry_point) 54 | sess.send_analysis_request() 55 | while ( 56 | context.alive() 57 | and sess.alive() 58 | and len(sess.received_messages) < NUM_EXPECTED_MESSAGES 59 | ): 60 | pass 61 | 62 | sess.cleanup() 63 | analysis_messages.extend(sess.received_messages) 64 | 65 | assert len(sess.received_messages) == NUM_EXPECTED_MESSAGES, ( 66 | f"Run {reps}: Expected to receive {NUM_EXPECTED_MESSAGES} got " 67 | f"{len(sess.received_messages)} (did the process terminate prematurely?)" 68 | ) 69 | 70 | context.terminate() 71 | # create folder to store files 72 | # flush contents to files 73 | with open(os.path.join(folder, f"{test_name}_analysis.pkl"), "wb") as fp: 74 | pickle.dump(list(map(MessageToDict, analysis_messages)), fp) 75 | # write package versions 76 | package_dict = get_config_name() 77 | with open(os.path.join(folder, "package-list.txt"), "w") as f: 78 | for k, v in package_dict.items(): 79 | f.write(f"{k}={v}\n") 80 | stdout_fd.close() 81 | stderr_fd.close() 82 | -------------------------------------------------------------------------------- /tools/common.sh: -------------------------------------------------------------------------------- 1 | COLOR_RED="\033[0;31m" 2 | COLOR_GREEN="\033[0;32m" 3 | COLOR_YELLOW="\033[0;33m" 4 | COLOR_BLUE="\033[0;36m" 5 | COLOR_NC="\033[0m" 6 | 7 | function echo_colored() { 8 | echo -e "${1}${2}${COLOR_NC}" 9 | } 10 | 11 | function echo_green() { 12 | echo_colored "$COLOR_GREEN" "$1" 13 | } 14 | 15 | function echo_red() { 16 | echo_colored "$COLOR_RED" "$1" 17 | } 18 | 19 | function echo_yellow() { 20 | echo_colored "$COLOR_YELLOW" "$1" 21 | } 22 | 23 | function echo_blue() { 24 | echo_colored "$COLOR_BLUE" "$1" 25 | } 26 | 27 | function prompt_yn() { 28 | echo -en "${COLOR_YELLOW}$1${COLOR_NC}" 29 | read -r 30 | if 
[[ ! $REPLY =~ ^[Yy]$ ]] 31 | then 32 | exit 1 33 | fi 34 | } 35 | 36 | function get_repo_hash() { 37 | echo "$(git rev-parse HEAD)" 38 | } 39 | 40 | function check_repo() { 41 | # Make sure no unstaged changes 42 | echo_yellow "> Check for uncommitted changes" 43 | if [[ ! -z $(git status --porcelain) ]]; 44 | then 45 | echo_red "ERROR: There are uncommitted changes. Please commit before releasing." 46 | exit 1 47 | fi 48 | 49 | # Make sure we're on main 50 | echo_yellow "> Check the current branch" 51 | INNPV_MAIN_HASH=$(git rev-parse main) 52 | INNPV_HASH=$(git rev-parse HEAD) 53 | 54 | if [[ $INNPV_MAIN_HASH != $INNPV_HASH ]]; then 55 | echo_red "ERROR: You must be on main when releasing." 56 | exit 1 57 | fi 58 | 59 | INNPV_SHORT_HASH=$(git rev-parse --short HEAD) 60 | 61 | echo_green "✓ Repository OK" 62 | } 63 | 64 | function check_tools() { 65 | echo_yellow "> Check tools" 66 | if [ -z "$(which poetry)" ]; then 67 | echo_red "ERROR: Poetry must be installed." 68 | exit 1 69 | fi 70 | 71 | if [ -z "$(which gh)" ]; then 72 | echo_red "ERROR: GitHub CLI must be installed." 73 | exit 1 74 | fi 75 | 76 | echo "" 77 | echo_yellow "> Tooling versions:" 78 | echo "$(poetry --version)" 79 | echo "$(poetry run python3 --version)" 80 | echo "$(gh --version)" 81 | echo_green "✓ Release tooling OK" 82 | } 83 | 84 | function build_release() { 85 | echo_yellow "> Building wheels..." 86 | rm -rf ../dist/* 87 | cp ../pyproject.toml ../deepview_profile/ 88 | poetry build 89 | echo_green "✓ Wheels successfully built" 90 | } 91 | 92 | function publish_to_pypi() { 93 | case $1 in 94 | prod) 95 | poetry publish -r pypi;; 96 | *) 97 | poetry publish -r test-pypi;; 98 | esac 99 | echo_green "✓ New release uploaded to PyPI" 100 | } 101 | -------------------------------------------------------------------------------- /tools/prepare-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is used to release a new version of the DeepView CLI. 4 | # Release steps: 5 | # 1. Create release branch 6 | # 2. Increment package version in pyproject.toml 7 | # 3. Prepare change log since the last version 8 | # 4. Commit the change log 9 | # 5. Create a draft GitHub release 10 | # 6. Optionally publish to Test PyPI 11 | 12 | 13 | set -e 14 | 15 | RELEASE_SCRIPT_PATH=$(cd $(dirname $0) && pwd -P) 16 | cd $RELEASE_SCRIPT_PATH 17 | source common.sh 18 | 19 | echo "" 20 | echo_blue "DeepView.Profile Release Preparation Tool" 21 | echo_blue "=========================================" 22 | 23 | echo "" 24 | check_repo 25 | 26 | echo "" 27 | check_tools 28 | 29 | CURR_CLI_VERSION=$(poetry version --short) 30 | echo -en "${COLOR_YELLOW}Release increment: [patch], minor, major ${COLOR_NC}" 31 | read -r 32 | case $REPLY in 33 | major) 34 | poetry version major;; 35 | minor) 36 | poetry version minor;; 37 | *) 38 | poetry version patch;; 39 | esac 40 | NEXT_CLI_VERSION=$(poetry version --short) 41 | VERSION_TAG="v$NEXT_CLI_VERSION" 42 | 43 | echo "" 44 | echo_yellow "> The next CLI version will be '$VERSION_TAG'." 45 | prompt_yn "> Is this correct? (y/N) " 46 | git checkout -b "release-$VERSION_TAG" 47 | git commit -am "Bump version to $VERSION_TAG" 48 | git push origin "release-$VERSION_TAG" 49 | REPO_HASH=$(get_repo_hash) 50 | RELEASE_NOTES=$(git log $(git describe --abbrev=0 --tags)..
--merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ') 51 | echo "" 52 | echo "Release Notes:" 53 | echo "$RELEASE_NOTES" 54 | gh pr create --title "Release $VERSION_TAG" --body "$RELEASE_NOTES" 55 | 56 | 57 | # echo "" 58 | # build_release 59 | 60 | # RELEASE_NOTES=$(git log $(git describe --abbrev=0 --tags).. --merges --pretty=format:"%s %b" | cut -f 4,7- -d ' ') 61 | # echo "" 62 | # echo "Release Notes:" 63 | # echo "$RELEASE_NOTES" 64 | 65 | # RELEASE_ARTIFACTS=$(find ../dist -name "*$NEXT_CLI_VERSION*" -type f | paste -s -d ' ' - ) 66 | 67 | # GH_TOKEN=$UOFT_ECOSYSTEM_GH_TOKEN 68 | # echo "" 69 | # prompt_yn "> Create a draft release on Github? (y/N) " 70 | # gh release create "v$VERSION_TAG" --draft \ 71 | # --title "$VERSION_TAG" \ 72 | # --notes "$RELEASE_NOTES" \ 73 | # --target "$REPO_HASH" \ 74 | # $RELEASE_ARTIFACTS 75 | # echo -en "${COLOR_YELLOW}Ready to publish? [dryrun], test-pypi, pypi${COLOR_NC}" 76 | # read -r 77 | # echo "" 78 | # case $REPLY in 79 | # test-pypi) 80 | # echo_yellow "> Releasing $VERSION_TAG of the CLI..." 81 | # publish_to_pypi;; 82 | # pypi) 83 | # echo_yellow "> Releasing $VERSION_TAG of the CLI..." 84 | # publish_to_pypi "prod";; 85 | # *) 86 | # echo_yellow "Skipping the upload to PyPI";; 87 | # esac 88 | 89 | echo_green "✓ Done!" 90 | 91 | --------------------------------------------------------------------------------