├── .codecov.yml ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── pull_request_template.md ├── .gitignore ├── .vsts ├── pipeline.yml ├── populate_tag.ps1 ├── pyenv.yml └── sign.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── THIRD_PARTY_NOTICES.txt ├── appveyor.yml ├── blobxfer ├── __init__.py ├── api.py ├── models │ ├── __init__.py │ ├── azure.py │ ├── crypto.py │ ├── download.py │ ├── metadata.py │ ├── offload.py │ ├── options.py │ ├── resume.py │ ├── synccopy.py │ └── upload.py ├── operations │ ├── __init__.py │ ├── azure │ │ ├── __init__.py │ │ ├── blob │ │ │ ├── __init__.py │ │ │ ├── append.py │ │ │ ├── block.py │ │ │ └── page.py │ │ └── file.py │ ├── crypto.py │ ├── download.py │ ├── md5.py │ ├── progress.py │ ├── resume.py │ ├── synccopy.py │ └── upload.py ├── retry.py ├── util.py └── version.py ├── cli ├── __init__.py ├── azure.ico ├── cli.py ├── file_version_info.txt └── settings.py ├── docker ├── gen_3rd_party_notices.sh ├── linux │ ├── Dockerfile │ └── hooks │ │ └── build └── win │ └── Dockerfile ├── docs ├── 01-installation.md ├── 10-cli-usage.md ├── 20-yaml-configuration.md ├── 30-vectored-io.md ├── 40-client-side-encryption.md ├── 80-blobxfer-python-library.md ├── 98-performance-considerations.md ├── 99-current-limitations.md ├── CHANGELOG.md ├── README.md ├── index.md └── sample_config.yaml ├── mkdocs.yml ├── setup.cfg ├── setup.py ├── test_requirements.txt ├── tests ├── test_blobxfer.py ├── test_blobxfer_models_azure.py ├── test_blobxfer_models_crypto.py ├── test_blobxfer_models_download.py ├── test_blobxfer_models_metadata.py ├── test_blobxfer_models_offload.py ├── test_blobxfer_models_options.py ├── test_blobxfer_models_resume.py ├── test_blobxfer_models_synccopy.py ├── test_blobxfer_models_upload.py ├── test_blobxfer_operations_azure.py ├── test_blobxfer_operations_azure_blob.py ├── test_blobxfer_operations_azure_blob_append.py ├── test_blobxfer_operations_azure_blob_block.py ├── test_blobxfer_operations_azure_blob_page.py ├── test_blobxfer_operations_azure_file.py ├── test_blobxfer_operations_crypto.py ├── test_blobxfer_operations_download.py ├── test_blobxfer_operations_md5.py ├── test_blobxfer_operations_progress.py ├── test_blobxfer_operations_resume.py ├── test_blobxfer_operations_synccopy.py ├── test_blobxfer_operations_upload.py ├── test_blobxfer_retry.py └── test_blobxfer_util.py └── tox.ini /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | round: down 3 | precision: 4 4 | range: "100...100" 5 | 6 | status: 7 | project: 8 | default: 9 | target: auto 10 | threshold: 0 11 | patch: 12 | default: 13 | target: auto 14 | threshold: 0 15 | changes: 16 | default: 17 | target: auto 18 | threshold: 0 19 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = blobxfer 3 | omit = 4 | **/test/* 5 | 6 | [report] 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | noqa 11 | 12 | # Don't complain about missing debug-only code: 13 | def __repr__ 14 | if self\.debug 15 | 16 | # Don't complain if tests don't hit defensive assertion code: 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if False: 23 | if __name__ == .__main__.: 24 | 25 
| -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | #### Problem Description 8 | 9 | #### Azure blobxfer parameters output 10 | ``` 11 | INSERT PARAMETER OUTPUT LOG HERE 12 | ``` 13 | 14 | #### Steps to Reproduce 15 | 16 | #### Expected Results 17 | 18 | #### Actual Results 19 | 20 | #### Additional Logs 21 | Ensure that you execute your command with `-v --enable-azure-storage-logger` 22 | options. 23 | 24 | ``` 25 | INSERT ADDITIONAL LOGS HERE 26 | ``` 27 | 28 | #### Additional Comments 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | #### Feature Request Description 8 | - Is your feature request related to a problem? Please describe. 9 | - A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 10 | 11 | #### Describe Preferred Solution 12 | A clear and concise description of what you want to happen. 13 | 14 | #### Describe Alternatives Considered 15 | A clear and concise description of any alternative solutions or features you've considered. 16 | 17 | #### Additional Context 18 | Add any other context or screenshots about the feature request here. 19 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Pull Request Checklist 2 | 3 | - [ ] Review the [contributing guidelines](https://github.com/Azure/blobxfer/blob/master/CONTRIBUTING.md) 4 | - [ ] Outstanding issue linked, if applicable 5 | - [ ] PR should only be opened against `master` if it includes no features (open against `develop` otherwise) 6 | - [ ] Commit messages should conform to [good style practices](https://chris.beams.io/posts/git-commit/) 7 | - [ ] PR should pass all checks and have no conflicts 8 | 9 | ### Description 10 | Please describe the pull request. 11 | 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | .pytest_cache/ 5 | 6 | # C extensions 7 | *.so 8 | *.[oa] 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | junit-*.xml 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | # Visual Studio 2015 cache/options directory 63 | .vs/ 64 | 65 | # PyCharm 66 | .idea/ 67 | 68 | # Vim 69 | *.sw[po] 70 | -------------------------------------------------------------------------------- /.vsts/populate_tag.ps1: -------------------------------------------------------------------------------- 1 | try { 2 | $buildver = Get-Content "version_tag.txt" 3 | echo "##vso[task.setvariable variable=VERSION_TAG;]$buildver" 4 | Write-Host "tag version: $buildver" 5 | } catch { 6 | Write-Host "version_tag.txt file not found" 7 | } 8 | -------------------------------------------------------------------------------- /.vsts/pyenv.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | set -o pipefail 5 | echo "SYSTEM_PHASENAME=$SYSTEM_PHASENAME ARTIFACT_CLI=$ARTIFACT_CLI" 6 | PYTHON_CONFIGURE_OPTS="--enable-shared" 7 | if [ "${SYSTEM_PHASENAME}" == "Linux" ]; then 8 | sudo apt-get update 9 | sudo apt-get install -y libbz2-dev libreadline-dev libsqlite3-dev 10 | elif [ "${SYSTEM_PHASENAME}" == "MacOS" ]; then 11 | PYTHON_CONFIGURE_OPTS="$PYTHON_CONFIGURE_OPTS --with-openssl=$(brew --prefix openssl)" 12 | fi 13 | export PYENV_ROOT="$(Agent.WorkFolder)/.pyenv" 14 | git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT 15 | export PATH="$PYENV_ROOT/bin:$PATH" 16 | pyenv --version 17 | export PYTHON_CONFIGURE_OPTS 18 | pyenv install $PYENV_VERSION 19 | unset PYTHON_CONFIGURE_OPTS 20 | pyenv global $PYENV_VERSION 21 | echo "##vso[task.prependpath]$PYENV_ROOT/bin" 22 | echo "##vso[task.prependpath]$PYENV_ROOT/shims" 23 | displayName: Install Python 24 | condition: and(succeeded(), ne(variables['ARTIFACT_CLI'], '')) 25 | -------------------------------------------------------------------------------- /.vsts/sign.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | enabled: false 3 | folder: ./bin 4 | pattern: '*.dll,*.exe' 5 | 6 | steps: 7 | - task: SFP.build-tasks.custom-build-task-1.EsrpCodeSigning@1 8 | displayName: "Code Sign" 9 | condition: ${{ parameters.enabled }} 10 | inputs: 11 | ConnectedServiceName: 'ESRP CodeSign' 12 | FolderPath: ${{ parameters.folder }} 13 | Pattern: ${{ parameters.pattern }} 14 | UseMinimatch: false 15 | signConfigType: inlineSignParams 16 | inlineOperation: | 17 | [ 18 | { 19 | "KeyCode" : "CP-230012", 20 | "OperationCode" : "SigntoolSign", 21 | "Parameters" : { 22 | "OpusName" : "Microsoft", 23 | "OpusInfo" : "http://www.microsoft.com", 24 | "FileDigest" : "/fd \"SHA256\"", 25 | "PageHash" : "/NPH", 26 | "TimeStamp" : "/tr \"http://rfc3161.gtm.corp.microsoft.com/TSS/HttpTspServer\" /td sha256" 27 | }, 28 | "ToolName" : "sign", 29 | "ToolVersion" : "1.0" 30 | }, 31 | { 32 | "KeyCode" : "CP-230012", 33 | "OperationCode" : "SigntoolVerify", 34 | "Parameters" : {}, 35 | "ToolName" : "sign", 36 | "ToolVersion" : "1.0" 37 | } 38 | ] 39 | SessionTimeout: 60 40 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the 4 | [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 5 | For more information see the 6 | [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 7 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any 8 | additional questions or comments. 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | If you would like to contribute to this project, please view the 4 | [Microsoft Contribution guidelines](https://azure.github.io/guidelines/). 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation 2 | 3 | All rights reserved. 4 | 5 | MIT License 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a 8 | copy of this software and associated documentation files (the "Software"), 9 | to deal in the Software without restriction, including without limitation 10 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | and/or sell copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://azurebatch.visualstudio.com/blobxfer/_apis/build/status/blobxfer-CI)](https://azurebatch.visualstudio.com/blobxfer/_build/latest?definitionId=12) 2 | [![Build status](https://ci.appveyor.com/api/projects/status/qgth9p7jlessgp5i/branch/master?svg=true)](https://ci.appveyor.com/project/alfpark/blobxfer) 3 | [![codecov](https://codecov.io/gh/Azure/blobxfer/branch/master/graph/badge.svg)](https://codecov.io/gh/Azure/blobxfer) 4 | [![PyPI](https://img.shields.io/pypi/v/blobxfer.svg)](https://pypi.python.org/pypi/blobxfer) 5 | 6 | ## PROJECT STATUS 7 | **This project is no longer actively maintained.** For tools officially supported by 8 | Microsoft please refer to this 9 | [documentation](https://docs.microsoft.com/azure/storage/common/storage-choose-data-transfer-solution). 10 | 11 | # blobxfer 12 | `blobxfer` is an advanced data movement tool and library for Azure Storage 13 | Blob and Files. 
With `blobxfer` you can copy your files into or out of Azure 14 | Storage with the CLI or integrate the `blobxfer` data movement library into 15 | your own Python scripts. 16 | 17 | ## Major Features 18 | * Command-line interface (CLI) providing data movement capability to and 19 | from Azure Blob and File Storage 20 | * Standalone library for integration with scripts or other Python packages 21 | * High-performance design with asynchronous transfers and disk I/O 22 | * Supports ingress, egress and synchronization of entire directories, 23 | containers and file shares 24 | * YAML configuration-driven execution support 25 | * Fine-grained resume support including resuming a broken operation 26 | within a file or object 27 | * Vectored IO support 28 | * `stripe` mode allows striping a single file across multiple blobs (even 29 | to multiple storage accounts) to break through single blob or fileshare 30 | throughput limits 31 | * `replica` mode allows replication of a file across multiple destinations 32 | including to multiple storage accounts 33 | * Synchronous copy with cross-mode (object transform) replication support 34 | * Leverages server-side copies by default 35 | * Arbitrary URL copy support 36 | * Client-side encryption support 37 | * Support all Azure Blob types and Azure Files for both upload and download 38 | * Advanced skip options for rsync-like operations 39 | * Store/restore POSIX filemode and uid/gid 40 | * Support reading/piping from `stdin` including to page blob destinations 41 | * Support reading from blob and file share snapshots for downloading and 42 | synchronous copy 43 | * Support for setting access tier on objects for uploading and synchronous 44 | copy 45 | * Configurable one-shot block upload support 46 | * Configurable chunk size for both upload and download 47 | * Automatic block size selection for block blob uploading 48 | * Automatic uploading of VHD/VHDX files as page blobs 49 | * Include and exclude filtering support 50 | * Rsync-like delete support 51 | * No clobber support in either direction 52 | * Automatic content type tagging 53 | * Support for setting the Cache Control property of blobs and files 54 | * File logging support 55 | * Support for HTTP proxies 56 | 57 | ## Installation 58 | There are three ways to install `blobxfer`: 59 | 60 | * `blobxfer` Python package from [PyPI](https://pypi.python.org/pypi/blobxfer) 61 | * Pre-built binaries available under [Releases](https://github.com/Azure/blobxfer/releases) 62 | * Docker images are available for both Linux and Windows platforms on the 63 | [Microsoft Container Registry](https://hub.docker.com/_/microsoft-blobxfer) 64 | 65 | Please refer to the 66 | [installation guide](http://blobxfer.readthedocs.io/en/latest/01-installation/) 67 | for more information on how to install `blobxfer`. 68 | 69 | ## Documentation 70 | Please refer to the [`blobxfer` documentation](http://blobxfer.readthedocs.io/) 71 | for more details and usage information. 72 | 73 | ## Change Log 74 | Please see the 75 | [Change Log](http://blobxfer.readthedocs.io/en/latest/CHANGELOG/) 76 | for project history. 77 | 78 | ## Support 79 | This project is community supported and not officially supported by Microsoft. 80 | There is no defined SLA for addressing features, issues, and bugs which are 81 | exclusively serviced via GitHub issues. For tools officially supported by 82 | Microsoft please refer to this 83 | [documentation](https://docs.microsoft.com/azure/storage/common/storage-choose-data-transfer-solution). 
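
## Library Usage Sketch
As a rough illustration of the Python library integration mentioned above, the following is a hedged sketch of a scripted download through `blobxfer.api`. The option tuples mirror `blobxfer/models/options.py` in this repository, but the credential, source path, specification, and `Downloader` call signatures are assumptions drawn from the library documentation (`docs/80-blobxfer-python-library.md`) and may differ between versions; the account, container, and path values are hypothetical.

```python
import blobxfer.api
import blobxfer.models.azure

# concurrency options are required; action hint 1 = download
general = blobxfer.api.GeneralOptions(
    concurrency=blobxfer.api.ConcurrencyOptions(
        crypto_processes=0, md5_processes=2, disk_threads=4,
        transfer_threads=8, action=1),
    timeout=blobxfer.api.TimeoutOptions(connect=None, read=None, max_retries=10),
    progress_bar=False)

# assumed call signatures below -- consult the library docs for your version
creds = blobxfer.api.AzureStorageCredentials(general)
creds.add_storage_account('mysa', '<account-key-or-sas>', 'core.windows.net')

asp = blobxfer.api.AzureSourcePath()
asp.add_path_with_storage_account('mycontainer/some/path', 'mysa')

spec = blobxfer.api.DownloadSpecification(
    download_options=blobxfer.api.DownloadOptions(
        check_file_md5=False,
        chunk_size_bytes=4194304,
        delete_extraneous_destination=False,
        delete_only=False,
        max_single_object_concurrency=8,
        mode=blobxfer.models.azure.StorageModes.Auto,  # Auto member assumed
        overwrite=True,
        recursive=True,
        rename=False,
        restore_file_properties=blobxfer.api.FilePropertiesOptions(
            attributes=False, cache_control=None, content_type=None,
            lmt=False, md5=None),
        rsa_private_key=None,
        strip_components=0),
    skip_on_options=blobxfer.api.SkipOnOptions(
        filesize_match=False, lmt_ge=False, md5_match=False),
    local_destination_path=blobxfer.api.LocalDestinationPath('/tmp/dest'))
spec.add_azure_source_path(asp)

blobxfer.api.Downloader(general, creds, spec).start()
```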
84 | 85 | * * * 86 | Please see this project's [Code of Conduct](CODE_OF_CONDUCT.md) and 87 | [Contributing](CONTRIBUTING.md) guidelines. 88 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch}-{build}' 2 | 3 | clone_depth: 5 4 | 5 | cache: 6 | - '%LOCALAPPDATA%\pip\Cache' 7 | 8 | environment: 9 | matrix: 10 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 11 | PYTHON: "C:\\Python36-x64" 12 | PYTHON_VERSION: "3.6" 13 | PYTHON_ARCH: "64" 14 | TOX_ENV: "py36" 15 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 16 | PYTHON: "C:\\Python37-x64" 17 | PYTHON_VERSION: "3.7" 18 | PYTHON_ARCH: "64" 19 | TOX_ENV: "py37" 20 | - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 21 | PYTHON: "C:\\Python38-x64" 22 | PYTHON_VERSION: "3.8" 23 | PYTHON_ARCH: "64" 24 | TOX_ENV: "py38" 25 | 26 | init: 27 | - echo %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH% 28 | 29 | install: 30 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" 31 | - where pip 32 | - pip install --upgrade setuptools wheel 33 | - pip install --upgrade virtualenv coveralls tox 34 | 35 | build: off 36 | 37 | test_script: 38 | - tox -e "%TOX_ENV%" 39 | -------------------------------------------------------------------------------- /blobxfer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | import sys 26 | from .version import __version__ # noqa 27 | 28 | # monkeypatch User-Agent string 29 | import azure.storage.common 30 | azure.storage.common._constants.USER_AGENT_STRING_PREFIX = \ 31 | 'blobxfer/{} {}'.format( 32 | __version__, 33 | azure.storage.common._constants.USER_AGENT_STRING_PREFIX 34 | ) 35 | 36 | # set stdin source 37 | if sys.version_info >= (3, 0): # noqa 38 | STDIN = sys.stdin.buffer 39 | else: # noqa 40 | # set stdin to binary mode on Windows 41 | if sys.platform == 'win32': 42 | import msvcrt 43 | import os 44 | msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) 45 | STDIN = sys.stdin 46 | -------------------------------------------------------------------------------- /blobxfer/api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 
4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | # non-stdlib imports 27 | # local imports 28 | 29 | # clients 30 | from .operations.azure.blob.append import ( # noqa 31 | create_client as create_append_blob_client 32 | ) 33 | from .operations.azure.blob.block import ( # noqa 34 | create_client as create_block_blob_client 35 | ) 36 | from .operations.azure.blob.page import ( # noqa 37 | create_client as create_page_blob_client 38 | ) 39 | from .operations.azure.file import ( # noqa 40 | create_client as create_file_client 41 | ) 42 | 43 | # models 44 | from .models.options import ( # noqa 45 | Timeout as TimeoutOptions, 46 | Concurrency as ConcurrencyOptions, 47 | General as GeneralOptions, 48 | VectoredIo as VectoredIoOptions, 49 | SkipOn as SkipOnOptions, 50 | FileProperties as FilePropertiesOptions, 51 | Download as DownloadOptions, 52 | SyncCopy as SyncCopyOptions, 53 | Upload as UploadOptions 54 | ) 55 | from .models.download import ( # noqa 56 | LocalDestinationPath, 57 | Specification as DownloadSpecification 58 | ) 59 | from .models.synccopy import ( # noqa 60 | Specification as SynccopySpecification 61 | ) 62 | from .models.upload import ( # noqa 63 | LocalSourcePath, 64 | Specification as UploadSpecification 65 | ) 66 | 67 | # operations 68 | from .operations.azure import ( # noqa 69 | StorageCredentials as AzureStorageCredentials, 70 | DestinationPath as AzureDestinationPath, 71 | SourcePath as AzureSourcePath 72 | ) 73 | from .operations.download import ( # noqa 74 | Downloader 75 | ) 76 | from .operations.synccopy import ( # noqa 77 | SyncCopy 78 | ) 79 | from .operations.upload import ( # noqa 80 | Uploader 81 | ) 82 | -------------------------------------------------------------------------------- /blobxfer/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 
4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | import fnmatch 27 | import pathlib 28 | # non-stdlib imports 29 | # local imports 30 | 31 | 32 | class _BaseSourcePaths(object): 33 | """Base Source Paths""" 34 | def __init__(self): 35 | # type: (_BaseSourcePaths) -> None 36 | """Ctor for _BaseSourcePaths 37 | :param _BaseSourcePaths self: this 38 | """ 39 | self._include = None 40 | self._exclude = None 41 | self._paths = [] 42 | 43 | @property 44 | def paths(self): 45 | # type: (_BaseSourcePaths) -> List[pathlib.Path] 46 | """Stored paths 47 | :param _BaseSourcePaths self: this 48 | :rtype: list 49 | :return: list of pathlib.Path 50 | """ 51 | return self._paths 52 | 53 | def add_includes(self, includes): 54 | # type: (_BaseSourcePaths, list) -> None 55 | """Add a list of includes 56 | :param _BaseSourcePaths self: this 57 | :param list includes: list of includes 58 | """ 59 | if not isinstance(includes, list): 60 | if isinstance(includes, tuple): 61 | includes = list(includes) 62 | else: 63 | includes = [includes] 64 | # remove any starting rglob spec 65 | incl = [] 66 | for inc in includes: 67 | tmp = pathlib.Path(inc).parts 68 | if tmp[0] == '**': 69 | if len(tmp) == 1: 70 | continue 71 | else: 72 | incl.append(str(pathlib.Path(*tmp[1:]))) 73 | else: 74 | incl.append(inc) 75 | # check for any remaining rglob specs 76 | if any(['**' in x for x in incl]): 77 | raise ValueError('invalid include specification containing "**"') 78 | if self._include is None: 79 | self._include = incl 80 | else: 81 | self._include.extend(incl) 82 | 83 | def add_excludes(self, excludes): 84 | # type: (_BaseSourcePaths, list) -> None 85 | """Add a list of excludes 86 | :param _BaseSourcePaths self: this 87 | :param list excludes: list of excludes 88 | """ 89 | if not isinstance(excludes, list): 90 | if isinstance(excludes, tuple): 91 | excludes = list(excludes) 92 | else: 93 | excludes = [excludes] 94 | # remove any starting rglob spec 95 | excl = [] 96 | for exc in excludes: 97 | tmp = pathlib.Path(exc).parts 98 | if tmp[0] == '**': 99 | if len(tmp) == 1: 100 | continue 101 | else: 102 | excl.append(str(pathlib.Path(*tmp[1:]))) 103 | else: 104 | excl.append(exc) 105 | # check for any remaining rglob specs 106 | if any(['**' in x for x in excl]): 107 | raise ValueError('invalid exclude specification containing "**"') 108 | if self._exclude is None: 109 | self._exclude = excl 110 | 
else: 111 | self._exclude.extend(excl) 112 | 113 | def add_path(self, path): 114 | # type: (_BaseSourcePaths, str) -> None 115 | """Add a local path 116 | :param _BaseSourcePaths self: this 117 | :param str path: path to add 118 | """ 119 | if isinstance(path, pathlib.Path): 120 | self._paths.append(path) 121 | else: 122 | self._paths.append(pathlib.Path(path)) 123 | 124 | def add_paths(self, paths): 125 | # type: (_BaseSourcePaths, list) -> None 126 | """Add a list of local paths 127 | :param _BaseSourcePaths self: this 128 | :param list paths: paths to add 129 | """ 130 | for path in paths: 131 | self.add_path(path) 132 | 133 | def _inclusion_check(self, path): 134 | # type: (_BaseSourcePaths, pathlib.Path) -> bool 135 | """Check file for inclusion against filters 136 | :param _BaseSourcePaths self: this 137 | :param pathlib.Path path: path to check 138 | :rtype: bool 139 | :return: if file should be included 140 | """ 141 | _spath = str(path) 142 | inc = True 143 | if self._include is not None: 144 | inc = any([fnmatch.fnmatch(_spath, x) for x in self._include]) 145 | if inc and self._exclude is not None: 146 | inc = not any([fnmatch.fnmatch(_spath, x) for x in self._exclude]) 147 | return inc 148 | -------------------------------------------------------------------------------- /blobxfer/models/metadata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 
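# NOTE (illustrative sketch, not part of this module): the
# _BaseSourcePaths._inclusion_check method that closes
# blobxfer/models/__init__.py above filters with fnmatch against the whole
# stringified path, with excludes vetoing includes; the patterns and path
# below are hypothetical values:
#
#   import fnmatch
#   include, exclude = ['*.bin'], ['tmp/*']
#   path = 'tmp/data.bin'
#   inc = any(fnmatch.fnmatch(path, x) for x in include)  # True: '*' spans '/'
#   inc = inc and not any(fnmatch.fnmatch(path, x) for x in exclude)  # False
#
# i.e. a path is included only if it matches an include pattern (when includes
# are set) and matches no exclude pattern.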
24 | 25 | # stdlib imports 26 | import collections 27 | import json 28 | import logging 29 | # non-stdlib imports 30 | # local imports 31 | import blobxfer.util 32 | 33 | # create logger 34 | logger = logging.getLogger(__name__) 35 | # global defines 36 | JSON_KEY_BLOBXFER_METADATA = 'blobxfer_metadata' 37 | # file attributes 38 | _JSON_KEY_FILE_ATTRIBUTES = 'FileAttributes' 39 | _JSON_KEY_FILE_ATTRIBUTES_POSIX = 'POSIX' 40 | _JSON_KEY_FILE_ATTRIBUTES_WINDOWS = 'Windows' 41 | _JSON_KEY_FILE_ATTRIBUTES_MODE = 'mode' 42 | _JSON_KEY_FILE_ATTRIBUTES_UID = 'uid' 43 | _JSON_KEY_FILE_ATTRIBUTES_GID = 'gid' 44 | # vectored io 45 | _JSON_KEY_VECTORED_IO = 'VectoredIO' 46 | _JSON_KEY_VECTORED_IO_MODE = 'Mode' 47 | _JSON_KEY_VECTORED_IO_STRIPE = 'Stripe' 48 | _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE = 'TotalSize' 49 | _JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START = 'OffsetStart' 50 | _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES = 'TotalSlices' 51 | _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID = 'SliceId' 52 | _JSON_KEY_VECTORED_IO_STRIPE_NEXT = 'Next' 53 | # named tuples 54 | PosixFileAttr = collections.namedtuple( 55 | 'PosixFileAttr', [ 56 | 'gid', 57 | 'mode', 58 | 'uid', 59 | ] 60 | ) 61 | WindowsFileAttr = collections.namedtuple( 62 | 'WindowsFileAttr', [ 63 | ] 64 | ) 65 | VectoredStripe = collections.namedtuple( 66 | 'VectoredStripe', [ 67 | 'next', 68 | 'offset_start', 69 | 'slice_id', 70 | 'total_size', 71 | 'total_slices', 72 | ] 73 | ) 74 | VectoredNextEntry = collections.namedtuple( 75 | 'VectoredNextEntry', [ 76 | 'storage_account_name', 77 | 'endpoint', 78 | 'container', 79 | 'name', 80 | ] 81 | ) 82 | _FILEATTR_WARNED_ON_WINDOWS = False 83 | 84 | 85 | def get_md5_from_metadata(ase): 86 | # type: (blobxfer.models.azure.StorageEntity) -> str 87 | """Get MD5 from properties or metadata 88 | :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity 89 | :rtype: str or None 90 | :return: md5 91 | """ 92 | # if encryption metadata is present, check for pre-encryption 93 | # md5 in blobxfer extensions 94 | md5 = None 95 | if ase.is_encrypted: 96 | try: 97 | md5 = ase.encryption_metadata.blobxfer_extensions.\ 98 | pre_encrypted_content_md5 99 | except AttributeError: 100 | # this can happen if partial metadata is present 101 | md5 = None 102 | if blobxfer.util.is_none_or_empty(md5): 103 | md5 = ase.md5 104 | return md5 105 | 106 | 107 | def generate_fileattr_metadata(local_path, metadata): 108 | # type: (blobxfer.models.upload.LocalPath, dict) -> dict 109 | """Generate file attribute metadata dict 110 | :param blobxfer.models.upload.LocalPath local_path: local path 111 | :param dict metadata: existing metadata dict 112 | :rtype: dict 113 | :return: merged metadata dictionary 114 | """ 115 | if blobxfer.util.on_windows(): 116 | global _FILEATTR_WARNED_ON_WINDOWS 117 | if not _FILEATTR_WARNED_ON_WINDOWS: 118 | _FILEATTR_WARNED_ON_WINDOWS = True 119 | logger.warning( 120 | 'file attributes store/restore on Windows is not ' 121 | 'supported yet') 122 | return None 123 | else: 124 | md = { 125 | _JSON_KEY_FILE_ATTRIBUTES: { 126 | _JSON_KEY_FILE_ATTRIBUTES_POSIX: { 127 | _JSON_KEY_FILE_ATTRIBUTES_MODE: local_path.mode, 128 | _JSON_KEY_FILE_ATTRIBUTES_UID: local_path.uid, 129 | _JSON_KEY_FILE_ATTRIBUTES_GID: local_path.gid, 130 | } 131 | } 132 | } 133 | return blobxfer.util.merge_dict(metadata, md) 134 | 135 | 136 | def fileattr_from_metadata(md): 137 | # type: (dict) -> collections.namedtuple 138 | """Convert fileattr metadata in json metadata 139 | :param dict md: metadata dictionary 140 | :rtype: 
PosixFileAttr or WindowsFileAttr or None 141 | :return: fileattr metadata 142 | """ 143 | try: 144 | mdattr = json.loads( 145 | md[JSON_KEY_BLOBXFER_METADATA])[_JSON_KEY_FILE_ATTRIBUTES] 146 | except (KeyError, TypeError): 147 | return None 148 | else: 149 | if blobxfer.util.on_windows(): 150 | global _FILEATTR_WARNED_ON_WINDOWS 151 | if not _FILEATTR_WARNED_ON_WINDOWS: 152 | _FILEATTR_WARNED_ON_WINDOWS = True 153 | logger.warning( 154 | 'file attributes store/restore on Windows is not ' 155 | 'supported yet') 156 | fileattr = None 157 | else: 158 | try: 159 | fileattr = PosixFileAttr( 160 | mode=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ 161 | _JSON_KEY_FILE_ATTRIBUTES_MODE], 162 | uid=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ 163 | _JSON_KEY_FILE_ATTRIBUTES_UID], 164 | gid=mdattr[_JSON_KEY_FILE_ATTRIBUTES_POSIX][ 165 | _JSON_KEY_FILE_ATTRIBUTES_GID], 166 | ) 167 | except KeyError: 168 | fileattr = None 169 | return fileattr 170 | 171 | 172 | def create_vectored_io_next_entry(ase): 173 | # type: (blobxfer.models.azure.StorageEntity) -> str 174 | """Create Vectored IO next entry id 175 | :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity 176 | :rtype: str 177 | :return: vectored io next entry 178 | """ 179 | return ';'.join( 180 | (ase.client.primary_endpoint, ase.container, ase.name) 181 | ) 182 | 183 | 184 | def explode_vectored_io_next_entry(entry): 185 | # type: (str) -> collections.namedtuple 186 | """Explode next vectored io entry 187 | :param str entry: next entry 188 | :rtype: VectoredNextEntry 189 | :return: vectored next entry 190 | """ 191 | tmp = entry.split(';') 192 | _sa = tmp[0].split('.') 193 | return VectoredNextEntry( 194 | storage_account_name=_sa[0], 195 | endpoint='.'.join(_sa[2:]), 196 | container=tmp[1], 197 | name=tmp[2], 198 | ) 199 | 200 | 201 | def remove_vectored_io_slice_suffix_from_name(name, slice): 202 | # type: (str, int) -> str 203 | """Remove vectored io (stripe) slice suffix from a given name 204 | :param str name: entity name 205 | :param int slice: slice num 206 | :rtype: str 207 | :return: name without suffix 208 | """ 209 | suffix = '.bxslice-{}'.format(slice) 210 | if name.endswith(suffix): 211 | return name[:-len(suffix)] 212 | else: 213 | return name 214 | 215 | 216 | def generate_vectored_io_stripe_metadata(local_path, metadata): 217 | # type: (blobxfer.models.upload.LocalPath, dict) -> dict 218 | """Generate vectored io stripe metadata dict 219 | :param blobxfer.models.upload.LocalPath local_path: local path 220 | :param dict metadata: existing metadata dict 221 | :rtype: dict 222 | :return: merged metadata dictionary 223 | """ 224 | md = { 225 | _JSON_KEY_VECTORED_IO: { 226 | _JSON_KEY_VECTORED_IO_MODE: _JSON_KEY_VECTORED_IO_STRIPE, 227 | _JSON_KEY_VECTORED_IO_STRIPE: { 228 | _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE: local_path.total_size, 229 | _JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START: 230 | local_path.view.fd_start, 231 | _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES: 232 | local_path.view.total_slices, 233 | _JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID: 234 | local_path.view.slice_num, 235 | _JSON_KEY_VECTORED_IO_STRIPE_NEXT: local_path.view.next, 236 | } 237 | } 238 | } 239 | return blobxfer.util.merge_dict(metadata, md) 240 | 241 | 242 | def vectored_io_from_metadata(md): 243 | # type: (dict) -> collections.namedtuple 244 | """Convert vectored io metadata in json metadata 245 | :param dict md: metadata dictionary 246 | :rtype: VectoredStripe or None 247 | :return: vectored io metadata 248 | """ 249 | try: 250 | mdattr = json.loads( 251 | 
md[JSON_KEY_BLOBXFER_METADATA])[_JSON_KEY_VECTORED_IO] 252 | except (KeyError, TypeError): 253 | pass 254 | else: 255 | if mdattr[_JSON_KEY_VECTORED_IO_MODE] == _JSON_KEY_VECTORED_IO_STRIPE: 256 | mdstripe = mdattr[_JSON_KEY_VECTORED_IO_STRIPE] 257 | try: 258 | nextptr = explode_vectored_io_next_entry( 259 | mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_NEXT]) 260 | except (KeyError, AttributeError): 261 | nextptr = None 262 | vio = VectoredStripe( 263 | total_size=mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE], 264 | offset_start=mdstripe[ 265 | _JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START], 266 | total_slices=mdstripe[ 267 | _JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES], 268 | slice_id=mdstripe[_JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID], 269 | next=nextptr, 270 | ) 271 | return vio 272 | else: 273 | raise RuntimeError('Cannot handle Vectored IO mode: {}'.format( 274 | mdattr[_JSON_KEY_VECTORED_IO_MODE])) 275 | return None 276 | -------------------------------------------------------------------------------- /blobxfer/models/offload.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 
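# NOTE (illustrative sketch, not part of this module): for the Vectored IO
# helpers in blobxfer/models/metadata.py above, the "next" pointer is a
# ';'-joined string of (primary endpoint, container, name); 'mysa',
# 'container' and the file name below are hypothetical values:
#
#   entry = 'mysa.blob.core.windows.net;container;file.bin.bxslice-1'
#   explode_vectored_io_next_entry(entry)
#   # -> VectoredNextEntry(storage_account_name='mysa',
#   #        endpoint='core.windows.net', container='container',
#   #        name='file.bin.bxslice-1')
#   remove_vectored_io_slice_suffix_from_name('file.bin.bxslice-1', 1)
#   # -> 'file.bin'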
24 | 25 | # stdlib imports 26 | import logging 27 | import multiprocessing 28 | import threading 29 | import queue 30 | 31 | # create logger 32 | logger = logging.getLogger(__name__) 33 | 34 | 35 | class _MultiprocessOffload(object): 36 | __slots__ = [ 37 | '_task_queue', '_done_queue', '_done_cv', '_term_signal', '_procs', 38 | '_check_thread' 39 | ] 40 | 41 | def __init__(self, target, num_workers, description=None): 42 | # type: (_MultiprocessOffload, function, int, str) -> None 43 | """Ctor for Multiprocess Offload 44 | :param _MultiprocessOffload self: this 45 | :param function target: target function for process 46 | :param int num_workers: number of worker processes 47 | :param str description: description 48 | """ 49 | self._task_queue = multiprocessing.Queue() 50 | self._done_queue = multiprocessing.Queue() 51 | self._done_cv = multiprocessing.Condition() 52 | self._term_signal = multiprocessing.Value('i', 0) 53 | self._procs = [] 54 | self._check_thread = None 55 | self._initialize_processes(target, num_workers, description) 56 | 57 | @property 58 | def done_cv(self): 59 | # type: (_MultiprocessOffload) -> multiprocessing.Condition 60 | """Get Done condition variable 61 | :param _MultiprocessOffload self: this 62 | :rtype: multiprocessing.Condition 63 | :return: cv for download done 64 | """ 65 | return self._done_cv 66 | 67 | @property 68 | def terminated(self): 69 | # type: (_MultiprocessOffload) -> bool 70 | """Check if terminated 71 | :param _MultiprocessOffload self: this 72 | :rtype: bool 73 | :return: if terminated 74 | """ 75 | return self._term_signal.value == 1 76 | 77 | def _initialize_processes(self, target, num_workers, description): 78 | # type: (_MultiprocessOffload, function, int, str) -> None 79 | """Initialize processes 80 | :param _MultiprocessOffload self: this 81 | :param function target: target function for process 82 | :param int num_workers: number of worker processes 83 | :param str description: description 84 | """ 85 | if num_workers is None or num_workers < 1: 86 | raise ValueError('invalid num_workers: {}'.format(num_workers)) 87 | logger.debug('initializing {}{} processes'.format( 88 | num_workers, ' ' + description if description is not None else '')) 89 | for _ in range(num_workers): 90 | proc = multiprocessing.Process( 91 | target=target, 92 | args=( 93 | self._term_signal, 94 | self._task_queue, 95 | self._done_cv, 96 | self._done_queue 97 | ) 98 | ) 99 | proc.start() 100 | self._procs.append(proc) 101 | 102 | def finalize_processes(self): 103 | # type: (_MultiprocessOffload) -> None 104 | """Finalize processes 105 | :param _MultiprocessOffload self: this 106 | """ 107 | self._term_signal.value = 1 108 | if self._check_thread is not None: 109 | self._check_thread.join() 110 | for proc in self._procs: 111 | proc.join() 112 | 113 | def pop_done_queue(self): 114 | # type: (_MultiprocessOffload) -> object 115 | """Get item from done queue 116 | :param _MultiprocessOffload self: this 117 | :rtype: object or None 118 | :return: object from done queue, if exists 119 | """ 120 | try: 121 | return self._done_queue.get_nowait() 122 | except queue.Empty: 123 | return None 124 | 125 | def initialize_check_thread(self, check_func): 126 | # type: (_MultiprocessOffload, function) -> None 127 | """Initialize the multiprocess done queue check thread 128 | :param _MultiprocessOffload self: this 129 | :param function check_func: check function 130 | """ 131 | self._check_thread = threading.Thread(target=check_func) 132 | self._check_thread.start() 133 | 
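A minimal, hypothetical sketch of how the `_MultiprocessOffload` pattern above is typically wired up (not part of blobxfer; the real worker targets live in subclasses such as the crypto and MD5 offload classes elsewhere in this tree): the target drains the task queue until the termination signal flips, and posts results to the done queue under the done condition variable.

```python
import queue

def _worker_target(term_signal, task_queue, done_cv, done_queue):
    # argument order matches _initialize_processes above
    while term_signal.value != 1:
        try:
            item = task_queue.get(True, 0.25)
        except queue.Empty:
            continue
        result = item * 2  # placeholder for real work (md5, crypto, etc.)
        with done_cv:
            done_queue.put(result)
            done_cv.notify()

if __name__ == '__main__':
    from blobxfer.models.offload import _MultiprocessOffload
    offload = _MultiprocessOffload(_worker_target, 2, description='demo')
    offload._task_queue.put(21)  # subclasses normally wrap this in a method
    try:
        with offload.done_cv:
            offload.done_cv.wait(timeout=5)
        print(offload.pop_done_queue())  # 42, or None if the wait timed out
    finally:
        offload.finalize_processes()
```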
-------------------------------------------------------------------------------- /blobxfer/models/options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | import collections 27 | import logging 28 | import multiprocessing 29 | import pathlib 30 | import sys 31 | # non-stdlib imports 32 | # local imports 33 | import blobxfer.util 34 | 35 | # create logger 36 | logger = logging.getLogger(__name__) 37 | # global defines 38 | if sys.version_info >= (3, 5): # noqa 39 | _DEFAULT_REQUESTS_TIMEOUT = (10, 200) 40 | else: # noqa 41 | _DEFAULT_REQUESTS_TIMEOUT = (10, 31) 42 | 43 | # named tuples 44 | HttpProxy = collections.namedtuple( 45 | 'HttpProxy', [ 46 | 'host', 47 | 'port', 48 | 'username', 49 | 'password', 50 | ] 51 | ) 52 | VectoredIo = collections.namedtuple( 53 | 'VectoredIoOptions', [ 54 | 'stripe_chunk_size_bytes', 55 | 'distribution_mode', 56 | ] 57 | ) 58 | SkipOn = collections.namedtuple( 59 | 'SkipOn', [ 60 | 'filesize_match', 61 | 'lmt_ge', 62 | 'md5_match', 63 | ] 64 | ) 65 | FileProperties = collections.namedtuple( 66 | 'FileProperties', [ 67 | 'attributes', 68 | 'cache_control', 69 | 'content_type', 70 | 'lmt', 71 | 'md5', 72 | ] 73 | ) 74 | Upload = collections.namedtuple( 75 | 'Upload', [ 76 | 'access_tier', 77 | 'chunk_size_bytes', 78 | 'delete_extraneous_destination', 79 | 'delete_only', 80 | 'mode', 81 | 'one_shot_bytes', 82 | 'overwrite', 83 | 'recursive', 84 | 'rename', 85 | 'rsa_public_key', 86 | 'stdin_as_page_blob_size', 87 | 'store_file_properties', 88 | 'strip_components', 89 | 'vectored_io', 90 | ] 91 | ) 92 | Download = collections.namedtuple( 93 | 'Download', [ 94 | 'check_file_md5', 95 | 'chunk_size_bytes', 96 | 'delete_extraneous_destination', 97 | 'delete_only', 98 | 'max_single_object_concurrency', 99 | 'mode', 100 | 'overwrite', 101 | 'recursive', 102 | 'rename', 103 | 'restore_file_properties', 104 | 'rsa_private_key', 105 | 'strip_components', 106 | ] 107 | ) 108 | SyncCopy = collections.namedtuple( 109 | 'SyncCopy', [ 110 | 'access_tier', 111 | 'delete_extraneous_destination', 112 | 'delete_only', 113 | 'dest_mode', 114 | 'mode', 115 | 'overwrite', 116 | 'recursive', 117 | 'rename', 118 | 'server_side_copy', 119 | 'strip_components', 120 | ] 121 | ) 122 | 123 | 124 | class 
Timeout(object): 125 | """Timeout Options""" 126 | def __init__(self, connect, read, max_retries): 127 | """Ctor for Timeout options 128 | :param Timeout self: this 129 | :param float connect: connect timeout 130 | :param float read: read timeout 131 | :param int max_retries: max retries 132 | """ 133 | if connect is None or connect <= 0: 134 | self._connect = _DEFAULT_REQUESTS_TIMEOUT[0] 135 | else: 136 | self._connect = connect 137 | if read is None or read <= 0: 138 | self._read = _DEFAULT_REQUESTS_TIMEOUT[1] 139 | else: 140 | self._read = read 141 | if max_retries is None or max_retries < 0: 142 | self._max_retries = None 143 | else: 144 | self._max_retries = max_retries 145 | 146 | @property 147 | def connect(self): 148 | """Connect timeout 149 | :rtype: float 150 | :return: connect timeout 151 | """ 152 | return self._connect 153 | 154 | @property 155 | def read(self): 156 | """Read timeout 157 | :rtype: float 158 | :return: read timeout 159 | """ 160 | return self._read 161 | 162 | @property 163 | def timeout(self): 164 | """Timeout property in requests format 165 | :rtype: tuple 166 | :return: (connect, read) timeout tuple 167 | """ 168 | return (self._connect, self._read) 169 | 170 | @property 171 | def max_retries(self): 172 | """Max retries 173 | :rtype: int 174 | :return: maximum number of retries 175 | """ 176 | return self._max_retries 177 | 178 | 179 | class Concurrency(object): 180 | """Concurrency Options""" 181 | def __init__( 182 | self, crypto_processes, md5_processes, disk_threads, 183 | transfer_threads, action=None): 184 | """Ctor for Concurrency Options 185 | :param Concurrency self: this 186 | :param int crypto_processes: number of crypto procs 187 | :param int md5_processes: number of md5 procs 188 | :param int disk_threads: number of disk threads 189 | :param int transfer_threads: number of transfer threads 190 | :param int action: action hint (1=Download, 2=Upload, 3=SyncCopy) 191 | """ 192 | self.crypto_processes = crypto_processes 193 | self.md5_processes = md5_processes 194 | self.disk_threads = disk_threads 195 | self.transfer_threads = transfer_threads 196 | # allow crypto processes to be zero (which will inline crypto 197 | # routines with main process) 198 | if self.crypto_processes is None or self.crypto_processes < 1: 199 | self.crypto_processes = 0 200 | if self.md5_processes is None or self.md5_processes < 1: 201 | self.md5_processes = multiprocessing.cpu_count() >> 1 202 | if self.md5_processes < 1: 203 | self.md5_processes = 1 204 | auto_disk = False 205 | if self.disk_threads is None or self.disk_threads < 1: 206 | self.disk_threads = multiprocessing.cpu_count() << 1 207 | # cap maximum number of disk threads from cpu count to 64 208 | if self.disk_threads > 64: 209 | self.disk_threads = 64 210 | # for download action, cap disk threads to lower value 211 | if action == 1 and self.disk_threads > 32: 212 | self.disk_threads = 32 213 | auto_disk = True 214 | # for synccopy action, set all non-transfer counts to zero 215 | if action == 3: 216 | auto_disk = False 217 | self.md5_processes = 0 218 | self.crypto_processes = 0 219 | self.disk_threads = 0 220 | if self.transfer_threads is None or self.transfer_threads < 1: 221 | if auto_disk: 222 | # for download action, cap network threads to lower value 223 | if action == 1: 224 | max_threads = (multiprocessing.cpu_count() >> 1) - 2 225 | if max_threads < 3: 226 | max_threads = 3 227 | self.transfer_threads = max_threads 228 | self.disk_threads = int(max_threads * 1.5) 229 | else: 230 | 
self.transfer_threads = self.disk_threads << 1 231 | else: 232 | self.transfer_threads = multiprocessing.cpu_count() << 2 233 | # cap maximum number of threads from cpu count to 96 234 | if self.transfer_threads > 96: 235 | self.transfer_threads = 96 236 | 237 | 238 | class General(object): 239 | """General Options""" 240 | def __init__( 241 | self, concurrency, log_file=None, progress_bar=True, 242 | resume_file=None, timeout=None, verbose=False, quiet=False, 243 | dry_run=False, proxy=None): 244 | """Ctor for General Options 245 | :param General self: this 246 | :param Concurrency concurrency: concurrency options 247 | :param bool progress_bar: progress bar 248 | :param str log_file: log file 249 | :param str resume_file: resume file 250 | :param Timeout timeout: timeout options 251 | :param bool verbose: verbose output 252 | :param bool quiet: quiet 253 | :param bool dry_run: dry run 254 | :param HttpProxy proxy: proxy 255 | """ 256 | if concurrency is None: 257 | raise ValueError('concurrency option is unspecified') 258 | self.concurrency = concurrency 259 | self.log_file = log_file 260 | self.progress_bar = progress_bar 261 | if blobxfer.util.is_not_empty(resume_file): 262 | self.resume_file = pathlib.Path(resume_file) 263 | else: 264 | self.resume_file = None 265 | self.timeout = timeout 266 | self.verbose = verbose 267 | self.quiet = quiet 268 | self.dry_run = dry_run 269 | self.proxy = proxy 270 | -------------------------------------------------------------------------------- /blobxfer/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/blobxfer/b1ed6fb474766b049bc36ab6ade55f246ab5b78c/blobxfer/operations/__init__.py -------------------------------------------------------------------------------- /blobxfer/operations/azure/blob/append.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 
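# NOTE (illustrative sketch, not part of this module): the Concurrency class
# in blobxfer/models/options.py above auto-tunes any unset (None or < 1)
# counts; worked out for a hypothetical 8-core machine:
#
#   Concurrency(None, None, None, None)
#   # -> crypto_processes=0, md5_processes=4 (cpu >> 1),
#   #    disk_threads=16 (cpu << 1), transfer_threads=32 (disk << 1)
#   Concurrency(None, None, None, None, action=1)   # 1 = download
#   # -> transfer_threads=max((cpu >> 1) - 2, 3) = 3,
#   #    disk_threads=int(3 * 1.5) = 4
#   Concurrency(None, None, None, None, action=3)   # 3 = synccopy
#   # -> md5/crypto/disk all forced to 0,
#   #    transfer_threads=32 (cpu << 2, capped at 96)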
24 | 25 | # stdlib imports 26 | import logging 27 | # non-stdlib imports 28 | import azure.storage.blob 29 | # local imports 30 | import blobxfer.retry 31 | 32 | # create logger 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | def create_client(storage_account, timeout, proxy): 37 | # type: (blobxfer.operations.azure.StorageAccount, 38 | # blobxfer.models.options.Timeout, 39 | # blobxfer.models.options.HttpProxy) -> AppendBlobService 40 | """Create Append blob client 41 | :param blobxfer.operations.azure.StorageAccount storage_account: 42 | storage account 43 | :param blobxfer.models.options.Timeout timeout: timeout 44 | :param blobxfer.models.options.HttpProxy proxy: proxy 45 | :rtype: AppendBlobService 46 | :return: append blob service client 47 | """ 48 | if storage_account.is_sas: 49 | client = azure.storage.blob.AppendBlobService( 50 | account_name=storage_account.name, 51 | sas_token=storage_account.key, 52 | endpoint_suffix=storage_account.endpoint, 53 | request_session=storage_account.session, 54 | socket_timeout=timeout.timeout) 55 | else: 56 | client = azure.storage.blob.AppendBlobService( 57 | account_name=storage_account.name, 58 | account_key=storage_account.key, 59 | endpoint_suffix=storage_account.endpoint, 60 | request_session=storage_account.session, 61 | socket_timeout=timeout.timeout) 62 | # set proxy 63 | if proxy is not None: 64 | client.set_proxy( 65 | proxy.host, proxy.port, proxy.username, proxy.password) 66 | # set retry policy 67 | client.retry = blobxfer.retry.ExponentialRetryWithMaxWait( 68 | max_retries=timeout.max_retries).retry 69 | return client 70 | 71 | 72 | def create_blob(ase, timeout=None): 73 | # type: (blobxfer.models.azure.StorageEntity, int) -> None 74 | """Create append blob 75 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 76 | :param int timeout: timeout 77 | """ 78 | ase.client.create_blob( 79 | container_name=ase.container, 80 | blob_name=ase.name, 81 | content_settings=azure.storage.blob.models.ContentSettings( 82 | content_type=ase.content_type, 83 | ), 84 | timeout=timeout) # noqa 85 | 86 | 87 | def append_block(ase, data, timeout=None): 88 | # type: (blobxfer.models.azure.StorageEntity, bytes, int) -> None 89 | """Appends a block into remote blob 90 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 91 | :param bytes data: data 92 | :param int timeout: timeout 93 | """ 94 | ase.client.append_block( 95 | container_name=ase.container, 96 | blob_name=ase.name, 97 | block=data, 98 | validate_content=False, # integrity is enforced with HTTPS 99 | timeout=timeout) # noqa 100 | -------------------------------------------------------------------------------- /blobxfer/operations/azure/blob/block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 
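# NOTE (illustrative sketch, not part of this module): a typical call
# sequence for the append blob operations module above (append.py);
# 'storage_account', 'timeout_opts' and 'ase' are assumed blobxfer objects
# (StorageAccount / Timeout options / StorageEntity):
#
#   client = create_client(storage_account, timeout_opts, proxy=None)
#   create_blob(ase)                  # create an empty append blob
#   append_block(ase, b'line 1\n')    # each append commits to the blob tail
#   append_block(ase, b'line 2\n')
#
# Append order is the order of the append_block calls; integrity checking is
# delegated to HTTPS (validate_content=False above).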
16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | import datetime 27 | import logging 28 | # non-stdlib imports 29 | import azure.storage.blob 30 | import azure.storage.file 31 | # local imports 32 | import blobxfer.models.azure 33 | import blobxfer.retry 34 | import blobxfer.util 35 | # create logger 36 | logger = logging.getLogger(__name__) 37 | 38 | def create_client(storage_account, timeout, proxy): 39 | # type: (blobxfer.operations.azure.StorageAccount, 40 | # blobxfer.models.options.Timeout, 41 | # blobxfer.models.options.HttpProxy) -> BlockBlobService 42 | """Create block blob client 43 | :param blobxfer.operations.azure.StorageAccount storage_account: 44 | storage account 45 | :param blobxfer.models.options.Timeout timeout: timeout 46 | :param blobxfer.models.options.HttpProxy proxy: proxy 47 | :rtype: azure.storage.blob.BlockBlobService 48 | :return: block blob service client 49 | """ 50 | if storage_account.is_sas: 51 | client = azure.storage.blob.BlockBlobService( 52 | account_name=storage_account.name, 53 | sas_token=storage_account.key, 54 | endpoint_suffix=storage_account.endpoint, 55 | request_session=storage_account.session, 56 | socket_timeout=timeout.timeout) 57 | else: 58 | client = azure.storage.blob.BlockBlobService( 59 | account_name=storage_account.name, 60 | account_key=storage_account.key, 61 | endpoint_suffix=storage_account.endpoint, 62 | request_session=storage_account.session, 63 | socket_timeout=timeout.timeout) 64 | # set proxy 65 | if proxy is not None: 66 | client.set_proxy( 67 | proxy.host, proxy.port, proxy.username, proxy.password) 68 | # set retry policy 69 | client.retry = blobxfer.retry.ExponentialRetryWithMaxWait( 70 | max_retries=timeout.max_retries).retry 71 | return client 72 | 73 | 74 | def create_blob(ase, data, md5, metadata, timeout=None): 75 | # type: (blobxfer.models.azure.StorageEntity, bytes, str, dict, 76 | # int) -> None 77 | """Create one shot block blob 78 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 79 | :param bytes data: blob data 80 | :param str md5: md5 as base64 81 | :param dict metadata: metadata kv pairs 82 | :param int timeout: timeout 83 | """ 84 | ase.client._put_blob( 85 | container_name=ase.container, 86 | blob_name=ase.name, 87 | blob=data, 88 | content_settings=azure.storage.blob.models.ContentSettings( 89 | content_type=ase.content_type, 90 | content_md5=md5, 91 | cache_control=ase.cache_control, 92 | ), 93 | metadata=metadata, 94 | validate_content=False, # integrity is enforced with HTTPS 95 | timeout=timeout) # noqa 96 | 97 | 98 | def _format_block_id(chunk_num): 99 | # type: (int) -> str 100 | """Create a block id given a block (chunk) number 101 | :param int chunk_num: chunk number 102 | :rtype: str 103 | :return: block id 104 | """ 105 | return '{0:08d}'.format(chunk_num) 106 | 107 | 108 | def put_block(ase, offsets, data, timeout=None): 109 | # type: (blobxfer.models.azure.StorageEntity, 110 | # blobxfer.models.upload.Offsets, bytes, int) -> None 111 | """Puts a block into remote blob 112 | :param blobxfer.models.azure.StorageEntity ase: Azure
StorageEntity 113 | :param blobxfer.models.upload.Offsets offsets: upload offsets 114 | :param bytes data: data 115 | :param int timeout: timeout 116 | """ 117 | ase.client.put_block( 118 | container_name=ase.container, 119 | blob_name=ase.name, 120 | block=data, 121 | block_id=_format_block_id(offsets.chunk_num), 122 | validate_content=False, # integrity is enforced with HTTPS 123 | timeout=timeout) # noqa 124 | 125 | 126 | def put_block_from_url(src_ase, dst_ase, offsets, timeout=None): 127 | # type: (blobxfer.models.azure.StorageEntity, 128 | # blobxfer.models.azure.StorageEntity, 129 | # blobxfer.models.upload.Offsets, int) -> None 130 | """Puts a block from a source URL into remote blob 131 | :param blobxfer.models.azure.StorageEntity src_ase: 132 | Source Azure StorageEntity 133 | :param blobxfer.models.azure.StorageEntity dst_ase: 134 | Destination Azure StorageEntity 135 | :param blobxfer.models.upload.Offsets offsets: upload offsets 136 | :param int timeout: timeout 137 | """ 138 | if src_ase.is_arbitrary_url: 139 | src_url = src_ase.path 140 | else: 141 | if blobxfer.util.is_not_empty(src_ase.client.account_key): 142 | if src_ase.mode == blobxfer.models.azure.StorageModes.File: 143 | sas = src_ase.client.generate_file_shared_access_signature( 144 | share_name=src_ase.container, 145 | file_name=src_ase.name, 146 | permission=azure.storage.file.FilePermissions(read=True), 147 | expiry=datetime.datetime.utcnow() + datetime.timedelta( 148 | days=7), 149 | ) 150 | else: 151 | sas = src_ase.client.generate_blob_shared_access_signature( 152 | container_name=src_ase.container, 153 | blob_name=src_ase.name, 154 | permission=azure.storage.blob.BlobPermissions(read=True), 155 | expiry=datetime.datetime.utcnow() + datetime.timedelta( 156 | days=7), 157 | ) 158 | else: 159 | sas = src_ase.client.sas_token 160 | src_url = 'https://{}/{}?{}'.format( 161 | src_ase.client.primary_endpoint, src_ase.path, sas) 162 | dst_ase.client.put_block_from_url( 163 | container_name=dst_ase.container, 164 | blob_name=dst_ase.name, 165 | copy_source_url=src_url, 166 | source_range_start=offsets.range_start, 167 | source_range_end=offsets.range_end, 168 | block_id=_format_block_id(offsets.chunk_num), 169 | source_content_md5=None, 170 | timeout=timeout) # noqa 171 | 172 | 173 | def put_block_list( 174 | ase, last_block_num, md5, metadata, timeout=None): 175 | # type: (blobxfer.models.azure.StorageEntity, int, str, dict, 176 | # int) -> None 177 | """Create block blob from blocks 178 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 179 | :param int last_block_num: last block number (chunk_num) 180 | :param str md5: md5 as base64 181 | :param dict metadata: metadata kv pairs 182 | :param int timeout: timeout 183 | """ 184 | # construct block list 185 | block_list = [ 186 | azure.storage.blob.BlobBlock(id=_format_block_id(x)) 187 | for x in range(0, last_block_num + 1) 188 | ] 189 | ase.client.put_block_list( 190 | container_name=ase.container, 191 | blob_name=ase.name, 192 | block_list=block_list, 193 | content_settings=azure.storage.blob.models.ContentSettings( 194 | content_type=ase.content_type, 195 | content_md5=md5, 196 | cache_control=ase.cache_control, 197 | ), 198 | metadata=metadata, 199 | validate_content=False, # integrity is enforced with HTTPS 200 | timeout=timeout) 201 | 202 | 203 | def get_committed_block_list(ase, timeout=None): 204 | # type: (blobxfer.models.azure.StorageEntity, int) -> list 205 | """Get committed block list 206 | :param blobxfer.models.azure.StorageEntity ase: Azure
StorageEntity 207 | :param int timeout: timeout 208 | :rtype: list 209 | :return: list of committed blocks 210 | """ 211 | if blobxfer.util.blob_is_snapshot(ase.name): 212 | blob_name, snapshot = blobxfer.util.parse_blob_snapshot_parameter( 213 | ase.name) 214 | else: 215 | blob_name = ase.name 216 | snapshot = None 217 | return ase.client.get_block_list( 218 | container_name=ase.container, 219 | blob_name=blob_name, 220 | snapshot=snapshot, 221 | block_list_type=azure.storage.blob.BlockListType.Committed, 222 | timeout=timeout).committed_blocks 223 | 224 | 225 | def set_blob_access_tier(ase, timeout=None): 226 | # type: (blobxfer.models.azure.StorageEntity, int) -> None 227 | """Set blob access tier 228 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 229 | :param int timeout: timeout 230 | """ 231 | ase.client.set_standard_blob_tier( 232 | container_name=ase.container, 233 | blob_name=ase.name, 234 | standard_blob_tier=ase.access_tier, 235 | timeout=timeout) # noqa 236 | -------------------------------------------------------------------------------- /blobxfer/operations/azure/blob/page.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 
24 | 25 | # stdlib imports 26 | import logging 27 | # non-stdlib imports 28 | import azure.storage.blob 29 | # local imports 30 | import blobxfer.retry 31 | import blobxfer.util 32 | 33 | # create logger 34 | logger = logging.getLogger(__name__) 35 | 36 | 37 | def create_client(storage_account, timeout, proxy): 38 | # type: (blobxfer.operations.azure.StorageAccount, 39 | # blobxfer.models.options.Timeout, 40 | # blobxfer.models.options.HttpProxy) -> PageBlobService 41 | """Create page blob client 42 | :param blobxfer.operations.azure.StorageAccount storage_account: 43 | storage account 44 | :param blobxfer.models.options.Timeout timeout: timeout 45 | :param blobxfer.models.options.HttpProxy proxy: proxy 46 | :rtype: PageBlobService 47 | :return: page blob service client 48 | """ 49 | if storage_account.is_sas: 50 | client = azure.storage.blob.PageBlobService( 51 | account_name=storage_account.name, 52 | sas_token=storage_account.key, 53 | endpoint_suffix=storage_account.endpoint, 54 | request_session=storage_account.session, 55 | socket_timeout=timeout.timeout) 56 | else: 57 | client = azure.storage.blob.PageBlobService( 58 | account_name=storage_account.name, 59 | account_key=storage_account.key, 60 | endpoint_suffix=storage_account.endpoint, 61 | request_session=storage_account.session, 62 | socket_timeout=timeout.timeout) 63 | # set proxy 64 | if proxy is not None: 65 | client.set_proxy( 66 | proxy.host, proxy.port, proxy.username, proxy.password) 67 | # set retry policy 68 | client.retry = blobxfer.retry.ExponentialRetryWithMaxWait( 69 | max_retries=timeout.max_retries).retry 70 | return client 71 | 72 | 73 | def create_blob(ase, timeout=None): 74 | # type: (blobxfer.models.azure.StorageEntity, int) -> None 75 | """Create page blob 76 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 77 | :param int timeout: timeout 78 | """ 79 | ase.client.create_blob( 80 | container_name=ase.container, 81 | blob_name=ase.name, 82 | content_length=blobxfer.util.page_align_content_length(ase.size), 83 | content_settings=azure.storage.blob.models.ContentSettings( 84 | content_type=ase.content_type, 85 | ), 86 | timeout=timeout) # noqa 87 | 88 | 89 | def put_page(ase, page_start, page_end, data, timeout=None): 90 | # type: (blobxfer.models.azure.StorageEntity, 91 | # int, int, bytes, int) -> None 92 | """Puts a page into remote blob 93 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 94 | :param int page_start: page range start 95 | :param int page_end: page range end 96 | :param bytes data: data 97 | :param int timeout: timeout 98 | """ 99 | ase.client.update_page( 100 | container_name=ase.container, 101 | blob_name=ase.name, 102 | page=data, 103 | start_range=page_start, 104 | end_range=page_end, 105 | validate_content=False, # integrity is enforced with HTTPS 106 | timeout=timeout) # noqa 107 | 108 | 109 | def resize_blob(ase, size, timeout=None): 110 | # type: (blobxfer.models.azure.StorageEntity, int, int) -> None 111 | """Resizes a page blob 112 | :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity 113 | :param int size: content length 114 | :param int timeout: timeout 115 | """ 116 | ase.client.resize_blob( 117 | container_name=ase.container, 118 | blob_name=ase.name, 119 | content_length=blobxfer.util.page_align_content_length(size), 120 | timeout=timeout) # noqa 121 | -------------------------------------------------------------------------------- /blobxfer/operations/md5.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | import logging 27 | import queue 28 | # non-stdlib imports 29 | # local imports 30 | import blobxfer.models.azure 31 | import blobxfer.models.offload 32 | import blobxfer.util 33 | 34 | # create logger 35 | logger = logging.getLogger(__name__) 36 | # global defines 37 | _EMPTY_MAX_PAGE_SIZE_MD5 = 'tc+p1sj+vWGPkawoQ9UKHA==' 38 | _MAX_PAGE_SIZE_BYTES = 4194304 39 | 40 | 41 | def compute_md5_for_file_asbase64( 42 | filename, pagealign=False, start=None, end=None, blocksize=65536): 43 | # type: (str, bool, int, int, int) -> str 44 | """Compute MD5 hash for file and encode as Base64 45 | :param str filename: file to compute MD5 for 46 | :param bool pagealign: page align data 47 | :param int start: file start offset 48 | :param int end: file end offset 49 | :param int blocksize: block size 50 | :rtype: str 51 | :return: MD5 for file encoded as Base64 52 | """ 53 | hasher = blobxfer.util.new_md5_hasher() 54 | with open(filename, 'rb') as filedesc: 55 | if start is not None: 56 | filedesc.seek(start) 57 | curr = start 58 | else: 59 | curr = 0 60 | while True: 61 | if end is not None and curr + blocksize > end: 62 | blocksize = end - curr 63 | if blocksize == 0: 64 | break 65 | buf = filedesc.read(blocksize) 66 | if not buf: 67 | break 68 | buflen = len(buf) 69 | if pagealign and buflen < blocksize: 70 | aligned = blobxfer.util.page_align_content_length(buflen) 71 | if aligned != buflen: 72 | buf = buf.ljust(aligned, b'\0') 73 | hasher.update(buf) 74 | curr += blocksize 75 | return blobxfer.util.base64_encode_as_string(hasher.digest()) 76 | 77 | 78 | def compute_md5_for_data_asbase64(data): 79 | # type: (obj) -> str 80 | """Compute MD5 hash for bits and encode as Base64 81 | :param any data: data to compute MD5 for 82 | :rtype: str 83 | :return: MD5 for data 84 | """ 85 | hasher = blobxfer.util.new_md5_hasher() 86 | hasher.update(data) 87 | return blobxfer.util.base64_encode_as_string(hasher.digest()) 88 | 89 | 90 | def check_data_is_empty(data): 91 | # type: (bytes) -> bool 92 | """Check if data is empty via MD5 93 | :param bytes data: data to check 94 | :rtype: bool 95 | :return: if data is empty 96 | """ 97 | contentmd5 = compute_md5_for_data_asbase64(data) 98 | 
datalen = len(data) 99 | if datalen == _MAX_PAGE_SIZE_BYTES: 100 | if contentmd5 == _EMPTY_MAX_PAGE_SIZE_MD5: 101 | return True 102 | else: 103 | data_chk = b'\0' * datalen 104 | if compute_md5_for_data_asbase64(data_chk) == contentmd5: 105 | return True 106 | return False 107 | 108 | 109 | class LocalFileMd5Offload(blobxfer.models.offload._MultiprocessOffload): 110 | """LocalFileMd5Offload""" 111 | def __init__(self, num_workers): 112 | # type: (LocalFileMd5Offload, int) -> None 113 | """Ctor for Local File Md5 Offload 114 | :param LocalFileMd5Offload self: this 115 | :param int num_workers: number of worker processes 116 | """ 117 | super().__init__( 118 | LocalFileMd5Offload._worker_process, num_workers, 'MD5' 119 | ) 120 | 121 | @staticmethod 122 | def _worker_process(term_signal, task_queue, done_cv, done_queue): 123 | # type: (multiprocessing.Value, multiprocessing.Queue, 124 | # multiprocessing.Condition, multiprocessing.Queue) -> None 125 | """Compute MD5 for local file 126 | :param multiprocessing.Value term_signal: termination signal 127 | :param multiprocessing.Queue task_queue: task queue 128 | :param multiprocessing.Condition done_cv: done condition variable 129 | :param multiprocessing.Queue done_queue: done queue 130 | """ 131 | while term_signal.value != 1: 132 | try: 133 | key, lpath, fpath, remote_md5, pagealign, lpview = \ 134 | task_queue.get(True, 0.1) 135 | except queue.Empty: 136 | continue 137 | if lpview is None: 138 | start = None 139 | end = None 140 | size = None 141 | else: 142 | start = lpview.fd_start 143 | end = lpview.fd_end 144 | size = end - start 145 | md5 = blobxfer.operations.md5.compute_md5_for_file_asbase64( 146 | fpath, pagealign, start, end) 147 | done_cv.acquire() 148 | done_queue.put((key, lpath, size, md5, md5 == remote_md5)) 149 | done_cv.notify() 150 | done_cv.release() 151 | 152 | def add_localfile_for_md5_check( 153 | self, key, lpath, fpath, remote_md5, mode, lpview): 154 | # type: (LocalFileMd5Offload, str, str, str, str, 155 | # blobxfer.models.azure.StorageModes, object) -> None 156 | """Add a local file to MD5 check queue 157 | :param LocalFileMd5Offload self: this 158 | :param str key: md5 map key 159 | :param str lpath: "local" path for descriptor 160 | :param str fpath: "final" path for/where file 161 | :param str remote_md5: remote MD5 to compare against 162 | :param blobxfer.models.azure.StorageModes mode: mode 163 | :param object lpview: local path view 164 | """ 165 | if blobxfer.util.is_none_or_empty(remote_md5): 166 | raise ValueError('comparison MD5 is empty for file {}'.format( 167 | lpath)) 168 | if mode == blobxfer.models.azure.StorageModes.Page: 169 | pagealign = True 170 | else: 171 | pagealign = False 172 | self._task_queue.put( 173 | (key, lpath, fpath, remote_md5, pagealign, lpview) 174 | ) 175 | -------------------------------------------------------------------------------- /blobxfer/retry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 
4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 24 | 25 | # stdlib imports 26 | import errno 27 | import ssl 28 | # non-stdlib imports 29 | import azure.storage.common.models 30 | import azure.storage.common.retry 31 | import requests 32 | import urllib3 33 | # local imports 34 | 35 | 36 | # global defines 37 | _RETRYABLE_ERRNO_MAXRETRY = frozenset(( 38 | '[Errno {}]'.format(errno.ECONNABORTED), 39 | '[Errno {}]'.format(errno.ECONNRESET), 40 | '[Errno {}]'.format(errno.ECONNREFUSED), 41 | '[Errno {}]'.format(errno.ENETRESET), 42 | '[Errno {}]'.format(errno.ETIMEDOUT), 43 | # Windows errors 44 | '[WinError {}]'.format(errno.ECONNABORTED), 45 | '[WinError {}]'.format(errno.ECONNRESET), 46 | '[WinError {}]'.format(errno.ECONNREFUSED), 47 | '[WinError {}]'.format(errno.ENETRESET), 48 | '[WinError {}]'.format(errno.ETIMEDOUT), 49 | )) 50 | _RETRYABLE_ERRNO_PROTOCOL = frozenset(( 51 | '({},'.format(errno.ECONNABORTED), 52 | '({},'.format(errno.ECONNRESET), 53 | '({},'.format(errno.ECONNREFUSED), 54 | '({},'.format(errno.ENETRESET), 55 | '({},'.format(errno.ETIMEDOUT), 56 | )) 57 | _RETRYABLE_STRING_FALLBACK = frozenset(( 58 | 'connection aborted', 59 | 'connection reset', 60 | 'connection refused', 61 | 'network dropped', 62 | 'timed out', 63 | )) 64 | 65 | 66 | class ExponentialRetryWithMaxWait(azure.storage.common.retry._Retry): 67 | """Exponential Retry with Max Wait Reset""" 68 | def __init__( 69 | self, initial_backoff=0.1, max_backoff=1, max_retries=None, 70 | reset_at_max=True): 71 | # type: (ExponentialRetryWithMaxWait, int, int, int, bool) -> None 72 | """Ctor for ExponentialRetryWithMaxWait 73 | :param ExponentialRetryWithMaxWait self: this 74 | :param int initial_backoff: initial backoff 75 | :param int max_backoff: max backoff 76 | :param int max_retries: max retries 77 | :param bool reset_at_max: reset after reaching max wait 78 | """ 79 | if max_backoff <= 0: 80 | raise ValueError( 81 | 'max backoff is non-positive: {}'.format(max_backoff)) 82 | if max_retries is not None and max_retries < 0: 83 | raise ValueError( 84 | 'max retries is invalid: {}'.format(max_retries)) 85 | if max_backoff < initial_backoff: 86 | raise ValueError( 87 | 'max backoff {} less than initial backoff {}'.format( 88 | max_backoff, initial_backoff)) 89 | self._backoff_count = 0 90 | self._last_backoff = initial_backoff 91 | self.initial_backoff = initial_backoff 92 | self.max_backoff = max_backoff 93 | 
self.reset_at_max = reset_at_max 94 | super(ExponentialRetryWithMaxWait, self).__init__( 95 | max_retries if max_retries is not None else 2147483647, False) 96 | 97 | def _should_retry(self, context): 98 | # type: (ExponentialRetryWithMaxWait, 99 | # azure.storage.common.models.RetryContext) -> bool 100 | """Determine if retry should happen or not 101 | :param ExponentialRetryWithMaxWait self: this 102 | :param azure.storage.common.models.RetryContext context: retry context 103 | :rtype: bool 104 | :return: True if retry should happen, False otherwise 105 | """ 106 | # do not retry if max attempts have been reached or exceeded 107 | if context.count >= self.max_attempts: 108 | return False 109 | 110 | # get response status 111 | status = None 112 | if context.response and context.response.status: 113 | status = context.response.status 114 | 115 | # if there is no response status, then handle the exception 116 | # appropriately from the lower layer 117 | if status is None: 118 | exc = context.exception 119 | # default to not retry in unknown/unhandled exception case 120 | ret = False 121 | # requests timeout, retry 122 | if isinstance(exc, requests.Timeout): 123 | ret = True 124 | elif isinstance(exc, requests.exceptions.ContentDecodingError): 125 | ret = True 126 | elif (isinstance(exc, requests.exceptions.ConnectionError) or 127 | isinstance(exc, requests.exceptions.ChunkedEncodingError)): 128 | # newer versions of requests do not expose errno on the 129 | # args[0] reason object; manually string parse 130 | if isinstance(exc.args[0], urllib3.exceptions.MaxRetryError): 131 | try: 132 | msg = exc.args[0].reason.args[0] 133 | except (AttributeError, IndexError): 134 | # unexpected/malformed exception hierarchy, don't retry 135 | pass 136 | else: 137 | if (isinstance(msg, ssl.SSLWantWriteError) or 138 | isinstance(msg, ssl.SSLWantReadError)): 139 | ret = True 140 | elif any(x in msg for x in _RETRYABLE_ERRNO_MAXRETRY): 141 | ret = True 142 | elif isinstance(exc.args[0], urllib3.exceptions.ProtocolError): 143 | try: 144 | msg = exc.args[0].args[0] 145 | except (AttributeError, IndexError): 146 | # unexpected/malformed exception hierarchy, don't retry 147 | pass 148 | else: 149 | if any(x in msg for x in _RETRYABLE_ERRNO_PROTOCOL): 150 | ret = True 151 | # fallback to string search 152 | if not ret: 153 | msg = str(exc).lower() 154 | if any(x in msg for x in _RETRYABLE_STRING_FALLBACK): 155 | ret = True 156 | return ret 157 | elif 200 <= status < 300: 158 | # failure during response body download or parsing, so success 159 | # codes should be retried 160 | return True 161 | elif 300 <= status < 500: 162 | # response code 404 should be retried if secondary was used 163 | if (status == 404 and 164 | context.location_mode == 165 | azure.storage.common.models.LocationMode.SECONDARY): 166 | return True 167 | # response code 408 is a timeout and should be retried 168 | # response code 429 is too many requests (throttle) 169 | # TODO use "Retry-After" header for backoff amount 170 | if status == 408 or status == 429: 171 | return True 172 | return False 173 | elif status >= 500: 174 | # response codes 500 and above should be retried except for 175 | # 501 (not implemented) and 505 (version not supported) 176 | if status == 501 or status == 505: 177 | return False 178 | return True 179 | else: # noqa 180 | # this should be unreachable, retry anyway 181 | return True 182 | 183 | def retry(self, context): 184 | # type: (ExponentialRetryWithMaxWait, 185 | # azure.storage.common.models.RetryContext) -> int 186 | """Retry
handler 187 | :param ExponentialRetryWithMaxWait self: this 188 | :param azure.storage.common.models.RetryContext context: retry context 189 | :rtype: int or None 190 | :return: int 191 | """ 192 | return self._retry(context, self._backoff) 193 | 194 | def _backoff(self, context): 195 | # type: (ExponentialRetryWithMaxWait, 196 | # azure.storage.common.models.RetryContext) -> int 197 | """Backoff calculator 198 | :param ExponentialRetryWithMaxWait self: this 199 | :param azure.storage.common.models.RetryContext context: retry context 200 | :rtype: int 201 | :return: backoff amount 202 | """ 203 | self._backoff_count += 1 204 | if self._backoff_count == 1: 205 | self._last_backoff = self.initial_backoff 206 | else: 207 | self._last_backoff *= 2 208 | if self._last_backoff > self.max_backoff and self.reset_at_max: 209 | self._backoff_count = 1 210 | self._last_backoff = self.initial_backoff 211 | return self._last_backoff 212 | -------------------------------------------------------------------------------- /blobxfer/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation 2 | # 3 | # All rights reserved. 4 | # 5 | # MIT License 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | # DEALINGS IN THE SOFTWARE. 
24 | 25 | __version__ = '1.11.0' 26 | -------------------------------------------------------------------------------- /cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/blobxfer/b1ed6fb474766b049bc36ab6ade55f246ab5b78c/cli/__init__.py -------------------------------------------------------------------------------- /cli/azure.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/blobxfer/b1ed6fb474766b049bc36ab6ade55f246ab5b78c/cli/azure.ico -------------------------------------------------------------------------------- /cli/file_version_info.txt: -------------------------------------------------------------------------------- 1 | VSVersionInfo( 2 | ffi=FixedFileInfo( 3 | filevers={BUILDVER_TUPLE}, 4 | prodvers={BUILDVER_TUPLE}, 5 | mask=0x3f, 6 | flags=0x0, 7 | OS=0x40004, 8 | fileType=0x1, 9 | subtype=0x0, 10 | date=(0, 0) 11 | ), 12 | kids=[ 13 | StringFileInfo( 14 | [ 15 | StringTable( 16 | u'040904B0', 17 | [StringStruct(u'CompanyName', u'Microsoft Corporation'), 18 | StringStruct(u'FileDescription', u'blobxfer ({BRANCH_GITSHA1})'), 19 | StringStruct(u'FileVersion', u'{BUILDVER_DOTTED} ({BRANCH_GITSHA1})'), 20 | StringStruct(u'InternalName', u'blobxfer'), 21 | StringStruct(u'LegalCopyright', u'\xa9 Microsoft Corporation. All rights reserved.'), 22 | StringStruct(u'OriginalFilename', u'{EXE}'), 23 | StringStruct(u'ProductName', u'blobxfer'), 24 | StringStruct(u'ProductVersion', u'{BUILDVER_DOTTED}')]) 25 | ]), 26 | VarFileInfo([VarStruct(u'Translation', [1033, 1200])]) 27 | ] 28 | ) 29 | -------------------------------------------------------------------------------- /docker/gen_3rd_party_notices.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | TPNFILE=../THIRD_PARTY_NOTICES.txt 7 | 8 | DEPENDENCIES=( 9 | azure-storage-python 10 | https://github.com/Azure/azure-storage-python 11 | https://github.com/Azure/azure-storage-python/raw/master/LICENSE.txt 12 | bitstring 13 | https://github.com/scott-griffiths/bitstring 14 | https://github.com/scott-griffiths/bitstring/raw/master/LICENSE 15 | click 16 | https://github.com/pallets/click 17 | https://github.com/pallets/click/raw/main/LICENSE.rst 18 | cryptography 19 | https://github.com/pyca/cryptography 20 | https://github.com/pyca/cryptography/raw/main/LICENSE.BSD 21 | Python 22 | https://python.org 23 | https://github.com/python/cpython/raw/main/LICENSE 24 | python-dateutil 25 | https://github.com/dateutil/dateutil 26 | https://github.com/dateutil/dateutil/raw/master/LICENSE 27 | requests 28 | https://github.com/requests/requests 29 | https://github.com/requests/requests/raw/master/LICENSE 30 | ruamel.yaml 31 | https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/ 32 | "https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/LICENSE?format=raw" 33 | ) 34 | DEPLEN=${#DEPENDENCIES[@]} 35 | 36 | add_attribution() { 37 | name=$1 38 | url=$2 39 | license=$(curl -fSsL "$3") 40 | 41 | { echo ""; echo "-------------------------------------------------------------------------------"; \ 42 | echo ""; echo "$name ($url)"; echo ""; echo "$license"; } >> $TPNFILE 43 | } 44 | 45 | cat << 'EOF' > $TPNFILE 46 | Do Not Translate or Localize 47 | 48 | This file is based on or incorporates material from the projects listed 49 | below (Third Party IP). 
The original copyright notice and the license under 50 | which Microsoft received such Third Party IP, are set forth below. Such 51 | licenses and notices are provided for informational purposes only. Microsoft 52 | licenses the Third Party IP to you under the licensing terms for the 53 | Microsoft product. Microsoft reserves all other rights not expressly 54 | granted under this agreement, whether by implication, estoppel or otherwise. 55 | EOF 56 | 57 | echo -n "Generating $((DEPLEN / 3)) attributions: [" 58 | i=0 59 | while [ $i -lt "$DEPLEN" ]; do 60 | add_attribution "${DEPENDENCIES[$i]}" "${DEPENDENCIES[$((i+1))]}" "${DEPENDENCIES[$((i+2))]}" 61 | i=$((i + 3)) 62 | echo -n "." 63 | done 64 | { echo ""; echo "-------------------------------------------------------------------------------"; } >> $TPNFILE 65 | echo "] done." 66 | -------------------------------------------------------------------------------- /docker/linux/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile for Azure/blobxfer (Linux) 2 | 3 | FROM python:3.9.7-alpine3.14 4 | MAINTAINER Fred Park 5 | 6 | ARG GIT_BRANCH 7 | ARG GIT_COMMIT 8 | 9 | RUN apk update \ 10 | && apk add --update --no-cache \ 11 | musl build-base openssl-dev libffi-dev rust cargo ca-certificates git \ 12 | && python3 -m ensurepip --upgrade \ 13 | && pip3 install --no-cache-dir --upgrade pip setuptools setuptools-rust wheel \ 14 | && git clone -b $GIT_BRANCH --single-branch --depth 5 https://github.com/Azure/blobxfer.git /blobxfer \ 15 | && cd /blobxfer \ 16 | && git checkout $GIT_COMMIT \ 17 | && pip3 install --no-cache-dir -e . \ 18 | && python3 setup.py install \ 19 | && cp THIRD_PARTY_NOTICES.txt /BLOBXFER_THIRD_PARTY_NOTICES.txt \ 20 | && cp LICENSE /BLOBXFER_LICENSE.txt \ 21 | && pip3 uninstall -y setuptools-rust wheel \ 22 | && apk del --purge build-base patch openssl-dev libffi-dev rust cargo git \ 23 | && rm /var/cache/apk/* \ 24 | && rm -rf /root/.cache /root/.cargo \ 25 | && rm -rf /blobxfer 26 | 27 | ENTRYPOINT ["blobxfer"] 28 | -------------------------------------------------------------------------------- /docker/linux/hooks/build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | printenv 4 | docker build --build-arg GIT_BRANCH=$SOURCE_BRANCH --build-arg GIT_COMMIT=$GIT_SHA1 -t $IMAGE_NAME . 5 | -------------------------------------------------------------------------------- /docker/win/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile for Azure/blobxfer (Windows) 2 | 3 | FROM python:3.9.7-windowsservercore-1809 4 | MAINTAINER Fred Park 5 | 6 | ENV chocolateyUseWindowsCompression false 7 | RUN [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 ; \ 8 | iex ((new-object net.webclient).DownloadString('https://chocolatey.org/install.ps1')) ; \ 9 | choco install --no-progress -y git -params "/GitAndUnixToolsOnPath" 10 | 11 | ARG GIT_BRANCH 12 | ARG GIT_COMMIT 13 | 14 | WORKDIR C:\\blobxfer 15 | RUN git clone -b $Env:GIT_BRANCH --single-branch --depth 5 https://github.com/Azure/blobxfer.git C:\blobxfer ; \ 16 | git checkout $Env:GIT_COMMIT ; \ 17 | pip install --no-cache-dir -e . 
; \ 18 | python setup.py install 19 | 20 | RUN python -m compileall C:\Python\Lib\site-packages ; \ 21 | exit 0 22 | 23 | FROM mcr.microsoft.com/windows/nanoserver:1809 24 | 25 | COPY --from=0 /Python /Python 26 | COPY --from=0 /blobxfer/THIRD_PARTY_NOTICES.txt /BLOBXFER_THIRD_PARTY_NOTICES.txt 27 | COPY --from=0 /blobxfer/LICENSE /BLOBXFER_LICENSE.txt 28 | 29 | SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] 30 | 31 | ENTRYPOINT ["blobxfer"] 32 | -------------------------------------------------------------------------------- /docs/01-installation.md: -------------------------------------------------------------------------------- 1 | # blobxfer Installation 2 | There are multiple available options for installing `blobxfer`. If you 3 | only require the CLI functionality, then you can install via one of 4 | three methods: 5 | 6 | * [Python package from PyPI](#pypi) (i.e., `pip install`) 7 | * [Pre-built binary](#binary) 8 | * [Docker image](#docker) 9 | 10 | If you require the `blobxfer` data movement library, then you will 11 | need to install the [Python package from PyPI](#pypi). 12 | 13 | ## Python Package from PyPI 14 | `blobxfer` is a pure Python package; however, some dependencies require a C 15 | compiler and supporting libraries if there is no binary wheel for that 16 | dependency on your platform. Please follow the pre-requisites section 17 | prior to invoking installation via `pip`. 18 | 19 | It is strongly recommended to use a 64-bit Python interpreter. Python 2 20 | is not supported. 21 | 22 | ### Pre-requisites 23 | `blobxfer` has dependencies which require a C compiler if your platform does 24 | not have pre-made binary wheels for these dependencies. Please follow the 25 | instructions below for your platform. You will need to run the following 26 | commands via `sudo` or as root. 27 | 28 | #### Ubuntu 29 | ```shell 30 | # for Python3 31 | apt-get update 32 | apt-get install -y build-essential libssl-dev libffi-dev python3-dev python3-pip 33 | ``` 34 | 35 | #### CentOS/RHEL 36 | ```shell 37 | # for Python3 38 | yum install -y epel-release 39 | yum install -y python36-devel gcc openssl-devel libffi-devel 40 | curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3 41 | ``` 42 | 43 | #### SLES/OpenSUSE 44 | ```shell 45 | # for Python3 46 | zypper ref 47 | zypper -n in gcc libopenssl-devel libffi48-devel python3-devel 48 | curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3 49 | ``` 50 | 51 | #### Mac OS X 52 | To install `blobxfer` for Python 3.6+, please follow the steps outlined on 53 | [this guide](http://docs.python-guide.org/en/latest/starting/install3/osx/#install3-osx) 54 | to ensure that you have the latest version of Python, a compiler and pip. 55 | 56 | #### Windows 57 | Please install Python 3.6 or later to avoid requiring a 58 | compiler. It is strongly recommended to use a 64-bit interpreter. 59 | 60 | #### Windows Subsystem for Linux 61 | Please follow the same instructions for the installed Linux distribution. 62 | 63 | ### Installation via `pip` 64 | After the pre-requisite steps have been completed, install the 65 | [blobxfer](https://pypi.python.org/pypi/blobxfer) Python package: 66 | 67 | ```shell 68 | # for Python3 69 | pip3 install blobxfer 70 | ``` 71 | 72 | `blobxfer` is compatible with Python 3.6+. To install for Python 3, some 73 | distributions may use `pip3` instead of `pip`.
Installing into a virtual 74 | environment or your user area via `--user` is recommended to avoid 75 | installation issues and conflicts with system-wide Python packages. 76 | 77 | ## Pre-built Binary 78 | Download an appropriate [Release](https://github.com/Azure/blobxfer/releases) 79 | binary for your operating system. Pre-built binaries are not available 80 | for all platforms and architectures at this time. 81 | 82 | Note that for the Linux pre-built binary, it may not work on all 83 | distributions. If this is the case, please pick an alternate installation 84 | method. After downloading the binary, make sure that the executable bit is 85 | set via `chmod +x` prior to attempting to execute the file. 86 | 87 | ## Docker Image 88 | [blobxfer](https://hub.docker.com/_/microsoft-blobxfer) is available on the 89 | Microsoft Container Registry and can be retrieved with: 90 | 91 | ```shell 92 | # Linux 93 | docker pull mcr.microsoft.com/blobxfer 94 | 95 | # Windows 96 | docker pull mcr.microsoft.com/blobxfer:latest-windows 97 | ``` 98 | 99 | Please note that when invoking the Docker image, you will need to ensure 100 | proper mapping of host to container mount points. 101 | -------------------------------------------------------------------------------- /docs/30-vectored-io.md: -------------------------------------------------------------------------------- 1 | # blobxfer Vectored I/O 2 | `blobxfer` supports Vectored I/O (scatter/gather) which can help alleviate 3 | problems associated with 4 | [single blob or single fileshare throughput limits](https://docs.microsoft.com/en-us/azure/storage/storage-scalability-targets). 5 | Additionally, `blobxfer` has the ability to replicate a single source to 6 | multiple destinations to allow for increased resiliency or throughput for 7 | consumption later. 8 | 9 | ## Distribution Modes 10 | `blobxfer` supports two distribution modes: `replica` and `stripe`. The 11 | following sections describe each. 12 | 13 | ### Replica 14 | `replica` mode replicates an entire file (or set of files) across all 15 | specified destinations. This allows for multiple backups, resiliency, 16 | and potentially increased download throughput later if the clients understand 17 | how to download from multiple sources. 18 | 19 | The logic is fairly simple in how this is accomplished. Each source file 20 | has portions of the file read from disk, buffered in memory and then 21 | replicated across all specified destinations. 22 | 23 | ``` 24 | Whole File +---------------------+ 25 | Replication | | 26 | +------------------------------> | Destination 0: | 27 | | | Storage Account A | 28 | | | | 29 | | +---------------------+ 30 | | 31 | | 32 | +------------+---------------+ Whole File +---------------------+ 33 | | | Replication | | 34 | | 10 GiB VHD on Local Disk +--------------> | Destination 1: | 35 | | | | Storage Account B | 36 | +------------+---------------+ | | 37 | | +---------------------+ 38 | | 39 | | 40 | | +---------------------+ 41 | | Whole File | | 42 | | Replication | Destination 2: | 43 | +------------------------------> | Storage Account C | 44 | | | 45 | +---------------------+ 46 | ``` 47 | 48 | In order to take advantage of `replica` Vectored IO, you must use a YAML 49 | configuration file to define multiple destinations. 50 | 51 | ### Stripe 52 | `stripe` mode will splice a file into multiple chunks and scatter these 53 | chunks across destinations specified. 
These destinations can be single or 54 | multiple containers within the same storage account or even containers 55 | distributed across multiple storage accounts if single storage account 56 | bandwidth limits are insufficient. 57 | 58 | `blobxfer` will slice the source file into multiple chunks where the 59 | `stripe_chunk_size_bytes` is the stripe width of each chunk. This parameter 60 | will allow you to effectively control how many blobs/files are created on 61 | Azure. `blobxfer` will then round-robin through all of the destinations 62 | specified to scatter the slices. Information required to reconstruct the 63 | original file is stored in the blob or file metadata. It is important to 64 | keep this metadata intact or reconstruction will fail. 65 | 66 | ``` 67 | +---------------------+ 68 | | | <-----------------------------------+ 69 | | Destination 1: | | 70 | | Storage Account B | <---------------------+ | 71 | | | | | 72 | +---------------------+ <-------+ | | 73 | | | | 74 | ^ ^ | | | 75 | | | | | | 76 | 1 GiB Stripe | | | | | 77 | +-----------------------------+ Width +------+---+--+------+---+--+------+---+--+------+---+--+------+---+--+ 78 | | | | | | | | | | | | | | 79 | | 10 GiB File on Local Disk | +-----------> | D0 | D1 | D0 | D1 | D0 | D1 | D0 | D1 | D0 | D1 | 80 | | | | | | | | | | | | | | 81 | +-----------------------------+ 10 Vectored +---+--+------+---+--+------+---+--+------+---+--+------+---+--+------+ 82 | Slices | | | | | 83 | | | | | | 84 | | v | | | 85 | | | | | 86 | +> +---------------------+ <+ | | 87 | | | | | 88 | | Destination 0: | <--------------+ | 89 | | Storage Account A | | 90 | | | <----------------------------+ 91 | +---------------------+ 92 | ``` 93 | 94 | In order to take advantage of `stripe` Vectored IO across multiple 95 | destinations, you must use a YAML configuration file. Additionally, when 96 | downloading a striped blob, you must specify all storage account locations 97 | of the striped blob in the `azure_storage` section of your YAML 98 | configuration file. 99 | -------------------------------------------------------------------------------- /docs/40-client-side-encryption.md: -------------------------------------------------------------------------------- 1 | # blobxfer Client-side Encryption Notes 2 | Please read the following carefully regarding client-side encryption support 3 | in `blobxfer`. Additionally, current limitations for client-side encryption 4 | can be found [here](99-current-limitations.md). 5 | 6 | * Encryption is performed using AES256-CBC. MACs are generated using 7 | HMAC-SHA256. 8 | * All required information regarding the encryption process is stored on 9 | each blob's `encryptiondata` and `encryptiondata_authentication` metadata 10 | fields. These metadata entries are used on download to configure the proper 11 | download parameters for the decryption process as well as to authenticate 12 | the `encryptiondata` metadata and the encrypted entity. Encryption metadata 13 | set by `blobxfer` (or any Azure Storage SDK) should not be modified or 14 | the blob/file may be unrecoverable. 15 | * Keys for the AES256 block cipher are generated on a per-blob/file basis. 16 | These keys are encrypted using RSAES-OAEP and encoded in the metadata. 17 | * MD5 for both the pre-encrypted and encrypted version of the file is stored 18 | in the entity metadata, if enabled. `skip_on` options will still work 19 | transparently with encrypted blobs/files.
20 | * HMAC-SHA256 checks over encrypted data are performed instead of MD5 over 21 | unencrypted data to validate integrity if both are present. 22 | * An upload will be skipped if the target file already exists in Azure 23 | Storage unencrypted and any `skip_on` match condition succeeds. This 24 | behavior can be overridden by deleting the target file in Azure Storage 25 | or by disabling the `skip_on` behavior. 26 | * Re-uploading a file as an encrypted blob with a different RSA key will not 27 | occur if the file content MD5 is the same. This behavior can be overridden 28 | by deleting the target file in Azure Storage or by disabling the `skip_on` 29 | `md5_match` behavior. 30 | * Zero-byte files are not encrypted. 31 | -------------------------------------------------------------------------------- /docs/80-blobxfer-python-library.md: -------------------------------------------------------------------------------- 1 | # `blobxfer` Python Data Movement Library 2 | `blobxfer` is composed of two main components: the CLI tool and the data 3 | movement library. The `blobxfer` CLI tool is built on top of the `blobxfer` 4 | data movement library. 5 | 6 | ## `blobxfer` Python Package structure 7 | The `blobxfer` Python package is laid out as follows: 8 | 9 | ``` 10 | ├── blobxfer 11 | │   ├── models 12 | │   └── operations 13 | │   └── azure 14 | │   └── blob 15 | ├── cli 16 | ... 17 | ``` 18 | 19 | The `blobxfer` CLI tool is entirely contained in the `cli` directory and 20 | is thus not part of the `blobxfer` data movement library. To import the 21 | `blobxfer` data movement library, you would simply perform `import` statements 22 | such as `import blobxfer`. 23 | 24 | ## High-Level Operations: `blobxfer.api` 25 | The high-level `blobxfer` API is found in the `blobxfer.api` module. This 26 | module exposes each of the operations: `Downloader`, `SyncCopy` and 27 | `Uploader`. 28 | 29 | These high-level operations classes allow you to input various options for 30 | each type of operation and allow the `blobxfer` data movement library to 31 | do the rest without having to construct each of the pieces yourself. For 32 | example, to download a set of blobs, you would invoke the `Downloader` 33 | similar to the following: 34 | 35 | ```python 36 | # Downloader Example 37 | 38 | import blobxfer.api 39 | 40 | 41 | # construct general options 42 | general_options = blobxfer.api.GeneralOptions(...) 43 | 44 | # construct download options 45 | download_options = blobxfer.api.DownloadOptions(...) 46 | 47 | # construct skip on options 48 | skip_on_options = blobxfer.api.SkipOnOptions(...) 49 | 50 | # construct local destination path 51 | local_destination_path = blobxfer.api.LocalDestinationPath(...) 52 | 53 | # construct specification 54 | specification = blobxfer.api.DownloadSpecification( 55 | download_options, 56 | skip_on_options, 57 | local_destination_path) 58 | 59 | # construct credentials 60 | credentials = blobxfer.api.AzureStorageCredentials(general_options) 61 | credentials.add_storage_account(...) 62 | 63 | # construct an Azure source path and add it to the specification 64 | asp = blobxfer.api.AzureSourcePath() 65 | asp.add_path_with_storage_account(...)
66 | specification.add_azure_source_path(asp) 67 | 68 | # execute downloader 69 | downloader = blobxfer.api.Downloader( 70 | general_options, 71 | credentials, 72 | specification) 73 | downloader.start() 74 | ``` 75 | 76 | ## Canonical Example of Library Use: `cli` 77 | As the `blobxfer` CLI is built on top of the `blobxfer` data movement library, 78 | examining the contents of the 79 | [`cli` directory](https://github.com/Azure/blobxfer/tree/master/cli) will 80 | provide you with a code sample of how to utilize the `blobxfer` data movement 81 | library for your own Python programs and modules. 82 | -------------------------------------------------------------------------------- /docs/98-performance-considerations.md: -------------------------------------------------------------------------------- 1 | # blobxfer Performance Considerations 2 | Please read the following carefully regarding considerations that should 3 | be applied with regard to performance and `blobxfer`. Additionally, 4 | please review the 5 | [Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/) 6 | for an overview of general performance targets that apply to Azure Blobs, 7 | File shares and Storage Account types (GRS, LRS, ZRS, etc). 8 | 9 | ## Concurrency 10 | * `blobxfer` offers four concurrency knobs. Each one should be tuned for 11 | maximum performance according to your system and network characteristics. 12 | * Disk threads: concurrency in reading (uploads) and writing (downloads) 13 | to disk is controlled by the number of disk threads. 14 | * Transfer threads: concurrency in the number of threads transferring 15 | from/to Azure Storage is controlled by the number of transfer threads. 16 | * MD5 processes: computing MD5 for potential omission from transfer due 17 | to `skip_on` `md5_match` being specified is offloaded to the specified 18 | number of processors. 19 | * Crypto processes: decrypting encrypted blobs and files can be offloaded 20 | to the specified number of processors. Due to the inherent 21 | non-parallelizable encryption algorithm used, this is ignored for 22 | encryption (uploads). 23 | * The thread concurrency options (disk and transfer) can be set to a 24 | non-positive number to be automatically set as a multiple of the number of 25 | cores available on the machine. 26 | * For uploads, there should be a sufficient number of disk threads to ensure 27 | that all transfer threads have work to do. For downloads, there should be 28 | a sufficient number of disk threads to write data to disk so transfer threads 29 | are not artificially blocked. 30 | 31 | ## Chunk Sizing 32 | Chunk sizing refers to the `chunk_size_bytes` option, the meaning of which 33 | varies with the context of uploading or downloading. To ensure that 34 | [high-throughput block blob](https://azure.microsoft.com/blog/high-throughput-with-azure-blob-storage/) 35 | upload speeds are enabled (only for block blobs), your chunk size should be 36 | greater than 4MiB. With default chunk sizes, this behavior should be enabled 37 | automatically. 38 | 39 | ## Timeouts 40 | `blobxfer` uses two timeout values, a connect timeout and a read timeout. 41 | The read timeout should be set to a value large 42 | enough to transmit each chunk given your bandwidth limitations. 43 | 44 | ### Uploads 45 | For uploads, chunk sizes correspond to the maximum amount of data to transfer 46 | with a single request.
The Azure Storage service imposes maximums depending 47 | upon the type of entity that is being written. For block blobs, the maximum 48 | is 100MiB (although you may "one-shot" up to 256MiB). For page blobs, the 49 | maximum is 4MiB. For append blobs, the maximum is 4MiB. For Azure Files, 50 | the maximum is 4MiB. 51 | 52 | For block blobs, setting the chunk size to something greater than 4MiB will 53 | not only allow you larger file sizes (recall that the maximum number of 54 | blocks for a block blob is 50000, thus at 100MiB blocks, you can create a 55 | 4.768TiB block blob object) but will allow you to amortize larger portions of 56 | data transfer over each request/response overhead. Additionally, this will 57 | activate HTBB (please see chunk size above) for high throughput transfers. 58 | `blobxfer` can automatically select the proper block size given your file, 59 | but will not automatically tune the chunk size as that depends upon your 60 | system, network characteristics, and source storage scalability targets. 61 | 62 | ### Downloads 63 | For downloads, chunk sizes correspond to the maximum amount of data to 64 | request from the server for each request. It is important to keep a balance 65 | between the chunk size and the number of in-flight operations afforded by 66 | the `transfer_threads` concurrency control. `blobxfer` does not automatically 67 | tune this (but can automatically set it to a value that should work for 68 | most situations) due to varying system and network conditions. 69 | 70 | Additionally, disk write performance is typically lower than disk read 71 | performance so you need to ensure that the number of `disk_threads` is not 72 | set to a very large number to prevent thrashing and highly random write 73 | patterns. 74 | 75 | ### Synccopy 76 | For sync copy sources which are block blobs, the block size will determine 77 | the chunk size. Thus, block blobs with block sizes which fall below the 78 | HTBB chunk size cut off will not be eligible for HTBB throughput speeds. 79 | 80 | ## Azure File Share Performance 81 | File share performance can be "slow" or become a bottleneck, especially for 82 | file shares containing thousands of files as multiple REST calls must be 83 | performed for each file. Currently, a single file share has a limit of up 84 | to 60 MB/s and 1000 8KB IOPS. Please refer to the 85 | [Azure Storage Scalability and Performance Targets](https://azure.microsoft.com/en-us/documentation/articles/storage-scalability-targets/) 86 | for performance targets and limits regarding Azure Storage File shares. 87 | If scalable high performance is required, consider using Blob storage 88 | instead. 89 | 90 | ## MD5 Hashing 91 | MD5 hashing will impose some performance penalties to check if the file 92 | should be uploaded or downloaded. For instance, if uploading and the local 93 | file is determined to be different than its remote counterpart, then the 94 | time spent performing the MD5 comparison is effectively "lost." 95 | 96 | ## Client-side Encryption 97 | Client-side encryption will naturally impose a performance penalty on 98 | `blobxfer` both for uploads (encrypting) and downloads (decrypting) depending 99 | upon the processor speed and number of cores available. Additionally, for 100 | uploads, encryption is not parallelizable within an object and is in-lined 101 | with the main process. 
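As a concrete starting point for tuning, the following sketch shows an upload invocation that explicitly sets the concurrency and chunk sizing knobs described above instead of relying on the automatic defaults. The flag values are illustrative placeholders to adjust for your own system, network characteristics, and storage scalability targets, and the option names assume the CLI usage documentation; this is a sketch, not a recommendation.

```shell
# illustrative tuning sketch: the values below are placeholders, not
# recommendations; measure and adjust for your system and network
blobxfer upload \
    --storage-account mystorageaccount \
    --storage-account-key "$STORAGE_KEY" \
    --remote-path mycontainer/data \
    --local-path /data/to/upload \
    --transfer-threads 24 \
    --disk-threads 16 \
    --chunk-size-bytes 16777216
```

Note that the 16MiB chunk size in this sketch keeps block blob uploads above the 4MiB high-throughput cut off discussed in the Chunk Sizing section.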
102 | 103 | ## Resume Files (Databases) 104 | Enabling resume support may slightly impact performance as a key-value shelve 105 | for bookkeeping is kept on disk and is updated frequently. 106 | 107 | ## pyOpenSSL 108 | As of requests 2.6.0 and Python versions < 2.7.9 (i.e., the interpreter found 109 | on default Ubuntu 14.04 installations; 16.04 is not affected), if certain 110 | packages are installed, such as those found in `requests[security]`, then the 111 | underlying urllib3 package will utilize the `ndg-httpsclient` package which 112 | will use `pyOpenSSL`. This will ensure the peers are fully validated. However, 113 | this incurs a rather large performance penalty. If you understand the 114 | potential security risks of disabling this behavior due to high performance 115 | requirements, you can either remove `ndg-httpsclient` or use `blobxfer` in a 116 | virtualenv environment without the `ndg-httpsclient` package. Python 117 | versions >= 2.7.9 are not affected by this issue. 118 | 119 | Additionally, `urllib3` (which `requests` uses) may use `pyOpenSSL`, which 120 | may result in exceptions being thrown that are not normalized by `urllib3`. 121 | This may result in exceptions that should be retried, but are not. It is 122 | recommended to upgrade your Python to a version where `pyOpenSSL` is not 123 | required for fully validating peers, such that `blobxfer` can operate without 124 | `pyOpenSSL` in a secure fashion. You can also run `blobxfer` via Docker 125 | or in a virtualenv environment without `pyOpenSSL`. 126 | -------------------------------------------------------------------------------- /docs/99-current-limitations.md: -------------------------------------------------------------------------------- 1 | # blobxfer Current Limitations 2 | Please read this section carefully for any current known limitations to 3 | `blobxfer`. 4 | 5 | ### SAS Keys (Tokens) 6 | `blobxfer` is fundamentally limited in what operations it can perform 7 | if given a restricted scope SAS token. This is not a limitation with 8 | `blobxfer` itself, but with the permissions that are granted by the SAS 9 | token. The following are a few examples: 10 | 11 | * Containers or file shares cannot be created if not given an 12 | account-level SAS with the appropriate signed resource type. 13 | * Objects cannot be listed within a container or file share if not given 14 | an account-level SAS or a container-level object SAS. If a 15 | non-container object-level SAS is provided, remote 16 | paths associated with these SAS tokens must be a single entity. 17 | * Non-download skip-on processing cannot be performed for SAS tokens 18 | without sufficient read permission. 19 | * Because policy-based SASes cannot be introspected for permission scope, 20 | `blobxfer` will assume permissions granting read, write, and list. 21 | 22 | ### Client-side Encryption 23 | * Client-side encryption is currently only available for block blobs and 24 | Azure Files. 25 | * Azure KeyVault key references are currently not supported. 26 | 27 | ### Platform-specific 28 | * File attribute store/restore is currently not supported on Windows. 29 | 30 | ### Resume Support 31 | * Encrypted uploads/downloads cannot currently be resumed as the Python 32 | SHA256 object cannot be pickled. 33 | * Append blobs currently cannot be resumed for upload.
34 | 35 | ### `stdin` Limitations 36 | * `stdin` uploads with `--mode` set to `page` without the 37 | `--stdin-as-page-blob-size` parameter will allocate a maximum-sized page 38 | blob, which is then resized once the `stdin` source completes. If such 39 | an upload fails, the page blob will remain maximum sized and will be 40 | charged as such; no cleanup is performed if the upload fails. 41 | * `stdin` sources cannot be resumed. 42 | * `stdin` sources cannot be encrypted. 43 | * `stdin` sources cannot be stripe vectorized for upload. 44 | * For optimal performance, `--chunk-size-bytes` should match the "chunk size" 45 | that is being written to `stdin`. For example, if you were using `dd`, you 46 | should set the block size (`bs`) parameter to be the same as the 47 | `--chunk-size-bytes` parameter. 48 | 49 | ### Azure File Limitations 50 | * Empty directories are not created locally when downloading from an Azure 51 | File share that has empty directories. 52 | * Empty directories are not deleted if `--delete` is specified and no files 53 | remain in the directory on the Azure File share. 54 | * Please see [this article](https://msdn.microsoft.com/en-us/library/azure/dn744326.aspx) 55 | for general limitations with Azure File Shares. 56 | 57 | ### Synccopy Limitations 58 | * Synchronous copy between containers and file shares within the same storage 59 | account must be invoked with a storage key or an account-level SAS token with 60 | access to both services on both the source and destination. Single object 61 | copies are not affected. 62 | * Synchronous copy operations between Azure source and remote paths are 63 | limited to transfers within the same Azure cloud (i.e., the source and 64 | destination `endpoint` must match). For example, you can synchronously copy 65 | within or between storage accounts in the Azure Public cloud, 66 | e.g., `core.windows.net`, but not between Azure clouds, e.g., 67 | `core.windows.net` and `core.usgovcloudapi.net`. Single object copies between 68 | clouds can be performed by providing a full source URL as the `--remote-path`. 69 | 70 | ### Other Limitations 71 | * MD5 is not computed for append blobs. 72 | * Virtual directories in Azure with empty names (i.e., consecutive slashes), 73 | e.g., `mycontainer//mydir`, are not supported. 74 | * Downloading of a remote path is based on prefix-matching; see the sketch at 75 | the end of this section. Thus, a remote path 76 | of `mycontainer/mydir` will also download `mycontainer/mydirfile.txt` and 77 | `mycontainer/mydir1` in addition to `mycontainer/mydir`. To only download the 78 | contents of `mycontainer/mydir`, please specify an `--include` filter. For 79 | this example, the include filter would be `--include mydir/*`. Ensure that 80 | the parameter is quoted or shell globbing is disabled. Note that a 81 | remote path of `mycontainer/mydir/` will not work as intended as, internally, 82 | `blobxfer` will strip the trailing slash. 83 | * `/dev/null` or `nul` destinations are not supported. 84 | * Access tiers can only be applied to block blobs on either 85 | Blob Storage or General Purpose V2 Storage accounts. Please see 86 | [this article](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers) 87 | for more information. 88 | * A storage object with the same name as a virtual directory is not copied 89 | during a `synccopy` operation.
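To make the prefix-matching bullet above concrete, the following sketch models the matching as plain Python string logic; the blob names are hypothetical and this is not `blobxfer`'s implementation:

```python
# A remote path of mycontainer/mydir matches any blob whose name
# begins with "mydir", not only blobs under the mydir/ directory.
import fnmatch

names = ['mydir', 'mydir/file.txt', 'mydirfile.txt', 'mydir1/x.bin']
matched = [n for n in names if n.startswith('mydir')]
print(matched)  # all four names match the prefix

# an --include filter of mydir/* narrows the result to the
# directory contents only
filtered = [n for n in matched if fnmatch.fnmatch(n, 'mydir/*')]
print(filtered)  # ['mydir/file.txt']
```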
-------------------------------------------------------------------------------- /docs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # blobxfer Documentation and Guides 2 | Please see the 3 | [blobxfer Documentation on Read the Docs](http://blobxfer.readthedocs.io/). 4 | 5 | You can substitute a version tag for `latest` at the end of the Read the Docs 6 | URL to display documentation for that specific version of blobxfer. If that version does 7 | not exist on Read the Docs, you can select a tagged version on the GitHub 8 | repo page (select the `Branch` pulldown and switch to the `Tags` tab) to 9 | display older version documentation. 10 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/sample_config.yaml: -------------------------------------------------------------------------------- 1 | # configuration version 2 | version: 1 3 | 4 | # general options 5 | options: 6 | log_file: blobxfer.log 7 | progress_bar: true 8 | verbose: false 9 | 10 | # azure storage credentials 11 | azure_storage: 12 | endpoint: core.windows.net 13 | accounts: 14 | mystorageaccount: ABCDE... 15 | 16 | # download specification 17 | download: 18 | - source: 19 | - mystorageaccount: some/remote/path 20 | destination: mybackup/dir 21 | options: 22 | check_file_md5: true 23 | chunk_size_bytes: 4194304 24 | delete_extraneous_destination: false 25 | mode: auto 26 | overwrite: true 27 | recursive: true 28 | rename: false 29 | restore_file_attributes: false 30 | rsa_private_key: my_private_key.pem 31 | rsa_private_key_passphrase: null 32 | skip_on: 33 | filesize_match: false 34 | lmt_ge: false 35 | md5_match: true 36 | 37 | # synccopy specification 38 | synccopy: 39 | - source: 40 | - mystorageaccount: source/path 41 | destination: 42 | - myotherstorageaccount: dest/path 43 | include: 44 | - "*.dat" 45 | exclude: 46 | - "*.tmp" 47 | options: 48 | delete_extraneous_destination: false 49 | dest_mode: auto 50 | mode: auto 51 | overwrite: true 52 | recursive: true 53 | rename: false 54 | skip_on: 55 | filesize_match: false 56 | lmt_ge: false 57 | md5_match: true 58 | 59 | # upload specification 60 | upload: 61 | - source: 62 | - /path/to/source 63 | destination: 64 | - mystorageaccount: destination/dir 65 | include: 66 | - "*.txt" 67 | - "*.bin" 68 | exclude: 69 | - "*.tmp" 70 | options: 71 | mode: auto 72 | chunk_size_bytes: 4194304 73 | delete_extraneous_destination: false 74 | one_shot_bytes: 33554432 75 | overwrite: true 76 | recursive: true 77 | rename: false 78 | rsa_public_key: my_public_key.pem 79 | skip_on: 80 | filesize_match: false 81 | lmt_ge: false 82 | md5_match: true 83 | store_file_properties: 84 | attributes: false 85 | md5: true 86 | strip_components: 0 87 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: blobxfer 2 | site_description: Azure blobxfer 3 | site_url: https://github.com/Azure/blobxfer 4 | repo_url: https://github.com/Azure/blobxfer 5 | theme: readthedocs 6 | 7 | pages: 8 | - Home: index.md 9 | - 
Installation: 01-installation.md 10 | - Commands and Configuration: 11 | - CLI Commands and Usage: 10-cli-usage.md 12 | - YAML Configuration: 20-yaml-configuration.md 13 | - In-Depth Feature Guides: 14 | - Vectored I/O: 30-vectored-io.md 15 | - Client-side Encryption: 40-client-side-encryption.md 16 | - blobxfer Data Movement Library: 80-blobxfer-python-library.md 17 | - Performance Considerations: 98-performance-considerations.md 18 | - Current Limitations: 99-current-limitations.md 19 | - Change Log: CHANGELOG.md 20 | 21 | markdown_extensions: 22 | - admonition 23 | - codehilite 24 | - fenced_code 25 | - sane_lists 26 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from codecs import open 2 | import re 3 | try: 4 | from setuptools import setup 5 | except ImportError: # noqa 6 | from distutils.core import setup 7 | import sys 8 | 9 | if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: 10 | long_description = open('README.md', 'r', 'utf-8').read() 11 | else: 12 | long_description = '' 13 | 14 | with open('blobxfer/version.py', 'r', 'utf-8') as fd: 15 | version = re.search( 16 | r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 17 | fd.read(), re.MULTILINE).group(1) 18 | 19 | if not version or len(version) == 0: 20 | raise RuntimeError('Cannot find version') 21 | 22 | packages = [ 23 | 'blobxfer', 24 | 'blobxfer.models', 25 | 'blobxfer.operations', 26 | 'blobxfer.operations.azure', 27 | 'blobxfer.operations.azure.blob', 28 | 'blobxfer_cli', 29 | ] 30 | 31 | install_requires = [ 32 | 'azure-storage-blob>=2.1.0,<3', 33 | 'azure-storage-file>=2.1.0,<3', 34 | 'bitstring>=3.1.9,<4', 35 | 'click>=8.0.1,<9', 36 | 'cryptography>=3.3.2', 37 | 'python-dateutil>=2.8.2,<3', 38 | 'requests>=2.26.0,<3', 39 | 'ruamel.yaml>=0.17.3', 40 | ] 41 | 42 | setup( 43 | name='blobxfer', 44 | version=version, 45 | author='Microsoft Corporation', 46 | author_email='', 47 | description='Azure storage transfer tool and data movement library', 48 | platforms='any', 49 | url='https://github.com/Azure/blobxfer', 50 | license='MIT', 51 | long_description=long_description, 52 | long_description_content_type='text/markdown', 53 | packages=packages, 54 | package_data={'blobxfer': ['LICENSE']}, 55 | package_dir={'blobxfer': 'blobxfer', 'blobxfer_cli': 'cli'}, 56 | entry_points={ 57 | 'console_scripts': 'blobxfer=blobxfer_cli.cli:cli', 58 | }, 59 | zip_safe=False, 60 | install_requires=install_requires, 61 | tests_require=['pytest'], 62 | classifiers=[ 63 | 'Development Status :: 5 - Production/Stable', 64 | 'Environment :: Console', 65 | 'Intended Audience :: Developers', 66 | 'Intended Audience :: End Users/Desktop', 67 | 'Intended Audience :: Information Technology', 68 | 'Intended Audience :: System Administrators', 69 | 'License :: OSI Approved :: MIT License', 70 | 'Operating System :: OS Independent', 71 | 'Programming Language :: Python :: 3', 72 | 'Topic :: Software Development :: Libraries :: Python Modules', 73 | 'Topic :: Utilities', 74 | ], 75 | keywords=[ 76 | 'azure', 'storage', 'blob', 'files', 'transfer', 'copy', 'smb', 77 | 'cifs', 'blobxfer', 'azcopy' 78 | ], 79 | ) 80 | -------------------------------------------------------------------------------- /test_requirements.txt: 
-------------------------------------------------------------------------------- 1 | coverage==5.5 2 | flake8==3.9.2 3 | pytest==6.2.5 4 | pytest-cov==2.12.1 5 | -------------------------------------------------------------------------------- /tests/test_blobxfer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for miscellaneous""" 3 | 4 | # stdlib imports 5 | # non-stdlib imports 6 | import azure.storage.common 7 | # module under test 8 | import blobxfer.version 9 | 10 | 11 | def test_user_agent_monkey_patch(): 12 | verstr = 'blobxfer/{}'.format(blobxfer.version.__version__) 13 | assert azure.storage.common._constants.USER_AGENT_STRING_PREFIX.startswith( 14 | verstr) 15 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_azure.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for models azure""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import azure.storage.blob 8 | import azure.storage.file 9 | # local imports 10 | import blobxfer.models.crypto 11 | # module under test 12 | import blobxfer.models.azure as azmodels 13 | 14 | 15 | def test_azurestorageentity(): 16 | ase = azmodels.StorageEntity('cont') 17 | assert ase.container == 'cont' 18 | assert ase.encryption_metadata is None 19 | 20 | blob = mock.MagicMock() 21 | blob.name = 'name' 22 | blob.snapshot = None 23 | blob.properties = mock.MagicMock() 24 | blob.properties.last_modified = 'lmt' 25 | blob.properties.content_length = 123 26 | blob.properties.content_settings = mock.MagicMock() 27 | blob.properties.content_settings.content_md5 = 'abc' 28 | blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob 29 | blob.properties.blob_tier = 'Cool' 30 | ase.populate_from_blob(mock.MagicMock(), blob) 31 | 32 | assert ase.can_create_containers is not None 33 | assert ase.client is not None 34 | assert ase.name == 'name' 35 | assert ase.lmt == 'lmt' 36 | assert ase.size == 123 37 | assert ase.md5 == 'abc' 38 | assert not ase.from_local 39 | assert ase.append_create 40 | assert ase.encryption_metadata is None 41 | assert ase.raw_metadata is None 42 | assert ase.snapshot is None 43 | assert ase.access_tier == 'Cool' 44 | assert ase.mode == azmodels.StorageModes.Block 45 | 46 | blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob 47 | blob.metadata = '{}' 48 | ase.populate_from_blob(mock.MagicMock(), blob, store_raw_metadata=True) 49 | assert ase.mode == azmodels.StorageModes.Append 50 | assert ase.raw_metadata == blob.metadata 51 | 52 | blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob 53 | blob.metadata = None 54 | blob.snapshot = 'abc' 55 | ase.populate_from_blob(mock.MagicMock(), blob) 56 | assert ase.mode == azmodels.StorageModes.Page 57 | assert ase.snapshot is not None 58 | 59 | blob.snapshot = None 60 | ase.populate_from_file(mock.MagicMock(), blob, 'path') 61 | assert ase.mode == azmodels.StorageModes.File 62 | assert ase.snapshot is None 63 | 64 | blob.metadata = '{}' 65 | ase.populate_from_file( 66 | mock.MagicMock(), blob, None, store_raw_metadata=True) 67 | assert ase.mode == azmodels.StorageModes.File 68 | assert ase.raw_metadata == blob.metadata 69 | assert ase.name == blob.name 70 | 71 | ase.populate_from_local( 72 | mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Append, 'cc', 73 | 'ct') 74 | assert ase.from_local 75 | 
assert ase.mode == azmodels.StorageModes.Append 76 | assert ase.cache_control == 'cc' 77 | assert ase.content_type == 'ct' 78 | 79 | ase.populate_from_local( 80 | mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Block, None, 81 | None) 82 | assert ase.from_local 83 | assert ase.mode == azmodels.StorageModes.Block 84 | assert ase.cache_control is None 85 | assert ase.content_type == 'application/octet-stream' 86 | 87 | ase.populate_from_local( 88 | mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.File, None, 89 | None) 90 | assert ase.from_local 91 | assert ase.mode == azmodels.StorageModes.File 92 | 93 | ase.populate_from_local( 94 | mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Page, None, 95 | None) 96 | assert ase.from_local 97 | assert ase.mode == azmodels.StorageModes.Page 98 | 99 | ase.populate_from_local( 100 | mock.MagicMock(), 'cont', 'path.vhdx', azmodels.StorageModes.Auto, 101 | None, None) 102 | assert ase.from_local 103 | assert ase.mode == azmodels.StorageModes.Page 104 | 105 | ase.populate_from_local( 106 | mock.MagicMock(), 'cont', 'path.bin', azmodels.StorageModes.Auto, 107 | None, None) 108 | assert ase.from_local 109 | assert ase.mode == azmodels.StorageModes.Block 110 | 111 | ase.size = 456 112 | ase.append_create = False 113 | ase.encryption_metadata = blobxfer.models.crypto.EncryptionMetadata() 114 | assert ase.size == 456 115 | assert not ase.append_create 116 | assert ase.encryption_metadata is not None 117 | 118 | ase = azmodels.StorageEntity(container=None) 119 | ase.populate_from_arbitrary_url('https://host/remote/path', 10) 120 | assert ase.is_arbitrary_url 121 | assert ase.size == 10 122 | assert ase.path == 'https://host/remote/path' 123 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_crypto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for crypto models""" 3 | 4 | # stdlib imports 5 | import copy 6 | import json 7 | # non-stdlib imports 8 | import pytest 9 | # local imports 10 | # module under test 11 | import blobxfer.models.crypto as models 12 | import blobxfer.operations.crypto as ops 13 | 14 | 15 | _SAMPLE_RSA_KEY = """ 16 | -----BEGIN RSA PRIVATE KEY----- 17 | MIICXQIBAAKBgQDwlQ0W6O2ixhZM+LYl/ZtUi4lpjFu6+Kt/fyim/LQojaa389yD 18 | e3lqWnAitj13n8uLpv1XuysG2fL+G0AvzT9JJj8gageJRC/8uffhOlxvH/vzfFqU 19 | wQEgwhuv9LXdFcl+mON4TiHqbKsUmggNNPNzSN/P0aohMG8pG8ihyO3uOQIDAQAB 20 | AoGBAIkaKA96RpKQmHzc79DOqgqQSorf9hajR/ismpovQOwrbWs/iddUMmktiOH/ 21 | QSA+7Fx1mcK5Y1fQNO4i0X1sVjdasoPvmU7iGVgHQ9TX6F5LGQtDqAKXAH6GpjkF 22 | V7I7nEBs2vtetpzzq8up2nY7fuwPwse44jdLGZjh1pc0HcFRAkEA/F5XdWq5ZYVo 23 | hMyxxhdb+6J8NKZTsWn92tW0s/pGlkgDwrryglpLqNf9MR+Mm906UUVh6ZmsKoxD 24 | kZzA+4S3bwJBAPQLSryk8CUE0uFviYYANq3asn9sDDTGcvEceSGGwbaZOTDVQNQg 25 | 7BhLL5vA8Be/xvkXfEaWa1XipmaBI+4WINcCQGQLEiid0jkIldJvQtoAUJqEYzCL 26 | 7wmZtuSVazkdsfXJPpRnf9Nk8DFSzjA3DYqMPJ4THyl3neSQDgkfVvFeP0kCQQDu 27 | 0OIJKwsJ3ueSznhw1mKrzTkh8pUbTBwNEQUEpv+H9fd+byGqtLD1sRXcwHjzdKt8 28 | 9Nubo/VTraGS68tCYQsvAkAYxzwSeX7Gj9/mMBFx1Y5v9sSCqLZQeF7q1ltzkwlK 29 | n3by7Z7RvxXXPjv1YoFQPV0WlA6zo4sm0HwFzA0sbOql 30 | -----END RSA PRIVATE KEY----- 31 | """ 32 | 33 | _SAMPLE_ED = \ 34 | { 35 | "BlobxferExtensions": { 36 | "PreEncryptedContentMD5": "tc+p1sj+vWGPkawoQ9UKHA==" 37 | }, 38 | "ContentEncryptionIV": "KjA4Y14+J1p7EJcYWhnKNQ==", 39 | "EncryptionAgent": { 40 | "EncryptionAlgorithm": "AES_CBC_256", 41 | "Protocol": "1.0" 42 | }, 43 | "EncryptionAuthentication": { 44 | 
"Algorithm": "HMAC-SHA256", 45 | "MessageAuthenticationCode": 46 | "9oKt5Ett7t1AWahxNq3qcGd5NbZMxLtzSN8Lwqy3PgU=" 47 | }, 48 | "EncryptionMode": "FullBlob", 49 | "KeyWrappingMetadata": {}, 50 | "WrappedContentKey": { 51 | "Algorithm": "RSA-OAEP", 52 | "EncryptedAuthenticationKey": 53 | "1kO63RxIqIyUp1EW+v2o5VwyhAlrrJiLc+seXnNcVRm0YLHzJYqOrBCz2+" 54 | "c2do2dJKhzTOXyPsJSwkvQVJ0NuYVUTxf6bzDNip2Ge1jTHnsd5IsljMKy" 55 | "rSAvHaKs9NxdvDu5Ex6lhKEChnuMtJBq52zCML5+LUd98WkBxdB2az4=", 56 | "EncryptedKey": 57 | "yOuWT2txNNzOITtDcjV1Uf3/V+TRn5AKjvOtHt+PRuBgMhq6fOFV8kcJhO" 58 | "zPxh8bHqydIFM2OQ+ktiETQ5Ibg7OA24hhr+n8Y6nJNpw3cGtP6L/23n8a" 59 | "a7RMKhmactl3sToFM3xvaXRO0DYuDZeQtPR/DDKPgi2gK641y1THAoc=", 60 | "KeyId": "private:key1" 61 | } 62 | } 63 | 64 | _SAMPLE_EDA = \ 65 | { 66 | "EncryptionMetadataAuthentication": { 67 | "Algorithm": "HMAC-SHA256", 68 | "Encoding": "UTF-8", 69 | "MessageAuthenticationCode": 70 | "BhJjehtHxgSRIBaITDB6o6ZUt6mdehN0PDkhHtwXTP8=" 71 | } 72 | } 73 | 74 | 75 | def test_encryption_metadata_exists(): 76 | md = None 77 | assert not models.EncryptionMetadata.encryption_metadata_exists(md) 78 | 79 | md = {} 80 | assert not models.EncryptionMetadata.encryption_metadata_exists(md) 81 | 82 | md = {'encryptiondata': {}} 83 | assert not models.EncryptionMetadata.encryption_metadata_exists(md) 84 | 85 | md = {'encryptiondata': {'key': 'value'}} 86 | assert models.EncryptionMetadata.encryption_metadata_exists(md) 87 | 88 | 89 | def test_create_new_metadata(): 90 | em = models.EncryptionMetadata() 91 | em.create_new_metadata('key') 92 | 93 | assert em._rsa_public_key == 'key' 94 | assert em.symmetric_key is not None 95 | assert em.signing_key is not None 96 | assert em.content_encryption_iv is not None 97 | assert em.encryption_agent is not None 98 | assert em.encryption_mode is not None 99 | 100 | 101 | def test_convert_from_json(tmpdir): 102 | keyfile = tmpdir.join('keyfile') 103 | keyfile.write(_SAMPLE_RSA_KEY) 104 | rsaprivatekey = ops.load_rsa_private_key_file(str(keyfile), None) 105 | 106 | # test various missing metadata fields 107 | ced = copy.deepcopy(_SAMPLE_ED) 108 | ced['EncryptionAgent']['EncryptionAlgorithm'] = 'OOPS' 109 | md = { 110 | 'encryptiondata': json.dumps( 111 | ced, sort_keys=True, ensure_ascii=False), 112 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 113 | } 114 | em = models.EncryptionMetadata() 115 | with pytest.raises(RuntimeError): 116 | em.convert_from_json(md, 'blob', rsaprivatekey) 117 | 118 | ced = copy.deepcopy(_SAMPLE_ED) 119 | ced['EncryptionAgent']['Protocol'] = 'OOPS' 120 | md = { 121 | 'encryptiondata': json.dumps( 122 | ced, sort_keys=True, ensure_ascii=False), 123 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 124 | } 125 | em = models.EncryptionMetadata() 126 | with pytest.raises(RuntimeError): 127 | em.convert_from_json(md, 'blob', rsaprivatekey) 128 | 129 | ced = copy.deepcopy(_SAMPLE_ED) 130 | ced['EncryptionAuthentication']['Algorithm'] = 'OOPS' 131 | md = { 132 | 'encryptiondata': json.dumps( 133 | ced, sort_keys=True, ensure_ascii=False), 134 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 135 | } 136 | em = models.EncryptionMetadata() 137 | with pytest.raises(RuntimeError): 138 | em.convert_from_json(md, 'blob', rsaprivatekey) 139 | 140 | ced = copy.deepcopy(_SAMPLE_ED) 141 | ced['EncryptionMode'] = 'OOPS' 142 | md = { 143 | 'encryptiondata': json.dumps( 144 | ced, sort_keys=True, ensure_ascii=False), 145 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 146 | } 147 | em = 
models.EncryptionMetadata() 148 | with pytest.raises(RuntimeError): 149 | em.convert_from_json(md, 'blob', rsaprivatekey) 150 | 151 | ced = copy.deepcopy(_SAMPLE_ED) 152 | ced['WrappedContentKey'].pop('EncryptedAuthenticationKey') 153 | ced['WrappedContentKey']['Algorithm'] = 'OOPS' 154 | md = { 155 | 'encryptiondata': json.dumps( 156 | ced, sort_keys=True, ensure_ascii=False), 157 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 158 | } 159 | em = models.EncryptionMetadata() 160 | with pytest.raises(RuntimeError): 161 | em.convert_from_json(md, 'blob', rsaprivatekey) 162 | 163 | ceda = copy.deepcopy(_SAMPLE_EDA) 164 | ceda['EncryptionMetadataAuthentication']['Algorithm'] = 'OOPS' 165 | md = { 166 | 'encryptiondata': json.dumps( 167 | _SAMPLE_ED, sort_keys=True, ensure_ascii=False), 168 | 'encryptiondata_authentication': json.dumps(ceda) 169 | } 170 | em = models.EncryptionMetadata() 171 | with pytest.raises(RuntimeError): 172 | em.convert_from_json(md, 'blob', rsaprivatekey) 173 | 174 | # test failed hmac 175 | ced = copy.deepcopy(_SAMPLE_ED) 176 | ced.pop('BlobxferExtensions') 177 | md = { 178 | 'encryptiondata': json.dumps( 179 | ced, sort_keys=True, ensure_ascii=False), 180 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 181 | } 182 | em = models.EncryptionMetadata() 183 | with pytest.raises(RuntimeError): 184 | em.convert_from_json(md, 'blob', rsaprivatekey) 185 | 186 | # test correct path 187 | md = { 188 | 'encryptiondata': json.dumps( 189 | _SAMPLE_ED, sort_keys=True, ensure_ascii=False), 190 | 'encryptiondata_authentication': json.dumps(_SAMPLE_EDA) 191 | } 192 | em = models.EncryptionMetadata() 193 | em.convert_from_json(md, 'blob', rsaprivatekey) 194 | hmac = em.initialize_hmac() 195 | assert em.wrapped_content_key is not None 196 | assert em._symkey == em.symmetric_key 197 | assert em._signkey == em.signing_key 198 | assert em._symkey is not None 199 | assert em._signkey is not None 200 | assert hmac is not None 201 | 202 | em = models.EncryptionMetadata() 203 | em.convert_from_json(md, 'blob', None) 204 | assert em.wrapped_content_key is not None 205 | assert em._symkey is None 206 | assert em._signkey is None 207 | 208 | ced = copy.deepcopy(_SAMPLE_ED) 209 | ced['WrappedContentKey'].pop('EncryptedAuthenticationKey') 210 | md = { 211 | 'encryptiondata': json.dumps( 212 | ced, sort_keys=True, ensure_ascii=False) 213 | } 214 | em = models.EncryptionMetadata() 215 | em.convert_from_json(md, 'blob', rsaprivatekey) 216 | hmac = em.initialize_hmac() 217 | assert em.wrapped_content_key is not None 218 | assert em._symkey is not None 219 | assert em._signkey is None 220 | assert hmac is None 221 | 222 | 223 | def test_convert_to_json_with_mac(tmpdir): 224 | keyfile = tmpdir.join('keyfile') 225 | keyfile.write(_SAMPLE_RSA_KEY) 226 | rsaprivatekey = ops.load_rsa_private_key_file(str(keyfile), None) 227 | rsapublickey = rsaprivatekey.public_key() 228 | 229 | em = models.EncryptionMetadata() 230 | em.create_new_metadata(rsapublickey) 231 | symkey = em._symkey 232 | signkey = em._signkey 233 | 234 | encjson = em.convert_to_json_with_mac('md5digest', 'hmacdigest') 235 | assert encjson is not None 236 | em.convert_from_json(encjson, 'entityname', rsaprivatekey) 237 | assert em._symkey == symkey 238 | assert em._signkey == signkey 239 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_metadata.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for 
models metadata""" 3 | 4 | # stdlib imports 5 | import json 6 | import unittest.mock as mock 7 | # non-stdlib imports 8 | import pytest 9 | # module under test 10 | import blobxfer.models.metadata as md 11 | 12 | 13 | class AseAE(object): 14 | def __init__(self): 15 | self.is_encrypted = True 16 | self.md5 = 'aseae' 17 | 18 | 19 | def test_get_md5_from_metadata(): 20 | ase = mock.MagicMock() 21 | ase.is_encrypted = True 22 | ase.encryption_metadata.blobxfer_extensions.pre_encrypted_content_md5 = \ 23 | 'premd5' 24 | assert md.get_md5_from_metadata(ase) == 'premd5' 25 | 26 | ase.is_encrypted = False 27 | ase.md5 = 'md5' 28 | assert md.get_md5_from_metadata(ase) == 'md5' 29 | 30 | ase = AseAE() 31 | asemd5 = md.get_md5_from_metadata(ase) 32 | assert asemd5 == 'aseae' 33 | 34 | 35 | def test_generate_fileattr_metadata(): 36 | with mock.patch('blobxfer.util.on_windows', return_value=True): 37 | md._FILEATTR_WARNED_ON_WINDOWS = False 38 | assert md.generate_fileattr_metadata(None, None) is None 39 | assert md._FILEATTR_WARNED_ON_WINDOWS 40 | 41 | with mock.patch('blobxfer.util.on_windows', return_value=False): 42 | lp = mock.MagicMock() 43 | lp.mode = 'mode' 44 | lp.uid = 0 45 | lp.gid = 0 46 | 47 | ret = md.generate_fileattr_metadata(lp, {}) 48 | assert len(ret) > 0 49 | assert md._JSON_KEY_FILE_ATTRIBUTES in ret 50 | assert md._JSON_KEY_FILE_ATTRIBUTES_POSIX in ret[ 51 | md._JSON_KEY_FILE_ATTRIBUTES] 52 | assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ 53 | md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ 54 | md._JSON_KEY_FILE_ATTRIBUTES_MODE] == lp.mode 55 | assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ 56 | md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ 57 | md._JSON_KEY_FILE_ATTRIBUTES_UID] == lp.uid 58 | assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ 59 | md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ 60 | md._JSON_KEY_FILE_ATTRIBUTES_GID] == lp.gid 61 | 62 | 63 | def test_fileattr_from_metadata(): 64 | assert md.fileattr_from_metadata(None) is None 65 | 66 | with mock.patch('blobxfer.util.on_windows', return_value=True): 67 | md._FILEATTR_WARNED_ON_WINDOWS = False 68 | val = { 69 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps( 70 | {md._JSON_KEY_FILE_ATTRIBUTES: {}}) 71 | } 72 | assert md.fileattr_from_metadata(val) is None 73 | assert md._FILEATTR_WARNED_ON_WINDOWS 74 | 75 | with mock.patch('blobxfer.util.on_windows', return_value=False): 76 | lp = mock.MagicMock() 77 | lp.mode = 'mode' 78 | lp.uid = 0 79 | lp.gid = 0 80 | 81 | val = { 82 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps( 83 | md.generate_fileattr_metadata(lp, {})) 84 | } 85 | assert md.fileattr_from_metadata(val) is not None 86 | 87 | val = { 88 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps( 89 | {md._JSON_KEY_FILE_ATTRIBUTES: {}}) 90 | } 91 | assert md.fileattr_from_metadata(val) is None 92 | 93 | 94 | def test_create_vecotred_io_next_entry(): 95 | ase = mock.MagicMock() 96 | ase.client.primary_endpoint = 'ep' 97 | ase.container = 'cont' 98 | ase.name = 'name' 99 | 100 | assert md.create_vectored_io_next_entry(ase) == 'ep;cont;name' 101 | 102 | 103 | def test_explode_vectored_io_next_entry(): 104 | entry = 'sa.blob.core.windows.net;cont;name;' 105 | 106 | vne = md.explode_vectored_io_next_entry(entry) 107 | assert vne.storage_account_name == 'sa' 108 | assert vne.endpoint == 'core.windows.net' 109 | assert vne.container == 'cont' 110 | assert vne.name == 'name' 111 | 112 | 113 | def test_remove_vectored_io_slice_suffix_from_name(): 114 | name = 'abc.bxslice-100' 115 | assert md.remove_vectored_io_slice_suffix_from_name(name, 100) == 'abc' 116 | 117 | name = 'abc.bob' 118 | 
assert md.remove_vectored_io_slice_suffix_from_name(name, 0) == 'abc.bob' 119 | 120 | 121 | def test_generate_vectored_io_stripe_metadata(): 122 | lp = mock.MagicMock() 123 | lp.total_size = 100 124 | lp.view.fd_start = 0 125 | lp.view.total_slices = 2 126 | lp.view.slice_num = 0 127 | lp.view.next = 'next' 128 | 129 | ret = md.generate_vectored_io_stripe_metadata(lp, {}) 130 | assert len(ret) > 0 131 | assert md._JSON_KEY_VECTORED_IO in ret 132 | assert md._JSON_KEY_VECTORED_IO_STRIPE == ret[md._JSON_KEY_VECTORED_IO][ 133 | md._JSON_KEY_VECTORED_IO_MODE] 134 | assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ 135 | md._JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE] == lp.total_size 136 | assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ 137 | md._JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START] == lp.view.fd_start 138 | assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ 139 | md._JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES] == lp.view.total_slices 140 | assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ 141 | md._JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID] == lp.view.slice_num 142 | assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ 143 | md._JSON_KEY_VECTORED_IO_STRIPE_NEXT] == lp.view.next 144 | 145 | 146 | def test_vectored_io_from_metadata(): 147 | assert md.vectored_io_from_metadata(None) is None 148 | 149 | lp = mock.MagicMock() 150 | lp.total_size = 100 151 | lp.view.fd_start = 0 152 | lp.view.total_slices = 2 153 | lp.view.slice_num = 0 154 | lp.view.next = 'sa.blob.core.windows.net;cont;name;' 155 | 156 | val = { 157 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps( 158 | md.generate_vectored_io_stripe_metadata(lp, {})) 159 | } 160 | vio = md.vectored_io_from_metadata(val) 161 | assert vio.total_size == lp.total_size 162 | assert vio.offset_start == lp.view.fd_start 163 | assert vio.total_slices == lp.view.total_slices 164 | assert vio.slice_id == lp.view.slice_num 165 | assert type(vio.next) == md.VectoredNextEntry 166 | 167 | lp = mock.MagicMock() 168 | lp.total_size = 100 169 | lp.view.fd_start = 0 170 | lp.view.total_slices = 2 171 | lp.view.slice_num = 0 172 | lp.view.next = None 173 | 174 | val = { 175 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps( 176 | md.generate_vectored_io_stripe_metadata(lp, {})) 177 | } 178 | vio = md.vectored_io_from_metadata(val) 179 | assert vio.total_size == lp.total_size 180 | assert vio.offset_start == lp.view.fd_start 181 | assert vio.total_slices == lp.view.total_slices 182 | assert vio.slice_id == lp.view.slice_num 183 | assert vio.next is None 184 | 185 | tmp = md.generate_vectored_io_stripe_metadata(lp, {}) 186 | tmp[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_MODE] = 'oops' 187 | val = { 188 | md.JSON_KEY_BLOBXFER_METADATA: json.dumps(tmp) 189 | } 190 | with pytest.raises(RuntimeError): 191 | md.vectored_io_from_metadata(val) 192 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_offload.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for offload""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import pytest 8 | # local imports 9 | import blobxfer.util as util 10 | # module under test 11 | import blobxfer.models.offload as offload 12 | 13 | 14 | class PicklableMagicMock(mock.MagicMock): 15 | def __reduce__(self): 16 | return (mock.MagicMock, ()) 17 | 18 | 19 | def test_multiprocess_offload(): 
20 | with pytest.raises(ValueError): 21 | a = offload._MultiprocessOffload(None, None) 22 | 23 | if util.on_linux(): 24 | target = mock.MagicMock() 25 | else: 26 | target = PicklableMagicMock() 27 | a = offload._MultiprocessOffload(target, 1, 'test') 28 | assert len(a._procs) == 1 29 | assert not a.terminated 30 | assert a._done_cv == a.done_cv 31 | assert a._check_thread is None 32 | assert a.pop_done_queue() is None 33 | 34 | item = (0, 'abc') 35 | a._done_queue.put(item) 36 | 37 | check_func = mock.MagicMock() 38 | a.initialize_check_thread(check_func) 39 | 40 | a.finalize_processes() 41 | assert a.terminated 42 | for proc in a._procs: 43 | assert not proc.is_alive() 44 | 45 | assert a.pop_done_queue() == item 46 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_options.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for models options""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | import pathlib 7 | # non-stdlib imports 8 | import pytest 9 | # module under test 10 | import blobxfer.models.options as options 11 | 12 | 13 | def test_timeout(): 14 | a = options.Timeout(connect=None, read=1, max_retries=-1) 15 | assert a.connect == options._DEFAULT_REQUESTS_TIMEOUT[0] 16 | assert a.read == 1 17 | assert a.timeout == (options._DEFAULT_REQUESTS_TIMEOUT[0], 1) 18 | assert a.max_retries is None 19 | 20 | a = options.Timeout(connect=2, read=0, max_retries=3) 21 | assert a.connect == 2 22 | assert a.read == options._DEFAULT_REQUESTS_TIMEOUT[1] 23 | assert a.timeout == (2, options._DEFAULT_REQUESTS_TIMEOUT[1]) 24 | assert a.max_retries == 3 25 | 26 | 27 | @mock.patch('multiprocessing.cpu_count', return_value=1) 28 | def test_concurrency_options(patched_cc): 29 | a = options.Concurrency( 30 | crypto_processes=-1, 31 | md5_processes=0, 32 | disk_threads=-1, 33 | transfer_threads=-2, 34 | ) 35 | 36 | assert a.crypto_processes == 0 37 | assert a.md5_processes == 1 38 | assert a.disk_threads == 2 39 | assert a.transfer_threads == 4 40 | 41 | a = options.Concurrency( 42 | crypto_processes=-1, 43 | md5_processes=0, 44 | disk_threads=1, 45 | transfer_threads=-1, 46 | ) 47 | 48 | assert a.crypto_processes == 0 49 | assert a.md5_processes == 1 50 | assert a.disk_threads == 1 51 | assert a.transfer_threads == 4 52 | 53 | 54 | @mock.patch('multiprocessing.cpu_count', return_value=64) 55 | def test_concurrency_options_max_disk_and_transfer_threads(patched_cc): 56 | a = options.Concurrency( 57 | crypto_processes=1, 58 | md5_processes=1, 59 | disk_threads=None, 60 | transfer_threads=None, 61 | ) 62 | 63 | assert a.disk_threads == 64 64 | assert a.transfer_threads == 96 65 | 66 | a = options.Concurrency( 67 | crypto_processes=1, 68 | md5_processes=1, 69 | disk_threads=None, 70 | transfer_threads=None, 71 | action=1, 72 | ) 73 | 74 | assert a.disk_threads == 45 75 | assert a.transfer_threads == 30 76 | 77 | a = options.Concurrency( 78 | crypto_processes=1, 79 | md5_processes=1, 80 | disk_threads=None, 81 | transfer_threads=None, 82 | action=3, 83 | ) 84 | 85 | assert a.md5_processes == 0 86 | assert a.crypto_processes == 0 87 | assert a.disk_threads == 0 88 | assert a.transfer_threads == 96 89 | 90 | 91 | @mock.patch('multiprocessing.cpu_count', return_value=6) 92 | def test_concurrency_options_max_disk_and_transfer_threads2(patched_cc): 93 | a = options.Concurrency( 94 | crypto_processes=1, 95 | md5_processes=1, 96 | disk_threads=None, 97 | transfer_threads=None, 98 
| action=1, 99 | ) 100 | 101 | assert a.disk_threads == 4 102 | assert a.transfer_threads == 3 103 | 104 | 105 | def test_general_options(): 106 | a = options.General( 107 | concurrency=options.Concurrency( 108 | crypto_processes=1, 109 | md5_processes=2, 110 | disk_threads=3, 111 | transfer_threads=4, 112 | ), 113 | log_file='abc.log', 114 | progress_bar=False, 115 | resume_file='abc', 116 | timeout=options.Timeout(1, 2, None), 117 | quiet=True, 118 | dry_run=True, 119 | verbose=True, 120 | ) 121 | 122 | assert a.concurrency.crypto_processes == 1 123 | assert a.concurrency.md5_processes == 2 124 | assert a.concurrency.disk_threads == 3 125 | assert a.concurrency.transfer_threads == 4 126 | assert a.log_file == 'abc.log' 127 | assert not a.progress_bar 128 | assert a.resume_file == pathlib.Path('abc') 129 | assert a.timeout.timeout == (1, 2) 130 | assert a.timeout.max_retries is None 131 | assert a.quiet 132 | assert a.dry_run 133 | assert a.verbose 134 | 135 | a = options.General( 136 | concurrency=options.Concurrency( 137 | crypto_processes=1, 138 | md5_processes=2, 139 | disk_threads=3, 140 | transfer_threads=4, 141 | ), 142 | progress_bar=False, 143 | resume_file=None, 144 | timeout=options.Timeout(2, 1, 0), 145 | verbose=True, 146 | ) 147 | 148 | assert a.concurrency.crypto_processes == 1 149 | assert a.concurrency.md5_processes == 2 150 | assert a.concurrency.disk_threads == 3 151 | assert a.concurrency.transfer_threads == 4 152 | assert a.log_file is None 153 | assert not a.progress_bar 154 | assert a.resume_file is None 155 | assert a.timeout.timeout == (2, 1) 156 | assert a.timeout.max_retries == 0 157 | assert not a.quiet 158 | assert not a.dry_run 159 | assert a.verbose 160 | 161 | with pytest.raises(ValueError): 162 | a = options.General(None) 163 | -------------------------------------------------------------------------------- /tests/test_blobxfer_models_resume.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for models resume""" 3 | 4 | # stdlib imports 5 | # non-stdlib imports 6 | # module under test 7 | import blobxfer.models.resume as rmodels 8 | 9 | 10 | def test_download(): 11 | d = rmodels.Download('fp', 1, 2, 0, False, '') 12 | assert d.final_path == 'fp' 13 | assert d.length == 1 14 | assert d.chunk_size == 2 15 | assert d.next_integrity_chunk == 0 16 | assert not d.completed 17 | assert d.md5hexdigest == '' 18 | 19 | d.md5hexdigest = None 20 | assert d.md5hexdigest == '' 21 | 22 | d.md5hexdigest = 'abc' 23 | assert d.md5hexdigest == 'abc' 24 | 25 | d.next_integrity_chunk = 1 26 | assert d.next_integrity_chunk == 1 27 | 28 | d.completed = True 29 | assert d.completed 30 | 31 | assert len(str(d)) > 0 32 | 33 | 34 | def test_upload(): 35 | u = rmodels.Upload('lp', 1, 2, 2, 0, False, '') 36 | assert u.local_path == 'lp' 37 | assert u.length == 1 38 | assert u.chunk_size == 2 39 | assert u.total_chunks == 2 40 | assert u.completed_chunks == 0 41 | assert not u.completed 42 | assert u.md5hexdigest == '' 43 | 44 | u.md5hexdigest = None 45 | assert u.md5hexdigest == '' 46 | 47 | u.md5hexdigest = 'abc' 48 | assert u.md5hexdigest == 'abc' 49 | 50 | u.completed_chunks = 1 51 | assert u.completed_chunks == 1 52 | 53 | u.completed = True 54 | assert u.completed 55 | 56 | assert len(str(u)) > 0 57 | 58 | 59 | def test_synccopy(): 60 | s = rmodels.SyncCopy(1, [], 0, 2, 2, 0, False) 61 | assert s.length == 1 62 | assert len(s.src_block_list) == 0 63 | assert s.offset == 0 64 | assert s.chunk_size == 2 65 | assert 
s.total_chunks == 2 66 | assert s.completed_chunks == 0 67 | assert not s.completed 68 | 69 | s.offset = 1 70 | assert s.offset == 1 71 | 72 | s.completed_chunks = 1 73 | assert s.completed_chunks == 1 74 | 75 | s.completed = True 76 | assert s.completed 77 | 78 | assert len(str(s)) > 0 79 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_azure_blob.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for general blob operations""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import azure.common 8 | import azure.storage.blob 9 | import pytest 10 | # local imports 11 | import blobxfer.models.azure as azmodels 12 | # module under test 13 | import blobxfer.operations.azure.blob as ops 14 | 15 | 16 | def test_check_if_single_blob(): 17 | client = mock.MagicMock() 18 | client.get_blob_properties.return_value = True 19 | 20 | result = ops.check_if_single_blob(client, 'a', 'b/c') 21 | assert result 22 | 23 | result = ops.check_if_single_blob( 24 | client, 'a', 'a?snapshot=2017-02-23T22:21:14.8121864Z') 25 | assert result 26 | 27 | client = mock.MagicMock() 28 | client.get_blob_properties = mock.MagicMock() 29 | client.get_blob_properties.side_effect = \ 30 | azure.common.AzureMissingResourceHttpError('msg', 404) 31 | 32 | result = ops.check_if_single_blob(client, 'a', 'b/c') 33 | assert not result 34 | 35 | 36 | def test_get_blob_properties(): 37 | with pytest.raises(RuntimeError): 38 | ops.get_blob_properties( 39 | None, 'cont', None, azmodels.StorageModes.File) 40 | 41 | client = mock.MagicMock() 42 | blob = mock.MagicMock() 43 | client.get_blob_properties.side_effect = \ 44 | azure.common.AzureMissingResourceHttpError('msg', 'code') 45 | 46 | ret = ops.get_blob_properties( 47 | client, 'cont', None, azmodels.StorageModes.Append) 48 | assert ret is None 49 | 50 | blob = mock.MagicMock() 51 | blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob 52 | client = mock.MagicMock() 53 | client.get_blob_properties.return_value = blob 54 | 55 | with pytest.raises(RuntimeError): 56 | ops.get_blob_properties( 57 | client, 'cont', None, azmodels.StorageModes.Append) 58 | 59 | with pytest.raises(RuntimeError): 60 | ops.get_blob_properties( 61 | client, 'cont', None, azmodels.StorageModes.Block) 62 | 63 | blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob 64 | with pytest.raises(RuntimeError): 65 | ops.get_blob_properties( 66 | client, 'cont', None, azmodels.StorageModes.Page) 67 | 68 | ret = ops.get_blob_properties( 69 | client, 'cont', None, azmodels.StorageModes.Block) 70 | assert ret == blob 71 | 72 | 73 | def test_list_blobs(): 74 | with pytest.raises(RuntimeError): 75 | for blob in ops.list_blobs( 76 | None, 'cont', 'prefix', azmodels.StorageModes.File, True): 77 | pass 78 | 79 | _blob = azure.storage.blob.models.Blob(name='dir/name') 80 | _blob.properties = azure.storage.blob.models.BlobProperties() 81 | client = mock.MagicMock() 82 | client.list_blobs.return_value = [_blob] 83 | 84 | i = 0 85 | for blob in ops.list_blobs( 86 | client, 'cont', 'prefix', azmodels.StorageModes.Auto, False): 87 | i += 1 88 | assert blob.name == _blob.name 89 | assert i == 0 90 | 91 | i = 0 92 | for blob in ops.list_blobs( 93 | client, 'cont', 'prefix', azmodels.StorageModes.Auto, True): 94 | i += 1 95 | assert blob.name == _blob.name 96 | assert i == 1 97 | 98 | _blob.properties.blob_type = \ 99 | 
azure.storage.blob.models._BlobTypes.AppendBlob 100 | i = 0 101 | for blob in ops.list_blobs( 102 | client, 'dir', 'prefix', azmodels.StorageModes.Block, True): 103 | i += 1 104 | assert blob.name == _blob.name 105 | assert i == 0 106 | 107 | i = 0 108 | for blob in ops.list_blobs( 109 | client, 'dir', 'prefix', azmodels.StorageModes.Page, True): 110 | i += 1 111 | assert blob.name == _blob.name 112 | assert i == 0 113 | 114 | _blob.properties.blob_type = \ 115 | azure.storage.blob.models._BlobTypes.BlockBlob 116 | i = 0 117 | for blob in ops.list_blobs( 118 | client, 'dir', 'prefix', azmodels.StorageModes.Append, True): 119 | i += 1 120 | assert blob.name == _blob.name 121 | assert i == 0 122 | 123 | _blob.snapshot = '2017-02-23T22:21:14.8121864Z' 124 | client.get_blob_properties.return_value = _blob 125 | i = 0 126 | for blob in ops.list_blobs( 127 | client, 'cont', 128 | 'a?snapshot=2017-02-23T22:21:14.8121864Z', 129 | azmodels.StorageModes.Auto, 130 | True): 131 | i += 1 132 | assert blob.name == _blob.name 133 | assert blob.snapshot == _blob.snapshot 134 | assert i == 1 135 | 136 | 137 | def test_list_all_blobs(): 138 | client = mock.MagicMock() 139 | blob = mock.MagicMock() 140 | client.list_blobs.return_value = [blob, blob] 141 | 142 | assert len(list(ops.list_all_blobs(client, 'cont'))) == 2 143 | 144 | 145 | def test_get_blob_range(): 146 | ase = mock.MagicMock() 147 | ret = mock.MagicMock() 148 | ret.content = b'\0' 149 | ase.client._get_blob.return_value = ret 150 | ase.container = 'cont' 151 | ase.name = 'name' 152 | ase.snapshot = None 153 | offsets = mock.MagicMock() 154 | offsets.start_range = 0 155 | offsets.end_range = 1 156 | 157 | assert ops.get_blob_range(ase, offsets) == ret.content 158 | 159 | 160 | def test_create_container(): 161 | ase = mock.MagicMock() 162 | ase.can_create_containers = False 163 | 164 | ops.create_container(ase, None) 165 | assert ase.client.create_container.call_count == 0 166 | 167 | ase.can_create_containers = True 168 | ase.client.account_name = 'sa' 169 | ase.container = 'cont' 170 | 171 | cc = set() 172 | ase.client.create_container.return_value = True 173 | ops.create_container(ase, cc) 174 | assert len(cc) == 1 175 | 176 | ase.client.create_container.return_value = False 177 | ops.create_container(ase, cc) 178 | assert len(cc) == 1 179 | 180 | ase.container = 'cont2' 181 | ops.create_container(ase, cc) 182 | assert len(cc) == 2 183 | 184 | ops.create_container(ase, cc) 185 | assert len(cc) == 2 186 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_azure_blob_append.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for operations: blob append""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import azure.storage.common 8 | # local imports 9 | import blobxfer.version 10 | # module under test 11 | import blobxfer.operations.azure as azops 12 | import blobxfer.operations.azure.blob.append as ops 13 | 14 | 15 | def test_create_client(): 16 | to = mock.MagicMock() 17 | to.max_retries = None 18 | 19 | sa = azops.StorageAccount( 20 | 'name', 'AAAAAA==', 'core.windows.net', 10, to, mock.MagicMock()) 21 | client = ops.create_client(sa, to, mock.MagicMock()) 22 | assert client is not None 23 | assert isinstance(client, azure.storage.blob.AppendBlobService) 24 | assert isinstance( 25 | client.authentication, 26 | azure.storage.common._auth._StorageSharedKeyAuthentication) 27 | 
assert client._USER_AGENT_STRING.startswith( 28 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 29 | assert client._httpclient.proxies is not None 30 | 31 | sa = azops.StorageAccount( 32 | 'name', '?key&sig=key', 'core.windows.net', 10, to, None) 33 | client = ops.create_client(sa, to, None) 34 | assert client is not None 35 | assert isinstance(client, azure.storage.blob.AppendBlobService) 36 | assert isinstance( 37 | client.authentication, 38 | azure.storage.common._auth._StorageSASAuthentication) 39 | assert client._USER_AGENT_STRING.startswith( 40 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 41 | assert client._httpclient.proxies is None 42 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_azure_blob_block.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for operations: block blob""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import azure.storage.common 8 | # local imports 9 | import blobxfer.version 10 | import blobxfer.models.azure 11 | # module under test 12 | import blobxfer.operations.azure as azops 13 | import blobxfer.operations.azure.blob.block as ops 14 | 15 | 16 | def test_create_client(): 17 | to = mock.MagicMock() 18 | to.max_retries = None 19 | 20 | sa = azops.StorageAccount( 21 | 'name', 'AAAAAA==', 'core.windows.net', 10, to, mock.MagicMock()) 22 | client = ops.create_client(sa, to, mock.MagicMock()) 23 | assert client is not None 24 | assert isinstance(client, azure.storage.blob.BlockBlobService) 25 | assert isinstance( 26 | client.authentication, 27 | azure.storage.common._auth._StorageSharedKeyAuthentication) 28 | assert client._USER_AGENT_STRING.startswith( 29 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 30 | assert client._httpclient.proxies is not None 31 | 32 | sa = azops.StorageAccount( 33 | 'name', '?key&sig=key', 'core.windows.net', 10, to, None) 34 | client = ops.create_client(sa, to, None) 35 | assert client is not None 36 | assert isinstance(client, azure.storage.blob.BlockBlobService) 37 | assert isinstance( 38 | client.authentication, 39 | azure.storage.common._auth._StorageSASAuthentication) 40 | assert client._USER_AGENT_STRING.startswith( 41 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 42 | assert client._httpclient.proxies is None 43 | 44 | 45 | def test_format_block_id(): 46 | assert '00000001' == ops._format_block_id(1) 47 | 48 | 49 | def test_put_block_from_url(): 50 | dst_ase = mock.MagicMock() 51 | dst_ase.client.put_block_from_url = mock.MagicMock() 52 | 53 | src_ase = mock.MagicMock() 54 | src_ase.path = 'https://host/remote/path' 55 | src_ase.is_arbitrary_url = True 56 | 57 | offsets = mock.MagicMock() 58 | offsets.chunk_num = 0 59 | 60 | ops.put_block_from_url(src_ase, dst_ase, offsets) 61 | assert dst_ase.client.put_block_from_url.call_count == 1 62 | 63 | src_ase.is_arbitrary_url = False 64 | 65 | src_ase.client.account_key = 'key' 66 | src_ase.client.generate_blob_shared_access_signature.return_value = 'sas' 67 | 68 | ops.put_block_from_url(src_ase, dst_ase, offsets) 69 | assert dst_ase.client.put_block_from_url.call_count == 2 70 | 71 | src_ase.client.account_key = None 72 | src_ase.client.sas_token = 'sastoken' 73 | 74 | ops.put_block_from_url(src_ase, dst_ase, offsets) 75 | assert dst_ase.client.put_block_from_url.call_count == 3 76 | 77 | src_ase.client.account_key = 'key' 78 | src_ase.client.sas_token = None 79 | src_ase.mode = 
blobxfer.models.azure.StorageModes.File 80 | src_ase.client.generate_file_shared_access_signature.return_value = 'sas' 81 | 82 | ops.put_block_from_url(src_ase, dst_ase, offsets) 83 | assert dst_ase.client.put_block_from_url.call_count == 4 84 | 85 | 86 | def test_put_block_list(): 87 | ase = mock.MagicMock() 88 | ase.name = 'abc' 89 | ops.put_block_list(ase, 1, None, None) 90 | assert ase.client.put_block_list.call_count == 1 91 | 92 | 93 | def test_get_committed_block_list(): 94 | ase = mock.MagicMock() 95 | ase.name = 'abc' 96 | gbl = mock.MagicMock() 97 | gbl.committed_blocks = 1 98 | ase.client.get_block_list.return_value = gbl 99 | assert ops.get_committed_block_list(ase) == 1 100 | 101 | ase.name = 'abc?snapshot=123' 102 | gbl.committed_blocks = 2 103 | assert ops.get_committed_block_list(ase) == 2 104 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_azure_blob_page.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for models""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | import azure.storage.common 8 | # local imports 9 | import blobxfer.version 10 | # module under test 11 | import blobxfer.operations.azure as azops 12 | import blobxfer.operations.azure.blob.page as ops 13 | 14 | 15 | def test_create_client(): 16 | to = mock.MagicMock() 17 | to.max_retries = None 18 | 19 | sa = azops.StorageAccount( 20 | 'name', 'AAAAAA==', 'core.windows.net', 10, to, mock.MagicMock()) 21 | client = ops.create_client(sa, to, mock.MagicMock()) 22 | assert client is not None 23 | assert isinstance(client, azure.storage.blob.PageBlobService) 24 | assert isinstance( 25 | client.authentication, 26 | azure.storage.common._auth._StorageSharedKeyAuthentication) 27 | assert client._USER_AGENT_STRING.startswith( 28 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 29 | assert client._httpclient.proxies is not None 30 | 31 | sa = azops.StorageAccount( 32 | 'name', '?key&sig=key', 'core.windows.net', 10, to, None) 33 | client = ops.create_client(sa, to, None) 34 | assert client is not None 35 | assert isinstance(client, azure.storage.blob.PageBlobService) 36 | assert isinstance( 37 | client.authentication, 38 | azure.storage.common._auth._StorageSASAuthentication) 39 | assert client._USER_AGENT_STRING.startswith( 40 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 41 | assert client._httpclient.proxies is None 42 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_azure_file.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for file operations""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | import pathlib 7 | # non-stdlib imports 8 | import azure.common 9 | import azure.storage.common 10 | import pytest 11 | # local imports 12 | import blobxfer.util as util 13 | import blobxfer.version 14 | # module under test 15 | import blobxfer.operations.azure as azops 16 | import blobxfer.operations.azure.file as ops 17 | 18 | 19 | def test_create_client(): 20 | to = mock.MagicMock() 21 | to.max_retries = None 22 | 23 | sa = azops.StorageAccount( 24 | 'name', 'AAAAAA==', 'core.windows.net', 10, to, mock.MagicMock()) 25 | client = ops.create_client(sa, to, mock.MagicMock()) 26 | assert client is not None 27 | assert isinstance(client, azure.storage.file.FileService) 28 | assert isinstance( 29 | 
client.authentication, 30 | azure.storage.common._auth._StorageSharedKeyAuthentication) 31 | assert client._USER_AGENT_STRING.startswith( 32 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 33 | assert client._httpclient.proxies is not None 34 | 35 | sa = azops.StorageAccount( 36 | 'name', '?key&sig=key', 'core.windows.net', 10, to, None) 37 | client = ops.create_client(sa, to, None) 38 | assert client is not None 39 | assert isinstance(client, azure.storage.file.FileService) 40 | assert isinstance( 41 | client.authentication, 42 | azure.storage.common._auth._StorageSASAuthentication) 43 | assert client._USER_AGENT_STRING.startswith( 44 | 'blobxfer/{}'.format(blobxfer.version.__version__)) 45 | assert client._httpclient.proxies is None 46 | 47 | 48 | def test_parse_file_path(): 49 | rpath = '/a/b/c' 50 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 51 | dir, fname, ss = ops.parse_file_path(path) 52 | assert fshare == 'a' 53 | assert dir == 'b' 54 | assert fname == 'c' 55 | assert ss is None 56 | 57 | rpath = '/a/b/c?sharesnapshot=2017-10-25T21:17:42.0000000Z' 58 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 59 | dir, fname, ss = ops.parse_file_path(path) 60 | assert fshare == 'a' 61 | assert dir == 'b' 62 | assert fname == 'c' 63 | assert ss == '2017-10-25T21:17:42.0000000Z' 64 | 65 | rpath = 'a/b/c/d' 66 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 67 | dir, fname, ss = ops.parse_file_path(path) 68 | assert fshare == 'a' 69 | assert dir == 'b/c' 70 | assert fname == 'd' 71 | assert ss is None 72 | 73 | rpath = 'a/b/c/d?sharesnapshot=2017-10-25T21:17:42.0000000Z' 74 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 75 | dir, fname, ss = ops.parse_file_path(path) 76 | assert fshare == 'a' 77 | assert dir == 'b/c' 78 | assert fname == 'd' 79 | assert ss == '2017-10-25T21:17:42.0000000Z' 80 | 81 | rpath = 'a/b' 82 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 83 | dir, fname, ss = ops.parse_file_path(path) 84 | assert fshare == 'a' 85 | assert dir is None 86 | assert fname == 'b' 87 | assert ss is None 88 | 89 | rpath = 'a/b?sharesnapshot=2017-10-25T21:17:42.0000000Z' 90 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 91 | dir, fname, ss = ops.parse_file_path(path) 92 | assert fshare == 'a' 93 | assert dir is None 94 | assert fname == 'b' 95 | assert ss == '2017-10-25T21:17:42.0000000Z' 96 | 97 | rpath = 'a' 98 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 99 | dir, fname, ss = ops.parse_file_path(path) 100 | assert fshare == 'a' 101 | assert dir is None 102 | assert fname is None 103 | assert ss is None 104 | 105 | rpath = 'a?snapshot=2017-10-25T21:17:42.0000000Z' 106 | fshare, path = util.explode_azure_path(util.normalize_azure_path(rpath)) 107 | dir, fname, ss = ops.parse_file_path(path) 108 | assert fshare == 'a?snapshot=2017-10-25T21:17:42.0000000Z' 109 | assert dir is None 110 | assert fname is None 111 | assert ss is None 112 | 113 | 114 | @mock.patch('blobxfer.operations.azure.file.parse_file_path') 115 | def test_get_file_properties(patched_pfp): 116 | client = mock.MagicMock() 117 | client.get_file_properties = mock.MagicMock() 118 | client.get_file_properties.return_value = mock.MagicMock() 119 | 120 | patched_pfp.return_value = ('dir', 'fname', 'ss') 121 | 122 | with pytest.raises(RuntimeError): 123 | result = ops.get_file_properties(client, 'a', 'dir', snapshot='0') 124 | 125 | 
result = ops.get_file_properties(client, 'a', 'dir', snapshot=None) 126 | assert result is not None 127 | 128 | 129 | def test_check_if_single_file(): 130 | client = mock.MagicMock() 131 | client.get_file_properties = mock.MagicMock() 132 | client.get_file_properties.return_value = mock.MagicMock() 133 | 134 | result = ops.check_if_single_file(client, 'a', 'b/c') 135 | assert result[0] 136 | 137 | result = ops.check_if_single_file(client, 'a', '') 138 | assert not result[0] 139 | 140 | client = mock.MagicMock() 141 | client.get_file_properties = mock.MagicMock() 142 | client.get_file_properties.side_effect = \ 143 | azure.common.AzureMissingResourceHttpError('msg', 404) 144 | 145 | result = ops.check_if_single_file(client, 'a', 'b/c') 146 | assert not result[0] 147 | 148 | 149 | def test_list_files_single_file(): 150 | client = mock.MagicMock() 151 | client.get_file_properties = mock.MagicMock() 152 | client.get_file_properties.return_value = 'fp' 153 | 154 | i = 0 155 | for file in ops.list_files(client, 'a', 'b/c', True): 156 | i += 1 157 | assert file == 'fp' 158 | assert i == 1 159 | 160 | 161 | def test_list_all_files(): 162 | client = mock.MagicMock() 163 | client.list_directories_and_files.side_effect = [ 164 | [ 165 | azure.storage.file.models.Directory(name='dir'), 166 | ], 167 | [ 168 | azure.storage.file.models.File(name='a'), 169 | ], 170 | ] 171 | 172 | i = 0 173 | for f in ops.list_all_files(client, 'fshare'): 174 | assert pathlib.Path(f) == pathlib.Path('dir/a') 175 | i += 1 176 | assert i == 1 177 | 178 | 179 | @mock.patch( 180 | 'blobxfer.operations.azure.file.check_if_single_file', 181 | return_value=(False, None) 182 | ) 183 | def test_list_files_directory(patched_cisf): 184 | _file = azure.storage.file.models.File(name='name') 185 | client = mock.MagicMock() 186 | client.list_directories_and_files.side_effect = [[_file]] 187 | client.get_file_properties.side_effect = [_file] 188 | 189 | i = 0 190 | for file in ops.list_files(client, 'dir', '', True): 191 | i += 1 192 | assert file.name == 'name' 193 | assert i == 1 194 | 195 | _dir = azure.storage.file.models.Directory(name='dirname') 196 | _file = azure.storage.file.models.File(name='dirname/name') 197 | client = mock.MagicMock() 198 | client.list_directories_and_files.side_effect = [[_dir], [_file]] 199 | client.get_file_properties.side_effect = [_file] 200 | 201 | i = 0 202 | for file in ops.list_files(client, '', '', True): 203 | i += 1 204 | assert file.name == _file.name 205 | assert type(file) == azure.storage.file.models.File 206 | assert i == 1 207 | 208 | 209 | def test_delete_file(): 210 | assert ops.delete_file(mock.MagicMock(), 'fshare', 'dir/name') is None 211 | 212 | with pytest.raises(RuntimeError): 213 | ops.delete_file( 214 | mock.MagicMock(), 215 | 'fshare', 216 | 'dir/name?sharesnapshot=2017-10-25T21:17:42.0000000Z') 217 | 218 | 219 | def test_get_file_range(): 220 | ase = mock.MagicMock() 221 | ret = mock.MagicMock() 222 | ret.content = b'\0' 223 | ase.client._get_file.return_value = ret 224 | ase.container = 'cont' 225 | ase.name = 'name' 226 | offsets = mock.MagicMock() 227 | offsets.start_range = 0 228 | offsets.end_range = 1 229 | 230 | assert ops.get_file_range(ase, offsets) == ret.content 231 | 232 | 233 | def test_create_share(): 234 | ase = mock.MagicMock() 235 | ase.can_create_containers = False 236 | 237 | ops.create_share(ase, None) 238 | assert ase.client.create_share.call_count == 0 239 | 240 | ase.can_create_containers = True 241 | ase.client.account_name = 'sa' 242 | ase.container = 
'cont' 243 | 244 | cc = set() 245 | ase.client.create_share.return_value = True 246 | ops.create_share(ase, cc) 247 | assert len(cc) == 1 248 | 249 | ase.client.create_share.return_value = False 250 | ops.create_share(ase, cc) 251 | assert len(cc) == 1 252 | 253 | ase.container = 'cont2' 254 | ops.create_share(ase, cc) 255 | assert len(cc) == 2 256 | 257 | ops.create_share(ase, cc) 258 | assert len(cc) == 2 259 | 260 | 261 | def test_create_all_parent_directories(): 262 | ase = mock.MagicMock() 263 | ase.client.account_name = 'sa' 264 | ase.container = 'cont' 265 | ase.name = 'abc' 266 | 267 | dirs = {} 268 | ops.create_all_parent_directories(ase, dirs) 269 | assert len(dirs) == 0 270 | 271 | ase.name = 'a/b/c.bin' 272 | ops.create_all_parent_directories(ase, dirs) 273 | assert len(dirs) == 1 274 | assert len(dirs['sa:cont']) == 2 275 | 276 | 277 | def test_create_file(): 278 | ase = mock.MagicMock() 279 | ase.name = 'a/b/c.bin' 280 | assert ops.create_file(ase) is None 281 | 282 | 283 | def test_put_file_range(): 284 | ase = mock.MagicMock() 285 | ase.name = 'a/b/c.bin' 286 | assert ops.put_file_range(ase, mock.MagicMock(), b'\0') is None 287 | 288 | 289 | def test_set_file_properties(): 290 | ase = mock.MagicMock() 291 | ase.name = 'a/b/c.bin' 292 | assert ops.set_file_properties(ase, 'md5') is None 293 | 294 | 295 | def test_set_file_metadata(): 296 | ase = mock.MagicMock() 297 | ase.name = 'a/b/c.bin' 298 | assert ops.set_file_metadata(ase, 'md') is None 299 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_crypto.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for crypto operations""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | import os 7 | import time 8 | # non-stdlib imports 9 | import cryptography.hazmat.primitives.asymmetric.rsa 10 | # local imports 11 | import blobxfer.models.download 12 | # module under test 13 | import blobxfer.operations.crypto as ops 14 | 15 | 16 | _RSAKEY = cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key( 17 | public_exponent=65537, key_size=2048, 18 | backend=cryptography.hazmat.backends.default_backend()) 19 | 20 | 21 | @mock.patch( 22 | 'cryptography.hazmat.primitives.serialization.load_pem_private_key') 23 | def test_load_rsa_private_key_file(patched_load, tmpdir): 24 | keyfile = tmpdir.join('keyfile') 25 | keyfile.write('a') 26 | patched_load.return_value = _RSAKEY 27 | 28 | rv = ops.load_rsa_private_key_file(str(keyfile), None) 29 | assert rv == _RSAKEY 30 | 31 | 32 | @mock.patch('cryptography.hazmat.primitives.serialization.load_pem_public_key') 33 | def test_load_rsa_public_key_file(patched_load, tmpdir): 34 | keyfile = tmpdir.join('keyfile') 35 | keyfile.write('b') 36 | patched_load.return_value = 'rv' 37 | 38 | rv = ops.load_rsa_public_key_file(str(keyfile)) 39 | assert rv == 'rv' 40 | 41 | 42 | def test_rsa_encrypt_decrypt_keys(): 43 | symkey = os.urandom(32) 44 | enckey = ops.rsa_encrypt_key_base64_encoded(_RSAKEY, None, symkey) 45 | assert enckey is not None 46 | plainkey = ops.rsa_decrypt_base64_encoded_key(_RSAKEY, enckey) 47 | assert symkey == plainkey 48 | 49 | 50 | def test_pkcs7_padding(): 51 | buf = os.urandom(32) 52 | pbuf = ops.pkcs7_pad(buf) 53 | buf2 = ops.pkcs7_unpad(pbuf) 54 | assert buf == buf2 55 | 56 | 57 | def test_aes_cbc_encryption(): 58 | enckey = ops.aes256_generate_random_key() 59 | assert len(enckey) == ops._AES256_KEYLENGTH_BYTES 60 | 61 | # test 
random binary data, unaligned 62 | iv = os.urandom(16) 63 | plaindata = os.urandom(31) 64 | encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) 65 | assert encdata != plaindata 66 | decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) 67 | assert decdata == plaindata 68 | 69 | # test random binary data aligned on boundary 70 | plaindata = os.urandom(32) 71 | encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) 72 | assert encdata != plaindata 73 | decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) 74 | assert decdata == plaindata 75 | 76 | # test "text" data 77 | plaintext = 'attack at dawn!' 78 | plaindata = plaintext.encode('utf8') 79 | encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, True) 80 | assert encdata != plaindata 81 | decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, True) 82 | assert decdata == plaindata 83 | assert plaindata.decode('utf8') == plaintext 84 | 85 | # test unpadded 86 | plaindata = os.urandom(32) 87 | encdata = ops.aes_cbc_encrypt_data(enckey, iv, plaindata, False) 88 | assert encdata != plaindata 89 | decdata = ops.aes_cbc_decrypt_data(enckey, iv, encdata, False) 90 | assert decdata == plaindata 91 | 92 | 93 | def test_cryptooffload_decrypt(tmpdir): 94 | symkey = ops.aes256_generate_random_key() 95 | iv = os.urandom(16) 96 | plainlen = 16 97 | plaindata = os.urandom(plainlen) 98 | encdata = ops.aes_cbc_encrypt_data(symkey, iv, plaindata, False) 99 | 100 | afile = tmpdir.join('a') 101 | afile.write(encdata, mode='wb') 102 | hmacfile = str(afile) 103 | bfile = tmpdir.join('b') 104 | bfile.ensure(file=True) 105 | 106 | a = None 107 | try: 108 | a = ops.CryptoOffload(1) 109 | offsets = blobxfer.models.download.Offsets( 110 | chunk_num=0, 111 | fd_start=0, # this matters! 
112 | num_bytes=2, 113 | range_end=3, 114 | range_start=4, 115 | unpad=False, 116 | ) 117 | a.add_decrypt_chunk( 118 | str(bfile), 0, offsets, symkey, iv, hmacfile) 119 | i = 33 120 | checked = False 121 | while i > 0: 122 | result = a.pop_done_queue() 123 | if result is None: 124 | time.sleep(0.3) 125 | i -= 1 126 | continue 127 | assert result == (str(bfile), offsets) 128 | checked = True 129 | break 130 | assert checked 131 | assert bfile.stat().size == plainlen 132 | decdata = bfile.read(mode='rb') 133 | assert decdata == plaindata 134 | finally: 135 | if a is not None: 136 | a.finalize_processes() 137 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_md5.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for md5""" 3 | 4 | # stdlib imports 5 | import time 6 | import uuid 7 | # non-stdlib imports 8 | import pytest 9 | # local imports 10 | import blobxfer.models.azure as azmodels 11 | import blobxfer.models.upload as modelsul 12 | # module under test 13 | import blobxfer.operations.md5 as ops 14 | 15 | 16 | def test_compute_md5(tmpdir): 17 | lpath = str(tmpdir.join('test.tmp')) 18 | testdata = str(uuid.uuid4()) 19 | with open(lpath, 'wt') as f: 20 | f.write(testdata) 21 | md5_file = ops.compute_md5_for_file_asbase64(lpath) 22 | md5_data = ops.compute_md5_for_data_asbase64(testdata.encode('utf8')) 23 | assert md5_file == md5_data 24 | 25 | # test offset 26 | md5_file = ops.compute_md5_for_file_asbase64(lpath, start=1) 27 | md5_data = ops.compute_md5_for_data_asbase64(testdata[1:].encode('utf8')) 28 | assert md5_file == md5_data 29 | 30 | md5_file = ops.compute_md5_for_file_asbase64(lpath, end=2) 31 | md5_data = ops.compute_md5_for_data_asbase64(testdata[:2].encode('utf8')) 32 | assert md5_file == md5_data 33 | 34 | # test mismatch 35 | md5_file_page = ops.compute_md5_for_file_asbase64(lpath, True) 36 | assert md5_file != md5_file_page 37 | 38 | # test non-existent file 39 | with pytest.raises(IOError): 40 | ops.compute_md5_for_file_asbase64(testdata) 41 | 42 | 43 | def test_check_data_is_empty(): 44 | data = b'\0' * ops._MAX_PAGE_SIZE_BYTES 45 | assert ops.check_data_is_empty(data) 46 | 47 | data = b'\0' * 8 48 | assert ops.check_data_is_empty(data) 49 | 50 | data = str(uuid.uuid4()).encode('utf8') 51 | assert not ops.check_data_is_empty(data) 52 | 53 | 54 | def test_done_cv(): 55 | a = None 56 | try: 57 | a = ops.LocalFileMd5Offload(num_workers=1) 58 | assert a.done_cv == a._done_cv 59 | finally: 60 | if a: 61 | a.finalize_processes() 62 | 63 | 64 | def test_finalize_md5_processes(): 65 | with pytest.raises(ValueError): 66 | ops.LocalFileMd5Offload(num_workers=0) 67 | 68 | a = None 69 | try: 70 | a = ops.LocalFileMd5Offload(num_workers=1) 71 | finally: 72 | if a: 73 | a.finalize_processes() 74 | 75 | for proc in a._procs: 76 | assert not proc.is_alive() 77 | 78 | 79 | def test_from_add_to_done_non_pagealigned(tmpdir): 80 | file = tmpdir.join('a') 81 | file.write('abc') 82 | fpath = str(file) 83 | key = 'key' 84 | 85 | remote_md5 = ops.compute_md5_for_file_asbase64(str(file)) 86 | 87 | a = None 88 | try: 89 | a = ops.LocalFileMd5Offload(num_workers=1) 90 | result = a.pop_done_queue() 91 | assert result is None 92 | 93 | with pytest.raises(ValueError): 94 | a.add_localfile_for_md5_check(None, None, None, None, None, None) 95 | 96 | a.add_localfile_for_md5_check( 97 | key, fpath, fpath, remote_md5, azmodels.StorageModes.Block, None) 98 | i = 33 99 | checked = 
False 100 | while i > 0: 101 | result = a.pop_done_queue() 102 | if result is None: 103 | time.sleep(0.3) 104 | i -= 1 105 | continue 106 | assert len(result) == 5 107 | assert result[0] == key 108 | assert result[1] == str(file) 109 | assert result[2] is None 110 | assert result[3] == remote_md5 111 | assert result[4] 112 | checked = True 113 | break 114 | assert checked 115 | finally: 116 | if a: 117 | a.finalize_processes() 118 | 119 | 120 | def test_from_add_to_done_lpview(tmpdir): 121 | file = tmpdir.join('a') 122 | file.write('abc') 123 | fpath = str(file) 124 | key = 'key' 125 | 126 | remote_md5 = ops.compute_md5_for_file_asbase64(str(file)) 127 | 128 | a = None 129 | lpview = modelsul.LocalPathView( 130 | fd_start=0, 131 | fd_end=3, 132 | mode=None, 133 | next=None, 134 | slice_num=None, 135 | total_slices=1, 136 | ) 137 | try: 138 | a = ops.LocalFileMd5Offload(num_workers=1) 139 | result = a.pop_done_queue() 140 | assert result is None 141 | 142 | a.add_localfile_for_md5_check( 143 | key, fpath, fpath, remote_md5, azmodels.StorageModes.Block, lpview) 144 | i = 33 145 | checked = False 146 | while i > 0: 147 | result = a.pop_done_queue() 148 | if result is None: 149 | time.sleep(0.3) 150 | i -= 1 151 | continue 152 | assert len(result) == 5 153 | assert result[0] == key 154 | assert result[1] == str(file) 155 | assert result[2] == 3 156 | assert result[3] == remote_md5 157 | assert result[4] 158 | checked = True 159 | break 160 | assert checked 161 | finally: 162 | if a: 163 | a.finalize_processes() 164 | 165 | 166 | def test_from_add_to_done_pagealigned(tmpdir): 167 | file = tmpdir.join('a') 168 | file.write('abc') 169 | fpath = str(file) 170 | key = 'key' 171 | 172 | remote_md5 = ops.compute_md5_for_file_asbase64(str(file), True) 173 | 174 | a = None 175 | try: 176 | a = ops.LocalFileMd5Offload(num_workers=1) 177 | result = a.pop_done_queue() 178 | assert result is None 179 | 180 | a.add_localfile_for_md5_check( 181 | key, fpath, fpath, remote_md5, azmodels.StorageModes.Page, None) 182 | i = 33 183 | checked = False 184 | while i > 0: 185 | result = a.pop_done_queue() 186 | if result is None: 187 | time.sleep(0.3) 188 | i -= 1 189 | continue 190 | assert len(result) == 5 191 | assert result[0] == key 192 | assert result[1] == str(file) 193 | assert result[2] is None 194 | assert result[3] == remote_md5 195 | assert result[4] 196 | checked = True 197 | break 198 | assert checked 199 | finally: 200 | if a: 201 | a.finalize_processes() 202 | -------------------------------------------------------------------------------- /tests/test_blobxfer_operations_progress.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for progress operations""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | # non-stdlib imports 7 | # local imports 8 | import blobxfer.models.azure as azmodels 9 | import blobxfer.models.download as modelsdl 10 | import blobxfer.models.options as options 11 | import blobxfer.models.synccopy as modelssc 12 | import blobxfer.models.upload as modelsul 13 | import blobxfer.util as util 14 | # module under test 15 | import blobxfer.operations.progress as ops 16 | 17 | 18 | def test_output_parameters(): 19 | go = mock.MagicMock() 20 | go.quiet = False 21 | go.log_file = 'abc' 22 | 23 | spec = modelsdl.Specification( 24 | download_options=options.Download( 25 | check_file_md5=True, 26 | chunk_size_bytes=4194304, 27 | delete_extraneous_destination=False, 28 | delete_only=False, 29 | 
max_single_object_concurrency=8, 30 | mode=azmodels.StorageModes.Auto, 31 | overwrite=True, 32 | recursive=True, 33 | rename=False, 34 | restore_file_properties=options.FileProperties( 35 | attributes=False, 36 | cache_control=None, 37 | content_type=None, 38 | lmt=False, 39 | md5=None, 40 | ), 41 | rsa_private_key=None, 42 | strip_components=0, 43 | ), 44 | skip_on_options=options.SkipOn( 45 | filesize_match=True, 46 | lmt_ge=False, 47 | md5_match=True, 48 | ), 49 | local_destination_path=mock.MagicMock(), 50 | ) 51 | ops.output_parameters(go, spec) 52 | assert util.is_not_empty(go.log_file) 53 | 54 | spec = modelsul.Specification( 55 | upload_options=options.Upload( 56 | access_tier='cool', 57 | chunk_size_bytes=4194304, 58 | delete_extraneous_destination=False, 59 | delete_only=False, 60 | mode=azmodels.StorageModes.Auto, 61 | one_shot_bytes=0, 62 | overwrite=True, 63 | recursive=True, 64 | rename=False, 65 | rsa_public_key=None, 66 | stdin_as_page_blob_size=0, 67 | store_file_properties=options.FileProperties( 68 | attributes=True, 69 | cache_control='cc', 70 | content_type='ct', 71 | lmt=None, 72 | md5=True, 73 | ), 74 | strip_components=0, 75 | vectored_io=None, 76 | ), 77 | skip_on_options=options.SkipOn( 78 | filesize_match=True, 79 | lmt_ge=False, 80 | md5_match=True, 81 | ), 82 | local_source_path=mock.MagicMock() 83 | ) 84 | ops.output_parameters(go, spec) 85 | assert util.is_not_empty(go.log_file) 86 | 87 | spec = modelsul.Specification( 88 | upload_options=options.Upload( 89 | access_tier='cool', 90 | chunk_size_bytes=4194304, 91 | delete_extraneous_destination=False, 92 | delete_only=False, 93 | mode=azmodels.StorageModes.Auto, 94 | one_shot_bytes=0, 95 | overwrite=True, 96 | recursive=True, 97 | rename=False, 98 | rsa_public_key=None, 99 | stdin_as_page_blob_size=0, 100 | store_file_properties=options.FileProperties( 101 | attributes=True, 102 | cache_control=None, 103 | content_type=None, 104 | lmt=None, 105 | md5=True, 106 | ), 107 | strip_components=0, 108 | vectored_io=None, 109 | ), 110 | skip_on_options=options.SkipOn( 111 | filesize_match=True, 112 | lmt_ge=False, 113 | md5_match=True, 114 | ), 115 | local_source_path=mock.MagicMock() 116 | ) 117 | ops.output_parameters(go, spec) 118 | assert util.is_not_empty(go.log_file) 119 | 120 | spec = modelssc.Specification( 121 | synccopy_options=options.SyncCopy( 122 | access_tier='archive', 123 | delete_extraneous_destination=False, 124 | delete_only=False, 125 | dest_mode=azmodels.StorageModes.Auto, 126 | mode=azmodels.StorageModes.Auto, 127 | overwrite=True, 128 | recursive=True, 129 | rename=False, 130 | server_side_copy=True, 131 | strip_components=0, 132 | ), 133 | skip_on_options=options.SkipOn( 134 | filesize_match=True, 135 | lmt_ge=False, 136 | md5_match=True, 137 | ) 138 | ) 139 | ops.output_parameters(go, spec) 140 | assert util.is_not_empty(go.log_file) 141 | 142 | 143 | def test_update_progress_bar(): 144 | go = mock.MagicMock() 145 | go.quiet = False 146 | go.progress_bar = True 147 | go.log_file = 'abc' 148 | 149 | start = util.datetime_now() 150 | 151 | ops.update_progress_bar( 152 | go, 'download', start, None, 1, None, 1) 153 | 154 | ops.update_progress_bar( 155 | go, 'upload', start, 1, 0, 256, 0, stdin_upload=True) 156 | 157 | with mock.patch('blobxfer.util.datetime_now') as patched_dt: 158 | patched_dt.return_value = start 159 | ops.update_progress_bar( 160 | go, 'synccopy', start, 1, 1, 1, 1) 161 | 162 | assert util.is_not_empty(go.log_file) 163 | 
-------------------------------------------------------------------------------- /tests/test_blobxfer_operations_resume.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for operations resume""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | import pathlib 7 | # non-stdlib imports 8 | # local imports 9 | # module under test 10 | import blobxfer.operations.resume as ops 11 | 12 | 13 | def test_generate_record_key(): 14 | ase = mock.MagicMock() 15 | ase._client.primary_endpoint = 'ep' 16 | ase.path = 'abc' 17 | 18 | assert ops._BaseResumeManager.generate_record_key(ase) == 'ep:abc' 19 | 20 | 21 | def test_download_resume_manager(tmpdir): 22 | tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) 23 | tmpdb_dat = pathlib.Path(str(tmpdir.join('tmp.db.dat'))) 24 | 25 | drm = ops.DownloadResumeManager(tmpdb) 26 | 27 | assert drm._data is not None 28 | drm.close() 29 | assert drm._data is None 30 | assert tmpdb_dat.exists() or tmpdb.exists() 31 | drm.delete() 32 | assert drm._data is None 33 | assert not tmpdb_dat.exists() and not tmpdb.exists() 34 | 35 | ase = mock.MagicMock() 36 | ase._name = 'name' 37 | ase._client.primary_endpoint = 'ep' 38 | ase._size = 16 39 | 40 | final_path = 'fp' 41 | drm = ops.DownloadResumeManager(tmpdb) 42 | drm.add_or_update_record(final_path, ase, 2, 0, False, None) 43 | d = drm.get_record(ase) 44 | 45 | assert d.final_path == final_path 46 | 47 | drm.add_or_update_record(final_path, ase, 2, 1, False, 'abc') 48 | d = drm.get_record(ase) 49 | 50 | assert d.final_path == final_path 51 | assert not d.completed 52 | assert d.next_integrity_chunk == 1 53 | assert d.md5hexdigest == 'abc' 54 | 55 | drm.add_or_update_record(final_path, ase, 2, 1, True, None) 56 | d = drm.get_record(ase) 57 | 58 | assert d.final_path == final_path 59 | assert d.completed 60 | assert d.next_integrity_chunk == 1 61 | assert d.md5hexdigest == 'abc' 62 | 63 | # idempotent check after completed 64 | drm.add_or_update_record(final_path, ase, 2, 1, True, None) 65 | d = drm.get_record(ase) 66 | 67 | assert d.final_path == final_path 68 | assert d.completed 69 | assert d.next_integrity_chunk == 1 70 | assert d.md5hexdigest == 'abc' 71 | 72 | drm.close() 73 | assert drm._data is None 74 | assert tmpdb_dat.exists() or tmpdb.exists() 75 | 76 | drm.delete() 77 | assert drm._data is None 78 | assert not tmpdb_dat.exists() and not tmpdb.exists() 79 | 80 | # oserror path 81 | with mock.patch('blobxfer.util.on_windows', return_value=False): 82 | drm.delete() 83 | assert drm._data is None 84 | 85 | # oserror path 86 | with mock.patch('blobxfer.util.on_windows', return_value=True): 87 | drm.delete() 88 | assert drm._data is None 89 | 90 | 91 | def test_upload_resume_manager(tmpdir): 92 | tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) 93 | tmpdb_dat = pathlib.Path(str(tmpdir.join('tmp.db.dat'))) 94 | 95 | urm = ops.UploadResumeManager(tmpdb) 96 | assert urm._data is not None 97 | urm.close() 98 | assert urm._data is None 99 | assert tmpdb_dat.exists() or tmpdb.exists() 100 | urm.delete() 101 | assert urm._data is None 102 | assert not tmpdb_dat.exists() and not tmpdb.exists() 103 | 104 | ase = mock.MagicMock() 105 | ase._name = 'name' 106 | ase._client.primary_endpoint = 'ep' 107 | ase._size = 16 108 | 109 | local_path = 'fp' 110 | urm = ops.UploadResumeManager(tmpdb) 111 | urm.add_or_update_record(local_path, ase, 2, 8, 0, False, None) 112 | u = urm.get_record(ase) 113 | 114 | assert u.local_path == local_path 115 | assert u.length 
== ase._size 116 | assert u.chunk_size == 2 117 | assert u.total_chunks == 8 118 | assert u.completed_chunks == 0 119 | assert not u.completed 120 | 121 | urm.add_or_update_record(local_path, ase, 2, 8, 1, False, 'abc') 122 | u = urm.get_record(ase) 123 | 124 | assert u.local_path == local_path 125 | assert u.length == ase._size 126 | assert u.chunk_size == 2 127 | assert u.total_chunks == 8 128 | assert u.completed_chunks == 1 129 | assert not u.completed 130 | assert u.md5hexdigest == 'abc' 131 | 132 | urm.add_or_update_record(local_path, ase, 2, 8, 8, True, None) 133 | u = urm.get_record(ase) 134 | 135 | assert u.local_path == local_path 136 | assert u.length == ase._size 137 | assert u.chunk_size == 2 138 | assert u.total_chunks == 8 139 | assert u.completed_chunks == 8 140 | assert u.completed 141 | assert u.md5hexdigest == 'abc' 142 | 143 | # idempotent check after completed 144 | urm.add_or_update_record(local_path, ase, 2, 8, 8, True, None) 145 | u = urm.get_record(ase) 146 | 147 | assert u.local_path == local_path 148 | assert u.length == ase._size 149 | assert u.chunk_size == 2 150 | assert u.total_chunks == 8 151 | assert u.completed_chunks == 8 152 | assert u.completed 153 | assert u.md5hexdigest == 'abc' 154 | 155 | urm.close() 156 | assert urm._data is None 157 | assert tmpdb_dat.exists() or tmpdb.exists() 158 | 159 | urm.delete() 160 | assert urm._data is None 161 | assert not tmpdb_dat.exists() and not tmpdb.exists() 162 | 163 | 164 | def test_synccopy_resume_manager(tmpdir): 165 | tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) 166 | tmpdb_dat = pathlib.Path(str(tmpdir.join('tmp.db.dat'))) 167 | 168 | srm = ops.SyncCopyResumeManager(tmpdb) 169 | assert srm._data is not None 170 | srm.close() 171 | assert srm._data is None 172 | assert tmpdb_dat.exists() or tmpdb.exists() 173 | srm.delete() 174 | assert srm._data is None 175 | assert not tmpdb_dat.exists() and not tmpdb.exists() 176 | 177 | ase = mock.MagicMock() 178 | ase._name = 'name' 179 | ase._client.primary_endpoint = 'ep' 180 | ase._size = 16 181 | 182 | src_block_list = 'srcbl' 183 | 184 | srm = ops.SyncCopyResumeManager(tmpdb) 185 | srm.add_or_update_record(ase, src_block_list, 0, 2, 8, 0, False) 186 | s = srm.get_record(ase) 187 | 188 | assert s.src_block_list == src_block_list 189 | assert s.length == ase._size 190 | assert s.offset == 0 191 | assert s.chunk_size == 2 192 | assert s.total_chunks == 8 193 | assert s.completed_chunks == 0 194 | assert not s.completed 195 | 196 | srm.add_or_update_record(ase, src_block_list, 1, 2, 8, 1, False) 197 | s = srm.get_record(ase) 198 | 199 | assert s.src_block_list == src_block_list 200 | assert s.length == ase._size 201 | assert s.offset == 1 202 | assert s.chunk_size == 2 203 | assert s.total_chunks == 8 204 | assert s.completed_chunks == 1 205 | assert not s.completed 206 | 207 | srm.add_or_update_record(ase, src_block_list, 8, 2, 8, 8, True) 208 | s = srm.get_record(ase) 209 | 210 | assert s.src_block_list == src_block_list 211 | assert s.length == ase._size 212 | assert s.offset == 8 213 | assert s.chunk_size == 2 214 | assert s.total_chunks == 8 215 | assert s.completed_chunks == 8 216 | assert s.completed 217 | 218 | # idempotent check after completed 219 | srm.add_or_update_record(ase, src_block_list, 8, 2, 8, 8, True) 220 | s = srm.get_record(ase) 221 | 222 | assert s.src_block_list == src_block_list 223 | assert s.length == ase._size 224 | assert s.offset == 8 225 | assert s.chunk_size == 2 226 | assert s.total_chunks == 8 227 | assert s.completed_chunks == 8 
228 | assert s.completed 229 | 230 | srm.close() 231 | assert srm._data is None 232 | assert tmpdb_dat.exists() or tmpdb.exists() 233 | 234 | srm.delete() 235 | assert srm._data is None 236 | assert not tmpdb_dat.exists() and not tmpdb.exists() 237 | -------------------------------------------------------------------------------- /tests/test_blobxfer_retry.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for retry""" 3 | 4 | # stdlib imports 5 | import unittest.mock as mock 6 | import ssl 7 | # non-stdlib imports 8 | import azure.storage.common.models 9 | import pytest 10 | import requests 11 | import urllib3 12 | # module under test 13 | import blobxfer.retry as retry 14 | 15 | 16 | def test_should_retry(): 17 | er = retry.ExponentialRetryWithMaxWait() 18 | context = mock.MagicMock() 19 | context.count = 1 20 | er.max_attempts = 1 21 | assert not er._should_retry(context) 22 | 23 | context.count = 0 24 | er.max_attempts = 20 25 | context.response.status = None 26 | context.exception = requests.Timeout() 27 | assert er._should_retry(context) 28 | 29 | # test malformed 30 | ex = requests.ConnectionError( 31 | urllib3.exceptions.MaxRetryError( 32 | mock.MagicMock(), mock.MagicMock()) 33 | ) 34 | context.exception = ex 35 | assert not er._should_retry(context) 36 | 37 | ex = requests.ConnectionError( 38 | urllib3.exceptions.MaxRetryError( 39 | mock.MagicMock(), mock.MagicMock(), 40 | reason=urllib3.exceptions.NewConnectionError( 41 | list(retry._RETRYABLE_ERRNO_MAXRETRY)[0], 'message') 42 | ) 43 | ) 44 | context.exception = ex 45 | assert er._should_retry(context) 46 | 47 | ex = requests.ConnectionError( 48 | urllib3.exceptions.MaxRetryError( 49 | mock.MagicMock(), mock.MagicMock(), 50 | reason=urllib3.exceptions.NewConnectionError( 51 | '[Errno N]', 'message') 52 | ) 53 | ) 54 | context.exception = ex 55 | assert not er._should_retry(context) 56 | 57 | ex = requests.ConnectionError( 58 | urllib3.exceptions.MaxRetryError( 59 | mock.MagicMock(), mock.MagicMock(), 60 | reason=ssl.SSLError(ssl.SSLWantWriteError()) 61 | ) 62 | ) 63 | context.exception = ex 64 | assert er._should_retry(context) 65 | 66 | # test malformed 67 | ex = requests.ConnectionError( 68 | urllib3.exceptions.ProtocolError() 69 | ) 70 | context.exception = ex 71 | assert not er._should_retry(context) 72 | 73 | ex = requests.ConnectionError( 74 | urllib3.exceptions.ProtocolError( 75 | '({}, message)'.format(list(retry._RETRYABLE_ERRNO_PROTOCOL)[0]) 76 | ) 77 | ) 78 | context.exception = ex 79 | assert er._should_retry(context) 80 | 81 | ex = requests.ConnectionError( 82 | urllib3.exceptions.ProtocolError('(N, message)') 83 | ) 84 | context.exception = ex 85 | assert not er._should_retry(context) 86 | 87 | ex = Exception('Connection aborted.') 88 | context.exception = ex 89 | assert er._should_retry(context) 90 | 91 | ex = Exception('Read timed out.') 92 | context.exception = ex 93 | assert er._should_retry(context) 94 | 95 | ex = Exception('The write operation timed out.') 96 | context.exception = ex 97 | assert er._should_retry(context) 98 | 99 | ex = Exception('no retry') 100 | context.exception = ex 101 | assert not er._should_retry(context) 102 | 103 | ex = requests.exceptions.ContentDecodingError() 104 | context.exception = ex 105 | assert er._should_retry(context) 106 | 107 | context.exception = None 108 | context.response.status = 200 109 | assert er._should_retry(context) 110 | 111 | context.response.status = 300 112 | assert not 
er._should_retry(context) 113 | 114 | context.response.status = 404 115 | context.location_mode = azure.storage.common.models.LocationMode.SECONDARY 116 | assert er._should_retry(context) 117 | 118 | context.response.status = 408 119 | assert er._should_retry(context) 120 | 121 | context.response.status = 500 122 | assert er._should_retry(context) 123 | 124 | context.response.status = 501 125 | assert not er._should_retry(context) 126 | 127 | 128 | def test_exponentialretrywithmaxwait(): 129 | with pytest.raises(ValueError): 130 | er = retry.ExponentialRetryWithMaxWait( 131 | initial_backoff=1, max_backoff=0) 132 | 133 | with pytest.raises(ValueError): 134 | er = retry.ExponentialRetryWithMaxWait( 135 | initial_backoff=1, max_backoff=1, max_retries=-1) 136 | 137 | with pytest.raises(ValueError): 138 | er = retry.ExponentialRetryWithMaxWait( 139 | initial_backoff=2, max_backoff=1) 140 | 141 | er = retry.ExponentialRetryWithMaxWait() 142 | context = mock.MagicMock() 143 | context.count = 0 144 | context.response.status = 500 145 | bo = er.retry(context) 146 | assert context.count == 1 147 | assert bo == 0.1 148 | 149 | bo = er.retry(context) 150 | assert context.count == 2 151 | assert bo == 0.2 152 | 153 | bo = er.retry(context) 154 | assert context.count == 3 155 | assert bo == 0.4 156 | 157 | bo = er.retry(context) 158 | assert context.count == 4 159 | assert bo == 0.8 160 | 161 | bo = er.retry(context) 162 | assert context.count == 5 163 | assert bo == 0.1 164 | -------------------------------------------------------------------------------- /tests/test_blobxfer_util.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Tests for util""" 3 | 4 | # stdlib imports 5 | import datetime 6 | import time 7 | # non-stdlib imports 8 | import dateutil.tz 9 | import pytest 10 | # module under test 11 | import blobxfer.util 12 | 13 | 14 | def test_is_none_or_empty(): 15 | a = None 16 | assert blobxfer.util.is_none_or_empty(a) 17 | a = [] 18 | assert blobxfer.util.is_none_or_empty(a) 19 | a = {} 20 | assert blobxfer.util.is_none_or_empty(a) 21 | a = '' 22 | assert blobxfer.util.is_none_or_empty(a) 23 | a = 'asdf' 24 | assert not blobxfer.util.is_none_or_empty(a) 25 | a = ['asdf'] 26 | assert not blobxfer.util.is_none_or_empty(a) 27 | a = {'asdf': 0} 28 | assert not blobxfer.util.is_none_or_empty(a) 29 | a = [None] 30 | assert not blobxfer.util.is_none_or_empty(a) 31 | 32 | 33 | def test_is_not_empty(): 34 | a = None 35 | assert not blobxfer.util.is_not_empty(a) 36 | a = [] 37 | assert not blobxfer.util.is_not_empty(a) 38 | a = {} 39 | assert not blobxfer.util.is_not_empty(a) 40 | a = '' 41 | assert not blobxfer.util.is_not_empty(a) 42 | a = 'asdf' 43 | assert blobxfer.util.is_not_empty(a) 44 | a = ['asdf'] 45 | assert blobxfer.util.is_not_empty(a) 46 | a = {'asdf': 0} 47 | assert blobxfer.util.is_not_empty(a) 48 | a = [None] 49 | assert blobxfer.util.is_not_empty(a) 50 | 51 | 52 | def test_merge_dict(): 53 | with pytest.raises(ValueError): 54 | blobxfer.util.merge_dict(1, 2) 55 | 56 | a = {'a_only': 42, 'a_and_b': 43, 57 | 'a_only_dict': {'a': 44}, 'a_and_b_dict': {'a_o': 45, 'a_a_b': 46}} 58 | b = {'b_only': 45, 'a_and_b': 46, 59 | 'b_only_dict': {'a': 47}, 'a_and_b_dict': {'b_o': 48, 'a_a_b': 49}} 60 | c = blobxfer.util.merge_dict(a, b) 61 | assert c['a_only'] == 42 62 | assert c['b_only'] == 45 63 | assert c['a_and_b_dict']['a_o'] == 45 64 | assert c['a_and_b_dict']['b_o'] == 48 65 | assert c['a_and_b_dict']['a_a_b'] == 49 66 | assert 
c['b_only_dict']['a'] == 47 67 | assert c['a_and_b'] == 46 68 | assert a['a_only'] == 42 69 | assert a['a_and_b'] == 43 70 | assert b['b_only'] == 45 71 | assert b['a_and_b'] == 46 72 | 73 | 74 | def test_datetime_now(): 75 | a = blobxfer.util.datetime_now() 76 | assert type(a) == datetime.datetime 77 | 78 | 79 | def test_datetime_from_timestamp(): 80 | ts = time.time() 81 | a = blobxfer.util.datetime_from_timestamp(ts) 82 | assert type(a) == datetime.datetime 83 | 84 | b = a.astimezone(dateutil.tz.tzutc()) 85 | assert b == blobxfer.util.datetime_from_timestamp(ts, as_utc=True) 86 | 87 | 88 | def test_scantree(tmpdir): 89 | tmpdir.mkdir('abc') 90 | abcpath = tmpdir.join('abc') 91 | abcpath.join('hello.txt').write('hello') 92 | abcpath.mkdir('def') 93 | defpath = abcpath.join('def') 94 | defpath.join('world.txt').write('world') 95 | found = set() 96 | for de in blobxfer.util.scantree(str(tmpdir)): 97 | if de.name != '.lock': 98 | found.add(de.name) 99 | assert 'hello.txt' in found 100 | assert 'world.txt' in found 101 | assert len(found) == 2 102 | 103 | 104 | def test_get_mime_type(): 105 | a = 'b.txt' 106 | mt = blobxfer.util.get_mime_type(a) 107 | assert mt == 'text/plain' 108 | a = 'c.probably_cant_determine_this' 109 | mt = blobxfer.util.get_mime_type(a) 110 | assert mt == 'application/octet-stream' 111 | 112 | 113 | def test_base64_encode_as_string(): 114 | a = b'abc' 115 | enc = blobxfer.util.base64_encode_as_string(a) 116 | assert type(enc) != bytes 117 | dec = blobxfer.util.base64_decode_string(enc) 118 | assert a == dec 119 | 120 | 121 | def test_new_md5_hasher(): 122 | assert blobxfer.util.new_md5_hasher() is not None 123 | 124 | 125 | def test_page_align_content_length(): 126 | assert 0 == blobxfer.util.page_align_content_length(0) 127 | assert 512 == blobxfer.util.page_align_content_length(1) 128 | assert 512 == blobxfer.util.page_align_content_length(511) 129 | assert 512 == blobxfer.util.page_align_content_length(512) 130 | assert 1024 == blobxfer.util.page_align_content_length(513) 131 | assert 1024 == blobxfer.util.page_align_content_length(1023) 132 | assert 1024 == blobxfer.util.page_align_content_length(1024) 133 | assert 1536 == blobxfer.util.page_align_content_length(1025) 134 | 135 | 136 | def test_normalize_azure_path(): 137 | a = '\\cont\\r1\\r2\\r3\\' 138 | b = blobxfer.util.normalize_azure_path(a) 139 | assert b == 'cont/r1/r2/r3' 140 | 141 | a = '/cont/r1/r2/r3/' 142 | b = blobxfer.util.normalize_azure_path(a) 143 | assert b == 'cont/r1/r2/r3' 144 | 145 | a = '/cont\\r1/r2\\r3/' 146 | b = blobxfer.util.normalize_azure_path(a) 147 | assert b == 'cont/r1/r2/r3' 148 | 149 | with pytest.raises(ValueError): 150 | blobxfer.util.normalize_azure_path('') 151 | 152 | 153 | def test_explode_azure_path(): 154 | p = 'cont' 155 | cont, rpath = blobxfer.util.explode_azure_path(p) 156 | assert cont == 'cont' 157 | assert rpath == '' 158 | 159 | p = 'cont/' 160 | cont, rpath = blobxfer.util.explode_azure_path(p) 161 | assert cont == 'cont' 162 | assert rpath == '' 163 | 164 | p = 'cont/a/' 165 | cont, rpath = blobxfer.util.explode_azure_path(p) 166 | assert cont == 'cont' 167 | assert rpath == 'a' 168 | 169 | p = '/some/remote/path' 170 | cont, rpath = blobxfer.util.explode_azure_path(p) 171 | assert cont == 'some' 172 | assert rpath == 'remote/path' 173 | 174 | 175 | def test_blob_is_snapshot(): 176 | a = '/cont/a?snapshot=2017-02-23T22:21:14.8121864Z' 177 | assert blobxfer.util.blob_is_snapshot(a) 178 | 179 | a = '/cont/a?snapshot=abc' 180 | assert not 
blobxfer.util.blob_is_snapshot(a) 181 | 182 | a = '/cont/a?snapshot=' 183 | assert not blobxfer.util.blob_is_snapshot(a) 184 | 185 | a = '/cont/a?snapshot=2017-02-23T22:21:14.8121864Z?snapshot=' 186 | assert not blobxfer.util.blob_is_snapshot(a) 187 | 188 | 189 | def test_parse_blob_snapshot_parameter(): 190 | base = '/cont/a' 191 | param = '2017-02-23T22:21:14.8121864Z' 192 | 193 | a = base + '?snapshot=' + param 194 | assert blobxfer.util.parse_blob_snapshot_parameter(a) == (base, param) 195 | 196 | a = base + '?snapshot=' 197 | assert blobxfer.util.parse_blob_snapshot_parameter(a) is None 198 | 199 | 200 | def test_parse_fileshare_or_file_snapshot_parameter(): 201 | base = 'fs/a' 202 | param = '2017-02-23T22:21:14.8121864Z' 203 | 204 | a = base + '?sharesnapshot=' + param 205 | assert blobxfer.util.parse_fileshare_or_file_snapshot_parameter(a) == ( 206 | base, param) 207 | 208 | a = base + '?sharesnapshot=abc' 209 | assert blobxfer.util.parse_fileshare_or_file_snapshot_parameter(a) == ( 210 | a, None) 211 | 212 | base = 'fs' 213 | 214 | a = base + '?snapshot=' + param 215 | assert blobxfer.util.parse_fileshare_or_file_snapshot_parameter(a) == ( 216 | base, param) 217 | 218 | a = base + '?snapshot=abc' 219 | assert blobxfer.util.parse_fileshare_or_file_snapshot_parameter(a) == ( 220 | a, None) 221 | 222 | 223 | def test_explode_azure_storage_url(): 224 | url = 'https://sa.blob.core.windows.net/cont/file' 225 | sa, mode, ep, rpath, sas = blobxfer.util.explode_azure_storage_url(url) 226 | assert sa == 'sa' 227 | assert mode == 'blob' 228 | assert ep == 'core.windows.net' 229 | assert rpath == 'cont/file' 230 | assert sas is None 231 | 232 | url = 'https://sa2.file.core.usgovcloudapi.net/cont2/file2?sas' 233 | sa, mode, ep, rpath, sas = blobxfer.util.explode_azure_storage_url(url) 234 | assert sa == 'sa2' 235 | assert mode == 'file' 236 | assert ep == 'core.usgovcloudapi.net' 237 | assert rpath == 'cont2/file2' 238 | assert sas == 'sas' 239 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py36, py37, py38, py39 3 | 4 | [testenv] 5 | deps = -rtest_requirements.txt 6 | passenv = CI TRAVIS TRAVIS_* 7 | commands = 8 | pip list --format=columns --outdated 9 | flake8 --exit-zero {envsitepackagesdir}/blobxfer_cli/ 10 | flake8 --exit-zero {envsitepackagesdir}/blobxfer/ 11 | py.test \ 12 | -x -l -s \ 13 | --ignore venv/ \ 14 | --cov-config .coveragerc \ 15 | --cov-report term-missing \ 16 | --cov {envsitepackagesdir}/blobxfer 17 | 18 | [flake8] 19 | max-line-length = 79 20 | select = F,E,W 21 | ignore = W504 22 | --------------------------------------------------------------------------------