├── mpiFileUtils ├── test │ ├── __init__.py │ ├── conftest.py │ └── test_mpiFileUtils.py ├── exceptions.py └── __init__.py ├── .bandit ├── pyproject.toml ├── bin ├── archivepurge ├── unarchivetar ├── .archivetar ├── archivetar └── archivescan ├── .travis.yml ├── SuperTar ├── exceptions.py ├── test │ └── test_SuperTar.py └── __init__.py ├── tox.ini ├── .coveragerc ├── archivetar ├── exceptions.py ├── purge.py ├── unarchivetar.py ├── archive_args.py └── __init__.py ├── Pipfile ├── .pre-commit-config.yaml ├── GlobusTransfer ├── exceptions.py ├── test │ └── test_GlobusTransfer.py └── __init__.py ├── setup.py ├── test ├── test_purge.py ├── conftest.py ├── test_unarchivetar.py ├── test_archivetar.py ├── test_DwalkParser.py └── data │ └── ident-example-support.txt ├── LICENSE ├── singularity ├── README.md └── archivetar.def ├── README.md ├── .gitignore ├── INSTALL.md ├── USAGE.md └── Pipfile.lock /mpiFileUtils/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.bandit: -------------------------------------------------------------------------------- 1 | [bandit] 2 | exclude = test/*,*/test/* 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /bin/archivepurge: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Brock Palen 4 | # brockp@umich.edu 5 | # 7/2020 6 | 7 | import sys 8 | 9 | import archivetar.purge 10 | 11 | archivetar.purge.main(sys.argv) 12 | -------------------------------------------------------------------------------- /bin/unarchivetar: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Brock Palen 4 | # brockp@umich.edu 5 | # 7/2020 6 | 7 | import sys 8 | 9 | import archivetar.unarchivetar 10 | 11 | archivetar.unarchivetar.main(sys.argv) 12 | -------------------------------------------------------------------------------- /mpiFileUtils/test/conftest.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from unittest.mock import MagicMock 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture 8 | def mock_subprocess(): 9 | mock = MagicMock(spec=subprocess) 10 | return mock 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: jammy 3 | python: 4 | - "3.10" 5 | 6 | # command to install dependencies 7 | install: 8 | - pip install pipenv 9 | - pipenv install --dev 10 | 11 | script: 12 | - pytest 13 | 14 | after_success: 15 | - codecov 16 | -------------------------------------------------------------------------------- /SuperTar/exceptions.py: -------------------------------------------------------------------------------- 1 | class SuperTarException(Exception): 2 | """ 3 | SuperTar base exception class. 4 | """ 5 | 6 | pass 7 | 8 | 9 | class SuperTarMissmatchedOptions(SuperTarException): 10 | """ 11 | Exception for user selected optoins that cannot be used together. 
12 | """ 13 | 14 | pass 15 | -------------------------------------------------------------------------------- /mpiFileUtils/exceptions.py: -------------------------------------------------------------------------------- 1 | class mpiFileUtilsError(BaseException): 2 | """Base Exception Class for Module""" 3 | 4 | def __init__(self, *kargs, **kwargs): 5 | super().__init__(*kargs, **kwargs) 6 | 7 | 8 | class mpirunError(mpiFileUtilsError): 9 | """problem with mpirun option given""" 10 | 11 | pass 12 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503 3 | max-line-length = 88 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4 6 | 7 | [pytest] 8 | 9 | addopts = --cov-report term --cov=. 10 | 11 | [isort] 12 | multi_line_output=3 13 | include_trailing_comma=True 14 | force_grid_wrap=0 15 | use_parentheses=True 16 | line_length=88 17 | 18 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */test/* 3 | 4 | [report] 5 | exclude_lines = 6 | # Have to re-enable the standard pragma 7 | pragma: no cover 8 | 9 | # Don't complain about missing debug-only code: 10 | def __repr__ 11 | if self\.debug 12 | 13 | # Don't complain if tests don't hit defensive assertion code: 14 | raise AssertionError 15 | raise NotImplementedError 16 | 17 | # Don't complain if non-runnable code isn't run: 18 | if 0: 19 | if __name__ == .__main__.: 20 | -------------------------------------------------------------------------------- /archivetar/exceptions.py: -------------------------------------------------------------------------------- 1 | class ArchiveTarException(Exception): 2 | """ArchiveTar base exception class.""" 3 | 4 | pass 5 | 6 | 7 | class ArchivePrefixConflict(ArchiveTarException): 8 | """Selected prefix conflicts with existing files.""" 9 | 10 | pass 11 | 12 | 13 | class ArchiveTarArchiveError(ArchiveTarException): 14 | """Errors related to the archiving process, tar, Globus, etc.""" 15 | 16 | pass 17 | 18 | 19 | class TarError(ArchiveTarException): 20 | """ 21 | Error during Tar Process 22 | """ 23 | 24 | pass 25 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pytest = "*" 8 | flake8 = "*" 9 | isort = "*" 10 | pytest-flake8 = "*" 11 | pytest-freezegun = "*" 12 | pytest-cov = "*" 13 | codecov = "*" 14 | pydocstyle = "*" 15 | bandit = "*" 16 | black = "*" 17 | pre-commit = "*" 18 | yamllint = "*" 19 | pyinstaller = "*" 20 | 21 | [packages] 22 | humanfriendly = "*" 23 | python-dotenv = "*" 24 | archivetar = {editable = true, path = "."} 25 | natsort = "*" 26 | globus-sdk = "*" 27 | environs = "*" 28 | 29 | [requires] 30 | python_version = "3.10" 31 | -------------------------------------------------------------------------------- /bin/.archivetar: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Brock Palen 4 | # brockp@umich.edu 5 | # 7/2020 6 | # 7 | # prep a directory for placement in dataden 8 | # process: 9 | # 1. 
run mpiFileUtils / dwalk (deafault sort in name / path order) all files < minsize 10 | # 2. Take resulting list build tar lists by summing size until > tarsize (before compression) 11 | # 3. Tar each list: OR --dryrun create list with est size 12 | # a. Create Index file of contents 13 | # b. Optionally compress -z / -j with gzip/pigz bzip/lbzip2 if installed 14 | # c. Optionally purge 15 | # 4. (?) Kick out optimized untar script (pigz / lbzip2) 16 | 17 | import sys 18 | 19 | import archivetar 20 | 21 | archivetar.main(sys.argv) 22 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: isort 5 | name: isort 6 | stages: [commit] 7 | language: system 8 | entry: pipenv run isort 9 | types: [python] 10 | 11 | - id: black 12 | name: black 13 | stages: [commit] 14 | language: system 15 | entry: pipenv run black 16 | types: [python] 17 | 18 | #- id: flake8 19 | # name: flake8 20 | # stages: [commit] 21 | # language: system 22 | # entry: pipenv run flake8 23 | # types: [python] 24 | # exclude: setup.py 25 | 26 | - id: pytest 27 | name: pytest 28 | stages: [commit] 29 | language: system 30 | entry: pipenv run pytest --no-cov 31 | types: [python] 32 | pass_filenames: false 33 | -------------------------------------------------------------------------------- /GlobusTransfer/exceptions.py: -------------------------------------------------------------------------------- 1 | class GlobusError(BaseException): 2 | """Globus base exception class.""" 3 | 4 | pass 5 | 6 | 7 | class GlobusFailedTransfer(GlobusError): 8 | """Transfer failed or was canceled.""" 9 | 10 | def __init__(self, status): 11 | """ 12 | Messy hack, picling the exception and re-raising it causes error, 13 | Checking if already a string and pass rather than building from results dict. 
14 | """ 15 | if isinstance(status, str): 16 | super().__init__(status) 17 | else: 18 | self.message = f"Task: {status['label']} with id: {status['task_id']}" 19 | super().__init__(self.message) 20 | 21 | 22 | class ScopeOrSingleDomainError(GlobusError): 23 | """Auth found missing scope or single_domain requirement""" 24 | 25 | pass 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="archivetar", 8 | version="0.13.1", 9 | author="Brock Palen", 10 | author_email="brockp@umich.edu", 11 | description="Prep folder for archive", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/brockpalen/archivetar/", 15 | packages=setuptools.find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | ], 21 | python_requires=">=3.6", 22 | scripts=[ 23 | "bin/archivetar", 24 | "bin/unarchivetar", 25 | "bin/archivepurge", 26 | "bin/archivescan", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /test/test_purge.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from archivetar.purge import purge_empty_folders 4 | 5 | 6 | def test_purge_empty_folders(tmp_path): 7 | """test purge empty folders leaves correct number of items""" 8 | 9 | root = tmp_path / "root" 10 | root.mkdir() 11 | 12 | # root/dir1/dir1_1/file1 13 | # don't remove anything 14 | dir1 = root / "dir1" 15 | dir1_1 = dir1 / "dir1_1" 16 | file1 = dir1 / "file1" 17 | dir1.mkdir() 18 | dir1_1.mkdir() 19 | file1.touch() 20 | 21 | # root/dir2/dir2_1/dir2_2 22 | dir2 = root / "dir2" 23 | dir2_1 = dir2 / "dir2_1" 24 | dir2_2 = dir2_1 / "dir2_2" 25 | dir2.mkdir() 26 | dir2_1.mkdir() 27 | dir2_2.mkdir() 28 | 29 | # count number entries before 30 | before = len(list(root.iterdir())) 31 | 32 | # remove emtpy items 33 | purge_empty_folders(tmp_path) 34 | print(f"Before: {before} entries") 35 | assert before == 2 36 | 37 | # count number of entires after 38 | after = len(list(root.iterdir())) 39 | print(f"After: {after} entries") 40 | assert after == 1 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Brock Palen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /GlobusTransfer/test/test_GlobusTransfer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import sys 4 | 5 | import pytest 6 | 7 | sys.path.append(pathlib.Path(__file__).parent.parent) 8 | 9 | 10 | from GlobusTransfer import GlobusTransfer 11 | 12 | pytestmark = pytest.mark.globus 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def globus(): 17 | """simple globus constructor""" 18 | # DEST_EP = 'fa67b5dc-1b2d-11e9-9835-0262a1f2f698' # conflux 19 | SOURCE_EP = "e0370902-9f48-11e9-821b-02b7a92d8e58" # greatlakes 20 | DEST_EP = SOURCE_EP 21 | globus = GlobusTransfer(SOURCE_EP, DEST_EP, "~") 22 | 23 | yield globus 24 | 25 | 26 | @pytest.mark.skip 27 | def test_ls(globus): 28 | globus.ls_endpoint() 29 | 30 | 31 | def test_transfer(globus): 32 | """Create file in tmp_path and transfer it""" 33 | # save cwd to switch back 34 | cwd = os.getcwd() 35 | 36 | # setup test data 37 | test_file = pathlib.Path.home() / "tmp" / "test_file.txt" 38 | test_file.touch() 39 | 40 | # change to testing location that's global 41 | os.chdir(pathlib.Path.home()) 42 | globus.add_item(test_file) 43 | globus.submit_pending_transfer() 44 | 45 | # change back 46 | os.chdir(cwd) 47 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | from subprocess import check_output 2 | 3 | import pytest 4 | 5 | 6 | def pytest_addoption(parser): 7 | parser.addoption( 8 | "--runglobus", action="store_true", default=False, help="run globus tests" 9 | ) 10 | 11 | 12 | def pytest_configure(config): 13 | config.addinivalue_line("markers", "globus: mark test as globus to run") 14 | 15 | 16 | def pytest_collection_modifyitems(config, items): 17 | if config.getoption("--runglobus"): 18 | # --runglobus given in cli: do not skip globus tests 19 | return 20 | skip_globus = pytest.mark.skip(reason="need --runglobus option to run") 21 | for item in items: 22 | if "globus" in item.keywords: 23 | item.add_marker(skip_globus) 24 | 25 | 26 | def count_files_dir(path): 27 | """count number of files recursivly in path.""" 28 | # IN pathlib path 29 | num_f_dest = 0 30 | 31 | for f in path.glob("**/*"): 32 | if f.is_file(): 33 | num_f_dest += 1 34 | 35 | return num_f_dest 36 | 37 | 38 | def count_lines_dir(path): 39 | """count number of files recursivly in path""" 40 | # IN pathlib path 41 | num_f_dest = 0 42 | 43 | for f in path.glob("**/*"): 44 | if f.is_file(): 45 | num_f_dest += int(check_output(["wc", "-l", f]).split()[0]) 46 | 47 | return num_f_dest 48 | -------------------------------------------------------------------------------- /singularity/README.md: -------------------------------------------------------------------------------- 1 | # Singularity Container 2 | 3 | Archivetar can be built as a [Singularity](https://cloud.sylabs.io) container. For users without existing MPI and CMake installs the container provides all the tools needed. 
4 | 5 | ## Installing Singularity 6 | 7 | Refresh to the [Singularity Documentation](https://sylabs.io/guides/3.6/admin-guide/installation.html#distribution-packages-of-singularity) how to install. For CentOS and RHEL users it can be installed from `epel`. 8 | 9 | 10 | ## Running Archivetar container 11 | 12 | Once singularity is installed you can pull the [offical image](https://cloud.sylabs.io/library/brockp/default/archivetar) 13 | 14 | ``` 15 | singularity pull --arch amd64 library://brockp/archivetar/archivetar:master 16 | singularity run-help archivetar_master.sif 17 | singularity exec archivetar_master.sif archivetar --help 18 | ``` 19 | 20 | ## Building Singuarlity Image 21 | 22 | ``` 23 | singularity build --remote archivetar.sif archivetar.def 24 | singularity push -U archivetar.sif library://brockp/archivetar/archivetar:master 25 | singularity push -U archivetar.sif library://brockp/archivetar/archivetar:[tag] 26 | ``` 27 | 28 | ## Aditional Utilities 29 | 30 | The image includes several utilities used by `archivetar` but may also be useful on their own: 31 | 32 | * mpifileutils eg `mpirun dwalk --help` 33 | * Parallel Compressors `lbzip2` `pigz` `pixz` 34 | * lz4-tools 35 | -------------------------------------------------------------------------------- /mpiFileUtils/test/test_mpiFileUtils.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from contextlib import ExitStack as does_not_raise 3 | 4 | import pytest 5 | 6 | from mpiFileUtils import DWalk, mpiFileUtils, mpirunError 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "kwargs,expex", 11 | [ 12 | ({}, pytest.raises(mpirunError)), 13 | ({"mpirun": "/does/not/mpirun", "inst": "/my/install"}, does_not_raise()), 14 | ], 15 | ) 16 | def test_mpiFileUtils(kwargs, expex): 17 | with expex: 18 | mpiFileUtils(**kwargs) 19 | 20 | 21 | @pytest.mark.parametrize( 22 | "kwargs,expex", 23 | [ 24 | ({}, pytest.raises(mpirunError)), 25 | ({"mpirun": "/does/not/mpirun", "inst": "/my/install"}, does_not_raise()), 26 | ], 27 | ) 28 | def test_DWalk(kwargs, expex, monkeypatch, mock_subprocess): 29 | monkeypatch.setattr(subprocess, "run", mock_subprocess) 30 | with expex: 31 | dwalk = DWalk(**kwargs) 32 | dwalk.scanpath(path="/tmp", textout="/tmp/output.txt") 33 | args, kwargs = mock_subprocess.call_args 34 | print(f"org: {mock_subprocess.call_args}") 35 | print(f"args: {args}") 36 | print(f"kwargs: {kwargs}") 37 | 38 | # check that defaults are set 39 | assert "--oversubscribe" in mock_subprocess.call_args[0][0] 40 | assert "-np" in mock_subprocess.call_args[0][0] 41 | assert str(12) in mock_subprocess.call_args[0][0] 42 | -------------------------------------------------------------------------------- /test/test_unarchivetar.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | import archivetar.unarchivetar 7 | from archivetar.unarchivetar import find_prefix_files 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "prefix,suffix,args", 12 | [ 13 | ("prefix", "tar", {}), 14 | ("prefix", "tar.bz2", {}), 15 | ("prefix-1234", "tar", {}), 16 | ("1234-1234", "tar", {}), 17 | ("1234-1234", "tar.gz", {}), 18 | ("1234-1234", "tar.lz4", {}), 19 | ("1234-1234", "tar.xz", {}), 20 | ("1234-1234", "tar.lzma", {}), 21 | ("1234-1234", "index.txt", {"suffix": "index.txt"}), 22 | ("1234-1234", "DONT_DELETE.txt", {"suffix": "DONT_DELETE.txt"}), 23 | ], 24 | ) 25 | def test_find_prefix_files(tmp_path, prefix, 
suffix, args): 26 | """ 27 | Test find_prefix_files(). 28 | 29 | Several archives with same prefix 30 | Count number found in array 31 | 32 | Takes finds all tars 33 | """ 34 | # need to start in tmp_dir to matchin real usecases 35 | os.chdir(tmp_path) 36 | 37 | # create a few files 38 | a1 = Path(f"{prefix}-1.{suffix}") 39 | a10 = Path(f"{prefix}-10.{suffix}") 40 | a2 = Path(f"{prefix}-2.{suffix}") 41 | a33 = Path(f"{prefix}-33.{suffix}") 42 | a1.touch() 43 | a10.touch() 44 | a2.touch() 45 | a33.touch() 46 | 47 | tars = find_prefix_files(prefix, **args) 48 | 49 | assert len(tars) == 4 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/brockpalen/archivetar.svg?branch=master)](https://travis-ci.com/brockpalen/archivetar) 2 | [![codecov](https://codecov.io/gh/brockpalen/archivetar/branch/master/graph/badge.svg)](https://codecov.io/gh/brockpalen/archivetar) 3 | 4 | 5 | Archivetar 6 | ========== 7 | 8 | archivetar (V2) is a collection of several tools intended to make the archiving and the use big data easier. 9 | Targeted mostly at the research / HPC use case it is useful in other cases where having fewer files but not one gigantic file is beneficial. 10 | 11 | `archivetar` was to make our Spectrum Archive install [Data Den](https://arc-ts.umich.edu/data-den/) more useful. It has no dependencies on SA, and could easily be used with AWS Glacier, HPSS, DMF etc. Any service where you want to minimize the number of objects keeping the data/object ratio high. 12 | 13 | For additional performance `archivetar` will detect many multi-core capable compressors. 14 | 15 | #### Example Output 16 | 17 | ``` 18 | # number of files before 19 | $ find . -type f | wc -l 20 | 6656 21 | 22 | # bundle all files < 1M, into tars 200M in size 23 | # Delete input files 24 | archivetar --prefix boxout --remove-files --size 1M --tar-size 200M 25 | 26 | # number of files after 27 | $ find . -type f | wc -l 28 | 1831 29 | 30 | # expand using tar 31 | for x in $(ls boxout*.tar) 32 | tar -xf $x 33 | done 34 | 35 | # Alternative using provided unarchivetar 36 | unarchivetar --prefix boxout 37 | ``` 38 | 39 | ### archivetar vs tar 40 | 41 | 42 | archivetar doesn't try to replace tar. Actually it uses it internally rather than Pythons native implementation. 43 | 44 | Usage 45 | ----- 46 | 47 | See [USAGE.md](USAGE.md) 48 | 49 | Building archivetar 50 | ------------------- 51 | 52 | See [INSTALL.md](INSTALL.md) 53 | -------------------------------------------------------------------------------- /singularity/archivetar.def: -------------------------------------------------------------------------------- 1 | BootStrap: library 2 | From: ubuntu:22.04 3 | 4 | %post 5 | apt-get -y update 6 | apt-get -y install software-properties-common 7 | add-apt-repository universe 8 | apt-get -y update 9 | apt-get -y install cmake git libopenmpi-dev wget gcc g++ libarchive-dev pkg-config libssl-dev libcap-dev python3-pip 10 | apt-get -y install lbzip2 pigz liblz4-tool pixz bzip2 gzip tar libbz2-dev zstd 11 | git clone https://github.com/brockpalen/archivetar.git 12 | cd archivetar 13 | bash build.sh 14 | pip3 install pipenv 15 | pipenv install --dev --python 3.10 16 | pipenv run pyinstaller --collect-all globus_sdk bin/archivetar -p . --onefile 17 | pipenv run pyinstaller --collect-all globus_sdk bin/.archivetar -p . 
--onefile 18 | pipenv run pyinstaller bin/unarchivetar -p . --onefile 19 | pipenv run pyinstaller bin/archivepurge -p . --onefile 20 | pipenv run pyinstaller bin/archivescan -p . --onefile 21 | # all installed now the python and dev libraries are not used anymore 22 | pipenv --rm 23 | apt-get -y remove cmake git libopenmpi-dev wget gcc g++ libarchive-dev pkg-config libssl-dev libcap-dev python3-pip python3 libbz2-dev 24 | apt-get -y clean 25 | 26 | %environment 27 | export AT_MPIFILEUTILS=/archivetar/install 28 | export AT_MPIRUN=mpirun 29 | export OMPI_MCA_btl=^openib 30 | export PATH=/archivetar/dist:/archivetar/install/bin:${PATH} 31 | 32 | %runscript 33 | echo "Run as:" 34 | echo "singularity exec archivetar.sif archivetar --help" 35 | 36 | %labels 37 | Author brockp@umich.edu 38 | URL https://github.com/brockpalen/archivetar/ 39 | 40 | 41 | %help 42 | Archivetar provides tools for archiving big data in services such as AWS Glacier and research HSM archives such as HPSS, DMF, and Spectrum Archive. 43 | Documentation: https://github.com/brockpalen/archivetar/ 44 | It provides several commands 45 | 46 | archivetar Primary tool to prep data for archive 47 | archivepurge Purges small files that were included in tars created in archivetar 48 | unarchivetar Helper utility to parallel extract tars created by archivetar 49 | 50 | There are several other useful tools included used by Archivetar 51 | 52 | pigz Multi-Threaded gzip 53 | lbzip2 Multi-Threaded bzip2 54 | pixz Multi-Threaded xz / lzma 55 | lz4 Fast serial compressor 56 | 57 | MPI File Utils 58 | 59 | mpirun dwalk Parallel filesystem walker 60 | mpirun dfind Parallel find 61 | mpirun dcp Parallel copy 62 | mpirun drm Parallel rm 63 | mpirun dchmod Parallel chmod 64 | mpirun ddup Parallel find duplicates 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # archive tar specific ignores 2 | *.cache 3 | *.txt 4 | *.swp 5 | *.sif 6 | 7 | 8 | # taken from https://github.com/github/gitignore/blob/master/Python.gitignore 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | -------------------------------------------------------------------------------- /bin/archivetar: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import re 4 | import sys 5 | import shutil 6 | from pathlib import Path 7 | 8 | 9 | def get_max_wall_time(max_epoch_time, mwt, default_time): 10 | """ 11 | Extracts the max wall time from the output of the mwt file, or returns a default value. 12 | """ 13 | if max_epoch_time.exists() and mwt.exists(): 14 | try: 15 | # Execute the command and store the output 16 | output = subprocess.check_output([str(mwt)], text=True) 17 | 18 | # Extract the time using regex 19 | time_match = re.search(r'\d{2}-\d{2}:\d{2}:\d{2}', output) 20 | 21 | # Return the extracted time or the default 22 | return time_match.group(0) if time_match else default_time 23 | 24 | except Exception as e: 25 | raise Exception(f"Unknown error calling maxwalltime {e}") 26 | 27 | # Return the default value if the files don't exist 28 | return default_time 29 | 30 | def main(): 31 | ''' 32 | To disable slurm execution entirely, unset env-variable AT_SLURM_OFFLOAD. 33 | To enable slurm execution, set env-variable AT_SLURM_OFFLOAD=1 34 | 35 | ARCHIVETAR_TASKS, ARCHIVETAR_MEM, and ARCHIVETAR_PAR control the cores, memory and partition requirements 36 | of the SLURM job srun executes. 
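    (These are read below as the environment variables AT_TASKS, AT_MEM, and AT_PAR; see INSTALL.md.
    Illustrative example with placeholder values: AT_SLURM_OFFLOAD=1 AT_TASKS=12 AT_MEM=60G archivetar --prefix myarchive)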
37 | ''' 38 | # Check for help options and run locally 39 | if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "--help"): 40 | print("\033[34m==>\033[32m Running locally for help option\033[0m") 41 | subprocess.run([".archivetar", "-h"]) 42 | sys.exit(0) 43 | 44 | # Handle the case where no arguments are provided (just "archivetar") 45 | if len(sys.argv) == 1: 46 | print("\033[34m==>\033[32m Running archivetar with no arguments\033[0m") 47 | subprocess.run([".archivetar"]) 48 | sys.exit(0) 49 | 50 | # Check if running inside a SLURM job 51 | slurm_job_id = os.getenv("SLURM_JOB_ID") 52 | at_slurm_offload = os.getenv("AT_SLURM_OFFLOAD") 53 | 54 | if slurm_job_id: 55 | # Run locally inside SLURM without executing another SLURM job. 56 | result = subprocess.run([".archivetar"] + sys.argv[1:]) 57 | sys.exit(result.returncode) 58 | elif at_slurm_offload and shutil.which("srun"): 59 | print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") 60 | # Get environment variables or use default values 61 | tasks = os.getenv("AT_TASKS", "8") # Default to 8 tasks if ARCHIVETAR_TASKS is not set 62 | mem = os.getenv("AT_MEM", "40G") # Default to 40G if ARCHIVETAR_MEM is not set 63 | partition = os.getenv("AT_PAR", "archive") # Default to archive if ARCHIVETAR_PAR is not set 64 | default_time = os.getenv("AT_DEFAULT_TIME", "14-00:00:00") # Default to archive if AT_DEFAULT_TIME is not set 65 | 66 | # Extract environment variable 67 | cluster_name = os.environ["CLUSTER_NAME"] 68 | 69 | # Paths for time management 70 | max_epoch_time = Path("/sw/pkgs/arc/usertools/etc/") / f"{cluster_name}_next_maintenance_epochtime" 71 | mwt = Path("/sw/pkgs/arc/usertools/bin/maxwalltime") 72 | 73 | # Usage 74 | _maxwalltime = get_max_wall_time(max_epoch_time, mwt, default_time) 75 | print(f"\033[34m==>\033[35m Requesting {_maxwalltime} maximum wall time\033[0m") 76 | # Run Python script from within SLURM 77 | cmd = f"srun --partition={partition} --cpu-bind=no --ntasks=1 --cpus-per-task={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --time={_maxwalltime} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" 78 | result = subprocess.run(cmd, shell=True) 79 | sys.exit(result.returncode) 80 | else: 81 | # Run locally without SLURM 82 | result = subprocess.run([".archivetar"] + sys.argv[1:]) 83 | sys.exit(result.returncode) 84 | 85 | 86 | if __name__ == "__main__": 87 | main() -------------------------------------------------------------------------------- /archivetar/purge.py: -------------------------------------------------------------------------------- 1 | # Brock Palen 2 | # brockp@umich.edu 3 | # 7/2020 4 | # 5 | # purge files using mpiFileUtils drm optionally blow away empty folders 6 | 7 | import argparse 8 | import logging 9 | import os 10 | import pathlib 11 | import sys 12 | 13 | from environs import Env 14 | 15 | from mpiFileUtils import DRm 16 | 17 | 18 | def parse_args(args): 19 | """CLI options takes sys.argv[1:].""" 20 | 21 | parser = argparse.ArgumentParser( 22 | description="Un-Archive a directory prepped by archivetar", 23 | epilog="Brock Palen brockp@umich.edu", 24 | ) 25 | parser.add_argument( 26 | "--dryrun", help="Print what would do but dont do it", action="store_true" 27 | ) 28 | parser.add_argument( 29 | "--purge-list", 30 | help="File created by --save-purge-list generated by archivetar", 31 | type=str, 32 | required=True, 33 | ) 34 | parser.add_argument( 35 | "--save-purge-list", 36 | help="Don't remove purge list when complete", 37 | action="store_true", 38 | ) 
39 | parser.add_argument( 40 | "--keep-empty-dirs", help="Don't remove empty directories", action="store_true" 41 | ) 42 | 43 | verbosity = parser.add_mutually_exclusive_group() 44 | verbosity.add_argument( 45 | "-v", 46 | "--verbose", 47 | help="Increase messages, including files as added", 48 | action="store_true", 49 | ) 50 | verbosity.add_argument( 51 | "-q", "--quiet", help="Decrease messages", action="store_true" 52 | ) 53 | 54 | args = parser.parse_args(args) 55 | return args 56 | 57 | 58 | def purge_empty_folders(path): 59 | """Rcurssively remove empty folders""" 60 | if not isinstance(path, pathlib.Path): 61 | # make pathlib 62 | path = pathlib.Path(path) 63 | 64 | if not path.is_dir(): 65 | # path isn't a directory 66 | logging.debug(f"{path} is not a directory returning") 67 | return 68 | 69 | # remove empty sudir 70 | for f in path.iterdir(): 71 | if f.is_dir(): 72 | purge_empty_folders(f) 73 | 74 | # remove folders if empty 75 | # have to check path again count items in it 76 | entries = path.iterdir() 77 | if len(list(entries)) == 0: 78 | logging.debug(f"Removing emptry {path}") 79 | path.rmdir() 80 | 81 | 82 | def main(argv): 83 | args = parse_args(argv[1:]) 84 | if args.quiet: 85 | logging.basicConfig(level=logging.WARNING) 86 | elif args.verbose: 87 | logging.basicConfig(level=logging.DEBUG) 88 | else: 89 | logging.basicConfig(level=logging.INFO) 90 | 91 | # load in config from .env 92 | env = Env() 93 | # Can't load breaks singularity 94 | # env.read_env() # read .env file, if it exists 95 | 96 | # check if cachefile given exists 97 | purge_list = pathlib.Path(args.purge_list) 98 | if not purge_list.is_file(): 99 | logging.critical(f"{purge_list} does not exist or not a file") 100 | sys.exit(-2) 101 | 102 | # setup drm 103 | 104 | drm_kwargs = {} 105 | if args.dryrun: 106 | drm_kwargs["dryrun"] = True 107 | 108 | drm = DRm( 109 | inst=env.str( 110 | "AT_MPIFILEUTILS", default="/sw/pkgs/arc/archivetar/0.17.0/install" 111 | ), 112 | mpirun=env.str( 113 | "AT_MPIRUN", 114 | default="/sw/pkgs/arc/stacks/gcc/10.3.0/openmpi/4.1.6/bin/mpirun", 115 | ), 116 | progress="10", 117 | verbose=args.verbose, 118 | **drm_kwargs, 119 | ) 120 | 121 | drm.scancache(cachein=purge_list) 122 | 123 | if args.dryrun: 124 | logging.debug("Dryrun requested exiting") 125 | sys.exit(0) 126 | 127 | # remove empty directories if requsted 128 | if args.keep_empty_dirs: 129 | logging.debug("Skipping removing empty directories") 130 | else: 131 | logging.debug("Removing empty directories") 132 | purge_empty_folders(pathlib.Path.cwd()) 133 | 134 | # remove purge list unless requsted 135 | if not args.save_purge_list: 136 | logging.debug("Removing purge list") 137 | purge_list.unlink() 138 | -------------------------------------------------------------------------------- /test/test_archivetar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from contextlib import ExitStack as does_not_raise 4 | from unittest.mock import MagicMock 5 | 6 | import pytest 7 | 8 | import archivetar 9 | from archivetar import build_list, validate_prefix 10 | from archivetar.archive_args import file_check, stat_check, unix_check 11 | from archivetar.exceptions import ArchivePrefixConflict 12 | from mpiFileUtils import DWalk 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "string,exception", 17 | [ 18 | ("1", does_not_raise()), 19 | ("-1", does_not_raise()), 20 | ("+1", does_not_raise()), 21 | ("9999999", does_not_raise()), 22 | ("+9999999", does_not_raise()), 
23 | ("-9999999", does_not_raise()), 24 | ("abc", pytest.raises(ValueError)), 25 | ("+ 1", pytest.raises(ValueError)), 26 | ("1 ", pytest.raises(ValueError)), 27 | (" 1 ", pytest.raises(ValueError)), 28 | (" 1", pytest.raises(ValueError)), 29 | (" +1", pytest.raises(ValueError)), 30 | ("1 2", pytest.raises(ValueError)), 31 | ("1.2", pytest.raises(ValueError)), 32 | ("+1.2", pytest.raises(ValueError)), 33 | ("a2", pytest.raises(ValueError)), 34 | ("$1", pytest.raises(ValueError)), 35 | ], 36 | ) 37 | def test_stat_check(string, exception): 38 | """Test stat_check parse function for valid entries.""" 39 | with exception: 40 | result = stat_check(string) 41 | print(result) 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "string,exception", 46 | [ 47 | ("brockp", does_not_raise()), 48 | ("coe-brockp-turbo", does_not_raise()), 49 | ("%", pytest.raises(ValueError)), 50 | ("brockp%", pytest.raises(ValueError)), 51 | ("bro ckp", pytest.raises(ValueError)), 52 | ("brockp ", pytest.raises(ValueError)), 53 | (" brockp", pytest.raises(ValueError)), 54 | ], 55 | ) 56 | def test_unix_check(string, exception): 57 | """Test validation of usernames and groupnames.""" 58 | with exception: 59 | result = unix_check(string) 60 | print(result) 61 | 62 | 63 | def test_file_check(tmp_path): 64 | """Make sure file check throw correct errors.""" 65 | # bogus file 66 | f = tmp_path / "testfile.cache" 67 | 68 | # test it doesn't exist 69 | with pytest.raises(ValueError): 70 | file_check(f) 71 | 72 | # test it does exist 73 | f.touch() 74 | a = file_check(f) 75 | assert a == f # nosec 76 | 77 | 78 | @pytest.mark.parametrize( 79 | "kwargs,outcache", 80 | [ 81 | ({"path": ".", "prefix": "brockp"}, "/tmp/brockp-2017-05-21-00-00-00.cache"), 82 | ( 83 | {"path": ".", "prefix": "brockp", "savecache": "True"}, 84 | "hello/brockp-2017-05-21-00-00-00.cache", 85 | ), 86 | ], 87 | ) 88 | @pytest.mark.freeze_time("2017-05-21") 89 | def test_build_list(kwargs, outcache, monkeypatch): 90 | """test build_list function inputs/output expected""" 91 | # fake dwalk 92 | mock_dwalk = MagicMock(spec=DWalk) 93 | monkeypatch.setattr(archivetar, "DWalk", mock_dwalk) 94 | 95 | mock_cwd = MagicMock() 96 | mock_cwd.return_value = pathlib.Path("hello") 97 | monkeypatch.setattr(archivetar.pathlib.Path, "cwd", mock_cwd) 98 | 99 | # doesn't work because you cant patch internals that are in C 100 | # use https://pypi.org/project/pytest-freezegun/ 101 | # mock_datestr = MagicMock() 102 | # mock_datestr.return_value = 'my-fake-string' 103 | # monkeypatch.setattr(archivetar.datetime.datetime, "strftime", mock_datestr) 104 | 105 | path = build_list(**kwargs) 106 | print(mock_dwalk.call_args) 107 | print(path) 108 | assert str(path) == outcache 109 | 110 | 111 | @pytest.mark.parametrize( 112 | "prefix,tarname,exexception", 113 | [ 114 | ("myprefix", "box-archive-1.tar", does_not_raise()), 115 | ("myprefix", "myprefix-1.tar", pytest.raises(ArchivePrefixConflict)), 116 | ("myprefix", "myprefix-1.tar.gz", pytest.raises(ArchivePrefixConflict)), 117 | ("myprefix", "myprefix-1.tar.lz4", pytest.raises(ArchivePrefixConflict)), 118 | ("myprefix", "myprefix-100.tar", pytest.raises(ArchivePrefixConflict)), 119 | ], 120 | ) 121 | def test_validate_prefix(tmp_path, prefix, tarname, exexception): 122 | """ 123 | validate_prefix(prefix) protects against selected prefix conflicting 124 | 125 | eg existing myprefix-1.tar and would be selected by unarchivetar 126 | """ 127 | 128 | os.chdir(tmp_path) 129 | tar = tmp_path / tarname 130 | tar.touch() 131 | 132 | with 
exexception: 133 | validate_prefix(prefix) 134 | -------------------------------------------------------------------------------- /mpiFileUtils/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess # nosec 4 | 5 | from mpiFileUtils.exceptions import mpiFileUtilsError, mpirunError 6 | 7 | logging.getLogger(__name__).addHandler(logging.NullHandler) 8 | 9 | 10 | class mpiFileUtils: 11 | """wrapper class for github.io/hpc/mpifileutils""" 12 | 13 | def __init__( 14 | self, 15 | np=int(12), # MPI ranks to start 16 | inst=False, # path to mpiFileUtils install 17 | mpirun=False, 18 | umask=False, 19 | verbose=False, 20 | ): 21 | 22 | self.kwargs = {} 23 | 24 | if not mpirun: 25 | raise mpirunError("mpirun required") 26 | else: 27 | self.args = [mpirun] 28 | 29 | if umask: 30 | # set umask for call to subprocess 31 | self.kwargs["preexec_fn"] = lambda: os.umask(umask) 32 | 33 | self.args.append("--oversubscribe") 34 | self.args += ["-np", f"{np}"] 35 | 36 | self.inst = inst 37 | self.verbose = verbose # save verbose for apply 38 | 39 | def apply(self): 40 | """execute wrapped application""" 41 | if self.verbose: 42 | self.args.append("--verbose") 43 | logging.debug(f"BLANK invoked as {self.args}") 44 | try: 45 | subprocess.run(self.args, check=True, **self.kwargs) # nosec 46 | except Exception as e: 47 | logging.exception(f"Problem running: {self.args} and {e}") 48 | raise mpiFileUtilsError(f"Problems {e}") 49 | 50 | 51 | class DRm(mpiFileUtils): 52 | """ 53 | Wrapper for drm. 54 | 55 | progress int seconds to print progress 56 | exe str alternative executable name 57 | dryrun bool Don't run just print 58 | """ 59 | 60 | def __init__(self, progress=False, exe="drm", dryrun=False, *kargs, **kwargs): 61 | super().__init__(*kargs, **kwargs) 62 | 63 | # add exeutable before options 64 | # BaseClass ( mpirun -np ... ) SubClass (exe { exe options } ) 65 | self.args.append(f"{self.inst}/bin/{exe}") 66 | 67 | if progress: 68 | self.args += ["--progress", str(progress)] 69 | 70 | if dryrun: 71 | self.args += ["--dryrun"] 72 | 73 | def scancache(self, cachein=False): 74 | """ 75 | Pass in .cache file as input list to use in purge. 76 | 77 | cachein str/pathlib 78 | """ 79 | if not cachein: 80 | logging.error("cachein required") 81 | raise mpiFileUtilsError("cache in required") 82 | else: 83 | self.args += ["--input", str(cachein)] 84 | 85 | self.apply() 86 | 87 | 88 | class DWalk(mpiFileUtils): 89 | """wrapper for dwalk""" 90 | 91 | def __init__( 92 | self, sort=False, filter=False, progress=False, exe="dwalk", *kargs, **kwargs 93 | ): 94 | super().__init__(*kargs, **kwargs) 95 | 96 | # add exeutable before options 97 | # BaseClass ( mpirun -np ... 
) SubClass (exe { exe options } ) 98 | self.args.append(f"{self.inst}/bin/{exe}") 99 | 100 | if sort: 101 | self.args += ["--sort", str(sort)] 102 | 103 | if filter: 104 | # can be many options -atime +60 -user user etc 105 | self.args += filter 106 | 107 | if progress: 108 | self.args += ["--progress", str(progress)] 109 | 110 | def scanpath(self, path=False, textout=False, cacheout=False): 111 | """walk a path on filesystem""" 112 | 113 | self._setoutput(textout=textout, cacheout=cacheout) 114 | 115 | if not path: 116 | logging.error(f"path: {path} not set/exist") 117 | raise mpiFileUtilsError(f"path: {path} not set/exist") 118 | else: 119 | self.args.append(path) 120 | 121 | # actually run it 122 | self.apply() 123 | 124 | def scancache(self, cachein=False, textout=False, cacheout=False): 125 | """pass cache file from prior scan""" 126 | if not cachein: 127 | logging.error("cachein required") 128 | raise mpiFileUtilsError("cache in required") 129 | else: 130 | self.args += ["--input", str(cachein)] 131 | 132 | self._setoutput(textout=textout, cacheout=cacheout) 133 | # actually run it 134 | self.apply() 135 | 136 | def _setoutput(self, textout=False, cacheout=False): 137 | """stay DRY""" 138 | if textout: 139 | self.args += ["--text-output", f"{textout}"] 140 | self.textout = textout 141 | else: 142 | self.textout = None 143 | 144 | if cacheout: 145 | self.args += ["--output", f"{cacheout}"] 146 | self.cacheout = cacheout 147 | else: 148 | self.cacheout = None 149 | -------------------------------------------------------------------------------- /test/test_DwalkParser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from contextlib import ExitStack as does_not_raise 4 | from unittest.mock import MagicMock 5 | 6 | import pytest 7 | from conftest import count_files_dir, count_lines_dir 8 | 9 | from archivetar import DwalkLine, DwalkParser 10 | 11 | 12 | @pytest.fixture 13 | def example_data(): 14 | """Example data setup""" 15 | tdata = ( 16 | pathlib.Path(__file__).parent.absolute() / "data" / "ident-example-support.txt" 17 | ) 18 | return tdata 19 | 20 | 21 | @pytest.fixture 22 | def parser(example_data): 23 | """test constructor with default options""" 24 | parser = DwalkParser(path=example_data) 25 | 26 | yield parser 27 | 28 | 29 | @pytest.mark.parametrize("kwargs", ["", {"path": "/tmp/absolutegarbage.txta"}]) 30 | def test_DwalkParser_doesntexist(kwargs): 31 | """test invalid paths""" 32 | with pytest.raises(BaseException): 33 | DwalkParser(**kwargs) 34 | 35 | 36 | @pytest.mark.parametrize( 37 | "kwargs,result,expex", 38 | [ 39 | ({"minsize": 1e9}, 6, does_not_raise()), 40 | ({}, 2, does_not_raise()), # default arg 10 GByte 41 | ( 42 | {"minsize": 1e12}, 43 | 2, 44 | does_not_raise(), 45 | ), # PB should only create 2 files (index + tar) 46 | ], 47 | ) 48 | def test_DwalkParser_tarlist(parser, tmp_path, kwargs, result, expex): 49 | """test DwalkParser.tarlist()""" 50 | os.chdir(tmp_path) 51 | print(tmp_path) 52 | for count, index, tarlist in parser.tarlist(**kwargs): 53 | print(f"Index -> {index}") 54 | print(f"tarlist -> {tarlist}") 55 | 56 | assert result == count_files_dir(tmp_path) 57 | assert 69 * 2 == count_lines_dir( 58 | tmp_path 59 | ) # sample data has 69 lines * 2 (index + tar) 60 | 61 | 62 | def test_DwalkParser_getpath(parser): 63 | """Test getting a path.""" 64 | path = parser.getpath() 65 | path = next(path) # advance generator 66 | print(path) 67 | assert ( 68 | path 69 | == 
b"/scratch/support_root/support/bennet/haoransh/DDA_2D_60x70_kulow_1.batch\n" 70 | ) # nosec 71 | 72 | 73 | @pytest.fixture 74 | def test_DwalkLine(monkeypatch): 75 | 76 | s = b"-rwxr-xr-x mmiranda support 1.220 GB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so" 77 | 78 | # patch out os.getcwd() to use that expected by test data 79 | mock_os_getcwd = MagicMock(spec=os.path) 80 | mock_os_getcwd.return_value = "/scratch/support_root/support" 81 | monkeypatch.setattr(os, "getcwd", mock_os_getcwd) 82 | 83 | line = DwalkLine(line=s) 84 | assert line.size == 1.220 * 1e9 85 | assert line.path == b"mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so" 86 | yield line 87 | 88 | 89 | @pytest.mark.parametrize( 90 | "line,result,size", 91 | [ 92 | ( 93 | b"-rwxr-xr-x mmiranda support 1.220 GB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so", 94 | b"mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so", 95 | 1.220 * 1e9, 96 | ), 97 | ( 98 | b"-rw-r--r-- joegrego okoues_root 875.000 B Jul 15 2020 12:55 /scratch/support_root/support/joegrego/CoreSequence/Data/0-9999/9000-9999/9800-9899/9810-9819/9814_Jm_2004-11-05_1/.AppleDouble/Icon\n", 99 | b"joegrego/CoreSequence/Data/0-9999/9000-9999/9800-9899/9810-9819/9814_Jm_2004-11-05_1/.AppleDouble/Icon\n", 100 | 875, 101 | ), 102 | ], 103 | ) 104 | def test_DwalkLine_parse(monkeypatch, line, result, size): 105 | 106 | s = line 107 | 108 | # patch out os.getcwd() to use that expected by test data 109 | mock_os_getcwd = MagicMock(spec=os.path) 110 | mock_os_getcwd.return_value = "/scratch/support_root/support" 111 | monkeypatch.setattr(os, "getcwd", mock_os_getcwd) 112 | 113 | line = DwalkLine(line=s) 114 | assert line.size == size 115 | assert line.path == result 116 | 117 | 118 | @pytest.mark.parametrize( 119 | "kwargs,result,expex", 120 | [ 121 | ({"units": b"B", "count": 909}, 909, does_not_raise()), 122 | ({"units": b"KB", "count": 1}, 1000, does_not_raise()), 123 | ({"units": b"MB", "count": 1}, 1000000, does_not_raise()), 124 | ({"units": b"GB", "count": 1}, 1e9, does_not_raise()), 125 | ({"units": b"TB", "count": 1}, 1e12, does_not_raise()), 126 | ({"units": b"KB", "count": 321.310}, 321310, does_not_raise()), # fractional 127 | ( 128 | {"units": "mB", "count": 1}, 129 | 1000000, 130 | pytest.raises(BaseException), 131 | ), # case maters 132 | ], 133 | ) 134 | def test_DwalkLine_normalizeunits(test_DwalkLine, kwargs, result, expex): 135 | with expex: 136 | count = test_DwalkLine._normalizeunits(**kwargs) 137 | assert count == result 138 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | Workflow 2 | ======== 3 | 4 | 5 | * Scan current directory using [mpiFileUtils](https://github.com/hpc/mpifileutils) 6 | * Optionally filter only files under `--size` 7 | * Build sub-tar's where each tar aims to be (before compression) to be at least `--tar-size` 8 | * Optionally delete or generate a purge delete list 9 | * Use `--remove-files` to delete files as they are added to tar 10 | * Re-hydrate an archived directory with `unarchivetar --prefix ` 11 | 12 | Singularity 13 | ----------- 14 | 15 | See [singularity](singularity) this is likely recomended for workstation and single user installs. 
16 | 17 | Building archivetar 18 | ------------------- 19 | 20 | ### Requirements 21 | 22 | * Patched [mpiFileUtils](https://github.com/brockp/mpifileutils) `build.sh` is a shortcut 23 | * python3.6+ 24 | * `pip install pipenv` 25 | * `pipenv install` 26 | * `pipenv run pyinstaller bin/archivetar --collect-all globus_sdk -p . --onefile` # create executable no need for pipenv 27 | * `pipenv run pyinstaller bin/.archivetar --collect-all globus_sdk -p . --onefile` # create executable no need for pipenv 28 | * `pipenv run pyinstaller bin/archivepurge -p . --onefile` # create executable no need for pipenv 29 | * `pipenv run pyinstaller bin/unarchivetar -p . --onefile` # create executable no need for pipenv 30 | * `pipenv run pyinstaller bin/archivescan -p . --onefile` # create executable no need for pipenv 31 | 32 | 33 | #### Install using PIP 34 | 35 | Archivetar does use setuptools so it can be installed by `pip` to add to your global config. It does require manual setup of the external mpiFileUtils. 36 | 37 | * Need to still build mpiFileUtils and setup environment variables for configuration 38 | * `pip install git+https://github.com/brockpalen/archivetar.git` 39 | 40 | ### Configuration 41 | 42 | Archivetar uses environment variables for configuration 43 | 44 | ``` 45 | # Required 46 | AT_MPIFILEUTILS= 47 | AT_MPIRUN= 48 | 49 | # Optional 50 | AT_SOURCE= 51 | AT_DESTINATION= 52 | AT_TAR_SIZE= 53 | ``` 54 | 55 | Singularity containers already have required variables defined inside the 56 | container (e.g. see `singularity exec archivetar_master.sif env | grep ^AT_`), 57 | so you would only need to [re]define the optional ones to suite your site's 58 | needs. 59 | 60 | ### Runtime Configuration 61 | Runtime Environment Variables Used 62 | 63 | `AT_SLURM_OFFLOAD` 64 | * Determines if SLURM execution is enabled. If set (to any value), SLURM is used for job submission. 65 | 66 | `SLURM_JOB_ID` 67 | * Indicates if the script is running inside a SLURM job. 68 | 69 | `AT_TASKS` 70 | * Determines the number of cores for the SLURM job. Default: 8 71 | 72 | `AT_MEM` 73 | * Specifies memory allocation for the SLURM job. Default: 40G 74 | 75 | `AT_PAR` 76 | * Sets the partition for the SLURM job. Default: archive 77 | 78 | `AT_DEFAULT_TIME` 79 | * Sets the default wall time for the SLURM job. Default: 14-00:00:00 80 | 81 | `CLUSTER_NAME` 82 | * Used to construct the path for the cluster-specific maintenance epoch time file. 83 | 84 | `USER` 85 | * Used in the SLURM job name for identification 86 | 87 | `SLURM_ACCOUNT` 88 | * Used by Slurm as the account under the which the job is submitted if `AT_SLURM_OFFLOAD` is enabled. Defaults to the user's default Slurm subaccount. 89 | * Note, if the user's default subaccount is a class account -- (class accounts have walltime limits) then the user may experience an AssocMaxWallDurationPerJobLimit error. In such cases, its advised to pre-pend `SLURM_ACCOUNT= to your `archivetar` commands. 
90 | ```srun: error: AssocMaxWallDurationPerJobLimit 91 | srun: error: Unable to allocate resources: Job violates accounting/QOS policy 92 | (job submit limit, user's size and/or time limits) 93 | ``` 94 | #### Dev options 95 | 96 | * pipenv install --dev 97 | * pipenv shell ( like venv activate ) 98 | * pytest 99 | 100 | ### Optional add ons 101 | 102 | Most are auto detected in the primary executable is in `$PATH` 103 | 104 | * lbzip2, pbzip (parallel bzip2) 105 | * pigz (parallel gzip) 106 | * pixz (parallel xz with tar index support) 107 | * lz4 (fast compressor/decompressor single threaded) 108 | 109 | Performance 110 | ----------- 111 | 112 | ### Filter large files with --size 113 | 114 | `--size` is is the minimum size a file has to be in to *not* be included in a tar. Files under this size are grouped in path order into to tar's that aim to be about `--tar-size` before compression. 115 | 116 | By skipping larger files that are often binary uncompressible data one can avoid all the IO copying the large files twice and the CPU time on compressing most of the data volume for little benefit for uncompressible data. For systems like Data Den and HPSS the backend tape systems will compress data at full network speed and thus is much faster than software compression tools. 117 | 118 | ### Parallel IO Requests 119 | 120 | Archivetar makes heavy use of MPI and Python Multiprocess package. The filesystem walk that finds all files and builds the list of files for each tar is `dwalk` from mpiFileUtils and uses MPI and `libcircle`. This if often 5-20x faster than normal filesystem walks. If ran in a batch job if the MPI picks up the environment it will also use multiple nodes. The rest of archivetar will not use multiple nodes. 121 | 122 | The python multiprocess module is used to stuff IO requests pipelines by running multiple `tar` processes at once. By default this is 1/4 the number of threads detected on the system but can also be set with `--tar-processes N`. This is very useful on network filesystems and flash storage systems where multiple requests can be serviced at once generally to higher iops. Multiple tar processes will help when the average file size is small, or for compressors like `xz` that struggle to use all cores in modern systems. 123 | 124 | Lastly the `SuperTar` package used by archivetar will auto detect if parallel compressors are available. Thus if data are compressible `tar` will be able to use multiple cores to speed compression of larger files from fast storage systems. 125 | -------------------------------------------------------------------------------- /USAGE.md: -------------------------------------------------------------------------------- 1 | Using Archivetar / Archivepurge / Unarchivetar 2 | ============================================== 3 | 4 | Quick Start 5 | ----------- 6 | 7 | Uses default settings and will create `archivetar-1.tar archivetar-2.tar ... 8 | archivetar-N.tar` This will tar every file as no `--size` cutoff is provided. 9 | 10 | ``` 11 | archivetar --prefix myarchive 12 | ``` 13 | 14 | ### Specify small file cutoff and size before creating a new tar 15 | 16 | ``` 17 | # the tar size is a minimum, so tars may be much larger than listed here. 
The 18 | # size is also the size before compression 19 | archivetar --prefix myarchive --size 20G --tar-size 10G 20 | ``` 21 | 22 | ### Expand archived directory 23 | 24 | ``` 25 | unarchivetar --prefix project1 26 | ``` 27 | 28 | ### Upload via Globus to Archive 29 | 30 | ``` 31 | archivetar --prefix project1 --source 32 | --destination --destination-path 33 | ``` 34 | 35 | Deleting files in Tars 36 | ------------------- 37 | 38 | `archivepurge` is a wrapper around `drm` from mpiFileUtils and is much faster 39 | than `rm`. It is intended to be used to remove all the files that were tard as 40 | part of `archivetar` but not any that were not. This is most commonly used to 41 | prep uploading a directory to an archive when not using Globus. 42 | 43 | Run `archivetar` with `--save-purge-list`. This will create an extra file that 44 | is passed to `archivepurge --purge-list .cache`. 45 | 46 | Archiving Full Volumes 47 | ---------------------- 48 | 49 | When trying to archive data from a volume without free space requires another 50 | volume with free space and using the `--bundle-path` option to redirect the 51 | creation of tars and indexes to an alternative storage location eg `scratch` or 52 | `tmp`. This option is safe to use with Globus transfers. 53 | 54 | ``` 55 | archivetar --prefix project1 --bundle-path /tmp/ 56 | ``` 57 | 58 | Backups with Archivetar 59 | ----------------------- 60 | 61 | *NOTE* archivetar is not meant to be a backup tool, but can fake it when used 62 | carefully. By using the filtering options `--atime` `--mtime` `--ctime` 63 | `archivetar` can select only files matching the filters specifically files that 64 | were changed. Incorrectly using settings can cause gaps resulting in data loss. 65 | We recommend repeating a full copy periodically to correct for any missing data. 66 | 67 | Work-flow, grab all files modified sense last run of `archivetar` and placing 68 | them in their own folder. 69 | 70 | Limitations, `archivetar` cannot track deletion of files. For users not using 71 | Globus (`--destination-path`) setting a size cutoff `--size` you can not tell 72 | what files larger than `--size` need to be copied. If using Globus those large 73 | files and the tars created are uploaded. 74 | 75 | ``` 76 | # initial full backup 77 | archivetar --prefix full-backup --source --destination 78 | --destination-path /path/on/dest/project/full/ 79 | 80 | # 7 day later grab all files changed (ctime) less than 8 days ago (small overlap 81 | # recommended) 82 | archivetar --prefix inc-backup-7day --source --destination 83 | --destination-path /path/on/dest/project/inc-7day/ --ctime -8 84 | ``` 85 | 86 | Archiving Specific Files (Filters) 87 | ---------------------------------- 88 | 89 | `archivetar` wraps 90 | [mpiFileUtils](https://mpifileutils.readthedocs.io/en/latest/). Thus we are 91 | able to use many of the options in `dfind` to filter the initial list of files 92 | to only archive a subset of data. 93 | 94 | *NOTE* If not using Globus to upload using the `--size` option you will not have 95 | an simple way without manually using `dcp` with the `over.cache` created by 96 | archivetar. So it is not recommended unless using Globus to upload the data to 97 | another location. 
46 | Archiving Full Volumes
47 | ----------------------
48 | 
49 | Archiving data from a volume with no free space requires another volume with
50 | free space; use the `--bundle-path` option to redirect the creation of tars
51 | and indexes to an alternative storage location, e.g. `scratch` or
52 | `tmp`. This option is safe to use with Globus transfers.
53 | 
54 | ```
55 | archivetar --prefix project1 --bundle-path /tmp/
56 | ```
57 | 
58 | Backups with Archivetar
59 | -----------------------
60 | 
61 | *NOTE* archivetar is not meant to be a backup tool, but it can approximate one when used
62 | carefully. By using the filtering options `--atime` `--mtime` `--ctime`,
63 | `archivetar` can select only files matching the filters, specifically files that
64 | were changed. Incorrect settings can cause gaps, resulting in data loss.
65 | We recommend repeating a full copy periodically to correct for any missing data.
66 | 
67 | Workflow: grab all files modified since the last run of `archivetar` and place
68 | them in their own folder.
69 | 
70 | Limitations: `archivetar` cannot track deletion of files. If not using
71 | Globus (`--destination-path`) while setting a size cutoff `--size`, you cannot tell
72 | which files larger than `--size` need to be copied. If using Globus, those large
73 | files and the tars created are uploaded.
74 | 
75 | ```
76 | # initial full backup
77 | archivetar --prefix full-backup --source <source endpoint> --destination <destination endpoint>
78 | --destination-path /path/on/dest/project/full/
79 | 
80 | # 7 days later, grab all files changed (ctime) less than 8 days ago (a small overlap
81 | # is recommended)
82 | archivetar --prefix inc-backup-7day --source <source endpoint> --destination <destination endpoint>
83 | --destination-path /path/on/dest/project/inc-7day/ --ctime -8
84 | ```
85 | 
86 | Archiving Specific Files (Filters)
87 | ----------------------------------
88 | 
89 | `archivetar` wraps
90 | [mpiFileUtils](https://mpifileutils.readthedocs.io/en/latest/). Thus we are
91 | able to use many of the options from `dfind` to filter the initial list of files
92 | and archive only a subset of the data.
93 | 
94 | *NOTE* If not using Globus to upload, combining filters with the `--size` option leaves
95 | no simple way to copy the large (untarred) files short of manually running `dcp` with the `over.cache` created by
96 | archivetar. It is therefore not recommended unless using Globus to upload the data to
97 | another location.
98 | 
99 | Currently `archivetar` understands the following filters:
100 | 
101 | ```
102 | --atime --mtime --ctime --user --group
103 | ```
104 | 
105 | Multiple filters are combined with logical AND, e.g. `--atime +180 --user brockp` will archive
106 | only files accessed more than 180 days ago AND owned by user `brockp`.
107 | 
108 | Filters are only applied in the initial scan. They are ignored if used with the
109 | `--list` option.
110 | 
111 | It is possible to combine filters with `archivepurge` to archive all files from a
112 | specific user and then delete them. Use `--save-list` rather than `--save-purge-list`,
113 | because the former lists ALL files to be archived, not just those placed in tars.
114 | 
115 | ```
116 | # find and archive all files owned by user `brockp` in a given group space.
117 | # scan once and get metadata but do not archive
118 | archivetar --prefix brockp-archive --user brockp --dryrun --save-list
119 | 
120 | # actually archive using the list created above (update the timestamp)
121 | archivetar --prefix brockp-archive --list brockp-archive-<timestamp>.cache
122 | --source <source endpoint> --destination <destination endpoint> --destination-path
123 | /path/on/dest/brockp-archive/ --size 1G
124 | 
125 | # once all transfers above finish, delete the files in the initial list
126 | archivepurge --purge-list brockp-archive-<timestamp>.cache
127 | ```
128 | 
129 | Recovering Specific Folders (partial restores)
130 | ----------------------------------------------
131 | 
132 | Restoring sub-folders is a multi-step process.
133 | 
134 | 1. Pull back the `DONT_DELETE.txt` files
135 | 1. (optionally) pull back the folder with big files if archived with `--size
136 | <size>`
137 | 1. Find the needed tars with: `unarchivetar --prefix my-prefix --which-archive
138 | --folder "exactfolder/subfolder"`
139 | 1. Recall the required tars returned by the prior command
140 | 1. Expand: `unarchivetar --prefix my-prefix --folder "exactfolder/subfolder"`
141 | 
142 | Folder names must be exact and must not have a trailing `/`. You can also use
143 | `grep` to look through the `index` and `DONT_DELETE` files yourself if unsure of
144 | the exact name.
145 | 
146 | Managing Globus Transfers
147 | -------------------------
148 | 
149 | By default `archivetar` hands transfers off to Globus to manage and does not wait
150 | for them to finish. This is fine in most cases, but not when you want to
151 | know a transfer is complete before modifying / deleting data, or when scripting
152 | multiple archives.
153 | 
154 | The `--wait` option tells archivetar to wait for all Globus transfers to finish.
155 | It will also print Globus performance information as it runs.
156 | 
157 | The option `--rm-at-files` implies `--wait` for tars _only_ and not transfers
158 | created by the `--size` option.
159 | 
160 | 
161 | Environment Variables
162 | ---------------------
163 | 
164 | Several `archivetar` settings are controlled by environment variables (handy
165 | for setting or overriding defaults, or for site-specific customization, e.g.
166 | inside Lmod modules or personal shell startup files). See the
167 | [configuration section of INSTALL.md](INSTALL.md#configuration) for details.
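For example, a site Lmod module or personal shell startup file might export the variables read by the CLI argument parser; the values below are only illustrative (the built-in defaults are `umich#greatlakes`, `umich#flux`, and `100G` respectively):

```
export AT_SOURCE="umich#greatlakes"
export AT_DESTINATION="umich#flux"
export AT_TAR_SIZE="200G"
```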
168 | -------------------------------------------------------------------------------- /archivetar/unarchivetar.py: -------------------------------------------------------------------------------- 1 | # Brock Palen 2 | # brockp@umich.edu 3 | # 7/2020 4 | # 5 | # Take a directory that was prepped by archivetar and expand 6 | 7 | import argparse 8 | import logging 9 | import multiprocessing as mp 10 | import pathlib 11 | import sys 12 | 13 | from natsort import natsorted 14 | 15 | from SuperTar import SuperTar 16 | 17 | 18 | def parse_args(args): 19 | """CLI Optoins takes sys.argv[1:].""" 20 | parser = argparse.ArgumentParser( 21 | description="Un-Archive a directory prepped by archivetar", 22 | epilog="Brock Palen brockp@umich.edu", 23 | ) 24 | parser.add_argument( 25 | "--dryrun", help="Print what would do but dont do it", action="store_true" 26 | ) 27 | parser.add_argument( 28 | "-p", 29 | "--prefix", 30 | help="prefix for tar eg prefix-1.tar prefix-2.tar etc, used to match tarx to expand", 31 | type=str, 32 | required=True, 33 | ) 34 | num_cores = round(mp.cpu_count() / 4) 35 | parser.add_argument( 36 | "--tar-processes", 37 | help=f"Number of parallel tars to invoke a once. Default {num_cores} is dynamic. Increase for iop bound not using compression", 38 | type=int, 39 | default=num_cores, 40 | ) 41 | 42 | parser.add_argument( 43 | "--folder", 44 | help="Extract/Search only the given folder similar to tar -xf a.tar folder/sub", 45 | type=str, 46 | default=None, 47 | ) 48 | parser.add_argument( 49 | "-w", 50 | "--which-archive", 51 | help="Using DONT_DELETE files when used with --folder report which archives will be needed for a given prefix", 52 | action="store_true", 53 | ) 54 | 55 | verbosity = parser.add_mutually_exclusive_group() 56 | verbosity.add_argument( 57 | "-v", 58 | "--verbose", 59 | help="Increase messages, including files as added", 60 | action="store_true", 61 | ) 62 | verbosity.add_argument( 63 | "-q", "--quiet", help="Decrease messages", action="store_true" 64 | ) 65 | 66 | tar_opts = parser.add_argument_group( 67 | title="Tar Options", description="Options to pass to underlying tar commands" 68 | ) 69 | tar_opts.add_argument( 70 | "--tar-verbose", 71 | help="Pass -v to tar (print files as tar'd)", 72 | action="store_true", 73 | ) 74 | tar_opts.add_argument( 75 | "-k", 76 | "--keep-old-files", 77 | help="don't replace existing files when extracting, treat them as errors", 78 | action="store_true", 79 | ) 80 | tar_opts.add_argument( 81 | "--skip-old-files", 82 | help="don't replace existing files when extracting, silently skip over them", 83 | action="store_true", 84 | ) 85 | tar_opts.add_argument( 86 | "--keep-newer-files", 87 | help="don't replace existing files that are newer than their archive copies", 88 | action="store_true", 89 | ) 90 | tar_opts.add_argument( 91 | "--tar-options", 92 | help="ADVANCED: pass arbitrary tar options to the tar command. eg. --tar-options='--sparse --xattr'", 93 | default=None, 94 | ) 95 | 96 | args = parser.parse_args(args) 97 | return args 98 | 99 | 100 | def find_prefix_files(prefix, path=None, suffix="tar"): 101 | """ 102 | Find all tar's in current directory matching pattern. 
103 | 104 | match -####.tar or -#####.tar.* 105 | 106 | Return array of pathlibs.Path() 107 | """ 108 | if path: 109 | p = pathlib.Path(path) 110 | else: 111 | p = pathlib.Path(".") 112 | 113 | tars = natsorted(p.glob(f"{prefix}-[0-9]*.{suffix}*"), key=str) 114 | 115 | logging.debug(f"Found files prefix: {prefix} Suffix: {suffix} : {tars}") 116 | 117 | return tars 118 | 119 | 120 | def process(q, iolock): 121 | """process the archives to expand them if they exist on the queue""" 122 | while True: 123 | args = q.get() # tuple (t_args, e_args, archive) 124 | if args is None: 125 | break 126 | with iolock: 127 | t_args, e_args, archive = args 128 | tar = SuperTar( 129 | filename=archive, **t_args 130 | ) # call inside the lock to keep stdout pretty 131 | tar.extract( 132 | **e_args 133 | ) # this is the long running portion so let run outside the lock it prints nothing anyway 134 | with iolock: 135 | logging.info(f"Complete {tar.filename}") 136 | 137 | 138 | def main(argv): 139 | """ 140 | Main event loop phases. 141 | 142 | 1. Parse arguments and set logging 143 | 2. Find all tars that match -####.tar.* 144 | 3. Invoke tar in parallel with multiprocessing 145 | """ 146 | args = parse_args(argv[1:]) 147 | if args.quiet: 148 | logging.basicConfig(level=logging.WARNING) 149 | elif args.verbose: 150 | logging.basicConfig(level=logging.DEBUG) 151 | else: 152 | logging.basicConfig(level=logging.INFO) 153 | 154 | # alternative path when trying to find a folder in an archive, we need to know which archives to get 155 | if args.which_archive: 156 | if not args.folder: 157 | print("Selecting archives without --folder which is required") 158 | sys.exit(1) 159 | file_lists = find_prefix_files(args.prefix, suffix="DONT_DELETE") 160 | logging.info(f"Found {len(file_lists)} file lists with prefix {args.prefix}") 161 | 162 | matches = set() 163 | for file_list in file_lists: 164 | with open(file_list, "r") as file: 165 | for line_no, line in enumerate(file, start=1): 166 | # add a / to make a folder 167 | if line.startswith(args.folder + "/"): 168 | logging.debug( 169 | f"{file_list} : Match found at line {line_no}: {line}" 170 | ) 171 | matches.add(str(file_list)) 172 | 173 | print("\nRecall archives for the following:\n") 174 | for match in matches: 175 | print(match) 176 | 177 | # don't continue on 178 | sys.exit(0) 179 | 180 | # find all archives for prefix 181 | archives = find_prefix_files(args.prefix) 182 | logging.info(f"Found {len(archives)} archives with prefix {args.prefix}") 183 | 184 | # start parallel pool 185 | q = mp.Queue() 186 | iolock = mp.Lock() 187 | pool = mp.Pool(args.tar_processes, initializer=process, initargs=(q, iolock)) 188 | for archive in archives: 189 | logging.info(f"Expanding archive {archive}") 190 | 191 | if args.dryrun: 192 | logging.info("Dryrun requested will not expand") 193 | else: 194 | t_args = {} # arguments to tar constructor 195 | if args.tar_verbose: 196 | t_args["verbose"] = True 197 | if args.folder: 198 | t_args["path"] = args.folder 199 | if args.tar_options: 200 | t_args["extra_options"] = args.tar_options.split() 201 | 202 | e_args = {} # arguments to extract() 203 | if args.keep_old_files: 204 | e_args["keep_old_files"] = True 205 | if args.skip_old_files: 206 | e_args["skip_old_files"] = True 207 | if args.keep_newer_files: 208 | e_args["keep_newer_files"] = True 209 | 210 | q.put((t_args, e_args, archive)) # put work on the queue 211 | 212 | for _ in range(args.tar_processes): # tell workers we're done 213 | q.put(None) 214 | 215 | pool.close() 216 | 
pool.join() 217 | -------------------------------------------------------------------------------- /SuperTar/test/test_SuperTar.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shutil 4 | import subprocess 5 | import tarfile 6 | from contextlib import ExitStack as does_not_raise 7 | from pathlib import Path 8 | from pprint import pprint as pp 9 | from unittest.mock import Mock 10 | 11 | import pytest 12 | from conftest import count_files_dir 13 | 14 | from SuperTar import SuperTar, what_comp 15 | from SuperTar.exceptions import SuperTarMissmatchedOptions 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "kwargs,expex", 20 | [ 21 | ({"filename": "mytar.tar"}, does_not_raise()), 22 | ({}, pytest.raises(BaseException)), # missing required filename= kwarg 23 | ], 24 | ) 25 | def test_SuperTar(kwargs, expex): 26 | with expex: 27 | tar = SuperTar(**kwargs) 28 | pp(tar._flags) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "kwargs,expex,expected_flags", 33 | [ 34 | ({"filename": "mytar.tar"}, does_not_raise(), ["tar"]), 35 | ( 36 | {"filename": "mytar.tar", "verbose": True}, 37 | does_not_raise(), 38 | ["tar", "--verbose"], 39 | ), 40 | ( 41 | {"filename": "mytar.tar", "extra_options": ["--sparse", "--xattr"]}, 42 | does_not_raise(), 43 | ["tar", "--sparse", "--xattr"], 44 | ), 45 | ], 46 | ) 47 | def test_SuperTar_options_test(kwargs, expex, expected_flags): 48 | """Check handeling of options being passed show up in the command. 49 | 50 | kwargs : options to pass to SuperTar 51 | expec : expected exception 52 | expected_flags : The expected built out tar command to that point 53 | """ 54 | 55 | with expex: 56 | tar = SuperTar(**kwargs) 57 | pp(tar._flags) 58 | if expected_flags is not None: 59 | assert tar._flags == expected_flags 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "kwargs,kresult,kwresult,expex", 64 | [ 65 | ( 66 | {"verbose": True, "filename": "mytar.tar"}, 67 | ["tar", "--create", "--file", "mytar.tar", "--verbose"], 68 | {"check": True}, 69 | does_not_raise(), 70 | ), 71 | ( 72 | {"filename": "mytar.tar"}, 73 | ["tar", "--create", "--file", "mytar.tar"], 74 | {"check": True}, 75 | does_not_raise(), 76 | ), 77 | ], 78 | ) 79 | @pytest.mark.xfail 80 | def test_SuperTar_opts_addfromfile(monkeypatch, kwargs, kresult, kwresult, expex): 81 | mock = Mock(spec=subprocess) 82 | mock.return_value = 0 83 | 84 | monkeypatch.setattr(subprocess, "run", mock) 85 | 86 | # actual test code 87 | tar = SuperTar(**kwargs) 88 | tar.addfromfile("/does/not/exist") 89 | tar.archive() 90 | kresult.append("--files-from=/does/not/exist") # added from .addfromfile() 91 | mock.assert_called_once_with(kresult, **kwresult) 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "kwargs,mreturn,expex", 96 | [ 97 | ({"compress": "GZIP"}, False, BaseException), # GZIP requested but none found 98 | ({"compress": "NOT REAL"}, "/usr/bin/gzip", BaseException), # No real parser 99 | ], 100 | ) 101 | def test_SuperTar_ops_comp(monkeypatch, kwargs, mreturn, expex): 102 | """check how compressions handlers behave when not found or not exist""" 103 | 104 | mock = Mock(spec=shutil) 105 | mock.return_value = mreturn 106 | monkeypatch.setattr(shutil, "which", mock) 107 | 108 | with pytest.raises(expex): 109 | SuperTar(**kwargs) 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "infile,expcomp", 114 | [ 115 | ("testfile.tar.gz", "GZIP"), 116 | ("testfile.tar.GZ", "GZIP"), # Check mixed case 117 | ("testfile.tar.tgz", "GZIP"), 118 | ("testfile.tar.bz2", "BZ2"), 
119 | ("testfile.tar.xz", "XZ"), 120 | ("testfile.tar.lzma", "XZ"), 121 | ("testfile.tar.lz4", "LZ4"), 122 | ("testfile.tar.zst", "ZSTD"), 123 | ("testfile.tar", None), 124 | ], 125 | ) 126 | def test_what_comp(tmp_path, infile, expcomp): 127 | """Check type of compressoin a given file uses""" 128 | filename = tmp_path / infile 129 | 130 | # create actual tar, only needed for last test 131 | tar = tarfile.open(filename, "w") 132 | tar.close() 133 | comptype = what_comp(filename) 134 | 135 | assert comptype == expcomp 136 | 137 | 138 | def test_what_comp_not_tar(tmp_path): 139 | """Check we get exception if a nontar file is passed to what_comp()""" 140 | filename = tmp_path / "junkfile.tar" 141 | with open(filename, "a") as f: 142 | f.write("test data but not a real tar!") 143 | 144 | with pytest.raises(BaseException, match=r"has unknown compression or not tar file"): 145 | what_comp(filename) 146 | 147 | 148 | @pytest.fixture 149 | def junk_tar(tmp_path): 150 | """Extract a tar file""" 151 | 152 | # set CWD 153 | os.chdir(tmp_path) 154 | filename = Path("junktar.tar.gz") 155 | tar = tarfile.open(filename, "w:gz") 156 | 157 | # add some fake files 158 | a = tmp_path / "a" 159 | a.touch() 160 | tar.add("a") 161 | b = tmp_path / "b" 162 | b.touch() 163 | tar.add("b") 164 | 165 | # create test tar w 2 files 166 | tar.close() 167 | 168 | # remove files 169 | a.unlink() 170 | b.unlink() 171 | 172 | return filename.resolve() 173 | 174 | 175 | def test_SuperTar_extract(tmp_path, junk_tar): 176 | """ 177 | Basic tar extraction test nothing fancy just untar the example and count number of files after 178 | """ 179 | os.chdir(tmp_path) 180 | # Try to extract 181 | st = SuperTar(filename=junk_tar, verbose=True) 182 | st.extract() 183 | 184 | num_files = count_files_dir(tmp_path) 185 | assert num_files == 3 # two files in tar + tar 186 | 187 | 188 | @pytest.mark.parametrize( 189 | "keyword,cli,expex", 190 | [ 191 | ({"keep_old_files": True}, "--keep-old-files", does_not_raise()), 192 | ({"skip_old_files": True}, "--skip-old-files", does_not_raise()), 193 | ({"keep_newer_files": True}, "--keep-newer-files", does_not_raise()), 194 | ], 195 | ) 196 | def test_SuperTar_extract_preserve(tmp_path, junk_tar, caplog, keyword, cli, expex): 197 | """ 198 | Tar extraction with multiple extract only options 199 | 200 | SuperTar.extract() will log.DEBUG the flags check it's included 201 | """ 202 | os.chdir(tmp_path) 203 | 204 | with caplog.at_level(logging.DEBUG): 205 | # Try to extract 206 | st = SuperTar(filename=junk_tar, verbose=True) 207 | 208 | with expex: 209 | st.extract(**keyword) 210 | 211 | # check option eg --keep-old-files is in log text 212 | assert cli in caplog.text 213 | 214 | num_files = count_files_dir(tmp_path) 215 | assert num_files == 3 # two files in tar + tar 216 | 217 | 218 | @pytest.mark.parametrize( 219 | "keyword,cli,expex", 220 | [ 221 | ( 222 | {"keep_newer_files": True, "skip_old_files": True}, 223 | "--keep-newer-files", 224 | pytest.raises(SuperTarMissmatchedOptions), 225 | ) 226 | ], 227 | ) 228 | def test_SuperTar_extract_preserve_errors( 229 | tmp_path, junk_tar, caplog, keyword, cli, expex 230 | ): 231 | """ 232 | Tar extraction with multiple extract only options 233 | 234 | SuperTar.extract() will log.DEBUG the flags check it's included 235 | """ 236 | os.chdir(tmp_path) 237 | 238 | with caplog.at_level(logging.DEBUG): 239 | # Try to extract 240 | st = SuperTar(filename=junk_tar, verbose=True) 241 | 242 | with expex: 243 | st.extract(**keyword) 244 | 245 | # check option eg 
--keep-old-files is in log text 246 | # assert cli in caplog.text # can't use as exception thrown 247 | 248 | num_files = count_files_dir(tmp_path) 249 | assert num_files == 1 # no untar, origonal only 250 | -------------------------------------------------------------------------------- /test/data/ident-example-support.txt: -------------------------------------------------------------------------------- 1 | -rw-r--r-- bennet support 578.000 B Oct 22 2019 09:35 /scratch/support_root/support/bennet/haoransh/DDA_2D_60x70_kulow_1.batch 2 | -rw-r--r-- bennet support 823.000 B Oct 22 2019 09:38 /scratch/support_root/support/bennet/haoransh/slurm-18847_1.out 3 | -rw-r--r-- bennet support 823.000 B Oct 22 2019 09:38 /scratch/support_root/support/bennet/haoransh/slurm-18847_2.out 4 | -rw-r--r-- bennet support 823.000 B Oct 22 2019 09:38 /scratch/support_root/support/bennet/haoransh/slurm-18847_3.out 5 | -rw-r--r-- bennet support 823.000 B Oct 22 2019 09:38 /scratch/support_root/support/bennet/haoransh/slurm-18847_4.out 6 | -rw-r--r-- bennet support 4.138 KB Oct 22 2019 09:38 /scratch/support_root/support/bennet/haoransh/test_DDA_hs_PBC_period_flux_60x70_kulow_1.m 7 | -rw-r--r-- mmiranda support 8.004 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/.DS_Store 8 | -rwxr-xr-x mmiranda support 4.357 MB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/main 9 | -rw-r--r-- mmiranda support 357.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.e173709 10 | -rw-r--r-- mmiranda support 321.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.e174464 11 | -rw-r--r-- mmiranda support 357.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.e174526 12 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.o173709 13 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.o174464 14 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/myjob.o174526 15 | -rw-r--r-- mmiranda support 10.766 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/parameters.in 16 | -rw-r--r-- mmiranda support 549.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/Mg_Precip_test_7/submit_lh.sh 17 | -rw-r--r-- mmiranda support 8.004 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/.DS_Store 18 | -rw-r--r-- mmiranda support 607.000 B Mar 5 2020 16:37 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/go.sbat 19 | -rwxr-xr-x mmiranda support 1.220 GB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so 20 | -rwxr-xr-x mmiranda support 1.220 GB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.g.so.9.2.0-pre 21 | -rwxr-xr-x mmiranda support 178.935 MB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.so 22 | -rwxr-xr-x mmiranda support 178.935 MB Mar 4 2020 15:58 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libdeal_II.so.9.2.0-pre 23 | -rwxr-xr-x mmiranda support 758.773 KB Mar 4 2020 15:55 
/scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libp4est-2.2.so 24 | -rw-r--r-- mmiranda support 1.089 MB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libp4est.a 25 | -rwxr-xr-x mmiranda support 1.000 KB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libp4est.la 26 | -rwxr-xr-x mmiranda support 758.773 KB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libp4est.so 27 | -rwxr-xr-x mmiranda support 63.391 MB Mar 5 2020 16:35 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libpetsc.so 28 | -rwxr-xr-x mmiranda support 63.391 MB Mar 5 2020 16:35 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libpetsc.so.3.12 29 | -rwxr-xr-x mmiranda support 63.391 MB Mar 5 2020 16:35 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libpetsc.so.3.12.1 30 | -rwxr-xr-x mmiranda support 206.742 KB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libsc-2.2.so 31 | -rw-r--r-- mmiranda support 297.209 KB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libsc.a 32 | -rwxr-xr-x mmiranda support 956.000 B Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libsc.la 33 | -rwxr-xr-x mmiranda support 206.742 KB Mar 4 2020 15:55 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/lib/libsc.so 34 | -rwxr-xr-x mmiranda support 4.357 MB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/main 35 | -rw-r--r-- mmiranda support 357.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.e173709 36 | -rw-r--r-- mmiranda support 321.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.e174464 37 | -rw-r--r-- mmiranda support 357.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.e174526 38 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.o173709 39 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.o174464 40 | -rw-r--r-- mmiranda support 8.571 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/myjob.o174526 41 | -rw-r--r-- mmiranda support 10.766 KB Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/parameters.in 42 | -rw-r--r-- mmiranda support 7.700 KB Mar 4 2020 15:40 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/slurm-195704.out 43 | -rw-r--r-- mmiranda support 7.317 KB Mar 4 2020 16:09 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/slurm-195711.out 44 | -rw-r--r-- mmiranda support 2.951 KB Mar 6 2020 00:37 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/slurm-203462.out 45 | -rw-r--r-- mmiranda support 549.000 B Mar 4 2020 15:28 /scratch/support_root/support/mmiranda/ouser/dmontiel/mg1/submit_lh.sh 46 | -rw-r--r-- mmiranda support 64.989 KB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/CONTCAR 47 | -rw-r--r-- mmiranda support 14.517 KB Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/CONTCAR_2x2x2.POSCAR.vasp 48 | -rw-r--r-- mmiranda support 167.000 B Nov 18 2019 15:01 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/DOSCAR 49 | -rw-r--r-- mmiranda support 193.000 B Nov 18 2019 15:01 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/EIGENVAL 50 | -rw-r--r-- mmiranda support 132.000 B Nov 
18 2019 15:00 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/IBZKPT 51 | -rw-r--r-- mmiranda support 863.000 B Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/INCAR 52 | -rw-r--r-- mmiranda support 42.000 B Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/KPOINTS 53 | -rw-r--r-- mmiranda support 321.310 KB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/OSZICAR 54 | -rw-r--r-- mmiranda support 46.624 MB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/OUTCAR 55 | -rw-r--r-- mmiranda support 234.000 B Nov 18 2019 15:01 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/PCDAT 56 | -rw-r--r-- mmiranda support 64.989 KB Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/POSCAR 57 | -rw-r--r-- mmiranda support 445.769 KB Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/POTCAR 58 | -rw-r--r-- mmiranda support 255.306 KB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/REPORT 59 | -rw-r--r-- mmiranda support 4.717 MB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/XDATCAR 60 | -rw-r--r-- mmiranda support 371.000 B Nov 18 2019 15:00 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/run_vasp.slurm 61 | -rw-r--r-- mmiranda support 2.718 KB Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/slurm-18832.out 62 | -rw-r--r-- mmiranda support 909.000 B Nov 18 2019 15:00 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/slurm-28475.out 63 | -rw-r--r-- mmiranda support 829.000 B Nov 18 2019 15:01 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/slurm-28476.out 64 | -rw-r--r-- mmiranda support 582.000 B Nov 18 2019 14:50 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/stdout 65 | -rw-r--r-- mmiranda support 0.000 B Nov 18 2019 14:55 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/vasp.log 66 | -rw-r--r-- mmiranda support 17.388 MB Nov 18 2019 21:09 /scratch/support_root/support/mmiranda/ouser/jgsmi/40/vasprun.xml 67 | -rwxr-xr-x mmiranda support 4.357 MB Mar 5 2020 16:37 /scratch/support_root/support/mmiranda/tmp/dmontiel/mg1/main 68 | -rw-r--r-- mmiranda support 10.766 KB Mar 5 2020 16:37 /scratch/support_root/support/mmiranda/tmp/dmontiel/mg1/parameters.in 69 | -rw-r--r-- msbritt support 18.000 B Aug 14 2019 17:04 /scratch/support_root/support/msbritt/testout 70 | -------------------------------------------------------------------------------- /SuperTar/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import shutil 3 | import subprocess # nosec 4 | import tarfile 5 | 6 | from SuperTar.exceptions import SuperTarMissmatchedOptions 7 | 8 | logging.getLogger(__name__).addHandler(logging.NullHandler) 9 | 10 | 11 | def find_gzip(): 12 | """find pigz if installed in PATH otherwise return gzip""" 13 | pigz = shutil.which("pigz") 14 | gzip = shutil.which("gzip") 15 | if pigz: 16 | return pigz 17 | elif gzip: 18 | return gzip 19 | else: 20 | raise Exception("gzip compression but no gzip or pigz found in PATH") 21 | 22 | 23 | def find_bzip(): 24 | """find pigz if installed in PATH otherwise return gzip""" 25 | lbzip2 = shutil.which("lbzip2") 26 | pbzip2 = shutil.which("pbzip2") 27 | bzip2 = shutil.which("bzip2") 28 | if lbzip2: 29 | return lbzip2 30 | elif pbzip2: 31 | return pbzip2 32 | elif bzip2: 33 | return bzip2 34 | else: 35 | raise Exception("gzip compression but no gzip or pigz found in PATH") 36 | 37 | 
38 | def find_lz4(): 39 | """find pixz if installed in PATH otherwise return xz""" 40 | lz4 = shutil.which("lz4") 41 | if lz4: 42 | return lz4 43 | else: 44 | raise Exception("lzma/xz compression but no pixz or xz found in PATH") 45 | 46 | 47 | def find_xz(): 48 | """find pixz if installed in PATH otherwise return xz""" 49 | pixz = shutil.which("pixz") 50 | xz = shutil.which("xz") 51 | if pixz: 52 | return pixz 53 | elif xz: 54 | return xz 55 | else: 56 | raise Exception("lzma/xz compression but no pixz or xz found in PATH") 57 | 58 | 59 | def find_lzma(): 60 | """alias for find_xz()""" 61 | return find_xz() 62 | 63 | 64 | def find_zstd(): 65 | """find zstd if installed""" 66 | zstdmt = shutil.which("zstdmt") 67 | zstd = shutil.which("zstd") 68 | if zstdmt: 69 | return zstdmt 70 | if zstd: 71 | return zstd 72 | else: 73 | raise Exception("zstd/zst compression but no zstd found in PATH") 74 | 75 | 76 | def what_comp(filename): 77 | """ 78 | Return what compression type based on file suffix passed. 79 | 80 | Currently based on suffix could be updated to be based on FileMagic 81 | Currently assumes input is a pathlib 82 | 83 | Current known versions: 84 | GZIP .gz or .tgz 85 | BZ2 .bz2 86 | XZ .xz or .lzma 87 | LZ4 .lz4 88 | None None 89 | """ 90 | 91 | # Grab current suffix, force lower case 92 | suffix = filename.suffix 93 | suffix = suffix.lower() 94 | 95 | # GZIP 96 | if suffix in [".gz", ".tgz"]: 97 | return "GZIP" 98 | elif suffix in [".bz2"]: 99 | return "BZ2" 100 | elif suffix in [".xz", ".lzma"]: 101 | return "XZ" 102 | elif suffix in [".lz4"]: 103 | return "LZ4" 104 | elif suffix in [".zst"]: 105 | return "ZSTD" 106 | 107 | # check that it's an actual tar file 108 | elif tarfile.is_tarfile(filename): 109 | # is a tar just without compression so continue 110 | return None 111 | else: 112 | # we don't know what this file is throw 113 | raise Exception(f"{filename} has unknown compression or not tar file") 114 | 115 | 116 | class SuperTar: 117 | """tar wrapper class for high speed""" 118 | 119 | # requires gnu tar 120 | def __init__( 121 | self, 122 | filename=False, # path to file eg output.tar 123 | compress=False, # compress or not False | GZIP | BZ2 | LZ4 124 | verbose=False, # print extra information when arching 125 | purge=False, # pass --remove-files 126 | ignore_failed_read=False, # pass --ignore-failed-read when creating files, does nothing on extract 127 | dereference=False, # pass --dereference when creating files, does nothing on extract 128 | path=None, # path to extract TODO: (not currently used for compress) 129 | extra_options=None, # list of additional options to pass to GNU tar 130 | ): 131 | 132 | if not filename: # filename needed eg tar --file 133 | raise Exception("no filename given for tar") 134 | 135 | self.filename = filename 136 | self._purge = purge 137 | self._compress = compress 138 | self._ignore_failed_read = ignore_failed_read 139 | self._dereference = dereference 140 | self._path = path 141 | self.extra_options = extra_options if extra_options is not None else [] 142 | 143 | # set inital tar options, 144 | self._flags = ["tar"] 145 | 146 | # add any extra flags passed with --tar-options="" 147 | # add now so they apply to both archive() and extract() 148 | # it is on the caller to use only valid tar options as this is an advanced feature 149 | self._flags.extend(self.extra_options) 150 | 151 | if verbose: 152 | self._flags.append("--verbose") 153 | self._verbose = True 154 | 155 | def _setComp(self, compress): 156 | # if a compression option is given 
set the suffix (unused in extraction) 157 | # Set the compression program 158 | self.compsuffix = None 159 | if compress == "GZIP": 160 | self._flags.append(f"--use-compress-program={find_gzip()}") 161 | self.compsuffix = ".gz" 162 | elif compress == "BZ2": 163 | self._flags.append(f"--use-compress-program={find_bzip()}") 164 | self.compsuffix = ".bz2" 165 | elif compress == "XZ": 166 | self._flags.append(f"--use-compress-program={find_xz()}") 167 | self.compsuffix = ".xz" 168 | elif compress == "LZ4": 169 | self._flags.append(f"--use-compress-program={find_lz4()}") 170 | self.compsuffix = ".lz4" 171 | elif compress == "ZSTD": 172 | self._flags.append(f"--use-compress-program={find_zstd()}") 173 | self.compsuffix = ".zst" 174 | elif compress: 175 | raise Exception("Invalid Compressor {compress}") 176 | 177 | def addfromfile(self, path): 178 | """Load list of files from file eg tar -cvf output.tar --files-from=.""" 179 | # check that were not told to use a path 180 | if self._path: 181 | raise Exception("cannot provide a path and use addfromfile()") 182 | 183 | self._flags.append(f"--files-from={path}") 184 | 185 | def addfrompath(self, path): 186 | """load from fs path eg tar -cvf output.tar /path/to/tar""" 187 | pass 188 | 189 | def archive(self): 190 | """actually kick off the tar""" 191 | # we are creating a tar 192 | self._flags += ["--create"] 193 | 194 | # set compression options suffix and program if set 195 | self._setComp(self._compress) 196 | 197 | # are we deleting as we go? 198 | if self._purge: 199 | self._flags.append("--remove-files") 200 | if self.compsuffix: 201 | self.filename = f"{self.filename}{self.compsuffix}" 202 | 203 | # are we ignoring files that are deleted before we run? 204 | if self._ignore_failed_read: 205 | self._flags.append("--ignore-failed-read") 206 | 207 | # grab what symlinks point to and not the links themselves 208 | if self._dereference: 209 | self._flags.append("--dereference") 210 | 211 | self._flags += ["--file", self.filename] 212 | 213 | logging.debug(f"Tar invoked with: {self._flags}") 214 | subprocess.run(self._flags, check=True) # nosec 215 | 216 | def extract( 217 | self, skip_old_files=False, keep_old_files=False, keep_newer_files=False 218 | ): 219 | """Extract the tar listed""" 220 | # we are extracting an existing tar 221 | self._flags += ["--extract"] 222 | self._flags += ["--file", str(self.filename)] 223 | 224 | # These options must be exclusive, if more than one is given outcome is ambigous 225 | preserve_ops = 0 226 | if skip_old_files: # --skip-old-files don't replace files that already exist 227 | self._flags += ["--skip-old-files"] 228 | preserve_ops += 1 229 | if ( 230 | keep_old_files 231 | ): # --keep-old-files don't replace files that already exist and error 232 | self._flags += ["--keep-old-files"] 233 | preserve_ops += 1 234 | if ( 235 | keep_newer_files 236 | ): # --keep-newer-files don't replace files that are newer than archive 237 | self._flags += ["--keep-newer-files"] 238 | preserve_ops += 1 239 | 240 | if preserve_ops > 1: 241 | raise SuperTarMissmatchedOptions( 242 | "skip_old_files keep_old_files keep_newer_files are exclusive options and cannot be combined" 243 | ) 244 | 245 | # set compress program 246 | self._setComp(what_comp(self.filename)) 247 | 248 | # add path last if set 249 | if self._path: 250 | self._flags.append(str(self._path)) 251 | 252 | logging.debug(f"Tar invoked with: {self._flags}") 253 | 254 | try: 255 | subprocess.run(self._flags, check=True) # nosec 256 | except Exception as e: 257 | 
logging.error(f"{e}") 258 | -------------------------------------------------------------------------------- /archivetar/archive_args.py: -------------------------------------------------------------------------------- 1 | """archivetar CLI arguments parsing.""" 2 | import argparse 3 | import multiprocessing as mp 4 | import pathlib 5 | import re 6 | 7 | from environs import Env 8 | 9 | # load in defaults from environment 10 | env = Env() 11 | 12 | 13 | def stat_check(string): 14 | """Validate input of filter string for values like atime, mtime, ctime. 15 | 16 | These shuld be integer values prefixed by a + or a - only with no spaces 17 | 18 | Eg: 19 | 1 20 | 5 21 | -10 22 | +20 23 | 24 | Invalid: 25 | 1.5 26 | abc 27 | $#@ 28 | + 5 29 | a5 30 | 31 | Return: 32 | String passed in 33 | 34 | Rasies ValueError if check doesn't pass 35 | """ 36 | matched = re.match(r"^[+,-]?\d+$", string) 37 | if bool(matched): 38 | return string 39 | raise ValueError("Integers only, optionally prefixed with + or -") 40 | 41 | 42 | def unix_check(string): 43 | """Validate input for username and group names which should be alpha numeric only with no spaces or special chars not allowed in user/group names. 44 | 45 | Borrowed from: https://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users 46 | 47 | Should match 31 char unix usernames and groups 48 | """ 49 | matched = re.match(r"^[a-z_]([a-z0-9_-]{0,31}|[a-z0-9_-]{0,30}\$)$", string) 50 | if bool(matched): 51 | return string 52 | raise ValueError("Integers only, optionally prefixed with + or -") 53 | 54 | 55 | def file_check(string): 56 | """Check if the user provided list file actaully exists.""" 57 | path = pathlib.Path(string) 58 | if path.is_file(): 59 | return path 60 | raise ValueError(f"file {string} not found") 61 | 62 | 63 | def parse_args(args): 64 | """CLI options. 65 | 66 | Several options have option groups to logically bundle like commands. 67 | """ 68 | parser = argparse.ArgumentParser( 69 | description="Prepare a directory for archive", 70 | epilog="Brock Palen brockp@umich.edu", 71 | ) 72 | parser.add_argument( 73 | "--dryrun", 74 | help="Print what would do but don't do it, aditional --dryrun increases how far the script runs\n 1 = Walk Filesystem and stop, 2 = Filter and create sublists", 75 | action="count", 76 | default=0, 77 | ) 78 | parser.add_argument( 79 | "-p", 80 | "--prefix", 81 | help="prefix for tar, e.g. prefix-1.tar prefix-2.tar etc", 82 | type=str, 83 | required=True, 84 | ) 85 | 86 | parser.add_argument( 87 | "-s", 88 | "--size", 89 | help="Cutoff size for files include (eg. 10G 100M)", 90 | type=str, 91 | default=None, 92 | ) 93 | tar_size = env.str("AT_TAR_SIZE", default="100G") 94 | parser.add_argument( 95 | "-t", 96 | "--tar-size", 97 | help=f"Target tar size before options (eg. 10G 1T). Can be set with AT_TAR_SIZE environment variable. Default: {tar_size}", 98 | type=str, 99 | default=tar_size, 100 | ) 101 | num_cores = round(mp.cpu_count() / 4) 102 | parser.add_argument( 103 | "--tar-processes", 104 | help=f"Number of parallel tars to invoke a once. Default {num_cores} is dynamic. Increase for iop bound not using compression", 105 | type=int, 106 | default=num_cores, 107 | ) 108 | parser.add_argument( 109 | "--save-purge-list", 110 | help="Save an mpiFileUtils purge list -.under.cache for files saved in tars, used to delete files under --size after archive process. 
Use as alternative to --remove-files", 111 | action="store_true", 112 | ) 113 | parser.add_argument( 114 | "--bundle-dir", 115 | "--bundle-path", 116 | help="Alternative path to bundle tars and indexes. Useful if directory being archived is at or over quota and cannot write tars to current location. Defaults to CWD.", 117 | default=None, 118 | ) 119 | 120 | build_list_args = parser.add_mutually_exclusive_group() 121 | build_list_args.add_argument( 122 | "--save-list", 123 | help="Save the initial scan of target archive files (including filters)", 124 | action="store_true", 125 | ) 126 | build_list_args.add_argument( 127 | "--list", 128 | help="Provide a prior scan from --dryrun --save-list", 129 | type=file_check, 130 | ) 131 | 132 | verbosity = parser.add_mutually_exclusive_group() 133 | verbosity.add_argument( 134 | "-v", 135 | "--verbose", 136 | help="Increase messages, including files as added", 137 | action="store_true", 138 | ) 139 | verbosity.add_argument( 140 | "-q", "--quiet", help="Decrease messages", action="store_true" 141 | ) 142 | 143 | filter_ops = parser.add_argument_group( 144 | title="Filtering Options", 145 | description="Options to limit files included in the archive similar to options for unix find. NOTICE: These should be used with care. Improper mixing of filter and the --size option could result in unintended behavior if used without Globus.", 146 | ) 147 | filter_ops.add_argument( 148 | "--atime", 149 | metavar="N", 150 | type=stat_check, 151 | help="File was last accessed exactly N days ago. Use + for more than and - for less than N days ago (not inclusive)", 152 | ) 153 | filter_ops.add_argument( 154 | "--mtime", 155 | metavar="N", 156 | type=stat_check, 157 | help="File data was last modified exactly N days ago. Use + for more than and - for less than N days ago (not inclusive)", 158 | ) 159 | filter_ops.add_argument( 160 | "--ctime", 161 | metavar="N", 162 | type=stat_check, 163 | help="File status was last modified exactly N days ago. Use + for more than and - for less than N days ago (not inclusive)", 164 | ) 165 | filter_ops.add_argument( 166 | "--user", 167 | metavar="username", 168 | type=unix_check, 169 | help="Only include files owned by username.", 170 | ) 171 | filter_ops.add_argument( 172 | "--group", 173 | metavar="group", 174 | type=unix_check, 175 | help="Only include files owned by group.", 176 | ) 177 | 178 | tar_opts = parser.add_argument_group( 179 | title="Tar Options", description="Options to pass to underlying tar commands" 180 | ) 181 | tar_opts.add_argument( 182 | "--tar-verbose", 183 | help="Pass -v to tar (print files as tar'd)", 184 | action="store_true", 185 | ) 186 | tar_opts.add_argument( 187 | "--remove-files", 188 | help="Pass --remove-files to tar, Delete files as/when added to archive (CAREFUL). --save-purge-list is safer but requires more storage space.", 189 | action="store_true", 190 | ) 191 | tar_opts.add_argument( 192 | "--ignore-failed-read", 193 | help="Pass --ignore-failed-read to tar, Do not exit with nonzero on unreadable files or directories.", 194 | action="store_true", 195 | ) 196 | tar_opts.add_argument( 197 | "--dereference", 198 | help="Pass --dereference to tar, Follow symlinks; archive and dump the files they point to.", 199 | action="store_true", 200 | ) 201 | tar_opts.add_argument( 202 | "--tar-options", 203 | help="ADVANCED: pass arbitrary tar options to the tar command. eg. 
--tar-options='--sparse --xattr'", 204 | default=None, 205 | ) 206 | 207 | compression = parser.add_mutually_exclusive_group() 208 | compression.add_argument( 209 | "-z", "--gzip", help="Compress tar with GZIP", action="store_true" 210 | ) 211 | compression.add_argument( 212 | "-j", "--bzip", "--bzip2", help="Compress tar with BZIP", action="store_true" 213 | ) 214 | compression.add_argument("--lz4", help="Compress tar with lz4", action="store_true") 215 | compression.add_argument( 216 | "--zstd", help="Compress tar with zstd", action="store_true" 217 | ) 218 | compression.add_argument( 219 | "--xz", 220 | "--lzma", 221 | help='Compress tar with xz/lzma\n If using xz to enable multi-threaded set XZ_OPT="-T0 -9"', 222 | action="store_true", 223 | ) 224 | 225 | globus = parser.add_argument_group( 226 | title="Globus Transfer Options", 227 | description="Options to setup transfer of data to archive", 228 | ) 229 | source_default = env.str("AT_SOURCE", default="umich#greatlakes") 230 | globus.add_argument( 231 | "--source", 232 | help=f"Source endpoint/collection. Can be set with AT_SOURCE environment variable. Default: {source_default}", 233 | default=source_default, 234 | ) 235 | 236 | dest_default = env.str("AT_DESTINATION", default="umich#flux") 237 | globus.add_argument( 238 | "--destination", 239 | help=f"Destination endpoint/collection. Can be set with AT_DESTINATION environment variable. Default: {dest_default}", 240 | default=dest_default, 241 | ) 242 | globus.add_argument( 243 | "--destination-dir", 244 | "--destination-path", 245 | help="Directory on Destination server", 246 | ) 247 | globus.add_argument( 248 | "--wait", 249 | help="Wait for all Globus Transfers to finish before moving to next tar process / existing archivetar", 250 | action="store_true", 251 | ) 252 | globus.add_argument( 253 | "--rm-at-files", 254 | help="Remove archivetar created files (tar, index, tar-list) after Globus transfer of tars", 255 | action="store_true", 256 | ) 257 | globus.add_argument( 258 | "--preserve-timestamp", 259 | help="Globus Transfer will attempt to set file timestamps on the destination to match those on the origin.", 260 | action="store_true", 261 | ) 262 | globus.add_argument( 263 | "--no-notify-on-succeeded", 264 | help="Do not send notification email when the transfer completes with a status of SUCCEEDED", 265 | action="store_false", 266 | ) 267 | globus.add_argument( 268 | "--no-notify-on-failed", 269 | help="Do not send notification email when the transfer completes with a status of FAILED", 270 | action="store_false", 271 | ) 272 | globus.add_argument( 273 | "--no-notify-on-inactive", 274 | help="Do not send notification email when the transfer changes status to INACTIVE. e.g. 
From credentials expiring.", 275 | action="store_false", 276 | ) 277 | globus.add_argument( 278 | "--fail-on-quota-errors", 279 | help="When true, quota exceeded errors will cause the task to fail.", 280 | action="store_true", 281 | ) 282 | globus.add_argument( 283 | "--skip-source-errors", 284 | help="When true, source permission denied and file not found errors from the source endpoint will cause the offending path to be skipped.", 285 | action="store_true", 286 | ) 287 | globus.add_argument( 288 | "--globus-verbose", help="Globus Verbose Logging", action="store_true" 289 | ) 290 | 291 | args = parser.parse_args(args) 292 | return args 293 | -------------------------------------------------------------------------------- /bin/archivescan: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | 4 | # Brock Palen 5 | # brockp@umich.edu 6 | # 7 | # The intent of this is to generate a report for data to be uploaded to Data Den 8 | # https://arc-ts.umich.edu/data-den/ 9 | # Data Den only accepts files > 100MB, it enforces this by providing only 10,000 files/TB of prvisoned capacity 10 | # 11 | # Often data in a single folder looks like: 12 | # Files >100MB 100 Files 100GB 13 | # Files <100MB 10000 Files 2GB 14 | # 15 | # Example, migrate and premigrate are both 100MB. On a streatch cluster size and thus blocks is 2x if 2 copies etc 16 | # size du blocks (stat) 17 | # 100M 2048 4096 migrate 18 | # 100M 204800 409600 premigrate 19 | # 20 | # You could combine Locker 21 | # https://arc-ts.umich.edu/locker/ 22 | # Which frontends Data Den as a cache to hold smaller files 23 | 24 | ## TODO 25 | # 26 | # * Generate optional list of files 27 | # * Make seetings setable from envrionment 28 | 29 | import argparse 30 | import fnmatch 31 | import logging 32 | import math 33 | import os 34 | import re 35 | import time 36 | from multiprocessing import Lock, Pool, Queue 37 | 38 | # setup logging 39 | logger = logging.getLogger(__name__) 40 | logger.setLevel(logging.DEBUG) 41 | st_handler = logging.StreamHandler() 42 | logger.addHandler(st_handler) 43 | 44 | # This is to get the directory that the program 45 | # is currently running in. 46 | dir_path = os.getcwd() 47 | 48 | datadenrate = float(os.getenv("DATADENRATE", 20.04)) 49 | 50 | # data den Migrate size MB 51 | migratesize = int(os.getenv("MIGRATESIZE", 100)) 52 | 53 | # cost of Locker/TB/yr 54 | lockerrate = float(os.getenv("LOCKERRATE", 54.37)) 55 | 56 | # Locker file quota/MB 57 | lockerinode = int(os.getenv("LOCKERINODE", 1.0e6)) 58 | 59 | # minimum size of a file to consider offline based on block size. 60 | # for GPFS this normally means 2 61 | minblocks = float(os.getenv("MINBLOCKS", 2.0)) 62 | 63 | # Replica factor 64 | # for GPFS / Spectrum Scale streatch systems the blocks on disk will be a multiple of replication. 65 | replicas = int(os.getenv("REPLICAS", 2.0)) 66 | 67 | # ratio offline. This often needs to be higher than 1 to account for some sparsenes in files such as zip files. 68 | # This tool assumes that your mostly managing data on large offline storage systems such as AWS Glacier, HPSS, and Spectrum Archive. 69 | # Thus the offline ratio if often very large often much greater than 10:1 70 | offlineratio = int(os.getenv("OFFLINERATIO", 2.0)) 71 | 72 | byteintbyte = 1024.0 * 1024 * 1024 * 1024 73 | 74 | 75 | parser = argparse.ArgumentParser( 76 | description=""" 77 | Standalone tool for number of archive management functions. 
78 | """, 79 | epilog="Author Brock Palen brockp@umich.edu", 80 | ) 81 | 82 | default_threads = 8 83 | default_recall = 20 84 | 85 | parser.add_argument( 86 | "-f", 87 | "--filter", 88 | help="Include only matching files in statistics and recalls (Python fnmatch syntax).", 89 | type=str, 90 | default="*", 91 | ) 92 | parser.add_argument( 93 | "-v", 94 | "--verbose", 95 | help="Print additional informaiton while running, multiple -v will increase output", 96 | action="count", 97 | default=0, 98 | ) 99 | parser.add_argument( 100 | "-q", 101 | "--quiet", 102 | help="Don't print the totals summary at the end", 103 | action="store_true", 104 | ) 105 | parser.add_argument( 106 | "-c", 107 | "--current-state", 108 | help="Show the current state of files in archive rather than future state", 109 | action="store_true", 110 | ) 111 | parser.add_argument( 112 | "--print", 113 | "--print-cached", 114 | help="Print the names of files on cache", 115 | action="store_true", 116 | ) 117 | parser.add_argument( 118 | "--print-offline", 119 | help="Print the names of files currently offline", 120 | action="store_true", 121 | ) 122 | parser.add_argument( 123 | "-p", 124 | "--parallel", 125 | help=f"Number of parallel workers for scan to start default: {default_threads}", 126 | type=int, 127 | default=default_threads, 128 | metavar="N", 129 | ) 130 | parser.add_argument( 131 | "-r", 132 | "--recall", 133 | help="Trigger recall of any file that appears offline", 134 | action="store_true", 135 | ) 136 | parser.add_argument( 137 | "-w", 138 | "--recall-workers", 139 | help="Number of recall workers DO NOT use more than 50 default: {default_recall}", 140 | type=int, 141 | default=default_recall, 142 | metavar="N", 143 | ) 144 | 145 | ops = parser.parse_args() 146 | 147 | if ops.verbose == 1: 148 | st_handler.setLevel(logging.INFO) 149 | elif ops.verbose >= 2: 150 | st_handler.setLevel(logging.DEBUG) 151 | else: 152 | st_handler.setLevel(logging.WARNING) 153 | 154 | 155 | recall_queue = Queue() 156 | 157 | # get size of all files in a directory path 158 | # filter_size : files greater than this are counted 159 | def get_size(start_path=".", filter_size=104857600): 160 | total_size = 0 # total size to archive 161 | total_cnt = 0 # counts for archive 162 | ctotal_size = 0 # size for cache (to small for archive) 163 | ctotal_cnt = 0 # counts for cache (to small for archive) 164 | p = Pool(processes=ops.parallel) 165 | # for dirpath, dirnames, filenames in os.walk(start_path): 166 | output = p.map( 167 | get_size_local, 168 | [ 169 | (dirpath, filenames, filter_size) 170 | for dirpath, dirnames, filenames in os.walk(start_path) 171 | ], 172 | ) 173 | for (a, b, c, d) in output: 174 | total_size += a 175 | total_cnt += b 176 | ctotal_size += c 177 | ctotal_cnt += d 178 | 179 | return total_size, total_cnt, ctotal_size, ctotal_cnt 180 | 181 | 182 | def get_size_local(args): 183 | dirpath, filenames, filter_size = args 184 | total_size = 0 # total size to archive / already archived 185 | total_cnt = 0 # counts for archive 186 | ctotal_size = 0 # size for cache (to small for archive) / on cache 187 | ctotal_cnt = 0 # counts for cache (to small for archive) 188 | 189 | # skip if a locker '.snapshot' directory 190 | if re.search(r".snapshot", dirpath): 191 | return total_size, total_cnt, ctotal_size, ctotal_cnt 192 | 193 | for f in filenames: 194 | fp = os.path.join(dirpath, f) 195 | if not fnmatch.fnmatch(fp, ops.filter): 196 | continue 197 | 198 | # skip if it is symbolic link 199 | if not os.path.islink(fp) and 
os.path.isfile(fp): 200 | st = os.stat(fp) 201 | blocks = st.st_blocks 202 | size = st.st_size 203 | if blocks == 0: # zero size files 204 | ratio = 1.0 205 | elif blocks <= minblocks: 206 | # files smaller than blocksize * 2 on GPFS will show up as offline 207 | ratio = replicas * (float(size) / (float(blocks) * 512.0)) 208 | logger.info( 209 | f"file: {f} size: {size} blocks: {blocks} is less than minblocks: {minblocks} thus assumed online ratio before adjustmint: {ratio}" 210 | ) 211 | ratio = 1.0 212 | else: 213 | ratio = replicas * (float(size) / (float(blocks) * 512.0)) 214 | 215 | logger.debug( 216 | f"file: {f} size: {size} blocks: {blocks} archive to cache ratio: {ratio:.2f}" 217 | ) 218 | if ops.current_state: # are se seeing what would or what did 219 | metric = ratio 220 | value = offlineratio 221 | else: 222 | metric = size 223 | value = filter_size 224 | 225 | if metric > value: 226 | total_size += size 227 | total_cnt += 1 228 | if metric <= value: 229 | ctotal_size += size 230 | ctotal_cnt += 1 231 | 232 | if ops.print_offline and ratio > 1.0: 233 | # file is offline 234 | print(fp) 235 | 236 | if ops.print and ratio <= 1.0: # file is online/cached 237 | print(fp) 238 | 239 | if ops.recall and ratio > 1.0: 240 | logger.debug(f"Adding to recall queue: {fp}") 241 | recall_queue.put(fp) 242 | 243 | return total_size, total_cnt, ctotal_size, ctotal_cnt 244 | 245 | 246 | def recall_worker(iolock): 247 | """Worker that knows how to recall a file from tape.""" 248 | while True: 249 | fp = recall_queue.get() 250 | if fp is None: 251 | break 252 | 253 | logger.debug(f"Attempting to recall: {fp}") 254 | with open(fp, "rb") as token: 255 | token.seek(-1, 2) # seek to end of file minus one byte 256 | token.read(1) # read exactly one byte to trigger recall don't save it 257 | logger.info( 258 | "Recall time for %s is %.2f Seconds" % (fp, time.time() - start_time) 259 | ) 260 | 261 | 262 | def recall(): 263 | """Use parallel pool of workers to recall.""" 264 | iolock = Lock() 265 | logger.debug("Starting Recall") 266 | p = Pool(ops.recall_workers, initializer=recall_worker, initargs=(iolock,)) 267 | for _ in range(ops.recall_workers): # tell workers we are done 268 | recall_queue.put(None) 269 | 270 | p.close() 271 | p.join() 272 | 273 | 274 | # borrowed from 275 | # https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size 276 | # Thank You! 277 | # Converts to human friendly units 278 | def sizeof_fmt(num, suffix="B"): 279 | for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: 280 | if abs(num) < 1024.0: 281 | return "%3.1f%s%s" % (num, unit, suffix) 282 | num /= 1024.0 283 | return "%.1f%s%s" % (num, "Yi", suffix) 284 | 285 | 286 | # cache size for transent data to and from tape 287 | # In TB max(1.0, min( 10% tape, 10)) 288 | def calc_cache(tapesize): 289 | return max(1.0, min(0.1 * tapesize, 10)) 290 | 291 | 292 | #### Main Program 293 | if __name__ == "__main__": 294 | 295 | # This is to get the directory that the program 296 | # is currently running in. 
297 | dir_path = os.getcwd() 298 | 299 | start_time = time.time() 300 | 301 | size, count, csize, ccnt = get_size(filter_size=migratesize * 1024 * 1024) 302 | tbyte = math.ceil(size / byteintbyte) 303 | extra_cache = calc_cache(tbyte) # calculate extra cache for tape data in flight 304 | 305 | if not ops.quiet: 306 | print("----- Results ------") 307 | print("Data Den Candidates:") 308 | print("Files: %s" % (count)) 309 | print("Size: %s" % (sizeof_fmt(size))) 310 | # print("Terabyte %s Cost: $%d" % (tbyte, tbyte*datadenrate)) 311 | 312 | # get locker sizes 313 | tbyte = math.ceil(csize / byteintbyte) 314 | filestb = math.ceil((count + ccnt) / lockerinode) 315 | tbyte = max(tbyte, filestb) 316 | print("") 317 | print("Cache (Locker) Candidates:") 318 | print("Files: %s" % (ccnt)) 319 | print("Size: %s" % (sizeof_fmt(csize))) 320 | # print("Terabyte %s (Storage: %s, Tape Cache: %s) Cost: $%d" % (tbyte+extra_cache, tbyte, extra_cache, tbyte*lockerrate)) 321 | print("") 322 | print("Total Time %.2f Seconds" % (time.time() - start_time)) 323 | 324 | print("Fraction Offline: %.5f %%" % (size / (size + csize) * 100)) 325 | 326 | if ops.recall: 327 | recall() 328 | -------------------------------------------------------------------------------- /GlobusTransfer/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import stat 5 | from pathlib import Path 6 | 7 | import globus_sdk 8 | from globus_sdk.scopes import GCSCollectionScopeBuilder, TransferScopes 9 | from humanfriendly import format_size 10 | 11 | from .exceptions import GlobusFailedTransfer, ScopeOrSingleDomainError 12 | 13 | logging.getLogger(__name__).addHandler(logging.NullHandler) 14 | 15 | 16 | class GlobusTransfer: 17 | """ 18 | object of where / how to transfer data 19 | """ 20 | 21 | def __init__( 22 | self, 23 | ep_source, 24 | ep_dest, 25 | path_dest, 26 | notify_on_succeeded=True, 27 | notify_on_failed=True, 28 | notify_on_inactive=True, 29 | fail_on_quota_errors=False, 30 | skip_source_errors=False, 31 | preserve_timestamp=False, 32 | ): 33 | """ 34 | ep_source Globus Collection/Endpoint Source Name 35 | ep_dest Globus Collection/Endpoint Destination Name 36 | path_dest Path on destination endpoint 37 | 38 | Other options see: https://globus-sdk-python.readthedocs.io/en/stable/services/transfer.html#globus_sdk.TransferData 39 | """ 40 | 41 | self._CLIENT_ID = "8359fb34-39cf-410d-bd93-e8502aa68c46" 42 | self.ep_source = ep_source 43 | self.ep_dest = ep_dest 44 | self.path_dest = path_dest 45 | self.notify_on_succeeded = notify_on_succeeded 46 | self.notify_on_failed = notify_on_failed 47 | self.notify_on_inactive = notify_on_inactive 48 | # CANT USE multiple jobs will cause other files to be wiped out 49 | # self.delete_destination_extra = delete_destination_extra 50 | self.fail_on_quota_errors = fail_on_quota_errors 51 | self.skip_source_errors = skip_source_errors 52 | self.preserve_timestamp = preserve_timestamp 53 | self.session_required_single_domain = None # used with HA collections 54 | self.TransferData = None # start empty created as needed 55 | self.transfers = [] 56 | 57 | """Create an authorizer to use with Globus Service Clients.""" 58 | """ 59 | Get globus tokens data. 
60 | 61 | Check if ~/.globus exists else create 62 | If it exists check permissions are user only 63 | If overly permissive bail 64 | Try to load tokens 65 | Else start authorization 66 | """ 67 | 68 | self.client = globus_sdk.NativeAppAuthClient(self._CLIENT_ID) 69 | self.required_scopes = [] # list of scopes for GCS5 collections 70 | 71 | save_path = Path.home() / ".globus" 72 | self.token_file = save_path / "tokens.json" 73 | 74 | if save_path.is_dir(): # exists and directory 75 | st = os.stat(save_path) 76 | logging.debug(f"{str(save_path)} exists permissions {st.st_mode}") 77 | if bool(st.st_mode & stat.S_IRWXO): 78 | raise Exception("~/.globus is world readable and too permissive, set 700") 79 | if bool(st.st_mode & stat.S_IRWXG): 80 | raise Exception("~/.globus is group readable and too permissive, set 700") 81 | else: # create ~/.globus 82 | logging.debug(f"Creating {str(save_path)}") 83 | save_path.mkdir(mode=0o700) 84 | 85 | try: # try and read tokens from file else create and save 86 | with self.token_file.open() as f: 87 | tokens = json.load(f) 88 | 89 | authorizer = globus_sdk.RefreshTokenAuthorizer( 90 | tokens["refresh_token"], 91 | self.client, 92 | access_token=tokens["access_token"], 93 | expires_at=tokens["expires_at_seconds"], 94 | on_refresh=self._save_tokens, 95 | ) 96 | self.tc = globus_sdk.TransferClient(authorizer=authorizer) 97 | except FileNotFoundError: 98 | self.tc = self.do_native_app_authentication() 99 | 100 | # keep checking until no exceptions 101 | clean = False 102 | while clean is False: 103 | try: 104 | # check our consent situation for GCS5 systems 105 | self.check_for_concent_required(self.ep_source, os.getcwd()) 106 | self.check_for_concent_required(self.ep_dest, self.path_dest) 107 | except ScopeOrSingleDomainError as e: 108 | print(e) 109 | if self.required_scopes: 110 | # we need to auth again asking for these scopes 111 | print( 112 | "\n" 113 | "One of your endpoints requires consent in order to be used.\n" 114 | "You must login a second time to grant consents.\n\n" 115 | ) 116 | self.tc = self.do_native_app_authentication( 117 | scopes=self.required_scopes 118 | ) 119 | 120 | if self.session_required_single_domain: 121 | # we need to auth again constrained to the required session domain 122 | print( 123 | "\n" 124 | "One of your endpoints requires domain constraints in order to be used.\n" 125 | "You must login a second time to grant consents.\n\n" 126 | ) 127 | self.tc = self.do_native_app_authentication() 128 | else: 129 | clean = True 130 | 131 | # attempt to auto activate each endpoint so as not to stop later in the flow 132 | self.endpoint_autoactivate(self.ep_source) 133 | self.endpoint_autoactivate(self.ep_dest) 134 | 135 | def _save_tokens(self, tokens): 136 | """Save Globus auth tokens as required. 137 | 138 | Expects OAuthTokenResponse 139 | https://globus-sdk-python.readthedocs.io/en/stable/authorization.html#globus_sdk.RefreshTokenAuthorizer 140 | """ 141 | 142 | # we only want transfer tokens 143 | tokens = tokens.by_resource_server["transfer.api.globus.org"] 144 | with self.token_file.open("w") as f: 145 | logging.debug(f"Saving tokens to {str(self.token_file)}") 146 | json.dump(tokens, f) 147 | 148 | def do_native_app_authentication(self, scopes=TransferScopes.all): 149 | """ 150 | Does Native App Authentication Flow and returns a transfer client.
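scopes: OAuth2 scopes to request; defaults to TransferScopes.all. A list of collection scopes (for example the required_scopes gathered from consent errors) may be passed to re-run the flow and grant the additional consents.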
151 | """ 152 | 153 | self.client.oauth2_start_flow(refresh_tokens=True, requested_scopes=scopes) 154 | authorize_url = self.client.oauth2_get_authorize_url( 155 | session_required_single_domain=self.session_required_single_domain 156 | ) 157 | print("\nPlease go to this URL and login: \n{0}".format(authorize_url)) 158 | 159 | auth_code = input("\nPlease enter the code you get after login here: ").strip() 160 | tokens = self.client.oauth2_exchange_code_for_tokens(auth_code) 161 | self._save_tokens(tokens) 162 | tokens = tokens.by_resource_server["transfer.api.globus.org"] 163 | authorizer = globus_sdk.RefreshTokenAuthorizer( 164 | tokens["refresh_token"], 165 | self.client, 166 | access_token=tokens["access_token"], 167 | expires_at=tokens["expires_at_seconds"], 168 | on_refresh=self._save_tokens, 169 | ) 170 | return globus_sdk.TransferClient(authorizer=authorizer) 171 | 172 | def check_for_concent_required(self, target, path): 173 | """ 174 | To make sure our tokens have access before doing anything try to ls each. 175 | 176 | target : UUID of collection / endpoint 177 | path : path to list 178 | 179 | If there are any transfer errors it is likely because of not having required consent on GCS5 hosts 180 | or domain constraints when using HA collections. These are populated and looped through until 181 | no additional errors exist. 182 | 183 | This could cause issues if there are other unknown errors because the ones we care about are all the same exception. 184 | """ 185 | 186 | try: 187 | self.tc.operation_ls(target, path) 188 | except globus_sdk.TransferAPIError as err: 189 | print(err) 190 | print(err.info.authorization_parameters.session_required_single_domain) 191 | if err.info.consent_required: 192 | self.required_scopes.extend(err.info.consent_required.required_scopes) 193 | raise ScopeOrSingleDomainError("adding missing consent") 194 | if err.info.authorization_parameters: 195 | self.session_required_single_domain = ( 196 | err.info.authorization_parameters.session_required_single_domain 197 | ) 198 | raise ScopeOrSingleDomainError("adding missing domain") 199 | 200 | def endpoint_autoactivate(self, endpoint, if_expires_in=3600): 201 | """Use TransferClient.endpoint_autoactivate() to make sure the endpoint in question is active.""" 202 | # attempt to auto activate, if that fails prompt to activate 203 | r = self.tc.endpoint_autoactivate(endpoint, if_expires_in=if_expires_in) 204 | while r["code"] == "AutoActivationFailed": 205 | print( 206 | "Endpoint requires manual activation, please open " 207 | "the following URL in a browser to activate the " 208 | "endpoint:" 209 | ) 210 | print(f"https://app.globus.org/file-manager?origin_id={endpoint}") 211 | input("Press ENTER after activating the endpoint:") 212 | r = self.tc.endpoint_autoactivate(endpoint, if_expires_in=3600) 213 | 214 | def ls_endpoint(self): 215 | """Just here to debug that globus is working.""" 216 | for entry in self.tc.operation_ls(self.ep_source, path=self.path_source): 217 | print(entry["name"] + ("/" if entry["type"] == "dir" else "")) 218 | 219 | def task_wait(self, task_id, timeout=60, polling_interval=30): 220 | """Wait for task to finish.""" 221 | while not self.tc.task_wait( 222 | task_id, timeout=timeout, polling_interval=polling_interval 223 | ): 224 | status = self.tc.get_task(task_id) 225 | print( 226 | f"Status: {status['status']} Task: {status['label']} TX: {format_size(status['bytes_transferred'])} Speed: {format_size(status['effective_bytes_per_second'])}/s TaskID: {task_id}" 227 | ) 228 | 229 | status =
self.tc.get_task(task_id) 230 | print( 231 | f"Status: {status['status']} Task: {status['label']} TX: {format_size(status['bytes_transferred'])} Speed: {format_size(status['effective_bytes_per_second'])}/s TaskID: {task_id}" 232 | ) 233 | # if status is FAILED raise an exception 234 | if status["status"] == "FAILED": 235 | logging.debug(f"Failed Transfer status object: {status}") 236 | raise GlobusFailedTransfer(status) 237 | 238 | def add_item(self, source_path, label="PY", in_root=False): 239 | """Add an item to send as part of the current bundle.""" 240 | if not self.TransferData: 241 | # no prior TransferData object, create a new one 242 | logging.debug("No prior TransferData object found, creating") 243 | 244 | # labels can only be letters, numbers, spaces, dashes, and underscores 245 | label = label.replace(".", "-") 246 | self.TransferData = globus_sdk.TransferData( 247 | self.tc, 248 | self.ep_source, 249 | self.ep_dest, 250 | verify_checksum=True, 251 | label=f"archivetar {label}", 252 | notify_on_succeeded=self.notify_on_succeeded, 253 | notify_on_failed=self.notify_on_failed, 254 | notify_on_inactive=self.notify_on_inactive, 255 | fail_on_quota_errors=self.fail_on_quota_errors, 256 | skip_source_errors=self.skip_source_errors, 257 | preserve_timestamp=self.preserve_timestamp, 258 | ) 259 | 260 | # add item 261 | logging.debug(f"Source Path: {source_path}") 262 | 263 | # pathlib comes through as absolute, we need just the relative string 264 | # then append that to the destination's path eg: 265 | 266 | # cwd /home/brockp 267 | # pathlib /home/brockp/dir1/data.txt 268 | # result dir1/data.txt 269 | # Final Dest path: path_dest/dir1/data.txt 270 | 271 | # UNLESS in_root=True then stick the file right in the root of destination 272 | if in_root: 273 | path_dest = Path(self.path_dest) / source_path.name 274 | else: 275 | relative_paths = os.path.relpath(source_path, os.getcwd()) 276 | path_dest = Path(self.path_dest) / relative_paths 277 | 278 | logging.debug(f"Dest Path: {path_dest}") 279 | 280 | # convert PosixPath to string to avoid JSON serializer issues 281 | self.TransferData.add_item(str(source_path), str(path_dest)) 282 | 283 | # TODO check if threshold hit 284 | 285 | def submit_pending_transfer(self): 286 | """Submit actual transfer, could be called automatically or manually.""" 287 | if not self.TransferData: 288 | # no current transfer queued up, do nothing 289 | logging.debug("No current TransferData queued found") 290 | return None 291 | 292 | transfer = self.tc.submit_transfer(self.TransferData) 293 | logging.debug(f"Submitted Transfer: {transfer['task_id']}") 294 | self.transfers.append(transfer) 295 | return transfer["task_id"] 296 | -------------------------------------------------------------------------------- /archivetar/__init__.py: -------------------------------------------------------------------------------- 1 | # Brock Palen 2 | # brockp@umich.edu 3 | # 7/2020 4 | # 5 | # prep a directory for placement in dataden 6 | # process: 7 | # 1. run mpiFileUtils / dwalk (default sort in name / path order) all files < minsize 8 | # 2. Take resulting list, build tar lists by summing size until > tarsize (before compression) 9 | # 3. Tar each list: OR --dryrun create list with est size 10 | # a. Create Index file of contents 11 | # b. Optionally compress -z / -j with gzip/pigz bzip/lbzip2 if installed 12 | # c. Optionally purge 13 | # 4. (?) Kick out optimized untar script (pigz / lbzip2) 14 | 15 | ## TODO 16 | # * filter and produce list to feed to scp/globus?
17 | # * allow direct handoff to Globus CLI 18 | # * mpibzip2 19 | 20 | import datetime 21 | import logging 22 | import multiprocessing as mp 23 | import os 24 | import pathlib 25 | import re 26 | import sys 27 | import tempfile 28 | from subprocess import CalledProcessError # nosec 29 | 30 | import humanfriendly 31 | from environs import Env 32 | 33 | from archivetar.archive_args import parse_args 34 | from archivetar.exceptions import ArchivePrefixConflict, TarError 35 | from archivetar.unarchivetar import find_prefix_files 36 | from GlobusTransfer import GlobusTransfer 37 | from GlobusTransfer.exceptions import GlobusError, GlobusFailedTransfer 38 | from mpiFileUtils import DWalk 39 | from SuperTar import SuperTar 40 | 41 | # load in config from .env 42 | env = Env() 43 | 44 | # can't load, breaks singularity 45 | # env.read_env() # read .env file, if it exists 46 | 47 | 48 | # defaults used for development 49 | # overridden with AT_MPIRUN and AT_MPIFILEUTILS 50 | fileutils = "/sw/pkgs/arc/archivetar/0.17.0/install" 51 | mpirun = "/sw/pkgs/arc/stacks/gcc/10.3.0/openmpi/4.1.6/bin/mpirun" 52 | 53 | 54 | class DwalkLine: 55 | def __init__(self, line=False, relativeto=False, stripcwd=True): 56 | """parse dwalk output line""" 57 | # -rw-r--r-- bennet support 578.000 B Oct 22 2019 09:35 /scratch/support_root/support/bennet/haoransh/DDA_2D_60x70_kulow_1.batch 58 | match = re.match( 59 | rb"\S+\s+\S+\s+\S+\s+(\d+\.\d+)\s+(\S+)\s+.+\s(/.+)", line, re.DOTALL 60 | ) # use re.DOTALL to match newlines in filenames 61 | if relativeto: 62 | self.relativeto = relativeto 63 | else: 64 | self.relativeto = os.getcwd() 65 | 66 | self.size = self._normalizeunits( 67 | units=match[2], count=float(match[1]) 68 | ) # size in bytes 69 | if stripcwd: 70 | self.path = self._stripcwd(match[3]) 71 | else: 72 | self.path = match[3] 73 | 74 | def _normalizeunits(self, units=False, count=False): 75 | """convert size by SI units to Bytes""" 76 | units = units.decode() # convert binary data to string type 77 | # SI powers, e.g., 1 KB = 10**3 bytes 78 | SI_powers = dict(B=0, KB=3, MB=6, GB=9, TB=12, PB=15) 79 | try: 80 | num_bytes = count * 10 ** SI_powers[units] 81 | except KeyError as ex: 82 | raise Exception(f"{units} is not a known SI unit") from ex 83 | return num_bytes 84 | 85 | def _stripcwd(self, path): 86 | """dwalk prints absolute paths, we need relative""" 87 | return os.path.relpath(path, self.relativeto.encode()) 88 | 89 | 90 | class DwalkParser: 91 | def __init__(self, path=False): 92 | # check that path exists 93 | path = pathlib.Path(path) 94 | self.indexcount = 1 95 | if path.is_file(): 96 | logging.debug(f"using {path} as input for DwalkParser") 97 | self.path = path.open("br") 98 | else: 99 | raise Exception(f"{path} doesn't exist") 100 | 101 | def getpath(self): 102 | """Get path one line at a time.""" 103 | for line in self.path: 104 | pl = DwalkLine(line=line, stripcwd=False) 105 | yield pl.path 106 | 107 | def tarlist( 108 | self, prefix="archivetar", minsize=1e9 * 100, bundle_path=None 109 | ): # prefix for files 110 | # min size sum of all files in list 111 | # bundle_path where should indexes and files be created 112 | # OUT tar list suitable for gnutar 113 | # OUT index list 114 | """takes dwalk output, walks through until sum(size) >= minsize""" 115 | 116 | logging.debug(f"minsize is set to {minsize} B") 117 | 118 | if bundle_path: 119 | # set outpath to this location 120 | outpath = pathlib.Path(bundle_path) 121 | else: 122 | # set to cwd 123 | outpath = pathlib.Path.cwd() 124 | 125 |
logging.debug(f"Indexes and lists will be written to: {outpath}") 126 | 127 | tartmp_p = ( 128 | outpath / f"{prefix}-{self.indexcount}.DONT_DELETE.txt" 129 | ) # list of files suitable for gnutar 130 | index_p = outpath / f"{prefix}-{self.indexcount}.index.txt" 131 | sizesum = 0 # size in bytes thus far 132 | index = index_p.open("wb") 133 | tartmp = tartmp_p.open("wb") 134 | for line in self.path: 135 | pl = DwalkLine(line=line) 136 | sizesum += pl.size 137 | index.write(line) # already has newline 138 | tartmp.write(pl.path) # already has newline (binary) 139 | if sizesum >= minsize: 140 | # max size in tar reached 141 | tartmp.close() 142 | index.close() 143 | logging.info( 144 | f"Minimum Archive Size {humanfriendly.format_size(minsize)} reached, Expected size: {humanfriendly.format_size(sizesum)}" 145 | ) 146 | yield self.indexcount, index_p, tartmp_p 147 | self.indexcount += 1 148 | # continue after yeilding file paths back to program 149 | sizesum = 0 150 | tartmp_p = ( 151 | outpath / f"{prefix}-{self.indexcount}.DONT_DELETE.txt" 152 | ) # list of files suitable for gnutar 153 | index_p = outpath / f"{prefix}-{self.indexcount}.index.txt" 154 | index = index_p.open("wb") 155 | tartmp = tartmp_p.open("wb") 156 | index.close() # close and return for final round 157 | tartmp.close() 158 | yield self.indexcount, index_p, tartmp_p 159 | 160 | 161 | ############# MAIN ################ 162 | 163 | 164 | def build_list(path=False, prefix=False, savecache=False, filters=None): 165 | """ 166 | scan filelist and return path to results 167 | 168 | Parameters: 169 | path (str/pathlib) Path to scan 170 | prefix (str) Prefix for scan file eg. prefix-{date}.cache 171 | savecache (bool) Save cache file in cwd or only in TMPDIR 172 | filters (args) HACK pass in argparser for passing filter options eg --atime 173 | 174 | Returns: 175 | cache (pathlib) Path to cache file 176 | """ 177 | 178 | # build filter list 179 | filter = ["--distribution", "size:0,1K,1M,10M,100M,1G,10G,100G,1T"] 180 | if filters: 181 | if filters.atime: 182 | filter.extend(["--atime", filters.atime]) 183 | if filters.mtime: 184 | filter.extend(["--mtime", filters.mtime]) 185 | if filters.ctime: 186 | filter.extend(["--ctime", filters.ctime]) 187 | if filters.user: 188 | filter.extend(["--user", filters.user]) 189 | if filters.group: 190 | filter.extend(["--group", filters.group]) 191 | 192 | logging.debug(f"build_list filter options: {filter}") 193 | 194 | # configure DWalk 195 | dwalk = DWalk( 196 | inst=env.str("AT_MPIFILEUTILS", default=fileutils), 197 | mpirun=env.str("AT_MPIRUN", default=mpirun), 198 | sort="name", 199 | filter=filter, 200 | progress="10", 201 | umask=0o077, # set premissions to only the user invoking 202 | ) 203 | 204 | # generate timestamp name 205 | today = datetime.datetime.today() 206 | datestr = today.strftime("%Y-%m-%d-%H-%M-%S") 207 | 208 | # put into cwd or TMPDIR ? 
209 | c_path = pathlib.Path.cwd() if savecache else pathlib.Path(tempfile.gettempdir()) 210 | cache = c_path / f"{prefix}-{datestr}.cache" 211 | print(f"Scan saved to {cache}") 212 | 213 | # start the actual scan 214 | dwalk.scanpath(path=path, cacheout=cache) 215 | 216 | return cache 217 | 218 | 219 | def filter_list(path=False, size=False, prefix=False, purgelist=False): 220 | """ 221 | Take cache list and filter it into two lists: 222 | Files greater than or equal to size and those less than 223 | 224 | Parameters: 225 | path (pathlib) Path to existing cache file 226 | size (int) size in bytes to filter on 227 | prefix (str) Prefix for scanfiles 228 | purgelist (bool) Save the undersize cache in CWD for purges 229 | 230 | Returns: 231 | TODO o_textout (pathlib) Path to files over or equal size text format 232 | TODO o_cacheout (pathlib) Path to files over or equal size mpifileutils bin format 233 | u_textout (pathlib) Path to files under size text format 234 | u_cacheout (pathlib) Path to files under size mpifileutils bin format 235 | """ 236 | 237 | # configure DWalk 238 | under_dwalk = DWalk( 239 | inst=env.str("AT_MPIFILEUTILS", default=fileutils), 240 | mpirun=env.str("AT_MPIRUN", default=mpirun), 241 | sort="name", 242 | progress="10", 243 | filter=["--type", "f", "--size", f"-{size}"], 244 | umask=0o077, # set permissions to only the user invoking 245 | ) 246 | 247 | ut_path = pathlib.Path(tempfile.gettempdir()) 248 | u_textout = ut_path / f"{prefix}.under.txt" 249 | uc_path = pathlib.Path.cwd() if purgelist else pathlib.Path(tempfile.gettempdir()) 250 | u_cacheout = uc_path / f"{prefix}.under.cache" 251 | 252 | # start the actual scan 253 | under_dwalk.scancache(cachein=path, textout=u_textout, cacheout=u_cacheout) 254 | 255 | # get the list of all symlinks 256 | symlink_dwalk = DWalk( 257 | inst=env.str("AT_MPIFILEUTILS", default=fileutils), 258 | mpirun=env.str("AT_MPIRUN", default=mpirun), 259 | sort="name", 260 | progress="10", 261 | filter=["--type", "l"], # don't set size so even --size 0B works 262 | umask=0o077, # set permissions to only the user invoking 263 | ) 264 | 265 | symlink_path = pathlib.Path(tempfile.gettempdir()) 266 | symlink_textout = symlink_path / f"{prefix}.symlink.txt" 267 | 268 | # start the actual scan 269 | symlink_dwalk.scancache(cachein=path, textout=symlink_textout) 270 | 271 | # append symlink_textout to u_textout to add to tars 272 | with u_textout.open("a+") as u: 273 | with symlink_textout.open() as links: 274 | u.write(links.read()) 275 | 276 | # get the list of files larger than 277 | over_dwalk = DWalk( 278 | inst=env.str("AT_MPIFILEUTILS", default=fileutils), 279 | mpirun=env.str("AT_MPIRUN", default=mpirun), 280 | sort="name", 281 | progress="10", 282 | filter=["--type", "f", "--size", f"+{size}"], 283 | umask=0o077, # set permissions to only the user invoking 284 | ) 285 | 286 | ot_path = pathlib.Path(tempfile.gettempdir()) 287 | o_textout = ot_path / f"{prefix}.over.txt" 288 | 289 | # start the actual scan 290 | over_dwalk.scancache(cachein=path, textout=o_textout) 291 | 292 | # get the list of files exactly equal to 293 | at_dwalk = DWalk( 294 | inst=env.str("AT_MPIFILEUTILS", default=fileutils), 295 | mpirun=env.str("AT_MPIRUN", default=mpirun), 296 | sort="name", 297 | progress="10", 298 | filter=["--type", "f", "--size", f"{size}"], 299 | umask=0o077, # set permissions to only the user invoking 300 | ) 301 | 302 | at_path = pathlib.Path(tempfile.gettempdir()) 303 | a_textout = at_path / f"{prefix}.at.txt" 304 | 305 | # start the actual scan 306 |
at_dwalk.scancache(cachein=path, textout=a_textout) 307 | 308 | # append a_textout to o_textout 309 | with o_textout.open("a+") as o: 310 | with a_textout.open() as a: 311 | o.write(a.read()) 312 | 313 | return u_textout, u_cacheout, o_textout 314 | 315 | 316 | def process(q, out_q, iolock, args): 317 | while True: 318 | q_args = q.get() # tuple (t_args, tar_list, index) 319 | if q_args is None: 320 | break 321 | try: 322 | t_args, tar_list, index = q_args 323 | with iolock: 324 | tar = SuperTar(**t_args) # call inside the lock to keep stdout pretty 325 | tar.addfromfile(tar_list) 326 | tar.archive() # this is the long-running portion, so let it run outside the lock; it prints nothing anyway 327 | filesize = pathlib.Path(tar.filename).stat().st_size 328 | with iolock: 329 | logging.info( 330 | f"Complete {tar.filename} Size: {humanfriendly.format_size(filesize)}" 331 | ) 332 | if args.destination_dir: # if globus destination is set upload 333 | globus = GlobusTransfer( 334 | args.source, 335 | args.destination, 336 | args.destination_dir, 337 | # note notify are the reverse of the SDK 338 | notify_on_succeeded=args.no_notify_on_succeeded, 339 | notify_on_failed=args.no_notify_on_failed, 340 | notify_on_inactive=args.no_notify_on_inactive, 341 | fail_on_quota_errors=args.fail_on_quota_errors, 342 | skip_source_errors=args.skip_source_errors, 343 | preserve_timestamp=args.preserve_timestamp, 344 | ) 345 | path = pathlib.Path(tar.filename).resolve() 346 | logging.debug(f"Adding file {path} to Globus Transfer") 347 | globus.add_item(path, label=f"{path.name}", in_root=True) 348 | tar_list = pathlib.Path(tar_list).resolve() 349 | logging.debug(f"Adding file {tar_list} to Globus Transfer") 350 | globus.add_item(tar_list, label=f"{path.name}", in_root=True) 351 | index_p = pathlib.Path(index).resolve() 352 | logging.debug(f"Adding file {index_p} to Globus Transfer") 353 | globus.add_item(index_p, label=f"{path.name}", in_root=True) 354 | taskid = globus.submit_pending_transfer() 355 | logging.info( 356 | f"Globus Transfer of Small file tar {path.name} : {taskid}" 357 | ) 358 | 359 | if ( 360 | args.wait or args.rm_at_files 361 | ): # wait for globus transfers to finish, in own block to avoid iolock 362 | globus.task_wait(taskid) 363 | if args.rm_at_files: # delete the AT created files tar, index, etc 364 | logging.info(f"Deleting {path}") 365 | path.unlink() 366 | logging.info(f"Deleting {tar_list}") 367 | tar_list.unlink() 368 | logging.info(f"Deleting {index_p}") 369 | index_p.unlink() 370 | except GlobusFailedTransfer as e: 371 | logging.error(f"error with globus transfer of: {tar.filename}") 372 | out_q.put((-1, tar.filename, e)) 373 | raise e 374 | except CalledProcessError as e: 375 | logging.error(f"error with external tar process: {tar.filename}") 376 | out_q.put((-1, tar.filename, e)) 377 | raise e 378 | except Exception as e: 379 | # something bad happened, put it on the out_q for return code 380 | logging.error(f"Unknown error in worker process for: {tar.filename}") 381 | out_q.put((-1, tar.filename, e)) 382 | raise e 383 | else: 384 | # no issues, put success on the out_q 385 | out_q.put((0, tar.filename, None)) 386 | 387 | 388 | def validate_prefix(prefix, path=None): 389 | """Check that the prefix selected won't conflict with current files""" 390 | 391 | # use find_prefix_files from unarchivetar to use the same match 392 | tars = find_prefix_files(prefix, path) 393 | tars.extend(find_prefix_files(prefix, path, suffix="index.txt")) 394 | tars.extend(find_prefix_files(prefix, path,
suffix="DONT_DELETE.txt")) 395 | 396 | if len(tars) != 0: 397 | logging.critical(f"Prefix {prefix} conflicts with current files {tars}") 398 | print("\n") 399 | print( 400 | "Conflicting filex for selected prefix stopping to avoid unexpected behavior" 401 | ) 402 | for item in tars: 403 | print(f"\t{item}") 404 | 405 | print("\n") 406 | raise ArchivePrefixConflict( 407 | f"Prefix {prefix} conflicts with current files {tars}" 408 | ) 409 | else: 410 | return True 411 | 412 | 413 | def main(argv): 414 | args = parse_args(argv[1:]) 415 | if args.quiet: 416 | logging.basicConfig(level=logging.WARNING) 417 | elif args.verbose: 418 | logging.basicConfig(level=logging.DEBUG) 419 | else: 420 | logging.basicConfig(level=logging.INFO) 421 | 422 | # globus built in logger is very verbose adjust lower unless verbose 423 | globus_logger = logging.getLogger("globus_sdk") 424 | urllib_logger = logging.getLogger("urllib3") 425 | if not args.globus_verbose: 426 | globus_logger.setLevel(logging.WARNING) 427 | urllib_logger.setLevel(logging.WARNING) 428 | 429 | # check that selected prefix is usable 430 | validate_prefix(args.prefix, path=args.bundle_dir) 431 | 432 | # if using globus, init to prompt for endpoiont activation etc 433 | if args.destination_dir: 434 | globus = GlobusTransfer( 435 | args.source, 436 | args.destination, 437 | args.destination_dir, 438 | # note notify are the reverse of the SDK 439 | notify_on_succeeded=args.no_notify_on_succeeded, 440 | notify_on_failed=args.no_notify_on_failed, 441 | notify_on_inactive=args.no_notify_on_inactive, 442 | fail_on_quota_errors=args.fail_on_quota_errors, 443 | skip_source_errors=args.skip_source_errors, 444 | preserve_timestamp=args.preserve_timestamp, 445 | ) 446 | 447 | # do we have a user provided list? 448 | if args.list: 449 | logging.info("---> [Phase 1] Found User Provided File List") 450 | cache = args.list 451 | else: 452 | # scan entire filesystem 453 | logging.info("----> [Phase 1] Build Global List of Files") 454 | b_args = { 455 | "path": ".", 456 | "prefix": args.prefix, 457 | "savecache": args.save_list, 458 | "filters": args, 459 | } 460 | cache = build_list(**b_args) 461 | logging.debug(f"Results of full path scan saved at {cache}") 462 | 463 | # bail if --dryrun requested 464 | if args.dryrun == 1: 465 | logging.info("--dryrun requested exiting") 466 | sys.exit(0) 467 | 468 | # filter for files under size 469 | # Set --size filter to 1ExaByte if not set 470 | filtersize = args.size if args.size else "1EB" 471 | logging.info( 472 | f"----> [Phase 1.5] Filter out files greater than {filtersize} if --size given" 473 | ) 474 | 475 | # IN: List of files 476 | # OUT: pathlib: undersize_text, undersize_cache, oversize_text, atsize_text 477 | under_t, under_c, over_t = filter_list( 478 | path=cache, 479 | size=humanfriendly.parse_size(filtersize), 480 | prefix=cache.stem, 481 | purgelist=args.save_purge_list, 482 | ) 483 | 484 | # if globus get transfer the large files 485 | if args.destination_dir and not args.dryrun: 486 | # transfer = upload_overlist(over_t, globus) 487 | over_p = DwalkParser(path=over_t) 488 | for path in over_p.getpath(): 489 | path = path.rstrip(b"\n") # strip trailing newline 490 | path = path.decode("utf-8") # convert byte array to string 491 | path = pathlib.Path(path) 492 | logging.debug(f"Adding file {path} to Globus Transfer") 493 | globus.add_item(path, label=f"Large File List {args.prefix}") 494 | 495 | large_taskid = globus.submit_pending_transfer() 496 | logging.info(f"Globus Transfer of Oversize files: 
{large_taskid}") 497 | 498 | # Dwalk list parser 499 | logging.info( 500 | f"----> [Phase 2] Parse fileted list into sublists of size {args.tar_size}" 501 | ) 502 | parser = DwalkParser(path=under_t) 503 | 504 | # start parallel pool 505 | q = mp.Queue() # input data 506 | out_q = mp.Queue() # output return code from pool worker 507 | iolock = mp.Lock() 508 | try: 509 | for index, index_p, tar_list in parser.tarlist( 510 | prefix=args.prefix, 511 | minsize=humanfriendly.parse_size(args.tar_size), 512 | bundle_path=args.bundle_dir, 513 | ): 514 | logging.info(f" Index: {index_p}") 515 | logging.info(f" tar: {tar_list}") 516 | 517 | # actauly tar them up 518 | if not args.dryrun: 519 | # if compression 520 | # if remove 521 | if args.bundle_dir: 522 | t_args = { 523 | "filename": pathlib.Path(args.bundle_dir) 524 | / f"{args.prefix}-{index}.tar" 525 | } 526 | else: 527 | t_args = {"filename": f"{args.prefix}-{index}.tar"} 528 | if args.remove_files: 529 | t_args["purge"] = True 530 | if args.tar_verbose: 531 | t_args["verbose"] = True 532 | if args.ignore_failed_read: 533 | t_args["ignore_failed_read"] = True 534 | if args.dereference: 535 | t_args["dereference"] = True 536 | 537 | # compression options 538 | if args.gzip: 539 | t_args["compress"] = "GZIP" 540 | if args.zstd: 541 | t_args["compress"] = "ZSTD" 542 | if args.bzip: 543 | t_args["compress"] = "BZ2" 544 | if args.lz4: 545 | t_args["compress"] = "LZ4" 546 | if args.xz: 547 | t_args["compress"] = "XZ" 548 | if args.tar_options: 549 | t_args["extra_options"] = args.tar_options.split() 550 | 551 | q.put((t_args, tar_list, index_p)) # put work on the queue 552 | 553 | # bail if --dryrun requested 554 | if args.dryrun: 555 | logging.info("--dryrun --dryrun requested exiting") 556 | sys.exit(0) 557 | 558 | # start parallel pool of workers 559 | pool = mp.Pool( 560 | args.tar_processes, 561 | initializer=process, 562 | initargs=(q, out_q, iolock, args), 563 | ) 564 | 565 | for _ in range(args.tar_processes): # tell workers we're done 566 | q.put(None) 567 | 568 | pool.close() 569 | pool.join() 570 | 571 | # wait for large_taskid to finish 572 | # large_taskid only esists if --size given to create a large file option 573 | # this will break once we have 1EB files 574 | if args.wait and large_taskid: 575 | logging.debug("Wait for large_taskid to finish") 576 | globus.task_wait(large_taskid) 577 | 578 | # check no pool workers had problems running the tar 579 | # any task that raised an exception should find a returncode on the out_q 580 | suspect_tars = list() 581 | for _ in range(index): 582 | rc, filename, exception = out_q.get() 583 | logging.debug(f"Return code from tar {filename} is {rc}") 584 | if rc != 0: 585 | # found an issue with one worker log and push onto list 586 | logging.error( 587 | f"An issue was found running the tars for index {filename}" 588 | ) 589 | suspect_tars.append(filename) 590 | 591 | # raise if we found suspect tars 592 | if suspect_tars: 593 | raise TarError(f"An issue was found processing the tars for {suspect_tars}") 594 | 595 | except Exception as e: 596 | logging.error("Issue during tar process killing") 597 | raise e 598 | sys.exit(-1) 599 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "55b914a459619eb36f11b99e22b2e522b533c256db1f180424c5f673eab65e8c" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.10" 
9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "archivetar": { 20 | "editable": true, 21 | "path": "." 22 | }, 23 | "certifi": { 24 | "hashes": [ 25 | "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", 26 | "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" 27 | ], 28 | "markers": "python_version >= '3.6'", 29 | "version": "==2024.2.2" 30 | }, 31 | "cffi": { 32 | "hashes": [ 33 | "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc", 34 | "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a", 35 | "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417", 36 | "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab", 37 | "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520", 38 | "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36", 39 | "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743", 40 | "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8", 41 | "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed", 42 | "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684", 43 | "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56", 44 | "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324", 45 | "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d", 46 | "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235", 47 | "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e", 48 | "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088", 49 | "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000", 50 | "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7", 51 | "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e", 52 | "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673", 53 | "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c", 54 | "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe", 55 | "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2", 56 | "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098", 57 | "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8", 58 | "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a", 59 | "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0", 60 | "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b", 61 | "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896", 62 | "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e", 63 | "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9", 64 | "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2", 65 | "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b", 66 | "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6", 67 | "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404", 68 | "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f", 69 | "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", 70 | 
"sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4", 71 | "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc", 72 | "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936", 73 | "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba", 74 | "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872", 75 | "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb", 76 | "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614", 77 | "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1", 78 | "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d", 79 | "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969", 80 | "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b", 81 | "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4", 82 | "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627", 83 | "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956", 84 | "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357" 85 | ], 86 | "markers": "platform_python_implementation != 'PyPy'", 87 | "version": "==1.16.0" 88 | }, 89 | "charset-normalizer": { 90 | "hashes": [ 91 | "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", 92 | "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", 93 | "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", 94 | "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", 95 | "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", 96 | "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", 97 | "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", 98 | "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", 99 | "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", 100 | "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", 101 | "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", 102 | "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", 103 | "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", 104 | "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", 105 | "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", 106 | "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", 107 | "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", 108 | "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", 109 | "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", 110 | "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", 111 | "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", 112 | "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", 113 | "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", 114 | "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", 115 | "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", 116 | "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", 117 | "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", 118 | 
"sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", 119 | "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", 120 | "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", 121 | "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", 122 | "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", 123 | "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", 124 | "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", 125 | "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", 126 | "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", 127 | "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", 128 | "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", 129 | "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", 130 | "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", 131 | "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", 132 | "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", 133 | "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", 134 | "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", 135 | "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", 136 | "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", 137 | "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", 138 | "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", 139 | "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", 140 | "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", 141 | "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", 142 | "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", 143 | "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", 144 | "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", 145 | "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", 146 | "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", 147 | "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", 148 | "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", 149 | "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", 150 | "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", 151 | "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", 152 | "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", 153 | "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", 154 | "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", 155 | "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", 156 | "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", 157 | "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", 158 | "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", 159 | "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", 160 | "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", 161 | 
"sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", 162 | "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", 163 | "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", 164 | "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", 165 | "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", 166 | "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", 167 | "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", 168 | "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", 169 | "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", 170 | "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", 171 | "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", 172 | "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", 173 | "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", 174 | "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", 175 | "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", 176 | "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", 177 | "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", 178 | "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", 179 | "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", 180 | "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" 181 | ], 182 | "markers": "python_full_version >= '3.7.0'", 183 | "version": "==3.3.2" 184 | }, 185 | "cryptography": { 186 | "hashes": [ 187 | "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380", 188 | "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589", 189 | "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea", 190 | "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65", 191 | "sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a", 192 | "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3", 193 | "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008", 194 | "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1", 195 | "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2", 196 | "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635", 197 | "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2", 198 | "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90", 199 | "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee", 200 | "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a", 201 | "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242", 202 | "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12", 203 | "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2", 204 | "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d", 205 | "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be", 206 | "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee", 207 | "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6", 208 | "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529", 209 | 
"sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929", 210 | "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1", 211 | "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6", 212 | "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a", 213 | "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446", 214 | "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9", 215 | "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888", 216 | "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4", 217 | "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33", 218 | "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f" 219 | ], 220 | "markers": "python_version >= '3.7'", 221 | "version": "==42.0.2" 222 | }, 223 | "environs": { 224 | "hashes": [ 225 | "sha256:cc421ddb143fa30183568164755aa113a160e555cd19e97e664c478662032c24", 226 | "sha256:feeaf28f17fd0499f9cd7c0fcf408c6d82c308e69e335eb92d09322fc9ed8138" 227 | ], 228 | "index": "pypi", 229 | "version": "==10.3.0" 230 | }, 231 | "globus-sdk": { 232 | "hashes": [ 233 | "sha256:0cf6a1b2f108de0ad4b01a091772e8f9f136793cf693fa656f582a02c2e85fa8", 234 | "sha256:9da40d5f251f98d89297c2a92abd9f24bfa3c041dfd0e957579884ef0c882cf2" 235 | ], 236 | "index": "pypi", 237 | "version": "==3.35.0" 238 | }, 239 | "humanfriendly": { 240 | "hashes": [ 241 | "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", 242 | "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc" 243 | ], 244 | "index": "pypi", 245 | "version": "==10.0" 246 | }, 247 | "idna": { 248 | "hashes": [ 249 | "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", 250 | "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" 251 | ], 252 | "markers": "python_version >= '3.5'", 253 | "version": "==3.6" 254 | }, 255 | "marshmallow": { 256 | "hashes": [ 257 | "sha256:4c1daff273513dc5eb24b219a8035559dc573c8f322558ef85f5438ddd1236dd", 258 | "sha256:c21d4b98fee747c130e6bc8f45c4b3199ea66bc00c12ee1f639f0aeca034d5e9" 259 | ], 260 | "markers": "python_version >= '3.8'", 261 | "version": "==3.20.2" 262 | }, 263 | "natsort": { 264 | "hashes": [ 265 | "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581", 266 | "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c" 267 | ], 268 | "index": "pypi", 269 | "version": "==8.4.0" 270 | }, 271 | "packaging": { 272 | "hashes": [ 273 | "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", 274 | "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" 275 | ], 276 | "markers": "python_version >= '3.7'", 277 | "version": "==23.2" 278 | }, 279 | "pycparser": { 280 | "hashes": [ 281 | "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", 282 | "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" 283 | ], 284 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 285 | "version": "==2.21" 286 | }, 287 | "pyjwt": { 288 | "extras": [ 289 | "crypto" 290 | ], 291 | "hashes": [ 292 | "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de", 293 | "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320" 294 | ], 295 | "markers": "python_version >= '3.7'", 296 | "version": "==2.8.0" 297 | }, 298 | "python-dotenv": { 299 | "hashes": [ 300 | 
"sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", 301 | "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a" 302 | ], 303 | "index": "pypi", 304 | "version": "==1.0.1" 305 | }, 306 | "requests": { 307 | "hashes": [ 308 | "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", 309 | "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" 310 | ], 311 | "markers": "python_version >= '3.7'", 312 | "version": "==2.31.0" 313 | }, 314 | "urllib3": { 315 | "hashes": [ 316 | "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20", 317 | "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224" 318 | ], 319 | "markers": "python_version >= '3.8'", 320 | "version": "==2.2.0" 321 | } 322 | }, 323 | "develop": { 324 | "altgraph": { 325 | "hashes": [ 326 | "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406", 327 | "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff" 328 | ], 329 | "version": "==0.17.4" 330 | }, 331 | "bandit": { 332 | "hashes": [ 333 | "sha256:17e60786a7ea3c9ec84569fd5aee09936d116cb0cb43151023258340dbffb7ed", 334 | "sha256:527906bec6088cb499aae31bc962864b4e77569e9d529ee51df3a93b4b8ab28a" 335 | ], 336 | "index": "pypi", 337 | "version": "==1.7.7" 338 | }, 339 | "black": { 340 | "hashes": [ 341 | "sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8", 342 | "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6", 343 | "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62", 344 | "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445", 345 | "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c", 346 | "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a", 347 | "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9", 348 | "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2", 349 | "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6", 350 | "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b", 351 | "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4", 352 | "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168", 353 | "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d", 354 | "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5", 355 | "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024", 356 | "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e", 357 | "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b", 358 | "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161", 359 | "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717", 360 | "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8", 361 | "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac", 362 | "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7" 363 | ], 364 | "index": "pypi", 365 | "version": "==24.1.1" 366 | }, 367 | "certifi": { 368 | "hashes": [ 369 | "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", 370 | "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" 371 | ], 372 | "markers": "python_version >= '3.6'", 373 | "version": "==2024.2.2" 374 | }, 375 | "cfgv": { 376 | "hashes": [ 377 | 
"sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", 378 | "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" 379 | ], 380 | "markers": "python_version >= '3.8'", 381 | "version": "==3.4.0" 382 | }, 383 | "charset-normalizer": { 384 | "hashes": [ 385 | "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", 386 | "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", 387 | "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", 388 | "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", 389 | "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", 390 | "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", 391 | "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", 392 | "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", 393 | "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", 394 | "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", 395 | "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", 396 | "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", 397 | "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", 398 | "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", 399 | "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", 400 | "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", 401 | "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", 402 | "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", 403 | "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", 404 | "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", 405 | "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", 406 | "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", 407 | "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", 408 | "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", 409 | "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", 410 | "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", 411 | "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", 412 | "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", 413 | "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", 414 | "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", 415 | "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", 416 | "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", 417 | "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", 418 | "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", 419 | "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", 420 | "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", 421 | "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", 422 | "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", 423 | "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", 424 | "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", 425 | 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", 426 | "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", 427 | "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", 428 | "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", 429 | "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", 430 | "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", 431 | "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", 432 | "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", 433 | "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", 434 | "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", 435 | "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", 436 | "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", 437 | "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", 438 | "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", 439 | "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", 440 | "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", 441 | "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", 442 | "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", 443 | "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", 444 | "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", 445 | "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", 446 | "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", 447 | "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", 448 | "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", 449 | "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", 450 | "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", 451 | "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", 452 | "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", 453 | "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", 454 | "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", 455 | "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", 456 | "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", 457 | "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", 458 | "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", 459 | "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", 460 | "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", 461 | "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", 462 | "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", 463 | "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", 464 | "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", 465 | "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", 466 | "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", 467 | "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", 468 | 
"sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", 469 | "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", 470 | "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", 471 | "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", 472 | "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", 473 | "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", 474 | "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" 475 | ], 476 | "markers": "python_full_version >= '3.7.0'", 477 | "version": "==3.3.2" 478 | }, 479 | "click": { 480 | "hashes": [ 481 | "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", 482 | "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" 483 | ], 484 | "markers": "python_version >= '3.7'", 485 | "version": "==8.1.7" 486 | }, 487 | "codecov": { 488 | "hashes": [ 489 | "sha256:2362b685633caeaf45b9951a9b76ce359cd3581dd515b430c6c3f5dfb4d92a8c", 490 | "sha256:7d2b16c1153d01579a89a94ff14f9dbeb63634ee79e18c11036f34e7de66cbc9", 491 | "sha256:c2ca5e51bba9ebb43644c43d0690148a55086f7f5e6fd36170858fa4206744d5" 492 | ], 493 | "index": "pypi", 494 | "version": "==2.1.13" 495 | }, 496 | "coverage": { 497 | "hashes": [ 498 | "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61", 499 | "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1", 500 | "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7", 501 | "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7", 502 | "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75", 503 | "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd", 504 | "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35", 505 | "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04", 506 | "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6", 507 | "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042", 508 | "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166", 509 | "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1", 510 | "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d", 511 | "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c", 512 | "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66", 513 | "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70", 514 | "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1", 515 | "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676", 516 | "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630", 517 | "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a", 518 | "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74", 519 | "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad", 520 | "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19", 521 | "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6", 522 | "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448", 523 | "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018", 524 | "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218", 525 | 
"sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756", 526 | "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54", 527 | "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45", 528 | "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628", 529 | "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968", 530 | "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d", 531 | "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25", 532 | "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60", 533 | "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950", 534 | "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06", 535 | "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295", 536 | "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b", 537 | "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c", 538 | "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc", 539 | "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74", 540 | "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1", 541 | "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee", 542 | "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011", 543 | "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156", 544 | "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766", 545 | "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5", 546 | "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581", 547 | "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016", 548 | "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c", 549 | "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3" 550 | ], 551 | "markers": "python_version >= '3.8'", 552 | "version": "==7.4.1" 553 | }, 554 | "distlib": { 555 | "hashes": [ 556 | "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", 557 | "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64" 558 | ], 559 | "version": "==0.3.8" 560 | }, 561 | "exceptiongroup": { 562 | "hashes": [ 563 | "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", 564 | "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" 565 | ], 566 | "markers": "python_version < '3.11'", 567 | "version": "==1.2.0" 568 | }, 569 | "filelock": { 570 | "hashes": [ 571 | "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e", 572 | "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c" 573 | ], 574 | "markers": "python_version >= '3.8'", 575 | "version": "==3.13.1" 576 | }, 577 | "flake8": { 578 | "hashes": [ 579 | "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132", 580 | "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3" 581 | ], 582 | "index": "pypi", 583 | "version": "==7.0.0" 584 | }, 585 | "freezegun": { 586 | "hashes": [ 587 | "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b", 588 | "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6" 589 | ], 590 | "markers": "python_version >= '3.7'", 591 | "version": "==1.4.0" 592 | }, 593 | "identify": { 594 | "hashes": [ 595 | 
"sha256:161558f9fe4559e1557e1bff323e8631f6a0e4837f7497767c1782832f16b62d", 596 | "sha256:d40ce5fcd762817627670da8a7d8d8e65f24342d14539c59488dc603bf662e34" 597 | ], 598 | "markers": "python_version >= '3.8'", 599 | "version": "==2.5.33" 600 | }, 601 | "idna": { 602 | "hashes": [ 603 | "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", 604 | "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" 605 | ], 606 | "markers": "python_version >= '3.5'", 607 | "version": "==3.6" 608 | }, 609 | "iniconfig": { 610 | "hashes": [ 611 | "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", 612 | "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" 613 | ], 614 | "markers": "python_version >= '3.7'", 615 | "version": "==2.0.0" 616 | }, 617 | "isort": { 618 | "hashes": [ 619 | "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109", 620 | "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6" 621 | ], 622 | "index": "pypi", 623 | "version": "==5.13.2" 624 | }, 625 | "markdown-it-py": { 626 | "hashes": [ 627 | "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", 628 | "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb" 629 | ], 630 | "markers": "python_version >= '3.8'", 631 | "version": "==3.0.0" 632 | }, 633 | "mccabe": { 634 | "hashes": [ 635 | "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", 636 | "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" 637 | ], 638 | "markers": "python_version >= '3.6'", 639 | "version": "==0.7.0" 640 | }, 641 | "mdurl": { 642 | "hashes": [ 643 | "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", 644 | "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" 645 | ], 646 | "markers": "python_version >= '3.7'", 647 | "version": "==0.1.2" 648 | }, 649 | "mypy-extensions": { 650 | "hashes": [ 651 | "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", 652 | "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" 653 | ], 654 | "markers": "python_version >= '3.5'", 655 | "version": "==1.0.0" 656 | }, 657 | "nodeenv": { 658 | "hashes": [ 659 | "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2", 660 | "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec" 661 | ], 662 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", 663 | "version": "==1.8.0" 664 | }, 665 | "packaging": { 666 | "hashes": [ 667 | "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", 668 | "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" 669 | ], 670 | "markers": "python_version >= '3.7'", 671 | "version": "==23.2" 672 | }, 673 | "pathspec": { 674 | "hashes": [ 675 | "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", 676 | "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" 677 | ], 678 | "markers": "python_version >= '3.8'", 679 | "version": "==0.12.1" 680 | }, 681 | "pbr": { 682 | "hashes": [ 683 | "sha256:4a7317d5e3b17a3dccb6a8cfe67dab65b20551404c52c8ed41279fa4f0cb4cda", 684 | "sha256:d1377122a5a00e2f940ee482999518efe16d745d423a670c27773dfbc3c9a7d9" 685 | ], 686 | "markers": "python_version >= '2.6'", 687 | "version": "==6.0.0" 688 | }, 689 | "platformdirs": { 690 | "hashes": [ 691 | 
"sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", 692 | "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" 693 | ], 694 | "markers": "python_version >= '3.8'", 695 | "version": "==4.2.0" 696 | }, 697 | "pluggy": { 698 | "hashes": [ 699 | "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981", 700 | "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be" 701 | ], 702 | "markers": "python_version >= '3.8'", 703 | "version": "==1.4.0" 704 | }, 705 | "pre-commit": { 706 | "hashes": [ 707 | "sha256:c255039ef399049a5544b6ce13d135caba8f2c28c3b4033277a788f434308376", 708 | "sha256:d30bad9abf165f7785c15a21a1f46da7d0677cb00ee7ff4c579fd38922efe15d" 709 | ], 710 | "index": "pypi", 711 | "version": "==3.6.0" 712 | }, 713 | "pycodestyle": { 714 | "hashes": [ 715 | "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f", 716 | "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67" 717 | ], 718 | "markers": "python_version >= '3.8'", 719 | "version": "==2.11.1" 720 | }, 721 | "pydocstyle": { 722 | "hashes": [ 723 | "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", 724 | "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1" 725 | ], 726 | "index": "pypi", 727 | "version": "==6.3.0" 728 | }, 729 | "pyflakes": { 730 | "hashes": [ 731 | "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f", 732 | "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a" 733 | ], 734 | "markers": "python_version >= '3.8'", 735 | "version": "==3.2.0" 736 | }, 737 | "pygments": { 738 | "hashes": [ 739 | "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", 740 | "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" 741 | ], 742 | "markers": "python_version >= '3.7'", 743 | "version": "==2.17.2" 744 | }, 745 | "pyinstaller": { 746 | "hashes": [ 747 | "sha256:0597fb04337695e5cc5250253e0655530bf14f264b7a5b7d219cc65f6889c4bd", 748 | "sha256:156b32ba943e0090bcc68e40ae1cb68fd92b7f1ab6fe0bdf8faf3d3cfc4e12dd", 749 | "sha256:1eadbd1fae84e2e6c678d8b4ed6a232ec5c8fe3a839aea5a3071c4c0282f98cc", 750 | "sha256:41c937fe8f07ae02009b3b5a96ac3eb0800a4f8a97af142d4100060fe2135bb9", 751 | "sha256:75a6f2a6f835a2e6e0899d10e60c10caf5defd25aced38b1dd48fbbabc89de07", 752 | "sha256:886b3b995b674905a20ad5b720b47cc395897d7b391117831027a4c8c5d67a58", 753 | "sha256:914d4c96cc99472e37ac552fdd82fbbe09e67bb592d0717fcffaa99ea74273df", 754 | "sha256:96c37a1ee5b2fd5bb25c098ef510661d6d17b6515d0b86d8fc93727dd2475ba3", 755 | "sha256:abe91106a3bbccc3f3a27af4325676ecdb6f46cb842ac663625002a870fc503b", 756 | "sha256:b721d793a33b6d9946c7dd95d3ea7589c0424b51cf1b9fe580f03c544f1336b2", 757 | "sha256:de25beb176f73a944758553caacec46cc665bf3910ad8a174706d79cf6e95340", 758 | "sha256:e436fcc0ea87c3f132baac916d508c24c84a8f6d8a06c3154fbc753f169b76c7" 759 | ], 760 | "index": "pypi", 761 | "version": "==6.3.0" 762 | }, 763 | "pyinstaller-hooks-contrib": { 764 | "hashes": [ 765 | "sha256:469b5690df53223e2e8abffb2e44d6ee596e7d79d4b1eed9465123b67439875a", 766 | "sha256:a7118c1a5c9788595e5c43ad058a7a5b7b6d59e1eceb42362f6ec1f0b61986b0" 767 | ], 768 | "markers": "python_version >= '3.7'", 769 | "version": "==2024.0" 770 | }, 771 | "pytest": { 772 | "hashes": [ 773 | "sha256:249b1b0864530ba251b7438274c4d251c58d868edaaec8762893ad4a0d71c36c", 774 | "sha256:50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6" 775 | ], 776 | "index": "pypi", 
777 | "version": "==8.0.0" 778 | }, 779 | "pytest-cov": { 780 | "hashes": [ 781 | "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6", 782 | "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a" 783 | ], 784 | "index": "pypi", 785 | "version": "==4.1.0" 786 | }, 787 | "pytest-flake8": { 788 | "hashes": [ 789 | "sha256:ba4f243de3cb4c2486ed9e70752c80dd4b636f7ccb27d4eba763c35ed0cd316e", 790 | "sha256:e0661a786f8cbf976c185f706fdaf5d6df0b1667c3bcff8e823ba263618627e7" 791 | ], 792 | "index": "pypi", 793 | "version": "==1.1.1" 794 | }, 795 | "pytest-freezegun": { 796 | "hashes": [ 797 | "sha256:19c82d5633751bf3ec92caa481fb5cffaac1787bd485f0df6436fd6242176949", 798 | "sha256:5318a6bfb8ba4b709c8471c94d0033113877b3ee02da5bfcd917c1889cde99a7" 799 | ], 800 | "index": "pypi", 801 | "version": "==0.4.2" 802 | }, 803 | "python-dateutil": { 804 | "hashes": [ 805 | "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", 806 | "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" 807 | ], 808 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 809 | "version": "==2.8.2" 810 | }, 811 | "pyyaml": { 812 | "hashes": [ 813 | "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", 814 | "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", 815 | "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", 816 | "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", 817 | "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", 818 | "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", 819 | "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", 820 | "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", 821 | "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", 822 | "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", 823 | "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", 824 | "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", 825 | "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", 826 | "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", 827 | "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", 828 | "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", 829 | "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", 830 | "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", 831 | "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", 832 | "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", 833 | "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", 834 | "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", 835 | "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", 836 | "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", 837 | "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", 838 | "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", 839 | "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", 840 | "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", 841 | 
"sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", 842 | "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", 843 | "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", 844 | "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", 845 | "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", 846 | "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", 847 | "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", 848 | "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", 849 | "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", 850 | "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", 851 | "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", 852 | "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", 853 | "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", 854 | "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", 855 | "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", 856 | "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", 857 | "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", 858 | "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", 859 | "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", 860 | "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", 861 | "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", 862 | "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", 863 | "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" 864 | ], 865 | "markers": "python_version >= '3.6'", 866 | "version": "==6.0.1" 867 | }, 868 | "requests": { 869 | "hashes": [ 870 | "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", 871 | "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" 872 | ], 873 | "markers": "python_version >= '3.7'", 874 | "version": "==2.31.0" 875 | }, 876 | "rich": { 877 | "hashes": [ 878 | "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa", 879 | "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235" 880 | ], 881 | "markers": "python_full_version >= '3.7.0'", 882 | "version": "==13.7.0" 883 | }, 884 | "setuptools": { 885 | "hashes": [ 886 | "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05", 887 | "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78" 888 | ], 889 | "markers": "python_version >= '3.8'", 890 | "version": "==69.0.3" 891 | }, 892 | "six": { 893 | "hashes": [ 894 | "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", 895 | "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" 896 | ], 897 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 898 | "version": "==1.16.0" 899 | }, 900 | "snowballstemmer": { 901 | "hashes": [ 902 | "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", 903 | "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a" 904 | ], 905 | "version": "==2.2.0" 906 | }, 907 | "stevedore": { 908 | "hashes": [ 909 | "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d", 910 | 
"sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c" 911 | ], 912 | "markers": "python_version >= '3.8'", 913 | "version": "==5.1.0" 914 | }, 915 | "tomli": { 916 | "hashes": [ 917 | "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", 918 | "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" 919 | ], 920 | "markers": "python_version < '3.11'", 921 | "version": "==2.0.1" 922 | }, 923 | "typing-extensions": { 924 | "hashes": [ 925 | "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", 926 | "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" 927 | ], 928 | "markers": "python_version < '3.11'", 929 | "version": "==4.9.0" 930 | }, 931 | "urllib3": { 932 | "hashes": [ 933 | "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20", 934 | "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224" 935 | ], 936 | "markers": "python_version >= '3.8'", 937 | "version": "==2.2.0" 938 | }, 939 | "virtualenv": { 940 | "hashes": [ 941 | "sha256:4238949c5ffe6876362d9c0180fc6c3a824a7b12b80604eeb8085f2ed7460de3", 942 | "sha256:bf51c0d9c7dd63ea8e44086fa1e4fb1093a31e963b86959257378aef020e1f1b" 943 | ], 944 | "markers": "python_version >= '3.7'", 945 | "version": "==20.25.0" 946 | }, 947 | "yamllint": { 948 | "hashes": [ 949 | "sha256:28a19f5d68d28d8fec538a1db21bb2d84c7dc2e2ea36266da8d4d1c5a683814d", 950 | "sha256:2dceab9ef2d99518a2fcf4ffc964d44250ac4459be1ba3ca315118e4a1a81f7d" 951 | ], 952 | "index": "pypi", 953 | "version": "==1.33.0" 954 | } 955 | } 956 | } 957 | --------------------------------------------------------------------------------