├── tests ├── __init__.py ├── drivers │ ├── __init__.py │ └── s3 │ │ ├── __init__.py │ │ ├── data │ │ └── packages │ │ │ └── package1 │ │ │ ├── test1 │ │ │ └── dir1 │ │ │ └── test1 │ │ └── test_drivers.py ├── data │ ├── test_project_dir │ │ └── working_dir1 │ │ │ ├── file1.txt │ │ │ └── subdir1 │ │ │ └── file1.txt │ ├── test_packages_dir │ │ └── test │ │ │ └── test_root │ │ │ └── group1 │ │ │ └── subgroup1 │ │ │ └── .artifacts │ │ │ └── artifact1-1.0 │ │ │ ├── data │ │ │ ├── file1.txt │ │ │ ├── file2.txt │ │ │ └── subdir1 │ │ │ │ └── file1.txt │ │ │ └── info.json │ └── configs │ │ ├── without_dependencies.yaml │ │ ├── without_repositories.yaml │ │ ├── broken_dependency.yaml │ │ ├── broken_repository.yaml │ │ ├── repository_not_found.yaml │ │ ├── darty.yaml │ │ └── duplicated_dependency.yaml ├── test_ds_manager.py └── test_dependency.py ├── darty ├── commands │ ├── __init__.py │ ├── publish_local.py │ ├── configure.py │ ├── abstract.py │ ├── update.py │ ├── download.py │ └── publish.py ├── drivers │ ├── __init__.py │ ├── s3 │ │ ├── __init__.py │ │ ├── zip │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── driver.py │ │ └── files │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── driver.py │ ├── test │ │ ├── __init__.py │ │ └── driver.py │ ├── factory.py │ └── abstract.py ├── helpers │ ├── __init__.py │ ├── validation.py │ └── commands.py ├── package │ ├── __init__.py │ ├── package_info.py │ ├── validators.py │ ├── repository.py │ └── dependency.py ├── __init__.py ├── output_writer.py ├── settings.py ├── utils.py └── dependency_manager.py ├── requirements.txt ├── MAINTAINERS ├── SECURITY.md ├── .gitignore ├── setup.py ├── CONTRIBUTING.md ├── bin └── darty ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/drivers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/drivers/s3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/package/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/drivers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/drivers/s3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/drivers/s3/zip/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/drivers/test/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/drivers/s3/files/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darty/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.1' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools==36.8.0 2 | -------------------------------------------------------------------------------- /tests/drivers/s3/data/packages/package1/test1: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /tests/drivers/s3/data/packages/package1/dir1/test1: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /tests/data/test_project_dir/working_dir1/file1.txt: -------------------------------------------------------------------------------- 1 | some content -------------------------------------------------------------------------------- /tests/data/test_project_dir/working_dir1/subdir1/file1.txt: -------------------------------------------------------------------------------- 1 | some content -------------------------------------------------------------------------------- /tests/data/test_packages_dir/test/test_root/group1/subgroup1/.artifacts/artifact1-1.0/data/file1.txt: -------------------------------------------------------------------------------- 1 | some content -------------------------------------------------------------------------------- /tests/data/test_packages_dir/test/test_root/group1/subgroup1/.artifacts/artifact1-1.0/data/file2.txt: -------------------------------------------------------------------------------- 1 | some content -------------------------------------------------------------------------------- /tests/data/test_packages_dir/test/test_root/group1/subgroup1/.artifacts/artifact1-1.0/data/subdir1/file1.txt: -------------------------------------------------------------------------------- 1 | some content -------------------------------------------------------------------------------- /tests/data/configs/without_dependencies.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | default: 3 | type: test 4 | root: test 5 | 6 | dependencies: [] 7 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Oleg Polosin 2 | Pascal Pompey 3 | Alex Martinelli 4 | -------------------------------------------------------------------------------- /tests/data/configs/without_repositories.yaml: -------------------------------------------------------------------------------- 1 | repositories: {} 2 | dependencies: 3 | group: group 4 | artifact: artifact 5 | version: 1.0 6 | -------------------------------------------------------------------------------- /tests/data/configs/broken_dependency.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | 
default: 3 | type: test 4 | root: test 5 | 6 | dependencies: 7 | - group: group 8 | artifact: artifact 9 | -------------------------------------------------------------------------------- /tests/data/configs/broken_repository.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | default: 3 | root: test 4 | 5 | dependencies: 6 | - group: group 7 | artifact: artifact 8 | version: 1.0 9 | -------------------------------------------------------------------------------- /tests/data/configs/repository_not_found.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | default: 3 | type: test 4 | root: test 5 | 6 | dependencies: 7 | - group: group 8 | artifact: artifact 9 | version: 1.0 10 | repository: my_repository 11 | -------------------------------------------------------------------------------- /tests/data/configs/darty.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | default: 3 | type: test 4 | root: test 5 | 6 | dependencies: 7 | - group: group1 8 | artifact: artifact1 9 | version: 1.0 10 | 11 | - group: group1 12 | artifact: artifact2 13 | version: 1.0 14 | -------------------------------------------------------------------------------- /tests/data/configs/duplicated_dependency.yaml: -------------------------------------------------------------------------------- 1 | repositories: 2 | default: 3 | type: test 4 | root: test 5 | 6 | dependencies: 7 | - group: group 8 | artifact: artifact 9 | version: 1.0 10 | 11 | - group: group 12 | artifact: artifact 13 | version: 1.1 14 | -------------------------------------------------------------------------------- /tests/data/test_packages_dir/test/test_root/group1/subgroup1/.artifacts/artifact1-1.0/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "group": "group1.subgroup1", 3 | "artifact": "artifact1", 4 | "version": "1.0", 5 | "files": [ 6 | "file1.txt", 7 | "file2.txt", 8 | "subdir1/file1.txt" 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | We acknowledge that every line of code that we write may potentially contain security issues. 2 | 3 | We are trying to deal with it responsibly and provide patches as quickly as possible. If you have 4 | anything to report, please email us at [Tech-Security@zalando.de](mailto:Tech-Security@zalando.de).
5 | -------------------------------------------------------------------------------- /darty/helpers/validation.py: -------------------------------------------------------------------------------- 1 | from schema import Schema, Use 2 | 3 | 4 | def validate_dependency_config(data): 5 | # TODO: move all validations for the config file here 6 | schema = Schema({ 7 | 'repositories': object, 8 | 'dependencies': [{ 9 | 'version': Use(str), 10 | object: object, 11 | }], 12 | }) 13 | 14 | return schema.validate(data) 15 | -------------------------------------------------------------------------------- /darty/drivers/s3/files/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_dir_files(dir_path: str): 5 | """Returns paths for all files in the directory.""" 6 | for cur_dir, directories, filenames in os.walk(dir_path): 7 | cur_rel_dir = os.path.relpath(cur_dir, dir_path) 8 | if cur_rel_dir == '.': 9 | cur_rel_dir = '' 10 | 11 | for filename in filenames: 12 | yield os.path.join(cur_rel_dir, filename) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *$py.class 4 | 5 | # Distribution / packaging 6 | .Python 7 | env/ 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # PyBuilder 24 | target/ 25 | 26 | # virtualenv 27 | venv/ 28 | ENV/ 29 | 30 | #PYCHARM 31 | .idea/ 32 | 33 | .DS_Store 34 | /htmlcov/ 35 | -------------------------------------------------------------------------------- /darty/package/package_info.py: -------------------------------------------------------------------------------- 1 | class PackageInfo(object): 2 | 3 | def __init__(self, config: dict, local: bool): 4 | """ 5 | :param config: package configuration (from "info.json" file) 6 | :param local: True if this package published locally, False otherwise 7 | """ 8 | # TODO: check for errors 9 | self.group = config['group'] 10 | self.artifact = config['artifact'] 11 | self.version = config['version'] 12 | self.files = config['files'] 13 | self.name = config.get('name', '') 14 | self.description = config.get('description', '') 15 | 16 | self.local = local 17 | -------------------------------------------------------------------------------- /darty/drivers/factory.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | from darty.drivers.abstract import AbstractDriver 3 | from darty.drivers.test.driver import TestDriver 4 | 5 | 6 | class DriverFactory(object): 7 | @classmethod 8 | def create_driver(cls, driver_name, root: str, parameters: dict) -> AbstractDriver: 9 | # driver for unit tests 10 | if driver_name == 'test': 11 | return TestDriver(root, parameters) 12 | 13 | # search the driver 14 | for entry_point in pkg_resources.iter_entry_points('darty_drivers'): 15 | if driver_name == entry_point.name: 16 | driver = entry_point.load() 17 | return driver(root, parameters) 18 | 19 | raise ValueError('Driver "%s" not found' % driver_name) 20 | -------------------------------------------------------------------------------- /darty/helpers/commands.py: -------------------------------------------------------------------------------- 1 | from darty.dependency_manager import 
DependencyManager 2 | 3 | 4 | def get_dependencies_by_name(manager: DependencyManager, group: str, artifact: str): 5 | """Searches dependencies by group and artifact.""" 6 | if group and artifact: 7 | dependency = manager.get_dependency_by_name(group, artifact) 8 | if not dependency: 9 | raise ValueError('Package with group=%s and artifact=%s not found' % (group, artifact)) 10 | dependencies = [dependency] 11 | elif artifact: 12 | dependencies = manager.search_dependency_by_artifact(artifact) 13 | if not dependencies: 14 | raise ValueError('Package with artifact=%s not found' % artifact) 15 | else: 16 | dependencies = list(manager.dependencies.values()) 17 | 18 | return dependencies 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | from setuptools import setup, find_packages 4 | 5 | 6 | def get_version(): 7 | version_re = re.compile(r'''__version__ = ['"]([0-9.]+)['"]''') 8 | init = open(os.path.join(os.path.dirname(__file__), 'darty', '__init__.py')).read() 9 | return version_re.search(init).group(1) 10 | 11 | 12 | setup( 13 | name='darty', 14 | version=get_version(), 15 | description='Data Dependency Manager', 16 | packages=find_packages(exclude=['tests*']), 17 | scripts=['bin/darty'], 18 | entry_points={ 19 | 'darty_drivers': [ 20 | 's3_files = darty.drivers.s3.files.driver:S3FilesDriver', 21 | 's3_zip = darty.drivers.s3.zip.driver:S3ZipDriver', 22 | ], 23 | }, 24 | install_requires=['boto3', 'schema'], 25 | tests_require=['moto'], 26 | test_suite='tests', 27 | ) 28 | -------------------------------------------------------------------------------- /darty/commands/publish_local.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace, ArgumentParser 2 | from darty.commands.publish import PublishCommand 3 | from darty.output_writer import AbstractOutputWriter 4 | 5 | 6 | class PublishLocalCommand(PublishCommand): 7 | 8 | @staticmethod 9 | def get_command_name(): 10 | return 'publish-local' 11 | 12 | @staticmethod 13 | def get_description(): 14 | return 'Publish package locally' 15 | 16 | def configure(self, subparser: ArgumentParser): 17 | super().configure(subparser) 18 | subparser.add_argument('-r', '--rewrite', action='store_true', 19 | help='Rewrite local package if it exists') 20 | 21 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter): 22 | # get the dependency to publish 23 | dependency = self._resolve_dependency(args, output) 24 | 25 | # publish the package 26 | dependency.publish(local=True, rewrite_local=args.rewrite, output=output) 27 | -------------------------------------------------------------------------------- /darty/package/validators.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def check_group_name(group: str): 5 | regexp = '^[a-z][a-z0-9_]*(\.[a-z_][a-z0-9_]*)*$' 6 | pattern = re.compile(regexp) 7 | return pattern.match(group) 8 | 9 | 10 | def check_artifact_name(artifact: str): 11 | regexp = '^[a-z][a-z0-9_-]*[a-z0-9]$' 12 | pattern = re.compile(regexp) 13 | return pattern.match(artifact) 14 | 15 | 16 | def check_version_number(version: str): 17 | regexp = '^v?[0-9]+(\.[0-9]+){0,2}([a-z-][a-z0-9-][a-z0-9]*)?$' 18 | pattern = re.compile(regexp) 19 | return pattern.match(version) 20 | 21 | 22 | def check_files_file_path(file_path: str): 23 | regexp = 
'^[a-z0-9\.-_]([a-z0-9-/_]+\.?)*[a-z0-9-_]$' 24 | pattern = re.compile(regexp) 25 | return pattern.match(file_path) 26 | 27 | 28 | def check_repository_type(repository_type: str): 29 | regexp = '^[a-z0-9_]*$' 30 | pattern = re.compile(regexp) 31 | return pattern.match(repository_type) 32 | 33 | 34 | def check_repository_root(root: str): 35 | regexp = '^[a-z0-9_-]*$' 36 | pattern = re.compile(regexp) 37 | return pattern.match(root) 38 | -------------------------------------------------------------------------------- /darty/commands/configure.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from darty.commands.abstract import AbstractCommand 3 | from darty.output_writer import AbstractOutputWriter 4 | from darty.settings import save_profile_settings, get_config_file_path 5 | from darty.utils import get_input 6 | 7 | 8 | class ConfigureCommand(AbstractCommand): 9 | 10 | @staticmethod 11 | def get_command_name(): 12 | return 'configure' 13 | 14 | @staticmethod 15 | def get_description(): 16 | return 'Configure the tool' 17 | 18 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter): 19 | # ask the user to update Darty config 20 | inputs = [ 21 | ('packages_dir', 'Directory where all the packages will be stored [%s]: '), 22 | # TODO: include settings necessary for drivers 23 | ] 24 | 25 | for setting, message in inputs: 26 | settings[setting] = get_input(message % str(settings[setting]), settings[setting]) 27 | 28 | # save the config file 29 | save_profile_settings(get_config_file_path(), args.profile, settings) 30 | 31 | return True 32 | -------------------------------------------------------------------------------- /darty/drivers/s3/zip/utils.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import os 3 | 4 | 5 | def get_dir_files(dir_path: str): 6 | """Returns paths for all files in the directory.""" 7 | for cur_dir, directories, filenames in os.walk(dir_path): 8 | cur_rel_dir = os.path.relpath(cur_dir, dir_path) 9 | if cur_rel_dir == '.': 10 | cur_rel_dir = '' 11 | 12 | for filename in filenames: 13 | yield os.path.join(cur_rel_dir, filename) 14 | 15 | 16 | def unpack_archive(archive_path: str, dst_dir: str, delete_file: bool = False): 17 | """Unpacks downloaded package.""" 18 | archive = zipfile.ZipFile(archive_path) 19 | archive.extractall(dst_dir) 20 | 21 | if delete_file: 22 | os.remove(archive_path) 23 | 24 | 25 | def pack_archive(src_dir: str, archive_path: str): 26 | """Creates a new package.""" 27 | 28 | # get all paths before an archive is created 29 | file_paths = list(get_dir_files(src_dir)) 30 | 31 | # create an archive 32 | archive = zipfile.ZipFile(archive_path, 'w') 33 | 34 | for file_path in file_paths: 35 | archive.write(os.path.join(src_dir, file_path), arcname=file_path) 36 | 37 | archive.close() 38 | -------------------------------------------------------------------------------- /darty/package/repository.py: -------------------------------------------------------------------------------- 1 | from darty.drivers.factory import DriverFactory 2 | from darty.package.validators import check_repository_root, check_repository_type 3 | 4 | 5 | class Repository(object): 6 | 7 | def __init__(self, config: dict): 8 | self.type = config.get('type', '') 9 | self.root = config.get('root', '') # unique identificator within particular repository type 10 | self.parameters = config.get('parameters', {}) 11 | 12 | # check repository 
type 13 | if not self.type: 14 | raise ValueError('Repository type must be specified') 15 | if not check_repository_type(self.type): 16 | raise ValueError('Repository type has invalid format') 17 | 18 | # check repository root 19 | if not self.root: 20 | raise ValueError('Repository root must be specified') 21 | if not check_repository_root(self.root): 22 | raise ValueError('Repository root has invalid format') 23 | 24 | self._driver = None 25 | 26 | @property 27 | def driver(self): 28 | if not self._driver: 29 | self._driver = DriverFactory.create_driver(self.type, self.root, self.parameters) 30 | 31 | return self._driver 32 | -------------------------------------------------------------------------------- /darty/commands/abstract.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from argparse import Namespace, ArgumentParser 3 | from darty.output_writer import AbstractOutputWriter 4 | 5 | 6 | class AbstractCommand(ABC): 7 | 8 | """Abstract class to implement a command""" 9 | 10 | def __init__(self): 11 | """Command's constructor 12 | 13 | Raises: 14 | ValueError: If command's arguments can't be processed. 15 | """ 16 | 17 | @staticmethod 18 | @abstractmethod 19 | def get_command_name() -> str: 20 | """Returns a sub-command name.""" 21 | pass 22 | 23 | @staticmethod 24 | @abstractmethod 25 | def get_description() -> str: 26 | """Returns a sub-command description.""" 27 | pass 28 | 29 | def configure(self, subparser: ArgumentParser): 30 | """Adds arguments to the parser.""" 31 | pass 32 | 33 | @abstractmethod 34 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter) -> bool: 35 | """Performs a command 36 | Returns: 37 | bool: True for success, False otherwise. 38 | Raises: 39 | ValueError: If command's arguments can't be processed. 
40 | """ 41 | return True 42 | -------------------------------------------------------------------------------- /darty/output_writer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from abc import ABC, abstractmethod 4 | from contextlib import contextmanager 5 | 6 | 7 | class AbstractOutputWriter(ABC): 8 | def __init__(self): 9 | self._indent = 0 10 | 11 | @abstractmethod 12 | def write(self, message): 13 | pass 14 | 15 | def increase_indent(self): 16 | self._indent += 2 17 | 18 | def decrease_indent(self): 19 | self._indent -= 2 20 | 21 | @contextmanager 22 | def indent(self): 23 | self.increase_indent() 24 | yield 25 | self.decrease_indent() 26 | 27 | 28 | class OutputWriter(AbstractOutputWriter): 29 | def __init__(self): 30 | super().__init__() 31 | 32 | stream_handler = logging.StreamHandler(sys.stdout) 33 | stream_handler.setLevel(logging.INFO) 34 | stream_handler.setFormatter(logging.Formatter('%(message)s')) 35 | 36 | logger = logging.Logger('darty.output') 37 | logger.addHandler(stream_handler) 38 | 39 | self._logger = logger 40 | 41 | def write(self, message: str): 42 | self._logger.info(' ' * self._indent + message) 43 | 44 | 45 | class NullOutputWriter(AbstractOutputWriter): 46 | def __init__(self): 47 | super().__init__() 48 | 49 | def write(self, message): 50 | pass 51 | -------------------------------------------------------------------------------- /darty/commands/update.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace, ArgumentParser 2 | from darty.commands.abstract import AbstractCommand 3 | from darty.dependency_manager import DependencyManager 4 | from darty.helpers.commands import get_dependencies_by_name 5 | from darty.output_writer import AbstractOutputWriter 6 | 7 | 8 | class UpdateCommand(AbstractCommand): 9 | 10 | @staticmethod 11 | def get_command_name(): 12 | return 'update' 13 | 14 | @staticmethod 15 | def get_description(): 16 | return 'Update dependencies' 17 | 18 | def configure(self, subparser: ArgumentParser): 19 | subparser.add_argument('-c', '--config', type=str, help='Path to the model\'s config file', default=None) 20 | subparser.add_argument('--group', type=str, help='Group name of the package to update', default=None) 21 | subparser.add_argument('--artifact', type=str, help='Artifact name of the package to update', default=None) 22 | subparser.add_argument('-r', '--rewrite', action='store_true', help='Rewrite working directories') 23 | 24 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter): 25 | # instantiate the manager 26 | manager = DependencyManager(args.config, args.profile) 27 | 28 | # get dependencies 29 | dependencies = get_dependencies_by_name(manager, args.group, args.artifact) 30 | 31 | if not dependencies: 32 | output.write('No dependencies found') 33 | else: 34 | # update dependencies 35 | for dependency in dependencies: 36 | dependency.update(args.rewrite, output) 37 | output.write('') 38 | -------------------------------------------------------------------------------- /darty/settings.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | from darty.utils import check_path 4 | 5 | 6 | def get_config_file_path(): 7 | """Path to Darty "config" file.""" 8 | return os.path.join(os.path.expanduser('~'), '.darty', 'config') 9 | 10 | 11 | def get_settings(profile: str = 'default'): 12 | """Returns Darty 
settings for a specific profile.""" 13 | return get_profile_settings(get_config_file_path(), profile, { 14 | 'packages_dir': os.path.join(os.path.dirname(get_config_file_path()), 'packages') 15 | }) 16 | 17 | 18 | def get_profile_settings(filename: str, section: str, defaults: dict): 19 | """Reads a particular section in a configuration file. 20 | Args: 21 | filename (str): Path to a configuration file. 22 | section (str): Section name which should be read. 23 | defaults (dict): Dictionary with default values. 24 | Returns: 25 | dict: Default values merged with the actual values from the section. 26 | """ 27 | config = configparser.ConfigParser() 28 | config.read(filename) 29 | 30 | settings = dict(defaults) 31 | if section in config: 32 | settings = {**settings, **config[section]} 33 | 34 | return settings 35 | 36 | 37 | def save_profile_settings(filename: str, section: str, settings: dict): 38 | """Saves a particular section to a configuration file. 39 | Args: 40 | filename (str): Path to a configuration file. 41 | section (str): Section name where to write the settings. 42 | settings (dict): Dictionary with settings to save. 43 | """ 44 | config = configparser.ConfigParser() 45 | config.read(filename) 46 | config[section] = settings 47 | 48 | check_path(os.path.dirname(filename)) 49 | 50 | with open(filename, 'w') as f: 51 | config.write(f) 52 | -------------------------------------------------------------------------------- /darty/drivers/test/driver.py: -------------------------------------------------------------------------------- 1 | import os 2 | from darty.drivers.abstract import AbstractDriver 3 | from darty.output_writer import AbstractOutputWriter 4 | from darty.utils import file_exists, copy_dir 5 | 6 | 7 | class TestDriver(AbstractDriver): 8 | """Driver class for unit tests.""" 9 | def __init__(self, root: str, parameters: dict): 10 | super().__init__(root, parameters) 11 | 12 | assert 'local_dir' in parameters 13 | 14 | # create a directory for the repository 15 | os.makedirs(parameters['local_dir'], exist_ok=True) 16 | 17 | self._repository_dir = parameters['local_dir'] 18 | self._bucket_name = root 19 | 20 | def download_package(self, group: str, artifact: str, version: str, 21 | tmp_artifact_dir: str, output: AbstractOutputWriter) -> bool: 22 | artifact_dir = self._get_artifact_dir(group, artifact, version) 23 | 24 | # download a file 25 | copy_dir(artifact_dir, tmp_artifact_dir) 26 | 27 | return True 28 | 29 | def upload_package(self, group: str, artifact: str, version: str, 30 | tmp_artifact_dir: str, output: AbstractOutputWriter) -> bool: 31 | artifact_dir = self._get_artifact_dir(group, artifact, version) 32 | 33 | # upload a file 34 | os.makedirs(os.path.dirname(artifact_dir), exist_ok=True) 35 | copy_dir(tmp_artifact_dir, artifact_dir) 36 | 37 | return True 38 | 39 | def package_exists(self, group: str, artifact: str, version: str) -> bool: 40 | artifact_path = self._get_artifact_dir(group, artifact, version) 41 | return file_exists(artifact_path) 42 | 43 | def _get_artifact_dir(self, group: str, artifact: str, version: str): 44 | """Path to local artifact directory.""" 45 | artifact_dir = os.path.join(self._repository_dir, self._bucket_name, group.replace('.', os.sep), 46 | artifact + '-' + version) 47 | return artifact_dir 48 | -------------------------------------------------------------------------------- /darty/commands/download.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace, 
ArgumentParser 2 | from darty.commands.abstract import AbstractCommand 3 | from darty.dependency_manager import DependencyManager 4 | from darty.helpers.commands import get_dependencies_by_name 5 | from darty.output_writer import AbstractOutputWriter 6 | 7 | 8 | class DownloadCommand(AbstractCommand): 9 | 10 | @staticmethod 11 | def get_command_name(): 12 | return 'download' 13 | 14 | @staticmethod 15 | def get_description(): 16 | return 'Download dependencies' 17 | 18 | def configure(self, subparser: ArgumentParser): 19 | subparser.add_argument('-c', '--config', type=str, help='Path to the model\'s config file', default=None) 20 | subparser.add_argument('--py-package', type=str, help='Python package that contains Darty configuration file', 21 | default=None) 22 | subparser.add_argument('--group', type=str, help='Group name of the package to download', default=None) 23 | subparser.add_argument('--artifact', type=str, help='Artifact name of the package to download', default=None) 24 | 25 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter): 26 | # instantiate the manager 27 | if args.py_package: 28 | try: 29 | manager = DependencyManager.from_py_package(args.py_package, args.profile) 30 | except ImportError: 31 | raise ValueError('Python package "%s" not found' % args.py_package) 32 | else: 33 | manager = DependencyManager(args.config, args.profile) 34 | 35 | # get dependencies 36 | dependencies = get_dependencies_by_name(manager, args.group, args.artifact) 37 | 38 | if not dependencies: 39 | output.write('No dependencies found') 40 | else: 41 | # download dependencies 42 | for dependency in dependencies: 43 | dependency.download(output) 44 | output.write('') 45 | -------------------------------------------------------------------------------- /darty/drivers/abstract.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from darty.output_writer import AbstractOutputWriter 3 | 4 | 5 | class AbstractDriver(ABC): 6 | """ 7 | This class enables to abstract the storage layer responsible for centrally storing packages and their versions. 8 | Drivers for new storage mediums can be added by inheriting this class. 
Any capability used for the storage layer 9 | should be configured to guarantee immutability in order for the darty to work properly 10 | """ 11 | def __init__(self, root: str, parameters: dict = None): 12 | self._root = root 13 | self._params = parameters if parameters else {} 14 | 15 | @abstractmethod 16 | def download_package(self, group: str, artifact: str, version: str, 17 | tmp_artifact_dir: str, output: AbstractOutputWriter): 18 | """Downloads the package from a repository to the temporary directory.""" 19 | pass 20 | 21 | @abstractmethod 22 | def upload_package(self, group: str, artifact: str, version: str, 23 | tmp_artifact_dir: str, output: AbstractOutputWriter): 24 | """Uploads the package from the temporary directory to a repository.""" 25 | pass 26 | 27 | 28 | class DriverError(Exception): 29 | def __init__(self, msg: str): 30 | self.msg = msg 31 | 32 | def __str__(self): 33 | return self.msg 34 | 35 | 36 | class PackageNotFoundError(DriverError): 37 | def __init__(self, msg: str = 'Package not found'): 38 | super().__init__(msg) 39 | 40 | 41 | class VersionExistsError(DriverError): 42 | def __init__(self, msg: str = 'This version of the package already exists in the repository'): 43 | super().__init__(msg) 44 | 45 | 46 | class ReadAccessError(DriverError): 47 | def __init__(self, msg: str = 'No read access'): 48 | super().__init__(msg) 49 | 50 | 51 | class WriteAccessError(DriverError): 52 | def __init__(self, msg: str = 'No write access'): 53 | super().__init__(msg) 54 | -------------------------------------------------------------------------------- /tests/test_ds_manager.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from darty.dependency_manager import DependencyManager 4 | from schema import SchemaError 5 | 6 | 7 | def get_config_path(config_filename: str): 8 | return os.path.join(os.path.dirname(__file__), 'data', 'configs', config_filename) 9 | 10 | 11 | class TestDependencyManager(unittest.TestCase): 12 | 13 | def test_configuration(self): 14 | # configuration file doesn't exist 15 | with self.assertRaises(ValueError): 16 | DependencyManager('file_doesnt_exist.yaml') 17 | 18 | # no repositories specified 19 | with self.assertRaises(SchemaError): 20 | DependencyManager(get_config_path('without_repositories.yaml')) 21 | 22 | # no dependencies specified 23 | with self.assertRaises(ValueError): 24 | DependencyManager(get_config_path('without_dependencies.yaml')) 25 | 26 | # broken dependency specified 27 | with self.assertRaises(SchemaError): 28 | DependencyManager(get_config_path('broken_dependency.yaml')) 29 | 30 | # repository with such name not found in the configuration file 31 | with self.assertRaises(ValueError): 32 | DependencyManager(get_config_path('repository_not_found.yaml')) 33 | 34 | # broken repository specified 35 | with self.assertRaises(ValueError): 36 | DependencyManager(get_config_path('broken_repository.yaml')) 37 | 38 | # different versions of the same package are specified 39 | with self.assertRaises(ValueError): 40 | DependencyManager(get_config_path('duplicated_dependency.yaml')) 41 | 42 | def test_get_dependency(self): 43 | dm = DependencyManager(get_config_path('darty.yaml')) 44 | 45 | # get dependency by name 46 | dependency = dm.get_dependency_by_name('group1', 'artifact1') 47 | self.assertEqual(dependency.group, 'group1') 48 | self.assertEqual(dependency.artifact, 'artifact1') 49 | 50 | self.assertIsNone(dm.get_dependency_by_name('group1', 'wrong-artifact')) 51 | 52 | 
53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /darty/commands/publish.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace, ArgumentParser 2 | from darty.commands.abstract import AbstractCommand 3 | from darty.dependency_manager import DependencyManager 4 | from darty.helpers.commands import get_dependencies_by_name 5 | from darty.output_writer import AbstractOutputWriter 6 | 7 | 8 | class PublishCommand(AbstractCommand): 9 | 10 | @staticmethod 11 | def get_command_name(): 12 | return 'publish' 13 | 14 | @staticmethod 15 | def get_description(): 16 | return 'Publish package' 17 | 18 | def configure(self, subparser: ArgumentParser): 19 | subparser.add_argument('-c', '--config', type=str, help='Path to the model\'s config file', default=None) 20 | subparser.add_argument('--group', type=str, help='Group name of the package to publish', default=None) 21 | subparser.add_argument('--artifact', type=str, help='Artifact name of the package to publish', default=None) 22 | 23 | def run(self, args: Namespace, settings: dict, output: AbstractOutputWriter): 24 | # get the dependency to publish 25 | dependency = self._resolve_dependency(args, output) 26 | 27 | # publish the package 28 | dependency.publish(output=output) 29 | 30 | @staticmethod 31 | def _resolve_dependency(args: Namespace, output: AbstractOutputWriter): 32 | # instantiate the manager 33 | manager = DependencyManager(args.config, args.profile) 34 | 35 | # get dependencies 36 | dependencies = get_dependencies_by_name(manager, args.group, args.artifact) 37 | if not dependencies: 38 | raise ValueError('No packages to publish') 39 | 40 | if len(dependencies) > 1: 41 | # ask user to choose a package to publish 42 | output.write('Multiple packages detected, select one to publish:\n') 43 | with output.indent(): 44 | for i, dependency in enumerate(dependencies): 45 | output.write('[%d] %s:%s:%s' % (i + 1, dependency.group, dependency.artifact, dependency.version)) 46 | 47 | output.write('') 48 | 49 | try: 50 | num = int(input('Enter number: ')) 51 | except ValueError: 52 | raise ValueError('\nWrong value.') 53 | 54 | if num < 1 or num > len(dependencies): 55 | raise ValueError('Value between 1 and %d was expected.' % len(dependencies)) 56 | else: 57 | num = 1 58 | 59 | return dependencies[num - 1] 60 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Darty 2 | 3 | **Thank you for your interest in Darty. Your contributions are highly welcome.** 4 | 5 | There are multiple ways of getting involved: 6 | 7 | - [Report a bug](#report-a-bug) 8 | - [Suggest a feature](#suggest-a-feature) 9 | - [Contribute code](#contribute-code) 10 | 11 | Below are a few guidelines we would like you to follow. 12 | If you need help, please reach out to us by opening an issue. 13 | 14 | ## Report a bug 15 | Reporting bugs is one of the best ways to contribute. Before creating a bug report, 16 | please check that an [issue](https://github.com/zalando-incubator/darty/issues) reporting the same problem does not already 17 | exist. If there is such an issue, you may add your information as a comment. 18 | 19 | To report a new bug you should open an issue that summarizes the bug and set the label to "bug". 20 | 21 | If you want to provide a fix along with your bug report: That is great!
In this case please send us a pull request as 22 | described in section [Contribute Code](#contribute-code). 23 | 24 | ## Suggest a feature 25 | To request a new feature you should open an [issue](https://github.com/zalando-incubator/darty/issues/new) and summarize 26 | the desired functionality and its use case. Set the issue label to "feature". 27 | 28 | ## Contribute code 29 | This is a rough outline of what the workflow for code contributions looks like: 30 | - Check the list of open [issues](https://github.com/zalando-incubator/darty/issues). Either assign an existing issue to 31 | yourself, or create a new one that you would like work on and discuss your ideas and use cases. It is always best to 32 | discuss your plans beforehand, to ensure that your contribution is in line with our goals for Darty. 33 | - Fork the repository on GitHub 34 | - Create a topic branch from where you want to base your work. This is usually master. 35 | - Make commits of logical units 36 | - Write good commit messages (see below) 37 | - Push your changes to a topic branch in your fork of the repository 38 | - Submit a pull request to [zalando-incubator/darty](https://github.com/zalando-incubator/darty) 39 | - Your pull request must receive a :thumbsup: from two [maintainers](https://github.com/zalando-incubator/darty/blob/master/MAINTAINERS) 40 | 41 | Thanks for your contributions! 42 | 43 | ### Commit messages 44 | Your commit messages ideally can answer two questions: what changed and why. The subject line should feature 45 | the “what” and the body of the commit should describe the “why”. 46 | 47 | When creating a pull request, its comment should reference the corresponding issue id. 48 | 49 | **Have fun, and happy hacking!** 50 | -------------------------------------------------------------------------------- /tests/drivers/s3/test_drivers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import boto3 4 | from darty.drivers.abstract import VersionExistsError, PackageNotFoundError 5 | from darty.drivers.s3.files.driver import S3FilesDriver 6 | from darty.drivers.s3.zip.driver import S3ZipDriver 7 | from moto import mock_s3 8 | from darty.output_writer import NullOutputWriter 9 | from shutil import rmtree 10 | 11 | 12 | def list_dir_files(dir_path): 13 | for cur_dir, directories, filenames in os.walk(dir_path): 14 | rel_dir = os.path.relpath(cur_dir, dir_path) 15 | for filename in filenames: 16 | yield os.path.join(rel_dir, filename) 17 | 18 | 19 | class TestDrivers(unittest.TestCase): 20 | @mock_s3 21 | def test_upload_and_download(self): 22 | for driver_class in [S3FilesDriver, S3ZipDriver]: 23 | # create a test bucket 24 | bucket_name = 'test-bucket-%s' % driver_class.__name__ 25 | s3 = boto3.resource('s3') 26 | s3.create_bucket(Bucket=bucket_name) 27 | 28 | driver = driver_class(bucket_name, {}) 29 | 30 | # upload test package 31 | pkg1_path = os.path.join(os.path.dirname(__file__), 'data', 'packages', 'package1') 32 | driver.upload_package('group1', 'artifact1', '1.1', pkg1_path, output=NullOutputWriter()) 33 | 34 | # upload the same package second time (raises an exception) 35 | with self.assertRaises(VersionExistsError): 36 | driver.upload_package('group1', 'artifact1', '1.1', pkg1_path, output=NullOutputWriter()) 37 | 38 | # create a folder for downloaded package 39 | downloaded_pkg_path = os.path.join(os.path.dirname(__file__), 'data', 'packages', 'downloaded') 40 | rmtree(downloaded_pkg_path, ignore_errors=True) 41 | 
os.makedirs(downloaded_pkg_path, exist_ok=True) 42 | 43 | # download the package 44 | driver.download_package('group1', 'artifact1', '1.1', downloaded_pkg_path, output=NullOutputWriter()) 45 | orig_files = list(list_dir_files(pkg1_path)) 46 | downloaded_files = list(list_dir_files(downloaded_pkg_path)) 47 | self.assertEqual(orig_files, downloaded_files) 48 | rmtree(downloaded_pkg_path, ignore_errors=True) 49 | 50 | # download not-existing package (raises an exception) 51 | with self.assertRaises(PackageNotFoundError): 52 | driver.download_package('group1', 'artifact_doesnt_exist', '1.0', downloaded_pkg_path, 53 | output=NullOutputWriter()) 54 | 55 | 56 | if __name__ == '__main__': 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /bin/darty: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import logging 5 | import sys 6 | import darty 7 | from darty.commands.configure import ConfigureCommand 8 | from darty.commands.publish import PublishCommand 9 | from darty.commands.publish_local import PublishLocalCommand 10 | from darty.commands.update import UpdateCommand 11 | from darty.commands.download import DownloadCommand 12 | from darty.output_writer import OutputWriter 13 | from darty.settings import get_settings 14 | 15 | 16 | # list of existing commands 17 | commands_classes = [ 18 | ConfigureCommand, 19 | PublishCommand, 20 | PublishLocalCommand, 21 | UpdateCommand, 22 | DownloadCommand, 23 | ] 24 | 25 | # build the parser 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('-p', '--profile', type=str, default='default', help='Settings profile') 28 | parser.add_argument('-d', '--debug', action='store_true', help='Show debug messages') 29 | parser.add_argument('--version', action='store_true', help='Display the version of this tool') 30 | 31 | # build subparsers 32 | subparsers = parser.add_subparsers() 33 | 34 | for command_class in commands_classes: 35 | # create a subparser 36 | subparser = subparsers.add_parser(command_class.get_command_name(), help=command_class.get_description()) 37 | 38 | # create a sub-command 39 | try: 40 | command = command_class() 41 | except ValueError as e: 42 | parser.print_usage() 43 | print(e) 44 | sys.exit(1) 45 | 46 | # configure a sub-parser for a sub-command 47 | command.configure(subparser) 48 | subparser.set_defaults(command_object=command, command_subparser=subparser) 49 | 50 | # parse arguments 51 | args = parser.parse_args() 52 | 53 | # logging 54 | logging_level = logging.DEBUG if args.debug else logging.WARNING 55 | logging.basicConfig(level=logging_level, format='%(levelname)s %(message)s') 56 | 57 | # output writer 58 | output = OutputWriter() 59 | 60 | # display the version of the tool 61 | display_version = args.version 62 | if display_version: 63 | output.write(darty.__version__) 64 | sys.exit(0) 65 | 66 | # check that some command was called 67 | if not hasattr(args, 'command_object'): 68 | parser.print_usage() 69 | sys.exit(1) 70 | 71 | # get settings 72 | settings = get_settings(args.profile) 73 | 74 | # run a command 75 | try: 76 | res = args.command_object.run(args, settings, output) 77 | except ValueError as e: 78 | output.write('') 79 | args.command_subparser.print_usage() 80 | output.write('-----') 81 | output.write(str(e)) 82 | output.write('-----') 83 | sys.exit(1) 84 | except KeyboardInterrupt: 85 | sys.exit(1) 86 | 87 | if not res: 88 | sys.exit(1) 89 | 
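The command-line entry point above wires the sub-commands to the DependencyManager API defined in darty/dependency_manager.py further below. The following is a rough, hedged sketch only — it assumes a darty.yaml in the current directory (similar to tests/data/configs/darty.yaml), a configured packages directory, and purely illustrative group/artifact/file names — showing how the same API can be driven programmatically:

from darty.dependency_manager import DependencyManager
from darty.output_writer import OutputWriter

# read darty.yaml from the current directory with the "default" settings profile
manager = DependencyManager('darty.yaml', 'default')

# look up a dependency declared in the config (group/artifact names are illustrative)
dependency = manager.get_dependency_by_name('group1', 'artifact1')
if dependency is None:
    raise ValueError('Dependency is not declared in darty.yaml')

# fetch the package through the repository driver and resolve a file inside it
dependency.download(OutputWriter())
print(manager.get_path('group1', 'artifact1', 'file1.txt'))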
-------------------------------------------------------------------------------- /darty/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import hashlib 4 | from shutil import rmtree, copytree, copyfile 5 | 6 | 7 | def check_path(path): 8 | """Creates a directory if it doesn't exist.""" 9 | if not os.path.exists(path): 10 | try: 11 | os.makedirs(path) 12 | except OSError as exception: 13 | if exception.errno != errno.EEXIST: 14 | raise exception 15 | 16 | 17 | def get_input(message: str, default_value=None): 18 | """Gets user's input or uses a default value.""" 19 | value = input(message) 20 | if not value: 21 | if default_value is not None: 22 | value = default_value 23 | else: 24 | print('Value is required', flush=True) 25 | value = get_input(message) 26 | 27 | return value 28 | 29 | 30 | def dir_exists(path: str): 31 | return os.path.exists(path) or os.path.isdir(path) 32 | 33 | 34 | def file_exists(path: str): 35 | return os.path.exists(path) or os.path.isfile(path) 36 | 37 | 38 | def is_dir_empty(path: str): 39 | return not dir_exists(path) or not len(os.listdir(path)) 40 | 41 | 42 | def copy_file(src_path, dst_path): 43 | """Copies a file. 44 | Creates necessary directories if they don't exist. 45 | """ 46 | os.makedirs(os.path.dirname(dst_path), exist_ok=True) 47 | copyfile(src_path, dst_path) 48 | 49 | 50 | def copy_dir(src_dir, dst_dir): 51 | """Copies a directory. 52 | Destination directory will be removed before copying. 53 | """ 54 | rmtree(dst_dir, True) 55 | res = copytree(src_dir, dst_dir) 56 | 57 | return res 58 | 59 | 60 | def list_dir_files(dir_path): 61 | for cur_dir, directories, filenames in os.walk(dir_path): 62 | rel_dir = os.path.relpath(cur_dir, dir_path) 63 | for filename in filenames: 64 | yield os.path.join(rel_dir, filename) 65 | 66 | 67 | def get_dir_hash(path: str): 68 | """Gets SHA1 hash of the directory. 69 | 70 | :param path: 71 | :return: 72 | """ 73 | sha_hash = hashlib.sha1() 74 | if not dir_exists(path): 75 | raise ValueError('Directory "%s" doesn\'t exist' % path) 76 | 77 | for cur_dir, directories, filenames in os.walk(path): 78 | for filename in filenames: 79 | file_path = os.path.join(cur_dir, filename) 80 | 81 | # add filename to a hash 82 | relative_path = os.path.relpath(file_path, path).replace('\\', '/') 83 | sha_hash.update(relative_path.encode('utf-8')) 84 | 85 | # add file content to a hash 86 | with open(file_path, 'rb') as f: 87 | while True: 88 | buf = f.read(4096) 89 | if not buf: 90 | break 91 | 92 | sha_hash.update(hashlib.sha1(buf).hexdigest().encode('utf-8')) 93 | 94 | return sha_hash.hexdigest() 95 | 96 | 97 | def convert_path_w2u(path): 98 | """Converts path from Windows style to Unix style. 99 | If the path was already written in Unix style it remains unchanged.
100 | """ 101 | return path.replace('\\', '/') 102 | -------------------------------------------------------------------------------- /darty/drivers/s3/zip/driver.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | from darty.drivers.abstract import AbstractDriver, VersionExistsError, DriverError, PackageNotFoundError, \ 4 | ReadAccessError 5 | from darty.output_writer import AbstractOutputWriter 6 | from darty.drivers.s3.zip.utils import pack_archive, unpack_archive 7 | from botocore.exceptions import ClientError 8 | 9 | 10 | class S3ZipDriver(AbstractDriver): 11 | 12 | def __init__(self, root: str, parameters: dict): 13 | super().__init__(root, parameters) 14 | 15 | self._s3 = boto3.resource('s3') 16 | self._client = self._s3.meta.client 17 | 18 | def download_package(self, group: str, artifact: str, version: str, 19 | tmp_artifact_dir: str, output: AbstractOutputWriter): 20 | # check that package exists in the repository 21 | package_exists = self._package_exists(group, artifact, version) 22 | if not package_exists: 23 | raise PackageNotFoundError() 24 | 25 | # download an archive 26 | s3_path = self._get_s3_artifact_path(group, artifact, version) 27 | archive_path = os.path.join(tmp_artifact_dir, 'package.zip') 28 | 29 | try: 30 | self._s3.Bucket(self._root).download_file(s3_path, archive_path) 31 | except ClientError as e: 32 | raise DriverError('Download Error: %s' % e.response['Error']['Message']) 33 | 34 | # unarchive a package 35 | unpack_archive(archive_path, tmp_artifact_dir) 36 | 37 | # remove an archive 38 | os.remove(archive_path) 39 | 40 | def upload_package(self, group: str, artifact: str, version: str, 41 | tmp_artifact_dir: str, output: AbstractOutputWriter): 42 | # check that this version of the package doesn't exist in the repository 43 | package_exists = self._package_exists(group, artifact, version) 44 | if package_exists: 45 | raise VersionExistsError() 46 | 47 | # archive a package 48 | archive_path = os.path.join(tmp_artifact_dir, 'package.zip') 49 | pack_archive(tmp_artifact_dir, archive_path) 50 | 51 | # upload an archive to S3 52 | s3_path = self._get_s3_artifact_path(group, artifact, version) 53 | try: 54 | self._client.upload_file(archive_path, self._root, s3_path) 55 | except ClientError as e: 56 | raise DriverError('Upload Error: %s' % e.response['Error']['Message']) 57 | 58 | # remove an archive 59 | os.remove(archive_path) 60 | 61 | def _package_exists(self, group: str, artifact: str, version: str) -> bool: 62 | path = self._get_s3_artifact_path(group, artifact, version) 63 | exists = True 64 | 65 | try: 66 | self._client.head_object(Bucket=self._root, Key=path) 67 | except ClientError as e: 68 | if e.response['Error']['Code'] == '404': 69 | exists = False 70 | elif e.response['Error']['Code'] == '403': 71 | raise ReadAccessError() 72 | else: 73 | raise DriverError(e.response['Error']['Message']) 74 | 75 | return exists 76 | 77 | @staticmethod 78 | def _get_s3_artifact_path(group: str, artifact: str, version: str): 79 | path = group.replace('.', '/') + '/' + artifact + '-' + version + '.zip' 80 | return path 81 | -------------------------------------------------------------------------------- /darty/drivers/s3/files/driver.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | from darty.drivers.abstract import AbstractDriver, PackageNotFoundError, 
ReadAccessError, DriverError, \ 6 | VersionExistsError 7 | from darty.output_writer import AbstractOutputWriter 8 | from darty.drivers.s3.files.utils import get_dir_files 9 | 10 | 11 | class S3FilesDriver(AbstractDriver): 12 | 13 | def __init__(self, root: str, parameters: dict): 14 | super().__init__(root, parameters) 15 | 16 | self._s3 = boto3.resource('s3') 17 | self._client = self._s3.meta.client 18 | 19 | def download_package(self, group: str, artifact: str, version: str, 20 | tmp_artifact_dir: str, output: AbstractOutputWriter): 21 | # check that package exists in the repository 22 | package_exists = self._package_exists(group, artifact, version) 23 | if not package_exists: 24 | raise PackageNotFoundError() 25 | 26 | # get a list of package files 27 | s3_prefix = self._get_s3_file_path(group, artifact, version, '') 28 | 29 | bucket = self._s3.Bucket(self._root) 30 | try: 31 | s3_file_paths = [obj.key for obj in bucket.objects.filter(Prefix=s3_prefix)] 32 | except ClientError as e: 33 | raise DriverError(e.response['Error']['Message']) 34 | 35 | # download the files 36 | bucket = self._s3.Bucket(self._root) 37 | for s3_file_path in s3_file_paths: 38 | local_file_path = os.path.join(tmp_artifact_dir, s3_file_path[len(s3_prefix):]) 39 | 40 | logging.debug('Downloading "s3://%s/%s" to "%s"' % (self._root, s3_file_path, local_file_path)) 41 | 42 | os.makedirs(os.path.dirname(local_file_path), exist_ok=True) 43 | try: 44 | bucket.download_file(s3_file_path, local_file_path) 45 | except ClientError as e: 46 | if e.response['Error']['Code'] == '403': 47 | raise ReadAccessError() 48 | else: 49 | raise DriverError('Download Error: %s' % e.response['Error']['Message']) 50 | 51 | def upload_package(self, group: str, artifact: str, version: str, 52 | tmp_artifact_dir: str, output: AbstractOutputWriter): 53 | # check that this version of the package doesn't exist in the repository 54 | package_exists = self._package_exists(group, artifact, version) 55 | if package_exists: 56 | raise VersionExistsError() 57 | 58 | # upload files to S3 59 | for file_path in get_dir_files(tmp_artifact_dir): 60 | s3_file_path = self._get_s3_file_path(group, artifact, version, file_path) 61 | local_file_path = os.path.join(tmp_artifact_dir, file_path) 62 | 63 | logging.debug('Uploading "%s" to "s3://%s/%s"' % (local_file_path, self._root, s3_file_path)) 64 | 65 | try: 66 | self._client.upload_file(os.path.join(tmp_artifact_dir, file_path), self._root, s3_file_path) 67 | except ClientError as e: 68 | raise DriverError('Upload Error: %s' % e.response['Error']['Message']) 69 | 70 | def _package_exists(self, group: str, artifact: str, version: str) -> bool: 71 | prefix = self._get_s3_file_path(group, artifact, version, '') 72 | 73 | try: 74 | res = self._client.list_objects_v2(Bucket=self._root, Prefix=prefix) 75 | except ClientError as e: 76 | if e.response['Error']['Code'] == '403': 77 | raise ReadAccessError() 78 | else: 79 | raise DriverError(e.response['Error']['Message']) 80 | 81 | return bool(res['KeyCount']) 82 | 83 | @staticmethod 84 | def _get_s3_file_path(group: str, artifact: str, version: str, file_path: str) -> str: 85 | path = group.replace('.', '/') + '/.artifacts/' + artifact + '-' + version + '/' + file_path 86 | return path 87 | -------------------------------------------------------------------------------- /darty/dependency_manager.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | from collections import OrderedDict 4 | 5 | from 
darty.helpers.validation import validate_dependency_config 6 | from darty.package.dependency import Dependency 7 | from darty.package.repository import Repository 8 | from darty.settings import get_settings 9 | from darty.utils import file_exists 10 | 11 | 12 | class DependencyManager(object): 13 | """This class reads, maintains and indexes all the known dependencies in the project.""" 14 | 15 | DEFAULT_CONFIG_FILE = 'darty.yaml' 16 | DEFAULT_DARTY_PROFILE = 'default' 17 | 18 | def __init__(self, config_path: str = None, darty_profile: str = None): 19 | if not config_path: 20 | config_path = self.DEFAULT_CONFIG_FILE 21 | 22 | if not darty_profile: 23 | darty_profile = self.DEFAULT_DARTY_PROFILE 24 | 25 | # check that a configuration file exists 26 | if not file_exists(config_path): 27 | raise ValueError('Configuration file "%s" was not found.' % config_path) 28 | 29 | # get packages directory 30 | settings = get_settings(darty_profile) 31 | packages_dir = os.path.expanduser(settings['packages_dir']) 32 | 33 | # read a config file 34 | with open(config_path, 'r') as f: 35 | config = yaml.load(f) 36 | 37 | config = validate_dependency_config(config) 38 | 39 | project_dir = os.path.dirname(config_path) # project directory 40 | 41 | if 'repositories' not in config or not len(config['repositories']): 42 | raise ValueError('Repositories are not specified') 43 | 44 | if 'dependencies' not in config or not len(config['dependencies']): 45 | raise ValueError('Dependencies are not specified') 46 | 47 | # creating repositories objects 48 | repositories = {} 49 | for rep_name, rep_config in config['repositories'].items(): 50 | try: 51 | repositories[rep_name] = Repository(rep_config) 52 | except ValueError as e: 53 | raise ValueError('Repository "%s": %s' % (rep_name, str(e))) 54 | 55 | # creating dependency objects 56 | self._dependencies = OrderedDict() 57 | 58 | for i, dep_config in enumerate(config['dependencies']): 59 | # get repository object 60 | repository_name = dep_config['repository'] if 'repository' in dep_config else 'default' 61 | if repository_name not in repositories: 62 | raise ValueError('Repository "%s" doesn\'t exist' % repository_name) 63 | 64 | # create an object for dependency 65 | try: 66 | dependency = Dependency(dep_config, repositories[repository_name], packages_dir, project_dir) 67 | except ValueError as e: 68 | raise ValueError('Dependency #%d: %s' % (i + 1, str(e))) 69 | 70 | key = self._get_dependency_key(dependency.group, dependency.artifact) 71 | 72 | if key in self._dependencies: 73 | raise ValueError('Config contains two dependencies with the same name: "%s:%s"' 74 | % (dependency.group, dependency.artifact)) 75 | 76 | self._dependencies[key] = dependency 77 | 78 | # TODO: 79 | # check that dependencies with the same working directories 80 | # always contain "files" parameter and files are not overlapped 81 | 82 | @property 83 | def dependencies(self): 84 | return self._dependencies 85 | 86 | @classmethod 87 | def from_py_package(cls, package_name: str, config_path: str = None, darty_profile: str = None): 88 | """Creates an instance of DependencyManager by Python package name. 89 | It automatically finds a path to dependency file within a Python package. 
90 | 91 | :param package_name: 92 | :param config_path: 93 | :param darty_profile: 94 | :return: 95 | """ 96 | import pkg_resources 97 | 98 | if not config_path: 99 | config_path = cls.DEFAULT_CONFIG_FILE 100 | 101 | return cls(pkg_resources.resource_filename(package_name, config_path), darty_profile) 102 | 103 | def get_path(self, group: str, artifact: str, file_path: str = None): 104 | dependency = self.get_dependency_by_name(group, artifact) 105 | if not dependency: 106 | raise ValueError('The package "%s:%s" was not found in the configuration file' % (group, artifact)) 107 | 108 | return dependency.get_path(file_path) 109 | 110 | def get_dependency_by_name(self, group: str, artifact: str) -> Dependency: 111 | """Returns dependency object by group and artifact name or "None" if the dependency is not specified. 112 | 113 | :param group: 114 | :param artifact: 115 | :return: Dependency 116 | """ 117 | name = self._get_dependency_key(group, artifact) 118 | if name not in self._dependencies: 119 | return None 120 | 121 | return self._dependencies[name] 122 | 123 | def search_dependency_by_artifact(self, artifact: str) -> list: 124 | """Finds dependencies by artifact name. 125 | 126 | :param artifact: 127 | :return: [Dependency] 128 | """ 129 | res = [] 130 | for key, dependency in self._dependencies.items(): 131 | if dependency.artifact == artifact: 132 | res.append(dependency) 133 | 134 | return res 135 | 136 | def _get_dependency_key(self, group, artifact): 137 | """Returns a unique key for dependency for faster lookups.""" 138 | return group + '.' + artifact 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) Copyright © 2018 Zalando SE, https://tech.zalando.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | documentation files (the “Software”), to deal in the Software without restriction, including without limitation the 5 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 6 | persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 9 | Software. 10 | 11 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 12 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | 17 | License file for "darty/drivers/s3/" code: 18 | 19 | Copyright 2013-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. 20 | 21 | Licensed under the Apache License, Version 2.0 (the "License"). You 22 | may not use this file except in compliance with the License. A copy of 23 | the License is located at 24 | 25 | http://aws.amazon.com/apache2.0/ 26 | 27 | or in the "license" file accompanying this file. This file is 28 | distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 29 | ANY KIND, either express or implied. 
See the License for the specific 30 | language governing permissions and limitations under the License. 31 | 32 | 33 | Apache License 34 | 35 | Version 2.0, January 2004 36 | 37 | http://www.apache.org/licenses/ 38 | 39 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 40 | 41 | 1. Definitions. 42 | 43 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 44 | of this document. 45 | 46 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 47 | 48 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are 49 | under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or 50 | indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of 51 | fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 52 | 53 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 54 | 55 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, 56 | documentation source, and configuration files. 57 | 58 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including 59 | but not limited to compiled object code, generated documentation, and conversions to other media types. 60 | 61 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as 62 | indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix 63 | below). 64 | 65 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work 66 | and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an 67 | original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain 68 | separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 69 | 70 | "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or 71 | additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the 72 | Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 73 | For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to 74 | the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code 75 | control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of 76 | discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in 77 | writing by the copyright owner as "Not a Contribution." 78 | 79 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received 80 | by Licensor and subsequently incorporated within the Work. 81 | 82 | 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to 83 | You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare 84 | Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works 85 | in Source or Object form. 86 | 87 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a 88 | perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent 89 | license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license 90 | applies only to those patent claims licensable by such Contributor that are necessarily infringed by their 91 | Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was 92 | submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) 93 | alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent 94 | infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date 95 | such litigation is filed. 96 | 97 | 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with 98 | or without modifications, and in Source or Object form, provided that You meet the following conditions: 99 | 100 | a. You must give any other recipients of the Work or Derivative Works a copy of this License; and 101 | 102 | b. You must cause any modified files to carry prominent notices stating that You changed the files; and 103 | 104 | c. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, 105 | trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to 106 | any part of the Derivative Works; and 107 | 108 | d. If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You 109 | distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding 110 | those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: 111 | within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if 112 | provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever 113 | such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do 114 | not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, 115 | alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices 116 | cannot be construed as modifying the License. 117 | 118 | You may add Your own copyright statement to Your modifications and may provide additional or different license terms 119 | and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a 120 | whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated 121 | in this License. 122 | 123 | 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for 124 | inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any 125 | additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any 126 | separate license agreement you may have executed with Licensor regarding such Contributions. 127 | 128 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product 129 | names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and 130 | reproducing the content of the NOTICE file. 131 | 132 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and 133 | each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 134 | express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, 135 | MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of 136 | using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 137 | 138 | 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or 139 | otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, 140 | shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or 141 | consequential damages of any character arising as a result of this License or out of the use or inability to use the 142 | Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any 143 | and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such 144 | damages. 145 | 146 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose 147 | to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights 148 | consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your 149 | sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each 150 | Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your 151 | accepting any such warranty or additional liability. 152 | 153 | END OF TERMS AND CONDITIONS 154 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Darty 2 | 3 | __Darty__ is a data dependency manager for data science projects. 4 | It helps to share data across projects and control data versions. 
5 | 6 | ## Getting Started 7 | 8 | ### Installation 9 | 10 | Requirements: 11 | - Python 3 12 | - Installed and configured [AWS CLI](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) 13 | - Bucket on S3 to publish your packages 14 | 15 | Use [pip](http://www.pip-installer.org/en/latest/) to install or upgrade Darty: 16 | 17 | ```bash 18 | $ pip install -U darty 19 | ``` 20 | 21 | ### Dependency File 22 | 23 | To manage data dependencies, the project should contain a configuration file. By default, 24 | Darty looks for a `darty.yaml` file. 25 | 26 | Example of a dependency file: 27 | 28 | ```yaml 29 | repositories: 30 | default: 31 | type: s3_zip 32 | root: sapphire-data-packages 33 | 34 | dependencies: 35 | - group: entity_detection.lexicons 36 | artifact: lexicons-en 37 | version: 1.1.0 38 | workingDir: data/lexicons/en 39 | 40 | - group: entity_detection.lexicons 41 | artifact: lexicons-de 42 | version: 1.0.0 43 | workingDir: data/lexicons/de 44 | ``` 45 | 46 | The file contains a list of repositories and a list of dependencies. Each dependency belongs to a particular repository. 47 | By default, all dependencies belong to the "default" repository. 48 | 49 | The __“repositories”__ section is a dictionary of repositories where keys are names of repositories and values are 50 | their configurations. 51 | 52 | Repository configuration: 53 | - __“type”__: name of the repository driver (see the "[Darty Drivers](#darty-drivers)" section) 54 | - __“root”__: unique identifier inside the repository. The meaning of this value is different for 55 | different types of repositories. For Amazon S3 it’s a bucket name. 56 | - __“parameters”__: custom parameters for the repository driver. 57 | 58 | The __“dependencies”__ section is a list of elements where each element contains a dependency configuration. 59 | 60 | Dependency configuration: 61 | - __“group”__: the id of the data group. 62 | - __“artifact”__: the name of the package under the specified group. 63 | - __“version”__: the version of the package under the specified group. 64 | - __“workingDir”__ _(optional)_: this directory is used to work with package files directly from the project 65 | and to publish a new version of the package. 66 | - __“files”__ _(optional)_: a list of files which belong to the package. It can be used to specify particular files 67 | which should be copied to the working directory, ignoring the others, or to scope the list of files 68 | which you want to publish inside a new version of the package. 69 | - __“repository”__ _(optional)_: the name of the repository where the package is located (by default 70 | it has the value "default", in which case a "default" repository must be specified) 71 | 72 | ##### Shared Working Directory 73 | 74 | You can use the same working directory for several dependencies, but in this case 75 | you must use the __“files”__ setting. You must specify the list of files for each dependency 76 | that you are going to use. Files across dependencies that share 77 | the same working directory cannot have the same filenames. 78 | 79 | 80 | ### Publishing Data Package 81 | 82 | 1. Create a working directory for the data you want to publish. For example, `data/my_word_vectors/`. 83 | 2. 
Create the `darty.yaml` file in the project's root folder: 84 | ```yaml 85 | repositories: 86 | default: 87 | type: s3_zip # name of the driver for S3 88 | root: my-data-packages # bucket name 89 | 90 | dependencies: 91 | - group: datasets.word_vectors # package group name 92 | artifact: my-word-vectors # package artifact name 93 | version: 1.0.0 # package version 94 | workingDir: data/my_word_vectors # package working directory 95 | ``` 96 | 97 | 3. Make sure you've configured the AWS CLI and have access to the bucket. 98 | 4. Go to the project’s directory in the terminal and run the following command: 99 | ```bash 100 | $ darty publish 101 | ``` 102 | 103 | If everything was configured correctly, you will see a message that the package was successfully 104 | published and the list of published files. 105 | 106 | 107 | ##### Local Publishing 108 | 109 | A package can be published locally. Then it will be available to all your local projects. 110 | Use the `publish-local` command. To rewrite an existing version of a local package, use the __"-r"__ flag. 111 | 112 | ```bash 113 | $ darty publish-local 114 | ``` 115 | 116 | ### Downloading Dependencies 117 | 118 | To get all the dependencies specified in the configuration file, use the following command: 119 | 120 | ```bash 121 | $ darty update 122 | ``` 123 | 124 | __Note:__ if the working directory for the dependency is not empty, it __will not__ be updated. 125 | To rewrite working directories for the dependencies, use the __"-r"__ flag. 126 | 127 | By default, Darty looks for a `darty.yaml` file in the current directory, but you can specify 128 | the path to your dependency file using the __"-c"__ flag: 129 | 130 | ```bash 131 | $ darty update -c path/to/project/config.yaml 132 | ``` 133 | 134 | You can also get only a particular dependency from the list by specifying a group and an artifact name 135 | or just an artifact name: 136 | 137 | ```bash 138 | $ darty update --group {{package_group}} --artifact {{package_artifact}} 139 | $ darty update --artifact {{package_artifact}} 140 | ``` 141 | 142 | 143 | ## Integration with a Python Project 144 | 145 | Integration of Darty with your project would be helpful if: 146 | - you want to distribute your Python package, and the data should be outside of this package 147 | - you have a heavy dataset and you don't want to have a second copy in the dependency working directory 148 | (Darty always caches the first copy in the `~/.darty` directory for read-only access) 149 | 150 | Integration steps: 151 | 152 | 1. Move the `darty.yaml` file from the project root directory into your Python package. Don’t forget to rewrite 153 | all relative paths for working directories inside the file. 154 | 155 | 2. Create an instance of __DependencyManager__ in the **{{package_name}}/\_\_init\_\_.py** file: 156 | 157 | ```python 158 | from darty.dependency_manager import DependencyManager 159 | DM = DependencyManager.from_py_package(__package__) 160 | ``` 161 | 162 | 3. Use the __get_path()__ method to get the path to your data package: 163 | 164 | ```python 165 | from {{package_name}} import DM 166 | lexicons_path = DM.get_path('entity_detection.lexicons', 'lexicons-en', file_path='en-curated-color') 167 | ``` 168 | 169 | __Note:__ the __get_path()__ method tries to find the files in the working directory if the directory exists. 170 | If it doesn't exist or is empty, the method will return the absolute path to the data package. 171 | 172 | Python package distribution: 173 | 174 | 1. 
Add the path to the `darty.yaml` file to the __setup.py__ script: 175 | 176 | ```python 177 | setup(name='{{package_name}}', 178 | ... 179 | package_data={'{{package_name}}': [ 180 | 'darty.yaml', 181 | ... 182 | ]}) 183 | ``` 184 | 185 | 2. Now, if a user installs your Python package with __pip__, they can get the data dependencies 186 | using the following command: 187 | 188 | ```bash 189 | $ darty download --py-package {{package_name}} 190 | ``` 191 | 192 | Because you are using the __"download"__ command and not __"update"__, the working directories 193 | for data dependencies **_will not_** be created, and the application will access files using 194 | absolute paths. 195 | 196 | 197 | ## Darty Configuration 198 | 199 | Darty keeps its configuration in the `~/.darty/config` file. 200 | 201 | To change the default settings, use the following command: 202 | 203 | ``` 204 | $ darty [-p <profile_name>] configure 205 | ``` 206 | 207 | If you don't specify a configuration profile name, the name __"default"__ will be used. 208 | 209 | At the moment, the command only allows you to configure the directory where data packages will be 210 | saved locally. By default, it's the directory `~/.darty/packages/`. 211 | 212 | 213 | ## Darty Drivers 214 | 215 | At the moment, Darty supports only AWS S3 buckets, but you can always develop a custom driver as a plugin to 216 | Darty and use it in your dependency files. 217 | 218 | This repository contains two S3 drivers: 219 | - __s3_files__: stores packages on S3 as individual files, without packing them into a single archive, 220 | - __s3_zip__: stores packages on S3 as zip archives. 221 | 222 | 223 | ## FAQ 224 | 225 | #### 1. Where are all my downloaded and locally published packages stored? 226 | 227 | By default, all packages are stored in the `~/.darty/packages/` directory, but you can change 228 | it using the `configure` command. 229 | 230 | #### 2. What is a "working directory"? 231 | 232 | You can specify a working directory for any dependency in your dependency file. Once 233 | a package is downloaded to the local central directory, the data from that package will 234 | be copied to the specified working directory. You should use working directories in two cases: 235 | - if you want to work with the data using paths relative to your project's root 236 | - if you are going to modify and publish a new version of the package 237 | 238 | #### 3. How do I work with package data if I didn't specify a working directory? 239 | 240 | You can do it only if you're working with a Python project. See the _"Integration 241 | with a Python Project"_ section. 242 | 243 | #### 4. Can I specify the same working directory for several dependencies? 244 | 245 | Yes, you can share a working directory across several packages. See the 246 | _"Shared Working Directory"_ section. 247 | 248 | #### 5. If I have changes in a dependency working directory and I call the update command, will I lose my changes? 249 | 250 | No, you won't. All the files in the working directory will remain unchanged. They will be overwritten only if you 251 | call the command with the `-r` flag. 252 | 253 | If you have specified a list of files for the dependency and you call the update command, 254 | the existing files in the working directory will remain unchanged, but the ones which 255 | didn't exist in the directory before will be added. 256 | 257 | #### 6. How can I resolve the conflict if I've made some changes to the package, but another person just published a newer version of this package? 
258 | 259 | Follow the steps: 260 | 1. Rename the current working directory for the package to something temporary. 261 | 2. Change the version of the package in the __darty.yaml__ file to the latest one. 262 | 3. Run the `darty update` command. It will download the latest package and 263 | create a new working directory with the latest data. 264 | 4. Move your changes to the newly created directory and remove the temporary one. 265 | 5. Now you can publish a new version of the package. 266 | 267 | 268 | ## TODO 269 | 270 | - add "author" (dict: "name", "email") field for dependency configuration 271 | - wildcards for files in the dependency configuration 272 | - "show" command similar to [pip show](https://pip.pypa.io/en/stable/reference/pip_show/) 273 | - command to get package versions from repository (show -i 1 -f (to show also files), -v (to 274 | show also available versions in repository). Returns information about installed package (env 275 | flag, without files by default; or print "Not installed") 276 | - "clean" command to remove locally published package 277 | - "compare" command to compare current working directory with a particular version of the package 278 | - add "message" parameter for the "publish" command and save it to package metadata 279 | 280 | 281 | ## Contributing 282 | Darty welcomes contributions from the open source community. To get started, take a look at our 283 | [contributing guidelines](CONTRIBUTING.md), then check the [Issues Tracker](https://github.com/zalando-incubator/darty/issues) for ideas. 284 | 285 | 286 | ## Contact 287 | Feel free to contact one of the [maintainers](MAINTAINERS). 288 | 289 | 290 | ## License 291 | See the [LICENSE](LICENSE) file. 292 | -------------------------------------------------------------------------------- /tests/test_dependency.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from darty.package.dependency import Dependency 4 | from darty.package.repository import Repository 5 | from darty.utils import file_exists, dir_exists, list_dir_files 6 | from shutil import rmtree 7 | 8 | 9 | class TestDependency(unittest.TestCase): 10 | 11 | PROJECT_DIR = os.path.join(os.path.dirname(__file__), 'data', 'test_project_dir') 12 | PACKAGES_DIR = os.path.join(os.path.dirname(__file__), 'data', 'test_packages_dir') 13 | 14 | REPOSITORY_TYPE = 'test' 15 | REPOSITORY_ROOT = 'test_root' 16 | REPOSITORY_DIR = os.path.join(os.path.dirname(__file__), 'data', 'test_repository') 17 | 18 | @classmethod 19 | def _get_dependency(cls, config: dict): 20 | return Dependency(config, Repository({ 21 | 'type': cls.REPOSITORY_TYPE, 22 | 'root': cls.REPOSITORY_ROOT, 23 | 'parameters': { 24 | 'local_dir': cls.REPOSITORY_DIR 25 | } 26 | }), cls.PACKAGES_DIR, cls.PROJECT_DIR) 27 | 28 | def test_get_path(self): 29 | dep_installed = self._get_dependency({ 30 | 'group': 'group1.subgroup1', 31 | 'artifact': 'artifact1', 32 | 'version': '1.0', 33 | }) 34 | 35 | dep_not_installed = self._get_dependency({ 36 | 'group': 'group1.subgroup1', 37 | 'artifact': 'artifact1-not-installed', 38 | 'version': '1.0', 39 | }) 40 | 41 | dep_working_dir_exists = self._get_dependency({ 42 | 'group': 'group1.subgroup1', 43 | 'artifact': 'artifact1', 44 | 'version': '1.0', 45 | 'workingDir': 'working_dir1', 46 | }) 47 | 48 | dep_working_dir_doesnt_exist = self._get_dependency({ 49 | 'group': 'group1.subgroup1', 50 | 'artifact': 'artifact1', 51 | 'version': '1.0', 52 | 'workingDir': 'working_dir_doesnt_exist', 53 | })
54 | 55 | dep_files = self._get_dependency({ 56 | 'group': 'group1.subgroup1', 57 | 'artifact': 'artifact1', 58 | 'version': '1.0', 59 | 'workingDir': 'working_dir1', 60 | 'files': [ 61 | 'file1.txt', 62 | 'file2.txt', 63 | 'subdir1/file1.txt', 64 | ] 65 | }) 66 | 67 | dep_repository_dir = os.path.join(self.PACKAGES_DIR, self.REPOSITORY_TYPE, self.REPOSITORY_ROOT) 68 | dependency_data_dir = os.path.join(dep_repository_dir, 'group1', 'subgroup1', '.artifacts', 'artifact1-1.0', 'data') 69 | working_dir = os.path.join(self.PROJECT_DIR, 'working_dir1') 70 | 71 | """ TEST PATHS TO A PACKAGE DIRECTORY """ 72 | 73 | # package is not installed 74 | with self.assertRaises(ValueError): 75 | dep_not_installed.get_path() 76 | 77 | # package without working directory 78 | path = dep_installed.get_path() 79 | self.assertEqual(path, dependency_data_dir) 80 | 81 | """ A WORKING DIRECTORY IS SPECIFIED IN THE DEPENDENCY CONFIGURATION """ 82 | 83 | # the working directory exists 84 | path = dep_working_dir_exists.get_path() 85 | self.assertEqual(path, working_dir) 86 | 87 | # the working directory doesn't exists 88 | path = dep_working_dir_doesnt_exist.get_path() 89 | self.assertEqual(path, dependency_data_dir) 90 | 91 | """ TEST PATHS TO A PACKAGE FILE """ 92 | 93 | # package is not installed 94 | with self.assertRaises(ValueError): 95 | dep_not_installed.get_path('file1.txt') 96 | 97 | # package without working directory 98 | path = dep_installed.get_path('subdir1/file1.txt') 99 | self.assertEqual(path, os.path.join(dependency_data_dir, 'subdir1', 'file1.txt')) 100 | 101 | """ A WORKING DIRECTORY IS SPECIFIED IN THE DEPENDENCY CONFIGURATION """ 102 | 103 | # package with working directory, the file exists 104 | path = dep_working_dir_exists.get_path('subdir1/file1.txt') 105 | self.assertEqual(path, os.path.join(working_dir, 'subdir1', 'file1.txt')) 106 | 107 | # the file exists (windows format path) 108 | path = dep_working_dir_exists.get_path('subdir1\\file1.txt') 109 | self.assertEqual(path, os.path.join(working_dir, 'subdir1', 'file1.txt')) 110 | 111 | # the file doesn't exists in the working directory 112 | path = dep_working_dir_exists.get_path('file2.txt') 113 | self.assertEqual(path, os.path.join(dependency_data_dir, 'file2.txt')) 114 | 115 | # the working directory doesn't exists 116 | path = dep_working_dir_doesnt_exist.get_path('subdir1/file1.txt') 117 | self.assertEqual(path, os.path.join(dependency_data_dir, 'subdir1', 'file1.txt')) 118 | 119 | # the file doesn't exists in the package 120 | with self.assertRaises(FileNotFoundError): 121 | dep_working_dir_exists.get_path('file_doesnt_exist.txt') 122 | 123 | """ A FILES LIST IS SPECIFIED IN THE DEPENDENCY CONFIGURATION """ 124 | 125 | # the file is in the list, the file exists in the working directory 126 | path = dep_files.get_path('subdir1/file1.txt') 127 | self.assertEqual(path, os.path.join(working_dir, 'subdir1', 'file1.txt')) 128 | 129 | # the file doesn't exists in the working directory 130 | path = dep_files.get_path('file2.txt') 131 | self.assertEqual(path, os.path.join(dependency_data_dir, 'file2.txt')) 132 | 133 | # package with files, the file is not in the list 134 | with self.assertRaises(ValueError): 135 | dep_files.get_path('file_not_in_list.txt') 136 | 137 | def test_publish_and_update(self): 138 | dep_without_working_dir = self._get_dependency({ 139 | 'group': 'group1.subgroup1', 140 | 'artifact': 'artifact1', 141 | 'version': '1.1', 142 | }) 143 | 144 | dep_to_publish = self._get_dependency({ 145 | 'group': 'group1.subgroup1', 
146 | 'artifact': 'artifact1', 147 | 'version': '1.1', 148 | 'workingDir': 'working_dir1', 149 | }) 150 | 151 | dep_to_update = self._get_dependency({ 152 | 'group': 'group1.subgroup1', 153 | 'artifact': 'artifact1', 154 | 'version': '1.1', 155 | 'workingDir': 'working_dir_update', 156 | }) 157 | 158 | # dependency paths 159 | group_dir = os.path.join(self.PACKAGES_DIR, self.REPOSITORY_TYPE, self.REPOSITORY_ROOT, 'group1', 'subgroup1') 160 | 161 | installation_dir = os.path.join(group_dir, '.artifacts', 'artifact1-1.1') 162 | installation_data_dir = os.path.join(installation_dir, 'data') 163 | local_installation_dir = os.path.join(group_dir, '.local-artifacts', 'artifact1-1.1') 164 | local_installation_data_dir = os.path.join(local_installation_dir, 'data') 165 | 166 | working_dir = os.path.join(self.PROJECT_DIR, 'working_dir1') 167 | working_dir_update = os.path.join(self.PROJECT_DIR, 'working_dir_update') 168 | 169 | # path to local repository (using TestDriver) 170 | rep_root_dir = os.path.join(self.REPOSITORY_DIR, self.REPOSITORY_ROOT) 171 | rep_artifact_dir = os.path.join(rep_root_dir, 'group1', 'subgroup1', 'artifact1-1.1') 172 | 173 | """ PREPARE THE TEST """ 174 | 175 | # clear installed package 176 | if dir_exists(installation_dir): 177 | rmtree(installation_dir) 178 | 179 | # clear locally installed package 180 | if dir_exists(local_installation_dir): 181 | rmtree(local_installation_dir) 182 | 183 | # clear package in the repository 184 | if dir_exists(rep_root_dir): 185 | rmtree(rep_root_dir) 186 | 187 | # clear "update" working directory 188 | if dir_exists(working_dir_update): 189 | rmtree(working_dir_update) 190 | 191 | # check that the right paths are returned 192 | self.assertEqual(dep_to_publish.get_path(), working_dir) 193 | 194 | with self.assertRaises(ValueError): 195 | dep_without_working_dir.get_path() 196 | 197 | with self.assertRaises(ValueError): 198 | dep_to_update.get_path() 199 | 200 | """ PUBLISHING LOCALLY """ 201 | 202 | # publish repository locally 203 | dep_to_publish.publish(local=True) 204 | self.assertEqual(dep_without_working_dir.get_path(), local_installation_data_dir) 205 | 206 | # should return False because the package already published locally 207 | res = dep_to_publish.publish(local=True) 208 | self.assertFalse(res) 209 | 210 | # rewrite locally published package 211 | dep_to_publish.publish(local=True, rewrite_local=True) 212 | self.assertEqual(dep_without_working_dir.get_path(), local_installation_data_dir) 213 | 214 | # remove locally installed package 215 | rmtree(local_installation_dir) 216 | with self.assertRaises(ValueError): 217 | dep_without_working_dir.get_path() 218 | 219 | """ PUBLISHING TO THE TEST REPOSITORY """ 220 | 221 | # publish repository 222 | dep_to_publish.publish() 223 | 224 | files_to_check = [ 225 | 'info.json', 226 | os.path.join('data', 'file1.txt'), 227 | os.path.join('data', 'subdir1', 'file1.txt'), 228 | ] 229 | for file_path in files_to_check: 230 | self.assertTrue(file_exists(os.path.join(rep_artifact_dir, file_path)), 231 | 'File "%s" doesn\'t exist in the repository' % file_path) 232 | 233 | self.assertEqual(dep_without_working_dir.get_path(), installation_data_dir) 234 | 235 | # should return False because the package already exists in the repository 236 | res = dep_to_publish.publish() 237 | self.assertFalse(res) 238 | 239 | # check that the right paths are returned 240 | self.assertEqual(dep_to_publish.get_path(), working_dir) 241 | self.assertEqual(dep_to_update.get_path(), installation_data_dir) 242 | 243 | 
# remove the installed package 244 | rmtree(installation_dir) 245 | with self.assertRaises(ValueError): 246 | dep_to_update.get_path() 247 | 248 | """ DOWNLOAD THE PACKAGE """ 249 | 250 | # download the package 251 | dep_to_update.download() 252 | self.assertEqual(dep_to_update.get_path(), installation_data_dir) 253 | 254 | # remove the installed package again 255 | rmtree(installation_dir) 256 | with self.assertRaises(ValueError): 257 | dep_to_update.get_path() 258 | 259 | """ UPDATE THE DEPENDENCY """ 260 | 261 | # update the dependency (working directory should be created) 262 | dep_to_update.update() 263 | self.assertEqual(dep_to_update.get_path(), working_dir_update) 264 | 265 | # remove the package after test 266 | rmtree(installation_dir) 267 | rmtree(rep_root_dir) 268 | rmtree(working_dir_update) 269 | 270 | def test_publish_files(self): 271 | dep_without_working_dir = self._get_dependency({ 272 | 'group': 'group1.subgroup1', 273 | 'artifact': 'artifact1', 274 | 'version': '1.1', 275 | }) 276 | 277 | dep_to_publish = self._get_dependency({ 278 | 'group': 'group1.subgroup1', 279 | 'artifact': 'artifact1', 280 | 'version': '1.1', 281 | 'workingDir': 'working_dir1', 282 | 'files': [ 283 | 'subdir1/file1.txt', 284 | ] 285 | }) 286 | 287 | dep_file_doesnt_exist = self._get_dependency({ 288 | 'group': 'group1.subgroup1', 289 | 'artifact': 'artifact1', 290 | 'version': '1.1', 291 | 'workingDir': 'working_dir1', 292 | 'files': [ 293 | 'file_doesnt_exist.txt', 294 | ] 295 | }) 296 | 297 | group_dir = os.path.join(self.PACKAGES_DIR, self.REPOSITORY_TYPE, self.REPOSITORY_ROOT, 'group1', 'subgroup1') 298 | local_installation_dir = os.path.join(group_dir, '.local-artifacts', 'artifact1-1.1') 299 | 300 | # clear locally installed package 301 | if dir_exists(local_installation_dir): 302 | rmtree(local_installation_dir) 303 | 304 | # publishing file doesn't exist in the working directory 305 | res = dep_file_doesnt_exist.publish(local=True) 306 | self.assertFalse(res) 307 | 308 | # publish package locally 309 | dep_to_publish.publish(local=True) 310 | 311 | # TODO: fix paths logic and tests for Windows 312 | published_files = list(list_dir_files(dep_without_working_dir.get_path())) 313 | self.assertEqual(published_files, dep_to_publish.files) 314 | 315 | # remove the package after test 316 | rmtree(local_installation_dir) 317 | 318 | 319 | if __name__ == '__main__': 320 | unittest.main() 321 | -------------------------------------------------------------------------------- /darty/package/dependency.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from collections import OrderedDict 4 | from shutil import rmtree 5 | from darty.output_writer import AbstractOutputWriter, NullOutputWriter 6 | from darty.package.package_info import PackageInfo 7 | from darty.package.repository import Repository 8 | from darty.package.validators import check_group_name, check_artifact_name, check_version_number, \ 9 | check_files_file_path 10 | from darty.utils import file_exists, dir_exists, get_dir_hash, is_dir_empty, copy_dir, copy_file, convert_path_w2u 11 | 12 | 13 | class Dependency(object): 14 | """ 15 | Key class describing a dependency and enabling to resolve a paths to the files containing the dependency's content 16 | On the current machine, a dependency is stored under: 17 | ${local_cache}/${repository_type}/${repository_root}/${group_name}/${development_stage}/$(${artifact_name}+'-'+${version}) 18 | during update 19 | """ 20 | 21 | 
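    # Example layout (hypothetical values), assuming the default packages directory
    # "~/.darty/packages", an "s3_zip" repository with root "my-data-packages", and the
    # dependency "datasets.word_vectors:my-word-vectors:1.0.0":
    #
    #   ~/.darty/packages/s3_zip/my-data-packages/datasets/word_vectors/.artifacts/my-word-vectors-1.0.0/
    #       info.json   <- package metadata: group, artifact, version, file list, content hash
    #       data/       <- the actual package files
    #
    # Locally published versions live under ".local-artifacts" instead of ".artifacts",
    # and ".tmp-artifacts" is a staging area used while building or downloading a package.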
ENV_PRODUCTION = '.artifacts' 22 | ENV_LOCAL = '.local-artifacts' 23 | ENV_TMP = '.tmp-artifacts' 24 | 25 | def __init__(self, config: dict, repository: Repository, packages_dir: str, project_dir: str): 26 | 27 | self.group = config.get('group', '') 28 | self.artifact = config.get('artifact', '') 29 | self.version = config.get('version', '') 30 | self.working_dir = config.get('workingDir', None) 31 | self.files = config.get('files', None) 32 | self.default_file = config.get('defaultFile', None) 33 | self.name = config.get('name', '') 34 | self.description = config.get('description', '') 35 | 36 | self.repository = repository 37 | self.packages_dir = packages_dir 38 | self.project_dir = project_dir 39 | 40 | # check group name 41 | if not self.group: 42 | raise ValueError('Group name must be specified') 43 | if not check_group_name(self.group): 44 | raise ValueError('Group name has invalid format') 45 | 46 | # check artifact name 47 | if not self.artifact: 48 | raise ValueError('Artifact name must be specified') 49 | if not check_artifact_name(self.artifact): 50 | raise ValueError('Artifact name has invalid format') 51 | 52 | # check version number 53 | if not self.version: 54 | raise ValueError('Version number must be specified') 55 | if not check_version_number(self.version): 56 | raise ValueError('Version number has invalid format') 57 | 58 | # check filenames 59 | if self.files: 60 | for file_path in self.files: 61 | if not file_path: 62 | raise ValueError('Path cannot be empty') 63 | if not check_files_file_path(file_path): 64 | raise ValueError('Path "%s" has invalid format' % file_path) 65 | 66 | @property 67 | def group_dir(self): 68 | """Returns absolute local path to the directory where all of a group's artifacts are stored.""" 69 | return os.path.join(self.packages_dir, self.repository.type, self.repository.root, os.path.join(*self.group.split('.'))) 70 | 71 | @property 72 | def artifact_archive_path(self): 73 | """Path to artifact's archive.""" 74 | return os.path.join(self.group_dir, self.artifact + '-' + self.version + '.zip') 75 | 76 | def get_artifacts_dir(self, env=ENV_PRODUCTION): 77 | """Directory where all artifacts are unpacked.""" 78 | return os.path.join(self.group_dir, env) 79 | 80 | def get_artifact_dir(self, env=ENV_PRODUCTION): 81 | """Directory for particular unpacked artifact; contains the data directory and the info.json file""" 82 | return os.path.join(self.get_artifacts_dir(env), self.artifact + '-' + self.version) 83 | 84 | def get_artifact_data_dir(self, env=ENV_PRODUCTION): 85 | """Directory containing the actual data of a particular unpacked artifact.""" 86 | return os.path.join(self.get_artifact_dir(env), 'data') 87 | 88 | def get_artifact_info_path(self, env=ENV_PRODUCTION): 89 | """Path to artifact's info.json file.""" 90 | return os.path.join(self.get_artifact_dir(env), 'info.json') 91 | 92 | def get_package_info(self): 93 | """Returns a package info if package is published locally or downloaded.""" 94 | package_info = None 95 | 96 | for env in (self.ENV_LOCAL, self.ENV_PRODUCTION): 97 | info_path = self.get_artifact_info_path(env) 98 | if file_exists(info_path): 99 | with open(info_path) as f: 100 | info = json.load(f) 101 | 102 | package_info = PackageInfo(info, (env == self.ENV_LOCAL)) 103 | break 104 | 105 | return package_info 106 | 107 | def get_path(self, file_path: str = None): 108 | """Returns a path to the package directory 109 | or to a particular file from the package. 
110 | 111 | If you are getting a path to a package directory (without specifying "file_path"): 112 | - if a working directory for the dependency is not specified, the absolute path 113 | for the central package directory will be returned 114 | - if the dependency configuration specifies a working directory and that 115 | directory exists and is not empty, the method will return a path to that working directory 116 | - if a working directory is specified, but is empty, the absolute path for the 117 | central package directory will be returned 118 | 119 | If you are getting a path to a file within the package ("file_path" is specified): 120 | - if a working directory for the dependency is not specified, the absolute path 121 | for the central directory will be returned 122 | - if the dependency configuration specifies only a working directory (without a files list) 123 | and the file exists in the working directory, the method will return that path 124 | - if the file doesn't exist in the working directory, the method will try to get 125 | the absolute path to the file from the central directory 126 | - if the dependency configuration also specifies a files list, a file list must 127 | contain the requested file path, otherwise the exception will be raised 128 | 129 | :param file_path: get a path to a particular file within the package 130 | :return: str 131 | """ 132 | # convert relative path from windows format to linux one 133 | # because only linux format is accepted for file paths in "files" 134 | if file_path: 135 | file_path = convert_path_w2u(file_path) 136 | 137 | # return a working directory if "file_path" is not specified and the directory is not empty 138 | if self.working_dir and not file_path and not self.files: 139 | working_dir = os.path.normpath(os.path.join(self.project_dir, self.working_dir)) 140 | if not is_dir_empty(working_dir): 141 | return working_dir 142 | 143 | # raise an error if "file_path" specified, but doesn't exist in the list of working files 144 | if self.working_dir and file_path and self.files and (file_path not in self.files): 145 | raise ValueError('File "%s" is not a part of the package "%s:%s"' 146 | % (file_path, self.group, self.artifact)) 147 | 148 | # return file path from a working directory if "file_path" is specified and the file exists 149 | if self.working_dir and file_path: 150 | res_path = os.path.normpath(os.path.join(self.project_dir, self.working_dir, file_path)) 151 | if file_exists(res_path): 152 | return res_path 153 | 154 | # otherwise return absolute path 155 | package_info = self.get_package_info() 156 | if not package_info: 157 | raise ValueError('Package "%s:%s:%s" is not installed' % (self.group, self.artifact, self.version)) 158 | 159 | # check that the file exists in the package 160 | if file_path and (file_path not in package_info.files): 161 | raise FileNotFoundError('File "%s" doesn\'t exist in the package "%s:%s:%s"' 162 | % (file_path, self.group, self.artifact, self.version)) 163 | 164 | # get package data directory 165 | env = Dependency.ENV_LOCAL if package_info.local else Dependency.ENV_PRODUCTION 166 | data_dir = self.get_artifact_data_dir(env) 167 | 168 | # package path or file path 169 | res_path = os.path.normpath(os.path.join(data_dir, file_path)) if file_path else data_dir 170 | 171 | return res_path 172 | 173 | def update(self, rewrite_working_dir: bool = False, output: AbstractOutputWriter = None): 174 | """Downloads the package and updates the package's working directory.""" 175 | if not output: 176 | output = 
NullOutputWriter() 177 | 178 | package_info = self.download(output) 179 | if not package_info or not self.working_dir: 180 | return 181 | 182 | # copy files to a working directory 183 | with output.indent(): 184 | output.write('Copying files to the working directory "%s"...' % self.working_dir) 185 | 186 | with output.indent(): 187 | # create a working directory if it doesn't exist 188 | working_dir = os.path.join(self.project_dir, self.working_dir) 189 | os.makedirs(working_dir, exist_ok=True) 190 | 191 | # get package data directory 192 | env = Dependency.ENV_LOCAL if package_info.local else Dependency.ENV_PRODUCTION 193 | data_dir = self.get_artifact_data_dir(env) 194 | 195 | if self.files: 196 | # copy only specified files if they don't exist in a target directory 197 | for filename in self.files: 198 | if filename in package_info.files: 199 | src_path = os.path.join(data_dir, filename) 200 | dst_path = os.path.join(working_dir, filename) 201 | 202 | # adding to working directory only files which don't exist 203 | if not file_exists(dst_path): 204 | copy_file(src_path, dst_path) 205 | output.write('[+] "%s": file copied' % filename) 206 | elif rewrite_working_dir: 207 | copy_file(src_path, dst_path) 208 | output.write('[+] "%s": file rewritten' % filename) 209 | else: 210 | output.write('[-] "%s": file already exists' % filename) 211 | else: 212 | output.write('[-] "%s": file doesn\'t exist in the package' % filename) 213 | else: 214 | # copy all files only if a working directory is empty 215 | if is_dir_empty(working_dir): 216 | copy_dir(data_dir, working_dir) 217 | output.write('[+] files copied to the "%s" directory' % self.working_dir) 218 | elif rewrite_working_dir: 219 | rmtree(working_dir) 220 | copy_dir(data_dir, working_dir) 221 | output.write('[+] directory "%s" was rewritten' % self.working_dir) 222 | else: 223 | output.write('[-] files not changed: directory "%s" is not empty' % self.working_dir) 224 | 225 | def publish(self, local: bool = False, rewrite_local: bool = False, output: AbstractOutputWriter = None) -> bool: 226 | """Publishes the package to the repository.""" 227 | if not output: 228 | output = NullOutputWriter() 229 | 230 | # check if the package already exists on the local machine 231 | package_info = self.get_package_info() 232 | if package_info: 233 | if not package_info.local: 234 | output.write('[-] Version "%s" already exists in the repository' % self.version) 235 | return False 236 | elif not rewrite_local: 237 | output.write( 238 | '[-] Version "%s" already exists locally. Use "-r" flag to rewrite this version.' % self.version) 239 | return False 240 | 241 | # paths for package 242 | artifact_dir = self.get_artifact_dir() 243 | local_artifact_dir = self.get_artifact_dir(Dependency.ENV_LOCAL) 244 | 245 | # build the package 246 | output.write('Building the package... ') 247 | 248 | with output.indent(): 249 | try: 250 | tmp_artifact_dir = self._build() 251 | except Exception as e: 252 | output.write('[-] ' + str(e)) 253 | return False 254 | 255 | # publish the package 256 | if local: 257 | output.write('Publishing the package locally... ') 258 | 259 | # move temporary directory to local one 260 | copy_dir(tmp_artifact_dir, local_artifact_dir) 261 | rmtree(tmp_artifact_dir) 262 | 263 | with output.indent(): 264 | output.write('[+] Package "%s:%s:%s" was successfully published locally.' % 265 | (self.group, self.artifact, self.version)) 266 | else: 267 | output.write('Publishing the package... 
') 268 | 269 | with output.indent(): 270 | # upload a package 271 | driver = self.repository.driver 272 | 273 | try: 274 | driver.upload_package(self.group, self.artifact, self.version, tmp_artifact_dir, output) 275 | except Exception as e: 276 | rmtree(tmp_artifact_dir) # remove building directory 277 | output.write('[-] ' + str(e)) 278 | return False 279 | 280 | # move temporary directory to production one 281 | copy_dir(tmp_artifact_dir, artifact_dir) 282 | 283 | # remove building directory 284 | rmtree(tmp_artifact_dir) 285 | 286 | # remove local version of the same package if it exists 287 | if dir_exists(local_artifact_dir): 288 | rmtree(local_artifact_dir) 289 | 290 | output.write('[+] Package "%s:%s:%s" was successfully published.' % 291 | (self.group, self.artifact, self.version)) 292 | 293 | # show published files 294 | package_info = self.get_package_info() 295 | 296 | output.write('\nPackage files:') 297 | with output.indent(): 298 | for filename in package_info.files: 299 | output.write(filename) 300 | 301 | return True 302 | 303 | def download(self, output: AbstractOutputWriter = None): 304 | """Downloads a package. 305 | This method only puts the package to the central directory, 306 | without updating the package's working directory. 307 | """ 308 | if not output: 309 | output = NullOutputWriter() 310 | 311 | output.write('Downloading package "%s:%s:%s"... ' % (self.group, self.artifact, self.version)) 312 | 313 | with output.indent(): 314 | # check if a package is already downloaded or published locally 315 | package_info = self.get_package_info() 316 | if package_info: 317 | if package_info.local: 318 | output.write('[+] It\'s a locally published package') 319 | else: 320 | output.write('[+] The package was already downloaded') 321 | 322 | return package_info 323 | 324 | # download dependency 325 | driver = self.repository.driver 326 | tmp_artifact_dir = self.get_artifact_dir(self.ENV_TMP) 327 | os.makedirs(tmp_artifact_dir, exist_ok=True) 328 | 329 | try: 330 | driver.download_package(self.group, self.artifact, self.version, tmp_artifact_dir, output) 331 | except Exception as e: 332 | output.write('[-] ' + str(e)) 333 | return None 334 | 335 | # TODO: check "tmp_dir" contains info.json file, format is correct and a list of files matches "data" directory 336 | 337 | # move temporary directory to production one 338 | artifact_dir = self.get_artifact_dir(self.ENV_PRODUCTION) 339 | copy_dir(tmp_artifact_dir, artifact_dir) 340 | rmtree(tmp_artifact_dir) 341 | 342 | package_info = self.get_package_info() 343 | 344 | output.write('[+] The package was successfully downloaded') 345 | 346 | return package_info 347 | 348 | def _build(self): 349 | """Builds package.""" 350 | if not self.working_dir: 351 | raise ValueError('Package doesn\'t have working directory') 352 | 353 | # working directory 354 | working_dir = os.path.join(self.project_dir, self.working_dir) 355 | if not dir_exists(working_dir): 356 | raise ValueError('Working directory doesn\'t exist') 357 | 358 | # package paths 359 | artifact_dir = self.get_artifact_dir(self.ENV_TMP) 360 | info_path = self.get_artifact_info_path(self.ENV_TMP) 361 | data_dir = self.get_artifact_data_dir(self.ENV_TMP) 362 | 363 | # remove artifact directory if it exists 364 | if dir_exists(artifact_dir): 365 | rmtree(artifact_dir) 366 | 367 | # create artifact data directory 368 | os.makedirs(data_dir, exist_ok=True) 369 | 370 | # copy files to package data directory 371 | if self.files: 372 | for filename in self.files: 373 | src_path = 
os.path.join(working_dir, filename) 374 | if not file_exists(src_path): 375 | raise FileNotFoundError('File "%s" doesn\'t exist in the working directory' % filename) 376 | 377 | copy_file(src_path, os.path.join(data_dir, filename)) 378 | else: 379 | copy_dir(working_dir, data_dir) 380 | 381 | # get the list of copied files 382 | files = [] 383 | for cur_dir, directories, filenames in os.walk(data_dir): 384 | relative_dir = cur_dir.replace(data_dir, '').lstrip(os.sep) 385 | for filename in filenames: 386 | files.append(os.path.join(relative_dir, filename)) 387 | 388 | # create info.json file 389 | package_info = OrderedDict([ 390 | ('group', self.group), 391 | ('artifact', self.artifact), 392 | ('version', self.version), 393 | ('files', files), 394 | ('name', self.name), 395 | ('description', self.description), 396 | ('hash', get_dir_hash(data_dir)) 397 | ]) 398 | with open(info_path, 'w+') as f: 399 | json.dump(package_info, f, indent=2) 400 | 401 | return artifact_dir 402 | --------------------------------------------------------------------------------