├── tests ├── __init__.py ├── test_docker_alluxio.py ├── hash_res │ ├── workerHostnames.json │ ├── workerList.json │ ├── fileUrlWorkers.json │ └── activeNodesMap.json ├── test_fake_server.py ├── test_worker_entity.py ├── test_worker_hash_ring.py ├── test_read_range_docker.py ├── conftest.py └── test_read_range.py ├── alluxio ├── posix │ ├── __init__.py │ ├── ufs │ │ ├── __init__.py │ │ ├── oss.py │ │ └── alluxio.py │ ├── setup.py │ ├── const.py │ ├── delegate.py │ ├── exception.py │ ├── config.py │ ├── fileimpl.py │ └── delegateFs.py ├── __init__.py ├── const.py ├── worker_ring.py └── alluxio_file_system.py ├── pytest.ini ├── environment_conda.yaml ├── setup.py ├── setup.cfg ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── .github └── workflows │ └── ci.yml ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alluxio/posix/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alluxio/posix/ufs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore::DeprecationWarning 4 | -------------------------------------------------------------------------------- /tests/test_docker_alluxio.py: -------------------------------------------------------------------------------- 1 | from tests.conftest import TEST_ROOT 2 | 3 | 4 | def test_simple(fs): 5 | fs.listdir(TEST_ROOT) # no error 6 | -------------------------------------------------------------------------------- /alluxio/__init__.py: -------------------------------------------------------------------------------- 1 | from .alluxio_file_system import AlluxioAsyncFileSystem 2 | from .alluxio_file_system import AlluxioFileSystem 3 | from .alluxio_file_system import AlluxioPathStatus 4 | 5 | __all__ = ["AlluxioFileSystem", "AlluxioAsyncFileSystem", "AlluxioPathStatus"] 6 | -------------------------------------------------------------------------------- /environment_conda.yaml: -------------------------------------------------------------------------------- 1 | name: alluxio_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - aiohttp 6 | - pytest=7.4.4 7 | - pytest-timeout 8 | - pytest-aiohttp 9 | - requests 10 | - humanfriendly 11 | - mmh3 12 | - sortedcontainers 13 | - yaml 14 | - fsspec 15 | - pip: 16 | - git+https://github.com/kragniz/python-etcd3 17 | -------------------------------------------------------------------------------- /alluxio/posix/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | setup( 5 | name="alluxio_posix", 6 | version="0.1.0", 7 | packages=find_packages(), 8 | license="MIT", 9 | description="Alluxio POSIX Python SDK", 10 | author="lzq", 11 | author_email="liuzq0909@163.com", 12 | data_files=[("config", ["config/ufs_config.yaml"])], 13 | include_package_data=True, 14 | zip_safe=False, 15 | ) 16 | -------------------------------------------------------------------------------- /alluxio/posix/const.py: 
-------------------------------------------------------------------------------- 1 | class Constants: 2 | 3 | # general config 4 | BUCKET_NAME = "bucket_name" 5 | # URL prefix 6 | OSS_URL_PREFIX = "oss://" 7 | ALLUXIO_URL_PREFIX = "alluxio://" 8 | 9 | # enable FileSystem types 10 | LOCAL_FILESYSTEM_TYPE = "local" 11 | OSS_FILESYSTEM_TYPE = "oss" 12 | ALLUXIO_FILESYSTEM_TYPE = "alluxio" 13 | S3_FILESYSTEM_TYPE = "s3" 14 | 15 | # assist constants 16 | ALLUXIO_SEP_SIGN = "_" 17 | -------------------------------------------------------------------------------- /alluxio/posix/delegate.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from alluxio.posix import fileimpl 4 | from alluxio.posix.config import ConfigManager 5 | from alluxio.posix.delegateFs import DelegateFileSystem 6 | 7 | config_manager = ConfigManager() 8 | delegate_fs = DelegateFileSystem(config_manager) 9 | 10 | os.stat = fileimpl.stat 11 | os.open = fileimpl.open 12 | os.listdir = fileimpl.listdir 13 | os.rename = fileimpl.rename 14 | os.mkdir = fileimpl.mkdir 15 | os.remove = fileimpl.remove 16 | os.rmdir = fileimpl.rmdir 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | setup( 5 | name="alluxio", 6 | version="0.3", 7 | packages=find_packages(exclude=["tests", "tests.*"]), 8 | include_package_data=True, 9 | zip_safe=False, 10 | install_requires=[ 11 | "aiohttp", 12 | "decorator", 13 | "humanfriendly", 14 | "requests", 15 | "etcd3", 16 | "mmh3", 17 | "sortedcontainers", 18 | "protobuf>=3.20.0,<3.21.0", 19 | ], 20 | extras_require={"tests": ["pytest", "pytest-aiohttp"]}, 21 | python_requires=">=3.8", 22 | ) 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # Extra space in brackets 4 | E20, 5 | # Conflict with black 6 | E203, 7 | # Multiple spaces around "," 8 | E231,E241, 9 | # Comments 10 | E26, 11 | # Import formatting 12 | E4, 13 | # Line too long, conflict with black 14 | E501, 15 | # Comparing types instead of isinstance 16 | E721, 17 | # Assigning lambda expression 18 | E731, 19 | # Ambiguous variable names 20 | E741, 21 | # line break before binary operator 22 | W503, 23 | # line break after binary operator 24 | W504, 25 | # redefinition of unused 'loop' from line 10 26 | F811, 27 | max-line-length = 79 28 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.4.0 6 | hooks: 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | - id: requirements-txt-fixer 10 | - repo: https://github.com/psf/black 11 | rev: 22.10.0 12 | hooks: 13 | - id: black 14 | args: 15 | - --target-version=py37 16 | - --line-length=79 17 | - repo: https://github.com/pycqa/flake8 18 | rev: 6.0.0 19 | hooks: 20 | - id: flake8 21 | exclude: ^alluxio/worker_ring.py 22 | - repo: https://github.com/asottile/reorder-python-imports 23 | rev: v3.12.0 24 | hooks: 25 | - id: reorder-python-imports 26 | 
--------------------------------------------------------------------------------
/alluxio/posix/exception.py:
--------------------------------------------------------------------------------
1 | class ConfigMissingError(Exception):
2 |     def __init__(self, config_key, message="Configuration key is missing"):
3 |         self.config_key = config_key
4 |         self.message = message
5 |         super().__init__(f"{message}: {config_key}")
6 |
7 |
8 | class ConfigInvalidError(Exception):
9 |     def __init__(
10 |         self, config_key, message="Configuration key is invalid"
11 |     ):
12 |         self.config_key = config_key
13 |         self.message = message
14 |         super().__init__(f"{message}: {config_key}")
15 |
16 |
17 | class UnsupportedDelegateFileSystemError(Exception):
18 |     def __init__(
19 |         self, fs_name, message="FileSystem is not supported"
20 |     ):
21 |         self.fs_name = fs_name
22 |         self.message = message
23 |         super().__init__(f"{message}: {fs_name}")
24 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to alluxio-py
2 |
3 | Thanks for your interest in **alluxio-py**. The project enables data access to Alluxio from Python.
4 |
5 | ## Getting Started
6 |
7 | We recommend following the [readme](README.md) first to get familiar with the project.
8 | Contributors need to install the Python library and its dependencies.
9 |
10 | ## Code Style
11 |
12 | The project leverages [Black](https://github.com/psf/black) as the code formatter and [reorder-python-imports](https://github.com/asottile/reorder_python_imports) to format imports.
13 | Black defaults to 88 characters per line (10% over 80), while this project uses 79 characters per line.
14 | We recommend running the following commands before submitting pull requests.
15 |
16 | ```bash
17 | black [changed-file].py --line-length 79
18 | reorder-python-imports [changed-file].py
19 | ```
20 |
21 | ## Testing
22 | The testing framework supports using your own Alluxio endpoint by setting the "ALLUXIO_URL" environment variable (e.g. `http://127.0.0.1:28080`). If this is not set, an Alluxio worker will be spun up using Docker and the Alluxio Docker image.
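23 |
24 | For example, to run the test suite against an already running Alluxio endpoint instead of the Docker fixture (adjust the URL to match your deployment):
25 |
26 | ```bash
27 | # Reuse an existing Alluxio worker; skips the Docker-based setup in tests/conftest.py
28 | export ALLUXIO_URL=http://127.0.0.1:28080
29 | pytest -vv tests/
30 | ```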
--------------------------------------------------------------------------------
/alluxio/posix/ufs/oss.py:
--------------------------------------------------------------------------------
1 | from alluxio.posix.const import Constants
2 | from alluxio.posix.exception import ConfigMissingError
3 |
4 | OSS_ACCESS_KEY_ID = "access_key_id"
5 | OSS_ACCESS_KEY_SECRET = "access_key_secret"
6 | OSS_ENDPOINT = "endpoint"
7 |
8 |
9 | def validate_oss_config(config):
10 |     required_keys = [
11 |         OSS_ACCESS_KEY_ID,
12 |         OSS_ACCESS_KEY_SECRET,
13 |         OSS_ENDPOINT,
14 |         Constants.BUCKET_NAME,
15 |     ]
16 |
17 |     for key in required_keys:
18 |         if key not in config:
19 |             raise ConfigMissingError(key, "Missing required OSS config key")
20 |         if not config[key]:
21 |             raise ValueError(f"OSS config key '{key}' cannot be empty")
22 |
23 |
24 | def update_oss_config(config_data, updates):
25 |     valid_keys = [
26 |         OSS_ACCESS_KEY_ID,
27 |         OSS_ACCESS_KEY_SECRET,
28 |         OSS_ENDPOINT,
29 |         Constants.BUCKET_NAME,
30 |     ]
31 |     for key, value in updates.items():
32 |         if key not in valid_keys:
33 |             raise ValueError(f"Invalid configuration key: {key}")
34 |         config_data[key] = value
35 |
36 |     validate_oss_config(config_data)
37 |     return config_data
38 |
--------------------------------------------------------------------------------
/alluxio/posix/ufs/alluxio.py:
--------------------------------------------------------------------------------
1 | from alluxio.posix.exception import ConfigMissingError
2 |
3 | ALLUXIO_ETCD_ENABLE = "alluxio_etcd_enable"
4 | ALLUXIO_ETCD_HOST = "alluxio_etcd_host"
5 | ALLUXIO_WORKER_HOSTS = "alluxio_worker_hosts"
6 | ALLUXIO_BACKUP_FS = "alluxio_backup_fs"
7 | ALLUXIO_ENABLE = "alluxio_enable"
8 |
9 |
10 | def validate_alluxio_config(config):
11 |     required_keys = []
12 |     if config.get(ALLUXIO_ETCD_ENABLE, False):
13 |         required_keys.append(ALLUXIO_ETCD_HOST)
14 |     else:
15 |         required_keys.append(ALLUXIO_WORKER_HOSTS)
16 |
17 |     if not all(config.get(key) for key in required_keys):
18 |         raise ConfigMissingError(
19 |             required_keys, "The following keys must be set in the configuration"
20 |         )
21 |
22 |
23 | def update_alluxio_config(config_data, updates):
24 |     allowed_keys = [
25 |         ALLUXIO_ETCD_ENABLE,
26 |         ALLUXIO_ETCD_HOST,
27 |         ALLUXIO_WORKER_HOSTS,
28 |     ]
29 |     for key, value in updates.items():
30 |         if key not in allowed_keys:
31 |             raise ValueError(f"Invalid configuration key for Alluxio: {key}")
32 |         config_data[key] = value
33 |
34 |     validate_alluxio_config(config_data)
35 |     return config_data
36 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [pull_request, workflow_dispatch]
4 |
5 | defaults:
6 |   run:
7 |     shell: bash -l -eo pipefail {0}
8 |
9 | jobs:
10 | #  test:
11 | #    name: Test
12 | #    runs-on: ubuntu-latest
13 | #    timeout-minutes: 10
14 | #    strategy:
15 | #      fail-fast: false
16 | #      matrix:
17 | #        python-version: ["3.8","3.9"]
18 | #
19 | #    steps:
20 | #      - name: Checkout source
21 | #        uses: actions/checkout@v4
22 | #
23 | #      - name: Setup Conda Environment
24 | #        uses: mamba-org/setup-micromamba@v1
25 | #        with:
26 | #          cache-downloads: true
27 | #          environment-file: environment_conda.yaml
28 | #          environment-name: alluxio_test
29 | #          create-args: >-
30 | #            python=${{ matrix.python-version }}
31 | #
32 | #      - name: Conda info
33 | #        run: |
34 | #          conda list
35 | #          conda --version
36 | #
37 | #      - name: Run tests
38 | #        run: |
39
| # pytest -vv \ 40 | # --log-format="%(asctime)s %(levelname)s %(message)s" \ 41 | # --log-date-format="%H:%M:%S" \ 42 | # tests/ 43 | 44 | lint: 45 | name: lint 46 | runs-on: ubuntu-latest 47 | steps: 48 | - uses: actions/checkout@v4 49 | - uses: actions/setup-python@v4 50 | with: 51 | python-version: "3.9" 52 | - uses: pre-commit/action@v3.0.0 53 | -------------------------------------------------------------------------------- /tests/hash_res/workerHostnames.json: -------------------------------------------------------------------------------- 1 | [ 2 | "worker-fmvjygixyt-8993", 3 | "worker-ujvhyfprjc-79", 4 | "worker-nakxlhtbwu-355", 5 | "worker-uohlaocjcb-9854", 6 | "worker-gxdkefonqf-1738", 7 | "worker-gjzlwiuifu-4991", 8 | "worker-oqjcznkqhh-1621", 9 | "worker-muzlyqhjtf-1501", 10 | "worker-fjqojdrnun-394", 11 | "worker-hasicqeqms-2548", 12 | "worker-pxtwffhwpv-1106", 13 | "worker-iakqckkxxd-3188", 14 | "worker-vjuxlgfjsn-2944", 15 | "worker-fnwnfpqdsi-6567", 16 | "worker-xwcsdwxijw-2610", 17 | "worker-bfydonscfx-4806", 18 | "worker-kvfkefpucw-7829", 19 | "worker-hzwbeyabvg-6268", 20 | "worker-atzylwpgju-9727", 21 | "worker-crrkjzttdk-9462", 22 | "worker-kfgorghjgw-235", 23 | "worker-gukpxxujsl-526", 24 | "worker-imoaepvvdz-8203", 25 | "worker-dnbkvfqaed-811", 26 | "worker-wtborcfzha-3675", 27 | "worker-yfkdrypeuq-5502", 28 | "worker-vocaaeuwug-8440", 29 | "worker-nndnhtubyb-3198", 30 | "worker-edmtlksxyz-8687", 31 | "worker-jpswgyapjb-9091", 32 | "worker-powoalhuan-889", 33 | "worker-uhfxhulyiz-2526", 34 | "worker-nufuphxfbk-3794", 35 | "worker-yqfvjtwvyc-3118", 36 | "worker-rfczdltwwt-3569", 37 | "worker-keebwqvzoi-7748", 38 | "worker-sqkpasdrca-9840", 39 | "worker-lkjdkmlqdb-3457", 40 | "worker-goakzjzyca-4901", 41 | "worker-bcnbtjkqeg-6684", 42 | "worker-xinbxyhoxj-4420", 43 | "worker-usazyzkozq-6526", 44 | "worker-nkegjyykfy-5121", 45 | "worker-dmnwyfzetu-4312", 46 | "worker-agyqpurqcz-1984", 47 | "worker-ibolbcdres-8977", 48 | "worker-kdikdfjtcb-352", 49 | "worker-auuevjgrbb-2181", 50 | "worker-zayysvqavb-2677", 51 | "worker-hyvoweednm-4491" 52 | ] 53 | -------------------------------------------------------------------------------- /tests/test_fake_server.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import pytest 4 | from aiohttp import web 5 | from aiohttp.test_utils import TestServer 6 | 7 | from alluxio.alluxio_file_system import AlluxioAsyncFileSystem 8 | 9 | pytestmark = pytest.mark.asyncio 10 | 11 | 12 | @pytest.fixture 13 | def server(event_loop): 14 | async def get_file_handler(request: web.Request) -> web.Response: 15 | alluxio: dict = request.app["alluxio"] 16 | bytes = alluxio[request.match_info["path_id"]][ 17 | request.match_info["page_index"] 18 | ] 19 | 20 | offset = int(request.query.get("offset", 0)) 21 | length = int(request.query.get("length", 0)) 22 | return web.Response( 23 | status=200, 24 | body=bytes[offset : offset + length], 25 | ) 26 | 27 | async def put_file_handler(request: web.Request) -> web.Response: 28 | data = await request.read() 29 | alluxio: dict = request.app["alluxio"] 30 | alluxio[request.match_info["path_id"]] = { 31 | request.match_info["page_index"]: data 32 | } 33 | return web.json_response( 34 | { 35 | "path_id": request.match_info["path_id"], 36 | } 37 | ) 38 | 39 | async def startup(app: web.Application): 40 | app["alluxio"] = defaultdict(dict) 41 | 42 | app = web.Application() 43 | app.on_startup.append(startup) 44 | app.router.add_get( 45 | 
"/v1/file/{path_id}/page/{page_index}", get_file_handler 46 | ) 47 | app.router.add_post( 48 | "/v1/file/{path_id}/page/{page_index}", put_file_handler 49 | ) 50 | server = TestServer(app) 51 | event_loop.run_until_complete(server.start_server()) 52 | return server 53 | 54 | 55 | @pytest.mark.asyncio 56 | async def test_read_page(server): 57 | fs = AlluxioAsyncFileSystem( 58 | worker_hosts=server.host, http_port=server.port 59 | ) 60 | assert await fs.write_page("s3://a/a.txt", 0, b"test") 61 | data = await fs.read_range("s3://a/a.txt", 1, 2) 62 | assert data == b"es" 63 | 64 | 65 | @pytest.mark.asyncio 66 | async def test_put_page(server): 67 | fs = AlluxioAsyncFileSystem( 68 | worker_hosts=server.host, http_port=server.port 69 | ) 70 | assert await fs.write_page("s3://a/a.txt", 1, b"test") 71 | -------------------------------------------------------------------------------- /tests/test_worker_entity.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from alluxio.worker_ring import WorkerEntity 5 | 6 | 7 | def test_worker_entity_from_info_dynamic(): 8 | # Define a mapping of field names to their specific values 9 | field_values = { 10 | "version": 1, 11 | "identifier": "cb157baaafe04b988af01a4645d38456", 12 | "Host": "192.168.4.36", 13 | "ContainerHost": "container_host_value", 14 | "RpcPort": 432423, 15 | "DataPort": 54237, 16 | "SecureRpcPort": 23514, 17 | "NettyDataPort": 45837, 18 | "WebPort": 65473, 19 | "DomainSocketPath": "domain_socket_path_value", 20 | "HttpServerPort": 39282, 21 | } 22 | 23 | # Dynamically construct worker_info_dict using field_values 24 | worker_info_dict = { 25 | "Identity": { 26 | k: v 27 | for k, v in field_values.items() 28 | if k in ["version", "identifier"] 29 | }, 30 | "WorkerNetAddress": { 31 | k: v 32 | for k, v in field_values.items() 33 | if k not in ["version", "identifier"] 34 | }, 35 | } 36 | worker_info_bytes = json.dumps(worker_info_dict).encode("utf-8") 37 | 38 | # Convert worker_info_bytes and instantiate WorkerEntity 39 | worker_entity = WorkerEntity.from_worker_info(worker_info_bytes) 40 | 41 | # Validate WorkerIdentity fields 42 | assert worker_entity.worker_identity.version == field_values["version"] 43 | assert worker_entity.worker_identity.identifier == bytes.fromhex( 44 | field_values["identifier"] 45 | ) 46 | # Dynamically validate WorkerNetAddress fields using field_values 47 | for field_name, expected_value in field_values.items(): 48 | if field_name in [ 49 | "version", 50 | "identifier", 51 | ]: # Skip identity-specific fields 52 | continue 53 | # Convert CamelCase field_name to snake_case to match WorkerNetAddress attribute names 54 | snake_case_field_name = camel_to_snake(field_name) 55 | actual_value = getattr( 56 | worker_entity.worker_net_address, snake_case_field_name 57 | ) 58 | assert ( 59 | actual_value == expected_value 60 | ), f"Field '{snake_case_field_name}' expected '{expected_value}', got '{actual_value}'" 61 | 62 | 63 | def camel_to_snake(name): 64 | """ 65 | Convert a CamelCase name into snake_case. 
66 | """ 67 | name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) 68 | return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() 69 | -------------------------------------------------------------------------------- /tests/test_worker_hash_ring.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from alluxio.worker_ring import ConsistentHashProvider 4 | from alluxio.worker_ring import WorkerIdentity 5 | from alluxio.worker_ring import WorkerNetAddress 6 | 7 | 8 | def test_hash_ring(): 9 | worker_hostnames_path = "tests/hash_res/workerHostnames.json" 10 | with open(worker_hostnames_path, "r") as file: 11 | worker_hostnames = json.load(file) 12 | 13 | hash_provider = ConsistentHashProvider( 14 | worker_hosts=", ".join(worker_hostnames), 15 | hash_node_per_worker=5, 16 | etcd_refresh_workers_interval=100000000, 17 | ) 18 | 19 | hash_ring_path = "tests/hash_res/activeNodesMap.json" 20 | validate_hash_ring(hash_provider.hash_ring, hash_ring_path) 21 | 22 | worker_list_path = "tests/hash_res/workerList.json" 23 | with open(worker_list_path, "r") as file: 24 | workers_data = json.load(file) 25 | 26 | worker_info_map = {} 27 | for worker_data in workers_data: 28 | worker_identity = WorkerIdentity( 29 | version=int(worker_data["version"]), 30 | identifier=bytes.fromhex(worker_data["identifier"]), 31 | ) 32 | default_worker_net_address = WorkerNetAddress() 33 | worker_info_map[worker_identity] = default_worker_net_address 34 | 35 | hash_provider._update_hash_ring(worker_info_map) 36 | validate_hash_ring(hash_provider.hash_ring, hash_ring_path) 37 | 38 | file_workers_path = "tests/hash_res/fileUrlWorkers.json" 39 | with open(file_workers_path, "r") as file: 40 | file_workers_data = json.load(file) 41 | 42 | for ufs_url, workers in file_workers_data.items(): 43 | current_worker_identities = ( 44 | hash_provider._get_multiple_worker_identities(ufs_url, 5) 45 | ) 46 | original_set = { 47 | (worker["version"], bytes.fromhex(worker["identifier"])) 48 | for worker in workers 49 | } 50 | current_set = { 51 | (worker.version, worker.identifier) 52 | for worker in current_worker_identities 53 | } 54 | assert original_set == current_set 55 | 56 | 57 | def validate_hash_ring(current_ring, result_file_path): 58 | with open(result_file_path, "r") as file: 59 | hash_ring_data = json.load(file) 60 | 61 | not_found_count = 0 62 | mismatch_count = 0 63 | for hash_key, worker_identity in hash_ring_data.items(): 64 | key = int(hash_key) 65 | if key in current_ring: 66 | # Fetch the WorkerIdentity object from current_ring 67 | current_worker_identity = current_ring[key] 68 | 69 | # Check if the version and identifier match 70 | if current_worker_identity.version == worker_identity[ 71 | "version" 72 | ] and current_worker_identity.identifier == bytes.fromhex( 73 | worker_identity["identifier"] 74 | ): 75 | continue 76 | else: 77 | mismatch_count += 1 78 | else: 79 | not_found_count += 1 80 | 81 | assert ( 82 | not_found_count == 0 83 | ), "Some hash keys were not found in the current ring" 84 | assert mismatch_count == 0, "Some hash keys had mismatched WorkerIdentity" 85 | -------------------------------------------------------------------------------- /tests/test_read_range_docker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | from alluxio import AlluxioFileSystem 5 | from tests.conftest import ALLUXIO_FILE_PATH 6 | from tests.conftest import LOCAL_FILE_PATH 7 | 8 | NUM_TESTS = 10 9 | 10 
| import logging 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | def validate_read_range( 16 | alluxio_fs: AlluxioFileSystem, 17 | alluxio_file_path, 18 | local_file_path, 19 | offset, 20 | length, 21 | ): 22 | alluxio_data = alluxio_fs.read_range(alluxio_file_path, offset, length) 23 | 24 | with open(local_file_path, "rb") as local_file: 25 | local_file.seek(offset) 26 | local_data = local_file.read(length) 27 | 28 | try: 29 | assert alluxio_data == local_data 30 | except AssertionError: 31 | error_message = ( 32 | f"Data mismatch between Alluxio and local file\n" 33 | f"Alluxio file path: {alluxio_file_path}\n" 34 | f"Local file path: {local_file_path}\n" 35 | f"Offset: {offset}\n" 36 | f"Length: {length}\n" 37 | f"Alluxio data: {alluxio_data}\n" 38 | f"Local data: {local_data}" 39 | ) 40 | raise AssertionError(error_message) 41 | 42 | 43 | def validate_invalid_read_range( 44 | alluxio_fs, alluxio_file_path, local_file_path, offset, length 45 | ): 46 | try: 47 | alluxio_fs.read_range(alluxio_file_path, offset, length) 48 | except Exception: 49 | pass 50 | else: 51 | raise AssertionError( 52 | "Expected an exception from Alluxio but none occurred." 53 | ) 54 | 55 | try: 56 | with open(local_file_path, "rb") as local_file: 57 | local_file.seek(offset) 58 | local_file.read(length) 59 | except Exception: 60 | pass 61 | else: 62 | raise AssertionError( 63 | "Expected an exception from local file read but none occurred." 64 | ) 65 | 66 | 67 | def test_alluxio_filesystem(fs: AlluxioFileSystem): 68 | file_size = os.path.getsize(LOCAL_FILE_PATH) 69 | assert fs.load(ALLUXIO_FILE_PATH, 200) 70 | invalid_test_cases = [(-1, 100), (file_size - 1, -2)] 71 | for offset, length in invalid_test_cases: 72 | validate_invalid_read_range( 73 | fs, 74 | ALLUXIO_FILE_PATH, 75 | LOCAL_FILE_PATH, 76 | offset, 77 | length, 78 | ) 79 | LOGGER.debug("Passed invalid test cases") 80 | 81 | # Validate normal case 82 | max_length = 13 * 1024 83 | for _ in range(NUM_TESTS): 84 | offset = random.randint(0, file_size - 1) 85 | length = min(random.randint(1, file_size - offset), max_length) 86 | validate_read_range( 87 | fs, 88 | ALLUXIO_FILE_PATH, 89 | LOCAL_FILE_PATH, 90 | offset, 91 | length, 92 | ) 93 | 94 | LOGGER.debug( 95 | f"Data matches between Alluxio file and local source file for {NUM_TESTS} times" 96 | ) 97 | 98 | special_test_cases = [ 99 | (file_size - 1, -1), 100 | (file_size - 1, file_size + 1), 101 | (file_size, 100), 102 | ] 103 | 104 | for offset, length in special_test_cases: 105 | validate_read_range( 106 | fs, 107 | ALLUXIO_FILE_PATH, 108 | LOCAL_FILE_PATH, 109 | offset, 110 | length, 111 | ) 112 | LOGGER.debug("Passed corner test cases") 113 | -------------------------------------------------------------------------------- /alluxio/const.py: -------------------------------------------------------------------------------- 1 | # The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 2 | # (the "License"). You may not use this work except in compliance with the License, which is 3 | # available at www.apache.org/licenses/LICENSE-2.0 4 | # 5 | # This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 6 | # either express or implied, as more fully set forth in the License. 7 | # 8 | # See the NOTICE file distributed with this work for information regarding copyright ownership. 
9 | 10 | ALLUXIO_CLUSTER_NAME_KEY = "alluxio.cluster.name" 11 | ALLUXIO_CLUSTER_NAME_DEFAULT_VALUE = "DefaultAlluxioCluster" 12 | ALLUXIO_ETCD_USERNAME_KEY = "alluxio.etcd.username" 13 | ALLUXIO_ETCD_PASSWORD_KEY = "alluxio.etcd.password" 14 | ALLUXIO_PAGE_SIZE_KEY = "alluxio.worker.page.store.page.size" 15 | ALLUXIO_PAGE_SIZE_DEFAULT_VALUE = "1MB" 16 | ALLUXIO_HASH_NODE_PER_WORKER_KEY = ( 17 | "alluxio.user.consistent.hash.virtual.node.count.per.worker" 18 | ) 19 | ALLUXIO_WORKER_HTTP_SERVER_PORT_KEY = "alluxio.worker.http.server.port" 20 | ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE = 28080 21 | ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE = 5 22 | ALLUXIO_SUCCESS_IDENTIFIER = "success" 23 | ALLUXIO_COMMON_EXTENSION_ENABLE = "alluxio.common.extension.enable" 24 | ALLUXIO_COMMON_ONDEMANDPOOL_DISABLE = "alluxio.common.ondemandpool.disable" 25 | LIST_URL_FORMAT = "http://{worker_host}:{http_port}/v1/files" 26 | FULL_PAGE_URL_FORMAT = ( 27 | "http://{worker_host}:{http_port}/v1/file/{path_id}/page/{page_index}" 28 | ) 29 | FULL_RANGE_URL_FORMAT = "http://{worker_host}:{http_port}/v1/range/{path_id}?ufsFullPath={file_path}&offset={offset}&length={length}" 30 | FULL_CHUNK_URL_FORMAT = "http://{worker_host}:{http_port}/v1/chunk/{path_id}?ufsFullPath={file_path}&chunkSize={chunk_size}" 31 | PAGE_URL_FORMAT = "http://{worker_host}:{http_port}/v1/file/{path_id}/page/{page_index}?offset={page_offset}&length={page_length}" 32 | WRITE_PAGE_URL_FORMAT = ( 33 | "http://{worker_host}:{http_port}/v1/file/{path_id}/page/{page_index}" 34 | ) 35 | WRITE_CHUNK_URL_FORMAT = "http://{worker_host}:{http_port}/v1/chunk/{path_id}?ufsFullPath={file_path}&chunkSize={chunk_size}" 36 | MKDIR_URL_FORMAT = "http://{worker_host}:{http_port}/v1/mkdir/{path_id}?ufsFullPath={file_path}" 37 | TOUCH_URL_FORMAT = "http://{worker_host}:{http_port}/v1/touch/{path_id}?ufsFullPath={file_path}" 38 | MV_URL_FORMAT = "http://{worker_host}:{http_port}/v1/mv/{path_id}?srcPath={srcPath}&dstPath={dstPath}" 39 | RM_URL_FORMAT = ( 40 | "http://{worker_host}:{http_port}/v1/rm/{path_id}?ufsFullPath={file_path}" 41 | ) 42 | CP_URL_FORMAT = "http://{worker_host}:{http_port}/v1/copy/{path_id}?srcPath={srcPath}&dstPath={dstPath}" 43 | TAIL_URL_FORMAT = "http://{worker_host}:{http_port}/v1/tail/{path_id}?ufsFullPath={file_path}" 44 | HEAD_URL_FORMAT = "http://{worker_host}:{http_port}/v1/head/{path_id}?ufsFullPath={file_path}" 45 | PAGE_PATH_URL_FORMAT = "/v1/file/{path_id}/page/{page_index}" 46 | GET_FILE_STATUS_URL_FORMAT = "http://{worker_host}:{http_port}/v1/info" 47 | LOAD_URL_FORMAT = "http://{worker_host}:{http_port}/v1/load" 48 | # TODO (chunxu): Remove the concrete types of LOAD formats. Keep them for asyncio. 
49 | LOAD_SUBMIT_URL_FORMAT = (
50 |     "http://{worker_host}:{http_port}/v1/load?path={path}&opType=submit"
51 | )
52 | LOAD_PROGRESS_URL_FORMAT = (
53 |     "http://{worker_host}:{http_port}/v1/load?path={path}&opType=progress"
54 | )
55 | LOAD_STOP_URL_FORMAT = (
56 |     "http://{worker_host}:{http_port}/v1/load?path={path}&opType=stop"
57 | )
58 | ETCD_PREFIX_FORMAT = "/ServiceDiscovery/{cluster_name}/"
59 | EXCEPTION_CONTENT = (
60 |     "Worker's address: {worker_host}:{http_port}, Error: {error}"
61 | )
62 |
--------------------------------------------------------------------------------
/alluxio/posix/config.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import yaml
5 |
6 | from alluxio.posix.const import Constants
7 | from alluxio.posix.exception import ConfigInvalidError
8 | from alluxio.posix.exception import ConfigMissingError
9 | from alluxio.posix.ufs.alluxio import update_alluxio_config
10 | from alluxio.posix.ufs.alluxio import validate_alluxio_config
11 | from alluxio.posix.ufs.oss import update_oss_config
12 | from alluxio.posix.ufs.oss import validate_oss_config
13 |
14 |
15 | class ConfigManager:
16 |     def __init__(self):
17 |         self.logger = logging.getLogger(__name__)
18 |         logging.basicConfig(level=logging.INFO)
19 |         current_dir = os.path.dirname(os.path.abspath(__file__))
20 |         self.config_file_path = os.getenv(
21 |             "ALLUXIO_PY_CONFIG_FILE_PATH",
22 |             os.path.join(current_dir, "config", "ufs_config.yaml"),
23 |         )
24 |         self.config_data = self._load_config()
25 |         self.validation_functions = {
26 |             Constants.OSS_FILESYSTEM_TYPE: validate_oss_config,
27 |             Constants.ALLUXIO_FILESYSTEM_TYPE: validate_alluxio_config,
28 |         }
29 |
30 |     def _load_config(self):
31 |         if not os.path.exists(self.config_file_path):
32 |             self.logger.warning(
33 |                 f"Config file not found: {self.config_file_path}. Initializing without loading config."
34 |             )
35 |             return None
36 |
37 |         with open(self.config_file_path, "r", encoding="utf-8") as file:
38 |             try:
39 |                 config = yaml.safe_load(file)
40 |                 return config
41 |             except yaml.YAMLError as e:
42 |                 raise ValueError(f"Error parsing YAML file: {e}")
43 |
44 |     def set_config_path(self, new_path):
45 |         self.config_file_path = new_path
46 |         self.config_data = self._load_config()
47 |         self.logger.info(
48 |             f"Configuration path updated and config reloaded from {new_path}."
49 | ) 50 | 51 | def get_config(self, fs_name: str) -> dict: 52 | try: 53 | fs_config = self.config_data[fs_name] 54 | validation_function = self.validation_functions.get(fs_name) 55 | if validation_function is not None: 56 | validation_function(fs_config) 57 | else: 58 | raise ConfigInvalidError( 59 | fs_name, 60 | f"No validation function for file system: {fs_name}", 61 | ) 62 | return fs_config 63 | except KeyError: 64 | raise ConfigMissingError( 65 | fs_name, "FileSystem Configuration is missing" 66 | ) 67 | except ValueError as e: 68 | raise ConfigMissingError(fs_name, str(e)) 69 | 70 | def get_config_fs_list(self) -> list: 71 | if self.config_data is None: 72 | return [] 73 | else: 74 | return self.config_data.keys() 75 | 76 | def update_config(self, fs_type, **kwargs): 77 | if fs_type not in self.get_config_fs_list(): 78 | raise KeyError(f"No configuration available for {fs_type}") 79 | config_data = self.get_config(fs_type) 80 | 81 | if fs_type == Constants.OSS_FILESYSTEM_TYPE: 82 | self.config_data[fs_type] = update_oss_config(config_data, kwargs) 83 | elif fs_type == Constants.ALLUXIO_FILESYSTEM_TYPE: 84 | self.config_data[fs_type] = update_alluxio_config( 85 | config_data, kwargs 86 | ) 87 | elif fs_type == Constants.S3_FILESYSTEM_TYPE: 88 | raise NotImplementedError() 89 | else: 90 | raise ValueError(f"Unsupported file system type: {fs_type}") 91 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shlex 4 | import subprocess 5 | import time 6 | from urllib.parse import urlparse 7 | 8 | import pytest 9 | import requests 10 | 11 | from alluxio.alluxio_file_system import AlluxioFileSystem 12 | 13 | LOGGER = logging.getLogger("alluxio_test") 14 | TEST_ROOT = os.getenv("TEST_ROOT", "file:///opt/alluxio/ufs/") 15 | # This is the path to the file you want to access 16 | TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") 17 | LOCAL_FILE_PATH = os.path.join(TEST_DIR, "test.csv") 18 | ALLUXIO_FILE_PATH = "file://{}".format("/opt/alluxio/ufs/test.csv") 19 | 20 | 21 | def stop_docker(container): 22 | cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container) 23 | cid = subprocess.check_output(cmd).strip().decode() 24 | if cid: 25 | LOGGER.debug("Stopping existing container %s" % cid) 26 | subprocess.call(["docker", "rm", "-f", "-v", cid]) 27 | 28 | 29 | @pytest.fixture(scope="module") 30 | def docker_alluxio(): 31 | if "ALLUXIO_URL" in os.environ: 32 | # assume we already have a server already set up 33 | yield os.getenv("ALLUXIO_URL") 34 | return 35 | master_container = "alluxio-master" 36 | worker_container = "alluxio-worker" 37 | network_cmd = "docker network create alluxio_network" 38 | 39 | run_cmd_master = ( 40 | "docker run --platform linux/amd64 -d --rm --net=alluxio_network -p 19999:19999 -p 19998:19998 " 41 | f"--name=alluxio-master -v {TEST_DIR}:/opt/alluxio/ufs " 42 | '-e ALLUXIO_JAVA_OPTS=" -Dalluxio.master.hostname=alluxio-master ' 43 | "-Dalluxio.security.authentication.type=NOSASL " 44 | "-Dalluxio.security.authorization.permission.enabled=false " 45 | "-Dalluxio.security.authorization.plugins.enabled=false " 46 | "-Dalluxio.master.journal.type=NOOP " 47 | "-Dalluxio.master.scheduler.initial.wait.time=1s " 48 | "-Dalluxio.dora.client.ufs.root=file:/// " 49 | '-Dalluxio.underfs.xattr.change.enabled=false " alluxio/alluxio:308-SNAPSHOT master' 50 | ) 51 | 
run_cmd_worker = (
52 |         "docker run --platform linux/amd64 -d --rm --net=alluxio_network -p 28080:28080 -p 29999:29999 -p 29997:29997 "
53 |         f"--name=alluxio-worker --shm-size=1G -v {TEST_DIR}:/opt/alluxio/ufs "
54 |         '-e ALLUXIO_JAVA_OPTS=" -Dalluxio.master.hostname=alluxio-master '
55 |         "-Dalluxio.security.authentication.type=NOSASL "
56 |         "-Dalluxio.security.authorization.permission.enabled=false "
57 |         "-Dalluxio.security.authorization.plugins.enabled=false "
58 |         "-Dalluxio.dora.client.ufs.root=file:/// "
59 |         '-Dalluxio.underfs.xattr.change.enabled=false " alluxio/alluxio:308-SNAPSHOT worker'
60 |     )
61 |
62 |     stop_docker(worker_container)
63 |     stop_docker(master_container)
64 |     subprocess.run(
65 |         shlex.split(network_cmd)
66 |     )  # could return an error code if the network already exists
67 |     subprocess.check_output(shlex.split(run_cmd_master))
68 |     subprocess.check_output(shlex.split(run_cmd_worker))
69 |     url = "http://127.0.0.1:28080"
70 |     timeout = 10
71 |     while True:
72 |         try:
73 |             LOGGER.debug("trying to connect to alluxio")
74 |             r = requests.get(url + "/v1/files?path=/")
75 |             LOGGER.debug("successfully connected to alluxio")
76 |             if r.ok:
77 |                 yield url
78 |                 break
79 |         except Exception as e:
80 |             timeout -= 1
81 |             if timeout < 0:
82 |                 raise SystemError from e
83 |             time.sleep(10)
84 |     stop_docker(worker_container)
85 |     stop_docker(master_container)
86 |
87 |
88 | @pytest.fixture
89 | def fs(docker_alluxio):
90 |
91 |     LOGGER.debug(f"creating AlluxioFileSystem connected to {docker_alluxio}")
92 |     parsed_url = urlparse(docker_alluxio)
93 |     host = parsed_url.hostname
94 |     port = parsed_url.port
95 |     fs = AlluxioFileSystem(worker_hosts=host, worker_http_port=port)
96 |     yield fs
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Alluxio Python Library
2 |
3 | This repo contains the Alluxio Python API to interact with Alluxio servers, bridging the gap between computation frameworks and underlying storage systems. This module provides a convenient interface for performing file system operations such as reading, writing, and listing files in an Alluxio cluster.
4 |
5 | ## Features
6 |
7 | - Directory listing and file status fetching
8 | - Putting data into the Alluxio system cache and reading it back (including range reads)
9 | - Alluxio system load operations with progress tracking
10 | - Dynamic Alluxio worker membership services (periodic ETCD refresh or manually specified worker hosts)
11 |
12 | ## Limitations
13 |
14 | The Alluxio Python library only supports reading data already cached in Alluxio.
15 | The data needs to either be
16 | - loaded into Alluxio servers via the `load` operation, or
17 | - put into Alluxio servers via the `write_page` operation.
18 |
19 | If you need to read from storage systems directly, with Alluxio's on-demand caching capabilities,
20 | please use [alluxiofs](https://github.com/fsspec/alluxiofs) instead.
21 |
22 | ## Installation
23 |
24 | Install from source:
25 | ```bash
26 | cd alluxio-python-library
27 | python setup.py sdist bdist_wheel
28 | pip install dist/alluxio-0.3-py3-none-any.whl
29 | ```
30 |
31 | ## Usage
32 |
33 | ### Initialization
34 | Import and initialize the `AlluxioFileSystem` class:
35 | ```python
36 | # Minimum setup for Alluxio with ETCD membership service
37 | alluxio_fs = AlluxioFileSystem(etcd_hosts="localhost")
38 |
39 | # Minimum setup for Alluxio with a user-defined worker list
40 | alluxio_fs = AlluxioFileSystem(worker_hosts="worker_host1,worker_host2")
41 |
42 | # Minimum setup for Alluxio with a self-defined page size
43 | alluxio_fs = AlluxioFileSystem(
44 |     etcd_hosts="localhost",
45 |     options={"alluxio.worker.page.store.page.size": "20MB"}
46 | )
47 |
48 | # Minimum setup for Alluxio with ETCD membership service with username/password
49 | options = {
50 |     "alluxio.etcd.username": "my_user",
51 |     "alluxio.etcd.password": "my_password",
52 |     "alluxio.worker.page.store.page.size": "20MB"  # Any other options should be included here
53 | }
54 | alluxio_fs = AlluxioFileSystem(
55 |     etcd_hosts="localhost",
56 |     options=options
57 | )
58 | ```
59 |
60 | ### Load Operations
61 | Dataset metadata and data in the Alluxio under storage need to be loaded into the Alluxio system cache
62 | before they can be read by end users. Run the load operations before executing the read commands.
63 | ```python
64 | # Start a load operation
65 | load_success = alluxio_fs.load('s3://mybucket/mypath/file')
66 | print('Load successful:', load_success)
67 |
68 | # Check load progress
69 | progress = alluxio_fs.load_progress('s3://mybucket/mypath/file')
70 | print('Load progress:', progress)
71 |
72 | # Stop a load operation
73 | stop_success = alluxio_fs.stop_load('s3://mybucket/mypath/file')
74 | print('Stop successful:', stop_success)
75 | ```
76 |
77 | ### (Advanced) Page Write
78 | The Alluxio system cache can be used as a key-value cache.
79 | Data can be written to the Alluxio system cache via the `write_page` command,
80 | after which the data can be read from the Alluxio system cache (an alternative to load operations).
81 |
82 | ```python
83 | success = alluxio_fs.write_page('s3://mybucket/mypath/file', page_index, page_bytes)
84 | print('Write successful:', success)
85 | ```
86 |
87 | ### Directory Listing
88 | List the contents of a directory:
89 | ```python
90 | contents = alluxio_fs.listdir('s3://mybucket/mypath/dir')
91 | print(contents)
92 | ```
93 |
94 | ### Get File Status
95 | Retrieve the status of a file or directory:
96 | ```python
97 | status = alluxio_fs.get_file_status('s3://mybucket/mypath/file')
98 | print(status)
99 | ```
100 |
101 | ### File Reading
102 | Read the entire content of a file:
103 | ```python
104 | """
105 | Reads a file.
106 |
107 | Args:
108 |     file_path (str): The full ufs file path to read data from
109 |
110 | Returns:
111 |     file content (str): The full file content
112 | """
113 | content = alluxio_fs.read('s3://mybucket/mypath/file')
114 | print(content)
115 | ```
116 | Read a specific range of a file:
117 | ```python
118 | content = alluxio_fs.read_range('s3://mybucket/mypath/file', offset, length)
119 | print(content)
120 | ```
121 |
122 | ## Development
123 |
124 | See [Contributing](CONTRIBUTING.md) for guidelines around making new contributions and reviewing them.
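125 |
126 | For a quick end-to-end smoke test of an installed build, the snippet below strings the operations above together (a sketch; it assumes an Alluxio cluster reachable through ETCD on localhost and an already-existing `s3://mybucket/mypath/file`):
127 |
128 | ```python
129 | from alluxio import AlluxioFileSystem
130 |
131 | alluxio_fs = AlluxioFileSystem(etcd_hosts="localhost")
132 |
133 | # Cache the file into Alluxio, then read it back through the system cache
134 | if alluxio_fs.load('s3://mybucket/mypath/file'):
135 |     print(alluxio_fs.listdir('s3://mybucket/mypath'))
136 |     print(alluxio_fs.read_range('s3://mybucket/mypath/file', 0, 16))
137 | ```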
--------------------------------------------------------------------------------
/alluxio/posix/fileimpl.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from alluxio.posix.delegateFs import DelegateFileSystem
5 |
6 | local_open = open  # the built-in open; saved before delegate.py monkey-patches os
7 | local_stat = os.stat
8 | local_listdir = os.listdir
9 | local_rename = os.rename
10 | local_close = os.close
11 | local_mkdir = os.mkdir
12 | local_remove = os.remove
13 | local_rmdir = os.rmdir
14 |
15 |
16 | def open(file: str, mode: str = "r", **kw):
17 |     logging.debug("DelegateFileSystem opening file: %s", file)
18 |     instance = DelegateFileSystem.instance
19 |     fs = instance.get_file_system(file)
20 |     if fs:
21 |         try:
22 |             return fs.open(file, mode, **kw)
23 |         except Exception as e:
24 |             logging.error(
25 |                 f"Failed to open file by delegateFileSystem with exception:{e}."
26 |                 f"Used local filesystem instead."
27 |             )
28 |             return local_open(file, mode, **kw)
29 |     return local_open(file, mode, **kw)
30 |
31 |
32 | def stat(path: str, **kw):
33 |     instance = DelegateFileSystem.instance
34 |     fs = instance.get_file_system(path)
35 |     if fs:
36 |         try:
37 |             logging.debug("DelegateFileSystem getStatus filemeta: %s", path)
38 |             return fs.stat(path, **kw)
39 |         except Exception as e:
40 |             logging.error(
41 |                 f"Failed to stat file by delegateFileSystem with exception:{e}."
42 |                 f"Used local filesystem instead."
43 |             )
44 |             return local_stat(path, **kw)
45 |     logging.info("LocalFileSystem getStatus filemeta: %s", path)
46 |     return local_stat(path, **kw)
47 |
48 |
49 | def listdir(path: str, **kw):
50 |     instance = DelegateFileSystem.instance
51 |     fs = instance.get_file_system(path)
52 |     if fs:
53 |         try:
54 |             return fs.listdir(path, **kw)
55 |         except Exception as e:
56 |             logging.error(
57 |                 f"Failed to list directory by delegateFileSystem with exception: {e}."
58 |                 f"Used local filesystem instead."
59 |             )
60 |             return local_listdir(path, **kw)
61 |     return local_listdir(path, **kw)
62 |
63 |
64 | def mkdir(path: str, mode=0o777, **kw):
65 |     instance = DelegateFileSystem.instance
66 |     fs = instance.get_file_system(path)
67 |     if fs:
68 |         try:
69 |             return fs.mkdir(path, mode, **kw)
70 |         except Exception as e:
71 |             logging.error(
72 |                 f"Failed to make directory by delegateFileSystem with exception: {e}."
73 |                 f"Used local filesystem instead."
74 |             )
75 |             return local_mkdir(path, mode, **kw)
76 |     return local_mkdir(path, mode, **kw)
77 |
78 |
79 | def rmdir(path: str, **kw):
80 |     instance = DelegateFileSystem.instance
81 |     fs = instance.get_file_system(path)
82 |     if fs:
83 |         try:
84 |             return fs.rmdir(path, **kw)
85 |         except Exception as e:
86 |             logging.error(
87 |                 f"Failed to remove directory by delegateFileSystem with exception: {e}."
88 |                 f"Used local filesystem instead."
89 |             )
90 |             return local_rmdir(path, **kw)
91 |     return local_rmdir(path, **kw)
92 |
93 |
94 | def remove(path: str, **kw):
95 |     instance = DelegateFileSystem.instance
96 |     fs = instance.get_file_system(path)
97 |     if fs:
98 |         try:
99 |             return fs.rm(path, **kw)
100 |         except Exception as e:
101 |             logging.error(
102 |                 f"Failed to remove file by delegateFileSystem with exception: {e}."
103 |                 f"Used local filesystem instead."
104 | ) 105 | return local_remove(path, **kw) 106 | return local_remove(path, **kw) 107 | 108 | 109 | def rename(src: str, dest: str, **kw): 110 | instance = DelegateFileSystem.instance 111 | fs_src = instance.get_file_system(src) 112 | fs_dest = instance.get_file_system(dest) 113 | if fs_src and fs_dest and fs_src == fs_dest: 114 | try: 115 | return fs_src.rename(src, dest, **kw) 116 | except Exception as e: 117 | logging.error( 118 | f"Failed to rename file by delegateFileSystem with exception: {e}." 119 | f"Used local filesystem instead." 120 | ) 121 | return local_rename(src, dest, **kw) 122 | logging.error( 123 | "Source and destination are on different file systems or not supported." 124 | ) 125 | return local_rename(src, dest, **kw) 126 | -------------------------------------------------------------------------------- /tests/test_read_range.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | 5 | from alluxio import AlluxioFileSystem 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description="Validate Alluxio read_range with local file." 11 | ) 12 | parser.add_argument( 13 | "--alluxio_file_path", 14 | default="s3://ai-ref-arch/small-dataset/iris.csv", 15 | required=False, 16 | help="The Alluxio file path to read", 17 | ) 18 | parser.add_argument( 19 | "--local_file_path", 20 | default="/Users/alluxio/Downloads/iris.csv", 21 | required=False, 22 | help="The local file path to validate against", 23 | ) 24 | parser.add_argument( 25 | "--etcd_hosts", 26 | type=str, 27 | default="localhost", 28 | required=False, 29 | help="The host address(es) for etcd", 30 | ) 31 | parser.add_argument( 32 | "--num_tests", 33 | type=int, 34 | default=100, 35 | required=False, 36 | help="The total number of read range test to run", 37 | ) 38 | return parser.parse_args() 39 | 40 | 41 | def validate_read_range( 42 | alluxio_fs, alluxio_file_path, local_file_path, offset, length 43 | ): 44 | alluxio_data = alluxio_fs.read_range(alluxio_file_path, offset, length) 45 | 46 | with open(local_file_path, "rb") as local_file: 47 | local_file.seek(offset) 48 | local_data = local_file.read(length) 49 | 50 | try: 51 | assert alluxio_data == local_data 52 | except AssertionError: 53 | error_message = ( 54 | f"Data mismatch between Alluxio and local file\n" 55 | f"Alluxio file path: {alluxio_file_path}\n" 56 | f"Local file path: {local_file_path}\n" 57 | f"Offset: {offset}\n" 58 | f"Length: {length}\n" 59 | f"Alluxio data: {alluxio_data}\n" 60 | f"Local data: {local_data}" 61 | ) 62 | raise AssertionError(error_message) 63 | 64 | 65 | def manual_test_invalid_read_range( 66 | alluxio_fs, alluxio_file_path, local_file_path, offset, length 67 | ): 68 | try: 69 | alluxio_fs.read_range(alluxio_file_path, offset, length) 70 | except Exception: 71 | pass 72 | else: 73 | raise AssertionError( 74 | "Expected an exception from Alluxio but none occurred." 75 | ) 76 | 77 | try: 78 | with open(local_file_path, "rb") as local_file: 79 | local_file.seek(offset) 80 | local_file.read(length) 81 | except Exception: 82 | pass 83 | else: 84 | raise AssertionError( 85 | "Expected an exception from local file read but none occurred." 
86 | ) 87 | 88 | 89 | def main(args): 90 | alluxio_fs = AlluxioFileSystem(etcd_hosts=args.etcd_hosts) 91 | file_size = os.path.getsize(args.local_file_path) 92 | 93 | invalid_test_cases = [(-1, 100), (file_size - 1, -2)] 94 | for offset, length in invalid_test_cases: 95 | manual_test_invalid_read_range( 96 | alluxio_fs, 97 | args.alluxio_file_path, 98 | args.local_file_path, 99 | offset, 100 | length, 101 | ) 102 | print("Passed invalid test cases") 103 | 104 | # Validate normal case 105 | max_length = 13 * 1024 * 1024 106 | for _ in range(args.num_tests): 107 | offset = random.randint(0, file_size - 1) 108 | length = min(random.randint(-1, file_size - offset), max_length) 109 | # -1 and None length represents read from offset to file end 110 | if length == 0: 111 | length = None 112 | validate_read_range( 113 | alluxio_fs, 114 | args.alluxio_file_path, 115 | args.local_file_path, 116 | offset, 117 | length, 118 | ) 119 | 120 | print( 121 | f"Data matches between Alluxio file and local source file for {args.num_tests} times" 122 | ) 123 | 124 | special_test_cases = [ 125 | (file_size - 1, -1), 126 | (file_size - 1, None), 127 | (file_size - 1, file_size + 1), 128 | (file_size, 100), 129 | ] 130 | 131 | for offset, length in special_test_cases: 132 | validate_read_range( 133 | alluxio_fs, 134 | args.alluxio_file_path, 135 | args.local_file_path, 136 | offset, 137 | length, 138 | ) 139 | print("Passed corner test cases") 140 | 141 | 142 | if __name__ == "__main__": 143 | args = parse_args() 144 | main(args) 145 | -------------------------------------------------------------------------------- /tests/hash_res/workerList.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "version": 1, 4 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 5 | }, 6 | { 7 | "version": 1, 8 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 9 | }, 10 | { 11 | "version": 1, 12 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 13 | }, 14 | { 15 | "version": 1, 16 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 17 | }, 18 | { 19 | "version": 1, 20 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 21 | }, 22 | { 23 | "version": 1, 24 | "identifier": "55da12e6f8ea35059e783843d4733281" 25 | }, 26 | { 27 | "version": 1, 28 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 29 | }, 30 | { 31 | "version": 1, 32 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 33 | }, 34 | { 35 | "version": 1, 36 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 37 | }, 38 | { 39 | "version": 1, 40 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 41 | }, 42 | { 43 | "version": 1, 44 | "identifier": "b065a65d21613f30b210668f36b99865" 45 | }, 46 | { 47 | "version": 1, 48 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 49 | }, 50 | { 51 | "version": 1, 52 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 53 | }, 54 | { 55 | "version": 1, 56 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 57 | }, 58 | { 59 | "version": 1, 60 | "identifier": "1ae5694808e43da58ba98374c7e51190" 61 | }, 62 | { 63 | "version": 1, 64 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 65 | }, 66 | { 67 | "version": 1, 68 | "identifier": "70d7e21f675633c38b057731c646283b" 69 | }, 70 | { 71 | "version": 1, 72 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 73 | }, 74 | { 75 | "version": 1, 76 | "identifier": "464294a547843ecca2be5a615b1397c0" 77 | }, 78 | { 79 | "version": 1, 80 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 81 | }, 82 | { 83 | "version": 1, 84 | "identifier": 
"acf3522e534a35c2a811716eeb29c22e" 85 | }, 86 | { 87 | "version": 1, 88 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 89 | }, 90 | { 91 | "version": 1, 92 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 93 | }, 94 | { 95 | "version": 1, 96 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 97 | }, 98 | { 99 | "version": 1, 100 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 101 | }, 102 | { 103 | "version": 1, 104 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 105 | }, 106 | { 107 | "version": 1, 108 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 109 | }, 110 | { 111 | "version": 1, 112 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 113 | }, 114 | { 115 | "version": 1, 116 | "identifier": "d53648c23d853765a0662482a16a4ca3" 117 | }, 118 | { 119 | "version": 1, 120 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 121 | }, 122 | { 123 | "version": 1, 124 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 125 | }, 126 | { 127 | "version": 1, 128 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 129 | }, 130 | { 131 | "version": 1, 132 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 133 | }, 134 | { 135 | "version": 1, 136 | "identifier": "865668aadaba3511a84d8054613dd14b" 137 | }, 138 | { 139 | "version": 1, 140 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 141 | }, 142 | { 143 | "version": 1, 144 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 145 | }, 146 | { 147 | "version": 1, 148 | "identifier": "ce88e51340d034eb95942143f1d177dd" 149 | }, 150 | { 151 | "version": 1, 152 | "identifier": "d9824b27273034f69ddf0369b907de1e" 153 | }, 154 | { 155 | "version": 1, 156 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 157 | }, 158 | { 159 | "version": 1, 160 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 161 | }, 162 | { 163 | "version": 1, 164 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 165 | }, 166 | { 167 | "version": 1, 168 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 169 | }, 170 | { 171 | "version": 1, 172 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 173 | }, 174 | { 175 | "version": 1, 176 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 177 | }, 178 | { 179 | "version": 1, 180 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 181 | }, 182 | { 183 | "version": 1, 184 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 185 | }, 186 | { 187 | "version": 1, 188 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 189 | }, 190 | { 191 | "version": 1, 192 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 193 | }, 194 | { 195 | "version": 1, 196 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 197 | }, 198 | { 199 | "version": 1, 200 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 201 | } 202 | ] 203 | -------------------------------------------------------------------------------- /alluxio/posix/delegateFs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import threading 4 | 5 | import fsspec 6 | 7 | from alluxio.posix.config import ConfigManager 8 | from alluxio.posix.const import Constants 9 | from alluxio.posix.exception import UnsupportedDelegateFileSystemError 10 | from alluxio.posix.ufs.alluxio import ALLUXIO_BACKUP_FS 11 | from alluxio.posix.ufs.alluxio import ALLUXIO_ENABLE 12 | from alluxio.posix.ufs.alluxio import ALLUXIO_ETCD_ENABLE 13 | from alluxio.posix.ufs.alluxio import ALLUXIO_ETCD_HOST 14 | from alluxio.posix.ufs.oss import OSS_ACCESS_KEY_ID 15 | from alluxio.posix.ufs.oss import OSS_ACCESS_KEY_SECRET 16 | from alluxio.posix.ufs.oss import 
OSS_ENDPOINT
17 |
18 |
19 | class DelegateFileSystem:
20 |     instance = None
21 |
22 |     def __init__(self, config_manager: ConfigManager):
23 |         self.config_manager = config_manager
24 |         self.filesystem_storage = FSStorage()
25 |         self.filesystem_storage.data = {}
26 |         self.enableFileSystems = [
27 |             Constants.OSS_FILESYSTEM_TYPE,
28 |             Constants.ALLUXIO_FILESYSTEM_TYPE,
29 |             Constants.S3_FILESYSTEM_TYPE,
30 |         ]
31 |         self.__init__file__system()
32 |         DelegateFileSystem.instance = self
33 |
34 |     def __create__file__system(self, fs_name: str):
35 |         config = self.config_manager.get_config(fs_name)
36 |         if fs_name not in self.enableFileSystems:
37 |             raise UnsupportedDelegateFileSystemError(
38 |                 f"Unsupported file system: {fs_name}"
39 |             )
40 |         if config[ALLUXIO_ENABLE]:
41 |             fs_name = (
42 |                 Constants.ALLUXIO_FILESYSTEM_TYPE
43 |                 + Constants.ALLUXIO_SEP_SIGN
44 |                 + fs_name
45 |                 + Constants.ALLUXIO_SEP_SIGN
46 |                 + config[Constants.BUCKET_NAME]
47 |             )
48 |             if config.get(ALLUXIO_ETCD_ENABLE):
49 |                 self.filesystem_storage.fs[fs_name] = fsspec.filesystem(
50 |                     Constants.ALLUXIO_FILESYSTEM_TYPE,
51 |                     etcd_hosts=config[ALLUXIO_ETCD_HOST],
52 |                     etcd_port=2379,
53 |                     target_protocol=config[ALLUXIO_BACKUP_FS],
54 |                 )
55 |                 return self.filesystem_storage.fs[fs_name]
56 |             else:
57 |                 logging.error(
58 |                     "Failed to create Alluxio filesystem, using the default %s filesystem.",
59 |                     fs_name,
60 |                 )
61 |         if fs_name == Constants.OSS_FILESYSTEM_TYPE:
62 |             self.filesystem_storage.fs[fs_name] = fsspec.filesystem(
63 |                 Constants.OSS_FILESYSTEM_TYPE,
64 |                 key=config[OSS_ACCESS_KEY_ID],
65 |                 secret=config[OSS_ACCESS_KEY_SECRET],
66 |                 endpoint=config[OSS_ENDPOINT],
67 |             )
68 |             return self.filesystem_storage.fs[fs_name]
69 |         elif fs_name == Constants.S3_FILESYSTEM_TYPE:
70 |             # TODO: add an S3 file system implementation
71 |             raise NotImplementedError
72 |
73 |         return None
74 |
75 |     def get_file_system(self, path: str):
76 |         fs_name, bucket = self.__parse__url(path)
77 |         if fs_name == Constants.LOCAL_FILESYSTEM_TYPE:
78 |             return None
79 |         config = self.config_manager.get_config(fs_name)
80 |         if config[ALLUXIO_ENABLE]:
81 |             fs_name = (
82 |                 Constants.ALLUXIO_FILESYSTEM_TYPE
83 |                 + Constants.ALLUXIO_SEP_SIGN
84 |                 + fs_name
85 |                 + Constants.ALLUXIO_SEP_SIGN
86 |                 + config[Constants.BUCKET_NAME]
87 |             )
88 |         if fs_name in self.filesystem_storage:
89 |             return self.filesystem_storage.fs[fs_name]
90 |         else:
91 |             self.__create__file__system(fs_name)
92 |             return self.filesystem_storage.fs[fs_name]
93 |
94 |     def __init__file__system(self):
95 |         fs_list = self.config_manager.get_config_fs_list()
96 |         for fs_name in fs_list:
97 |             self.__create__file__system(fs_name)
98 |
99 |     def __parse__url(self, path: str):
100 |         # Parse the scheme and bucket name from the file path.
101 |         if (type(path) is not str) or (path.startswith("/")):
102 |             return Constants.LOCAL_FILESYSTEM_TYPE, None
103 |         pattern = re.compile(r"^(\w+)://([^/]+)/.*")
104 |         match = pattern.match(path)
105 |         if match:
106 |             fs_name, bucket_name = match.groups()
107 |             # Check whether the file system corresponding to the path is supported
108 |             if fs_name.lower() in self.enableFileSystems:
109 |                 return fs_name, bucket_name
110 |             else:
111 |                 raise UnsupportedDelegateFileSystemError(
112 |                     f"Unsupported file system: {fs_name}"
113 |                 )
114 |         else:
115 |             return Constants.LOCAL_FILESYSTEM_TYPE, None
116 |
117 |
118 | class FSStorage(threading.local):
119 |     def __init__(self):
120 |         self.fs = {}
121 |
122 |     def __getitem__(self, key):
123 |         return self.fs[key]
124 |
125 |     def __setitem__(self, key, value):
126 |
self.fs[key] = value 127 | 128 | def __delitem__(self, key): 129 | del self.fs[key] 130 | 131 | def __contains__(self, key): 132 | return key in self.fs 133 | 134 | def get(self, key, default=None): 135 | return self.fs.get(key, default) 136 | -------------------------------------------------------------------------------- /tests/hash_res/fileUrlWorkers.json: -------------------------------------------------------------------------------- 1 | { 2 | "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_checkin.json": [ 3 | { 4 | "version": 1, 5 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 6 | }, 7 | { 8 | "version": 1, 9 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 10 | }, 11 | { 12 | "version": 1, 13 | "identifier": "55da12e6f8ea35059e783843d4733281" 14 | }, 15 | { 16 | "version": 1, 17 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 18 | }, 19 | { 20 | "version": 1, 21 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 22 | } 23 | ], 24 | "gcs://bucket/file.txt": [ 25 | { 26 | "version": 1, 27 | "identifier": "ce88e51340d034eb95942143f1d177dd" 28 | }, 29 | { 30 | "version": 1, 31 | "identifier": "d53648c23d853765a0662482a16a4ca3" 32 | }, 33 | { 34 | "version": 1, 35 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 36 | }, 37 | { 38 | "version": 1, 39 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 40 | }, 41 | { 42 | "version": 1, 43 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 44 | } 45 | ], 46 | "s://ai-testing/": [ 47 | { 48 | "version": 1, 49 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 50 | }, 51 | { 52 | "version": 1, 53 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 54 | }, 55 | { 56 | "version": 1, 57 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 58 | }, 59 | { 60 | "version": 1, 61 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 62 | }, 63 | { 64 | "version": 1, 65 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 66 | } 67 | ], 68 | "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_review.json": [ 69 | { 70 | "version": 1, 71 | "identifier": "d53648c23d853765a0662482a16a4ca3" 72 | }, 73 | { 74 | "version": 1, 75 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 76 | }, 77 | { 78 | "version": 1, 79 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 80 | }, 81 | { 82 | "version": 1, 83 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 84 | }, 85 | { 86 | "version": 1, 87 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 88 | } 89 | ], 90 | "s3://bucket/path/to/dir": [ 91 | { 92 | "version": 1, 93 | "identifier": "464294a547843ecca2be5a615b1397c0" 94 | }, 95 | { 96 | "version": 1, 97 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 98 | }, 99 | { 100 | "version": 1, 101 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 102 | }, 103 | { 104 | "version": 1, 105 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 106 | }, 107 | { 108 | "version": 1, 109 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 110 | } 111 | ], 112 | "hdfs://host:port/path/to/file": [ 113 | { 114 | "version": 1, 115 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 116 | }, 117 | { 118 | "version": 1, 119 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 120 | }, 121 | { 122 | "version": 1, 123 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 124 | }, 125 | { 126 | "version": 1, 127 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 128 | }, 129 | { 130 | "version": 1, 131 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 132 | } 133 | ], 134 | "wasbs://container@account.blob.core.windows.net/dir": [ 135 | { 136 | "version": 1, 137 | "identifier": 
"b2e9c4abf2c133cab118acca6b6fe4e7" 138 | }, 139 | { 140 | "version": 1, 141 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 142 | }, 143 | { 144 | "version": 1, 145 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 146 | }, 147 | { 148 | "version": 1, 149 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 150 | }, 151 | { 152 | "version": 1, 153 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 154 | } 155 | ], 156 | "gs://bucket/file.txt": [ 157 | { 158 | "version": 1, 159 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 160 | }, 161 | { 162 | "version": 1, 163 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 164 | }, 165 | { 166 | "version": 1, 167 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 168 | }, 169 | { 170 | "version": 1, 171 | "identifier": "b065a65d21613f30b210668f36b99865" 172 | }, 173 | { 174 | "version": 1, 175 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 176 | } 177 | ], 178 | "s3://ai-ref-arch/yelp-review/yelp_review_sample_large.csv": [ 179 | { 180 | "version": 1, 181 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 182 | }, 183 | { 184 | "version": 1, 185 | "identifier": "d53648c23d853765a0662482a16a4ca3" 186 | }, 187 | { 188 | "version": 1, 189 | "identifier": "1ae5694808e43da58ba98374c7e51190" 190 | }, 191 | { 192 | "version": 1, 193 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 194 | }, 195 | { 196 | "version": 1, 197 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 198 | } 199 | ], 200 | "s3://ai-ref-arch/yelp-review/model.pt": [ 201 | { 202 | "version": 1, 203 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 204 | }, 205 | { 206 | "version": 1, 207 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 208 | }, 209 | { 210 | "version": 1, 211 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 212 | }, 213 | { 214 | "version": 1, 215 | "identifier": "1ae5694808e43da58ba98374c7e51190" 216 | }, 217 | { 218 | "version": 1, 219 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 220 | } 221 | ], 222 | "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_business.json": [ 223 | { 224 | "version": 1, 225 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 226 | }, 227 | { 228 | "version": 1, 229 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 230 | }, 231 | { 232 | "version": 1, 233 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 234 | }, 235 | { 236 | "version": 1, 237 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 238 | }, 239 | { 240 | "version": 1, 241 | "identifier": "55da12e6f8ea35059e783843d4733281" 242 | } 243 | ], 244 | "hdfs://namenode:8020/user/hadoop/file.txt": [ 245 | { 246 | "version": 1, 247 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 248 | }, 249 | { 250 | "version": 1, 251 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 252 | }, 253 | { 254 | "version": 1, 255 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 256 | }, 257 | { 258 | "version": 1, 259 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 260 | }, 261 | { 262 | "version": 1, 263 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 264 | } 265 | ], 266 | "gs://bucket/dir": [ 267 | { 268 | "version": 1, 269 | "identifier": "b065a65d21613f30b210668f36b99865" 270 | }, 271 | { 272 | "version": 1, 273 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 274 | }, 275 | { 276 | "version": 1, 277 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 278 | }, 279 | { 280 | "version": 1, 281 | "identifier": "1ae5694808e43da58ba98374c7e51190" 282 | }, 283 | { 284 | "version": 1, 285 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 286 | } 287 | ], 288 | 
"gcs://bucket/dir": [ 289 | { 290 | "version": 1, 291 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 292 | }, 293 | { 294 | "version": 1, 295 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 296 | }, 297 | { 298 | "version": 1, 299 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 300 | }, 301 | { 302 | "version": 1, 303 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 304 | }, 305 | { 306 | "version": 1, 307 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 308 | } 309 | ], 310 | "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_user.json": [ 311 | { 312 | "version": 1, 313 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 314 | }, 315 | { 316 | "version": 1, 317 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 318 | }, 319 | { 320 | "version": 1, 321 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 322 | }, 323 | { 324 | "version": 1, 325 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 326 | }, 327 | { 328 | "version": 1, 329 | "identifier": "d53648c23d853765a0662482a16a4ca3" 330 | } 331 | ], 332 | "s3://bucket/path/to/file": [ 333 | { 334 | "version": 1, 335 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 336 | }, 337 | { 338 | "version": 1, 339 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 340 | }, 341 | { 342 | "version": 1, 343 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 344 | }, 345 | { 346 | "version": 1, 347 | "identifier": "464294a547843ecca2be5a615b1397c0" 348 | }, 349 | { 350 | "version": 1, 351 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 352 | } 353 | ], 354 | "hdfs://host:port/path/to/dir": [ 355 | { 356 | "version": 1, 357 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 358 | }, 359 | { 360 | "version": 1, 361 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 362 | }, 363 | { 364 | "version": 1, 365 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 366 | }, 367 | { 368 | "version": 1, 369 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 370 | }, 371 | { 372 | "version": 1, 373 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 374 | } 375 | ], 376 | "s3://ai-ref-arch/yelp-review/yelp_academic_dataset_tip.json": [ 377 | { 378 | "version": 1, 379 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 380 | }, 381 | { 382 | "version": 1, 383 | "identifier": "d9824b27273034f69ddf0369b907de1e" 384 | }, 385 | { 386 | "version": 1, 387 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 388 | }, 389 | { 390 | "version": 1, 391 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 392 | }, 393 | { 394 | "version": 1, 395 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 396 | } 397 | ], 398 | "hdfs://namenode:8020/user/hadoop/dir": [ 399 | { 400 | "version": 1, 401 | "identifier": "865668aadaba3511a84d8054613dd14b" 402 | }, 403 | { 404 | "version": 1, 405 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 406 | }, 407 | { 408 | "version": 1, 409 | "identifier": "b065a65d21613f30b210668f36b99865" 410 | }, 411 | { 412 | "version": 1, 413 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 414 | }, 415 | { 416 | "version": 1, 417 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 418 | } 419 | ], 420 | "s3://ai-ref-arch/yelp-review/yelp_review_sample.csv": [ 421 | { 422 | "version": 1, 423 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 424 | }, 425 | { 426 | "version": 1, 427 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 428 | }, 429 | { 430 | "version": 1, 431 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 432 | }, 433 | { 434 | "version": 1, 435 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 436 | }, 437 | { 438 | "version": 
1, 439 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 440 | } 441 | ], 442 | "wasbs://container@account.blob.core.windows.net/file.txt": [ 443 | { 444 | "version": 1, 445 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 446 | }, 447 | { 448 | "version": 1, 449 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 450 | }, 451 | { 452 | "version": 1, 453 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 454 | }, 455 | { 456 | "version": 1, 457 | "identifier": "865668aadaba3511a84d8054613dd14b" 458 | }, 459 | { 460 | "version": 1, 461 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 462 | } 463 | ] 464 | } 465 | -------------------------------------------------------------------------------- /alluxio/worker_ring.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import random 4 | import threading 5 | import time 6 | import uuid 7 | from dataclasses import dataclass 8 | from typing import List 9 | from typing import Set 10 | 11 | import etcd3 12 | import mmh3 13 | from sortedcontainers import SortedDict 14 | 15 | from .const import ALLUXIO_CLUSTER_NAME_DEFAULT_VALUE 16 | from .const import ALLUXIO_CLUSTER_NAME_KEY 17 | from .const import ALLUXIO_ETCD_PASSWORD_KEY 18 | from .const import ALLUXIO_ETCD_USERNAME_KEY 19 | from .const import ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE 20 | from .const import ETCD_PREFIX_FORMAT 21 | 22 | DEFAULT_HOST = "localhost" 23 | DEFAULT_CONTAINER_HOST = "" 24 | DEFAULT_RPC_PORT = 29999 25 | DEFAULT_DATA_PORT = 29997 26 | DEFAULT_SECURE_RPC_PORT = 0 27 | DEFAULT_NETTY_DATA_PORT = 29997 28 | DEFAULT_WEB_PORT = 30000 29 | DEFAULT_DOMAIN_SOCKET_PATH = "" 30 | DEFAULT_WORKER_IDENTIFIER_VERSION = 1 31 | 32 | 33 | @dataclass(frozen=True) 34 | class WorkerNetAddress: 35 | host: str = DEFAULT_HOST 36 | container_host: str = DEFAULT_CONTAINER_HOST 37 | rpc_port: int = DEFAULT_RPC_PORT 38 | data_port: int = DEFAULT_DATA_PORT 39 | secure_rpc_port: int = DEFAULT_SECURE_RPC_PORT 40 | netty_data_port: int = DEFAULT_NETTY_DATA_PORT 41 | web_port: int = DEFAULT_WEB_PORT 42 | domain_socket_path: str = DEFAULT_DOMAIN_SOCKET_PATH 43 | http_server_port: int = ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE 44 | 45 | 46 | @dataclass(frozen=True) 47 | class WorkerIdentity: 48 | version: int 49 | identifier: bytes 50 | 51 | 52 | class NULL_NAMESPACE: 53 | bytes = b"" 54 | 55 | 56 | @dataclass(frozen=True) 57 | class WorkerEntity: 58 | worker_identity: WorkerIdentity 59 | worker_net_address: WorkerNetAddress 60 | 61 | @staticmethod 62 | def from_worker_info(worker_info): 63 | try: 64 | worker_info_string = worker_info.decode("utf-8") 65 | worker_info_json = json.loads(worker_info_string) 66 | identity_info = worker_info_json.get("Identity", {}) 67 | worker_identity = WorkerIdentity( 68 | version=int(identity_info.get("version")), 69 | identifier=bytes.fromhex(identity_info.get("identifier")), 70 | ) 71 | 72 | worker_net_address_info = worker_info_json.get( 73 | "WorkerNetAddress", {} 74 | ) 75 | worker_net_address = WorkerNetAddress( 76 | host=worker_net_address_info.get("Host", DEFAULT_HOST), 77 | container_host=worker_net_address_info.get( 78 | "ContainerHost", DEFAULT_CONTAINER_HOST 79 | ), 80 | rpc_port=worker_net_address_info.get( 81 | "RpcPort", DEFAULT_RPC_PORT 82 | ), 83 | data_port=worker_net_address_info.get( 84 | "DataPort", DEFAULT_DATA_PORT 85 | ), 86 | secure_rpc_port=worker_net_address_info.get( 87 | "SecureRpcPort", DEFAULT_SECURE_RPC_PORT 88 | ), 89 | 
netty_data_port=worker_net_address_info.get(
90 |                     "NettyDataPort", DEFAULT_NETTY_DATA_PORT
91 |                 ),
92 |                 web_port=worker_net_address_info.get(
93 |                     "WebPort", DEFAULT_WEB_PORT
94 |                 ),
95 |                 domain_socket_path=worker_net_address_info.get(
96 |                     "DomainSocketPath",
97 |                     DEFAULT_DOMAIN_SOCKET_PATH,
98 |                 ),
99 |                 http_server_port=worker_net_address_info.get(
100 |                     "HttpServerPort",
101 |                     ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE,
102 |                 ),
103 |             )
104 |             return WorkerEntity(worker_identity, worker_net_address)
105 |         except json.JSONDecodeError as e:
106 |             raise ValueError(
107 |                 f"Provided worker_info is not a valid JSON string: {e}"
108 |             ) from e
109 |         except AttributeError as e:
110 |             raise AttributeError(
111 |                 f"Provided worker_info must be a bytes-like object: {e}"
112 |             ) from e
113 |         except Exception as e:
114 |             raise Exception(
115 |                 f"Failed to process given worker_info {worker_info}: {e}"
116 |             ) from e
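    # For reference, from_worker_info() above expects each etcd value to be
    # a UTF-8 JSON payload of roughly this shape (a sketch only; the host
    # and ports are made-up placeholders, the identifier is any 32-char hex
    # string such as the ones in tests/hash_res/):
    #
    #   b'{"Identity": {"version": 1,
    #                   "identifier": "eb5a6614aa793f968ecc6426a8aaf31b"},
    #      "WorkerNetAddress": {"Host": "worker-0.example.com",
    #                           "RpcPort": 29999,
    #                           "HttpServerPort": 28080}}'
    #
    # Any WorkerNetAddress field that is omitted falls back to the
    # DEFAULT_* constants defined at the top of this module.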
117 | 
118 |     @staticmethod
119 |     def from_host_and_port(worker_host, worker_http_port):
120 |         worker_uuid = uuid.uuid3(NULL_NAMESPACE, worker_host)
121 |         uuid_bytes = worker_uuid.bytes
122 | 
123 |         worker_identity = WorkerIdentity(
124 |             DEFAULT_WORKER_IDENTIFIER_VERSION, uuid_bytes
125 |         )
126 |         worker_net_address = WorkerNetAddress(
127 |             host=worker_host,
128 |             container_host=DEFAULT_CONTAINER_HOST,
129 |             rpc_port=DEFAULT_RPC_PORT,
130 |             data_port=DEFAULT_DATA_PORT,
131 |             secure_rpc_port=DEFAULT_SECURE_RPC_PORT,
132 |             netty_data_port=DEFAULT_NETTY_DATA_PORT,
133 |             web_port=DEFAULT_WEB_PORT,
134 |             domain_socket_path=DEFAULT_DOMAIN_SOCKET_PATH,
135 |             http_server_port=worker_http_port,
136 |         )
137 |         return WorkerEntity(worker_identity, worker_net_address)
138 | 
139 | 
140 | class EtcdClient:
141 |     def __init__(self, host="localhost", port=2379, options=None):
142 |         self._host = host
143 |         self._port = port
144 | 
145 |         # Parse options
146 |         self._etcd_username = None
147 |         self._etcd_password = None
148 |         self._prefix = ETCD_PREFIX_FORMAT.format(
149 |             cluster_name=ALLUXIO_CLUSTER_NAME_DEFAULT_VALUE
150 |         )
151 |         if options:
152 |             if ALLUXIO_ETCD_USERNAME_KEY in options:
153 |                 self._etcd_username = options[ALLUXIO_ETCD_USERNAME_KEY]
154 |             if ALLUXIO_ETCD_PASSWORD_KEY in options:
155 |                 self._etcd_password = options[ALLUXIO_ETCD_PASSWORD_KEY]
156 |             if ALLUXIO_CLUSTER_NAME_KEY in options:
157 |                 self._prefix = ETCD_PREFIX_FORMAT.format(
158 |                     cluster_name=options[ALLUXIO_CLUSTER_NAME_KEY]
159 |                 )
160 | 
161 |         if (self._etcd_username is None) != (self._etcd_password is None):
162 |             raise ValueError(
163 |                 "Either set both the etcd username and password, or leave both unset."
164 |             )
165 | 
166 |     def get_worker_entities(self) -> Set[WorkerEntity]:
167 |         """
168 |         Retrieve worker entities from etcd using the specified prefix.
169 | 
170 |         Returns:
171 |             set: A set of WorkerEntity objects.
172 |         """
173 |         # Note that EtcdClient should not be passed through python multiprocessing
174 |         etcd = self._get_etcd_client()
175 |         worker_entities: Set[WorkerEntity] = set()
176 |         try:
177 |             worker_entities = {
178 |                 WorkerEntity.from_worker_info(worker_info)
179 |                 for worker_info, _ in etcd.get_prefix(self._prefix)
180 |             }
181 |         except Exception as e:
182 |             raise Exception(
183 |                 f"Failed to retrieve worker info list from ETCD server {self._host}:{self._port}: {e}"
184 |             ) from e
185 | 
186 |         if not worker_entities:
187 |             # TODO(lu) deal with the alluxio cluster initializing issue
188 |             raise Exception(
189 |                 "Alluxio cluster may still be initializing: no workers registered."
190 |             )
191 |         return worker_entities
192 | 
193 |     def _get_etcd_client(self):
194 |         if self._etcd_username:
195 |             return etcd3.client(
196 |                 host=self._host,
197 |                 port=self._port,
198 |                 user=self._etcd_username,
199 |                 password=self._etcd_password,
200 |             )
201 |         return etcd3.client(host=self._host, port=self._port)
202 | 
203 | 
204 | class ConsistentHashProvider:
205 |     def __init__(
206 |         self,
207 |         etcd_hosts=None,
208 |         etcd_port=None,
209 |         worker_hosts=None,
210 |         worker_http_port=None,
211 |         options=None,
212 |         logger=None,
213 |         etcd_refresh_workers_interval=None,
214 |         hash_node_per_worker=None,
215 |         max_attempts=100,
216 |     ):
217 |         self._logger = logger or logging.getLogger("ConsistentHashProvider")
218 |         self._etcd_hosts = etcd_hosts
219 |         self._etcd_port = etcd_port
220 |         self._options = options
221 |         self._hash_node_per_worker = hash_node_per_worker
222 |         self._max_attempts = max_attempts
223 |         self._lock = threading.Lock()
224 |         self._is_ring_initialized = False
225 |         self._worker_info_map = {}
226 |         self._etcd_refresh_workers_interval = etcd_refresh_workers_interval or 0  # None disables refresh
227 |         if worker_hosts:
228 |             self._update_hash_ring(
229 |                 self._generate_worker_info_map(worker_hosts, worker_http_port)
230 |             )
231 |         if self._etcd_hosts:
232 |             self._fetch_workers_and_update_ring()
233 |             if self._etcd_refresh_workers_interval > 0:
234 |                 self._shutdown_background_update_ring_event = threading.Event()
235 |                 self._background_thread = None
236 |                 self._start_background_update_ring(
237 |                     self._etcd_refresh_workers_interval
238 |                 )
239 | 
240 |     def get_multiple_workers(
241 |         self, key: str, count: int
242 |     ) -> List[WorkerNetAddress]:
243 |         """
244 |         Retrieve a specified number of worker addresses based on a given key.
245 | 
246 |         Args:
247 |             key (str): The unique path identifier, e.g., full UFS path.
248 |             count (int): The number of worker addresses to retrieve.
249 | 
250 |         Returns:
251 |             List[WorkerNetAddress]: A list containing the desired number of WorkerNetAddress objects.
252 |         """
253 |         with self._lock:
254 |             worker_identities = self._get_multiple_worker_identities(
255 |                 key, count
256 |             )
257 |             worker_addresses = []
258 |             for worker_identity in worker_identities:
259 |                 worker_address = self._worker_info_map.get(worker_identity)
260 |                 if worker_address:
261 |                     worker_addresses.append(worker_address)
262 |             return worker_addresses
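    # A usage sketch for the static-worker-list path (the host names, HTTP
    # port, and node count below are illustrative assumptions, not values
    # taken from this repository):
    #
    #   provider = ConsistentHashProvider(
    #       worker_hosts="worker-0,worker-1,worker-2",
    #       worker_http_port=28080,
    #       hash_node_per_worker=5,
    #   )
    #   workers = provider.get_multiple_workers(
    #       "s3://bucket/path/to/file", count=2
    #   )
    #   # 'workers' holds up to 2 WorkerNetAddress entries; the same key
    #   # keeps mapping to the same workers until the ring changes.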
263 | 
264 |     def _get_multiple_worker_identities(
265 |         self, key: str, count: int
266 |     ) -> List[WorkerIdentity]:
267 |         """
268 |         Callers must hold self._lock; this method is not thread-safe on its own.
269 |         """
270 |         count = (
271 |             len(self._worker_info_map)
272 |             if count >= len(self._worker_info_map)
273 |             else count
274 |         )
275 |         workers = []
276 |         attempts = 0
277 |         while len(workers) < count and attempts < self._max_attempts:
278 |             attempts += 1
279 |             worker = self._get_ceiling_value(self._hash(key, attempts))
280 |             if worker not in workers:
281 |                 workers.append(worker)
282 | 
283 |         return workers
284 | 
285 |     def _start_background_update_ring(self, interval):
286 |         def update_loop():
287 |             while not self._shutdown_background_update_ring_event.is_set():
288 |                 try:
289 |                     self._fetch_workers_and_update_ring()
290 |                 except Exception as e:
291 |                     self._logger.error(f"Error updating worker hash ring: {e}")
292 |                 time.sleep(interval)
293 | 
294 |         self._background_thread = threading.Thread(target=update_loop)
295 |         self._background_thread.daemon = True
296 |         self._background_thread.start()
297 | 
298 |     def shutdown_background_update_ring(self):
299 |         if self._etcd_hosts and self._etcd_refresh_workers_interval > 0:
300 |             self._shutdown_background_update_ring_event.set()
301 |             if self._background_thread:
302 |                 self._background_thread.join()
303 | 
304 |     def __del__(self):
305 |         self.shutdown_background_update_ring()
306 | 
307 |     def _fetch_workers_and_update_ring(self):
308 |         etcd_hosts_list = self._etcd_hosts.split(",")
309 |         random.shuffle(etcd_hosts_list)
310 |         worker_entities: Set[WorkerEntity] = set()
311 |         for host in etcd_hosts_list:
312 |             try:
313 |                 worker_entities = EtcdClient(
314 |                     host=host, port=self._etcd_port, options=self._options
315 |                 ).get_worker_entities()
316 |                 break
317 |             except Exception:  # try the next etcd host
318 |                 continue
319 |         if not worker_entities:
320 |             if self._is_ring_initialized:
321 |                 self._logger.warning(
322 |                     f"Failed to retrieve worker info list from ETCD servers: {self._etcd_hosts}"
323 |                 )
324 |                 return
325 |             else:
326 |                 raise Exception(
327 |                     f"Failed to retrieve worker info list from ETCD servers: {self._etcd_hosts}"
328 |                 )
329 | 
330 |         worker_info_map = {}
331 |         diff_in_worker_info_detected = False
332 |         for worker_entity in worker_entities:
333 |             worker_info_map[
334 |                 worker_entity.worker_identity
335 |             ] = worker_entity.worker_net_address
336 |             if worker_entity.worker_identity not in self._worker_info_map:
337 |                 diff_in_worker_info_detected = True
338 |             elif (
339 |                 self._worker_info_map[worker_entity.worker_identity]
340 |                 != worker_entity.worker_net_address
341 |             ):
342 |                 diff_in_worker_info_detected = True
343 | 
344 |         if len(worker_info_map) != len(self._worker_info_map):
345 |             diff_in_worker_info_detected = True
346 | 
347 |         if diff_in_worker_info_detected:
348 |             self._update_hash_ring(worker_info_map)
349 | 
350 |     def _update_hash_ring(
351 |         self, worker_info_map: "dict[WorkerIdentity, WorkerNetAddress]"  # quoted: bare dict[...] needs Python 3.9+
352 |     ):
353 |         with self._lock:
354 |             hash_ring = SortedDict()
355 |             for worker_identity in worker_info_map.keys():
356 |                 for i in range(self._hash_node_per_worker):
357 |                     hash_key = self._hash_worker_identity(worker_identity, i)
358 | 
hash_ring[hash_key] = worker_identity 359 | self.hash_ring = hash_ring 360 | self._worker_info_map = worker_info_map 361 | self._is_ring_initialized = True 362 | 363 | def _get_ceiling_value(self, hash_key: int): 364 | key_index = self.hash_ring.bisect_right(hash_key) 365 | if key_index < len(self.hash_ring): 366 | ceiling_key = self.hash_ring.keys()[key_index] 367 | ceiling_value = self.hash_ring[ceiling_key] 368 | return ceiling_value 369 | else: 370 | return self.hash_ring.peekitem(0)[1] 371 | 372 | def _hash(self, key: str, index: int) -> int: 373 | hasher = mmh3.mmh3_32() 374 | hasher.update(key.encode("utf-8")) 375 | hasher.update(index.to_bytes(4, "little")) 376 | return hasher.sintdigest() 377 | 378 | def _hash_worker_identity( 379 | self, worker: WorkerIdentity, node_index: int 380 | ) -> int: 381 | # Hash the combined bytes 382 | hasher = mmh3.mmh3_32() 383 | hasher.update(worker.identifier) 384 | hasher.update(worker.version.to_bytes(4, "little")) 385 | hasher.update(node_index.to_bytes(4, "little")) 386 | return hasher.sintdigest() 387 | 388 | def _generate_worker_info_map(self, worker_hosts, worker_http_port): 389 | worker_info_map = {} 390 | host_list = [host.strip() for host in worker_hosts.split(",")] 391 | for worker_host in host_list: 392 | worker_entity = WorkerEntity.from_host_and_port( 393 | worker_host, worker_http_port 394 | ) 395 | worker_info_map[ 396 | worker_entity.worker_identity 397 | ] = worker_entity.worker_net_address 398 | return worker_info_map 399 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Alluxio Open Foundation 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | 204 | ======================================================================= 205 | 206 | Alluxio Subcomponents: 207 | 208 | The Alluxio project contains subcomponents with separate copyright 209 | notices and license terms. Your use of the source code for the these 210 | subcomponents is subject to the terms and conditions of the following 211 | licenses. 
212 | 213 | ----------------------------------------------------------------------- 214 | The Apache License 215 | ----------------------------------------------------------------------- 216 | 217 | Alluxio bundles portions of the following under the Apache License 2.0 as detailed 218 | above: 219 | 220 | - Bootstrap 2 (http://getbootstrap.com) - Copyright 2011-2015 Twitter Inc 221 | 222 | ----------------------------------------------------------------------- 223 | The MIT License 224 | ----------------------------------------------------------------------- 225 | 226 | Alluxio bundles portions of the following under the MIT License: 227 | 228 | - jQuery (http://jquery.com) - Copyright 2014 jQuery Foundation and other contributors 229 | - Popper (https://popper.js.org/) - Copyright (C) Federico Zivolo 2019 230 | 231 | Permission is hereby granted, free of charge, to any person obtaining a copy 232 | of this software and associated documentation files (the "Software"), to deal 233 | in the Software without restriction, including without limitation the rights 234 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 235 | copies of the Software, and to permit persons to whom the Software is 236 | furnished to do so, subject to the following conditions: 237 | 238 | The above copyright notice and this permission notice shall be included in 239 | all copies or substantial portions of the Software. 240 | 241 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 242 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 243 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 244 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 245 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 246 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 247 | THE SOFTWARE. 248 | 249 | ----------------------------------------------------------------------- 250 | The BSD 2-Clause License 251 | ----------------------------------------------------------------------- 252 | 253 | Alluxio bundles portions of the following under the BSD 2-Clause License: 254 | 255 | - Pygments (http://pygments.org) - Copyright 2006-2015 various contributors 256 | 257 | All rights reserved. 258 | 259 | Redistribution and use in source and binary forms, with or without 260 | modification, are permitted provided that the following conditions are 261 | met: 262 | 263 | * Redistributions of source code must retain the above copyright 264 | notice, this list of conditions and the following disclaimer. 265 | 266 | * Redistributions in binary form must reproduce the above copyright 267 | notice, this list of conditions and the following disclaimer in the 268 | documentation and/or other materials provided with the distribution. 269 | 270 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 271 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 272 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 273 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 274 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 275 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 276 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 277 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 278 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 279 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 280 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 281 | 282 | ----------------------------------------------------------------------- 283 | Eclipse Public License 284 | ----------------------------------------------------------------------- 285 | 286 | Alluxio depends on Jetty the following portions of which are under the Eclipse Public License 287 | 288 | - org.eclipse.jetty.orbit:org.eclipse.jdt.core (https://www.eclipse.org/jetty/) 289 | 290 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). 291 | ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS 292 | AGREEMENT. 293 | 294 | 1. DEFINITIONS 295 | 296 | "Contribution" means: 297 | 298 | a) in the case of the initial Contributor, the initial code and documentation distributed under this 299 | Agreement, and 300 | 301 | b) in the case of each subsequent Contributor: 302 | 303 | i) changes to the Program, and 304 | 305 | ii) additions to the Program; 306 | 307 | where such changes and/or additions to the Program originate from and are distributed by that 308 | particular Contributor. A Contribution 'originates' from a Contributor if it was added to the 309 | Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do 310 | not include additions to the Program which: (i) are separate modules of software distributed in 311 | conjunction with the Program under their own license agreement, and (ii) are not derivative works of 312 | the Program. 313 | 314 | "Contributor" means any person or entity that distributes the Program. 315 | 316 | "Licensed Patents mean patent claims licensable by a Contributor which are necessarily infringed by 317 | "the use or sale of its Contribution alone or when combined with the Program. 318 | 319 | "Program" means the Contributions distributed in accordance with this Agreement. 320 | 321 | "Recipient" means anyone who receives the Program under this Agreement, including all Contributors. 322 | 323 | 2. GRANT OF RIGHTS 324 | 325 | a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, 326 | worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly 327 | display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, 328 | and such derivative works, in source code and object code form. 329 | 330 | b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, 331 | worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, 332 | import and otherwise transfer the Contribution of such Contributor, if any, in source code and 333 | object code form. This patent license shall apply to the combination of the Contribution and the 334 | Program if, at the time the Contribution is added by the Contributor, such addition of the 335 | Contribution causes such combination to be covered by the Licensed Patents. 
The patent license shall 336 | not apply to any other combinations which include the Contribution. No hardware per se is licensed 337 | hereunder. 338 | 339 | c) Recipient understands that although each Contributor grants the licenses to its Contributions set 340 | forth herein, no assurances are provided by any Contributor that the Program does not infringe the 341 | patent or other intellectual property rights of any other entity. Each Contributor disclaims any 342 | liability to Recipient for claims brought by any other entity based on infringement of intellectual 343 | property rights or otherwise. As a condition to exercising the rights and licenses granted 344 | hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual 345 | property rights needed, if any. For example, if a third party patent license is required to allow 346 | Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before 347 | distributing the Program. 348 | 349 | d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its 350 | Contribution, if any, to grant the copyright license set forth in this Agreement. 351 | 352 | 3. REQUIREMENTS 353 | 354 | A Contributor may choose to distribute the Program in object code form under its own license 355 | agreement, provided that: 356 | 357 | a) it complies with the terms and conditions of this Agreement; and 358 | 359 | b) its license agreement: 360 | 361 | i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and 362 | implied, including warranties or conditions of title and non-infringement, and implied warranties or 363 | conditions of merchantability and fitness for a particular purpose; 364 | 365 | ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, 366 | indirect, special, incidental and consequential damages, such as lost profits; 367 | 368 | iii) states that any provisions which differ from this Agreement are offered by that Contributor 369 | alone and not by any other party; and 370 | 371 | iv) states that source code for the Program is available from such Contributor, and informs 372 | licensees how to obtain it in a reasonable manner on or through a medium customarily used for 373 | software exchange. 374 | 375 | When the Program is made available in source code form: 376 | 377 | a) it must be made available under this Agreement; and 378 | 379 | b) a copy of this Agreement must be included with each copy of the Program. 380 | 381 | Contributors may not remove or alter any copyright notices contained within the Program. 382 | 383 | Each Contributor must identify itself as the originator of its Contribution, if any, in a manner 384 | that reasonably allows subsequent Recipients to identify the originator of the Contribution. 385 | 386 | 4. COMMERCIAL DISTRIBUTION 387 | 388 | Commercial distributors of software may accept certain responsibilities with respect to end users, 389 | business partners and the like. While this license is intended to facilitate the commercial use of 390 | the Program, the Contributor who includes the Program in a commercial product offering should do so 391 | in a manner which does not create potential liability for other Contributors. 
Therefore, if a 392 | Contributor includes the Program in a commercial product offering, such Contributor ("Commercial 393 | Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified 394 | Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, 395 | lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the 396 | extent caused by the acts or omissions of such Commercial Contributor in connection with its 397 | distribution of the Program in a commercial product offering. The obligations in this section do not 398 | apply to any claims or Losses relating to any actual or alleged intellectual property infringement. 399 | In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor 400 | in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the 401 | Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified 402 | Contributor may participate in any such claim at its own expense. 403 | 404 | For example, a Contributor might include the Program in a commercial product offering, Product X. 405 | That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes 406 | performance claims, or offers warranties related to Product X, those performance claims and 407 | warranties are such Commercial Contributor's responsibility alone. Under this section, the 408 | Commercial Contributor would have to defend claims against the other Contributors related to those 409 | performance claims and warranties, and if a court requires any other Contributor to pay any damages 410 | as a result, the Commercial Contributor must pay those damages. 411 | 412 | 5. NO WARRANTY 413 | 414 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, 415 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT 416 | LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR 417 | A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of 418 | using and distributing the Program and assumes all risks associated with its exercise of rights 419 | under this Agreement , including but not limited to the risks and costs of program errors, 420 | compliance with applicable laws, damage to or loss of data, programs or equipment, and 421 | unavailability or interruption of operations. 422 | 423 | 6. DISCLAIMER OF LIABILITY 424 | 425 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE 426 | ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 427 | (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 428 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF 429 | THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF 430 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 431 | 432 | 7. 
GENERAL 433 | 434 | If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not 435 | affect the validity or enforceability of the remainder of the terms of this Agreement, and without 436 | further action by the parties hereto, such provision shall be reformed to the minimum extent 437 | necessary to make such provision valid and enforceable. 438 | 439 | If Recipient institutes patent litigation against any entity (including a cross-claim or 440 | counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program 441 | with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights 442 | granted under Section 2(b) shall terminate as of the date such litigation is filed. 443 | 444 | All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the 445 | material terms or conditions of this Agreement and does not cure such failure in a reasonable period 446 | of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement 447 | terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably 448 | practicable. However, Recipient's obligations under this Agreement and any licenses granted by 449 | Recipient relating to the Program shall continue and survive. 450 | 451 | Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid 452 | inconsistency the Agreement is copyrighted and may only be modified in the following manner. The 453 | Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement 454 | from time to time. No one other than the Agreement Steward has the right to modify this Agreement. 455 | The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the 456 | responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of 457 | the Agreement will be given a distinguishing version number. The Program (including Contributions) 458 | may always be distributed subject to the version of the Agreement under which it was received. In 459 | addition, after a new version of the Agreement is published, Contributor may elect to distribute the 460 | Program (including its Contributions) under the new version. Except as expressly stated in Sections 461 | 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any 462 | Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All 463 | rights in the Program not expressly granted under this Agreement are reserved. 464 | 465 | This Agreement is governed by the laws of the State of New York and the intellectual property laws 466 | of the United States of America. No party to this Agreement will bring a legal action under this 467 | Agreement more than one year after the cause of action arose. Each party waives its rights to a jury 468 | trial in any resulting litigation. 469 | 470 | ----------------------------------------------------------------------- 471 | Public Domain 472 | ----------------------------------------------------------------------- 473 | 474 | Alluxio bundles portions of the following which are in the public domain: 475 | 476 | - Cookies.js (https://github.com/ScottHamper/Cookies/) 477 | 478 | 479 | Alluxio bundles JSR-166 classes which are donated to public domain. 
480 | For details, see CC0 1.0 Universal (1.0), Public Domain Dedication, 481 | http://creativecommons.org/publicdomain/zero/1.0/ 482 | -------------------------------------------------------------------------------- /tests/hash_res/activeNodesMap.json: -------------------------------------------------------------------------------- 1 | { 2 | "-2115369034": { 3 | "version": 1, 4 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 5 | }, 6 | "-2091368727": { 7 | "version": 1, 8 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 9 | }, 10 | "-2035703500": { 11 | "version": 1, 12 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 13 | }, 14 | "-2020503163": { 15 | "version": 1, 16 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 17 | }, 18 | "-2018838375": { 19 | "version": 1, 20 | "identifier": "d53648c23d853765a0662482a16a4ca3" 21 | }, 22 | "-2001073348": { 23 | "version": 1, 24 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 25 | }, 26 | "-2001008823": { 27 | "version": 1, 28 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 29 | }, 30 | "-2000498259": { 31 | "version": 1, 32 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 33 | }, 34 | "-1998016219": { 35 | "version": 1, 36 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 37 | }, 38 | "-1992280867": { 39 | "version": 1, 40 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 41 | }, 42 | "-1949531595": { 43 | "version": 1, 44 | "identifier": "d53648c23d853765a0662482a16a4ca3" 45 | }, 46 | "-1945615066": { 47 | "version": 1, 48 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 49 | }, 50 | "-1905409545": { 51 | "version": 1, 52 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 53 | }, 54 | "-1898825753": { 55 | "version": 1, 56 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 57 | }, 58 | "-1880929873": { 59 | "version": 1, 60 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 61 | }, 62 | "-1879245916": { 63 | "version": 1, 64 | "identifier": "55da12e6f8ea35059e783843d4733281" 65 | }, 66 | "-1874448919": { 67 | "version": 1, 68 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 69 | }, 70 | "-1836289830": { 71 | "version": 1, 72 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 73 | }, 74 | "-1807338973": { 75 | "version": 1, 76 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 77 | }, 78 | "-1785695523": { 79 | "version": 1, 80 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 81 | }, 82 | "-1644192530": { 83 | "version": 1, 84 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 85 | }, 86 | "-1640144922": { 87 | "version": 1, 88 | "identifier": "1ae5694808e43da58ba98374c7e51190" 89 | }, 90 | "-1621072852": { 91 | "version": 1, 92 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 93 | }, 94 | "-1613871358": { 95 | "version": 1, 96 | "identifier": "865668aadaba3511a84d8054613dd14b" 97 | }, 98 | "-1607875238": { 99 | "version": 1, 100 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 101 | }, 102 | "-1603622913": { 103 | "version": 1, 104 | "identifier": "464294a547843ecca2be5a615b1397c0" 105 | }, 106 | "-1598860684": { 107 | "version": 1, 108 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 109 | }, 110 | "-1591914451": { 111 | "version": 1, 112 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 113 | }, 114 | "-1582782076": { 115 | "version": 1, 116 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 117 | }, 118 | "-1572608538": { 119 | "version": 1, 120 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 121 | }, 122 | "-1565343821": { 123 | "version": 1, 124 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 125 | }, 
126 | "-1544884377": { 127 | "version": 1, 128 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 129 | }, 130 | "-1533586579": { 131 | "version": 1, 132 | "identifier": "1ae5694808e43da58ba98374c7e51190" 133 | }, 134 | "-1502419178": { 135 | "version": 1, 136 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 137 | }, 138 | "-1497415322": { 139 | "version": 1, 140 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 141 | }, 142 | "-1481119148": { 143 | "version": 1, 144 | "identifier": "55da12e6f8ea35059e783843d4733281" 145 | }, 146 | "-1460481702": { 147 | "version": 1, 148 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 149 | }, 150 | "-1454067970": { 151 | "version": 1, 152 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 153 | }, 154 | "-1428680219": { 155 | "version": 1, 156 | "identifier": "b065a65d21613f30b210668f36b99865" 157 | }, 158 | "-1402396211": { 159 | "version": 1, 160 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 161 | }, 162 | "-1388188426": { 163 | "version": 1, 164 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 165 | }, 166 | "-1361504644": { 167 | "version": 1, 168 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 169 | }, 170 | "-1353690830": { 171 | "version": 1, 172 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 173 | }, 174 | "-1308819006": { 175 | "version": 1, 176 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 177 | }, 178 | "-1274190161": { 179 | "version": 1, 180 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 181 | }, 182 | "-1274184776": { 183 | "version": 1, 184 | "identifier": "70d7e21f675633c38b057731c646283b" 185 | }, 186 | "-1254602663": { 187 | "version": 1, 188 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 189 | }, 190 | "-1242990879": { 191 | "version": 1, 192 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 193 | }, 194 | "-1209240677": { 195 | "version": 1, 196 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 197 | }, 198 | "-1205533474": { 199 | "version": 1, 200 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 201 | }, 202 | "-1181002889": { 203 | "version": 1, 204 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 205 | }, 206 | "-1170547462": { 207 | "version": 1, 208 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 209 | }, 210 | "-1136284210": { 211 | "version": 1, 212 | "identifier": "1ae5694808e43da58ba98374c7e51190" 213 | }, 214 | "-1131198756": { 215 | "version": 1, 216 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 217 | }, 218 | "-1108751870": { 219 | "version": 1, 220 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 221 | }, 222 | "-1085625064": { 223 | "version": 1, 224 | "identifier": "1ae5694808e43da58ba98374c7e51190" 225 | }, 226 | "-1072596604": { 227 | "version": 1, 228 | "identifier": "865668aadaba3511a84d8054613dd14b" 229 | }, 230 | "-1067521732": { 231 | "version": 1, 232 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 233 | }, 234 | "-1027139400": { 235 | "version": 1, 236 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 237 | }, 238 | "-965000512": { 239 | "version": 1, 240 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 241 | }, 242 | "-963201551": { 243 | "version": 1, 244 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 245 | }, 246 | "-953941995": { 247 | "version": 1, 248 | "identifier": "ce88e51340d034eb95942143f1d177dd" 249 | }, 250 | "-949681777": { 251 | "version": 1, 252 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 253 | }, 254 | "-948975218": { 255 | "version": 1, 256 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 257 | }, 258 | "-929168312": { 259 | 
"version": 1, 260 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 261 | }, 262 | "-928233989": { 263 | "version": 1, 264 | "identifier": "b065a65d21613f30b210668f36b99865" 265 | }, 266 | "-910886540": { 267 | "version": 1, 268 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 269 | }, 270 | "-887685381": { 271 | "version": 1, 272 | "identifier": "865668aadaba3511a84d8054613dd14b" 273 | }, 274 | "-868669701": { 275 | "version": 1, 276 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 277 | }, 278 | "-858699180": { 279 | "version": 1, 280 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 281 | }, 282 | "-819778837": { 283 | "version": 1, 284 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 285 | }, 286 | "-813715057": { 287 | "version": 1, 288 | "identifier": "ce88e51340d034eb95942143f1d177dd" 289 | }, 290 | "-788575516": { 291 | "version": 1, 292 | "identifier": "d9824b27273034f69ddf0369b907de1e" 293 | }, 294 | "-782399481": { 295 | "version": 1, 296 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 297 | }, 298 | "-765629868": { 299 | "version": 1, 300 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 301 | }, 302 | "-713207990": { 303 | "version": 1, 304 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 305 | }, 306 | "-704126197": { 307 | "version": 1, 308 | "identifier": "70d7e21f675633c38b057731c646283b" 309 | }, 310 | "-703905062": { 311 | "version": 1, 312 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 313 | }, 314 | "-677761763": { 315 | "version": 1, 316 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 317 | }, 318 | "-677716059": { 319 | "version": 1, 320 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 321 | }, 322 | "-650059480": { 323 | "version": 1, 324 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 325 | }, 326 | "-601500921": { 327 | "version": 1, 328 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 329 | }, 330 | "-601452604": { 331 | "version": 1, 332 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 333 | }, 334 | "-558607524": { 335 | "version": 1, 336 | "identifier": "d9824b27273034f69ddf0369b907de1e" 337 | }, 338 | "-536222129": { 339 | "version": 1, 340 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 341 | }, 342 | "-535276574": { 343 | "version": 1, 344 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 345 | }, 346 | "-531165090": { 347 | "version": 1, 348 | "identifier": "d9824b27273034f69ddf0369b907de1e" 349 | }, 350 | "-513594848": { 351 | "version": 1, 352 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 353 | }, 354 | "-510745204": { 355 | "version": 1, 356 | "identifier": "d9824b27273034f69ddf0369b907de1e" 357 | }, 358 | "-492474525": { 359 | "version": 1, 360 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 361 | }, 362 | "-436715699": { 363 | "version": 1, 364 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 365 | }, 366 | "-436169307": { 367 | "version": 1, 368 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 369 | }, 370 | "-421725118": { 371 | "version": 1, 372 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 373 | }, 374 | "-420639473": { 375 | "version": 1, 376 | "identifier": "865668aadaba3511a84d8054613dd14b" 377 | }, 378 | "-402034263": { 379 | "version": 1, 380 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 381 | }, 382 | "-372367314": { 383 | "version": 1, 384 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 385 | }, 386 | "-363276737": { 387 | "version": 1, 388 | "identifier": "d53648c23d853765a0662482a16a4ca3" 389 | }, 390 | "-350701540": { 391 | "version": 1, 392 | "identifier": 
"1ae5694808e43da58ba98374c7e51190" 393 | }, 394 | "-329451756": { 395 | "version": 1, 396 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 397 | }, 398 | "-322939709": { 399 | "version": 1, 400 | "identifier": "464294a547843ecca2be5a615b1397c0" 401 | }, 402 | "-308807501": { 403 | "version": 1, 404 | "identifier": "70d7e21f675633c38b057731c646283b" 405 | }, 406 | "-273283867": { 407 | "version": 1, 408 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 409 | }, 410 | "-257651875": { 411 | "version": 1, 412 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 413 | }, 414 | "-246789829": { 415 | "version": 1, 416 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 417 | }, 418 | "-235365093": { 419 | "version": 1, 420 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 421 | }, 422 | "-214752869": { 423 | "version": 1, 424 | "identifier": "55da12e6f8ea35059e783843d4733281" 425 | }, 426 | "-211889573": { 427 | "version": 1, 428 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 429 | }, 430 | "-205438280": { 431 | "version": 1, 432 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 433 | }, 434 | "-162809880": { 435 | "version": 1, 436 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 437 | }, 438 | "-149368597": { 439 | "version": 1, 440 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 441 | }, 442 | "-111074588": { 443 | "version": 1, 444 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 445 | }, 446 | "-106187699": { 447 | "version": 1, 448 | "identifier": "d9824b27273034f69ddf0369b907de1e" 449 | }, 450 | "-90712726": { 451 | "version": 1, 452 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 453 | }, 454 | "-43170766": { 455 | "version": 1, 456 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 457 | }, 458 | "-37332753": { 459 | "version": 1, 460 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 461 | }, 462 | "21700536": { 463 | "version": 1, 464 | "identifier": "d53648c23d853765a0662482a16a4ca3" 465 | }, 466 | "26589100": { 467 | "version": 1, 468 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 469 | }, 470 | "49988399": { 471 | "version": 1, 472 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 473 | }, 474 | "50999564": { 475 | "version": 1, 476 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 477 | }, 478 | "63571604": { 479 | "version": 1, 480 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 481 | }, 482 | "104739144": { 483 | "version": 1, 484 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 485 | }, 486 | "130354123": { 487 | "version": 1, 488 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 489 | }, 490 | "130591054": { 491 | "version": 1, 492 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 493 | }, 494 | "135923079": { 495 | "version": 1, 496 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 497 | }, 498 | "140323032": { 499 | "version": 1, 500 | "identifier": "ce88e51340d034eb95942143f1d177dd" 501 | }, 502 | "143743215": { 503 | "version": 1, 504 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 505 | }, 506 | "153476705": { 507 | "version": 1, 508 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 509 | }, 510 | "248532166": { 511 | "version": 1, 512 | "identifier": "d53648c23d853765a0662482a16a4ca3" 513 | }, 514 | "274411076": { 515 | "version": 1, 516 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 517 | }, 518 | "305678758": { 519 | "version": 1, 520 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 521 | }, 522 | "346850862": { 523 | "version": 1, 524 | "identifier": "ce88e51340d034eb95942143f1d177dd" 525 | }, 526 | "347339085": { 527 | 
"version": 1, 528 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 529 | }, 530 | "350498951": { 531 | "version": 1, 532 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 533 | }, 534 | "350954699": { 535 | "version": 1, 536 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 537 | }, 538 | "408849612": { 539 | "version": 1, 540 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 541 | }, 542 | "429427545": { 543 | "version": 1, 544 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 545 | }, 546 | "446249379": { 547 | "version": 1, 548 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 549 | }, 550 | "449449981": { 551 | "version": 1, 552 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 553 | }, 554 | "473117705": { 555 | "version": 1, 556 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 557 | }, 558 | "497367507": { 559 | "version": 1, 560 | "identifier": "b065a65d21613f30b210668f36b99865" 561 | }, 562 | "528221851": { 563 | "version": 1, 564 | "identifier": "ed014f952f6c3e1ca0d5a479e4141456" 565 | }, 566 | "535236085": { 567 | "version": 1, 568 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 569 | }, 570 | "538142841": { 571 | "version": 1, 572 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 573 | }, 574 | "582005327": { 575 | "version": 1, 576 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 577 | }, 578 | "584259208": { 579 | "version": 1, 580 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 581 | }, 582 | "584568901": { 583 | "version": 1, 584 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 585 | }, 586 | "632432913": { 587 | "version": 1, 588 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 589 | }, 590 | "632822913": { 591 | "version": 1, 592 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 593 | }, 594 | "643043431": { 595 | "version": 1, 596 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 597 | }, 598 | "656552419": { 599 | "version": 1, 600 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 601 | }, 602 | "664478574": { 603 | "version": 1, 604 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 605 | }, 606 | "676134760": { 607 | "version": 1, 608 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 609 | }, 610 | "684975246": { 611 | "version": 1, 612 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 613 | }, 614 | "707171019": { 615 | "version": 1, 616 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 617 | }, 618 | "713982546": { 619 | "version": 1, 620 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 621 | }, 622 | "755048076": { 623 | "version": 1, 624 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 625 | }, 626 | "826319166": { 627 | "version": 1, 628 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 629 | }, 630 | "826364895": { 631 | "version": 1, 632 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 633 | }, 634 | "836074637": { 635 | "version": 1, 636 | "identifier": "464294a547843ecca2be5a615b1397c0" 637 | }, 638 | "845789493": { 639 | "version": 1, 640 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 641 | }, 642 | "846443122": { 643 | "version": 1, 644 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 645 | }, 646 | "852985392": { 647 | "version": 1, 648 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 649 | }, 650 | "867550676": { 651 | "version": 1, 652 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 653 | }, 654 | "895327246": { 655 | "version": 1, 656 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 657 | }, 658 | "914523155": { 659 | "version": 1, 660 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 661 | }, 662 | 
"922409680": { 663 | "version": 1, 664 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 665 | }, 666 | "943339311": { 667 | "version": 1, 668 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 669 | }, 670 | "954829692": { 671 | "version": 1, 672 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 673 | }, 674 | "962743900": { 675 | "version": 1, 676 | "identifier": "07a1007d01053baeb0aa798f88e1a0f2" 677 | }, 678 | "1008763691": { 679 | "version": 1, 680 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 681 | }, 682 | "1011027326": { 683 | "version": 1, 684 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 685 | }, 686 | "1033410522": { 687 | "version": 1, 688 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 689 | }, 690 | "1035110164": { 691 | "version": 1, 692 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 693 | }, 694 | "1042604743": { 695 | "version": 1, 696 | "identifier": "89d35a3cb15e3664b0e5e9e5091d54c4" 697 | }, 698 | "1070649741": { 699 | "version": 1, 700 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 701 | }, 702 | "1076442160": { 703 | "version": 1, 704 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 705 | }, 706 | "1087399852": { 707 | "version": 1, 708 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 709 | }, 710 | "1130655362": { 711 | "version": 1, 712 | "identifier": "02211ad6f0cf359e8a6b9ca7f79329c1" 713 | }, 714 | "1149185862": { 715 | "version": 1, 716 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 717 | }, 718 | "1190920924": { 719 | "version": 1, 720 | "identifier": "18da1234f4cb339d8e563665adcbaae1" 721 | }, 722 | "1201967894": { 723 | "version": 1, 724 | "identifier": "5d2e3793fc6638e98507610a19a6079f" 725 | }, 726 | "1224529416": { 727 | "version": 1, 728 | "identifier": "70d7e21f675633c38b057731c646283b" 729 | }, 730 | "1233480337": { 731 | "version": 1, 732 | "identifier": "b065a65d21613f30b210668f36b99865" 733 | }, 734 | "1240271849": { 735 | "version": 1, 736 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 737 | }, 738 | "1276199166": { 739 | "version": 1, 740 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 741 | }, 742 | "1288248433": { 743 | "version": 1, 744 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 745 | }, 746 | "1300668502": { 747 | "version": 1, 748 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 749 | }, 750 | "1318427302": { 751 | "version": 1, 752 | "identifier": "861111d5e78e39bcb4111b1ac713d40b" 753 | }, 754 | "1331625336": { 755 | "version": 1, 756 | "identifier": "d44ef730090b3bf4ad6ab93c7fc2ecd1" 757 | }, 758 | "1334054318": { 759 | "version": 1, 760 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 761 | }, 762 | "1341896786": { 763 | "version": 1, 764 | "identifier": "d69c0d9d83d7348bbf3ff323a450babc" 765 | }, 766 | "1346329458": { 767 | "version": 1, 768 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 769 | }, 770 | "1361550001": { 771 | "version": 1, 772 | "identifier": "d5d8e197ad4f353b9ba5c2856bc844e1" 773 | }, 774 | "1374838964": { 775 | "version": 1, 776 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 777 | }, 778 | "1406091652": { 779 | "version": 1, 780 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 781 | }, 782 | "1408472174": { 783 | "version": 1, 784 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 785 | }, 786 | "1420996528": { 787 | "version": 1, 788 | "identifier": "4f23e7a1a230318a912dbdf8f78e0aed" 789 | }, 790 | "1442515016": { 791 | "version": 1, 792 | "identifier": "464294a547843ecca2be5a615b1397c0" 793 | }, 794 | "1444111170": { 795 | "version": 1, 796 | "identifier": 
"b065a65d21613f30b210668f36b99865" 797 | }, 798 | "1456142202": { 799 | "version": 1, 800 | "identifier": "b43de09a152b3d42ba0f628e69742db3" 801 | }, 802 | "1479814255": { 803 | "version": 1, 804 | "identifier": "065141ec74bd3ad8bff25e7bc18408be" 805 | }, 806 | "1487691117": { 807 | "version": 1, 808 | "identifier": "ff552a31dc9c31dfaae746915261cc0f" 809 | }, 810 | "1493252170": { 811 | "version": 1, 812 | "identifier": "8aac9641549c3663bc5fc2a1d7b652d1" 813 | }, 814 | "1513450563": { 815 | "version": 1, 816 | "identifier": "ce88e51340d034eb95942143f1d177dd" 817 | }, 818 | "1516202376": { 819 | "version": 1, 820 | "identifier": "00dd0f17afad3c54a1ee3b42cbfde3f0" 821 | }, 822 | "1518368864": { 823 | "version": 1, 824 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 825 | }, 826 | "1555735296": { 827 | "version": 1, 828 | "identifier": "b2e9c4abf2c133cab118acca6b6fe4e7" 829 | }, 830 | "1567793620": { 831 | "version": 1, 832 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 833 | }, 834 | "1573984571": { 835 | "version": 1, 836 | "identifier": "b785d1f098863821a4eb6bee226c35b7" 837 | }, 838 | "1579991687": { 839 | "version": 1, 840 | "identifier": "d1450ca53c3c3ccbb875326d4ec2d38f" 841 | }, 842 | "1593672398": { 843 | "version": 1, 844 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 845 | }, 846 | "1602725778": { 847 | "version": 1, 848 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 849 | }, 850 | "1612739546": { 851 | "version": 1, 852 | "identifier": "63612b5f0daf3724b30428ee6a390e86" 853 | }, 854 | "1639399392": { 855 | "version": 1, 856 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 857 | }, 858 | "1667669984": { 859 | "version": 1, 860 | "identifier": "d7028a8eab373a28b2a2f6109a13011d" 861 | }, 862 | "1692521345": { 863 | "version": 1, 864 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 865 | }, 866 | "1696767179": { 867 | "version": 1, 868 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 869 | }, 870 | "1700325412": { 871 | "version": 1, 872 | "identifier": "f170065c69273b4092cf73ffe2fb8a49" 873 | }, 874 | "1750718921": { 875 | "version": 1, 876 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 877 | }, 878 | "1753707681": { 879 | "version": 1, 880 | "identifier": "f4017d1d98933b659d6b0e2811c22a1c" 881 | }, 882 | "1765578299": { 883 | "version": 1, 884 | "identifier": "a8fc3852f5b636bc846996f2f5fd52c4" 885 | }, 886 | "1770298781": { 887 | "version": 1, 888 | "identifier": "aacf21dd9eb537eeb332c48248e79865" 889 | }, 890 | "1808790008": { 891 | "version": 1, 892 | "identifier": "464294a547843ecca2be5a615b1397c0" 893 | }, 894 | "1811712400": { 895 | "version": 1, 896 | "identifier": "d71a716e22ef33e4baa94336282c6f3b" 897 | }, 898 | "1821685772": { 899 | "version": 1, 900 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 901 | }, 902 | "1828829657": { 903 | "version": 1, 904 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 905 | }, 906 | "1830498928": { 907 | "version": 1, 908 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 909 | }, 910 | "1836162724": { 911 | "version": 1, 912 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 913 | }, 914 | "1853048372": { 915 | "version": 1, 916 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 917 | }, 918 | "1857766797": { 919 | "version": 1, 920 | "identifier": "70d7e21f675633c38b057731c646283b" 921 | }, 922 | "1866065729": { 923 | "version": 1, 924 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 925 | }, 926 | "1871485982": { 927 | "version": 1, 928 | "identifier": "8a812752548f33d4a7e83a2a7b4a5c80" 929 | }, 930 | 
"1892545061": { 931 | "version": 1, 932 | "identifier": "eb5a6614aa793f968ecc6426a8aaf31b" 933 | }, 934 | "1894012653": { 935 | "version": 1, 936 | "identifier": "08dfb7f3f3bf3668889accd20d27ae56" 937 | }, 938 | "1899051478": { 939 | "version": 1, 940 | "identifier": "55da12e6f8ea35059e783843d4733281" 941 | }, 942 | "1907304406": { 943 | "version": 1, 944 | "identifier": "4310bdfd3c763fb3a58edb9b4030d99f" 945 | }, 946 | "1907464917": { 947 | "version": 1, 948 | "identifier": "9f1304b5ef553bb99291561ab5fdccba" 949 | }, 950 | "1973184342": { 951 | "version": 1, 952 | "identifier": "acf3522e534a35c2a811716eeb29c22e" 953 | }, 954 | "1977837486": { 955 | "version": 1, 956 | "identifier": "a11b880d043231fba4a14c9da7c86a83" 957 | }, 958 | "1988098064": { 959 | "version": 1, 960 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 961 | }, 962 | "2002022199": { 963 | "version": 1, 964 | "identifier": "55da12e6f8ea35059e783843d4733281" 965 | }, 966 | "2015796569": { 967 | "version": 1, 968 | "identifier": "35a44a2979bb392b86a4d51b44b666c7" 969 | }, 970 | "2050900322": { 971 | "version": 1, 972 | "identifier": "a0e69147a1bc3b55ba88803686ee8ef8" 973 | }, 974 | "2091004539": { 975 | "version": 1, 976 | "identifier": "3e05246f5a4b37e7bd9a0b09b1706b95" 977 | }, 978 | "2103863933": { 979 | "version": 1, 980 | "identifier": "959e6e70afa63804bf6f6f003ab54adb" 981 | }, 982 | "2105959596": { 983 | "version": 1, 984 | "identifier": "e4d8ef54fd3834628e07fd42e79978b3" 985 | }, 986 | "2108854606": { 987 | "version": 1, 988 | "identifier": "f4c597fd0865329ca22e1ab30e0adf33" 989 | }, 990 | "2111136993": { 991 | "version": 1, 992 | "identifier": "0fcc8d949efd3ed594bec8171d131a4e" 993 | }, 994 | "2118332817": { 995 | "version": 1, 996 | "identifier": "0714e1f5551736ff85cef7d9fd8c28f2" 997 | }, 998 | "2143686309": { 999 | "version": 1, 1000 | "identifier": "865668aadaba3511a84d8054613dd14b" 1001 | } 1002 | } 1003 | -------------------------------------------------------------------------------- /alluxio/alluxio_file_system.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | import json 4 | import logging 5 | import re 6 | import time 7 | import weakref 8 | from dataclasses import dataclass 9 | from enum import Enum 10 | from typing import Dict 11 | 12 | import aiohttp 13 | import humanfriendly 14 | import requests 15 | from requests.adapters import HTTPAdapter 16 | 17 | from .const import ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE 18 | from .const import ALLUXIO_HASH_NODE_PER_WORKER_KEY 19 | from .const import ALLUXIO_PAGE_SIZE_DEFAULT_VALUE 20 | from .const import ALLUXIO_PAGE_SIZE_KEY 21 | from .const import ALLUXIO_SUCCESS_IDENTIFIER 22 | from .const import ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE 23 | from .const import FULL_PAGE_URL_FORMAT 24 | from .const import GET_FILE_STATUS_URL_FORMAT 25 | from .const import LIST_URL_FORMAT 26 | from .const import LOAD_PROGRESS_URL_FORMAT 27 | from .const import LOAD_SUBMIT_URL_FORMAT 28 | from .const import LOAD_URL_FORMAT 29 | from .const import PAGE_URL_FORMAT 30 | from .const import WRITE_PAGE_URL_FORMAT 31 | from .worker_ring import ConsistentHashProvider 32 | 33 | logging.basicConfig( 34 | level=logging.WARN, 35 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 36 | ) 37 | 38 | 39 | @dataclass 40 | class AlluxioPathStatus: 41 | type: str 42 | name: str 43 | path: str 44 | ufs_path: str 45 | last_modification_time_ms: float 46 | human_readable_file_size: str 47 | length: float 
48 | 49 | 50 | class LoadState(Enum): 51 | RUNNING = "RUNNING" 52 | VERIFYING = "VERIFYING" 53 | STOPPED = "STOPPED" 54 | SUCCEEDED = "SUCCEEDED" 55 | FAILED = "FAILED" 56 | 57 | 58 | class Method(Enum): 59 | GET = "GET" 60 | POST = "POST" 61 | PUT = "PUT" 62 | DELETE = "DELETE" 63 | HEAD = "HEAD" 64 | OPTIONS = "OPTIONS" 65 | PATCH = "PATCH" 66 | 67 | 68 | class OpType(Enum): 69 | SUBMIT = "submit" 70 | PROGRESS = "progress" 71 | STOP = "stop" 72 | 73 | 74 | class AlluxioFileSystem: 75 | """ 76 | Access Alluxio file system 77 | 78 | Examples 79 | -------- 80 | >>> # Launch Alluxio with ETCD as service discovery 81 | >>> alluxio = AlluxioFileSystem(etcd_hosts="localhost") 82 | >>> # Or launch Alluxio with user provided worker list 83 | >>> alluxio = AlluxioFileSystem(worker_hosts="host1,host2,host3") 84 | 85 | >>> print(alluxio.listdir("s3://mybucket/mypath/dir")) 86 | [ 87 | { 88 | type: "file", 89 | name: "my_file_name", 90 | path: '/my_file_name', 91 | ufs_path: 's3://example-bucket/my_file_name', 92 | last_modification_time_ms: 0, 93 | length: 77542, 94 | human_readable_file_size: '75.72KB' 95 | }, 96 | 97 | ] 98 | >>> print(alluxio.read("s3://mybucket/mypath/dir/myfile")) 99 | my_file_content 100 | """ 101 | 102 | def __init__( 103 | self, 104 | etcd_hosts=None, 105 | worker_hosts=None, 106 | options=None, 107 | logger=None, 108 | concurrency=64, 109 | etcd_port=2379, 110 | worker_http_port=ALLUXIO_WORKER_HTTP_SERVER_PORT_DEFAULT_VALUE, 111 | etcd_refresh_workers_interval=120, 112 | ): 113 | """ 114 | Inits Alluxio file system. 115 | 116 | Args: 117 | etcd_hosts (str, optional): 118 | The ETCD server hostnames used to look up worker addresses, 119 | in host1,host2,host3 format. Either etcd_hosts or worker_hosts should be provided, not both. 120 | worker_hosts (str, optional): 121 | The worker hostnames in host1,host2,host3 format. Either etcd_hosts or worker_hosts should be provided, not both. 122 | options (dict, optional): 123 | A dictionary of Alluxio property keys and values. 124 | Note that the Alluxio Python API only supports a limited set of Alluxio properties. 125 | logger (Logger, optional): 126 | A logger instance for logging messages. 127 | concurrency (int, optional): 128 | The maximum number of concurrent operations for HTTP requests. Defaults to 64. 129 | etcd_port (int, optional): 130 | The port of each etcd server. 131 | worker_http_port (int, optional): 132 | The port of the HTTP server on each Alluxio worker node. 133 | etcd_refresh_workers_interval(int, optional): 134 | The interval at which the worker list is periodically refreshed from the ETCD membership service. A negative value disables the periodic refresh. 135 | 136 | """ 137 | # TODO(lu/chunxu) change to ETCD endpoints in format of 'http://etcd_host:port, http://etcd_host:port' & worker hosts in 'host:port, host:port' format 138 | self.logger = logger or logging.getLogger("AlluxioPython") 139 | if not (etcd_hosts or worker_hosts): 140 | raise ValueError( 141 | "Must supply either 'etcd_hosts' or 'worker_hosts'" 142 | ) 143 | if etcd_hosts and worker_hosts: 144 | raise ValueError( 145 | "Supply either 'etcd_hosts' or 'worker_hosts', not both" 146 | ) 147 | if not etcd_hosts: 148 | self.logger.warning( 149 | "'etcd_hosts' not supplied. An etcd cluster is required for dynamic cluster changes."
150 | ) 151 | if not isinstance(etcd_port, int) or not (1 <= etcd_port <= 65535): 152 | raise ValueError( 153 | "'etcd_port' should be an integer in the range 1-65535" 154 | ) 155 | if not isinstance(worker_http_port, int) or not ( 156 | 1 <= worker_http_port <= 65535 157 | ): 158 | raise ValueError( 159 | "'worker_http_port' should be an integer in the range 1-65535" 160 | ) 161 | if not isinstance(concurrency, int) or concurrency <= 0: 162 | raise ValueError("'concurrency' should be a positive integer") 163 | if concurrency < 10 or concurrency > 128: 164 | self.logger.warning( 165 | f"'concurrency' value of {concurrency} is outside the recommended range (10-128). " 166 | "This may lead to suboptimal performance or resource utilization.", 167 | ) 168 | if not isinstance(etcd_refresh_workers_interval, int): 169 | raise ValueError( 170 | "'etcd_refresh_workers_interval' should be an integer" 171 | ) 172 | 173 | self.session = self._create_session(concurrency) 174 | 175 | # parse options 176 | page_size = ALLUXIO_PAGE_SIZE_DEFAULT_VALUE 177 | hash_node_per_worker = ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE 178 | if options: 179 | if ALLUXIO_PAGE_SIZE_KEY in options: 180 | page_size = options[ALLUXIO_PAGE_SIZE_KEY] 181 | self.logger.debug(f"Page size is set to {page_size}") 182 | if ALLUXIO_HASH_NODE_PER_WORKER_KEY in options: 183 | hash_node_per_worker = int( 184 | options[ALLUXIO_HASH_NODE_PER_WORKER_KEY] 185 | ) 186 | self.logger.debug( 187 | f"Hash node per worker is set to {hash_node_per_worker}" 188 | ) 189 | if ( 190 | not isinstance(hash_node_per_worker, int) 191 | or hash_node_per_worker <= 0 192 | ): 193 | raise ValueError( 194 | "'hash_node_per_worker' should be a positive integer" 195 | ) 196 | 197 | self.page_size = humanfriendly.parse_size(page_size, binary=True) 198 | 199 | self.hash_provider = ConsistentHashProvider( 200 | etcd_hosts=etcd_hosts, 201 | etcd_port=etcd_port, 202 | worker_hosts=worker_hosts, 203 | worker_http_port=worker_http_port, 204 | hash_node_per_worker=hash_node_per_worker, 205 | options=options, 206 | logger=self.logger, 207 | etcd_refresh_workers_interval=etcd_refresh_workers_interval, 208 | ) 209 | 210 | def listdir(self, path): 211 | """ 212 | Lists the directory. 213 | 214 | Args: 215 | path (str): The full ufs path to list from 216 | 217 | Returns: 218 | list of dict: A list containing dictionaries, where each dictionary has: 219 | - type (str): 'directory' or 'file'. 220 | - name (str): Name of the directory/file. 221 | - path (str): Path of the directory/file. 222 | - ufs_path (str): UFS path of the directory/file. 223 | - last_modification_time_ms (int): Last modification time in milliseconds. 224 | - length (int): Length of the file or 0 for directory. 225 | - human_readable_file_size (str): Human-readable file size. 
226 | 227 | Example: 228 | [ 229 | { 230 | type: "file", 231 | name: "my_file_name", 232 | path: '/my_file_name', 233 | ufs_path: 's3://example-bucket/my_file_name', 234 | last_modification_time_ms: 0, 235 | length: 77542, 236 | human_readable_file_size: '75.72KB' 237 | }, 238 | { 239 | type: "directory", 240 | name: "my_dir_name", 241 | path: '/my_dir_name', 242 | ufs_path: 's3://example-bucket/my_dir_name', 243 | last_modification_time_ms: 0, 244 | length: 0, 245 | human_readable_file_size: '0B' 246 | }, 247 | ] 248 | """ 249 | self._validate_path(path) 250 | worker_host, worker_http_port = self._get_preferred_worker_address( 251 | path 252 | ) 253 | params = {"path": path} 254 | try: 255 | response = self.session.get( 256 | LIST_URL_FORMAT.format( 257 | worker_host=worker_host, http_port=worker_http_port 258 | ), 259 | params=params, 260 | ) 261 | response.raise_for_status() 262 | result = [] 263 | for data in json.loads(response.content): 264 | result.append( 265 | AlluxioPathStatus( 266 | data["mType"], 267 | data["mName"], 268 | data["mPath"], 269 | data["mUfsPath"], 270 | data["mLastModificationTimeMs"], 271 | data["mHumanReadableFileSize"], 272 | data["mLength"], 273 | ) 274 | ) 275 | return result 276 | except Exception as e: 277 | raise Exception( 278 | f"Error when listing path {path}: error {e}" 279 | ) from e 280 | 281 | def get_file_status(self, path): 282 | """ 283 | Gets the file status of the path. 284 | 285 | Args: 286 | path (str): The full ufs path to get the file status of 287 | 288 | Returns: 289 | File Status: The struct has: 290 | - type (string): directory or file 291 | - name (string): name of the directory/file 292 | - path (string): the path of the file 293 | - ufs_path (string): the ufs path of the file 294 | - last_modification_time_ms (long): the last modification time 295 | - length (integer): length of the file or 0 for directory 296 | - human_readable_file_size (string): the size of the human readable files 297 | 298 | Example: 299 | { 300 | type: 'directory', 301 | name: 'a', 302 | path: '/a', 303 | ufs_path: 's3://example-bucket/a', 304 | last_modification_time_ms: 0, 305 | length: 0, 306 | human_readable_file_size: '0B' 307 | } 308 | """ 309 | self._validate_path(path) 310 | worker_host, worker_http_port = self._get_preferred_worker_address( 311 | path 312 | ) 313 | params = {"path": path} 314 | try: 315 | response = self.session.get( 316 | GET_FILE_STATUS_URL_FORMAT.format( 317 | worker_host=worker_host, 318 | http_port=worker_http_port, 319 | ), 320 | params=params, 321 | ) 322 | response.raise_for_status() 323 | data = json.loads(response.content)[0] 324 | return AlluxioPathStatus( 325 | data["mType"], 326 | data["mName"], 327 | data["mPath"], 328 | data["mUfsPath"], 329 | data["mLastModificationTimeMs"], 330 | data["mHumanReadableFileSize"], 331 | data["mLength"], 332 | ) 333 | except Exception as e: 334 | raise Exception( 335 | f"Error when getting file status path {path}: error {e}" 336 | ) from e 337 | 338 | def load( 339 | self, 340 | path, 341 | timeout=None, 342 | ): 343 | """ 344 | Loads a file. 
345 | 346 | Args: 347 | path (str): The full path with storage protocol to load data from 348 | timeout (integer): The number of seconds for timeout, optional 349 | 350 | Returns: 351 | result (boolean): Whether the file has been loaded successfully 352 | """ 353 | self._validate_path(path) 354 | worker_host, worker_http_port = self._get_preferred_worker_address( 355 | path 356 | ) 357 | return self._load_file(worker_host, worker_http_port, path, timeout) 358 | 359 | def submit_load( 360 | self, 361 | path, 362 | ): 363 | """ 364 | Submits a load job for a file. 365 | 366 | Args: 367 | path (str): The full ufs file path to load data from 368 | 369 | Returns: 370 | result (boolean): Whether the job has been submitted successfully 371 | """ 372 | self._validate_path(path) 373 | worker_host, worker_http_port = self._get_preferred_worker_address( 374 | path 375 | ) 376 | try: 377 | params = {"path": path, "opType": OpType.SUBMIT.value} 378 | response = self.session.get( 379 | LOAD_URL_FORMAT.format( 380 | worker_host=worker_host, 381 | http_port=worker_http_port, 382 | ), 383 | params=params, 384 | ) 385 | response.raise_for_status() 386 | content = json.loads(response.content.decode("utf-8")) 387 | return content[ALLUXIO_SUCCESS_IDENTIFIER] 388 | except Exception as e: 389 | raise Exception( 390 | f"Error when submitting load job for path {path} from {worker_host}: error {e}" 391 | ) from e 392 | 393 | def stop_load( 394 | self, 395 | path, 396 | ): 397 | """ 398 | Stops a load job for a file. 399 | 400 | Args: 401 | path (str): The full ufs file path to load data from 402 | 403 | Returns: 404 | result (boolean): Whether the job has been stopped successfully 405 | """ 406 | self._validate_path(path) 407 | worker_host, worker_http_port = self._get_preferred_worker_address( 408 | path 409 | ) 410 | try: 411 | params = {"path": path, "opType": OpType.STOP.value} 412 | response = self.session.get( 413 | LOAD_URL_FORMAT.format( 414 | worker_host=worker_host, 415 | http_port=worker_http_port, 416 | ), 417 | params=params, 418 | ) 419 | response.raise_for_status() 420 | content = json.loads(response.content.decode("utf-8")) 421 | return content[ALLUXIO_SUCCESS_IDENTIFIER] 422 | except Exception as e: 423 | raise Exception( 424 | f"Error when stopping load job for path {path} from {worker_host}: error {e}" 425 | ) from e 426 | 427 | def load_progress( 428 | self, 429 | path, 430 | ): 431 | """ 432 | Gets the progress of the load job for a path. 433 | 434 | Args: 435 | path (str): The full UFS file path to load data from UFS to Alluxio. 436 | 437 | Returns: 438 | LoadState: The current state of the load job as a LoadState enum. Possible values are: 439 | - LoadState.RUNNING: The load job is in progress. 440 | - LoadState.VERIFYING: The load job is verifying the loaded data. 441 | - LoadState.STOPPED: The load job has been stopped. 442 | - LoadState.SUCCEEDED: The load job completed successfully. 443 | - LoadState.FAILED: The load job failed. 
444 | 445 | Example: 446 | load_state = alluxio_fs.load_progress("s3://mybucket/mypath/file") 447 | print(f"Current Load State: {load_state.name}") 448 | """ 449 | self._validate_path(path) 450 | worker_host, worker_http_port = self._get_preferred_worker_address( 451 | path 452 | ) 453 | params = {"path": path, "opType": OpType.PROGRESS.value} 454 | load_progress_url = LOAD_URL_FORMAT.format( 455 | worker_host=worker_host, 456 | http_port=worker_http_port, 457 | ) 458 | return self._load_progress_internal(load_progress_url, params) 459 | 460 | def read(self, file_path): 461 | """ 462 | Reads the full file. 463 | 464 | Args: 465 | file_path (str): The full ufs file path to read data from 466 | 467 | Returns: 468 | file content (str): The full file content 469 | """ 470 | self._validate_path(file_path) 471 | worker_host, worker_http_port = self._get_preferred_worker_address( 472 | file_path 473 | ) 474 | path_id = self._get_path_hash(file_path) 475 | try: 476 | return b"".join( 477 | self._all_page_generator( 478 | worker_host, worker_http_port, path_id 479 | ) 480 | ) 481 | except Exception as e: 482 | raise Exception( 483 | f"Error when reading file {file_path}: error {e}" 484 | ) from e 485 | 486 | def read_range(self, file_path, offset, length): 487 | """ 488 | Reads parts of a file. 489 | 490 | Args: 491 | file_path (str): The full ufs file path to read data from 492 | offset (integer): The offset to start reading data from 493 | length (integer): The file length to read 494 | 495 | Returns: 496 | file content (str): The file content with length from offset 497 | """ 498 | self._validate_path(file_path) 499 | if not isinstance(offset, int) or offset < 0: 500 | raise ValueError("Offset must be a non-negative integer") 501 | 502 | if length is None or length == -1: 503 | file_status = self.get_file_status(file_path) 504 | if file_status is None: 505 | raise FileNotFoundError(f"File {file_path} not found") 506 | length = file_status.length - offset 507 | 508 | if length == 0: 509 | return b"" 510 | 511 | if not isinstance(length, int) or length < 0: 512 | raise ValueError( 513 | f"Invalid length: {length}. Length must be a non-negative integer, -1, or None. Requested offset: {offset}" 514 | ) 515 | 516 | worker_host, worker_http_port = self._get_preferred_worker_address( 517 | file_path 518 | ) 519 | path_id = self._get_path_hash(file_path) 520 | 521 | try: 522 | return b"".join( 523 | self._range_page_generator( 524 | worker_host, worker_http_port, path_id, offset, length 525 | ) 526 | ) 527 | except Exception as e: 528 | raise Exception( 529 | f"Error when reading file {file_path}: error {e}" 530 | ) from e 531 | 532 | def write_page(self, file_path, page_index, page_bytes): 533 | """ 534 | Writes a page. 535 | 536 | Args: 537 | file_path: The path of the file where data is to be written. 538 | page_index: The page index in the file to write the data. 539 | page_bytes: The byte data to write to the specified page, MUST BE FULL PAGE. 540 | 541 | Returns: 542 | True if the write was successful, False otherwise. 
543 | """ 544 | self._validate_path(file_path) 545 | worker_host, worker_http_port = self._get_preferred_worker_address( 546 | file_path 547 | ) 548 | path_id = self._get_path_hash(file_path) 549 | try: 550 | response = requests.post( 551 | WRITE_PAGE_URL_FORMAT.format( 552 | worker_host=worker_host, 553 | http_port=worker_http_port, 554 | path_id=path_id, 555 | page_index=page_index, 556 | ), 557 | headers={"Content-Type": "application/octet-stream"}, 558 | data=page_bytes, 559 | ) 560 | response.raise_for_status() 561 | return 200 <= response.status_code < 300 562 | except requests.RequestException as e: 563 | raise Exception( 564 | f"Error writing to file {file_path} at page {page_index}: {e}" 565 | ) 566 | 567 | def _all_page_generator(self, worker_host, worker_http_port, path_id): 568 | page_index = 0 569 | while True: 570 | try: 571 | page_content = self._read_page( 572 | worker_host, worker_http_port, path_id, page_index 573 | ) 574 | except Exception as e: 575 | if page_index == 0: 576 | raise Exception( 577 | f"Error when reading page 0 of {path_id}: error {e}" 578 | ) from e 579 | else: 580 | # TODO(lu) distinguish end of file exception and real exception 581 | break 582 | if not page_content: 583 | break 584 | yield page_content 585 | if len(page_content) < self.page_size: # last page 586 | break 587 | page_index += 1 588 | 589 | def _range_page_generator( 590 | self, worker_host, worker_http_port, path_id, offset, length 591 | ): 592 | start_page_index = offset // self.page_size 593 | start_page_offset = offset % self.page_size 594 | 595 | end_page_index = (offset + length - 1) // self.page_size 596 | end_page_read_to = ((offset + length - 1) % self.page_size) + 1 597 | 598 | page_index = start_page_index 599 | while True: 600 | try: 601 | read_offset = 0 602 | read_length = self.page_size 603 | if page_index == start_page_index: 604 | read_offset = start_page_offset 605 | if start_page_index == end_page_index: 606 | read_length = end_page_read_to - start_page_offset 607 | else: 608 | read_length = self.page_size - start_page_offset 609 | elif page_index == end_page_index: 610 | read_length = end_page_read_to 611 | 612 | page_content = self._read_page( 613 | worker_host, 614 | worker_http_port, 615 | path_id, 616 | page_index, 617 | read_offset, 618 | read_length, 619 | ) 620 | yield page_content 621 | 622 | # Check if it's the last page or the end of the file 623 | if ( 624 | page_index == end_page_index 625 | or len(page_content) < read_length 626 | ): 627 | break 628 | 629 | page_index += 1 630 | 631 | except Exception as e: 632 | if page_index == start_page_index: 633 | raise Exception( 634 | f"Error when reading page {page_index} of {path_id}: error {e}" 635 | ) from e 636 | else: 637 | # read some data successfully, return those data 638 | break 639 | 640 | def _create_session(self, concurrency): 641 | session = requests.Session() 642 | adapter = HTTPAdapter( 643 | pool_connections=concurrency, pool_maxsize=concurrency 644 | ) 645 | session.mount("http://", adapter) 646 | return session 647 | 648 | def _load_file(self, worker_host, worker_http_port, path, timeout): 649 | try: 650 | params = {"path": path, "opType": OpType.SUBMIT.value} 651 | response = self.session.get( 652 | LOAD_URL_FORMAT.format( 653 | worker_host=worker_host, 654 | http_port=worker_http_port, 655 | ), 656 | params=params, 657 | ) 658 | response.raise_for_status() 659 | content = json.loads(response.content.decode("utf-8")) 660 | if not content[ALLUXIO_SUCCESS_IDENTIFIER]: 661 | return False 662 | 663 
| params = {"path": path, "opType": OpType.PROGRESS.value} 664 | load_progress_url = LOAD_URL_FORMAT.format( 665 | worker_host=worker_host, 666 | http_port=worker_http_port, 667 | ) 668 | stop_time = 0 669 | if timeout is not None: 670 | stop_time = time.time() + timeout 671 | while True: 672 | job_state, content = self._load_progress_internal( 673 | load_progress_url, params 674 | ) 675 | if job_state == LoadState.SUCCEEDED: 676 | return True 677 | if job_state == LoadState.FAILED: 678 | self.logger.error( 679 | f"Failed to load path {path} with return message {content}" 680 | ) 681 | return False 682 | if job_state == LoadState.STOPPED: 683 | self.logger.warning( 684 | f"Failed to load path {path} with return message {content}, load stopped" 685 | ) 686 | return False 687 | if timeout is None or stop_time - time.time() >= 10: 688 | time.sleep(10) 689 | else: 690 | self.logger.debug( 691 | f"Failed to load path {path} within timeout" 692 | ) 693 | return False 694 | 695 | except Exception as e: 696 | self.logger.debug( 697 | f"Error when loading file {path} from {worker_host} with timeout {timeout}: error {e}" 698 | ) 699 | return False 700 | 701 | def _load_progress_internal( 702 | self, load_url: str, params: Dict 703 | ) -> "tuple[LoadState, str]": # string annotation keeps Python 3.8 support 704 | try: 705 | response = self.session.get(load_url, params=params) 706 | response.raise_for_status() 707 | content = json.loads(response.content.decode("utf-8")) 708 | if "jobState" not in content: 709 | raise KeyError( 710 | "The field 'jobState' is missing from the load progress response content" 711 | ) 712 | state = content["jobState"] 713 | if "FAILED" in state: 714 | return LoadState.FAILED, content 715 | return LoadState(state), content 716 | except Exception as e: 717 | raise Exception( 718 | f"Error when getting load job progress for {load_url}: error {e}" 719 | ) from e 720 | 721 | def _read_page( 722 | self, 723 | worker_host, 724 | worker_http_port, 725 | path_id, 726 | page_index, 727 | offset=None, 728 | length=None, 729 | ): 730 | if (offset is None) != (length is None): 731 | raise ValueError( 732 | "Both offset and length should be either None or both not None" 733 | ) 734 | 735 | try: 736 | if offset is None: 737 | page_url = FULL_PAGE_URL_FORMAT.format( 738 | worker_host=worker_host, 739 | http_port=worker_http_port, 740 | path_id=path_id, 741 | page_index=page_index, 742 | ) 743 | self.logger.debug(f"Reading full page request {page_url}") 744 | else: 745 | page_url = PAGE_URL_FORMAT.format( 746 | worker_host=worker_host, 747 | http_port=worker_http_port, 748 | path_id=path_id, 749 | page_index=page_index, 750 | page_offset=offset, 751 | page_length=length, 752 | ) 753 | self.logger.debug(f"Reading page request {page_url}") 754 | response = self.session.get(page_url) 755 | response.raise_for_status() 756 | return response.content 757 | 758 | except Exception as e: 759 | raise Exception( 760 | f"Error when requesting file {path_id} page {page_index} from {worker_host}: error {e}" 761 | ) from e 762 | 763 | def _get_path_hash(self, uri): 764 | hash_functions = [ 765 | hashlib.sha256, 766 | hashlib.md5, 767 | ] 768 | for hash_function in hash_functions: 769 | try: 770 | hash_obj = hash_function() 771 | hash_obj.update(uri.encode("utf-8")) 772 | return hash_obj.hexdigest().lower() 773 | except AttributeError: 774 | continue 775 | return hex(hash(uri) & 0xFFFFFFFF)[2:] # fallback to a simple hashCode-style id 776 | 777 | def _get_preferred_worker_address(self, full_ufs_path): 778 | workers = self.hash_provider.get_multiple_workers(full_ufs_path,
1) 779 | if len(workers) != 1: 780 | raise ValueError( 781 | "Expected exactly one worker from hash ring, but found {} workers {}.".format( 782 | len(workers), workers 783 | ) 784 | ) 785 | return workers[0].host, workers[0].http_server_port 786 | 787 | def _validate_path(self, path): 788 | if not isinstance(path, str): 789 | raise TypeError("path must be a string") 790 | 791 | if not re.search(r"^[a-zA-Z0-9]+://", path): 792 | raise ValueError( 793 | "path must be a full path with a protocol (e.g., 'protocol://path')" 794 | ) 795 | 796 | 797 | class AlluxioAsyncFileSystem: 798 | """ 799 | Access Alluxio file system 800 | 801 | Examples 802 | -------- 803 | >>> # Launch Alluxio with ETCD as service discovery 804 | >>> alluxio = AlluxioAsyncFileSystem(etcd_hosts="localhost") 805 | >>> # Or launch Alluxio with user provided worker list 806 | >>> alluxio = AlluxioAsyncFileSystem(worker_hosts="host1,host2,host3") 807 | 808 | >>> print(await alluxio.listdir("s3://mybucket/mypath/dir")) 809 | [ 810 | { 811 | "mType": "file", 812 | "mName": "myfile", 813 | "mLength": 77542 814 | } 815 | 816 | ] 817 | >>> print(await alluxio.read("s3://mybucket/mypath/dir/myfile")) 818 | my_file_content 819 | """ 820 | 821 | def __init__( 822 | self, 823 | etcd_hosts=None, 824 | worker_hosts=None, 825 | options=None, 826 | logger=None, 827 | http_port="28080", 828 | etcd_port="2379", 829 | loop=None, 830 | ): 831 | """ 832 | Inits Alluxio file system. 833 | 834 | Args: 835 | etcd_hosts (str, optional): 836 | The hostnames of ETCD to get worker addresses from 837 | The hostnames in host1,host2,host3 format. Either etcd_hosts or worker_hosts should be provided, not both. 838 | worker_hosts (str, optional): 839 | The worker hostnames in host1,host2,host3 format. Either etcd_hosts or worker_hosts should be provided, not both. 840 | options (dict, optional): 841 | A dictionary of Alluxio property key and values. 842 | Note that Alluxio Python API only support a limited set of Alluxio properties. 843 | logger (Logger, optional): 844 | A logger instance for logging messages. 845 | etcd_port (str, optional): 846 | The port of each etcd server. 847 | http_port (string, optional): 848 | The port of the HTTP server on each Alluxio worker node. 
849 | """ 850 | if etcd_hosts is None and worker_hosts is None: 851 | raise ValueError( 852 | "Must supply either 'etcd_hosts' or 'worker_hosts'" 853 | ) 854 | if etcd_hosts and worker_hosts: 855 | raise ValueError( 856 | "Supply either 'etcd_hosts' or 'worker_hosts', not both" 857 | ) 858 | self.logger = logger or logging.getLogger("AlluxioFileSystem") 859 | self._session = None 860 | 861 | # parse options 862 | page_size = ALLUXIO_PAGE_SIZE_DEFAULT_VALUE 863 | if options: 864 | if ALLUXIO_PAGE_SIZE_KEY in options: 865 | page_size = options[ALLUXIO_PAGE_SIZE_KEY] 866 | self.logger.debug(f"Page size is set to {page_size}") 867 | self.page_size = humanfriendly.parse_size(page_size, binary=True) 868 | self.hash_provider = ConsistentHashProvider( 869 | etcd_hosts=etcd_hosts, 870 | etcd_port=int(etcd_port), 871 | worker_hosts=worker_hosts, 872 | worker_http_port=int(http_port), 873 | hash_node_per_worker=ALLUXIO_HASH_NODE_PER_WORKER_DEFAULT_VALUE, 874 | options=options, 875 | logger=self.logger, 876 | etcd_refresh_workers_interval=120, 877 | ) 878 | self.http_port = http_port 879 | self._loop = loop or asyncio.get_event_loop() 880 | 881 | async def _set_session(self): 882 | if self._session is None: 883 | self._session = aiohttp.ClientSession(loop=self._loop) 884 | weakref.finalize( 885 | self, self.close_session, self._loop, self._session 886 | ) 887 | return self._session 888 | 889 | @property 890 | def session(self) -> aiohttp.ClientSession: 891 | if self._session is None: 892 | raise RuntimeError("Please await _set_session() before using the session") 893 | return self._session 894 | 895 | @staticmethod 896 | def close_session(loop, session): 897 | # Finalizer registered in _set_session(): close the session on its 898 | # event loop if that loop is still running. 899 | if loop is not None and session is not None: 900 | if loop.is_running(): 901 | try: 902 | loop.create_task(session.close()) 903 | except RuntimeError: 904 | pass 905 | # If the loop has already stopped, there is nothing safe 906 | # left to do here. 907 | 908 | async def listdir(self, path: str): 909 | """ 910 | Lists the directory.
911 | 912 | Args: 913 | path (str): The full ufs path to list from 914 | 915 | Returns: 916 | list of dict: A list containing dictionaries, where each dictionary has: 917 | - mType (string): directory or file 918 | - mName (string): name of the directory/file 919 | - mLength (integer): length of the file or 0 for directory 920 | 921 | Example: 922 | [ 923 | { 924 | type: "file", 925 | name: "my_file_name", 926 | path: '/my_file_name', 927 | ufs_path: 's3://example-bucket/my_file_name', 928 | last_modification_time_ms: 0, 929 | length: 77542, 930 | human_readable_file_size: '75.72KB' 931 | }, 932 | { 933 | type: "directory", 934 | name: "my_dir_name", 935 | path: '/my_dir_name', 936 | ufs_path: 's3://example-bucket/my_dir_name', 937 | last_modification_time_ms: 0, 938 | length: 0, 939 | human_readable_file_size: '0B' 940 | }, 941 | 942 | ] 943 | """ 944 | self._validate_path(path) 945 | worker_host = self._get_preferred_worker_host(path) 946 | params = {"path": path} 947 | 948 | _, content = await self._request( 949 | Method.GET, 950 | LIST_URL_FORMAT.format( 951 | worker_host=worker_host, http_port=self.http_port 952 | ), 953 | params=params, 954 | ) 955 | 956 | result = [] 957 | for data in json.loads(content): 958 | result.append( 959 | AlluxioPathStatus( 960 | data["mType"], 961 | data["mName"], 962 | data["mPath"], 963 | data["mUfsPath"], 964 | data["mLastModificationTimeMs"], 965 | data["mHumanReadableFileSize"], 966 | data["mLength"], 967 | ) 968 | ) 969 | return result 970 | 971 | async def get_file_status(self, path): 972 | """ 973 | Gets the file status of the path. 974 | 975 | Args: 976 | path (str): The full ufs path to get the file status of 977 | 978 | Returns: 979 | File Status: The struct has: 980 | - type (string): directory or file 981 | - name (string): name of the directory/file 982 | - path (string): the path of the file 983 | - ufs_path (string): the ufs path of the file 984 | - last_modification_time_ms (long): the last modification time 985 | - length (integer): length of the file or 0 for directory 986 | - human_readable_file_size (string): the size of the human readable files 987 | 988 | Example: 989 | { 990 | type: 'directory', 991 | name: 'a', 992 | path: '/a', 993 | ufs_path: 's3://example-bucket/a', 994 | last_modification_time_ms: 0, 995 | length: 0, 996 | human_readable_file_size: '0B' 997 | } 998 | """ 999 | self._validate_path(path) 1000 | worker_host = self._get_preferred_worker_host(path) 1001 | params = {"path": path} 1002 | _, content = await self._request( 1003 | Method.GET, 1004 | GET_FILE_STATUS_URL_FORMAT.format( 1005 | worker_host=worker_host, 1006 | http_port=self.http_port, 1007 | ), 1008 | params=params, 1009 | ) 1010 | data = json.loads(content)[0] 1011 | return AlluxioPathStatus( 1012 | data["mType"], 1013 | data["mName"], 1014 | data["mPath"], 1015 | data["mUfsPath"], 1016 | data["mLastModificationTimeMs"], 1017 | data["mHumanReadableFileSize"], 1018 | data["mLength"], 1019 | ) 1020 | 1021 | async def load( 1022 | self, 1023 | path: str, 1024 | timeout=None, 1025 | ): 1026 | """ 1027 | Loads a file. 
1028 | 1029 | Args: 1030 | path (str): The full path with storage protocol to load data from 1031 | timeout (integer): The number of seconds for timeout, optional 1032 | 1033 | Returns: 1034 | result (boolean): Whether the file has been loaded successfully 1035 | """ 1036 | self._validate_path(path) 1037 | worker_host = self._get_preferred_worker_host(path) 1038 | return await self._load_file(worker_host, path, timeout) 1039 | 1040 | async def read_range( 1041 | self, file_path: str, offset: int, length: int 1042 | ) -> bytes: 1043 | """ 1044 | Reads parts of a file. 1045 | 1046 | Args: 1047 | file_path (str): The full ufs file path to read data from 1048 | offset (integer): The offset to start reading data from 1049 | length (integer): The file length to read 1050 | 1051 | Returns: 1052 | file content (bytes): The file content with length from offset 1053 | """ 1054 | self._validate_path(file_path) 1055 | if not isinstance(offset, int) or offset < 0: 1056 | raise ValueError("Offset must be a non-negative integer") 1057 | 1058 | if not isinstance(length, int) or (length <= 0 and length != -1): 1059 | raise ValueError("Length must be a positive integer or -1") 1060 | 1061 | worker_host = self._get_preferred_worker_host(file_path) 1062 | path_id = self._get_path_hash(file_path) 1063 | page_contents = await self._range_page_generator( 1064 | worker_host, path_id, offset, length 1065 | ) 1066 | return b"".join(page_contents) 1067 | 1068 | async def write_page( 1069 | self, file_path: str, page_index: int, page_bytes: bytes 1070 | ): 1071 | """ 1072 | Writes a page. 1073 | 1074 | Args: 1075 | file_path: The path of the file where data is to be written. 1076 | page_index: The page index in the file to write the data. 1077 | page_bytes: The byte data to write to the specified page, MUST BE FULL PAGE. 1078 | 1079 | Returns: 1080 | True if the write was successful, False otherwise.
1081 | """ 1082 | self._validate_path(file_path) 1083 | worker_host = self._get_preferred_worker_host(file_path) 1084 | path_id = self._get_path_hash(file_path) 1085 | 1086 | status, content = await self._request( 1087 | Method.POST, 1088 | WRITE_PAGE_URL_FORMAT.format( 1089 | worker_host=worker_host, 1090 | http_port=self.http_port, 1091 | path_id=path_id, 1092 | page_index=page_index, 1093 | ), 1094 | headers={"Content-Type": "application/octet-stream"}, 1095 | data=page_bytes, 1096 | ) 1097 | return 200 <= status < 300 1098 | 1099 | async def _range_page_generator( 1100 | self, worker_host: str, path_id: str, offset: int, length: int 1101 | ): 1102 | start_page_index = offset // self.page_size 1103 | start_page_offset = offset % self.page_size 1104 | 1105 | # Determine the end page index and the read-to position 1106 | if length == -1: 1107 | end_page_index = None 1108 | else: 1109 | end_page_index = (offset + length - 1) // self.page_size 1110 | end_page_read_to = ((offset + length - 1) % self.page_size) + 1 1111 | 1112 | page_index = start_page_index 1113 | page_contents = [] 1114 | while True: 1115 | if page_index == start_page_index: 1116 | if start_page_index == end_page_index: 1117 | read_length = end_page_read_to - start_page_offset 1118 | else: 1119 | read_length = self.page_size - start_page_offset 1120 | page_content = await self._read_page( 1121 | worker_host, 1122 | path_id, 1123 | page_index, 1124 | start_page_offset, 1125 | read_length, 1126 | ) 1127 | page_contents.append(page_content) 1128 | elif page_index == end_page_index: 1129 | page_content = await self._read_page( 1130 | worker_host, path_id, page_index, 0, end_page_read_to 1131 | ) 1132 | page_contents.append(page_content) 1133 | else: 1134 | page_content = await self._read_page( 1135 | worker_host, path_id, page_index 1136 | ) 1137 | page_contents.append(page_content) 1138 | 1139 | # Check if it's the last page or the end of the file 1140 | if ( 1141 | page_index == end_page_index 1142 | or len(page_content) < self.page_size 1143 | ): 1144 | break 1145 | 1146 | page_index += 1 1147 | return page_contents # each page was awaited above, so these are bytes 1148 | 1149 | async def _load_file(self, worker_host: str, path: str, timeout): 1150 | _, content = await self._request( 1151 | Method.GET, 1152 | LOAD_SUBMIT_URL_FORMAT.format( 1153 | worker_host=worker_host, 1154 | http_port=self.http_port, 1155 | path=path, 1156 | ), 1157 | ) 1158 | 1159 | content = json.loads(content.decode("utf-8")) 1160 | if not content[ALLUXIO_SUCCESS_IDENTIFIER]: 1161 | return False 1162 | 1163 | load_progress_url = LOAD_PROGRESS_URL_FORMAT.format( 1164 | worker_host=worker_host, 1165 | http_port=self.http_port, 1166 | path=path, 1167 | ) 1168 | stop_time = 0 1169 | if timeout is not None: 1170 | stop_time = time.time() + timeout 1171 | while True: 1172 | job_state = await self._load_progress_internal(load_progress_url) 1173 | if job_state == LoadState.SUCCEEDED: 1174 | return True 1175 | if job_state == LoadState.FAILED: 1176 | self.logger.debug( 1177 | f"Failed to load path {path} with return message {content}" 1178 | ) 1179 | return False 1180 | if job_state == LoadState.STOPPED: 1181 | self.logger.debug( 1182 | f"Failed to load path {path} with return message {content}, load stopped" 1183 | ) 1184 | return False 1185 | if timeout is None or stop_time - time.time() >= 10: 1186 | await asyncio.sleep(10) 1187 | else: 1188 | self.logger.debug(f"Failed to load path {path} within timeout") 1189 | return False 1190 | 1191 | async def _load_progress_internal(self, load_url: str): 1192 | _,
content = await self._request(Method.GET, load_url) 1193 | content = json.loads(content.decode("utf-8")) 1194 | if "jobState" not in content: 1195 | raise KeyError( 1196 | "The field 'jobState' is missing from the load progress response content" 1197 | ) 1198 | return LoadState(content["jobState"]) 1199 | 1200 | async def _read_page( 1201 | self, 1202 | worker_host, 1203 | path_id: str, 1204 | page_index: int, 1205 | offset=None, 1206 | length=None, 1207 | ): 1208 | if (offset is None) != (length is None): 1209 | raise ValueError( 1210 | "Both offset and length should be either None or both not None" 1211 | ) 1212 | 1213 | if offset is None: 1214 | page_url = FULL_PAGE_URL_FORMAT.format( 1215 | worker_host=worker_host, 1216 | http_port=self.http_port, 1217 | path_id=path_id, 1218 | page_index=page_index, 1219 | ) 1220 | else: 1221 | page_url = PAGE_URL_FORMAT.format( 1222 | worker_host=worker_host, 1223 | http_port=self.http_port, 1224 | path_id=path_id, 1225 | page_index=page_index, 1226 | page_offset=offset, 1227 | page_length=length, 1228 | ) 1229 | 1230 | _, content = await self._request(Method.GET, page_url) 1231 | return content 1232 | 1233 | def _get_path_hash(self, uri: str): 1234 | hash_functions = [ 1235 | hashlib.sha256, 1236 | hashlib.md5, 1237 | ] 1238 | for hash_function in hash_functions: 1239 | try: 1240 | hash_obj = hash_function() 1241 | hash_obj.update(uri.encode("utf-8")) 1242 | return hash_obj.hexdigest().lower() 1243 | except AttributeError: 1244 | continue 1245 | return hex(hash(uri) & 0xFFFFFFFF)[2:] # fallback to a simple hashCode-style id 1246 | 1247 | def _get_preferred_worker_host(self, full_ufs_path: str): 1248 | workers = self.hash_provider.get_multiple_workers(full_ufs_path, 1) 1249 | if len(workers) != 1: 1250 | raise ValueError( 1251 | "Expected exactly one worker from hash ring, but found {} workers {}.".format( 1252 | len(workers), workers 1253 | ) 1254 | ) 1255 | return workers[0].host 1256 | 1257 | def _validate_path(self, path: str): 1258 | if not isinstance(path, str): 1259 | raise TypeError("path must be a string") 1260 | 1261 | if not re.search(r"^[a-zA-Z0-9]+://", path): 1262 | raise ValueError( 1263 | "path must be a full path with a protocol (e.g., 'protocol://path')" 1264 | ) 1265 | 1266 | async def _request( 1267 | self, 1268 | method: Method, 1269 | url: str, 1270 | *args, 1271 | params: dict = None, 1272 | headers=None, 1273 | json=None, 1274 | data=None, 1275 | ) -> "tuple[int, bytes]": # string annotation keeps Python 3.8 support 1276 | await self._set_session() 1277 | async with self.session.request( 1278 | method=method.value, 1279 | url=url, 1280 | params=params, 1281 | json=json, 1282 | headers=headers, 1283 | data=data, 1284 | timeout=None, 1285 | ) as r: 1286 | status = r.status 1287 | contents = await r.read() 1288 | # validate_response(status, contents, url, args) 1289 | return status, contents 1290 | --------------------------------------------------------------------------------
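A minimal usage sketch for the two clients defined in alluxio/alluxio_file_system.py. It is illustrative only and not part of the repository: the hostnames, bucket, and file paths are placeholders, and a reachable Alluxio cluster exposing the worker REST endpoints is assumed.

import asyncio

from alluxio import AlluxioAsyncFileSystem, AlluxioFileSystem

# Synchronous client backed by a static worker list.
fs = AlluxioFileSystem(worker_hosts="host1,host2,host3")
for status in fs.listdir("s3://example-bucket/dir"):
    print(status.name, status.type, status.human_readable_file_size)

# read_range returns bytes; here, the first KiB of a file.
chunk = fs.read_range("s3://example-bucket/dir/myfile", offset=0, length=1024)

# Asynchronous client that discovers workers through ETCD.
async def main():
    afs = AlluxioAsyncFileSystem(etcd_hosts="localhost")
    data = await afs.read_range("s3://example-bucket/dir/myfile", 0, 1024)
    print(len(data))

asyncio.run(main())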