├── tests ├── __init__.py └── test_api.py ├── test-requirements.txt ├── requirements.txt ├── easybase ├── pytest.ini ├── tox.ini ├── _version.py ├── __init__.py ├── util.py ├── kerberos.py ├── batch.py ├── pool.py ├── connection.py ├── table.py └── HBase.thrift ├── MANIFEST.in ├── .gitignore ├── setup.cfg ├── Makefile ├── travis_install.sh ├── .github └── workflows │ └── python-package.yml ├── setup.py ├── README.rst ├── CODE_OF_CONDUCT.md ├── DemoClient.py ├── LICENSE.rst └── coverage.xml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | coverage 3 | nose 4 | tox 5 | codecov 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | thriftpy2>=0.4.5 2 | six>=1.15.0 3 | pure-sasl>=0.6.2 4 | kerberos>=1.3.0 -------------------------------------------------------------------------------- /easybase/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | error 4 | ignore::UserWarning 5 | ignore::DeprecationWarning -------------------------------------------------------------------------------- /easybase/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py37 3 | 4 | [testenv] 5 | deps= 6 | nose 7 | coverage 8 | commands=nosetests -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Makefile 2 | include requirements.txt 3 | include easybase/*.thrift 4 | include easybase/hbase/*.py 5 | include *.md *.rst 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | coverage 3 | dist 4 | build 5 | *.egg-info 6 | .coverage 7 | .vscode 8 | __pycache__/ 9 | .eggs/ 10 | py3/ 11 | py2/ 12 | .tox/ 13 | .idea -------------------------------------------------------------------------------- /easybase/_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyBase version module. 3 | 4 | This module defines the package version for use in __init__.py and 5 | setup.py. 6 | """ 7 | 8 | __version__ = '1.1.3' 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | stop = 1 3 | verbosity = 2 4 | with-coverage = 1 5 | cover-erase = 1 6 | cover-package=easybase.connection,easybase.table,easybase.batch,easybase.pool,easybase.util,tests 7 | cover-tests = 1 8 | cover-html = 1 9 | cover-html-dir = coverage/ 10 | 11 | [bdist_wheel] 12 | universal = 1 -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all doc test clean 2 | 3 | all: doc clean 4 | 5 | doc: 6 | python setup.py build_sphinx 7 | @echo 8 | @echo Generated documentation: "file://"$$(readlink -f doc/build/html/index.html) 9 | @echo 10 | 11 | test: 12 | -find coverage/ -mindepth 1 -delete 13 | python $$(which nosetests) $${TESTS} 14 | 15 | clean: 16 | find . 
-name '*.py[co]' -delete 17 | $$(rm -rf dist) 18 | dist: 19 | python setup.py bdist_wheel sdist 20 | 21 | upload: 22 | python -m twine upload dist/* -------------------------------------------------------------------------------- /easybase/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyBase, a developer-friendly Python library to interact with Apache 3 | HBase. Support Time-Range scan and multi-version access 4 | """ 5 | # using thriftpy2 server as thrift bridge directly 6 | from pkg_resources import resource_filename 7 | import thriftpy2 8 | 9 | thriftpy2.load( 10 | resource_filename('easybase', 'HBase.thrift'), 11 | module_name='HBase_thrift' 12 | ) 13 | 14 | from .connection import DEFAULT_HOST, DEFAULT_PORT, Connection 15 | from .table import Table 16 | from .pool import ConnectionPool, NoConnectionsAvailable 17 | -------------------------------------------------------------------------------- /travis_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eux 2 | sudo apt-get update 3 | sudo apt-get -q install -y wget openjdk-8-jdk-headless 4 | #sudo update-java-alternatives -s java-8-oracle 5 | 6 | # download and install hbase 2.2.x 7 | ver='2.2.5' 8 | tarball="hbase-${ver}-bin.tar.gz" 9 | wget -q -O /tmp/${tarball} https://archive.apache.org/dist/hbase/${ver}/${tarball} 10 | 11 | tar -xzf /tmp/${tarball} -C /var/tmp/ 12 | 13 | #configuration 14 | cd /var/tmp/hbase-${ver} 15 | 16 | cat <conf/hbase-site.xml 17 | 18 | 19 | 20 | 21 | hbase.rootdir 22 | file:///var/tmp/hbase 23 | 24 | 25 | hbase.zookeeper.property.dataDir 26 | /var/tmp/zookeeper 27 | 28 | 29 | hbase.unsafe.stream.capability.enforce 30 | false 31 | 32 | 33 | EOF 34 | 35 | export HBASE_HOME=/var/tmp/hbase-${ver} 36 | export PATH=$PATH:$HBASE_HOME/bin 37 | 38 | #start hbase server and thriftserver 39 | bin/start-hbase.sh 40 | sleep 2 41 | bin/hbase-daemon.sh start thrift2 42 | sleep 2 
-------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ['2.7.18', '3.x'] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | sudo apt-get update && sudo apt-get install -y gcc krb5-config libkrb5-dev 29 | python -m pip install --upgrade pip 30 | pip install flake8 pytest 31 | if [ -f test-requirements.txt ]; then pip install -r test-requirements.txt; fi 32 | - name: Install HBase 33 | run: | 34 | bash ./travis_install.sh 35 | - name: Lint with flake8 36 | run: | 37 | # stop the build if there are Python syntax errors or undefined names 38 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 39 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 40 | flake8 . 
--count --ignore F401,E401,E402 --exit-zero --max-line-length=140 --statistics 41 | - name: Test with pytest 42 | run: | 43 | pytest --disable-warnings 44 | 45 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os.path import join, dirname 2 | from setuptools import find_packages, setup 3 | 4 | __version__ = None 5 | exec(open('easybase/_version.py', 'r').read()) 6 | 7 | 8 | def get_file_contents(filename): 9 | with open(join(dirname(__file__), filename)) as fp: 10 | return fp.read() 11 | 12 | 13 | def get_install_requires(): 14 | requirements = get_file_contents('requirements.txt') 15 | install_requires = [] 16 | for line in requirements.split('\n'): 17 | line = line.strip() 18 | if line and not line.startswith('-'): 19 | install_requires.append(line) 20 | return install_requires 21 | 22 | 23 | setup( 24 | name='easybase', 25 | version=__version__, 26 | description="""Python/Python3 library to interact with Apache HBase,support HBase 2.0, time-range scan and HBase thrift 2 procotol. 
""", 27 | long_description=get_file_contents('README.rst'), 28 | author="wgzhao", 29 | author_email="wgzhao@gmail.com", 30 | url='https://github.com/wgzhao/easybase', 31 | install_requires=get_install_requires(), 32 | keywords="HBase,easybase,thrift2", 33 | packages=find_packages(exclude=['tests']), 34 | include_package_data=True, 35 | license="MIT", 36 | classifiers=[ 37 | "Development Status :: 5 - Production/Stable", 38 | "Intended Audience :: Developers", 39 | "License :: OSI Approved :: MIT License", 40 | "Programming Language :: Python :: 2", 41 | 'Programming Language :: Python :: 2.7', 42 | "Programming Language :: Python :: 3", 43 | 'Programming Language :: Python :: 3.6', 44 | 'Programming Language :: Python :: 3.7', 45 | 'Programming Language :: Python :: 3.8', 46 | "Topic :: Database", 47 | "Topic :: Software Development :: Libraries :: Python Modules", 48 | ] 49 | ) 50 | -------------------------------------------------------------------------------- /easybase/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyBase utility module. 3 | 4 | These functions are not part of the public API. 
import re

CAPITALS = re.compile('([A-Z])')


try:
    # Python 2.7+ and all Python 3 versions
    from collections import OrderedDict
except ImportError:
    try:
        # External package for Python 2.6
        from ordereddict import OrderedDict
    except ImportError:
        # Stub to throw errors at run-time (not import time)
        def OrderedDict(*args, **kwargs):
            raise RuntimeError(
                "No OrderedDict implementation available; please "
                "install the 'ordereddict' Package from PyPI.")


def camel_case_to_pep8(name):
    """Convert a camel cased name to PEP8 style.

    :param str name: camelCase identifier, e.g. ``'rowKey'``
    :return: the PEP8 (snake_case) form, e.g. ``'row_key'``
    """
    converted = CAPITALS.sub(lambda m: '_' + m.groups()[0].lower(), name)
    # startswith() is safe on the empty string, unlike converted[0]
    # which raised IndexError when name == ''.
    if converted.startswith('_'):
        return converted[1:]
    return converted


def pep8_to_camel_case(name, initial=False):
    """Convert a PEP8 style name to camel case.

    :param str name: snake_case identifier, e.g. ``'row_key'``
    :param bool initial: if true, capitalise the first chunk as well
        (``'RowKey'`` instead of ``'rowKey'``)
    :return: the camelCase form of `name`
    """
    chunks = name.split('_')
    # str.capitalize() is equivalent to s[0].upper() + s[1:].lower() for
    # non-empty chunks, but does not raise IndexError on empty chunks
    # (e.g. for 'foo__bar' or '_private').
    converted = [s.capitalize() for s in chunks]
    if initial:
        return ''.join(converted)
    return chunks[0].lower() + ''.join(converted[1:])


def thrift_attrs(obj_or_cls):
    """Obtain Thrift data type attribute names for an instance or class.

    Reads the ``thrift_spec`` mapping generated by thriftpy and returns
    the attribute-name component of each spec entry.
    """
    return [v[1] for v in obj_or_cls.thrift_spec.values()]


def thrift_type_to_dict(obj):
    """Convert a Thrift data type to a regular dictionary.

    Keys are the PEP8 (snake_case) versions of the Thrift attribute
    names; values are taken from the instance unmodified.
    """
    return dict((camel_case_to_pep8(attr), getattr(obj, attr))
                for attr in thrift_attrs(obj))


def str_increment(s):
    """Increment and truncate a byte string (for sorting purposes)

    This function returns the shortest string that sorts after the given
    string when compared using regular string comparison semantics.

    This function increments the last byte that is smaller than ``0xFF``,
    and drops everything after it. If the string only contains ``0xFF``
    bytes, `None` is returned.
    """
    if isinstance(s, bytes) and not isinstance(s, str):
        # Python 3 ``bytes``: indexing yields ints, so the character
        # comparison/arithmetic below would misbehave (s[i] != '\xff' is
        # always true) or raise TypeError. Compare and increment
        # numerically instead. On Python 2, bytes is str, so this branch
        # is never taken.
        for i in range(len(s) - 1, -1, -1):
            if s[i] != 0xFF:
                return s[:i] + bytes([s[i] + 1])
        return None

    # Text (or Python 2 str) path: plain character arithmetic.
    # range() (instead of six.moves.xrange) is valid on both Python 2
    # and 3; the sequence is short (one entry per byte of the key).
    for i in range(len(s) - 1, -1, -1):
        if s[i] != '\xff':
            return s[:i] + chr(ord(s[i]) + 1)

    return None
code:: python 67 | 68 | table_def = {'cf1':dict(), 69 | 'cf2':{'max_versions':2000}} 70 | conn.create_table('test1', table_def) 71 | 72 | 73 | Write row to table 74 | ------------------ 75 | 76 | .. code:: python 77 | 78 | puts = {'cf1:c1': 'v1', 79 | 'cf1:c2': 'v2' 80 | 'cf2:c2': 'v3'} 81 | tbl = conn.table('test1') 82 | tbl.put(row='rk1', puts) 83 | 84 | 85 | Get row from table 86 | ------------------ 87 | 88 | .. code:: python 89 | 90 | rk = 'rk1' 91 | tbl = conn.table('test1') 92 | rs = tbl.row(rk) 93 | 94 | 95 | Scan rows 96 | ---------- 97 | 98 | .. code:: python 99 | 100 | tbl = conn.table('test1') 101 | scanner = tbl.scan(row_start='rk_0001', row_stop='rk_0100') 102 | for row in scanner: 103 | print(row) 104 | 105 | Get all namespace 106 | ------------------ 107 | 108 | .. code:: python 109 | 110 | for ns in conn.list_namespaces(): 111 | print(ns) 112 | 113 | You can get detail in 114 | `DemoClient.py `__ 115 | 116 | 117 | License 118 | ================= 119 | MIT License ``_. 120 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at wgzhao@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /easybase/kerberos.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from struct import pack, unpack 3 | 4 | import six 5 | from thriftpy2.transport import TTransportBase, TTransportException 6 | try: 7 | from thriftpy2.transport import readall 8 | except ImportError: 9 | from thriftpy2.transport.base import readall 10 | from puresasl.client import SASLClient 11 | 12 | 13 | class TSaslClientTransport(TTransportBase): 14 | """ 15 | SASL transport 16 | """ 17 | 18 | START = 1 19 | OK = 2 20 | BAD = 3 21 | ERROR = 4 22 | COMPLETE = 5 23 | 24 | def __init__(self, transport, host, service, mechanism=six.u('GSSAPI'), 25 | **sasl_kwargs): 
26 | """ 27 | transport: an underlying transport to use, typically just a TSocket 28 | host: the name of the server, from a SASL perspective 29 | service: the name of the server's service, from a SASL perspective 30 | mechanism: the name of the preferred mechanism to use 31 | All other kwargs will be passed to the puresasl.client.SASLClient 32 | constructor. 33 | """ 34 | 35 | self.transport = transport 36 | 37 | # if six.PY3: 38 | # self._patch_pure_sasl() 39 | self.sasl = SASLClient(host, service, mechanism, **sasl_kwargs) 40 | 41 | self.__wbuf = BytesIO() 42 | self.__rbuf = BytesIO() 43 | 44 | # def _patch_pure_sasl(self): 45 | # ''' we need to patch pure_sasl to support python 3 ''' 46 | # puresasl.mechanisms.mechanisms['GSSAPI'] = CustomGSSAPIMechanism 47 | 48 | def is_open(self): 49 | return self.transport.is_open() and bool(self.sasl) 50 | 51 | def open(self): 52 | if not self.transport.is_open(): 53 | self.transport.open() 54 | 55 | self.send_sasl_msg(self.START, self.sasl.mechanism.encode('utf8')) 56 | self.send_sasl_msg(self.OK, self.sasl.process()) 57 | 58 | while True: 59 | status, challenge = self.recv_sasl_msg() 60 | if status == self.OK: 61 | self.send_sasl_msg(self.OK, self.sasl.process(challenge)) 62 | elif status == self.COMPLETE: 63 | if not self.sasl.complete: 64 | raise TTransportException( 65 | TTransportException.NOT_OPEN, 66 | "The server erroneously indicated " 67 | "that SASL negotiation was complete") 68 | else: 69 | break 70 | else: 71 | raise TTransportException( 72 | TTransportException.NOT_OPEN, 73 | "Bad SASL negotiation status: %d (%s)" 74 | % (status, challenge)) 75 | 76 | def send_sasl_msg(self, status, body): 77 | ''' 78 | body:bytes 79 | ''' 80 | header = pack(">BI", status, len(body)) 81 | self.transport.write(header + body) 82 | self.transport.flush() 83 | 84 | def recv_sasl_msg(self): 85 | header = readall(self.transport.read, 5) 86 | status, length = unpack(">BI", header) 87 | if length > 0: 88 | payload = 
readall(self.transport.read, length) 89 | else: 90 | payload = "" 91 | return status, payload 92 | 93 | def write(self, data): 94 | self.__wbuf.write(data) 95 | 96 | def flush(self): 97 | data = self.__wbuf.getvalue() 98 | encoded = self.sasl.wrap(data) 99 | if six.PY2: 100 | self.transport.write(''.join([ 101 | pack("!i", len(encoded)), 102 | encoded 103 | ]) 104 | ) 105 | else: 106 | self.transport.write(b''.join((pack("!i", len(encoded)), encoded))) 107 | self.transport.flush() 108 | self.__wbuf = BytesIO() 109 | 110 | def read(self, sz): 111 | ret = self.__rbuf.read(sz) 112 | if len(ret) != 0 or sz == 0: 113 | return ret 114 | 115 | self._read_frame() 116 | return self.__rbuf.read(sz) 117 | 118 | def _read_frame(self): 119 | header = readall(self.transport.read, 4) 120 | length, = unpack('!i', header) 121 | encoded = readall(self.transport.read, length) 122 | self.__rbuf = BytesIO(self.sasl.unwrap(encoded)) 123 | 124 | def close(self): 125 | self.sasl.dispose() 126 | self.transport.close() 127 | 128 | def get_transport(self, trans): 129 | return self.transport(trans) 130 | -------------------------------------------------------------------------------- /easybase/batch.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import logging 3 | from numbers import Integral 4 | 5 | import six 6 | 7 | from HBase_thrift import BatchMutation, Mutation 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class Batch(object): 13 | """Batch mutation class. 14 | 15 | This class cannot be instantiated directly; use :py:meth:`Table.batch` 16 | instead. 
17 | """ 18 | 19 | def __init__(self, table, timestamp=None, batch_size=None, 20 | transaction=False, wal=True): 21 | # type: (str, str, int, bool, bool) -> None 22 | """Initialise a new Batch instance.""" 23 | if not (timestamp is None or isinstance(timestamp, Integral)): 24 | raise TypeError("'timestamp' must be an integer or None") 25 | 26 | if batch_size is not None: 27 | if transaction: 28 | raise TypeError("'transaction' cannot be used when " 29 | "'batch_size' is specified") 30 | if not batch_size > 0: 31 | raise ValueError("'batch_size' must be > 0") 32 | 33 | self._table = table 34 | self._batch_size = batch_size 35 | self._timestamp = timestamp 36 | self._transaction = transaction 37 | self._wal = wal 38 | self._families = None 39 | self._reset_mutations() 40 | 41 | def _reset_mutations(self): 42 | """Reset the internal mutation buffer.""" 43 | self._mutations = defaultdict(list) 44 | self._mutation_count = 0 45 | 46 | def send(self): 47 | """Send the batch to the server.""" 48 | bms = [ 49 | BatchMutation(row, m) 50 | for row, m in six.iteritems(self._mutations) 51 | ] 52 | if not bms: 53 | return 54 | 55 | logger.debug("Sending batch for '%s' (%d mutations on %d rows)", 56 | self._table.name, self._mutation_count, len(bms)) 57 | if self._timestamp is None: 58 | self._table.connection.client.mutateRows(self._table.name, bms, {}) 59 | else: 60 | self._table.connection.client.mutateRowsTs( 61 | self._table.name, bms, self._timestamp, {}) 62 | 63 | self._reset_mutations() 64 | 65 | # 66 | # Mutation methods 67 | # 68 | 69 | def put(self, row, data, wal=None): 70 | # type: (str, dict, bool) -> None 71 | """Store data in the table. 72 | 73 | See :py:meth:`Table.put` for a description of the `row`, `data`, 74 | and `wal` arguments. The `wal` argument should normally not be 75 | used; its only use is to override the batch-wide value passed to 76 | :py:meth:`Table.batch`. 
77 | """ 78 | if wal is None: 79 | wal = self._wal 80 | 81 | self._mutations[row].extend( 82 | Mutation( 83 | isDelete=False, 84 | column=column, 85 | value=value, 86 | writeToWAL=wal) 87 | for column, value in six.iteritems(data)) 88 | 89 | self._mutation_count += len(data) 90 | if self._batch_size and self._mutation_count >= self._batch_size: 91 | self.send() 92 | 93 | def delete(self, row, columns=None, wal=None): 94 | # type: (str, list, bool) -> None 95 | """Delete data from the table. 96 | 97 | See :py:meth:`Table.put` for a description of the `row`, `data`, 98 | and `wal` arguments. The `wal` argument should normally not be 99 | used; its only use is to override the batch-wide value passed to 100 | :py:meth:`Table.batch`. 101 | """ 102 | # Work-around Thrift API limitation: the mutation API can only 103 | # delete specified columns, not complete rows, so just list the 104 | # column families once and cache them for later use by the same 105 | # batch instance. 106 | if columns is None: 107 | if self._families is None: 108 | self._families = self._table._column_family_names() 109 | columns = self._families 110 | 111 | if wal is None: 112 | wal = self._wal 113 | 114 | self._mutations[row].extend( 115 | Mutation(isDelete=True, column=column, writeToWAL=wal) 116 | for column in columns) 117 | 118 | self._mutation_count += len(columns) 119 | if self._batch_size and self._mutation_count >= self._batch_size: 120 | self.send() 121 | 122 | # 123 | # Context manager methods 124 | # 125 | 126 | def __enter__(self): 127 | """Called upon entering a ``with`` block""" 128 | return self 129 | 130 | def __exit__(self, exc_type, exc_value, traceback): 131 | """Called upon exiting a ``with`` block""" 132 | # If the 'with' block raises an exception, the batch will not be 133 | # sent to the server. 
134 | if self._transaction and exc_type is not None: 135 | return 136 | 137 | self.send() 138 | -------------------------------------------------------------------------------- /DemoClient.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import easybase 5 | 6 | host = os.getenv('HBASE_HOST', 'localhost') 7 | port = int(os.getenv('HBASE_PORT', 9090)) 8 | compat = os.getenv('COMPAT', '2.2.0') 9 | # table_name must be exists in hbase 10 | table_name = os.getenv('HBASE_TABLE', 'easybase_test') 11 | 12 | try: 13 | conn = easybase.Connection( 14 | host, port=port, timeout=2000, use_kerberos=False) 15 | except Exception as e: 16 | raise SystemError("failed to connection {}:{}, {}".format(host, port, e)) 17 | 18 | print('list all table in current namespace') 19 | print(conn.tables()) 20 | 21 | print('list all tables in default namespace') 22 | print(conn.get_tables_by_namespace('default')) 23 | 24 | if conn.exist_table(table_name): 25 | # drop table first 26 | print("drop table {}".format(table_name)) 27 | conn.delete_table(table_name, disable=True) 28 | 29 | # create table 30 | print("create table {}".format(table_name)) 31 | conn.create_table(table_name, {'cf1': dict(), 'cf2': {'max_versions': 2000}}) 32 | 33 | print("create table {} in namespace: {}".format(table_name, 'eb_ns')) 34 | conn.create_table(table_name, {'cf1': dict()}, ns_name='eb_ns') 35 | 36 | tbl = conn.table(table_name) 37 | 38 | # first clean up table 39 | 40 | # simple put 41 | puts = {'cf1:c1': 'v1', 'cf1:c2': 'v2', 'cf2:c3': 'v3'} 42 | print("write row id = r1 ,", puts) 43 | tbl.put(row='r1', data=puts) 44 | 45 | # put with timestamp 46 | ts = 111111 47 | ts2 = 499999999999999999 48 | cnt = 10 49 | vs = 10 50 | print("try to write {} records".format(cnt * 4)) 51 | for i in range(cnt): 52 | # print("write row id = r1 ,",puts,ts + random.randint(10,100)) 53 | tbl.put(row='r1', data=puts, timestamp=ts) 54 | tbl.put(row='r1', 
data=puts, timestamp=ts + random.randint(10, 100)) 55 | tbl.put(row='r2', data=puts, timestamp=ts + random.randint(10, 100)) 56 | tbl.put(row='r2', data=puts, timestamp=ts2) 57 | 58 | # simple get 59 | print("get row id = r1") 60 | print(tbl.row('r1')) 61 | 62 | print("get row id = r1 with 'cf1:c1' and 'cf1:c2'") 63 | print(tbl.row('r1', columns=['cf1:c1', 'cf1:c2'])) 64 | 65 | # get with timestamp 66 | print("get row id = r1 with timestamp = %d only get 'cf1:c1' column" % ts) 67 | print(tbl.row('r1', columns=['cf1:c1'], timestamp=ts)) 68 | 69 | # get with time range 70 | print("get row id = r1 and time range between %d and %d" % (ts, ts2)) 71 | result = tbl.row('r1', columns=['cf1:c1'], timerange=[ 72 | ts - 30, ts2], max_versions=vs) 73 | print("get {} records".format(len(result))) 74 | print(result) 75 | 76 | # scan rows with time range 77 | print("scan with time range from %d to %d and limit %d " % (ts, ts2, cnt)) 78 | result = tbl.scan(timerange=[ts - 50, ts2], limit=cnt, max_versions=vs) 79 | cnt = 0 80 | for rs in result: 81 | print(rs) 82 | cnt += 1 83 | print("total {} records".format(cnt)) 84 | 85 | # scan rows with time range additional columns 86 | print("scan with time range from %d to %d and limit %d " % (ts, ts2, cnt)) 87 | result = tbl.scan(timerange=[ts - 80, ts2], columns=['cf1:c2'], limit=cnt) 88 | 89 | for rs in result: 90 | print(rs) 91 | 92 | print("delete row id = r1 with timestapm = %d" % ts) 93 | tbl.delete('r1', timestamp=ts) 94 | 95 | print("delete row id = r2 and ['cf1:c1']") 96 | tbl.delete('r2', ['cf1:c2']) 97 | 98 | # version test 99 | print("multi-version test") 100 | tbl.put('r10', puts, timestamp=111) 101 | tbl.put('r10', puts, timestamp=222) 102 | 103 | # get 1 record by default 104 | rs = tbl.row('r10', columns=['cf2:c3'], include_timestamp=True) 105 | 106 | assert len(rs['cf2:c3']) == 1 and rs['cf2:c3'][0][1] == 222 107 | print(rs) 108 | 109 | # get 2 record by specify version 110 | rs = tbl.row('r10', columns=['cf2:c3'], 
max_versions=10) 111 | assert len(rs['cf2:c3']) == 2 112 | print(rs) 113 | 114 | # version-based scan 115 | print("scan with versions") 116 | rs = tbl.scan(row_start='r10', row_stop='r10', 117 | columns=['cf2:c3'], max_versions=10) 118 | 119 | cnt = 0 120 | for row in rs: 121 | for item in row[1]['cf2:c3']: 122 | print("value: {}, timestamp: {}".format(*item)) 123 | cnt += 1 124 | 125 | print("should be retrieved 2 records, actually get {} records".format(cnt)) 126 | 127 | print("test namespace operator") 128 | ns_name = 'eb_ns' 129 | print('create namespace: {}'.format(ns_name)) 130 | conn.create_namespace(ns_name) 131 | print('get namespace: {}'.format(ns_name)) 132 | res = conn.get_namespace(ns_name) 133 | print(res) 134 | print('delete namespace: {}'.format(ns_name)) 135 | conn.delete_namespace(ns_name, cascade=True) 136 | 137 | print('list all namespaces') 138 | print(conn.list_namespaces()) 139 | 140 | print('search table with regex') 141 | print('create table tbl1,tbl2...tbl10') 142 | for i in range(1, 11): 143 | tbl = 'tbl{}'.format(i) 144 | if not conn.exist_table(tbl): 145 | conn.create_table(tbl, {'cf1': dict()}) 146 | 147 | print('search table which starts with tbl') 148 | tbls = conn.search_table('tbl.*', include_systable=False) 149 | print(tbls) 150 | 151 | conn.close() 152 | 153 | print("test connection pool") 154 | 155 | pool = easybase.ConnectionPool(size=5, host=host, port=port, use_kerberos=True) 156 | 157 | with pool.connection() as connect: 158 | tbl = connect.table(table_name) 159 | tbl.put(row='r4', data=puts, timestamp=ts) 160 | print(tbl.row(row='r4')) 161 | tbl.put(row='r5', data=puts, timestamp=ts) 162 | print(tbl.row(row='r5')) 163 | rs = tbl.scan(row_start='r1', row_stop='r2') 164 | for row in rs: 165 | print(row) 166 | 167 | # try to connect secondly with kerberos 168 | with pool.connection() as connect: 169 | tbl = connect.table(table_name) 170 | tbl.put(row='r6', data=puts, timestamp=ts) 171 | print(tbl.row(row='r6')) 172 | 
"""
EasyBase connection pool module.
"""

import contextlib
import logging
import socket
import threading

from six.moves import queue, xrange

from thriftpy2.thrift import TException

from .connection import Connection

logger = logging.getLogger(__name__)


#
# TODO: maybe support multiple Thrift servers. What would a reasonable
# distribution look like? Round-robin? Randomize the list upon
# instantiation and then cycle through it? How to handle (temporary?)
# connection errors?
#


class NoConnectionsAvailable(RuntimeError):
    """
    Raised when the pool cannot hand out a connection.

    Occurs when a timeout was given to :py:meth:`ConnectionPool.connection`
    and no connection was freed within that period.

    .. versionadded:: 0.5
    """
    pass


class ConnectionPool(object):
    """
    Thread-safe pool of :py:class:`easybase.Connection` instances.

    .. versionadded:: 0.5

    `size` fixes how many connections the pool manages. All remaining
    keyword arguments are forwarded unchanged to the
    :py:class:`easybase.Connection` constructor, except `autoconnect`,
    which the pool overrides because it manages connection lifetime
    itself.

    :param int size: the maximum number of concurrently open connections
    :param kwargs: keyword arguments passed to
                   :py:class:`easybase.Connection`
    """

    def __init__(self, size, **kwargs):
        if not isinstance(size, int):
            raise TypeError("Pool 'size' arg must be an integer")

        if not size > 0:
            raise ValueError("Pool 'size' arg must be greater than zero")

        logger.debug(
            "Initializing connection pool with %d connections", size)

        self._lock = threading.Lock()
        self._queue = queue.LifoQueue(maxsize=size)
        self._thread_connections = threading.local()

        # The pool opens connections lazily, so force autoconnect off.
        kwargs['autoconnect'] = False
        for _ in xrange(size):
            self._queue.put(Connection(**kwargs))

        # Open one connection right away so that trivial mistakes (e.g.
        # an unresolvable host name) surface immediately instead of on
        # first use; the rest stay lazily connected.
        with self.connection():
            pass

    def _acquire_connection(self, timeout=None):
        """Take a connection out of the pool, waiting up to `timeout`."""
        try:
            return self._queue.get(True, timeout)
        except queue.Empty:
            raise NoConnectionsAvailable(
                "No connection available from pool within specified "
                "timeout")

    def _return_connection(self, connection):
        """Put a connection back into the pool."""
        self._queue.put(connection)

    @contextlib.contextmanager
    def connection(self, timeout=None):
        """
        Obtain a connection from the pool.

        Must be used as a context manager, i.e. inside a ``with``
        block::

            with pool.connection() as connection:
                pass  # do something with the connection

        If `timeout` is given, it is the number of seconds to wait for a
        connection before :py:exc:`NoConnectionsAvailable` is raised;
        when omitted, this method waits forever.

        :param int timeout: number of seconds to wait (optional)
        :return: active connection from the pool
        :rtype: :py:class:`easybase.Connection`
        """
        conn = getattr(self._thread_connections, 'current', None)

        # Outermost request for this thread: take a connection from the
        # pool and remember it in a thread local so nested requests from
        # the same thread reuse the exact same instance.
        #
        # Note: the assignment to the thread local happens under a lock;
        # see http://emptysquare.net/blog/another-thing-about-pythons-
        # threadlocals/
        is_outermost = conn is None
        if is_outermost:
            conn = self._acquire_connection(timeout)
            with self._lock:
                self._thread_connections.current = conn

        try:
            # Connections are opened lazily; this is a no-op for a
            # connection that is already open.
            conn.open()

            # Value produced by the context manager's __enter__()
            yield conn

        except (TException, socket.error):
            # A failure in the Thrift layer may leave the connection in
            # an unknown state, so swap in a fresh Thrift client before
            # handing the connection back.
            logger.info("Replacing tainted pool connection")
            conn._refresh_thrift_client()
            conn.open()

            # Reraise to caller; see contextlib.contextmanager() docs
            raise

        finally:
            # Once the outermost 'with' block ends, the thread gives up
            # ownership and the connection goes back to the pool.
            if is_outermost:
                del self._thread_connections.current
                self._return_connection(conn)
44 | 45 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 46 | 47 | 1. Definitions. 48 | 49 | "License" shall mean the terms and conditions for use, reproduction, 50 | and distribution as defined by Sections 1 through 9 of this document. 51 | 52 | "Licensor" shall mean the copyright owner or entity authorized by 53 | the copyright owner that is granting the License. 54 | 55 | "Legal Entity" shall mean the union of the acting entity and all 56 | other entities that control, are controlled by, or are under common 57 | control with that entity. For the purposes of this definition, 58 | "control" means (i) the power, direct or indirect, to cause the 59 | direction or management of such entity, whether by contract or 60 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 61 | outstanding shares, or (iii) beneficial ownership of such entity. 62 | 63 | "You" (or "Your") shall mean an individual or Legal Entity 64 | exercising permissions granted by this License. 65 | 66 | "Source" form shall mean the preferred form for making modifications, 67 | including but not limited to software source code, documentation 68 | source, and configuration files. 69 | 70 | "Object" form shall mean any form resulting from mechanical 71 | transformation or translation of a Source form, including but 72 | not limited to compiled object code, generated documentation, 73 | and conversions to other media types. 74 | 75 | "Work" shall mean the work of authorship, whether in Source or 76 | Object form, made available under the License, as indicated by a 77 | copyright notice that is included in or attached to the work 78 | (an example is provided in the Appendix below). 79 | 80 | "Derivative Works" shall mean any work, whether in Source or Object 81 | form, that is based on (or derived from) the Work and for which the 82 | editorial revisions, annotations, elaborations, or other modifications 83 | represent, as a whole, an original work of authorship. 
For the purposes 84 | of this License, Derivative Works shall not include works that remain 85 | separable from, or merely link (or bind by name) to the interfaces of, 86 | the Work and Derivative Works thereof. 87 | 88 | "Contribution" shall mean any work of authorship, including 89 | the original version of the Work and any modifications or additions 90 | to that Work or Derivative Works thereof, that is intentionally 91 | submitted to Licensor for inclusion in the Work by the copyright owner 92 | or by an individual or Legal Entity authorized to submit on behalf of 93 | the copyright owner. For the purposes of this definition, "submitted" 94 | means any form of electronic, verbal, or written communication sent 95 | to the Licensor or its representatives, including but not limited to 96 | communication on electronic mailing lists, source code control systems, 97 | and issue tracking systems that are managed by, or on behalf of, the 98 | Licensor for the purpose of discussing and improving the Work, but 99 | excluding communication that is conspicuously marked or otherwise 100 | designated in writing by the copyright owner as "Not a Contribution." 101 | 102 | "Contributor" shall mean Licensor and any individual or Legal Entity 103 | on behalf of whom a Contribution has been received by Licensor and 104 | subsequently incorporated within the Work. 105 | 106 | 2. Grant of Copyright License. Subject to the terms and conditions of 107 | this License, each Contributor hereby grants to You a perpetual, 108 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 109 | copyright license to reproduce, prepare Derivative Works of, 110 | publicly display, publicly perform, sublicense, and distribute the 111 | Work and such Derivative Works in Source or Object form. 112 | 113 | 3. Grant of Patent License. 
Subject to the terms and conditions of 114 | this License, each Contributor hereby grants to You a perpetual, 115 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 116 | (except as stated in this section) patent license to make, have made, 117 | use, offer to sell, sell, import, and otherwise transfer the Work, 118 | where such license applies only to those patent claims licensable 119 | by such Contributor that are necessarily infringed by their 120 | Contribution(s) alone or by combination of their Contribution(s) 121 | with the Work to which such Contribution(s) was submitted. If You 122 | institute patent litigation against any entity (including a 123 | cross-claim or counterclaim in a lawsuit) alleging that the Work 124 | or a Contribution incorporated within the Work constitutes direct 125 | or contributory patent infringement, then any patent licenses 126 | granted to You under this License for that Work shall terminate 127 | as of the date such litigation is filed. 128 | 129 | 4. Redistribution. 
You may reproduce and distribute copies of the 130 | Work or Derivative Works thereof in any medium, with or without 131 | modifications, and in Source or Object form, provided that You 132 | meet the following conditions: 133 | 134 | (a) You must give any other recipients of the Work or 135 | Derivative Works a copy of this License; and 136 | 137 | (b) You must cause any modified files to carry prominent notices 138 | stating that You changed the files; and 139 | 140 | (c) You must retain, in the Source form of any Derivative Works 141 | that You distribute, all copyright, patent, trademark, and 142 | attribution notices from the Source form of the Work, 143 | excluding those notices that do not pertain to any part of 144 | the Derivative Works; and 145 | 146 | (d) If the Work includes a "NOTICE" text file as part of its 147 | distribution, then any Derivative Works that You distribute must 148 | include a readable copy of the attribution notices contained 149 | within such NOTICE file, excluding those notices that do not 150 | pertain to any part of the Derivative Works, in at least one 151 | of the following places: within a NOTICE text file distributed 152 | as part of the Derivative Works; within the Source form or 153 | documentation, if provided along with the Derivative Works; or, 154 | within a display generated by the Derivative Works, if and 155 | wherever such third-party notices normally appear. The contents 156 | of the NOTICE file are for informational purposes only and 157 | do not modify the License. You may add Your own attribution 158 | notices within Derivative Works that You distribute, alongside 159 | or as an addendum to the NOTICE text from the Work, provided 160 | that such additional attribution notices cannot be construed 161 | as modifying the License. 
162 | 163 | You may add Your own copyright statement to Your modifications and 164 | may provide additional or different license terms and conditions 165 | for use, reproduction, or distribution of Your modifications, or 166 | for any such Derivative Works as a whole, provided Your use, 167 | reproduction, and distribution of the Work otherwise complies with 168 | the conditions stated in this License. 169 | 170 | 5. Submission of Contributions. Unless You explicitly state otherwise, 171 | any Contribution intentionally submitted for inclusion in the Work 172 | by You to the Licensor shall be under the terms and conditions of 173 | this License, without any additional terms or conditions. 174 | Notwithstanding the above, nothing herein shall supersede or modify 175 | the terms of any separate license agreement you may have executed 176 | with Licensor regarding such Contributions. 177 | 178 | 6. Trademarks. This License does not grant permission to use the trade 179 | names, trademarks, service marks, or product names of the Licensor, 180 | except as required for reasonable and customary use in describing the 181 | origin of the Work and reproducing the content of the NOTICE file. 182 | 183 | 7. Disclaimer of Warranty. Unless required by applicable law or 184 | agreed to in writing, Licensor provides the Work (and each 185 | Contributor provides its Contributions) on an "AS IS" BASIS, 186 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 187 | implied, including, without limitation, any warranties or conditions 188 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 189 | PARTICULAR PURPOSE. You are solely responsible for determining the 190 | appropriateness of using or redistributing the Work and assume any 191 | risks associated with Your exercise of permissions under this License. 192 | 193 | 8. Limitation of Liability. 
In no event and under no legal theory, 194 | whether in tort (including negligence), contract, or otherwise, 195 | unless required by applicable law (such as deliberate and grossly 196 | negligent acts) or agreed to in writing, shall any Contributor be 197 | liable to You for damages, including any direct, indirect, special, 198 | incidental, or consequential damages of any character arising as a 199 | result of this License or out of the use or inability to use the 200 | Work (including but not limited to damages for loss of goodwill, 201 | work stoppage, computer failure or malfunction, or any and all 202 | other commercial damages or losses), even if such Contributor 203 | has been advised of the possibility of such damages. 204 | 205 | 9. Accepting Warranty or Additional Liability. While redistributing 206 | the Work or Derivative Works thereof, You may choose to offer, 207 | and charge a fee for, acceptance of support, warranty, indemnity, 208 | or other liability obligations and/or rights consistent with this 209 | License. However, in accepting such obligations, You may act only 210 | on Your own behalf and on Your sole responsibility, not on behalf 211 | of any other Contributor, and only if You agree to indemnify, 212 | defend, and hold each Contributor harmless for any liability 213 | incurred by, or claims asserted against, such Contributor by reason 214 | of your accepting any such warranty or additional liability. 215 | 216 | END OF TERMS AND CONDITIONS 217 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyBase tests. 
3 | """ 4 | 5 | import os 6 | import random 7 | import threading 8 | 9 | from six.moves import range 10 | from six import text_type, iteritems 11 | 12 | from thriftpy2.thrift import TDecodeException 13 | 14 | from nose.tools import ( 15 | assert_dict_equal, 16 | assert_equal, 17 | assert_false, 18 | assert_not_in, 19 | assert_in, 20 | assert_is_instance, 21 | assert_is_not_none, 22 | assert_raises, 23 | assert_true, 24 | nottest, 25 | ) 26 | 27 | from easybase.connection import Connection 28 | from easybase.pool import ConnectionPool, NoConnectionsAvailable 29 | 30 | EASYBASE_HOST = os.environ.get('EASYBASE_HOST', '127.0.0.1') 31 | EASYBASE_PORT = os.environ.get('EASYBASE_PORT', 9090) 32 | EASYBASE_COMPAT = os.environ.get('EASYBASE_COMPAT', '0.98') 33 | EASYBASE_TRANSPORT = os.environ.get('EASYBASE_TRANSPORT', 'buffered') 34 | 35 | TABLE_PREFIX = 'easybase_test_tmp' 36 | TEST_TABLE_NAME = 'test1' 37 | 38 | connection_kwargs = dict(zip( 39 | ('host', 'port', 'table_prefix', 'compat', 'transport'), 40 | (EASYBASE_HOST, 41 | EASYBASE_PORT, 42 | TABLE_PREFIX, 43 | EASYBASE_COMPAT, 44 | EASYBASE_TRANSPORT 45 | ), 46 | )) 47 | 48 | connection = None 49 | tbl = None 50 | 51 | 52 | def setup_module(): 53 | global connection, tbl 54 | connection = Connection(**connection_kwargs) 55 | 56 | assert_is_not_none(connection) 57 | 58 | attempt_delete_table() 59 | 60 | cfs = { 61 | 'cf1': {}, 62 | 'cf2': None, 63 | 'cf3': {'max_versions': 10} 64 | } 65 | connection.create_table(TEST_TABLE_NAME, families=cfs) 66 | 67 | tbl = connection.table(TEST_TABLE_NAME) 68 | assert_is_not_none(tbl) 69 | 70 | 71 | def attempt_delete_table(): 72 | if connection.exist_table(TEST_TABLE_NAME): 73 | print("Test table already exists; removing it...") 74 | connection.delete_table(TEST_TABLE_NAME, disable=True) 75 | 76 | 77 | def test_tablename(): 78 | assert_equal(TABLE_PREFIX + '_' + TEST_TABLE_NAME, tbl.name) 79 | 80 | 81 | def test_connect_compat(): 82 | with assert_raises(ValueError): 83 | 
Connection(compat='a_invalide_version') 84 | 85 | 86 | def test_timeout_arg(): 87 | Connection( 88 | host=EASYBASE_HOST, 89 | port=EASYBASE_PORT, 90 | timeout=5000, 91 | autoconnect=False 92 | ) 93 | 94 | 95 | def test_enabling(): 96 | assert_true(connection.is_table_enabled(TEST_TABLE_NAME)) 97 | connection.disable_table(TEST_TABLE_NAME) 98 | assert_false(connection.is_table_enabled(TEST_TABLE_NAME)) 99 | connection.enable_table(TEST_TABLE_NAME) 100 | assert_true(connection.is_table_enabled(TEST_TABLE_NAME)) 101 | 102 | 103 | def test_prefix(): 104 | assert_equal(TABLE_PREFIX + '_', connection._table_name('')) 105 | assert_equal(TABLE_PREFIX + '_foo', connection._table_name('foo')) 106 | 107 | assert_equal(connection.table('foobar').name, TABLE_PREFIX + '_foobar') 108 | assert_equal(connection.table('foobar', use_prefix=False).name, 'foobar') 109 | 110 | c = Connection(EASYBASE_HOST, EASYBASE_PORT, autoconnect=False) 111 | assert_equal('foo', c._table_name('foo')) 112 | 113 | with assert_raises(TypeError): 114 | Connection(EASYBASE_HOST, EASYBASE_PORT, compat="0.98", autoconnect=False, table_prefix=111) 115 | 116 | with assert_raises(TypeError): 117 | Connection(EASYBASE_HOST, EASYBASE_PORT, compat="0.98", autoconnect=False, table_prefix=6.4) 118 | 119 | 120 | def test_families(): 121 | families = tbl.families() 122 | for name, fdesc in iteritems(families): 123 | assert_is_instance(name, text_type) 124 | assert_is_instance(fdesc, dict) 125 | # assert_is_instance(fdesc['BLOCKSIZE'], int) 126 | assert_in('VERSIONS', fdesc) 127 | 128 | 129 | @nottest 130 | def test_table_region(): 131 | regions = tbl.regions() 132 | assert_is_instance(regions, list) 133 | 134 | 135 | def test_invalid_table_create(): 136 | with assert_raises(ValueError): 137 | connection.create_table('t1', families={}) 138 | with assert_raises(TypeError): 139 | connection.create_table('t2', families=0) 140 | with assert_raises(TypeError): 141 | connection.create_table('t3', families=[]) 142 | 143 | 
144 | def test_put(): 145 | tbl.put('r1', {'cf1:c1': 'v1', 'cf2:c2': 'v2'}) 146 | tbl.put('r2', {'cf1:c1': 'v2'}, timestamp=19890604) 147 | tbl.put('r3', {'cf1:c1': 'v3'}, timestamp=1568028613) 148 | 149 | assert_equal({'cf1:c1': 'v1', 'cf2:c2': 'v2'}, tbl.row('r1')) 150 | r = tbl.row('r2', include_timestamp=False) 151 | with assert_raises(IndexError): 152 | assert_raises(r['cf1:c1'][0][1]) 153 | r = tbl.row('r2', include_timestamp=True) 154 | assert_equal(19890604, r['cf1:c1'][0][1]) 155 | 156 | # cleanup 157 | tbl.delete('r1') 158 | tbl.delete('r2') 159 | tbl.delete('r3') 160 | 161 | 162 | def test_puts(): 163 | rows = {} 164 | rks = [] 165 | for i in range(100): 166 | rk = 'rk_puts_{}'.format(i) 167 | rks.append(rk) 168 | rows[rk] = {'data': {'cf1:c1': 'v1', 'cf2:c2': 'v2'}} 169 | if random.random() > 0.5: 170 | rows[rk]['wal'] = True 171 | rows[rk]['timestamp'] = random.randint(100, 1000) 172 | tbl.puts(rows) 173 | 174 | rs = tbl.rows(rks) 175 | assert_equal(100, calc_rows(rs)) 176 | 177 | # cleanup 178 | for i in range(100): 179 | tbl.delete('rk_puts_{}'.format(i)) 180 | 181 | 182 | def test_compaction(): 183 | with assert_raises(NotImplementedError): 184 | connection.compact_table(TEST_TABLE_NAME) 185 | connection.compact_table(TEST_TABLE_NAME, major=True) 186 | 187 | 188 | def test_row(): 189 | row = tbl.row 190 | put = tbl.put 191 | rk = 'rk-test' 192 | 193 | with assert_raises(TypeError): 194 | row(rk, 123) 195 | 196 | with assert_raises(TypeError): 197 | row(rk, columns='a column string') 198 | 199 | with assert_raises(TypeError): 200 | row(rk, timerange=123) 201 | 202 | put(rk, {'cf1:c1': 'v1'}, timestamp=123) 203 | put(rk, {'cf1:c1': 'v2'}, timestamp=456) 204 | put(rk, {'cf1:c2': 'v3', 205 | 'cf2:c1': 'v4'}) 206 | put(rk, {'cf2:c2': 'v5'}, timestamp=789) 207 | 208 | rs = { 209 | 'cf1:c1': 'v2', 210 | 'cf1:c2': 'v3', 211 | 'cf2:c1': 'v4', 212 | 'cf2:c2': 'v5' 213 | } 214 | 215 | assert_dict_equal(rs, row(rk, include_timestamp=False)) 216 | 217 | rs = 
{'cf1:c1': [('v2', 456), ], 218 | 'cf2:c2': [('v5', 789), ], 219 | } 220 | 221 | assert_dict_equal(rs, row(rk, columns=['cf1:c1', 'cf2:c2'], include_timestamp=True)) 222 | 223 | rs = { 224 | 'cf1:c1': [('v2', 456), ] 225 | } 226 | assert_dict_equal(rs, row(rk, timestamp=456, include_timestamp=True)) 227 | assert_dict_equal({}, row(rk, timestamp=111, include_timestamp=True)) 228 | 229 | # cleanup 230 | tbl.delete(rk) 231 | 232 | 233 | def test_rows(): 234 | row_keys = ['rk_1', 'rk_2', 'rk_3'] 235 | old_value = {'cf1:c1': 'v_old_c1', 'cf1:c2': 'v_old_c2'} 236 | new_value = {'cf1:c1': 'v_new_c1', 'cf1:c2': 'v_new_c2'} 237 | 238 | # with assert_raises(TypeError): 239 | # tbl.rows(row_keys, object()) 240 | 241 | with assert_raises(TDecodeException): 242 | tbl.rows(row_keys, timestamp='invalid_timestamp') 243 | 244 | for rk in row_keys: 245 | tbl.put(rk, old_value, timestamp=111) 246 | 247 | for rk in row_keys: 248 | tbl.put(rk, new_value) 249 | 250 | assert_dict_equal({}, tbl.rows([])) 251 | 252 | rows = dict(tbl.rows(row_keys)) 253 | 254 | for rk in row_keys: 255 | assert_in(rk, rows) 256 | assert_dict_equal(new_value, rows[rk]) 257 | 258 | rows = dict(tbl.rows(row_keys, timestamp=222)) 259 | assert_equal(0, len(rows)) 260 | 261 | # cleanup 262 | for rk in row_keys: 263 | tbl.delete(rk) 264 | 265 | 266 | def calc_rows(scanner): 267 | idx = 0 268 | for _ in scanner: 269 | idx += 1 270 | return idx 271 | 272 | 273 | def test_scan(): 274 | with assert_raises(TypeError): 275 | list(tbl.scan(row_prefix='foo', row_start='bar')) 276 | 277 | if connection.compat == '0.99': 278 | with assert_raises(NotImplementedError): 279 | list(tbl.scan(filter='foo')) 280 | 281 | with assert_raises(ValueError): 282 | list(tbl.scan(limit=0)) 283 | 284 | with assert_raises(ValueError): 285 | list(tbl.scan(batch_size=0)) 286 | 287 | with assert_raises(ValueError): 288 | list(tbl.scan(scan_batching=0)) 289 | 290 | # write mass rows 291 | for i in range(1000): 292 | tbl.put( 293 | 
'rk_scan_{:04}'.format(i), 294 | { 295 | 'cf1:c1': 'v1', 296 | 'cf2:c2': 'v2', 297 | } 298 | ) 299 | 300 | scanner = tbl.scan(row_start='rk_scan_0010', row_stop='rk_scan_0020', columns=['cf1:c1']) 301 | assert_equal(10, calc_rows(scanner)) 302 | 303 | scanner = tbl.scan(row_start='non_exists', row_stop='end_stop') 304 | assert_equal(0, calc_rows(scanner)) 305 | 306 | scanner = tbl.scan(row_start='rk_scan_', row_stop='rk_scan_0010', columns=['cf2:c2']) 307 | 308 | rk, row = next(scanner) 309 | assert_equal(rk, 'rk_scan_0000') 310 | assert_equal(10 - 1, calc_rows(scanner)) 311 | 312 | scanner = tbl.scan(row_start='rk_scan_', row_stop='rk_scan_0100', columns=['cf2:c2'], limit=10) 313 | assert_equal(10, calc_rows(scanner)) 314 | 315 | scanner = tbl.scan(row_prefix='rk_scan_01', batch_size=10, limit=20) 316 | assert_equal(20, calc_rows(scanner)) 317 | 318 | scanner = tbl.scan(limit=20) 319 | next(scanner) 320 | next(scanner) 321 | scanner.close() 322 | 323 | with assert_raises(StopIteration): 324 | next(scanner) 325 | 326 | # cleanup 327 | for i in range(1000): 328 | tbl.delete('rk_scan_{:04}'.format(i)) 329 | 330 | 331 | def test_scan_reverse(): 332 | for i in range(1000): 333 | tbl.put( 334 | 'rk_scan_rev_{:04}'.format(i), 335 | { 336 | 'cf1:c1': 'v1', 337 | 'cf2:c2': 'v2', 338 | } 339 | ) 340 | 341 | scanner = tbl.scan(row_start='rk_scan_rev_0999', reversed=True) 342 | assert_equal(1000, calc_rows(scanner)) 343 | 344 | scanner = tbl.scan(limit=10, reversed=True) 345 | assert_equal(10, calc_rows(scanner)) 346 | 347 | scanner = tbl.scan(row_start='rk_scan_rev_0050', row_stop='rk_scan_rev_0000', reversed=True) 348 | 349 | k, _ = next(scanner) 350 | assert_equal('rk_scan_rev_0050', k) 351 | 352 | assert_equal(50 - 1, calc_rows(scanner)) 353 | 354 | # cleanup 355 | for i in range(1000): 356 | tbl.delete('rk_scan_rev_{:04}'.format(i)) 357 | 358 | 359 | def test_scan_filter(): 360 | for i in range(10): 361 | tbl.put( 362 | 'rk_filter_row_{:02}'.format(i), 363 | { 364 | 
'cf1:c1': 'filter_v1', 365 | 'cf2:v2': 'v2' 366 | } 367 | ) 368 | _filter = "SingleColumnValueFilter('cf1','c1', = , 'binary:filter_v1')" 369 | scanner = tbl.scan(filter=_filter) 370 | assert_equal(10, calc_rows(scanner)) 371 | 372 | # cleanup 373 | for i in range(10): 374 | tbl.delete('rk_filter_row_{:02}'.format(i)) 375 | 376 | 377 | def test_delete(): 378 | rk = 'rk_test_del' 379 | 380 | cols = { 381 | 'cf1:c1': 'v1', 382 | 'cf1:c2': 'v2', 383 | 'cf2:c1': 'v3', 384 | } 385 | 386 | tbl.put(rk, {'cf1:c1': 'v1old'}, timestamp=123) 387 | tbl.put(rk, cols) 388 | 389 | tbl.delete(rk, timestamp=111) 390 | assert_dict_equal({'cf1:c1': 'v1'}, tbl.row(rk, columns=['cf1:c1'])) 391 | 392 | tbl.delete(rk, ['cf1:c1'], timestamp=111) 393 | assert_equal({}, tbl.row(rk, columns=['cf1:c1'], max_versions=2)) 394 | 395 | rs = tbl.row(rk) 396 | assert_not_in('cf1:c1', rs) 397 | assert_in('cf1:c2', rs) 398 | assert_in('cf2:c1', rs) 399 | 400 | tbl.delete(rk) 401 | assert_dict_equal({}, tbl.row(rk)) 402 | 403 | 404 | def test_connection_pool(): 405 | from thriftpy2.thrift import TException 406 | 407 | def run(): 408 | name = threading.current_thread().name 409 | print("Thread %s starting" % name) 410 | 411 | def inner_function(): 412 | # Nested connection requests must return the same connection 413 | with pool.connection() as another_connection: 414 | assert connection is another_connection 415 | 416 | # Fake an exception once in a while 417 | if random.random() < .25: 418 | print("Introducing random failure") 419 | # connection.transport.close() 420 | raise TException("Fake transport exception") 421 | 422 | for _ in range(50): 423 | with pool.connection() as conn: 424 | conn.table(TEST_TABLE_NAME) 425 | 426 | try: 427 | inner_function() 428 | except TException: 429 | # This error should have been picked up by the 430 | # connection pool, and the connection should have 431 | # been replaced by a fresh one 432 | pass 433 | 434 | conn.table(TEST_TABLE_NAME) 435 | 436 | print("Thread %s 
done" % name) 437 | 438 | N_THREADS = 10 439 | 440 | with assert_raises(TypeError): 441 | ConnectionPool(EASYBASE_HOST, EASYBASE_PORT, size=[]) 442 | 443 | with assert_raises(ValueError): 444 | ConnectionPool(host=EASYBASE_HOST, port=EASYBASE_PORT, size=0) 445 | 446 | pool = ConnectionPool(host=EASYBASE_HOST, port=EASYBASE_PORT, size=3) 447 | threads = [threading.Thread(target=run) for i in range(N_THREADS)] 448 | 449 | for t in threads: 450 | t.start() 451 | 452 | while threads: 453 | for t in threads: 454 | t.join(timeout=.1) 455 | 456 | # filter out finished threads 457 | threads = [t for t in threads if t.is_alive()] 458 | print("%d threads still alive" % len(threads)) 459 | 460 | 461 | def test_pool_exhaustion(): 462 | pool = ConnectionPool(host=EASYBASE_HOST, port=EASYBASE_PORT, size=1) 463 | 464 | def run(): 465 | with assert_raises(NoConnectionsAvailable): 466 | with pool.connection(timeout=.1) as connection: 467 | connection.table(TEST_TABLE_NAME) 468 | 469 | with pool.connection(): 470 | # At this point the only connection is assigned to this thread, 471 | # so another thread cannot obtain a connection at this point. 472 | 473 | t = threading.Thread(target=run) 474 | t.start() 475 | t.join() 476 | 477 | 478 | if __name__ == '__main__': 479 | import logging 480 | import sys 481 | 482 | try: 483 | import faulthandler 484 | except ImportError: 485 | pass 486 | else: 487 | import signal 488 | 489 | faulthandler.register(signal.SIGUSR1) 490 | 491 | logging.basicConfig(level=logging.DEBUG) 492 | setup_module() 493 | method_name = 'test_{}'.format(sys.argv[1]) 494 | method = globals()[method_name] 495 | method() 496 | -------------------------------------------------------------------------------- /easybase/connection.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | 3 | """ 4 | EasyBase connection module. 
5 | """ 6 | 7 | import logging 8 | from six import iteritems, binary_type, text_type 9 | 10 | from thriftpy2.thrift import TApplicationException 11 | 12 | from thriftpy2.transport import TBufferedTransport, TFramedTransport 13 | from thriftpy2.protocol import TBinaryProtocol, TCompactProtocol 14 | from thriftpy2.rpc import make_client 15 | 16 | from HBase_thrift import TTableName, TColumnFamilyDescriptor, TTableDescriptor, TNamespaceDescriptor 17 | from HBase_thrift import THBaseService as HBase 18 | from HBase_thrift import TIOError 19 | 20 | from .kerberos import TSaslClientTransport 21 | from .table import Table 22 | from .util import pep8_to_camel_case 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | COMPAT_MODES = ('0.90', '0.92', '0.94', '0.96', '0.98', '2.2.0') 27 | THRIFT_TRANSPORTS = dict( 28 | buffered=TBufferedTransport, 29 | framed=TFramedTransport, 30 | ) 31 | THRIFT_PROTOCOLS = dict( 32 | binary=TBinaryProtocol, 33 | compact=TCompactProtocol, 34 | ) 35 | 36 | DEFAULT_HOST = 'localhost' 37 | DEFAULT_PORT = 9090 38 | DEFAULT_TRANSPORT = 'buffered' 39 | DEFAULT_COMPAT = '0.96' 40 | DEFAULT_PROTOCOL = 'binary' 41 | 42 | STRING_OR_BINARY = (binary_type, text_type) 43 | 44 | 45 | class Connection(object): 46 | """Connection to an HBase Thrift server. 47 | 48 | The `host` and `port` arguments specify the host name and TCP port 49 | of the HBase Thrift server to connect to. If omitted or ``None``, 50 | a connection to the default port on ``localhost`` is made. If 51 | specifed, the `timeout` argument specifies the socket timeout in 52 | milliseconds. 53 | 54 | If `autoconnect` is `True` (the default) the connection is made 55 | directly, otherwise :py:meth:`Connection.open` must be called 56 | explicitly before first use. 57 | 58 | The optional `table_prefix` and `table_prefix_separator` arguments 59 | specify a prefix and a separator string to be prepended to all table 60 | names, e.g. when :py:meth:`Connection.table` is invoked. 
For 61 | example, if `table_prefix` is ``myproject``, all tables tables will 62 | have names like ``myproject_XYZ``. 63 | 64 | The optional `compat` argument sets the compatibility level for 65 | this connection. Older HBase versions have slightly different Thrift 66 | interfaces, and using the wrong protocol can lead to crashes caused 67 | by communication errors, so make sure to use the correct one. This 68 | value can be either the string ``0.90``, ``0.92``, ``0.94``, or 69 | ``0.96`` (the default). 70 | 71 | The optional `transport` argument specifies the Thrift transport 72 | mode to use. Supported values for this argument are ``buffered`` 73 | (the default) and ``framed``. Make sure to choose the right one, 74 | since otherwise you might see non-obvious connection errors or 75 | program hangs when making a connection. HBase versions before 0.94 76 | always use the buffered transport. Starting with HBase 0.94, the 77 | Thrift server optionally uses a framed transport, depending on the 78 | argument passed to the ``hbase-daemon.sh start thrift`` command. 79 | The default ``-threadpool`` mode uses the buffered transport; the 80 | ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes use the 81 | framed transport. 82 | 83 | The optional `protocol` argument specifies the Thrift transport 84 | protocol to use. Supported values for this argument are ``binary`` 85 | (the default) and ``compact``. Make sure to choose the right one, 86 | since otherwise you might see non-obvious connection errors or 87 | program hangs when making a connection. ``TCompactProtocol`` is 88 | a more compact binary format that is typically more efficient to 89 | process as well. ``TBinaryProtocol`` is the default protocol that 90 | Happybase uses. 91 | 92 | .. versionadded:: 0.9 93 | `protocol` argument 94 | 95 | .. versionadded:: 0.5 96 | `timeout` argument 97 | 98 | .. versionadded:: 0.4 99 | `table_prefix_separator` argument 100 | 101 | .. 
versionadded:: 0.4 102 | support for framed Thrift transports 103 | 104 | :param str host: The host to connect to 105 | :param int port: The port to connect to 106 | :param int timeout: The socket timeout in milliseconds (optional) 107 | :param bool autoconnect: Whether the connection should be opened directly 108 | :param str table_prefix: Prefix used to construct table names (optional) 109 | :param str table_prefix_separator: Separator used for `table_prefix` 110 | :param str compat: Compatibility mode (optional) 111 | :param str transport: Thrift transport mode (optional) 112 | :param bool use_kerberos: Whether enable kerberos support or not (optional) 113 | :param str sasl_service_name: The HBase's kerberos service name, defaults to 'hbase' (optional) 114 | """ 115 | 116 | def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=None, 117 | autoconnect=True, table_prefix=None, 118 | table_prefix_separator='_', compat=DEFAULT_COMPAT, 119 | transport=DEFAULT_TRANSPORT, protocol=DEFAULT_PROTOCOL, 120 | use_kerberos=False, sasl_service_name='hbase'): 121 | # type: (str, int, int, bool, str, str, str, str, str, bool, str) -> None 122 | if transport not in THRIFT_TRANSPORTS: 123 | raise ValueError("'transport' must be one of %s" 124 | % ", ".join(THRIFT_TRANSPORTS.keys())) 125 | 126 | if table_prefix is not None \ 127 | and not isinstance(table_prefix, str): 128 | raise TypeError("'table_prefix' must be a string") 129 | 130 | if not isinstance(table_prefix_separator, str): 131 | raise TypeError("'table_prefix_separator' must be a string") 132 | 133 | if compat not in COMPAT_MODES: 134 | raise ValueError("'compat' must be one of %s" 135 | % ", ".join(COMPAT_MODES)) 136 | 137 | if protocol not in THRIFT_PROTOCOLS: 138 | raise ValueError("'protocol' must be one of %s" 139 | % ", ".join(THRIFT_PROTOCOLS)) 140 | 141 | # Allow host and port to be None, which may be easier for 142 | # applications wrapping a Connection instance. 
    def _refresh_thrift_client(self):
        # socket = TSocket(host=self.host, port=self.port, socket_timeout=self.timeout)

        # self.transport = self._transport_class()
        # protocol = self._protocol_class(self.transport, decode_response=False)
        """Refresh the Thrift socket, transport, and client."""
        if self.use_kerberos:
            # SASL/Kerberos path: wrap the configured transport class so the
            # client performs the SASL handshake against the HBase service.
            transport = TSaslClientTransport(
                self._transport_class, self.host, self.sasl_service_name)
            self.client = make_client(HBase, self.host, port=self.port,
                                      # proto_factory=protocol,
                                      trans_factory=transport,
                                      timeout=self.timeout)
        else:
            # NOTE(review): `self._transport_class` / `self._protocol_class`
            # (chosen via the `transport`/`protocol` constructor args) are not
            # passed to make_client here, so thriftpy2's defaults are used —
            # confirm whether the non-kerberos path is meant to honor them.
            self.client = make_client(
                HBase, self.host, port=self.port, timeout=self.timeout)
199 | 200 | This method closes the underlying Thrift transport (TCP connection). 201 | """ 202 | self.client.close() 203 | # if not self.transport.is_open(): 204 | # return 205 | 206 | # if logger is not None: 207 | # # If called from __del__(), module variables may no longer 208 | # # exist. 209 | # logger.debug( 210 | # "Closing Thrift transport to %s:%d", 211 | # self.host, self.port) 212 | 213 | # self.transport.close() 214 | 215 | def table(self, name, use_prefix=True): 216 | # type: (str, bool) -> Table 217 | """Return a table object. 218 | 219 | Returns a :py:class:`easybase.Table` instance for the table 220 | named `name`. This does not result in a round-trip to the 221 | server, and the table is not checked for existence. 222 | 223 | The optional `use_prefix` argument specifies whether the table 224 | prefix (if any) is prepended to the specified `name`. Set this 225 | to `False` if you want to use a table that resides in another 226 | ‘prefix namespace’, e.g. a table from a ‘friendly’ application 227 | co-hosted on the same HBase instance. See the `table_prefix` 228 | argument to the :py:class:`Connection` constructor for more 229 | information. 230 | 231 | :param str name: the name of the table 232 | :param bool use_prefix: whether to use the table prefix (if any) 233 | :return: Table instance 234 | :rtype: :py:class:`Table` 235 | """ 236 | if use_prefix: 237 | name = self._table_name(name) 238 | return Table(name, self) 239 | 240 | # 241 | # Table administration and maintenance 242 | # 243 | 244 | def tables(self): 245 | """Return a list of table names available in this HBase instance. 246 | 247 | If a `table_prefix` was set for this :py:class:`Connection`, only 248 | tables that have the specified prefix will be listed. 
249 | 250 | :return: The table names 251 | :rtype: List of strings 252 | """ 253 | if self.table_prefix is not None: 254 | tbl_pattern = self.table_prefix + '.*' 255 | else: 256 | tbl_pattern = '.*' 257 | names = self.search_table(tbl_pattern, include_systable=False) 258 | if names: 259 | offset = len(self.table_prefix) if self.table_prefix else 0 260 | names = [n[offset:] for n in names] 261 | 262 | return names 263 | 264 | def create_table(self, name, families, ns_name=None): 265 | # type: (str, dict, str) -> None 266 | """Create a table. 267 | 268 | :param str name: The table name 269 | :param dict families: The name and options for each column family 270 | :param str ns_name: the name of namespace, defaults to None 271 | 272 | The `families` argument is a dictionary mapping column family 273 | names to a dictionary containing the options for this column 274 | family, e.g. 275 | 276 | :: 277 | 278 | families = { 279 | 'cf1': dict(max_versions=10), 280 | 'cf2': dict(max_versions=1, block_cache_enabled=False), 281 | 'cf3': dict(), # use defaults 282 | } 283 | connection.create_table('mytable', families) 284 | 285 | These options correspond to the ColumnDescriptor structure in 286 | the Thrift API, but note that the names should be provided in 287 | Python style, not in camel case notation, e.g. `time_to_live`, 288 | not `timeToLive`. 
The following options are supported: 289 | 290 | * ``max_versions`` (`int`) 291 | * ``compression`` (`str`) 292 | * ``in_memory`` (`bool`) 293 | * ``bloom_filter_type`` (`str`) 294 | * ``bloom_filter_vector_size`` (`int`) 295 | * ``bloom_filter_nb_hashes`` (`int`) 296 | * ``block_cache_enabled`` (`bool`) 297 | * ``time_to_live`` (`int`) 298 | """ 299 | name = self._table_name(name) 300 | if not isinstance(families, dict): 301 | raise TypeError("'families' arg must be a dictionary") 302 | 303 | if not families: 304 | raise ValueError( 305 | "Cannot create table %r (no column families specified)" 306 | % name) 307 | 308 | # table_descriptors = [{'tableName': name.encode()}] 309 | family_desc = [] 310 | for cf_name, options in iteritems(families): 311 | if options is None: 312 | options = dict() 313 | 314 | kwargs = dict() 315 | for option_name, value in iteritems(options): 316 | if isinstance(value, STRING_OR_BINARY): 317 | value = value.encode() 318 | 319 | kwargs[pep8_to_camel_case(option_name)] = value 320 | 321 | # if not cf_name.endswith(':'): 322 | # cf_name += ':' 323 | # kwargs['name'] = cf_name.encode() 324 | # table_descriptors.append(TTableDescriptor(**kwargs)) 325 | cf = TColumnFamilyDescriptor(name=cf_name.encode(), **kwargs) 326 | family_desc.append(cf) 327 | if ns_name and not self.get_namespace(ns_name): 328 | try: 329 | self.create_namespace(ns_name) 330 | except TIOError: 331 | print("Failed to create namespace: {}".format(ns_name)) 332 | return 333 | 334 | tbl_name = TTableName(ns=ns_name, qualifier=name.encode()) 335 | tdesc = TTableDescriptor(tableName=tbl_name, columns=family_desc) 336 | try: 337 | self.client.createTable(tdesc, splitKeys=None) 338 | except TApplicationException: 339 | raise NotImplementedError( 340 | "current thrift not support create_table method") 341 | except TIOError as e: 342 | print(e.message) 343 | 344 | def delete_table(self, name, disable=False, ns_name=None): 345 | # type: (str, bool, str) -> None 346 | """Delete the 
specified table. 347 | 348 | .. versionadded:: 0.5 349 | `disable` argument 350 | 351 | In HBase, a table always needs to be disabled before it can be 352 | deleted. If the `disable` argument is `True`, this method first 353 | disables the table if it wasn't already and then deletes it. 354 | 355 | :param str name: The table name 356 | :param bool disable: Whether to first disable the table if needed 357 | :param str ns_name: the namespace name, defaults to none 358 | """ 359 | if disable and self.is_table_enabled(name): 360 | self.disable_table(name, ns_name) 361 | 362 | self.client.deleteTable(self.get_tablename(name, ns_name)) 363 | 364 | def enable_table(self, name, ns_name=None): 365 | # type: (str, str) -> None 366 | """Enable the specified table. 367 | 368 | :param str name: The table name 369 | :param str ns_name: The tablespace name 370 | """ 371 | # name = self._table_name(name) 372 | self.client.enableTable(self.get_tablename(name, ns_name)) 373 | 374 | def disable_table(self, name, ns_name=None): 375 | # type: (str, str) -> None 376 | """Disable the specified table. 377 | 378 | :param str name: The table name 379 | :param str ns_name: The namespace name 380 | """ 381 | # name = self._table_name(name).encode() 382 | self.client.disableTable(self.get_tablename(name, ns_name)) 383 | 384 | def is_table_enabled(self, name, ns_name=None): 385 | # type: (str, str) -> bool 386 | """Return whether the specified table is enabled. 387 | 388 | :param str name: The table name 389 | :param str ns_name: The tablespace name 390 | 391 | :return: whether the table is enabled 392 | :rtype: bool 393 | """ 394 | # name = self._table_name(name).encode() 395 | return self.client.isTableEnabled(self.get_tablename(name, ns_name)) 396 | 397 | def compact_table(self, name, major=False): 398 | # type: (str, bool) -> None 399 | """Compact the specified table. 400 | 401 | :param str name: The table name 402 | :param bool major: Whether to perform a major compaction. 
403 | """ 404 | raise NotImplementedError("not implement yet") 405 | # name = self._table_name(name) 406 | # if major: 407 | # self.client.majorCompact(name) 408 | # else: 409 | # self.client.compact(name) 410 | 411 | def exist_table(self, name, ns_name=None): 412 | # type: (str, str) -> bool 413 | """Return whether the sepcified table is exists 414 | Notes: HBase 1.x not support this method 415 | 416 | :param str name: The table name 417 | :param str ns_name: The tablespace name 418 | :return whether the table is exists 419 | :rtype: bool 420 | """ 421 | try: 422 | return self.client.tableExists(self.get_tablename(name, ns_name)) 423 | except TIOError: 424 | return False 425 | 426 | def search_table(self, pattern, include_systable): 427 | # type (str, bool) -> List[String] 428 | """Return table names of tables that match the given pattern 429 | 430 | :param str pattern: The regular expression to match against 431 | :param bool include_systable: set to false if match only against userspace tables 432 | :return the table names of the matching table 433 | """ 434 | try: 435 | result = self.client.getTableNamesByPattern( 436 | pattern, include_systable) 437 | return [x.qualifier for x in result] 438 | except TIOError: 439 | return [] 440 | 441 | def get_tables_by_namespace(self, ns_name): 442 | # type(str) -> List[Str] 443 | """Return names of tables in the given namespace 444 | 445 | :param str ns_name: the namespace's name 446 | :return the table names in the namespace 447 | """ 448 | try: 449 | if self.get_namespace(ns_name): 450 | result = self.client.getTableNamesByNamespace(ns_name) 451 | return [x.qualifier for x in result] 452 | except TIOError as e: 453 | print(e) 454 | return [] 455 | 456 | def get_tablename(self, name, ns_name=None): 457 | # type: (str, str) -> TTableName 458 | """Return the py:class:TTableName class of the spcified table name 459 | 460 | :param str name: The table name 461 | :param str ns_name: the namespace's name 462 | :return the 
py:class:TTableName Class 463 | :rtype: class 464 | """ 465 | return TTableName(ns=ns_name, qualifier=self._table_name(name).encode()) 466 | 467 | def create_namespace(self, ns_name): 468 | # type: (str) -> None 469 | """Create namespace with ns_name 470 | 471 | :param str ns_name: the name of namespace 472 | """ 473 | tns = TNamespaceDescriptor(ns_name, {}) 474 | try: 475 | if not self.get_namespace(ns_name): 476 | self.client.createNamespace(tns) 477 | except TIOError as e: 478 | print(e) 479 | 480 | def get_namespace(self, ns_name): 481 | # type: (str) -> TNamespaceDescriptor 482 | """Return the py:class:TNamespaceDescriptor class of the specified namepspace name 483 | 484 | :param str ns_name: the namespace name 485 | :return the py:class:TNamespaceDescriptor Class 486 | :rtype: class 487 | """ 488 | try: 489 | result = self.client.getNamespaceDescriptor(ns_name) 490 | return result 491 | except TIOError: 492 | return None 493 | 494 | def delete_namespace(self, ns_name, cascade=False): 495 | # type: (str, bool) -> None 496 | """Delete specified namespace 497 | 498 | :param str ns_name: the namespace name 499 | :param bool cascade: get ride of namespace with all tables in it 500 | :return None 501 | """ 502 | try: 503 | if self.get_namespace(ns_name): 504 | # exists table ? 
def make_timerange(ts):
    """Build a ttypes.TTimeRange for `ts`.

    :param list_or_tuple ts: one or two timestamps; with a single element it
        is taken as the minimum and the current time is used as the maximum
    :return: TTimeRange, or None if ts is None
    :raises TypeError: if ts is neither a list nor a tuple
    """
    if ts is None:
        return ts
    if not isinstance(ts, (tuple, list)):
        raise TypeError("'timerange' must be list or tuple")
    if len(ts) == 1:
        # Only a minimum was supplied; use "now" as the maximum. The
        # previous code assigned ts[1] on a 1-element sequence, which
        # raised IndexError (and would never work for tuples). Build a
        # new pair instead of mutating the caller's sequence.
        ts = (ts[0], int(time.time()))

    return TTimeRange(minStamp=ts[0], maxStamp=ts[1])


def make_columns(cols):
    """Convert column names into a list of ttypes.TColumn.

    :param list_or_tuple cols: column names, e.g. ['cf1:c1', 'cf2'] — the
        qualifier part after ':' is optional
    :return: list of TColumn, or None if cols is None
    :raises TypeError: if cols is neither a list nor a tuple
    """
    if cols is None:
        return cols
    if not isinstance(cols, (tuple, list)):
        raise TypeError("'columns' must be list or tuple")
    columns = []
    for c in cols:
        c = c.split(':')
        # A bare family name yields a qualifier of None (whole family).
        f, q = (c[0].encode(), c[1].encode()) if len(
            c) == 2 else (c[0].encode(), None)
        columns.append(TColumn(family=f, qualifier=q))

    return columns


def make_columnvalue(data):
    """Convert a data mapping into a list of ttypes.TColumnValue.

    :param dict data: maps 'family:qualifier' column names to values,
        e.g. {'cf2:c3': 'v3', 'cf1:c1': 'v1'}
    :return: list of TColumnValue
    """
    cols = []
    for column, value in iteritems(data):
        f, q = column.split(":")
        cols.append(TColumnValue(family=f.encode(),
                                 qualifier=q.encode(), value=value))
    return cols


def make_row(cell_map, include_timestamp):
    """Make a row dict for a cell mapping like ttypes.TRowResult.columns.

    `cell_map` is a list of TColumnValue-like objects with `family`,
    `qualifier`, `value` (all bytes) and `timestamp` attributes.

    With `include_timestamp`, each column maps to a list of
    `(value, timestamp)` tuples (one per version)::

        {'cf1:c1': [('v2', 456)], 'cf1:c2': [('v3', 1568097958364)]}

    Without it, each column maps to the (single) decoded value::

        {'cf1:c1': 'v2', 'cf1:c2': 'v3'}
    """
    rs = {}
    for r in cell_map:
        q = r.family.decode() + ":" + r.qualifier.decode()
        if include_timestamp:
            # Accumulate every version of the cell under its column name.
            cell = rs.get(q, [])
            cell.append((r.value.decode(), r.timestamp))
            rs[q] = cell
        else:
            rs[q] = r.value.decode()
    return rs


def make_ordered_row(sorted_columns, include_timestamp):
    """Make a row dict for sorted column results from scans.

    Preserves the server-provided column order in an OrderedDict; cells
    become `(value, timestamp)` tuples when `include_timestamp` is set.
    """
    cellfn = include_timestamp and make_cell_timestamp or make_cell
    return OrderedDict(
        (column.columnName, cellfn(column.cell))
        for column in sorted_columns)
117 | """ 118 | 119 | def __init__(self, name, connection): 120 | self.name = name 121 | self.connection = connection 122 | 123 | def __repr__(self): 124 | return '<%s.%s name=%r>' % ( 125 | __name__, 126 | self.__class__.__name__, 127 | self.name, 128 | ) 129 | 130 | def families(self): 131 | """Retrieve the column families for this table. 132 | 133 | :return: Mapping from column family name to settings dict 134 | :rtype: dict 135 | """ 136 | descriptor = self.connection.client.getTableDescriptor( 137 | self.get_tablename()) 138 | 139 | # convert bytes to string 140 | families = {} 141 | for cf in descriptor.columns: 142 | families[cf.name.decode()] = {k.decode(): v.decode() 143 | for k, v in iteritems(cf.attributes)} 144 | # families = {cf.name.decode(): cf.attributes for cf in descriptor.columns} 145 | 146 | return families 147 | 148 | def _column_family_names(self): 149 | """Retrieve the column family names for this table (internal use)""" 150 | names = self.connection.client.getColumnDescriptors(self.name).keys() 151 | return [name.rstrip(':') for name in names] 152 | 153 | # 154 | # Data retrieval 155 | # 156 | 157 | def row(self, row, columns=None, timestamp=None, timerange=None, max_versions=1, include_timestamp=False): 158 | """Retrieve a single row of data. 159 | 160 | This method retrieves the row with the row key specified in the `row` 161 | argument and returns the columns and values for this row as 162 | a dictionary. 163 | 164 | The `row` argument is the row key of the row. If the `columns` argument 165 | is specified, only the values for these columns will be returned 166 | instead of all available columns. The `columns` argument should be 167 | a list or tuple containing strings. Each name can be a column family, 168 | such as `cf1` or `cf1:` (the trailing colon is not required), or 169 | a column family with a qualifier, such as `cf1:col1`. 
170 | 171 | If specified, the `timestamp` argument specifies the maximum version 172 | that results may have. The `include_timestamp` argument specifies 173 | whether cells are returned as single values or as `(value, timestamp)` 174 | tuples. 175 | 176 | :param str row: the row key 177 | :param list_or_tuple columns: list of columns (optional) 178 | :param int timestamp: timestamp (optional) 179 | :param list_or_tuple timerange: list of timestamp ,ONLY include 2 elements(option) 180 | :param int max_versions: number of row's version (optional) 181 | :param bool include_timestamp: whether timestamps are returned 182 | 183 | :return: Mapping of columns (both qualifier and family) to values 184 | :rtype: dict 185 | """ 186 | if columns is not None and not isinstance(columns, (tuple, list)): 187 | raise TypeError("'columns' must be a tuple or list") 188 | if timerange is not None and not isinstance(timerange, (tuple, list)): 189 | raise TypeError("'timerange' must be a tuple or list") 190 | cols = make_columns(columns) 191 | tt = make_timerange(timerange) 192 | 193 | tget = TGet(row=row.encode(), columns=cols, 194 | timestamp=timestamp, timeRange=tt, maxVersions=max_versions) 195 | result = self.connection.client.get(self.name.encode(), tget) 196 | if not result: 197 | return {} 198 | if max_versions > 1: 199 | include_timestamp = True 200 | return make_row(result.columnValues, include_timestamp) 201 | 202 | def rows(self, rows, columns=None, timestamp=None, timerange=None, max_versions=1, 203 | include_timestamp=False): 204 | """Retrieve multiple rows of data. 205 | 206 | This method retrieves the rows with the row keys specified in the 207 | `rows` argument, which should be should be a list (or tuple) of row 208 | keys. The return value is a list of `(row_key, row_dict)` tuples. 209 | 210 | The `columns`, `timestamp` and `include_timestamp` arguments behave 211 | exactly the same as for :py:meth:`row`. 
    def scan(self, row_start=None, row_stop=None, row_prefix=None,
             columns=None, filter=None, timerange=None,
             include_timestamp=False, batch_size=1000, scan_batching=None,
             limit=None, reversed=False, max_versions=1):
        """Create a scanner for data in the table.

        This method returns an iterable that can be used for looping over the
        matching rows. Scanners can be created in two ways:

        * The `row_start` and `row_stop` arguments specify the row keys where
          the scanner should start and stop. It does not matter whether the
          table contains any rows with the specified keys: the first row after
          `row_start` will be the first result, and the last row before
          `row_stop` will be the last result. Note that the start of the range
          is inclusive, while the end is exclusive.

          Both `row_start` and `row_stop` can be `None` to specify the start
          and the end of the table respectively. If both are omitted, a full
          table scan is done. Note that this usually results in severe
          performance problems.

        * Alternatively, if `row_prefix` is specified, only rows with row keys
          matching the prefix will be returned. If given, `row_start` and
          `row_stop` cannot be used.

        The `columns`, `timerange` and `include_timestamp` arguments behave
        exactly the same as for :py:meth:`row`.

        The `filter` argument may be a filter string that will be applied at
        the server by the region servers.

        If `limit` is given, at most `limit` results will be returned.

        The `batch_size` argument specifies how many results should be
        retrieved per batch when retrieving results from the scanner. Only set
        this to a low value (or even 1) if your data is large, since a low
        batch size results in added round-trips to the server.

        The optional `scan_batching` is for advanced usage only; it
        translates to `Scan.setBatching()` at the Java side (inside the
        Thrift server). By setting this value rows may be split into
        partial rows, so result rows may be incomplete, and the number
        of results returned by the scanner may no longer correspond to
        the number of rows matched by the scan.

        The optional `max_versions` argument specifies how many versions
        should be retrieved per row; when it is greater than 1,
        `include_timestamp` is forced on so versions can be told apart.

        **Compatibility notes:**

        * The `filter` argument is only available when using HBase 0.92
          (or up). In HBase 0.90 compatibility mode, specifying
          a `filter` raises an exception.

        .. versionadded:: 0.8
           `scan_batching` argument

        :param str row_start: the row key to start at (inclusive)
        :param str row_stop: the row key to stop at (exclusive)
        :param str row_prefix: a prefix of the row key that must match
        :param list_or_tuple columns: list of columns (optional)
        :param str filter: a filter string (optional)
        :param list_or_tuple timerange: time range (optional)
        :param bool include_timestamp: whether timestamps are returned
        :param int batch_size: batch size for retrieving results
        :param bool scan_batching: server-side scan batching (optional)
        :param int limit: max number of rows to return
        :param bool reversed: whether to scan in reverse key order
        :param int max_versions: number of row's versions (optional)

        :return: generator yielding the rows matching the scan
        :rtype: iterable of `(row_key, row_data)` tuples
        """
        # convert to bytes
        if row_start:
            row_start = row_start.encode()

        if row_stop:
            row_stop = row_stop.encode()

        if batch_size < 1:
            raise ValueError("'batch_size' must be >= 1")

        if limit is not None and limit < 1:
            raise ValueError("'limit' must be >= 1")

        if scan_batching is not None and scan_batching < 1:
            raise ValueError("'scan_batching' must be >= 1")

        if row_prefix is not None:
            if row_start is not None or row_stop is not None:
                raise TypeError(
                    "'row_prefix' cannot be combined with 'row_start' "
                    "or 'row_stop'")

            # Translate the prefix into an equivalent [start, stop) range:
            # the stop key is the prefix with its last byte incremented.
            row_start = row_prefix.encode()
            row_stop = str_increment(row_prefix).encode()

        # if row_start is None:
        #     row_start = b''
        logger.debug("start_row = {}, stop_row = {}".format(row_start, row_stop))
        cols = make_columns(columns)
        tt = make_timerange(timerange)
        if max_versions > 1:
            include_timestamp = True
        tscan = TScan(
            startRow=row_start,
            stopRow=row_stop,
            timeRange=tt,
            columns=cols,
            caching=batch_size,
            filterString=filter,
            batchSize=scan_batching,
            reversed=reversed,
            maxVersions=max_versions,
        )

        scan_id = self.connection.client.openScanner(self.name.encode(), tscan)

        logger.debug("Opened scanner (id=%d) on '%s'", scan_id, self.name)

        # n_returned counts rows yielded to the caller; n_fetched counts rows
        # pulled from the server (they differ when iteration stops early).
        n_returned = n_fetched = 0
        try:
            while True:
                if limit is None:
                    how_many = batch_size
                else:
                    # Never fetch more rows than the caller still wants.
                    how_many = min(batch_size, limit - n_returned)

                items = self.connection.client.getScannerRows(
                    scan_id, how_many)

                if not items:
                    return  # scan has finished

                n_fetched += len(items)

                # enumerate() continues the running row count across batches.
                for n_returned, item in enumerate(items, n_returned + 1):
                    row = make_row(item.columnValues, include_timestamp)

                    yield item.row.decode(), row

                    if limit is not None and n_returned == limit:
                        return  # scan has finished
        finally:
            # Always release the server-side scanner, even when the caller
            # abandons the generator early or an error is raised.
            self.connection.client.closeScanner(scan_id)
            logger.debug(
                "Closed scanner (id=%d) on '%s' (%d returned, %d fetched)",
                scan_id, self.name, n_returned, n_fetched)
The `data` argument is dictionary that maps columns 413 | to values. Column names must include a family and qualifier part, e.g. 414 | `cf:col`, though the qualifier part may be the empty string, e.g. 415 | `cf:`. 416 | 417 | Note that, in many situations, :py:meth:`batch()` is a more appropriate 418 | method to manipulate data. 419 | 420 | .. versionadded:: 0.7 421 | `wal` argument 422 | 423 | :param str row: the row key 424 | :param dict data: the data to store 425 | :param int timestamp: timestamp (optional) 426 | :param bool wal: whether to write to the WAL (optional) 427 | """ 428 | # if wal is None: 429 | # wal = self.wal 430 | cols = make_columnvalue(data) 431 | 432 | tput = TPut(row=row.encode(), columnValues=cols, 433 | durability=wal, timestamp=timestamp) 434 | self.connection.client.put(self.name, tput) 435 | 436 | def puts(self, rows): 437 | """"Commit a List of Puts to the table 438 | 439 | This method stores the data in sepcified by `row` . the `rows` argument is list that containers multiple `row` . 440 | e.g 441 | rows = { 442 | 'r1': {'data':{'cf1:c1':'v1', 'cf2:c2': 'v2'}, 443 | 'wal': True, 'timestamp':123}, 444 | 'r2': {'data':{'cf1:c1': 'v2', 'cf2:c2': 'v3'}, 445 | } 446 | } 447 | each `row` is dictionary that the key is row key and the 448 | value maps columns to values . Columns names must include a family and qualifier part. 449 | 450 | 451 | :param dict rows: contains multiple number of `row` 452 | """ 453 | tputs = [] 454 | for rk, item in iteritems(rows): 455 | cols = make_columnvalue(item['data']) 456 | tput = TPut(row=rk.encode(), columnValues=cols, 457 | durability=item.get('wal', True), 458 | timestamp=item.get('timestamp', None)) 459 | tputs.append(tput) 460 | self.connection.client.putMultiple(self.name.encode(), tputs) 461 | 462 | def delete(self, row, columns=None, timestamp=None, deletetype=1, attributes=None, durability=False): 463 | """Delete data from the table. 
464 | 465 | This method deletes all columns for the row specified by `row`, or only 466 | some columns if the `columns` argument is specified. 467 | 468 | Note that, in many situations, :py:meth:`batch()` is a more appropriate 469 | method to manipulate data. 470 | 471 | .. versionadded:: 0.7 472 | `wal` argument 473 | .. versiondeleted:: 0.9 474 | `wal` argument 475 | - row 476 | - columns 477 | - timestamp 478 | - deleteType 479 | - attributes 480 | - durability 481 | :param str row: the row key 482 | :param list_or_tuple columns: list of columns (optional) 483 | :param int timestamp: timestamp (optional) 484 | :param int deletetype: delete type,default is 1 485 | :param dict attributes: attributes 486 | :param int durability: 487 | """ 488 | cols = make_columns(columns) 489 | tdelete = TDelete(row=row.encode(), columns=cols, timestamp=timestamp, deleteType=deletetype, 490 | attributes=attributes, durability=durability) 491 | self.connection.client.deleteSingle(self.name.encode(), tdelete) 492 | 493 | # 494 | # Atomic counters 495 | # 496 | 497 | def counter_get(self, row, column): 498 | """Retrieve the current value of a counter column. 499 | 500 | This method retrieves the current value of a counter column. If the 501 | counter column does not exist, this function initialises it to `0`. 502 | 503 | Note that application code should *never* store a incremented or 504 | decremented counter value directly; use the atomic 505 | :py:meth:`Table.counter_inc` and :py:meth:`Table.counter_dec` methods 506 | for that. 507 | 508 | :param str row: the row key 509 | :param str column: the column name 510 | 511 | :return: counter value 512 | :rtype: int 513 | """ 514 | # Don't query directly, but increment with value=0 so that the counter 515 | # is correctly initialised if didn't exist yet. 516 | return self.counter_inc(row, column, value=0) 517 | 518 | def counter_set(self, row, column, value=0): 519 | """Set a counter column to a specific value. 
520 | 521 | This method stores a 64-bit signed integer value in the specified 522 | column. 523 | 524 | Note that application code should *never* store a incremented or 525 | decremented counter value directly; use the atomic 526 | :py:meth:`Table.counter_inc` and :py:meth:`Table.counter_dec` methods 527 | for that. 528 | 529 | :param str row: the row key 530 | :param str column: the column name 531 | :param int value: the counter value to set 532 | """ 533 | self.put(row, {column: pack_i64(value)}) 534 | 535 | def counter_inc(self, row, column, value=1): 536 | """Atomically increment (or decrements) a counter column. 537 | 538 | This method atomically increments or decrements a counter column in the 539 | row specified by `row`. The `value` argument specifies how much the 540 | counter should be incremented (for positive values) or decremented (for 541 | negative values). If the counter column did not exist, it is 542 | automatically initialised to 0 before incrementing it. 543 | 544 | :param str row: the row key 545 | :param str column: the column name 546 | :param int value: the amount to increment or decrement by (optional) 547 | 548 | :return: counter value after incrementing 549 | :rtype: int 550 | """ 551 | return self.connection.client.atomicIncrement( 552 | self.name, row, column, value) 553 | 554 | def counter_dec(self, row, column, value=1): 555 | """Atomically decrement (or increments) a counter column. 556 | 557 | This method is a shortcut for calling :py:meth:`Table.counter_inc` with 558 | the value negated. 
559 | 560 | :return: counter value after decrementing 561 | :rtype: int 562 | """ 563 | return self.counter_inc(row, column, -value) 564 | 565 | def truncate(self): 566 | """truncate table 567 | 568 | This method will delete all rows in table 569 | 570 | :return True if successfully else False 571 | """ 572 | return self.connection.client.truncateTable(self.name, True) 573 | 574 | def get_tablename(self): 575 | """Return the py:class:TTableName class of the spcified table name 576 | 577 | :return the py:class:TTableName Class 578 | :rtype: class 579 | """ 580 | return TTableName(ns=None, qualifier=self.name.encode()) 581 | 582 | @staticmethod 583 | def _bytes2str(obj): 584 | if isinstance(obj, bytes): 585 | return obj.decode() 586 | if isinstance(obj, dict): 587 | return {x.decode(): y.decode() for x, y in iteritems(obj)} 588 | 589 | def batch(self, timestamp=None, batch_size=None, transaction=False): 590 | """Create a new batch operation for current table 591 | 592 | This method returns a new :py:class:`Batch` instance that can be 593 | used for mass data manipulation. The `timestamp` argument applies 594 | all puts and deletes on the batch 595 | 596 | If given, the `batch_size` argument specifies the maximum batch size 597 | after which the batch should send the mutations to the server, By 598 | default this is unbounded. 599 | 600 | The `transaction` argument specifies wether the returned :py:class:`Batch` 601 | instance should act in a transaction-like manner when used as context manager 602 | in a ``with`` block of code. The `transaction` flag cannot be used in combination 603 | with `batch_size`. 
604 | 605 | :param int timestamp: timestamp (optional) 606 | :param int batch_size: batch size (optional) 607 | :param bool transaction: whether this batch should behave like a transaction 608 | 609 | :return: Batch instance 610 | :rtype: :py:class:`Batch` 611 | """ 612 | raise NotImplementedError 613 | # kwargs = locals().copy() 614 | 615 | # del kwargs['self'] 616 | # return Batch(table=self, **kwargs) 617 | -------------------------------------------------------------------------------- /coverage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | /Users/wgzhao/Codes/easybase 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 
249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 
582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | -------------------------------------------------------------------------------- /easybase/HBase.thrift: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | // NOTE: The "required" and "optional" keywords for the service methods are purely for documentation 20 | 21 | namespace java org.apache.hadoop.hbase.thrift2.generated 22 | namespace cpp apache.hadoop.hbase.thrift2 23 | namespace rb Apache.Hadoop.Hbase.Thrift2 24 | namespace py hbase 25 | namespace perl Hbase 26 | 27 | struct TTimeRange { 28 | 1: required i64 minStamp, 29 | 2: required i64 maxStamp 30 | } 31 | 32 | /** 33 | * Addresses a single cell or multiple cells 34 | * in a HBase table by column family and optionally 35 | * a column qualifier and timestamp 36 | */ 37 | struct TColumn { 38 | 1: required binary family, 39 | 2: optional binary qualifier, 40 | 3: optional i64 timestamp 41 | } 42 | 43 | /** 44 | * Represents a single cell and its value. 45 | */ 46 | struct TColumnValue { 47 | 1: required binary family, 48 | 2: required binary qualifier, 49 | 3: required binary value, 50 | 4: optional i64 timestamp, 51 | 5: optional binary tags, 52 | 6: optional byte type 53 | } 54 | 55 | /** 56 | * Represents a single cell and the amount to increment it by 57 | */ 58 | struct TColumnIncrement { 59 | 1: required binary family, 60 | 2: required binary qualifier, 61 | 3: optional i64 amount = 1 62 | } 63 | 64 | /** 65 | * if no Result is found, row and columnValues will not be set. 66 | */ 67 | struct TResult { 68 | 1: optional binary row, 69 | 2: required list columnValues, 70 | 3: optional bool stale = false 71 | 4: optional bool partial = false 72 | } 73 | 74 | /** 75 | * Specify type of delete: 76 | * - DELETE_COLUMN means exactly one version will be removed, 77 | * - DELETE_COLUMNS means previous versions will also be removed. 78 | */ 79 | enum TDeleteType { 80 | DELETE_COLUMN = 0, 81 | DELETE_COLUMNS = 1, 82 | DELETE_FAMILY = 2, 83 | DELETE_FAMILY_VERSION = 3 84 | } 85 | 86 | /** 87 | * Specify Durability: 88 | * - SKIP_WAL means do not write the Mutation to the WAL. 
89 | * - ASYNC_WAL means write the Mutation to the WAL asynchronously, 90 | * - SYNC_WAL means write the Mutation to the WAL synchronously, 91 | * - FSYNC_WAL means Write the Mutation to the WAL synchronously and force the entries to disk. 92 | */ 93 | 94 | enum TDurability { 95 | USE_DEFAULT = 0, 96 | SKIP_WAL = 1, 97 | ASYNC_WAL = 2, 98 | SYNC_WAL = 3, 99 | FSYNC_WAL = 4 100 | } 101 | struct TAuthorization { 102 | 1: optional list labels 103 | } 104 | 105 | struct TCellVisibility { 106 | 1: optional string expression 107 | } 108 | 109 | /** 110 | * Specify Consistency: 111 | * - STRONG means reads only from primary region 112 | * - TIMELINE means reads might return values from secondary region replicas 113 | */ 114 | enum TConsistency { 115 | STRONG = 1, 116 | TIMELINE = 2 117 | } 118 | 119 | /** 120 | * Used to perform Get operations on a single row. 121 | * 122 | * The scope can be further narrowed down by specifying a list of 123 | * columns or column families. 124 | * 125 | * To get everything for a row, instantiate a Get object with just the row to get. 126 | * To further define the scope of what to get you can add a timestamp or time range 127 | * with an optional maximum number of versions to return. 128 | * 129 | * If you specify a time range and a timestamp the range is ignored. 130 | * Timestamps on TColumns are ignored. 
131 | */ 132 | struct TGet { 133 | 1: required binary row, 134 | 2: optional list columns, 135 | 136 | 3: optional i64 timestamp, 137 | 4: optional TTimeRange timeRange, 138 | 139 | 5: optional i32 maxVersions, 140 | 6: optional binary filterString, 141 | 7: optional map attributes 142 | 8: optional TAuthorization authorizations 143 | 9: optional TConsistency consistency 144 | 10: optional i32 targetReplicaId 145 | 11: optional bool cacheBlocks 146 | 12: optional i32 storeLimit 147 | 13: optional i32 storeOffset 148 | 14: optional bool existence_only 149 | 15: optional binary filterBytes 150 | 151 | } 152 | 153 | /** 154 | * Used to perform Put operations for a single row. 155 | * 156 | * Add column values to this object and they'll be added. 157 | * You can provide a default timestamp if the column values 158 | * don't have one. If you don't provide a default timestamp 159 | * the current time is inserted. 160 | * 161 | * You can specify how this Put should be written to the write-ahead Log (WAL) 162 | * by changing the durability. If you don't provide durability, it defaults to 163 | * column family's default setting for durability. 164 | */ 165 | struct TPut { 166 | 1: required binary row, 167 | 2: required list columnValues 168 | 3: optional i64 timestamp, 169 | 5: optional map attributes, 170 | 6: optional TDurability durability, 171 | 7: optional TCellVisibility cellVisibility 172 | } 173 | 174 | /** 175 | * Used to perform Delete operations on a single row. 176 | * 177 | * The scope can be further narrowed down by specifying a list of 178 | * columns or column families as TColumns. 179 | * 180 | * Specifying only a family in a TColumn will delete the whole family. 181 | * If a timestamp is specified all versions with a timestamp less than 182 | * or equal to this will be deleted. If no timestamp is specified the 183 | * current time will be used. 184 | * 185 | * Specifying a family and a column qualifier in a TColumn will delete only 186 | * this qualifier. 
If a timestamp is specified only versions equal 187 | * to this timestamp will be deleted. If no timestamp is specified the 188 | * most recent version will be deleted. To delete all previous versions, 189 | * specify the DELETE_COLUMNS TDeleteType. 190 | * 191 | * The top level timestamp is only used if a complete row should be deleted 192 | * (i.e. no columns are passed) and if it is specified it works the same way 193 | * as if you had added a TColumn for every column family and this timestamp 194 | * (i.e. all versions older than or equal in all column families will be deleted) 195 | * 196 | * You can specify how this Delete should be written to the write-ahead Log (WAL) 197 | * by changing the durability. If you don't provide durability, it defaults to 198 | * column family's default setting for durability. 199 | */ 200 | struct TDelete { 201 | 1: required binary row, 202 | 2: optional list columns, 203 | 3: optional i64 timestamp, 204 | 4: optional TDeleteType deleteType = 1, 205 | 6: optional map attributes, 206 | 7: optional TDurability durability 207 | 208 | } 209 | 210 | /** 211 | * Used to perform Increment operations for a single row. 212 | * 213 | * You can specify how this Increment should be written to the write-ahead Log (WAL) 214 | * by changing the durability. If you don't provide durability, it defaults to 215 | * column family's default setting for durability. 
216 | */ 217 | struct TIncrement { 218 | 1: required binary row, 219 | 2: required list columns, 220 | 4: optional map attributes, 221 | 5: optional TDurability durability 222 | 6: optional TCellVisibility cellVisibility 223 | 7: optional bool returnResults 224 | } 225 | 226 | /* 227 | * Used to perform append operation 228 | */ 229 | struct TAppend { 230 | 1: required binary row, 231 | 2: required list columns, 232 | 3: optional map attributes, 233 | 4: optional TDurability durability 234 | 5: optional TCellVisibility cellVisibility 235 | 6: optional bool returnResults 236 | } 237 | 238 | enum TReadType { 239 | DEFAULT = 1, 240 | STREAM = 2, 241 | PREAD = 3 242 | } 243 | 244 | /** 245 | * Any timestamps in the columns are ignored but the colFamTimeRangeMap included, use timeRange to select by timestamp. 246 | * Max versions defaults to 1. 247 | */ 248 | struct TScan { 249 | 1: optional binary startRow, 250 | 2: optional binary stopRow, 251 | 3: optional list columns 252 | 4: optional i32 caching, 253 | 5: optional i32 maxVersions=1, 254 | 6: optional TTimeRange timeRange, 255 | 7: optional binary filterString, 256 | 8: optional i32 batchSize, 257 | 9: optional map attributes 258 | 10: optional TAuthorization authorizations 259 | 11: optional bool reversed 260 | 12: optional bool cacheBlocks 261 | 13: optional map colFamTimeRangeMap 262 | 14: optional TReadType readType 263 | 15: optional i32 limit 264 | 16: optional TConsistency consistency 265 | 17: optional i32 targetReplicaId 266 | 18: optional binary filterBytes 267 | 268 | } 269 | 270 | /** 271 | * Atomic mutation for the specified row. It can be either Put or Delete. 272 | */ 273 | union TMutation { 274 | 1: TPut put 275 | 2: TDelete deleteSingle 276 | } 277 | 278 | /** 279 | * A TRowMutations object is used to apply a number of Mutations to a single row. 
280 | */ 281 | struct TRowMutations { 282 | 1: required binary row 283 | 2: required list mutations 284 | } 285 | 286 | struct THRegionInfo { 287 | 1: required i64 regionId 288 | 2: required binary tableName 289 | 3: optional binary startKey 290 | 4: optional binary endKey 291 | 5: optional bool offline 292 | 6: optional bool split 293 | 7: optional i32 replicaId 294 | } 295 | 296 | struct TServerName { 297 | 1: required string hostName 298 | 2: optional i32 port 299 | 3: optional i64 startCode 300 | } 301 | 302 | struct THRegionLocation { 303 | 1: required TServerName serverName 304 | 2: required THRegionInfo regionInfo 305 | } 306 | 307 | /** 308 | * Thrift wrapper around 309 | * org.apache.hadoop.hbase.CompareOperator. 310 | */ 311 | enum TCompareOperator { 312 | LESS = 0, 313 | LESS_OR_EQUAL = 1, 314 | EQUAL = 2, 315 | NOT_EQUAL = 3, 316 | GREATER_OR_EQUAL = 4, 317 | GREATER = 5, 318 | NO_OP = 6 319 | } 320 | 321 | /** 322 | * Thrift wrapper around 323 | * org.apache.hadoop.hbase.regionserver.BloomType 324 | */ 325 | enum TBloomFilterType { 326 | /** 327 | * Bloomfilters disabled 328 | */ 329 | NONE = 0, 330 | /** 331 | * Bloom enabled with Table row as Key 332 | */ 333 | ROW = 1, 334 | /** 335 | * Bloom enabled with Table row & column (family+qualifier) as Key 336 | */ 337 | ROWCOL = 2, 338 | /** 339 | * Bloom enabled with Table row prefix as Key, specify the length of the prefix 340 | */ 341 | ROWPREFIX_FIXED_LENGTH = 3, 342 | } 343 | 344 | /** 345 | * Thrift wrapper around 346 | * org.apache.hadoop.hbase.io.compress.Algorithm 347 | */ 348 | enum TCompressionAlgorithm { 349 | LZO = 0, 350 | GZ = 1, 351 | NONE = 2, 352 | SNAPPY = 3, 353 | LZ4 = 4, 354 | BZIP2 = 5, 355 | ZSTD = 6 356 | } 357 | 358 | /** 359 | * Thrift wrapper around 360 | * org.apache.hadoop.hbase.io.encoding.DataBlockEncoding 361 | */ 362 | enum TDataBlockEncoding { 363 | /** Disable data block encoding. 
*/ 364 | NONE = 0, 365 | // id 1 is reserved for the BITSET algorithm to be added later 366 | PREFIX = 2, 367 | DIFF = 3, 368 | FAST_DIFF = 4, 369 | // id 5 is reserved for the COPY_KEY algorithm for benchmarking 370 | // COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"), 371 | // PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"), 372 | ROW_INDEX_V1 = 7 373 | } 374 | 375 | /** 376 | * Thrift wrapper around 377 | * org.apache.hadoop.hbase.KeepDeletedCells 378 | */ 379 | enum TKeepDeletedCells { 380 | /** Deleted Cells are not retained. */ 381 | FALSE = 0, 382 | /** 383 | * Deleted Cells are retained until they are removed by other means 384 | * such TTL or VERSIONS. 385 | * If no TTL is specified or no new versions of delete cells are 386 | * written, they are retained forever. 387 | */ 388 | TRUE = 1, 389 | /** 390 | * Deleted Cells are retained until the delete marker expires due to TTL. 391 | * This is useful when TTL is combined with MIN_VERSIONS and one 392 | * wants to keep a minimum number of versions around but at the same 393 | * time remove deleted cells after the TTL. 
394 | */ 395 | TTL = 2 396 | } 397 | 398 | /** 399 | * Thrift wrapper around 400 | * org.apache.hadoop.hbase.TableName 401 | */ 402 | struct TTableName { 403 | /** namespace name */ 404 | 1: optional binary ns 405 | /** tablename */ 406 | 2: required binary qualifier 407 | } 408 | 409 | /** 410 | * Thrift wrapper around 411 | * org.apache.hadoop.hbase.client.ColumnFamilyDescriptor 412 | */ 413 | struct TColumnFamilyDescriptor { 414 | 1: required binary name 415 | 2: optional map attributes 416 | 3: optional map configuration 417 | 4: optional i32 blockSize 418 | 5: optional TBloomFilterType bloomnFilterType 419 | 6: optional TCompressionAlgorithm compressionType 420 | 7: optional i16 dfsReplication 421 | 8: optional TDataBlockEncoding dataBlockEncoding 422 | 9: optional TKeepDeletedCells keepDeletedCells 423 | 10: optional i32 maxVersions 424 | 11: optional i32 minVersions 425 | 12: optional i32 scope 426 | 13: optional i32 timeToLive 427 | 14: optional bool blockCacheEnabled 428 | 15: optional bool cacheBloomsOnWrite 429 | 16: optional bool cacheDataOnWrite 430 | 17: optional bool cacheIndexesOnWrite 431 | 18: optional bool compressTags 432 | 19: optional bool evictBlocksOnClose 433 | 20: optional bool inMemory 434 | 435 | } 436 | 437 | /** 438 | * Thrift wrapper around 439 | * org.apache.hadoop.hbase.client.TableDescriptor 440 | */ 441 | struct TTableDescriptor { 442 | 1: required TTableName tableName 443 | 2: optional list columns 444 | 3: optional map attributes 445 | 4: optional TDurability durability 446 | } 447 | 448 | /** 449 | * Thrift wrapper around 450 | * org.apache.hadoop.hbase.NamespaceDescriptor 451 | */ 452 | struct TNamespaceDescriptor { 453 | 1: required string name 454 | 2: optional map configuration 455 | } 456 | 457 | enum TLogType { 458 | SLOW_LOG = 1, 459 | LARGE_LOG = 2 460 | } 461 | 462 | enum TFilterByOperator { 463 | AND, 464 | OR 465 | } 466 | 467 | /** 468 | * Thrift wrapper around 469 | * org.apache.hadoop.hbase.client.LogQueryFilter 
470 | */ 471 | struct TLogQueryFilter { 472 | 1: optional string regionName 473 | 2: optional string clientAddress 474 | 3: optional string tableName 475 | 4: optional string userName 476 | 5: optional i32 limit = 10 477 | 6: optional TLogType logType = 1 478 | 7: optional TFilterByOperator filterByOperator = TFilterByOperator.OR 479 | } 480 | 481 | 482 | /** 483 | * Thrift wrapper around 484 | * org.apache.hadoop.hbase.client.OnlineLogRecord 485 | */ 486 | struct TOnlineLogRecord { 487 | 1: required i64 startTime 488 | 2: required i32 processingTime 489 | 3: required i32 queueTime 490 | 4: required i64 responseSize 491 | 5: required string clientAddress 492 | 6: required string serverClass 493 | 7: required string methodName 494 | 8: required string callDetails 495 | 9: required string param 496 | 10: required string userName 497 | 11: required i32 multiGetsCount 498 | 12: required i32 multiMutationsCount 499 | 13: required i32 multiServiceCalls 500 | 14: optional string regionName 501 | 15: optional i64 blockBytesScanned 502 | } 503 | 504 | // 505 | // Exceptions 506 | // 507 | 508 | /** 509 | * A TIOError exception signals that an error occurred communicating 510 | * to the HBase master or a HBase region server. Also used to return 511 | * more general HBase error conditions. 512 | */ 513 | exception TIOError { 514 | 1: optional string message 515 | 2: optional bool canRetry 516 | } 517 | 518 | /** 519 | * A TIllegalArgument exception indicates an illegal or invalid 520 | * argument was passed into a procedure. 
521 | */ 522 | exception TIllegalArgument { 523 | 1: optional string message 524 | } 525 | 526 | /** 527 | * Specify type of thrift server: thrift and thrift2 528 | */ 529 | enum TThriftServerType { 530 | ONE = 1, 531 | TWO = 2 532 | } 533 | 534 | enum TPermissionScope { 535 | TABLE = 0, 536 | NAMESPACE = 1 537 | } 538 | 539 | /** 540 | * TAccessControlEntity for permission control 541 | */ 542 | struct TAccessControlEntity { 543 | 1: required string username 544 | 2: required TPermissionScope scope 545 | 4: required string actions 546 | 5: optional string tableName 547 | 6: optional string nsName 548 | } 549 | 550 | service THBaseService { 551 | 552 | /** 553 | * Test for the existence of columns in the table, as specified in the TGet. 554 | * 555 | * @return true if the specified TGet matches one or more keys, false if not 556 | */ 557 | bool exists( 558 | /** the table to check on */ 559 | 1: required binary table, 560 | 561 | /** the TGet to check for */ 562 | 2: required TGet tget 563 | ) throws (1:TIOError io) 564 | 565 | 566 | /** 567 | * Test for the existence of columns in the table, as specified by the TGets. 568 | * 569 | * This will return an array of booleans. Each value will be true if the related Get matches 570 | * one or more keys, false if not. 571 | */ 572 | list existsAll( 573 | /** the table to check on */ 574 | 1: required binary table, 575 | 576 | /** a list of TGets to check for */ 577 | 2: required list tgets 578 | ) throws (1:TIOError io) 579 | 580 | /** 581 | * Method for getting data from a row. 582 | * 583 | * If the row cannot be found an empty Result is returned. 584 | * This can be checked by the empty field of the TResult 585 | * 586 | * @return the result 587 | */ 588 | TResult get( 589 | /** the table to get from */ 590 | 1: required binary table, 591 | 592 | /** the TGet to fetch */ 593 | 2: required TGet tget 594 | ) throws (1: TIOError io) 595 | 596 | /** 597 | * Method for getting multiple rows. 
598 | * 599 | * If a row cannot be found there will be a null 600 | * value in the result list for that TGet at the 601 | * same position. 602 | * 603 | * So the Results are in the same order as the TGets. 604 | */ 605 | list getMultiple( 606 | /** the table to get from */ 607 | 1: required binary table, 608 | 609 | /** a list of TGets to fetch, the Result list 610 | will have the Results at corresponding positions 611 | or null if there was an error */ 612 | 2: required list tgets 613 | ) throws (1: TIOError io) 614 | 615 | /** 616 | * Commit a TPut to a table. 617 | */ 618 | void put( 619 | /** the table to put data in */ 620 | 1: required binary table, 621 | 622 | /** the TPut to put */ 623 | 2: required TPut tput 624 | ) throws (1: TIOError io) 625 | 626 | /** 627 | * Atomically checks if a row/family/qualifier value matches the expected 628 | * value. If it does, it adds the TPut. 629 | * 630 | * @return true if the new put was executed, false otherwise 631 | */ 632 | bool checkAndPut( 633 | /** to check in and put to */ 634 | 1: required binary table, 635 | 636 | /** row to check */ 637 | 2: required binary row, 638 | 639 | /** column family to check */ 640 | 3: required binary family, 641 | 642 | /** column qualifier to check */ 643 | 4: required binary qualifier, 644 | 645 | /** the expected value, if not provided the 646 | check is for the non-existence of the 647 | column in question */ 648 | 5: binary value, 649 | 650 | /** the TPut to put if the check succeeds */ 651 | 6: required TPut tput 652 | ) throws (1: TIOError io) 653 | 654 | /** 655 | * Commit a List of Puts to the table. 656 | */ 657 | void putMultiple( 658 | /** the table to put data in */ 659 | 1: required binary table, 660 | 661 | /** a list of TPuts to commit */ 662 | 2: required list tputs 663 | ) throws (1: TIOError io) 664 | 665 | /** 666 | * Deletes as specified by the TDelete. 
667 | * 668 | * Note: "delete" is a reserved keyword and cannot be used in Thrift 669 | * thus the inconsistent naming scheme from the other functions. 670 | */ 671 | void deleteSingle( 672 | /** the table to delete from */ 673 | 1: required binary table, 674 | 675 | /** the TDelete to delete */ 676 | 2: required TDelete tdelete 677 | ) throws (1: TIOError io) 678 | 679 | /** 680 | * Bulk commit a List of TDeletes to the table. 681 | * 682 | * Throws a TIOError if any of the deletes fail. 683 | * 684 | * Always returns an empty list for backwards compatibility. 685 | */ 686 | list deleteMultiple( 687 | /** the table to delete from */ 688 | 1: required binary table, 689 | 690 | /** list of TDeletes to delete */ 691 | 2: required list tdeletes 692 | ) throws (1: TIOError io) 693 | 694 | /** 695 | * Atomically checks if a row/family/qualifier value matches the expected 696 | * value. If it does, it adds the delete. 697 | * 698 | * @return true if the new delete was executed, false otherwise 699 | */ 700 | bool checkAndDelete( 701 | /** to check in and delete from */ 702 | 1: required binary table, 703 | 704 | /** row to check */ 705 | 2: required binary row, 706 | 707 | /** column family to check */ 708 | 3: required binary family, 709 | 710 | /** column qualifier to check */ 711 | 4: required binary qualifier, 712 | 713 | /** the expected value, if not provided the 714 | check is for the non-existence of the 715 | column in question */ 716 | 5: binary value, 717 | 718 | /** the TDelete to execute if the check succeeds */ 719 | 6: required TDelete tdelete 720 | ) throws (1: TIOError io) 721 | 722 | TResult increment( 723 | /** the table to increment the value on */ 724 | 1: required binary table, 725 | 726 | /** the TIncrement to increment */ 727 | 2: required TIncrement tincrement 728 | ) throws (1: TIOError io) 729 | 730 | TResult append( 731 | /** the table to append the value on */ 732 | 1: required binary table, 733 | 734 | /** the TAppend to append */ 735 | 2: 
required TAppend tappend 736 | ) throws (1: TIOError io) 737 | 738 | /** 739 | * Get a Scanner for the provided TScan object. 740 | * 741 | * @return Scanner Id to be used with other scanner procedures 742 | */ 743 | i32 openScanner( 744 | /** the table to get the Scanner for */ 745 | 1: required binary table, 746 | 747 | /** the scan object to get a Scanner for */ 748 | 2: required TScan tscan, 749 | ) throws (1: TIOError io) 750 | 751 | /** 752 | * Grabs multiple rows from a Scanner. 753 | * 754 | * @return Between zero and numRows TResults 755 | */ 756 | list getScannerRows( 757 | /** the Id of the Scanner to return rows from. This is an Id returned from the openScanner function. */ 758 | 1: required i32 scannerId, 759 | 760 | /** number of rows to return */ 761 | 2: i32 numRows = 1 762 | ) throws ( 763 | 1: TIOError io, 764 | 765 | /** if the scannerId is invalid */ 766 | 2: TIllegalArgument ia 767 | ) 768 | 769 | /** 770 | * Closes the scanner. Should be called to free server side resources timely. 771 | * Typically close once the scanner is not needed anymore, i.e. after looping 772 | * over it to get all the required rows. 773 | */ 774 | void closeScanner( 775 | /** the Id of the Scanner to close **/ 776 | 1: required i32 scannerId 777 | ) throws ( 778 | 1: TIOError io, 779 | 780 | /** if the scannerId is invalid */ 781 | 2: TIllegalArgument ia 782 | ) 783 | 784 | /** 785 | * mutateRow performs multiple mutations atomically on a single row. 786 | */ 787 | void mutateRow( 788 | /** table to apply the mutations */ 789 | 1: required binary table, 790 | 791 | /** mutations to apply */ 792 | 2: required TRowMutations trowMutations 793 | ) throws (1: TIOError io) 794 | 795 | /** 796 | * Get results for the provided TScan object. 797 | * This helper function opens a scanner, get the results and close the scanner. 
798 | * 799 | * @return between zero and numRows TResults 800 | */ 801 | list getScannerResults( 802 | /** the table to get the Scanner for */ 803 | 1: required binary table, 804 | 805 | /** the scan object to get a Scanner for */ 806 | 2: required TScan tscan, 807 | 808 | /** number of rows to return */ 809 | 3: i32 numRows = 1 810 | ) throws ( 811 | 1: TIOError io 812 | ) 813 | 814 | /** 815 | * Given a table and a row get the location of the region that 816 | * would contain the given row key. 817 | * 818 | * reload = true means the cache will be cleared and the location 819 | * will be fetched from meta. 820 | */ 821 | THRegionLocation getRegionLocation( 822 | 1: required binary table, 823 | 2: required binary row, 824 | 3: bool reload, 825 | ) throws ( 826 | 1: TIOError io 827 | ) 828 | 829 | /** 830 | * Get all of the region locations for a given table. 831 | **/ 832 | list getAllRegionLocations( 833 | 1: required binary table, 834 | ) throws ( 835 | 1: TIOError io 836 | ) 837 | 838 | /** 839 | * Atomically checks if a row/family/qualifier value matches the expected 840 | * value. If it does, it mutates the row. 841 | * 842 | * @return true if the row was mutated, false otherwise 843 | */ 844 | bool checkAndMutate( 845 | /** to check in and delete from */ 846 | 1: required binary table, 847 | 848 | /** row to check */ 849 | 2: required binary row, 850 | 851 | /** column family to check */ 852 | 3: required binary family, 853 | 854 | /** column qualifier to check */ 855 | 4: required binary qualifier, 856 | 857 | /** comparison to make on the value */ 858 | 5: required TCompareOperator compareOperator, 859 | 860 | /** the expected value to be compared against, if not provided the 861 | check is for the non-existence of the column in question */ 862 | 6: binary value, 863 | 864 | /** row mutations to execute if the value matches */ 865 | 7: required TRowMutations rowMutations 866 | ) throws (1: TIOError io) 867 | 868 | /** 869 | * Get a table descriptor. 
870 | * @return the TableDescriptor of the giving tablename 871 | **/ 872 | TTableDescriptor getTableDescriptor( 873 | /** the tablename of the table to get tableDescriptor*/ 874 | 1: required TTableName table 875 | ) throws (1: TIOError io) 876 | 877 | /** 878 | * Get table descriptors of tables. 879 | * @return the TableDescriptor of the giving tablename 880 | **/ 881 | list getTableDescriptors( 882 | /** the tablename list of the tables to get tableDescriptor*/ 883 | 1: required list tables 884 | ) throws (1: TIOError io) 885 | 886 | /** 887 | * 888 | * @return true if table exists already, false if not 889 | **/ 890 | bool tableExists( 891 | /** the tablename of the tables to check*/ 892 | 1: TTableName tableName 893 | ) throws (1: TIOError io) 894 | 895 | /** 896 | * Get table descriptors of tables that match the given pattern 897 | * @return the tableDescriptors of the matching table 898 | **/ 899 | list getTableDescriptorsByPattern( 900 | /** The regular expression to match against */ 901 | 1: optional string regex 902 | /** set to false if match only against userspace tables */ 903 | 2: required bool includeSysTables 904 | ) throws (1: TIOError io) 905 | 906 | /** 907 | * Get table descriptors of tables in the given namespace 908 | * @return the tableDescriptors in the namespce 909 | **/ 910 | list getTableDescriptorsByNamespace( 911 | /** The namesapce's name */ 912 | 1: required string name 913 | ) throws (1: TIOError io) 914 | 915 | /** 916 | * Get table names of tables that match the given pattern 917 | * @return the table names of the matching table 918 | **/ 919 | list getTableNamesByPattern( 920 | /** The regular expression to match against */ 921 | 1: optional string regex 922 | /** set to false if match only against userspace tables */ 923 | 2: required bool includeSysTables 924 | ) throws (1: TIOError io) 925 | 926 | /** 927 | * Get table names of tables in the given namespace 928 | * @return the table names of the matching table 929 | **/ 930 | 
list getTableNamesByNamespace( 931 | /** The namesapce's name */ 932 | 1: required string name 933 | ) throws (1: TIOError io) 934 | 935 | /** 936 | * Creates a new table with an initial set of empty regions defined by the specified split keys. 937 | * The total number of regions created will be the number of split keys plus one. Synchronous 938 | * operation. 939 | **/ 940 | void createTable( 941 | /** table descriptor for table */ 942 | 1: required TTableDescriptor desc 943 | /** rray of split keys for the initial regions of the table */ 944 | 2: optional list splitKeys 945 | ) throws (1: TIOError io) 946 | 947 | /** 948 | * Deletes a table. Synchronous operation. 949 | **/ 950 | void deleteTable( 951 | /** the tablename to delete */ 952 | 1: required TTableName tableName 953 | ) throws (1: TIOError io) 954 | 955 | /** 956 | * Truncate a table. Synchronous operation. 957 | **/ 958 | void truncateTable( 959 | /** the tablename to truncate */ 960 | 1: required TTableName tableName 961 | /** whether to preserve previous splits*/ 962 | 2: required bool preserveSplits 963 | ) throws (1: TIOError io) 964 | 965 | /** 966 | * Enalbe a table 967 | **/ 968 | void enableTable( 969 | /** the tablename to enable */ 970 | 1: required TTableName tableName 971 | ) throws (1: TIOError io) 972 | 973 | /** 974 | * Disable a table 975 | **/ 976 | void disableTable( 977 | /** the tablename to disable */ 978 | 1: required TTableName tableName 979 | ) throws (1: TIOError io) 980 | 981 | /** 982 | * 983 | * @return true if table is enabled, false if not 984 | **/ 985 | bool isTableEnabled( 986 | /** the tablename to check */ 987 | 1: required TTableName tableName 988 | ) throws (1: TIOError io) 989 | 990 | /** 991 | * 992 | * @return true if table is disabled, false if not 993 | **/ 994 | bool isTableDisabled( 995 | /** the tablename to check */ 996 | 1: required TTableName tableName 997 | ) throws (1: TIOError io) 998 | 999 | /** 1000 | * 1001 | * @return true if table is available, 
false if not 1002 | **/ 1003 | bool isTableAvailable( 1004 | /** the tablename to check */ 1005 | 1: required TTableName tableName 1006 | ) throws (1: TIOError io) 1007 | 1008 | /** 1009 | * Use this api to check if the table has been created with the specified number of splitkeys 1010 | * which was used while creating the given table. Note : If this api is used after a table's 1011 | * region gets splitted, the api may return false. 1012 | * 1013 | * @return true if table is available, false if not 1014 | * 1015 | * @deprecated Since 2.2.0. Because the same method in Table interface has been deprecated 1016 | * since 2.0.0, we will remove it in 3.0.0 release. 1017 | * Use {@link #isTableAvailable(TTableName tableName)} instead 1018 | **/ 1019 | bool isTableAvailableWithSplit( 1020 | /** the tablename to check */ 1021 | 1: required TTableName tableName 1022 | /** keys to check if the table has been created with all split keys */ 1023 | 2: optional list splitKeys 1024 | ) throws (1: TIOError io) 1025 | 1026 | /** 1027 | * Add a column family to an existing table. Synchronous operation. 1028 | **/ 1029 | void addColumnFamily( 1030 | /** the tablename to add column family to */ 1031 | 1: required TTableName tableName 1032 | /** column family descriptor of column family to be added */ 1033 | 2: required TColumnFamilyDescriptor column 1034 | ) throws (1: TIOError io) 1035 | 1036 | /** 1037 | * Delete a column family from a table. Synchronous operation. 1038 | **/ 1039 | void deleteColumnFamily( 1040 | /** the tablename to delete column family from */ 1041 | 1: required TTableName tableName 1042 | /** name of column family to be deleted */ 1043 | 2: required binary column 1044 | ) throws (1: TIOError io) 1045 | 1046 | /** 1047 | * Modify an existing column family on a table. Synchronous operation. 
1048 | **/ 1049 | void modifyColumnFamily( 1050 | /** the tablename to modify column family */ 1051 | 1: required TTableName tableName 1052 | /** column family descriptor of column family to be modified */ 1053 | 2: required TColumnFamilyDescriptor column 1054 | ) throws (1: TIOError io) 1055 | 1056 | /** 1057 | * Modify an existing table 1058 | **/ 1059 | void modifyTable( 1060 | /** the descriptor of the table to modify */ 1061 | 1: required TTableDescriptor desc 1062 | ) throws (1: TIOError io) 1063 | 1064 | /** 1065 | * Create a new namespace. Blocks until namespace has been successfully created or an exception is 1066 | * thrown 1067 | **/ 1068 | void createNamespace( 1069 | /** descriptor which describes the new namespace */ 1070 | 1: required TNamespaceDescriptor namespaceDesc 1071 | ) throws (1: TIOError io) 1072 | 1073 | /** 1074 | * Modify an existing namespace. Blocks until namespace has been successfully modified or an 1075 | * exception is thrown 1076 | **/ 1077 | void modifyNamespace( 1078 | /** descriptor which describes the new namespace */ 1079 | 1: required TNamespaceDescriptor namespaceDesc 1080 | ) throws (1: TIOError io) 1081 | 1082 | /** 1083 | * Delete an existing namespace. Only empty namespaces (no tables) can be removed. 1084 | * Blocks until namespace has been successfully deleted or an 1085 | * exception is thrown. 1086 | **/ 1087 | void deleteNamespace( 1088 | /** namespace name */ 1089 | 1: required string name 1090 | ) throws (1: TIOError io) 1091 | 1092 | /** 1093 | * Get a namespace descriptor by name. 
1094 | * @retrun the descriptor 1095 | **/ 1096 | TNamespaceDescriptor getNamespaceDescriptor( 1097 | /** name of namespace descriptor */ 1098 | 1: required string name 1099 | ) throws (1: TIOError io) 1100 | 1101 | /** 1102 | * @return all namespaces 1103 | **/ 1104 | list listNamespaceDescriptors( 1105 | ) throws (1: TIOError io) 1106 | 1107 | /** 1108 | * @return all namespace names 1109 | **/ 1110 | list listNamespaces( 1111 | ) throws (1: TIOError io) 1112 | 1113 | /** 1114 | * Get the type of this thrift server. 1115 | * 1116 | * @return the type of this thrift server 1117 | */ 1118 | TThriftServerType getThriftServerType() 1119 | 1120 | /** 1121 | * Returns the cluster ID for this cluster. 1122 | */ 1123 | string getClusterId() 1124 | 1125 | /** 1126 | * Retrieves online slow RPC logs from the provided list of 1127 | * RegionServers 1128 | * 1129 | * @return online slowlog response list 1130 | * @throws TIOError if a remote or network exception occurs 1131 | */ 1132 | list getSlowLogResponses( 1133 | /** @param serverNames Server names to get slowlog responses from */ 1134 | 1: set serverNames 1135 | /** @param logQueryFilter filter to be used if provided */ 1136 | 2: TLogQueryFilter logQueryFilter 1137 | ) throws (1: TIOError io) 1138 | 1139 | /** 1140 | * Clears online slow/large RPC logs from the provided list of 1141 | * RegionServers 1142 | * 1143 | * @return List of booleans representing if online slowlog response buffer is cleaned 1144 | * from each RegionServer 1145 | * @throws TIOError if a remote or network exception occurs 1146 | */ 1147 | list clearSlowLogResponses( 1148 | /** @param serverNames Set of Server names to clean slowlog responses from */ 1149 | 1: set serverNames 1150 | ) throws (1: TIOError io) 1151 | 1152 | /** 1153 | * Grant permissions in table or namespace level. 
1154 | */ 1155 | bool grant( 1156 | 1: required TAccessControlEntity info 1157 | ) throws (1: TIOError io) 1158 | 1159 | /** 1160 | * Revoke permissions in table or namespace level. 1161 | */ 1162 | bool revoke( 1163 | 1: required TAccessControlEntity info 1164 | ) throws (1: TIOError io) 1165 | } --------------------------------------------------------------------------------