├── .coveragerc ├── .gitattributes ├── .gitignore ├── .landscape.yaml ├── CHANGELOG.md ├── DEVELOP.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── bin └── register-impala-udfs.py ├── build-dists.sh ├── dev └── merge-pr.py ├── ez_setup.py ├── impala ├── __init__.py ├── _thrift_api.py ├── _thrift_gen │ ├── ErrorCodes │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── ExecStats │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── ImpalaService │ │ ├── ImpalaHiveServer2Service-remote │ │ ├── ImpalaHiveServer2Service.py │ │ ├── ImpalaService-remote │ │ ├── ImpalaService.py │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── Metrics │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── RuntimeProfile │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── Status │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── TCLIService │ │ ├── TCLIService-remote │ │ ├── TCLIService.py │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── Types │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── __init__.py │ ├── beeswax │ │ ├── BeeswaxService-remote │ │ ├── BeeswaxService.py │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ ├── fb303 │ │ ├── FacebookService-remote │ │ ├── FacebookService.py │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py │ └── hive_metastore │ │ ├── ThriftHiveMetastore-remote │ │ ├── ThriftHiveMetastore.py │ │ ├── __init__.py │ │ ├── constants.py │ │ └── ttypes.py ├── compat.py ├── dbapi.py ├── error.py ├── hiveserver2.py ├── interface.py ├── sasl_compat.py ├── sqlalchemy.py ├── tests │ ├── __init__.py │ ├── _dbapi20_tests.py │ ├── compat.py │ ├── conftest.py │ ├── test_data_types.py │ ├── test_dbapi_compliance.py │ ├── test_dbapi_connect.py │ ├── test_hive.py │ ├── test_hive_dict_cursor.py │ ├── test_hs2_fault_injection.py │ ├── test_http_connect.py │ ├── test_impala.py │ ├── test_query_parameters.py │ ├── test_sqlalchemy.py │ ├── 
test_thrift_api.py │ ├── test_util.py │ └── util.py ├── thrift │ ├── ErrorCodes.thrift │ ├── ExecStats.thrift │ ├── ImpalaService.thrift │ ├── Metrics.thrift │ ├── RuntimeProfile.thrift │ ├── Status.thrift │ ├── TCLIService.thrift │ ├── Types.thrift │ ├── __init__.py │ ├── beeswax.thrift │ ├── fb303.thrift │ ├── hive_metastore.thrift │ └── process_thrift.sh └── util.py ├── io └── manylinux │ └── build.sh ├── jenkins ├── README.md ├── parse-build-result.py └── run-dbapi.sh ├── setup.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch: True 3 | omit: impala/tests*, 4 | impala/_thrift_gen/* 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | impala/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scratch.py 2 | *.py[cod] 3 | 4 | # C extensions 5 | *.so 6 | *.bc 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | *.zip 12 | dist 13 | build 14 | eggs 15 | parts 16 | var 17 | sdist 18 | develop-eggs 19 | .installed.cfg 20 | lib64 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | coverage.xml 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Mr Developer 35 | .mr.developer.cfg 36 | .project 37 | .pydevproject 38 | 39 | udf-scratch/ 40 | .idea/ 41 | 42 | /venv/ 43 | .mypy_cache/ 44 | -------------------------------------------------------------------------------- /.landscape.yaml: -------------------------------------------------------------------------------- 1 | strictness: medium 2 | 3 | output-format: grouped 4 | 5 | test-warnings: true 6 | doc-warnings: false 7 | 8 | ignore-paths: 9 | - ez_setup.py 10 | - 
impala/tests/_dbapi20_tests.py 11 | - bin/register-impala-udfs.py 12 | 13 | ignore-patterns: 14 | - ^build 15 | - ^impala/_thrift_gen 16 | - ^dev 17 | - ^examples 18 | - udf 19 | 20 | pylint: 21 | disable: 22 | - redefined-builtin 23 | - too-many-arguments 24 | 25 | pep8: 26 | options: 27 | max-line-length: 79 28 | 29 | pyroma: 30 | run: true 31 | 32 | mccabe: 33 | run: false -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.21 5 | ------ 6 | * **Improvements** 7 | - Remove versioneer and add Python3.12/3.13 testing (#572) 8 | - this is a temporary solution till Python2 support is dropped 9 | as no version of versioneer handles both Python 2.7 and 3.12 10 | - Allow users to add custom http headers when using hs2-http (#557) 11 | - this change is intended to help with testing the server side 12 | - Update Impala Thrift definitions. (#575) 13 | - this helps in testing newer features in Impala 14 | 15 | * **Bug Fixes** 16 | - Fix IPv6 address handling in hs2-http protocol 17 | - Fix proxy-authentication headers for Python 3.* and long basic 18 | credential encodings (#562) 19 | - Fix passing retry count configuration to rpc operations (#564) 20 | - Fix has_table() with sqlalchemy2 (#568) 21 | 22 | Note that this may be the last release with Python 2.7 support. 23 | 24 | 0.20 25 | ------ 26 | * **Improvements** 27 | - Support wildcard http_cookie_names (#509) 28 | - Add Knox cookies in default cookies list (#525) 29 | - Support CHAR type in SQLAlchemy (#516) 30 | - Support Cursor.rowcount and close finished queries (#528) 31 | Note that this is a potentially breaking change. See the PR 32 | for details about the side-effects. 33 | The old behavior can be restored by setting close_finished_queries=False 34 | when creating a Cursor. 
35 | Also note that Cursor.rowcount only works with Impala server - with 36 | Hive it will always return -1. 37 | - Allow skipping utf8 conversion in Python3 (#548) 38 | - Subtract RPC time from sleep in _wait_to_finish (#551) 39 | - Reduced logging: 40 | - Log "Closing operation" at debug level (#539) 41 | - Never log passwords in http connections (#545) 42 | - Before the fix passwords were logged at debug level 43 | 44 | * **Bug Fixes** 45 | - Avoid retrying non-idempotent RPCs in binary connections (#549) 46 | - Always set ImpalaHttpClient.__preserve_all_cookies (#553) 47 | - Fix https connection with Python 3.12 (#531) 48 | Note that Python 3.12 support is not complete yet. 49 | A known issue is that installing with setuptools fails with Python 3.12. 50 | - Fix SQLAlchemy support for Impala on Python 3.10 (#538) 51 | - Turn regex strings into raw strings (#535) 52 | 53 | Note that this may be the last release with Python 2.7 support. 54 | 55 | 0.19.0 56 | ------ 57 | * **Improvements** 58 | - Add get_view_name support to SQLAlchemy (#511) 59 | SHOW VIEWS is expected to be supported in Impala soon. 60 | - Add additional checks to ensure connection arguments (#515) 61 | 62 | * **Bug Fixes** 63 | - Fix Cookie handling with Python 3 (#518) 64 | - Fix numeric parameter substitution bug (#508) 65 | 66 | 0.18.0 67 | ------ 68 | * **Improvements** 69 | - Add support for retaining cookies among http requests for LDAP and GSSAPI/PLAIN 70 | SASL (#465) 71 | Notes: Authentication cookie is enabled by default. The connect() API parameter 72 | auth_cookie_names is deprecated. If a user uses GSSAPI authentication with 73 | existing client code to call connect() API with auth_cookie_names set as None 74 | explicitly, the auth cookie will be enabled after upgrading Impyla to 0.18. 
75 | - Add support for authentication via JWT token (#463) 76 | - Add support for retaining cookies among http requests for NOSASL and JWT 77 | authentication (#477) 78 | - Upgrade Thrift to 0.16.0 (#490) 79 | Notes: this fixes compatibility with Python 3.10 80 | - Enable supports_multivalues_insert in SQLAlchemy (#499) 81 | - Enable setting user_agent in http protocol (#498) 82 | 83 | * **Bug Fixes** 84 | - Fix an issue where datetime or date is not correctly quoted as string when 85 | performing sql substitutions (#487) 86 | - Fix parameter substitution in executemany() (#494) 87 | - Convert the values of VARCHAR/CHAR columns to unicode strings (#457) 88 | - Add missing expect_more_rows argument (#453) 89 | 90 | 0.17.0 91 | ------ 92 | * **Improvements** 93 | - Upgrade to thrift-sasl 0.4.3 94 | 95 | 0.17a8 96 | ------ 97 | * **Improvements** 98 | - Fix schema description returns for HiveServer2 when using dot in naming convention. 99 | - Extensions to SQLAlchemy ImpalaDLLCompiler to support Alembic schema migrations 100 | - Add impala4 sqlalchemy dialect for Impala >= 4.0 101 | 102 | * **Bug Fixes** 103 | - Fix regression in #445 (non-valid utf-8 strings handling in Python 3) 104 | 105 | 0.17a7 106 | ------ 107 | * **Improvements** 108 | - Speed up fetchmany() and fetchall() 109 | - Avoid unnecessary GetOperationStatus RPCs 110 | - Bump fetch size to 10240 (from 1024) 111 | 112 | * **Bug Fixes** 113 | - Update setup.py: thrift-sasl is needed for ldap/plain authentication 114 | - Hack to fix non-valid utf-8 strings handling in Python 3 115 | 116 | 0.17a6 117 | ------ 118 | * **Improvements** 119 | - Unify Python 2 and 3 thrift handling and remove thriftpy2 120 | 121 | 0.17a5 122 | ------ 123 | * **Improvements** 124 | - Add buffering to hs2-http (#442) 125 | - Remove references to 'sasl', always use 'pure-sasl' package 126 | 127 | 0.17a4 128 | ------ 129 | * **Improvements** 130 | - Switch to using manylinux2010 docker build environment 131 | - Upgrade to 
thrift-sasl 0.4.3a2 132 | 133 | 0.17a3 134 | ------ 135 | * **Bug Fixes** 136 | - Add no_utf8strings to thrift compiler option (#440) 137 | 138 | 0.17a2 139 | ------ 140 | * **Improvements** 141 | - Implement simple retry which throws the underlying HttpError if retrying fails 142 | - Upgrade Thrift to 0.11.0 for Python 2 143 | - Add build script 144 | 145 | * **Bug Fixes** 146 | - Server certs should not be verified if SSL is enabled and ca_cert is not specified 147 | - Added "fetchType" to TFetchResultsReq. 148 | - Fix Thrift compilation with current Impala 149 | 150 | 0.17a1 151 | ------ 152 | * **Improvements** 153 | - Implement GSSAPI authentication over http transport. (#415) 154 | - Vendor thrift classes with Python 3 #277 (#404) 155 | - Add HTTP code handling to THttpClient (#380) 156 | - Disable failing tests - #401 (#402) 157 | 158 | * **Bug Fixes** 159 | - Fix #418, no 'message', just cast to string (#419) 160 | - Fix DATE, VARCHAR handling: #410 (#411) 161 | 162 | 0.16.3 163 | ------ 164 | * **Bug Fixes** 165 | - Fix specifying dependency on thrift package 166 | 167 | 0.16a3 168 | ------ 169 | * **Improvements** 170 | - Better documentation regarding SASL prerequisites (#394) 171 | 172 | * **Bug Fixes** 173 | - Fix compatibility with Python 3.9 (#386) 174 | - Fix interoperability with Hive client version >= V10 (#396) (#397) 175 | - Fix documentation bug regarding cursor iteration (#388) 176 | - Fix connecting over HTTP using Python 3 (#378) 177 | 178 | 0.16.2 179 | ------ 180 | * **Bug Fixes** 181 | - Fix an issue that prevented use of Impyla with Python 2.6 (#375) 182 | 183 | 0.16.1 184 | ------ 185 | * **Bug Fixes** 186 | - Fix an issue whereby impyla incorrectly assumes there's no more data to fetch (#369) 187 | 188 | 0.16.0 189 | ------ 190 | * **Improvements** 191 | - Add HTTP endpoint support (#359) 192 | - Add rowcounts property to cursor object to return the number of rows affected (#346) 193 | 194 | * **Bug Fixes** 195 | - Set 
long_description_content_type to markdown in setup.py (#364) 196 | - Fix ImportError 'THttpClient' in python3+ (#363) 197 | - Enable executemany() to pass parameter configuration to inner execute() (#361) 198 | - Minor docstring corrections (#355) 199 | - Make thriftpy2 range locked, not specific-version locked (#353) 200 | - Fixed numeric parameter substitution bug (#348) 201 | - Add thrift_sasl as an official dependency (#273) 202 | 203 | 0.15.0 204 | ------ 205 | * **Improvements** 206 | - Selectively install thriftpy2 requirement based on python version (#342) 207 | - Add thrift query profile support (#333) 208 | - Add lastrowid to HiveServer2Cursor (#308) 209 | - Enable SQLAlchemy cursor configuration (#298) 210 | - Coerce floats if possible (#291) 211 | - Add SQLalchemy support storing table as Kudu (#259) (#260) 212 | - Add support for NULL_TYPE result column (#257) 213 | - Add support for optional krb_host parameter in connection (#248) 214 | - Various documentation improvements 215 | 216 | * **Bug Fixes** 217 | - Fix unexpected SQLAlchemy keyword argument 'username' (#343) 218 | - Fix unicode issue in README file (#341) 219 | - Fix for a socket leak in HiveServer2Cursor (#327) 220 | - Avoid using reserved async keyword to support Python 3.7 (#322) 221 | - Bump required thrift version to 0.9.3 or above (#303) 222 | - Fix SQLalchemy connection string parsing and LDAP auth (#261) 223 | -------------------------------------------------------------------------------- /DEVELOP.md: -------------------------------------------------------------------------------- 1 | #### Contribute 2 | 3 | The code is currently being developed on GitHub: https://github.com/cloudera/impyla 4 | 5 | Fork the repo and send a pull request against `master`. Contributions welcome! 6 | 7 | 8 | #### Thrift Codegen 9 | 10 | 0. To clean up 11 | 12 | ```bash 13 | rm -rf $IMPYLA_REPO/impala/_thrift_gen 14 | rm -rf $IMPYLA_REPO/impala/thrift/*.thrift 15 | ``` 16 | 17 | 1. 
`cp $IMPALA_REPO/common/thrift/ImpalaService.thrift $IMPYLA_REPO/impala/thrift` 18 | 19 | Hand edit ImpalaService.thrift to exclude files, API and definitions unrelated to query 20 | profile such as Frontend.thrift, BackendGflags.thrift, and Query.thrift. 21 | 22 | 2. Execute `$IMPYLA_REPO/impala/thrift/process_thrift.sh` 23 | 24 | This should only need to be done very irregularly, as the generated code is 25 | committed to the repo. Only when the original thrift IDL files change. People 26 | checking out the repo to develop on it do NOT need to run the codegen. Codegen 27 | performed with Thrift 0.16.x. 28 | 29 | 30 | #### UDF maintenance 31 | 32 | Copy a fresh copy of the `udf.h` header file 33 | 34 | ```bash 35 | cp $IMPALA_REPO/be/src/udf/udf.h $IMPYLA_REPO/impala/udf/precompiled 36 | ``` 37 | 38 | #### Release 39 | 40 | 1. Generate a summary of all the commits since the last release 41 | 42 | ```bash 43 | git log $LAST_RELEASE_TAG..HEAD 44 | ``` 45 | 46 | 2. Set the release version in `setup.py` (remove the `.dev0` tag if applicable) 47 | and commit the version number change. Also set the new version number in the 48 | readme (under "Installation") and update accordingly. 49 | 50 | 3. Tag version number and summarize changes in the tag message 51 | 52 | ```bash 53 | git tag -a vX.Y.Z 54 | ``` 55 | 56 | 4. Push the tag upstream 57 | 58 | ```bash 59 | git push upstream vX.Y.Z 60 | ``` 61 | 62 | or 63 | 64 | ```bash 65 | git push upstream --tags 66 | ``` 67 | 68 | 5. Register the release with PyPI 69 | 70 | ```bash 71 | python setup.py register sdist bdist_egg upload 72 | ``` 73 | 74 | 6. If working on master, bump up to the next anticipated version with a `.dev0` 75 | tag and commit 76 | 77 | 78 | *Backporting* 79 | 80 | 1. Checkout the tag for the version to backport onto and create a new branch 81 | 82 | ```bash 83 | git checkout vX.Y.Z 84 | git checkout -b backport 85 | ``` 86 | 87 | 2. Cherry pick the relevant commits onto the `backport` branch 88 | 89 | 3. 
Goto #1 for main release flow 90 | 91 | 4. Remove the `backport` branch 92 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ez_setup.py 2 | include README.md 3 | include LICENSE.txt 4 | include CHANGELOG.md 5 | include impala/_version.py 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # impyla 2 | 3 | Python client for HiveServer2 implementations (e.g., Impala, Hive) for 4 | distributed query engines. 5 | 6 | For higher-level Impala functionality, including a Pandas-like interface over 7 | distributed data sets, see the [Ibis project][ibis]. 8 | 9 | ### Features 10 | 11 | * HiveServer2 compliant; works with Impala and Hive, including nested data 12 | 13 | * Fully [DB API 2.0 (PEP 249)][pep249]-compliant Python client (similar to 14 | sqlite or MySQL clients) supporting Python 2.6+ and Python 3.3+. 15 | 16 | * Works with Kerberos, LDAP, SSL 17 | 18 | * [SQLAlchemy][sqlalchemy] connector 19 | 20 | * Converter to [pandas][pandas] `DataFrame`, allowing easy integration into the 21 | Python data stack (including [scikit-learn][sklearn] and 22 | [matplotlib][matplotlib]); but see the [Ibis project][ibis] for a richer 23 | experience 24 | 25 | ### Dependencies 26 | 27 | Required: 28 | 29 | * Python 2.7+ or 3.5+ 30 | 31 | * `six`, `bitarray` 32 | 33 | * `thrift==0.16.0` 34 | 35 | * `thrift_sasl==0.4.3` 36 | 37 | Optional: 38 | 39 | * `kerberos>=1.3.0` for Kerberos over HTTP support. 
This also requires Kerberos libraries 40 | to be installed on your system - see [System Kerberos](#system-kerberos) 41 | 42 | * `pandas` for conversion to `DataFrame` objects; but see the [Ibis project][ibis] instead 43 | 44 | * `sqlalchemy` for the SQLAlchemy engine 45 | 46 | * `pytest` and `requests` for running tests; `unittest2` for testing on Python 2.6 47 | 48 | 49 | #### System Kerberos 50 | 51 | Different systems require different packages to be installed to enable Kerberos support in 52 | Impyla. Some examples of how to install the packages on different distributions follow. 53 | 54 | Ubuntu: 55 | ```bash 56 | apt-get install libkrb5-dev krb5-user 57 | ``` 58 | 59 | RHEL/CentOS: 60 | ```bash 61 | yum install krb5-libs krb5-devel krb5-server krb5-workstation 62 | ``` 63 | 64 | ### Installation 65 | 66 | Install the latest release with `pip`: 67 | 68 | ```bash 69 | pip install impyla 70 | ``` 71 | 72 | For the latest (dev) version, install directly from the repo: 73 | 74 | ```bash 75 | pip install git+https://github.com/cloudera/impyla.git 76 | ``` 77 | 78 | or clone the repo: 79 | 80 | ```bash 81 | git clone https://github.com/cloudera/impyla.git 82 | cd impyla 83 | python setup.py install 84 | ``` 85 | 86 | #### Running the tests 87 | 88 | impyla uses the [pytest][pytest] toolchain, and depends on the following 89 | environment variables: 90 | 91 | ```bash 92 | export IMPYLA_TEST_HOST=your.impalad.com 93 | export IMPYLA_TEST_PORT=21050 94 | export IMPYLA_TEST_AUTH_MECH=NOSASL 95 | ``` 96 | 97 | To run the maximal set of tests, run 98 | 99 | ```bash 100 | cd path/to/impyla 101 | py.test --connect impala 102 | ``` 103 | 104 | Leave out the `--connect` option to skip tests for DB API compliance. 105 | 106 | To test impyla with different Python versions [tox] can be used. 
107 | The commands below will run all impyla tests with all supported and 108 | installed Python versions: 109 | ```bash 110 | cd path/to/impyla 111 | tox 112 | ``` 113 | To filter environments / tests use `-e` and [pytest] arguments after `--`: 114 | ```bash 115 | tox -e py310 -- -ktest_utf8_strings 116 | ``` 117 | 118 | ### Usage 119 | 120 | Impyla implements the [Python DB API v2.0 (PEP 249)][pep249] database interface 121 | (refer to it for API details): 122 | 123 | ```python 124 | from impala.dbapi import connect 125 | conn = connect(host='my.host.com', port=21050) # auth_mechanism='PLAIN' for unsecured Hive connection, see function doc 126 | cursor = conn.cursor() 127 | cursor.execute('SELECT * FROM mytable LIMIT 100') 128 | print(cursor.description) # prints the result set's schema 129 | results = cursor.fetchall() 130 | ``` 131 | 132 | The `Cursor` object also exposes the iterator interface, which is buffered 133 | (controlled by `cursor.arraysize`): 134 | 135 | ```python 136 | cursor.execute('SELECT * FROM mytable LIMIT 100') 137 | for row in cursor: 138 | print(row) 139 | ``` 140 | 141 | Furthermore, the `Cursor` object provides information about the columns 142 | returned by the query. This is useful for exporting your data as a CSV file. 
143 | 144 | ```python 145 | import csv 146 | 147 | cursor.execute('SELECT * FROM mytable LIMIT 100') 148 | columns = [datum[0] for datum in cursor.description] 149 | targetfile = '/tmp/foo.csv' 150 | 151 | with open(targetfile, 'w', newline='') as outcsv: 152 | writer = csv.writer(outcsv, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL, lineterminator='\n') 153 | writer.writerow(columns) 154 | for row in cursor: 155 | writer.writerow(row) 156 | ``` 157 | 158 | You can also get back a pandas DataFrame object: 159 | 160 | ```python 161 | from impala.util import as_pandas 162 | df = as_pandas(cursor) 163 | # carry df through scikit-learn, for example 164 | ``` 165 | 166 | 167 | [pep249]: http://legacy.python.org/dev/peps/pep-0249/ 168 | [pandas]: http://pandas.pydata.org/ 169 | [sklearn]: http://scikit-learn.org/ 170 | [matplotlib]: http://matplotlib.org/ 171 | [pytest]: http://pytest.org/latest/ 172 | [sqlalchemy]: http://www.sqlalchemy.org/ 173 | [ibis]: http://www.ibis-project.org/ 174 | [tox]: http://tox.wiki/ 175 | 176 | # How do I contribute code? 177 | You need to first sign and return an 178 | [ICLA](https://github.com/cloudera/native-toolchain/blob/icla/Cloudera%20ICLA_25APR2018.pdf) 179 | and 180 | [CCLA](https://github.com/cloudera/native-toolchain/blob/icla/Cloudera%20CCLA_25APR2018.pdf) 181 | before we can accept and redistribute your contribution. Once these are submitted you are 182 | free to start contributing to impyla. Submit these to CLA@cloudera.com. 183 | 184 | ## Find 185 | We use GitHub issues to track bugs for this project. Find an issue that you would like to 186 | work on (or file one if you have discovered a new issue!). If no-one is working on it, 187 | assign it to yourself only if you intend to work on it shortly. 188 | 189 | It's a good idea to discuss your intended approach on the issue. You are much more 190 | likely to have your patch reviewed and committed if you've already got buy-in from the 191 | impyla community before you start. 
192 | 193 | ## Fix 194 | Now start coding! As you are writing your patch, please keep the following things in mind: 195 | 196 | First, please include tests with your patch. If your patch adds a feature or fixes a bug 197 | and does not include tests, it will generally not be accepted. If you are unsure how to 198 | write tests for a particular component, please ask on the issue for guidance. 199 | 200 | Second, please keep your patch narrowly targeted to the problem described by the issue. 201 | It's better for everyone if we maintain discipline about the scope of each patch. In 202 | general, if you find a bug while working on a specific feature, file an issue for the bug, 203 | check if you can assign it to yourself and fix it independently of the feature. This helps 204 | us to differentiate between bug fixes and features and allows us to build stable 205 | maintenance releases. 206 | 207 | Finally, please write a good, clear commit message, with a short, descriptive title and 208 | a message that is exactly long enough to explain what the problem was, and how it was 209 | fixed. 210 | 211 | Please create a pull request on GitHub with your patch. 212 | -------------------------------------------------------------------------------- /bin/register-impala-udfs.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # Copyright 2014 Cloudera Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import sys 17 | import argparse 18 | 19 | import llvm.core as lc 20 | from hdfs.client import InsecureClient 21 | 22 | import impala.dbapi 23 | 24 | 25 | def log(msg): 26 | sys.stderr.write("%s\n" % msg) 27 | sys.stderr.flush() 28 | 29 | 30 | llvm2impala = { 31 | 'struct.impala_udf::BooleanVal': 'BOOLEAN', 32 | 'struct.impala_udf::TinyIntVal': 'TINYINT', 33 | 'struct.impala_udf::SmallIntVal': 'SMALLINT', 34 | 'struct.impala_udf::IntVal': 'INT', 35 | 'struct.impala_udf::BigIntVal': 'BIGINT', 36 | 'struct.impala_udf::FloatVal': 'FLOAT', 37 | 'struct.impala_udf::DoubleVal': 'DOUBLE', 38 | 'struct.impala_udf::StringVal': 'STRING', 39 | 'struct.impala_udf::TimestampVal': 'TIMESTAMP' 40 | } 41 | 42 | parser = argparse.ArgumentParser(description="Register clang-compiled UDFs " 43 | "with Impala") 44 | parser.add_argument('-i', '--llvm-path', required=True, 45 | help="Local path to LLVM module") 46 | parser.add_argument('-o', '--hdfs-path', required=True, 47 | help="Path in HDFS to store LLVM module, including the " 48 | "final file name") 49 | parser.add_argument('-n', '--name', required=True, action='append', 50 | help="Specify the name of the C++ UDF; must be matched " 51 | "with a --return-type") 52 | parser.add_argument('-t', '--return-type', required=True, action='append', 53 | help="Specify a return type for the corresponding " 54 | "function; use Impala types, e.g., STRING or INT") 55 | parser.add_argument('-j', '--impala-host', required=False, default='localhost', 56 | help="Impala daemon hostname") 57 | parser.add_argument('-q', '--impala-port', required=False, default=21050, 58 | help="Port for Impala daemon") 59 | parser.add_argument('-k', '--nn-host', required=False, default='localhost', 60 | help="Namenode hostname") 61 | parser.add_argument('-p', '--webhdfs-port', required=False, default=50070, 62 | type=int, help="Port for WebHDFS 
interface") 63 | parser.add_argument('-u', '--user', required=False, 64 | help="User name to connect to HDFS with") 65 | parser.add_argument('-f', '--force', action='store_true', 66 | help="Overwrite LLVM on HDFS if it already exists") 67 | parser.add_argument('-d', '--db', required=False, 68 | help="Specify which database to add the functions to") 69 | args = parser.parse_args() 70 | 71 | # do some input validation 72 | if len(args.name) != len(args.return_type): 73 | raise ValueError("Must supply a return type or each specified " 74 | "function name.") 75 | if not args.hdfs_path.endswith('.ll'): 76 | raise ValueError("The HDFS file name must end with .ll") 77 | 78 | # load the LLVM IR 79 | with open(args.llvm_path, 'rb') as ip: 80 | bc = ip.read() 81 | ll = lc.Module.from_bitcode(bc) 82 | log("Loaded the LLVM IR file %s" % args.llvm_path) 83 | 84 | # load symbols and types for each function in the LLVM module 85 | functions = [] 86 | for function in ll.functions: 87 | try: 88 | symbol = function.name 89 | log("Loading types for function %s" % symbol) 90 | # skip the first argument, which is FunctionContext* 91 | arg_types = tuple([llvm2impala[arg.pointee.name] 92 | for arg in function.type.pointee.args[1:]]) 93 | functions.append((symbol, arg_types)) 94 | except (AttributeError, KeyError): 95 | # this process could fail for non-UDF helper functions...just ignore 96 | # them, because we're not going to be registering them anyway 97 | log("Had trouble with function %s; moving on..." 
% symbol) 98 | pass 99 | 100 | # transfer the LLVM module to HDFS 101 | url = 'http://{nn_host}:{webhdfs_port}'.format( 102 | nn_host=args.nn_host, webhdfs_port=args.webhdfs_port) 103 | hdfs_client = InsecureClient(url, user=args.user) 104 | hdfs_client.write(args.hdfs_path, bc, overwrite=args.force) 105 | log("Transferred LLVM IR to HDFS at %s" % args.hdfs_path) 106 | 107 | # register the functions with impala 108 | conn = impala.dbapi.connect(host=args.impala_host, port=args.impala_port) 109 | cursor = conn.cursor(user=args.user) 110 | log("Connected to impalad: %s" % args.impala_host) 111 | if args.db: 112 | cursor.execute('USE %s' % args.db) 113 | cursor.execute("SHOW FUNCTIONS") 114 | registered_functions = cursor.fetchall() 115 | for (udf_name, return_type) in zip(args.name, args.return_type): 116 | log("Registering function %s" % udf_name) 117 | # find matching LLVM symbols to the current UDF name 118 | matches = [pair for pair in functions if udf_name in pair[0]] 119 | if len(matches) == 0: 120 | log("Couldn't find a symbol matching %s; skipping..." % udf_name) 121 | continue 122 | if len(matches) > 1: 123 | log("Found multiple symbols matching %s; skipping..." % udf_name) 124 | continue 125 | (symbol, arg_types) = matches[0] 126 | impala_name = '%s(%s)' % (udf_name, ','.join(arg_types)) 127 | if args.force and impala_name in registered_functions: 128 | log("Overwriting function %s" % impala_name) 129 | cursor.execute("DROP FUNCTION %s" % impala_name) 130 | register_query = ( 131 | "CREATE FUNCTION %s RETURNS %s LOCATION '%s' SYMBOL='%s'" % ( 132 | impala_name, return_type, args.hdfs_path, symbol)) 133 | log(register_query) 134 | cursor.execute(register_query) 135 | log("Successfully registered %s" % impala_name) 136 | -------------------------------------------------------------------------------- /build-dists.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 Cloudera Inc. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -x 17 | 18 | # Usage info 19 | show_help() { 20 | cat << EOF 21 | Usage: ${0##*/} [-h] [-a GITHUB_ACCOUNT] GIT_VERSION_TAG 22 | -h display this help and exit 23 | -a GITHUB_ACCOUNT use GITHUB_ACCOUNT instead of 'cloudera' 24 | EOF 25 | } 26 | 27 | # Parse command line options 28 | GITHUB_ACCOUNT="cloudera" 29 | GIT_VERSION_TAG="" 30 | 31 | OPTIND=1 32 | while getopts ha: opt; do 33 | case $opt in 34 | h) 35 | show_help 36 | exit 0 37 | ;; 38 | a) GITHUB_ACCOUNT=$OPTARG 39 | ;; 40 | *) 41 | show_help >&2 42 | exit 1 43 | ;; 44 | esac 45 | done 46 | shift "$((OPTIND-1))" # Discard the options and sentinel -- 47 | 48 | GIT_VERSION_TAG="$1" 49 | if [ -z "$GIT_VERSION_TAG" ] || [ "$#" -gt 1 ]; then 50 | show_help >&2 51 | exit 1 52 | fi 53 | 54 | # Start build script in manylinux docker container 55 | DOCKER_IMAGE='quay.io/pypa/manylinux2010_x86_64' 56 | 57 | docker pull "$DOCKER_IMAGE" 58 | docker container run -t --rm -v "$(pwd)/io:/io" "$DOCKER_IMAGE" \ 59 | "/io/manylinux/build.sh" \ 60 | "/io/pip-dists-build" \ 61 | "$GIT_VERSION_TAG" \ 62 | "$GITHUB_ACCOUNT" 63 | 64 | RETVAL="$?" 
65 | if [[ "$RETVAL" != "0" ]]; then 66 | echo "Failed with $RETVAL" 67 | else 68 | echo "Succeeded" 69 | fi 70 | exit $RETVAL 71 | -------------------------------------------------------------------------------- /dev/merge-pr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # Utility for creating well-formed pull request merges and pushing them to Apache. 
# usage: ./dev/merge-pr.py (see config env vars below)
#
# Lightly modified from version of this script in incubator-parquet-format

from __future__ import print_function

from requests.auth import HTTPBasicAuth
import requests

import os
import six
import subprocess
import sys
import textwrap

# Repository root: this script lives one directory below it (in dev/).
IMPYLA_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
PROJECT_NAME = 'impyla'
print("IMPYLA_HOME = " + IMPYLA_HOME)

# Remote name with the PR
PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "upstream")

# Remote name where results pushed
PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "upstream")

GITHUB_BASE = "https://github.com/cloudera/" + PROJECT_NAME + "/pull"
GITHUB_API_BASE = "https://api.github.com/repos/cloudera/" + PROJECT_NAME

# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"

# Default offered by the cherry-pick prompt when the repository has no
# "branch-*" release branches to choose from.
DEFAULT_PICK_BRANCH = "master"

os.chdir(IMPYLA_HOME)

auth_required = False

if auth_required:
    GITHUB_USERNAME = os.environ['GITHUB_USER']
    import getpass
    GITHUB_PASSWORD = getpass.getpass('Enter github.com password for %s:'
                                      % GITHUB_USERNAME)

    def get_json_auth(url):
        """GET *url* with HTTP basic auth and return the decoded JSON body."""
        auth = HTTPBasicAuth(GITHUB_USERNAME, GITHUB_PASSWORD)
        req = requests.get(url, auth=auth)
        return req.json()

    get_json = get_json_auth
else:
    def get_json_no_auth(url):
        """GET *url* anonymously and return the decoded JSON body."""
        req = requests.get(url)
        return req.json()

    get_json = get_json_no_auth


def read_input(prompt):
    """Prompt the user and return the typed line.

    ``raw_input`` only exists on Python 2; ``six.moves.input`` maps to
    ``raw_input`` there and to ``input`` on Python 3.
    """
    return six.moves.input(prompt)


def fail(msg):
    """Print *msg*, undo local state via ``clean_up()`` and exit with -1."""
    print(msg)
    clean_up()
    sys.exit(-1)


def run_cmd(cmd):
    """Run a command and return its standard output as text.

    *cmd* is either an argument list or a single string; a string is split
    on single spaces (no shell quoting rules are applied).

    Raises ``subprocess.CalledProcessError`` (carrying the captured output)
    when the command exits with a non-zero status.
    """
    # py2.6 does not have subprocess.check_output
    if isinstance(cmd, six.string_types):
        cmd = cmd.split(' ')

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    retcode = process.poll()

    # On Python 3 the pipe yields bytes, but every caller slices, splits and
    # %-formats the result as text.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")

    if retcode:
        raise subprocess.CalledProcessError(retcode, cmd, output=output)
    return output


def continue_maybe(prompt):
    """Ask a y/n question; anything other than "y" aborts through ``fail()``."""
    result = read_input("\n%s (y/n): " % prompt)
    if result.lower() != "y":
        fail("Okay, exiting")


# Commit to return to once the temporary branches are no longer needed.
original_head = run_cmd("git rev-parse HEAD")[:8]


def clean_up():
    """Check out ``original_head`` and delete every local PR_TOOL* branch."""
    print("Restoring head pointer to %s" % original_head)
    run_cmd("git checkout %s" % original_head)

    local_branches = run_cmd("git branch").replace(" ", "").split("\n")

    for branch in local_branches:
        if branch.startswith(BRANCH_PREFIX):
            print("Deleting local branch %s" % branch)
            run_cmd("git branch -D %s" % branch)


# merge the requested PR and return the merge hash
def merge_pr(pr_num, target_ref):
    """Squash-merge pull request *pr_num* into *target_ref* and push it.

    Reads the module-level ``title``, ``body`` and ``pr_repo_desc`` values
    (set from the GitHub API response) to build the commit message.

    Returns the first 8 characters of the resulting commit hash. Exits the
    process through ``fail()`` if the user declines a prompt or the push
    fails.
    """
    pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
    target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper())
    run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
    run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, target_ref, target_branch_name))
    run_cmd("git checkout %s" % target_branch_name)

    had_conflicts = False
    try:
        run_cmd(['git', 'merge', pr_branch_name, '--squash'])
    except Exception as e:
        msg = "Error merging: %s\nWould you like to manually fix-up this merge?" % e
        continue_maybe(msg)
        msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?"
        continue_maybe(msg)
        had_conflicts = True

    commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
                              '--pretty=format:%an <%ae>']).split("\n")
    # Most frequent author first; that author is credited on the squash commit.
    distinct_authors = sorted(set(commit_authors),
                              key=lambda x: commit_authors.count(x), reverse=True)
    primary_author = distinct_authors[0]
    commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
                       '--pretty=format:%h [%an] %s']).split("\n\n")

    merge_message_flags = []

    merge_message_flags += ["-m", title]
    if body is not None:
        merge_message_flags += ["-m", '\n'.join(textwrap.wrap(body))]

    authors = "\n".join(["Author: %s" % a for a in distinct_authors])

    merge_message_flags += ["-m", authors]

    if had_conflicts:
        committer_name = run_cmd("git config --get user.name").strip()
        committer_email = run_cmd("git config --get user.email").strip()
        message = "This patch had conflicts when merged, resolved by\nCommitter: %s <%s>" % (
            committer_name, committer_email)
        merge_message_flags += ["-m", message]

    # The "Closes #%s" string is required for GitHub to correctly close the PR
    merge_message_flags += [
        "-m",
        "Closes #%s from %s and squashes the following commits:" % (pr_num, pr_repo_desc)]
    for c in commits:
        merge_message_flags += ["-m", c]

    # The argv list is handed to git without a shell, so the author value is
    # passed bare; quote characters here would become part of the value.
    run_cmd(['git', 'commit', '--author=%s' % primary_author] +
            merge_message_flags)

    continue_maybe("Merge complete (local ref %s). Push to %s?" % (
        target_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name,
                                       target_ref))
    except Exception as e:
        # fail() performs the clean_up() itself.
        fail("Exception while pushing: %s" % e)

    merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8]
    clean_up()
    print("Pull request #%s merged!" % pr_num)
    print("Merge hash: %s" % merge_hash)
    return merge_hash


def cherry_pick(pr_num, merge_hash, default_branch):
    """Cherry-pick *merge_hash* into a branch chosen by the user and push it.

    The user is prompted for the branch; an empty answer selects
    *default_branch*. Returns the name of the branch that was picked into.
    """
    pick_ref = read_input("Enter a branch name [%s]: " % default_branch)
    if pick_ref == "":
        pick_ref = default_branch

    pick_branch_name = "%s_PICK_PR_%s_%s" % (BRANCH_PREFIX, pr_num,
                                             pick_ref.upper())

    run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, pick_ref,
                                    pick_branch_name))
    run_cmd("git checkout %s" % pick_branch_name)
    run_cmd("git cherry-pick -sx %s" % merge_hash)

    continue_maybe("Pick complete (local ref %s). Push to %s?" % (
        pick_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name,
                                       pick_ref))
    except Exception as e:
        # fail() performs the clean_up() itself.
        fail("Exception while pushing: %s" % e)

    pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8]
    clean_up()

    print("Pull request #%s picked into %s!" % (pr_num, pick_ref))
    print("Pick hash: %s" % pick_hash)
    return pick_ref


def maybe_cherry_pick(pr_num, merge_hash, default_branch):
    """Offer to cherry-pick *merge_hash* until the user stops answering "y".

    Returns the list of branch names the commit was picked into (possibly
    empty).
    """
    picked_refs = []
    pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
    while read_input("\n%s (y/n): " % pick_prompt).lower() == "y":
        picked_refs.append(cherry_pick(pr_num, merge_hash, default_branch))
    return picked_refs


def fix_version_from_branch(branch, versions):
    """Return the entry of *versions* that corresponds to *branch*.

    For "master" this is the first entry; otherwise it is the last entry
    whose ``name`` starts with the branch name minus its "branch-" prefix.
    """
    # Note: Assumes this is a sorted (newest->oldest) list of un-released
    # versions
    if branch == "master":
        return versions[0]
    branch_ver = branch.replace("branch-", "")
    # A list (not a lazy filter object) so the result can be indexed on py3.
    return [v for v in versions if v.name.startswith(branch_ver)][-1]


branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = [b['name'] for b in branches
                if b['name'].startswith("branch-")]
# Assumes branch names can be sorted lexicographically
if branch_names:
    latest_branch = sorted(branch_names, reverse=True)[0]
else:
    latest_branch = DEFAULT_PICK_BRANCH

pr_num = read_input("Which pull request would you like to merge? (e.g. 34): ")
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))

url = pr["url"]
title = pr["title"]
body = pr["body"]
target_ref = pr["base"]["ref"]
user_login = pr["user"]["login"]
base_ref = pr["head"]["ref"]
pr_repo_desc = "%s/%s" % (user_login, base_ref)

if pr["merged"] is True:
    print("Pull request {0} has already been merged, assuming "
          "you want to backport".format(pr_num))
    merge_commit_desc = run_cmd([
        'git', 'log', '--merges', '--first-parent',
        '--grep=pull request #%s' % pr_num, '--oneline']).split("\n")[0]
    if merge_commit_desc == "":
        fail("Couldn't find any merge commit for #{0}"
             ", you may need to update HEAD.".format(pr_num))

    # --oneline output is "<7-char hash> <subject>".
    merge_hash = merge_commit_desc[:7]
    message = merge_commit_desc[8:]

    print("Found: %s" % message)
    maybe_cherry_pick(pr_num, merge_hash, latest_branch)
    sys.exit(0)

if not bool(pr["mergeable"]):
    msg = ("Pull request {0} is not mergeable in its current form.\n"
           "Continue? (experts only!)".format(pr_num))
    continue_maybe(msg)

print("\n=== Pull Request #%s ===" % pr_num)
print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
    title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)

merged_refs = [target_ref]

merge_hash = merge_pr(pr_num, target_ref)

merged_refs = merged_refs + maybe_cherry_pick(pr_num, merge_hash,
                                              latest_branch)

# --------------------------------------------------------------------------------
# /impala/__init__.py:
# --------------------------------------------------------------------------------
# 1 | # Copyright 2013 Cloudera Inc.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | # setup.py also contains the version - the two should have the same value! 17 | __version__ = u'v0.21.0' 18 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ErrorCodes/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ExecStats/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ExecStats/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- 
/impala/_thrift_gen/ImpalaService/ImpalaHiveServer2Service-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Autogenerated by Thrift Compiler (0.16.0) 4 | # 5 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | # 7 | # options string: py:new_style,no_utf8strings 8 | # 9 | 10 | import sys 11 | import pprint 12 | if sys.version_info[0] > 2: 13 | from urllib.parse import urlparse 14 | else: 15 | from urlparse import urlparse 16 | from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient 17 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol 18 | 19 | from impala._thrift_gen.ImpalaService import ImpalaHiveServer2Service 20 | from impala._thrift_gen.ImpalaService.ttypes import * 21 | 22 | if len(sys.argv) <= 1 or sys.argv[1] == '--help': 23 | print('') 24 | print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') 25 | print('') 26 | print('Functions:') 27 | print(' TGetExecSummaryResp GetExecSummary(TGetExecSummaryReq req)') 28 | print(' TGetRuntimeProfileResp GetRuntimeProfile(TGetRuntimeProfileReq req)') 29 | print(' TPingImpalaHS2ServiceResp PingImpalaHS2Service(TPingImpalaHS2ServiceReq req)') 30 | print(' TCloseImpalaOperationResp CloseImpalaOperation(TCloseImpalaOperationReq req)') 31 | print(' TOpenSessionResp OpenSession(TOpenSessionReq req)') 32 | print(' TCloseSessionResp CloseSession(TCloseSessionReq req)') 33 | print(' TGetInfoResp GetInfo(TGetInfoReq req)') 34 | print(' TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req)') 35 | print(' TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req)') 36 | print(' TGetCatalogsResp GetCatalogs(TGetCatalogsReq req)') 37 | print(' TGetSchemasResp GetSchemas(TGetSchemasReq req)') 38 | print(' TGetTablesResp GetTables(TGetTablesReq req)') 39 | print(' TGetTableTypesResp 
GetTableTypes(TGetTableTypesReq req)') 40 | print(' TGetColumnsResp GetColumns(TGetColumnsReq req)') 41 | print(' TGetFunctionsResp GetFunctions(TGetFunctionsReq req)') 42 | print(' TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req)') 43 | print(' TCancelOperationResp CancelOperation(TCancelOperationReq req)') 44 | print(' TCloseOperationResp CloseOperation(TCloseOperationReq req)') 45 | print(' TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req)') 46 | print(' TFetchResultsResp FetchResults(TFetchResultsReq req)') 47 | print(' TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req)') 48 | print(' TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req)') 49 | print(' TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req)') 50 | print(' TGetLogResp GetLog(TGetLogReq req)') 51 | print('') 52 | sys.exit(0) 53 | 54 | pp = pprint.PrettyPrinter(indent=2) 55 | host = 'localhost' 56 | port = 9090 57 | uri = '' 58 | framed = False 59 | ssl = False 60 | validate = True 61 | ca_certs = None 62 | keyfile = None 63 | certfile = None 64 | http = False 65 | argi = 1 66 | 67 | if sys.argv[argi] == '-h': 68 | parts = sys.argv[argi + 1].split(':') 69 | host = parts[0] 70 | if len(parts) > 1: 71 | port = int(parts[1]) 72 | argi += 2 73 | 74 | if sys.argv[argi] == '-u': 75 | url = urlparse(sys.argv[argi + 1]) 76 | parts = url[1].split(':') 77 | host = parts[0] 78 | if len(parts) > 1: 79 | port = int(parts[1]) 80 | else: 81 | port = 80 82 | uri = url[2] 83 | if url[4]: 84 | uri += '?%s' % url[4] 85 | http = True 86 | argi += 2 87 | 88 | if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': 89 | framed = True 90 | argi += 1 91 | 92 | if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': 93 | ssl = True 94 | argi += 1 95 | 96 | if sys.argv[argi] == '-novalidate': 97 | validate = False 98 | argi += 1 99 | 100 | if sys.argv[argi] == '-ca_certs': 101 | ca_certs = sys.argv[argi+1] 
102 | argi += 2 103 | 104 | if sys.argv[argi] == '-keyfile': 105 | keyfile = sys.argv[argi+1] 106 | argi += 2 107 | 108 | if sys.argv[argi] == '-certfile': 109 | certfile = sys.argv[argi+1] 110 | argi += 2 111 | 112 | cmd = sys.argv[argi] 113 | args = sys.argv[argi + 1:] 114 | 115 | if http: 116 | transport = THttpClient.THttpClient(host, port, uri) 117 | else: 118 | if ssl: 119 | socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) 120 | else: 121 | socket = TSocket.TSocket(host, port) 122 | if framed: 123 | transport = TTransport.TFramedTransport(socket) 124 | else: 125 | transport = TTransport.TBufferedTransport(socket) 126 | protocol = TBinaryProtocol(transport) 127 | client = ImpalaHiveServer2Service.Client(protocol) 128 | transport.open() 129 | 130 | if cmd == 'GetExecSummary': 131 | if len(args) != 1: 132 | print('GetExecSummary requires 1 args') 133 | sys.exit(1) 134 | pp.pprint(client.GetExecSummary(eval(args[0]),)) 135 | 136 | elif cmd == 'GetRuntimeProfile': 137 | if len(args) != 1: 138 | print('GetRuntimeProfile requires 1 args') 139 | sys.exit(1) 140 | pp.pprint(client.GetRuntimeProfile(eval(args[0]),)) 141 | 142 | elif cmd == 'PingImpalaHS2Service': 143 | if len(args) != 1: 144 | print('PingImpalaHS2Service requires 1 args') 145 | sys.exit(1) 146 | pp.pprint(client.PingImpalaHS2Service(eval(args[0]),)) 147 | 148 | elif cmd == 'CloseImpalaOperation': 149 | if len(args) != 1: 150 | print('CloseImpalaOperation requires 1 args') 151 | sys.exit(1) 152 | pp.pprint(client.CloseImpalaOperation(eval(args[0]),)) 153 | 154 | elif cmd == 'OpenSession': 155 | if len(args) != 1: 156 | print('OpenSession requires 1 args') 157 | sys.exit(1) 158 | pp.pprint(client.OpenSession(eval(args[0]),)) 159 | 160 | elif cmd == 'CloseSession': 161 | if len(args) != 1: 162 | print('CloseSession requires 1 args') 163 | sys.exit(1) 164 | pp.pprint(client.CloseSession(eval(args[0]),)) 165 | 166 | elif cmd == 'GetInfo': 
167 | if len(args) != 1: 168 | print('GetInfo requires 1 args') 169 | sys.exit(1) 170 | pp.pprint(client.GetInfo(eval(args[0]),)) 171 | 172 | elif cmd == 'ExecuteStatement': 173 | if len(args) != 1: 174 | print('ExecuteStatement requires 1 args') 175 | sys.exit(1) 176 | pp.pprint(client.ExecuteStatement(eval(args[0]),)) 177 | 178 | elif cmd == 'GetTypeInfo': 179 | if len(args) != 1: 180 | print('GetTypeInfo requires 1 args') 181 | sys.exit(1) 182 | pp.pprint(client.GetTypeInfo(eval(args[0]),)) 183 | 184 | elif cmd == 'GetCatalogs': 185 | if len(args) != 1: 186 | print('GetCatalogs requires 1 args') 187 | sys.exit(1) 188 | pp.pprint(client.GetCatalogs(eval(args[0]),)) 189 | 190 | elif cmd == 'GetSchemas': 191 | if len(args) != 1: 192 | print('GetSchemas requires 1 args') 193 | sys.exit(1) 194 | pp.pprint(client.GetSchemas(eval(args[0]),)) 195 | 196 | elif cmd == 'GetTables': 197 | if len(args) != 1: 198 | print('GetTables requires 1 args') 199 | sys.exit(1) 200 | pp.pprint(client.GetTables(eval(args[0]),)) 201 | 202 | elif cmd == 'GetTableTypes': 203 | if len(args) != 1: 204 | print('GetTableTypes requires 1 args') 205 | sys.exit(1) 206 | pp.pprint(client.GetTableTypes(eval(args[0]),)) 207 | 208 | elif cmd == 'GetColumns': 209 | if len(args) != 1: 210 | print('GetColumns requires 1 args') 211 | sys.exit(1) 212 | pp.pprint(client.GetColumns(eval(args[0]),)) 213 | 214 | elif cmd == 'GetFunctions': 215 | if len(args) != 1: 216 | print('GetFunctions requires 1 args') 217 | sys.exit(1) 218 | pp.pprint(client.GetFunctions(eval(args[0]),)) 219 | 220 | elif cmd == 'GetOperationStatus': 221 | if len(args) != 1: 222 | print('GetOperationStatus requires 1 args') 223 | sys.exit(1) 224 | pp.pprint(client.GetOperationStatus(eval(args[0]),)) 225 | 226 | elif cmd == 'CancelOperation': 227 | if len(args) != 1: 228 | print('CancelOperation requires 1 args') 229 | sys.exit(1) 230 | pp.pprint(client.CancelOperation(eval(args[0]),)) 231 | 232 | elif cmd == 'CloseOperation': 233 | if 
len(args) != 1: 234 | print('CloseOperation requires 1 args') 235 | sys.exit(1) 236 | pp.pprint(client.CloseOperation(eval(args[0]),)) 237 | 238 | elif cmd == 'GetResultSetMetadata': 239 | if len(args) != 1: 240 | print('GetResultSetMetadata requires 1 args') 241 | sys.exit(1) 242 | pp.pprint(client.GetResultSetMetadata(eval(args[0]),)) 243 | 244 | elif cmd == 'FetchResults': 245 | if len(args) != 1: 246 | print('FetchResults requires 1 args') 247 | sys.exit(1) 248 | pp.pprint(client.FetchResults(eval(args[0]),)) 249 | 250 | elif cmd == 'GetDelegationToken': 251 | if len(args) != 1: 252 | print('GetDelegationToken requires 1 args') 253 | sys.exit(1) 254 | pp.pprint(client.GetDelegationToken(eval(args[0]),)) 255 | 256 | elif cmd == 'CancelDelegationToken': 257 | if len(args) != 1: 258 | print('CancelDelegationToken requires 1 args') 259 | sys.exit(1) 260 | pp.pprint(client.CancelDelegationToken(eval(args[0]),)) 261 | 262 | elif cmd == 'RenewDelegationToken': 263 | if len(args) != 1: 264 | print('RenewDelegationToken requires 1 args') 265 | sys.exit(1) 266 | pp.pprint(client.RenewDelegationToken(eval(args[0]),)) 267 | 268 | elif cmd == 'GetLog': 269 | if len(args) != 1: 270 | print('GetLog requires 1 args') 271 | sys.exit(1) 272 | pp.pprint(client.GetLog(eval(args[0]),)) 273 | 274 | else: 275 | print('Unrecognized method %s' % cmd) 276 | sys.exit(1) 277 | 278 | transport.close() 279 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ImpalaService/ImpalaService-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Autogenerated by Thrift Compiler (0.16.0) 4 | # 5 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | # 7 | # options string: py:new_style,no_utf8strings 8 | # 9 | 10 | import sys 11 | import pprint 12 | if sys.version_info[0] > 2: 13 | from urllib.parse import urlparse 14 | else: 15 | from urlparse import urlparse 
16 | from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient 17 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol 18 | 19 | from impala._thrift_gen.ImpalaService import ImpalaService 20 | from impala._thrift_gen.ImpalaService.ttypes import * 21 | 22 | if len(sys.argv) <= 1 or sys.argv[1] == '--help': 23 | print('') 24 | print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') 25 | print('') 26 | print('Functions:') 27 | print(' TStatus Cancel(QueryHandle query_id)') 28 | print(' TStatus ResetCatalog()') 29 | print(' TStatus ResetTable(TResetTableReq request)') 30 | print(' string GetRuntimeProfile(QueryHandle query_id)') 31 | print(' TDmlResult CloseInsert(QueryHandle handle)') 32 | print(' TPingImpalaServiceResp PingImpalaService()') 33 | print(' TExecSummary GetExecSummary(QueryHandle handle)') 34 | print(' QueryHandle query(Query query)') 35 | print(' QueryHandle executeAndWait(Query query, LogContextId clientCtx)') 36 | print(' QueryExplanation explain(Query query)') 37 | print(' Results fetch(QueryHandle query_id, bool start_over, i32 fetch_size)') 38 | print(' QueryState get_state(QueryHandle handle)') 39 | print(' ResultsMetadata get_results_metadata(QueryHandle handle)') 40 | print(' string echo(string s)') 41 | print(' string dump_config()') 42 | print(' string get_log(LogContextId context)') 43 | print(' get_default_configuration(bool include_hadoop)') 44 | print(' void close(QueryHandle handle)') 45 | print(' void clean(LogContextId log_context)') 46 | print('') 47 | sys.exit(0) 48 | 49 | pp = pprint.PrettyPrinter(indent=2) 50 | host = 'localhost' 51 | port = 9090 52 | uri = '' 53 | framed = False 54 | ssl = False 55 | validate = True 56 | ca_certs = None 57 | keyfile = None 58 | certfile = None 59 | http = False 60 | argi = 1 61 | 62 | if sys.argv[argi] == '-h': 63 | parts = sys.argv[argi + 
1].split(':') 64 | host = parts[0] 65 | if len(parts) > 1: 66 | port = int(parts[1]) 67 | argi += 2 68 | 69 | if sys.argv[argi] == '-u': 70 | url = urlparse(sys.argv[argi + 1]) 71 | parts = url[1].split(':') 72 | host = parts[0] 73 | if len(parts) > 1: 74 | port = int(parts[1]) 75 | else: 76 | port = 80 77 | uri = url[2] 78 | if url[4]: 79 | uri += '?%s' % url[4] 80 | http = True 81 | argi += 2 82 | 83 | if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': 84 | framed = True 85 | argi += 1 86 | 87 | if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': 88 | ssl = True 89 | argi += 1 90 | 91 | if sys.argv[argi] == '-novalidate': 92 | validate = False 93 | argi += 1 94 | 95 | if sys.argv[argi] == '-ca_certs': 96 | ca_certs = sys.argv[argi+1] 97 | argi += 2 98 | 99 | if sys.argv[argi] == '-keyfile': 100 | keyfile = sys.argv[argi+1] 101 | argi += 2 102 | 103 | if sys.argv[argi] == '-certfile': 104 | certfile = sys.argv[argi+1] 105 | argi += 2 106 | 107 | cmd = sys.argv[argi] 108 | args = sys.argv[argi + 1:] 109 | 110 | if http: 111 | transport = THttpClient.THttpClient(host, port, uri) 112 | else: 113 | if ssl: 114 | socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) 115 | else: 116 | socket = TSocket.TSocket(host, port) 117 | if framed: 118 | transport = TTransport.TFramedTransport(socket) 119 | else: 120 | transport = TTransport.TBufferedTransport(socket) 121 | protocol = TBinaryProtocol(transport) 122 | client = ImpalaService.Client(protocol) 123 | transport.open() 124 | 125 | if cmd == 'Cancel': 126 | if len(args) != 1: 127 | print('Cancel requires 1 args') 128 | sys.exit(1) 129 | pp.pprint(client.Cancel(eval(args[0]),)) 130 | 131 | elif cmd == 'ResetCatalog': 132 | if len(args) != 0: 133 | print('ResetCatalog requires 0 args') 134 | sys.exit(1) 135 | pp.pprint(client.ResetCatalog()) 136 | 137 | elif cmd == 'ResetTable': 138 | if len(args) != 1: 139 | print('ResetTable requires 1 args') 140 | 
sys.exit(1) 141 | pp.pprint(client.ResetTable(eval(args[0]),)) 142 | 143 | elif cmd == 'GetRuntimeProfile': 144 | if len(args) != 1: 145 | print('GetRuntimeProfile requires 1 args') 146 | sys.exit(1) 147 | pp.pprint(client.GetRuntimeProfile(eval(args[0]),)) 148 | 149 | elif cmd == 'CloseInsert': 150 | if len(args) != 1: 151 | print('CloseInsert requires 1 args') 152 | sys.exit(1) 153 | pp.pprint(client.CloseInsert(eval(args[0]),)) 154 | 155 | elif cmd == 'PingImpalaService': 156 | if len(args) != 0: 157 | print('PingImpalaService requires 0 args') 158 | sys.exit(1) 159 | pp.pprint(client.PingImpalaService()) 160 | 161 | elif cmd == 'GetExecSummary': 162 | if len(args) != 1: 163 | print('GetExecSummary requires 1 args') 164 | sys.exit(1) 165 | pp.pprint(client.GetExecSummary(eval(args[0]),)) 166 | 167 | elif cmd == 'query': 168 | if len(args) != 1: 169 | print('query requires 1 args') 170 | sys.exit(1) 171 | pp.pprint(client.query(eval(args[0]),)) 172 | 173 | elif cmd == 'executeAndWait': 174 | if len(args) != 2: 175 | print('executeAndWait requires 2 args') 176 | sys.exit(1) 177 | pp.pprint(client.executeAndWait(eval(args[0]), eval(args[1]),)) 178 | 179 | elif cmd == 'explain': 180 | if len(args) != 1: 181 | print('explain requires 1 args') 182 | sys.exit(1) 183 | pp.pprint(client.explain(eval(args[0]),)) 184 | 185 | elif cmd == 'fetch': 186 | if len(args) != 3: 187 | print('fetch requires 3 args') 188 | sys.exit(1) 189 | pp.pprint(client.fetch(eval(args[0]), eval(args[1]), eval(args[2]),)) 190 | 191 | elif cmd == 'get_state': 192 | if len(args) != 1: 193 | print('get_state requires 1 args') 194 | sys.exit(1) 195 | pp.pprint(client.get_state(eval(args[0]),)) 196 | 197 | elif cmd == 'get_results_metadata': 198 | if len(args) != 1: 199 | print('get_results_metadata requires 1 args') 200 | sys.exit(1) 201 | pp.pprint(client.get_results_metadata(eval(args[0]),)) 202 | 203 | elif cmd == 'echo': 204 | if len(args) != 1: 205 | print('echo requires 1 args') 206 | 
sys.exit(1) 207 | pp.pprint(client.echo(args[0],)) 208 | 209 | elif cmd == 'dump_config': 210 | if len(args) != 0: 211 | print('dump_config requires 0 args') 212 | sys.exit(1) 213 | pp.pprint(client.dump_config()) 214 | 215 | elif cmd == 'get_log': 216 | if len(args) != 1: 217 | print('get_log requires 1 args') 218 | sys.exit(1) 219 | pp.pprint(client.get_log(eval(args[0]),)) 220 | 221 | elif cmd == 'get_default_configuration': 222 | if len(args) != 1: 223 | print('get_default_configuration requires 1 args') 224 | sys.exit(1) 225 | pp.pprint(client.get_default_configuration(eval(args[0]),)) 226 | 227 | elif cmd == 'close': 228 | if len(args) != 1: 229 | print('close requires 1 args') 230 | sys.exit(1) 231 | pp.pprint(client.close(eval(args[0]),)) 232 | 233 | elif cmd == 'clean': 234 | if len(args) != 1: 235 | print('clean requires 1 args') 236 | sys.exit(1) 237 | pp.pprint(client.clean(eval(args[0]),)) 238 | 239 | else: 240 | print('Unrecognized method %s' % cmd) 241 | sys.exit(1) 242 | 243 | transport.close() 244 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ImpalaService/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants', 'ImpalaService', 'ImpalaHiveServer2Service'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/ImpalaService/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | 
-------------------------------------------------------------------------------- /impala/_thrift_gen/Metrics/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Metrics/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Metrics/ttypes.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | 14 | from thrift.transport import TTransport 15 | all_structs = [] 16 | 17 | 18 | class TUnit(object): 19 | UNIT = 0 20 | UNIT_PER_SECOND = 1 21 | CPU_TICKS = 2 22 | BYTES = 3 23 | BYTES_PER_SECOND = 4 24 | TIME_NS = 5 25 | DOUBLE_VALUE = 6 26 | NONE = 7 27 | TIME_MS = 8 28 | TIME_S = 9 29 | TIME_US = 10 30 | BASIS_POINTS = 11 31 | 32 | _VALUES_TO_NAMES = { 33 | 0: "UNIT", 34 | 1: "UNIT_PER_SECOND", 35 | 2: "CPU_TICKS", 36 | 3: "BYTES", 37 | 4: "BYTES_PER_SECOND", 38 | 5: "TIME_NS", 39 | 6: 
"DOUBLE_VALUE", 40 | 7: "NONE", 41 | 8: "TIME_MS", 42 | 9: "TIME_S", 43 | 10: "TIME_US", 44 | 11: "BASIS_POINTS", 45 | } 46 | 47 | _NAMES_TO_VALUES = { 48 | "UNIT": 0, 49 | "UNIT_PER_SECOND": 1, 50 | "CPU_TICKS": 2, 51 | "BYTES": 3, 52 | "BYTES_PER_SECOND": 4, 53 | "TIME_NS": 5, 54 | "DOUBLE_VALUE": 6, 55 | "NONE": 7, 56 | "TIME_MS": 8, 57 | "TIME_S": 9, 58 | "TIME_US": 10, 59 | "BASIS_POINTS": 11, 60 | } 61 | 62 | 63 | class TMetricKind(object): 64 | GAUGE = 0 65 | COUNTER = 1 66 | PROPERTY = 2 67 | STATS = 3 68 | SET = 4 69 | HISTOGRAM = 5 70 | 71 | _VALUES_TO_NAMES = { 72 | 0: "GAUGE", 73 | 1: "COUNTER", 74 | 2: "PROPERTY", 75 | 3: "STATS", 76 | 4: "SET", 77 | 5: "HISTOGRAM", 78 | } 79 | 80 | _NAMES_TO_VALUES = { 81 | "GAUGE": 0, 82 | "COUNTER": 1, 83 | "PROPERTY": 2, 84 | "STATS": 3, 85 | "SET": 4, 86 | "HISTOGRAM": 5, 87 | } 88 | fix_spec(all_structs) 89 | del all_structs 90 | -------------------------------------------------------------------------------- /impala/_thrift_gen/RuntimeProfile/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/RuntimeProfile/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Status/__init__.py: -------------------------------------------------------------------------------- 1 
| __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Status/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Status/ttypes.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | import impala._thrift_gen.ErrorCodes.ttypes 14 | 15 | from thrift.transport import TTransport 16 | all_structs = [] 17 | 18 | 19 | class TStatus(object): 20 | """ 21 | Attributes: 22 | - status_code 23 | - error_msgs 24 | 25 | """ 26 | 27 | 28 | def __init__(self, status_code=None, error_msgs=None,): 29 | self.status_code = status_code 30 | self.error_msgs = error_msgs 31 | 32 | def read(self, iprot): 33 | if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: 34 | iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) 35 | return 36 | iprot.readStructBegin() 37 | while True: 38 | (fname, ftype, fid) = 
iprot.readFieldBegin() 39 | if ftype == TType.STOP: 40 | break 41 | if fid == 1: 42 | if ftype == TType.I32: 43 | self.status_code = iprot.readI32() 44 | else: 45 | iprot.skip(ftype) 46 | elif fid == 2: 47 | if ftype == TType.LIST: 48 | self.error_msgs = [] 49 | (_etype3, _size0) = iprot.readListBegin() 50 | for _i4 in range(_size0): 51 | _elem5 = iprot.readString() 52 | self.error_msgs.append(_elem5) 53 | iprot.readListEnd() 54 | else: 55 | iprot.skip(ftype) 56 | else: 57 | iprot.skip(ftype) 58 | iprot.readFieldEnd() 59 | iprot.readStructEnd() 60 | 61 | def write(self, oprot): 62 | if oprot._fast_encode is not None and self.thrift_spec is not None: 63 | oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) 64 | return 65 | oprot.writeStructBegin('TStatus') 66 | if self.status_code is not None: 67 | oprot.writeFieldBegin('status_code', TType.I32, 1) 68 | oprot.writeI32(self.status_code) 69 | oprot.writeFieldEnd() 70 | if self.error_msgs is not None: 71 | oprot.writeFieldBegin('error_msgs', TType.LIST, 2) 72 | oprot.writeListBegin(TType.STRING, len(self.error_msgs)) 73 | for iter6 in self.error_msgs: 74 | oprot.writeString(iter6) 75 | oprot.writeListEnd() 76 | oprot.writeFieldEnd() 77 | oprot.writeFieldStop() 78 | oprot.writeStructEnd() 79 | 80 | def validate(self): 81 | if self.status_code is None: 82 | raise TProtocolException(message='Required field status_code is unset!') 83 | return 84 | 85 | def __repr__(self): 86 | L = ['%s=%r' % (key, value) 87 | for key, value in self.__dict__.items()] 88 | return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) 89 | 90 | def __eq__(self, other): 91 | return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ 92 | 93 | def __ne__(self, other): 94 | return not (self == other) 95 | all_structs.append(TStatus) 96 | TStatus.thrift_spec = ( 97 | None, # 0 98 | (1, TType.I32, 'status_code', None, None, ), # 1 99 | (2, TType.LIST, 'error_msgs', (TType.STRING, None, False), None, ), # 2 
100 | ) 101 | fix_spec(all_structs) 102 | del all_structs 103 | -------------------------------------------------------------------------------- /impala/_thrift_gen/TCLIService/TCLIService-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Autogenerated by Thrift Compiler (0.16.0) 4 | # 5 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | # 7 | # options string: py:new_style,no_utf8strings 8 | # 9 | 10 | import sys 11 | import pprint 12 | if sys.version_info[0] > 2: 13 | from urllib.parse import urlparse 14 | else: 15 | from urlparse import urlparse 16 | from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient 17 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol 18 | 19 | from impala._thrift_gen.TCLIService import TCLIService 20 | from impala._thrift_gen.TCLIService.ttypes import * 21 | 22 | if len(sys.argv) <= 1 or sys.argv[1] == '--help': 23 | print('') 24 | print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') 25 | print('') 26 | print('Functions:') 27 | print(' TOpenSessionResp OpenSession(TOpenSessionReq req)') 28 | print(' TCloseSessionResp CloseSession(TCloseSessionReq req)') 29 | print(' TGetInfoResp GetInfo(TGetInfoReq req)') 30 | print(' TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req)') 31 | print(' TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req)') 32 | print(' TGetCatalogsResp GetCatalogs(TGetCatalogsReq req)') 33 | print(' TGetSchemasResp GetSchemas(TGetSchemasReq req)') 34 | print(' TGetTablesResp GetTables(TGetTablesReq req)') 35 | print(' TGetTableTypesResp GetTableTypes(TGetTableTypesReq req)') 36 | print(' TGetColumnsResp GetColumns(TGetColumnsReq req)') 37 | print(' TGetFunctionsResp GetFunctions(TGetFunctionsReq req)') 38 | print(' TGetOperationStatusResp 
GetOperationStatus(TGetOperationStatusReq req)') 39 | print(' TCancelOperationResp CancelOperation(TCancelOperationReq req)') 40 | print(' TCloseOperationResp CloseOperation(TCloseOperationReq req)') 41 | print(' TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req)') 42 | print(' TFetchResultsResp FetchResults(TFetchResultsReq req)') 43 | print(' TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req)') 44 | print(' TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req)') 45 | print(' TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req)') 46 | print(' TGetLogResp GetLog(TGetLogReq req)') 47 | print('') 48 | sys.exit(0) 49 | 50 | pp = pprint.PrettyPrinter(indent=2) 51 | host = 'localhost' 52 | port = 9090 53 | uri = '' 54 | framed = False 55 | ssl = False 56 | validate = True 57 | ca_certs = None 58 | keyfile = None 59 | certfile = None 60 | http = False 61 | argi = 1 62 | 63 | if sys.argv[argi] == '-h': 64 | parts = sys.argv[argi + 1].split(':') 65 | host = parts[0] 66 | if len(parts) > 1: 67 | port = int(parts[1]) 68 | argi += 2 69 | 70 | if sys.argv[argi] == '-u': 71 | url = urlparse(sys.argv[argi + 1]) 72 | parts = url[1].split(':') 73 | host = parts[0] 74 | if len(parts) > 1: 75 | port = int(parts[1]) 76 | else: 77 | port = 80 78 | uri = url[2] 79 | if url[4]: 80 | uri += '?%s' % url[4] 81 | http = True 82 | argi += 2 83 | 84 | if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': 85 | framed = True 86 | argi += 1 87 | 88 | if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': 89 | ssl = True 90 | argi += 1 91 | 92 | if sys.argv[argi] == '-novalidate': 93 | validate = False 94 | argi += 1 95 | 96 | if sys.argv[argi] == '-ca_certs': 97 | ca_certs = sys.argv[argi+1] 98 | argi += 2 99 | 100 | if sys.argv[argi] == '-keyfile': 101 | keyfile = sys.argv[argi+1] 102 | argi += 2 103 | 104 | if sys.argv[argi] == '-certfile': 105 | certfile = sys.argv[argi+1] 106 | argi += 2 107 | 
108 | cmd = sys.argv[argi] 109 | args = sys.argv[argi + 1:] 110 | 111 | if http: 112 | transport = THttpClient.THttpClient(host, port, uri) 113 | else: 114 | if ssl: 115 | socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) 116 | else: 117 | socket = TSocket.TSocket(host, port) 118 | if framed: 119 | transport = TTransport.TFramedTransport(socket) 120 | else: 121 | transport = TTransport.TBufferedTransport(socket) 122 | protocol = TBinaryProtocol(transport) 123 | client = TCLIService.Client(protocol) 124 | transport.open() 125 | 126 | if cmd == 'OpenSession': 127 | if len(args) != 1: 128 | print('OpenSession requires 1 args') 129 | sys.exit(1) 130 | pp.pprint(client.OpenSession(eval(args[0]),)) 131 | 132 | elif cmd == 'CloseSession': 133 | if len(args) != 1: 134 | print('CloseSession requires 1 args') 135 | sys.exit(1) 136 | pp.pprint(client.CloseSession(eval(args[0]),)) 137 | 138 | elif cmd == 'GetInfo': 139 | if len(args) != 1: 140 | print('GetInfo requires 1 args') 141 | sys.exit(1) 142 | pp.pprint(client.GetInfo(eval(args[0]),)) 143 | 144 | elif cmd == 'ExecuteStatement': 145 | if len(args) != 1: 146 | print('ExecuteStatement requires 1 args') 147 | sys.exit(1) 148 | pp.pprint(client.ExecuteStatement(eval(args[0]),)) 149 | 150 | elif cmd == 'GetTypeInfo': 151 | if len(args) != 1: 152 | print('GetTypeInfo requires 1 args') 153 | sys.exit(1) 154 | pp.pprint(client.GetTypeInfo(eval(args[0]),)) 155 | 156 | elif cmd == 'GetCatalogs': 157 | if len(args) != 1: 158 | print('GetCatalogs requires 1 args') 159 | sys.exit(1) 160 | pp.pprint(client.GetCatalogs(eval(args[0]),)) 161 | 162 | elif cmd == 'GetSchemas': 163 | if len(args) != 1: 164 | print('GetSchemas requires 1 args') 165 | sys.exit(1) 166 | pp.pprint(client.GetSchemas(eval(args[0]),)) 167 | 168 | elif cmd == 'GetTables': 169 | if len(args) != 1: 170 | print('GetTables requires 1 args') 171 | sys.exit(1) 172 | 
pp.pprint(client.GetTables(eval(args[0]),)) 173 | 174 | elif cmd == 'GetTableTypes': 175 | if len(args) != 1: 176 | print('GetTableTypes requires 1 args') 177 | sys.exit(1) 178 | pp.pprint(client.GetTableTypes(eval(args[0]),)) 179 | 180 | elif cmd == 'GetColumns': 181 | if len(args) != 1: 182 | print('GetColumns requires 1 args') 183 | sys.exit(1) 184 | pp.pprint(client.GetColumns(eval(args[0]),)) 185 | 186 | elif cmd == 'GetFunctions': 187 | if len(args) != 1: 188 | print('GetFunctions requires 1 args') 189 | sys.exit(1) 190 | pp.pprint(client.GetFunctions(eval(args[0]),)) 191 | 192 | elif cmd == 'GetOperationStatus': 193 | if len(args) != 1: 194 | print('GetOperationStatus requires 1 args') 195 | sys.exit(1) 196 | pp.pprint(client.GetOperationStatus(eval(args[0]),)) 197 | 198 | elif cmd == 'CancelOperation': 199 | if len(args) != 1: 200 | print('CancelOperation requires 1 args') 201 | sys.exit(1) 202 | pp.pprint(client.CancelOperation(eval(args[0]),)) 203 | 204 | elif cmd == 'CloseOperation': 205 | if len(args) != 1: 206 | print('CloseOperation requires 1 args') 207 | sys.exit(1) 208 | pp.pprint(client.CloseOperation(eval(args[0]),)) 209 | 210 | elif cmd == 'GetResultSetMetadata': 211 | if len(args) != 1: 212 | print('GetResultSetMetadata requires 1 args') 213 | sys.exit(1) 214 | pp.pprint(client.GetResultSetMetadata(eval(args[0]),)) 215 | 216 | elif cmd == 'FetchResults': 217 | if len(args) != 1: 218 | print('FetchResults requires 1 args') 219 | sys.exit(1) 220 | pp.pprint(client.FetchResults(eval(args[0]),)) 221 | 222 | elif cmd == 'GetDelegationToken': 223 | if len(args) != 1: 224 | print('GetDelegationToken requires 1 args') 225 | sys.exit(1) 226 | pp.pprint(client.GetDelegationToken(eval(args[0]),)) 227 | 228 | elif cmd == 'CancelDelegationToken': 229 | if len(args) != 1: 230 | print('CancelDelegationToken requires 1 args') 231 | sys.exit(1) 232 | pp.pprint(client.CancelDelegationToken(eval(args[0]),)) 233 | 234 | elif cmd == 'RenewDelegationToken': 235 | if 
len(args) != 1: 236 | print('RenewDelegationToken requires 1 args') 237 | sys.exit(1) 238 | pp.pprint(client.RenewDelegationToken(eval(args[0]),)) 239 | 240 | elif cmd == 'GetLog': 241 | if len(args) != 1: 242 | print('GetLog requires 1 args') 243 | sys.exit(1) 244 | pp.pprint(client.GetLog(eval(args[0]),)) 245 | 246 | else: 247 | print('Unrecognized method %s' % cmd) 248 | sys.exit(1) 249 | 250 | transport.close() 251 | -------------------------------------------------------------------------------- /impala/_thrift_gen/TCLIService/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants', 'TCLIService'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/TCLIService/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | PRIMITIVE_TYPES = set(( 15 | 0, 16 | 1, 17 | 2, 18 | 3, 19 | 4, 20 | 5, 21 | 6, 22 | 7, 23 | 8, 24 | 9, 25 | 15, 26 | 16, 27 | 17, 28 | 18, 29 | 19, 30 | )) 31 | COMPLEX_TYPES = set(( 32 | 10, 33 | 11, 34 | 12, 35 | 13, 36 | 14, 37 | )) 38 | COLLECTION_TYPES = set(( 39 | 10, 40 | 11, 41 | )) 42 | TYPE_NAMES = { 43 | 10: "ARRAY", 44 | 4: "BIGINT", 45 | 9: "BINARY", 46 | 0: "BOOLEAN", 47 | 19: "CHAR", 48 | 17: "DATE", 49 | 15: "DECIMAL", 50 | 6: "DOUBLE", 51 | 5: "FLOAT", 52 | 3: "INT", 53 | 11: "MAP", 54 | 16: "NULL", 55 | 2: "SMALLINT", 56 | 7: "STRING", 57 | 12: "STRUCT", 58 | 8: "TIMESTAMP", 59 | 1: "TINYINT", 60 | 13: "UNIONTYPE", 61 | 18: "VARCHAR", 62 
| } 63 | CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" 64 | PRECISION = "precision" 65 | SCALE = "scale" 66 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Types/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/Types/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/impyla/c8bd02683ca05424905e7e0e5567d8f2c5ae55f3/impala/_thrift_gen/__init__.py -------------------------------------------------------------------------------- /impala/_thrift_gen/beeswax/BeeswaxService-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Autogenerated by Thrift Compiler (0.16.0) 4 | # 5 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | # 7 | # options string: py:new_style,no_utf8strings 8 | # 9 | 10 | import sys 11 | import pprint 12 | if sys.version_info[0] > 2: 13 | from urllib.parse import urlparse 14 | else: 15 | from urlparse import urlparse 16 | from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient 17 | from 
thrift.protocol.TBinaryProtocol import TBinaryProtocol 18 | 19 | from impala._thrift_gen.beeswax import BeeswaxService 20 | from impala._thrift_gen.beeswax.ttypes import * 21 | 22 | if len(sys.argv) <= 1 or sys.argv[1] == '--help': 23 | print('') 24 | print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') 25 | print('') 26 | print('Functions:') 27 | print(' QueryHandle query(Query query)') 28 | print(' QueryHandle executeAndWait(Query query, LogContextId clientCtx)') 29 | print(' QueryExplanation explain(Query query)') 30 | print(' Results fetch(QueryHandle query_id, bool start_over, i32 fetch_size)') 31 | print(' QueryState get_state(QueryHandle handle)') 32 | print(' ResultsMetadata get_results_metadata(QueryHandle handle)') 33 | print(' string echo(string s)') 34 | print(' string dump_config()') 35 | print(' string get_log(LogContextId context)') 36 | print(' get_default_configuration(bool include_hadoop)') 37 | print(' void close(QueryHandle handle)') 38 | print(' void clean(LogContextId log_context)') 39 | print('') 40 | sys.exit(0) 41 | 42 | pp = pprint.PrettyPrinter(indent=2) 43 | host = 'localhost' 44 | port = 9090 45 | uri = '' 46 | framed = False 47 | ssl = False 48 | validate = True 49 | ca_certs = None 50 | keyfile = None 51 | certfile = None 52 | http = False 53 | argi = 1 54 | 55 | if sys.argv[argi] == '-h': 56 | parts = sys.argv[argi + 1].split(':') 57 | host = parts[0] 58 | if len(parts) > 1: 59 | port = int(parts[1]) 60 | argi += 2 61 | 62 | if sys.argv[argi] == '-u': 63 | url = urlparse(sys.argv[argi + 1]) 64 | parts = url[1].split(':') 65 | host = parts[0] 66 | if len(parts) > 1: 67 | port = int(parts[1]) 68 | else: 69 | port = 80 70 | uri = url[2] 71 | if url[4]: 72 | uri += '?%s' % url[4] 73 | http = True 74 | argi += 2 75 | 76 | if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': 77 | framed = True 78 | argi += 1 
79 | 80 | if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': 81 | ssl = True 82 | argi += 1 83 | 84 | if sys.argv[argi] == '-novalidate': 85 | validate = False 86 | argi += 1 87 | 88 | if sys.argv[argi] == '-ca_certs': 89 | ca_certs = sys.argv[argi+1] 90 | argi += 2 91 | 92 | if sys.argv[argi] == '-keyfile': 93 | keyfile = sys.argv[argi+1] 94 | argi += 2 95 | 96 | if sys.argv[argi] == '-certfile': 97 | certfile = sys.argv[argi+1] 98 | argi += 2 99 | 100 | cmd = sys.argv[argi] 101 | args = sys.argv[argi + 1:] 102 | 103 | if http: 104 | transport = THttpClient.THttpClient(host, port, uri) 105 | else: 106 | if ssl: 107 | socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) 108 | else: 109 | socket = TSocket.TSocket(host, port) 110 | if framed: 111 | transport = TTransport.TFramedTransport(socket) 112 | else: 113 | transport = TTransport.TBufferedTransport(socket) 114 | protocol = TBinaryProtocol(transport) 115 | client = BeeswaxService.Client(protocol) 116 | transport.open() 117 | 118 | if cmd == 'query': 119 | if len(args) != 1: 120 | print('query requires 1 args') 121 | sys.exit(1) 122 | pp.pprint(client.query(eval(args[0]),)) 123 | 124 | elif cmd == 'executeAndWait': 125 | if len(args) != 2: 126 | print('executeAndWait requires 2 args') 127 | sys.exit(1) 128 | pp.pprint(client.executeAndWait(eval(args[0]), eval(args[1]),)) 129 | 130 | elif cmd == 'explain': 131 | if len(args) != 1: 132 | print('explain requires 1 args') 133 | sys.exit(1) 134 | pp.pprint(client.explain(eval(args[0]),)) 135 | 136 | elif cmd == 'fetch': 137 | if len(args) != 3: 138 | print('fetch requires 3 args') 139 | sys.exit(1) 140 | pp.pprint(client.fetch(eval(args[0]), eval(args[1]), eval(args[2]),)) 141 | 142 | elif cmd == 'get_state': 143 | if len(args) != 1: 144 | print('get_state requires 1 args') 145 | sys.exit(1) 146 | pp.pprint(client.get_state(eval(args[0]),)) 147 | 148 | elif cmd == 'get_results_metadata': 149 | if 
len(args) != 1: 150 | print('get_results_metadata requires 1 args') 151 | sys.exit(1) 152 | pp.pprint(client.get_results_metadata(eval(args[0]),)) 153 | 154 | elif cmd == 'echo': 155 | if len(args) != 1: 156 | print('echo requires 1 args') 157 | sys.exit(1) 158 | pp.pprint(client.echo(args[0],)) 159 | 160 | elif cmd == 'dump_config': 161 | if len(args) != 0: 162 | print('dump_config requires 0 args') 163 | sys.exit(1) 164 | pp.pprint(client.dump_config()) 165 | 166 | elif cmd == 'get_log': 167 | if len(args) != 1: 168 | print('get_log requires 1 args') 169 | sys.exit(1) 170 | pp.pprint(client.get_log(eval(args[0]),)) 171 | 172 | elif cmd == 'get_default_configuration': 173 | if len(args) != 1: 174 | print('get_default_configuration requires 1 args') 175 | sys.exit(1) 176 | pp.pprint(client.get_default_configuration(eval(args[0]),)) 177 | 178 | elif cmd == 'close': 179 | if len(args) != 1: 180 | print('close requires 1 args') 181 | sys.exit(1) 182 | pp.pprint(client.close(eval(args[0]),)) 183 | 184 | elif cmd == 'clean': 185 | if len(args) != 1: 186 | print('clean requires 1 args') 187 | sys.exit(1) 188 | pp.pprint(client.clean(eval(args[0]),)) 189 | 190 | else: 191 | print('Unrecognized method %s' % cmd) 192 | sys.exit(1) 193 | 194 | transport.close() 195 | -------------------------------------------------------------------------------- /impala/_thrift_gen/beeswax/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants', 'BeeswaxService'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/beeswax/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, 
TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/fb303/FacebookService-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Autogenerated by Thrift Compiler (0.16.0) 4 | # 5 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 6 | # 7 | # options string: py:new_style,no_utf8strings 8 | # 9 | 10 | import sys 11 | import pprint 12 | if sys.version_info[0] > 2: 13 | from urllib.parse import urlparse 14 | else: 15 | from urlparse import urlparse 16 | from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient 17 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol 18 | 19 | from impala._thrift_gen.fb303 import FacebookService 20 | from impala._thrift_gen.fb303.ttypes import * 21 | 22 | if len(sys.argv) <= 1 or sys.argv[1] == '--help': 23 | print('') 24 | print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') 25 | print('') 26 | print('Functions:') 27 | print(' string getName()') 28 | print(' string getVersion()') 29 | print(' fb_status getStatus()') 30 | print(' string getStatusDetails()') 31 | print(' getCounters()') 32 | print(' i64 getCounter(string key)') 33 | print(' void setOption(string key, string value)') 34 | print(' string getOption(string key)') 35 | print(' getOptions()') 36 | print(' string getCpuProfile(i32 profileDurationInSec)') 37 | print(' i64 aliveSince()') 38 | print(' void reinitialize()') 39 | print(' void shutdown()') 40 | print('') 41 | sys.exit(0) 42 | 43 | pp = pprint.PrettyPrinter(indent=2) 44 | host = 'localhost' 45 | port = 9090 46 | uri = '' 47 | framed = False 48 | 
ssl = False 49 | validate = True 50 | ca_certs = None 51 | keyfile = None 52 | certfile = None 53 | http = False 54 | argi = 1 55 | 56 | if sys.argv[argi] == '-h': 57 | parts = sys.argv[argi + 1].split(':') 58 | host = parts[0] 59 | if len(parts) > 1: 60 | port = int(parts[1]) 61 | argi += 2 62 | 63 | if sys.argv[argi] == '-u': 64 | url = urlparse(sys.argv[argi + 1]) 65 | parts = url[1].split(':') 66 | host = parts[0] 67 | if len(parts) > 1: 68 | port = int(parts[1]) 69 | else: 70 | port = 80 71 | uri = url[2] 72 | if url[4]: 73 | uri += '?%s' % url[4] 74 | http = True 75 | argi += 2 76 | 77 | if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': 78 | framed = True 79 | argi += 1 80 | 81 | if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': 82 | ssl = True 83 | argi += 1 84 | 85 | if sys.argv[argi] == '-novalidate': 86 | validate = False 87 | argi += 1 88 | 89 | if sys.argv[argi] == '-ca_certs': 90 | ca_certs = sys.argv[argi+1] 91 | argi += 2 92 | 93 | if sys.argv[argi] == '-keyfile': 94 | keyfile = sys.argv[argi+1] 95 | argi += 2 96 | 97 | if sys.argv[argi] == '-certfile': 98 | certfile = sys.argv[argi+1] 99 | argi += 2 100 | 101 | cmd = sys.argv[argi] 102 | args = sys.argv[argi + 1:] 103 | 104 | if http: 105 | transport = THttpClient.THttpClient(host, port, uri) 106 | else: 107 | if ssl: 108 | socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) 109 | else: 110 | socket = TSocket.TSocket(host, port) 111 | if framed: 112 | transport = TTransport.TFramedTransport(socket) 113 | else: 114 | transport = TTransport.TBufferedTransport(socket) 115 | protocol = TBinaryProtocol(transport) 116 | client = FacebookService.Client(protocol) 117 | transport.open() 118 | 119 | if cmd == 'getName': 120 | if len(args) != 0: 121 | print('getName requires 0 args') 122 | sys.exit(1) 123 | pp.pprint(client.getName()) 124 | 125 | elif cmd == 'getVersion': 126 | if len(args) != 0: 127 | print('getVersion requires 0 
args') 128 | sys.exit(1) 129 | pp.pprint(client.getVersion()) 130 | 131 | elif cmd == 'getStatus': 132 | if len(args) != 0: 133 | print('getStatus requires 0 args') 134 | sys.exit(1) 135 | pp.pprint(client.getStatus()) 136 | 137 | elif cmd == 'getStatusDetails': 138 | if len(args) != 0: 139 | print('getStatusDetails requires 0 args') 140 | sys.exit(1) 141 | pp.pprint(client.getStatusDetails()) 142 | 143 | elif cmd == 'getCounters': 144 | if len(args) != 0: 145 | print('getCounters requires 0 args') 146 | sys.exit(1) 147 | pp.pprint(client.getCounters()) 148 | 149 | elif cmd == 'getCounter': 150 | if len(args) != 1: 151 | print('getCounter requires 1 args') 152 | sys.exit(1) 153 | pp.pprint(client.getCounter(args[0],)) 154 | 155 | elif cmd == 'setOption': 156 | if len(args) != 2: 157 | print('setOption requires 2 args') 158 | sys.exit(1) 159 | pp.pprint(client.setOption(args[0], args[1],)) 160 | 161 | elif cmd == 'getOption': 162 | if len(args) != 1: 163 | print('getOption requires 1 args') 164 | sys.exit(1) 165 | pp.pprint(client.getOption(args[0],)) 166 | 167 | elif cmd == 'getOptions': 168 | if len(args) != 0: 169 | print('getOptions requires 0 args') 170 | sys.exit(1) 171 | pp.pprint(client.getOptions()) 172 | 173 | elif cmd == 'getCpuProfile': 174 | if len(args) != 1: 175 | print('getCpuProfile requires 1 args') 176 | sys.exit(1) 177 | pp.pprint(client.getCpuProfile(eval(args[0]),)) 178 | 179 | elif cmd == 'aliveSince': 180 | if len(args) != 0: 181 | print('aliveSince requires 0 args') 182 | sys.exit(1) 183 | pp.pprint(client.aliveSince()) 184 | 185 | elif cmd == 'reinitialize': 186 | if len(args) != 0: 187 | print('reinitialize requires 0 args') 188 | sys.exit(1) 189 | pp.pprint(client.reinitialize()) 190 | 191 | elif cmd == 'shutdown': 192 | if len(args) != 0: 193 | print('shutdown requires 0 args') 194 | sys.exit(1) 195 | pp.pprint(client.shutdown()) 196 | 197 | else: 198 | print('Unrecognized method %s' % cmd) 199 | sys.exit(1) 200 | 201 | transport.close() 
202 | -------------------------------------------------------------------------------- /impala/_thrift_gen/fb303/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants', 'FacebookService'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/fb303/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | -------------------------------------------------------------------------------- /impala/_thrift_gen/fb303/ttypes.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | 14 | from thrift.transport import TTransport 15 | all_structs = [] 16 | 17 | 18 | class fb_status(object): 19 | """ 20 | Common status reporting mechanism across all services 21 | 22 | """ 23 | DEAD = 0 24 | STARTING = 1 25 | ALIVE = 2 26 | STOPPING = 3 27 | STOPPED = 4 28 | WARNING = 5 29 | 30 | _VALUES_TO_NAMES = { 31 | 0: "DEAD", 32 | 1: "STARTING", 33 | 2: "ALIVE", 34 | 3: "STOPPING", 35 | 4: "STOPPED", 36 | 5: "WARNING", 37 | } 38 | 39 | _NAMES_TO_VALUES = { 40 | "DEAD": 
0, 41 | "STARTING": 1, 42 | "ALIVE": 2, 43 | "STOPPING": 3, 44 | "STOPPED": 4, 45 | "WARNING": 5, 46 | } 47 | fix_spec(all_structs) 48 | del all_structs 49 | -------------------------------------------------------------------------------- /impala/_thrift_gen/hive_metastore/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['ttypes', 'constants', 'ThriftHiveMetastore'] 2 | -------------------------------------------------------------------------------- /impala/_thrift_gen/hive_metastore/constants.py: -------------------------------------------------------------------------------- 1 | # 2 | # Autogenerated by Thrift Compiler (0.16.0) 3 | # 4 | # DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING 5 | # 6 | # options string: py:new_style,no_utf8strings 7 | # 8 | 9 | from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException 10 | from thrift.protocol.TProtocol import TProtocolException 11 | from thrift.TRecursive import fix_spec 12 | 13 | from .ttypes import * 14 | DDL_TIME = "transient_lastDdlTime" 15 | HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__" 16 | HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__" 17 | HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__" 18 | IS_ARCHIVED = "is_archived" 19 | ORIGINAL_LOCATION = "original_location" 20 | IS_IMMUTABLE = "immutable" 21 | META_TABLE_COLUMNS = "columns" 22 | META_TABLE_COLUMN_TYPES = "columns.types" 23 | BUCKET_FIELD_NAME = "bucket_field_name" 24 | BUCKET_COUNT = "bucket_count" 25 | FIELD_TO_DIMENSION = "field_to_dimension" 26 | META_TABLE_NAME = "name" 27 | META_TABLE_DB = "db" 28 | META_TABLE_LOCATION = "location" 29 | META_TABLE_SERDE = "serde" 30 | META_TABLE_PARTITION_COLUMNS = "partition_columns" 31 | META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types" 32 | FILE_INPUT_FORMAT = "file.inputformat" 33 | FILE_OUTPUT_FORMAT = "file.outputformat" 34 | META_TABLE_STORAGE = 
"storage_handler" 35 | TABLE_IS_TRANSACTIONAL = "transactional" 36 | TABLE_NO_AUTO_COMPACT = "no_auto_compaction" 37 | -------------------------------------------------------------------------------- /impala/compat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Cloudera Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # pylint: disable=unused-import,wrong-import-position 16 | 17 | from __future__ import absolute_import 18 | 19 | import six 20 | 21 | if six.PY3: 22 | def lzip(*x): 23 | return list(zip(*x)) 24 | 25 | from decimal import Decimal 26 | elif six.PY2: 27 | lzip = zip 28 | 29 | try: 30 | from cdecimal import Decimal 31 | except ImportError: 32 | from decimal import Decimal # noqa 33 | 34 | try: 35 | _xrange = xrange 36 | except NameError: 37 | _xrange = range # python3 compatibilty 38 | -------------------------------------------------------------------------------- /impala/dbapi.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Cloudera Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Implements the Python DB API 2.0 (PEP 249) for Impala"""

from __future__ import absolute_import

import six
import time
import datetime

from impala.error import (  # noqa
    Error, Warning, InterfaceError, DatabaseError, InternalError,
    OperationalError, ProgrammingError, IntegrityError, DataError,
    NotSupportedError)
from impala.util import (
    warn_deprecate, warn_protocol_param, warn_nontls_jwt)
import impala.hiveserver2 as hs2


# Authentication mechanisms accepted by `connect` (compared upper-cased).
AUTH_MECHANISMS = ['NOSASL', 'PLAIN', 'GSSAPI', 'LDAP', 'JWT']


# PEP 249 module globals
apilevel = '2.0'
threadsafety = 1  # Threads may share the module, but not connections
paramstyle = 'pyformat'


def connect(host='localhost', port=21050, database=None, timeout=None,
            use_ssl=False, ca_cert=None, auth_mechanism='NOSASL', user=None,
            password=None, kerberos_service_name='impala', use_ldap=None,
            ldap_user=None, ldap_password=None, use_kerberos=None,
            protocol=None, krb_host=None, use_http_transport=False,
            http_path='', auth_cookie_names=None, http_cookie_names=None,
            retries=3, jwt=None, user_agent=None,
            get_user_custom_headers_func=None):
    """Get a connection to HiveServer2 (HS2).

    These options are largely compatible with the impala-shell command line
    arguments. See those docs for more information.

    Parameters
    ----------
    host : str
        The hostname for HS2. For Impala, this can be any of the `impalad`s.
    port : int, optional
        The port number for HS2. The Impala default is 21050. The Hive port is
        likely different.
    database : str, optional
        The default database. If `None`, the result is
        implementation-dependent.
    timeout : int, optional
        Connection timeout in seconds. Default is no timeout.
    use_ssl : bool, optional
        Enable SSL.
    ca_cert : str, optional
        Local path to the third-party CA certificate. If SSL is enabled but
        the certificate is not specified, the server certificate will not be
        validated.
    auth_mechanism : {'NOSASL', 'PLAIN', 'GSSAPI', 'LDAP', 'JWT'}
        Specify the authentication mechanism. `'NOSASL'` for unsecured Impala.
        `'PLAIN'` for unsecured Hive (because Hive requires the SASL
        transport). `'GSSAPI'` for Kerberos and `'LDAP'` for Kerberos with
        LDAP. `'JWT'` requires providing a JSON Web Token via the jwt parameter
        and only works with use_http_transport=True. The value is compared
        case-insensitively; a false value is treated as `'NOSASL'`.
    user : str, optional
        LDAP user, if applicable.
    password : str, optional
        LDAP password, if applicable.
    kerberos_service_name : str, optional
        Authenticate to a particular `impalad` service principal. Uses
        `'impala'` by default.
    krb_host : str, optional
        Passed through unchanged to `impala.hiveserver2.connect`.
    use_http_transport : bool, optional
        Set it to True to use http transport or False to use binary transport.
    http_path : str, optional
        Specify the path in the http URL. Used only when `use_http_transport`
        is True.
    http_cookie_names : list of str or str, optional
        Specify the list of possible names for the cookies used for
        cookie-based authentication or session management. If the list of
        names contains one cookie name only, a str value can be specified
        instead of a list.
        If a cookie with one of these names is returned in an http response by
        the server or an intermediate proxy then it will be included in each
        subsequent request for the same connection. If set to wildcard ('*'),
        all cookies in an http response will be preserved. By default
        'http_cookie_names' is set to '*'.
        Used only when `use_http_transport` is True.
        The names of authentication cookies are expected to end with ".auth"
        string, for example, "impala.auth" for Impala authentication cookies.
        If 'http_cookie_names' is explicitly set to a not None empty value
        ([], or ''), Impyla won't attempt to do cookie based authentication or
        session management.
        Currently cookie retention is supported for
        GSSAPI/LDAP/SASL/NOSASL/JWT over http.
    retries : int, optional
        Passed through unchanged to `impala.hiveserver2.connect`. Defaults
        to 3.
    jwt : str, optional
        String containing a JSON Web Token. This is used for
        auth_mechanism=JWT when using the HTTP transport.
    user_agent : str, optional
        A user specified user agent when HTTP transport is used. If none is
        specified, 'Python/ImpylaHttpClient' is used.
    get_user_custom_headers_func : function, optional
        Used to add custom headers to the http messages when using hs2-http
        protocol. This is a function returning a list of tuples, each tuple
        contains a key-value pair. This allows duplicate headers to be set.
    use_ldap : bool, optional
        Specify `auth_mechanism='LDAP'` instead.

        .. deprecated:: 0.11.0
    auth_cookie_names : list of str or str, optional
        Use `http_cookie_names` parameter instead. Ignored when
        `http_cookie_names` is also given.

        .. deprecated:: 0.18.0
    ldap_user : str, optional
        Use `user` parameter instead.

        .. deprecated:: 0.11.0
    ldap_password : str, optional
        Use `password` parameter instead.

        .. deprecated:: 0.11.0
    use_kerberos : bool, optional
        Specify `auth_mechanism='GSSAPI'` instead.

        .. deprecated:: 0.11.0
    protocol : str, optional
        Do not use. HiveServer2 is the only protocol currently supported.

        .. deprecated:: 0.11.0


    Returns
    -------
    HiveServer2Connection
        A `Connection` object (DB API 2.0-compliant).

    Raises
    ------
    NotSupportedError
        If `auth_mechanism` is not one of `AUTH_MECHANISMS`; if JWT
        authentication is requested without `jwt`, without the HTTP
        transport, or together with a user/password; if `jwt` is given for a
        non-JWT mechanism; or if `protocol` is anything but HiveServer2.
    """
    # pylint: disable=too-many-locals
    # The deprecated boolean switches override auth_mechanism only when they
    # are truthy; passing them at all triggers the deprecation warning.
    if use_kerberos is not None:
        warn_deprecate('use_kerberos', 'auth_mechanism="GSSAPI"')
        if use_kerberos:
            auth_mechanism = 'GSSAPI'

    if use_ldap is not None:
        warn_deprecate('use_ldap', 'auth_mechanism="LDAP"')
        if use_ldap:
            auth_mechanism = 'LDAP'

    # Normalise the mechanism name; a false value falls back to NOSASL.
    if auth_mechanism:
        auth_mechanism = auth_mechanism.upper()
    else:
        auth_mechanism = 'NOSASL'

    if auth_mechanism not in AUTH_MECHANISMS:
        raise NotSupportedError(
            'Unsupported authentication mechanism: {0}'.format(auth_mechanism))

    # JWT is mutually exclusive with user/password credentials, and the
    # 'jwt' argument is rejected for every other mechanism.
    if auth_mechanism == 'JWT':
        if jwt is None:
            raise NotSupportedError("JWT authentication requires specifying the 'jwt' argument")
        if not use_http_transport:
            raise NotSupportedError('JWT authentication is only supported for HTTP transport')
        if not use_ssl:
            # Not an error: only a warning is issued for JWT without SSL.
            warn_nontls_jwt()
        if user is not None or ldap_user is not None:
            raise NotSupportedError("'user' argument cannot be specified with '{0}' authentication".format(auth_mechanism))
        if password is not None or ldap_password is not None:
            raise NotSupportedError("'password' argument cannot be specified with '{0}' authentication".format(auth_mechanism))
    else:
        if jwt is not None:
            raise NotSupportedError("'jwt' argument cannot be specified with '{0}' authentication".format(auth_mechanism))

    # Deprecated aliases win over 'user'/'password' when both are supplied.
    if ldap_user is not None:
        warn_deprecate('ldap_user', 'user')
        user = ldap_user

    if ldap_password is not None:
        warn_deprecate('ldap_password', 'password')
        password = ldap_password

    if protocol is not None:
        if protocol.lower() == 'hiveserver2':
            warn_protocol_param()
        else:
            raise NotSupportedError(
                "'{0}' is not a supported protocol; only HiveServer2 is "
                "supported".format(protocol))

    # The deprecated name is honoured only when the new one was not given.
    if auth_cookie_names is not None and http_cookie_names is None:
        warn_deprecate('auth_cookie_names', 'http_cookie_names')
        http_cookie_names = auth_cookie_names
    elif http_cookie_names is None:
        # Preserve all cookies.
        http_cookie_names = '*'

    service = hs2.connect(host=host, port=port,
                          timeout=timeout, use_ssl=use_ssl,
                          ca_cert=ca_cert, user=user, password=password,
                          kerberos_service_name=kerberos_service_name,
                          auth_mechanism=auth_mechanism, krb_host=krb_host,
                          use_http_transport=use_http_transport,
                          http_path=http_path,
                          http_cookie_names=http_cookie_names,
                          retries=retries,
                          jwt=jwt, user_agent=user_agent,
                          get_user_custom_headers_func=get_user_custom_headers_func)
    return hs2.HiveServer2Connection(service, default_db=database)
class _DBAPITypeObject(object):
    """PEP 249 type object that compares equal to any of its type names."""

    def __init__(self, *values):
        self.values = values

    def __cmp__(self, other):
        # Only consulted by Python 2: 0 signals equality, -1 "less than".
        return 0 if other in self.values else -1

    def __eq__(self, other):
        # py3 ignores __cmp__
        return other in self.values


# PEP 249 type singletons, keyed by the type names reported for columns.
STRING = _DBAPITypeObject('STRING')
BINARY = _DBAPITypeObject('BINARY')
NUMBER = _DBAPITypeObject('BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT',
                          'FLOAT', 'DOUBLE', 'DECIMAL')
DATETIME = _DBAPITypeObject('TIMESTAMP')
DATE = _DBAPITypeObject('DATE')
ROWID = _DBAPITypeObject()

# PEP 249 constructors map straight onto the standard datetime types.
Date = datetime.date
Time = datetime.time
Timestamp = datetime.datetime


def DateFromTicks(ticks):
    """Return the local calendar date for *ticks* seconds since the epoch."""
    local = time.localtime(ticks)
    return Date(local.tm_year, local.tm_mon, local.tm_mday)


def TimeFromTicks(ticks):
    """Return the local wall-clock time for *ticks* seconds since the epoch."""
    local = time.localtime(ticks)
    return Time(local.tm_hour, local.tm_min, local.tm_sec)


def TimestampFromTicks(ticks):
    """Return the local date and time for *ticks* seconds since the epoch."""
    local = time.localtime(ticks)
    return Timestamp(local.tm_year, local.tm_mon, local.tm_mday,
                     local.tm_hour, local.tm_min, local.tm_sec)


# Python 3 has no ``buffer`` builtin; memoryview stands in for it.
if six.PY3:
    buffer = memoryview
Binary = buffer
"""Impala exception classes. Also implements PEP 249."""


class Error(Exception):
    """Root of the impyla exception hierarchy (PEP 249 ``Error``)."""


class Warning(Exception):
    """PEP 249 ``Warning``; deliberately not a subclass of `Error`."""


# DB API (PEP 249) exceptions

class InterfaceError(Error):
    """PEP 249 ``InterfaceError``."""


class DatabaseError(Error):
    """PEP 249 ``DatabaseError``; base of the database-related errors."""


class InternalError(DatabaseError):
    """PEP 249 ``InternalError``."""


class OperationalError(DatabaseError):
    """PEP 249 ``OperationalError``."""


class ProgrammingError(DatabaseError):
    """PEP 249 ``ProgrammingError``."""


class IntegrityError(DatabaseError):
    """PEP 249 ``IntegrityError``."""


class DataError(DatabaseError):
    """PEP 249 ``DataError``."""


class NotSupportedError(DatabaseError):
    """PEP 249 ``NotSupportedError``."""


# RPC errors

class RPCError(Error):
    """Base class of the RPC-level errors."""


class HiveServer2Error(RPCError):
    """RPC error specific to HiveServer2."""


class HttpError(RPCError):
    """An error containing an http response code"""

    def __init__(self, code, message, body, http_headers):
        # Initialise the Exception base so that ``args`` is populated; without
        # it the instance cannot be pickled or copied, because it would be
        # rebuilt by calling ``HttpError()`` with no arguments.
        super(HttpError, self).__init__(code, message, body, http_headers)
        self.code = code
        self.message = message
        self.body = body
        self.http_headers = http_headers

    def __str__(self):
        # Don't try to print the body as we don't know what format it is.
        return "HTTP code {}: {}".format(self.code, self.message)


class BeeswaxError(RPCError):
    """RPC error specific to Beeswax."""


class QueryStateError(BeeswaxError):
    """Beeswax query-state error."""


class DisconnectedError(BeeswaxError):
    """Beeswax disconnection error."""
class Connection(object):
    """Abstract PEP 249 connection.

    Connection objects are associated with a TCLIService.Client thrift
    service; a concrete connection is instantiated with an alive
    TCLIService.Client. Every PEP 249 method here raises
    `NotImplementedError` and must be provided by the subclass.
    """

    def close(self):
        # PEP 249
        raise NotImplementedError

    def commit(self):
        # PEP 249
        raise NotImplementedError

    def rollback(self):
        # PEP 249
        raise NotImplementedError

    def cursor(self, user=None, configuration=None, convert_types=True):
        # PEP 249
        raise NotImplementedError

    def reconnect(self):
        raise NotImplementedError

    def kerberized(self):
        """Return True when the underlying transport is SASL with GSSAPI."""
        # pylint: disable=protected-access
        # Imported lazily so thrift_sasl is only needed when this is called.
        from thrift_sasl import TSaslClientTransport
        if isinstance(self.service._iprot.trans, TSaslClientTransport):
            if self.service._iprot.trans.mechanism == 'GSSAPI':
                return True
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection when the ``with`` block ends."""
        self.close()
        # __exit__ must not re-raise the exception it was handed; returning a
        # false value tells the interpreter to propagate it unchanged.
        return False

    # optional DB API addition to make the errors attributes of Connection
    Error = Error
    Warning = Warning
    InterfaceError = InterfaceError
    DatabaseError = DatabaseError
    InternalError = InternalError
    OperationalError = OperationalError
    ProgrammingError = ProgrammingError
    IntegrityError = IntegrityError
    DataError = DataError
    NotSupportedError = NotSupportedError


class Cursor(object):
    """Abstract representation of Cursor

    Every operation raises `NotImplementedError` here, except `next`
    (delegates to `__next__`) and the context-manager protocol (closes the
    cursor on exit).
    """

    def description(self):
        raise NotImplementedError

    def rowcount(self):
        raise NotImplementedError

    def lastrowid(self):
        raise NotImplementedError

    def query_string(self):
        raise NotImplementedError

    def get_arraysize(self):
        raise NotImplementedError

    def set_arraysize(self, arraysize):
        raise NotImplementedError

    def buffersize(self):
        raise NotImplementedError

    def has_result_set(self):
        raise NotImplementedError

    def close(self):
        raise NotImplementedError

    def close_operation(self):
        raise NotImplementedError

    def execute(self, operation, parameters=None, configuration=None):
        raise NotImplementedError

    def _execute_sync(self, operation_fn):
        raise NotImplementedError

    def _reset_state(self):
        raise NotImplementedError

    def _wait_to_finish(self):
        raise NotImplementedError

    def executemany(self, operation, seq_of_parameters):
        raise NotImplementedError

    def fetchone(self):
        raise NotImplementedError

    def fetchmany(self, size=None):
        raise NotImplementedError

    def fetchall(self):
        raise NotImplementedError

    def setinputsizes(self, sizes):
        raise NotImplementedError

    def setoutputsize(self, size, column=None):
        raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError

    def __next__(self):
        raise NotImplementedError

    def next(self):
        # for py2 compat
        return self.__next__()

    def ping(self):
        raise NotImplementedError

    def get_log(self):
        raise NotImplementedError

    def get_profile(self):
        raise NotImplementedError

    def get_summary(self):
        raise NotImplementedError

    def build_summary_table(self, summary, idx, is_fragment_root, indent_level,
                            output):
        raise NotImplementedError

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the cursor when the ``with`` block ends."""
        self.close()
        # As in Connection.__exit__: let the interpreter propagate any
        # in-flight exception instead of re-raising it here.
        return False
# One alternative per positional paramstyle. The group names double as the
# paramstyle names so a match can be tested with ``match.group(paramstyle)``;
# ``index`` captures the 1-based position of a numeric (``:N``) marker.
_PARAM_MARKER_RE = re.compile(
    r'(?P<qmark>\?)'
    r'|(?P<format>%s)'
    r'|(?P<numeric>:(?P<index>\d+))')

_POSITIONAL_PARAMSTYLES = ('qmark', 'format', 'numeric')
_KEYWORD_PARAMSTYLES = ('named', 'pyformat')


def _replace_numeric_markers(operation, string_parameters, paramstyle):
    """
    Replaces qmark, format, and numeric markers in the given operation, from
    the string_parameters list.

    ``string_parameters`` must already be SQL literals (quoted/escaped).
    When ``paramstyle`` is None every marker style is replaced; otherwise
    only markers of that style are replaced and the others are left as is.

    Raises ProgrammingError on wrong number of parameters or markers, or
    when ``paramstyle`` is not a positional style.
    For numeric markers there has to be enough parameters to satisfy
    every marker and there has to be no unused parameter.
    """
    if paramstyle is not None:
        if paramstyle in _KEYWORD_PARAMSTYLES:
            raise ProgrammingError(
                "Paramstyle '%s' is not compatible with parameters passed as "
                "list. Please use a dict for your parameters instead "
                "or specify a different paramstyle" % paramstyle)

        if paramstyle not in _POSITIONAL_PARAMSTYLES:
            raise ProgrammingError(
                "Paramstyle '%s' is not supported. Please use a different "
                "one" % paramstyle)

    param_count = len(string_parameters)
    used_positional_indexes = set()
    used_numeric_indexes = set()

    def replace_marker(match):
        # Leave markers of other styles alone when a style was requested.
        if paramstyle is not None and match.group(paramstyle) is None:
            return match.group(0)

        if match.group('index') is not None:
            # Numeric markers are 1-based and may repeat.
            param_index = int(match.group('index')) - 1
            used_numeric_indexes.add(param_index)
        else:
            # Positional markers consume parameters in order of appearance.
            param_index = len(used_positional_indexes)
            used_positional_indexes.add(param_index)

        if param_index >= param_count:
            # param_index is 0-based, so at least param_index + 1 bindings
            # are required by the statement.
            raise ProgrammingError(
                "Incorrect number of bindings supplied. The current statement "
                "uses %d or more, and there are %d supplied." % (
                    param_index + 1, param_count))

        return string_parameters[param_index]

    operation = _PARAM_MARKER_RE.sub(replace_marker, operation)

    marker_count = len(used_numeric_indexes | used_positional_indexes)
    if marker_count < param_count:
        raise ProgrammingError(
            "Incorrect number of bindings supplied. The current statement "
            "uses %d, and there are %d supplied." % (
                marker_count, param_count))

    return operation


def _format_parameter(value):
    """Render one Python value as the SQL literal to splice into a query."""
    if value is None:
        return 'NULL'
    if isinstance(value, six.string_types):
        return "'" + _escape(value) + "'"
    if isinstance(value, datetime.date):
        # Also covers datetime.datetime, which subclasses datetime.date.
        return "'" + str(value) + "'"
    return str(value)


def _bind_parameters_list(operation, parameters, paramstyle):
    """Bind a list/tuple of parameters to qmark, format or numeric markers."""
    string_parameters = [_format_parameter(value) for value in parameters]

    # replace qmark and numeric parameters
    return _replace_numeric_markers(operation, string_parameters, paramstyle)


def _bind_parameters_dict(operation, parameters):
    """Bind a dict of parameters to named (``:name``) or pyformat markers."""
    string_parameters = dict(
        (name, _format_parameter(value))
        for (name, value) in six.iteritems(parameters))

    # replace named parameters by their pyformat equivalents
    operation = re.sub(r":([^\d\W]\w*)", r"%(\g<1>)s", operation)

    # replace pyformat parameters
    return operation % string_parameters


def _bind_parameters(operation, parameters, paramstyle=None):
    """Substitute ``parameters`` into ``operation`` and return the SQL text.

    Raises ProgrammingError when ``parameters`` is not a list, tuple or dict.
    """
    # If parameters is a list, assume either qmark, format, or numeric
    # format. If not, assume either named or pyformat parameters
    if isinstance(parameters, (list, tuple)):
        return _bind_parameters_list(operation, parameters, paramstyle)
    elif isinstance(parameters, dict):
        return _bind_parameters_dict(operation, parameters)
    else:
        raise ProgrammingError("Query parameters argument should be a "
                               "list, tuple, or dict object")
from puresasl.client import SASLClient, SASLError
from contextlib import contextmanager

@contextmanager
def error_catcher(self, Exc = Exception):
    """Run the ``with`` body, turning an ``Exc`` into ``self.error``.

    ``self.error`` is reset to None on entry. If the body raises ``Exc`` the
    exception is suppressed and its message is stored in ``self.error``, so
    execution continues with the statement after the ``with`` block.
    """
    try:
        self.error = None
        yield
    except Exc as e:
        self.error = str(e)


class PureSASLClient(SASLClient):
    """`SASLClient` whose operations return ``(success, ...)`` tuples.

    Instead of raising, each method returns True plus its result on success
    or False plus a placeholder on failure; the failure message is available
    through `getError`.
    """

    def __init__(self, *args, **kwargs):
        # Message of the last failed operation, or None.
        self.error = None
        super(PureSASLClient, self).__init__(*args, **kwargs)

    def start(self, mechanism):
        """Choose ``mechanism`` (a name or a list of names) and start it.

        Returns ``(True, chosen_mechanism, initial_response)`` on success and
        ``(False, mechanism, None)`` when a `SASLError` was caught.
        """
        with error_catcher(self, SASLError):
            if isinstance(mechanism, list):
                self.choose_mechanism(mechanism)
            else:
                self.choose_mechanism([mechanism])
            return True, self.mechanism, self.process()
        # else
        # Reached only when error_catcher suppressed a SASLError.
        return False, mechanism, None

    # NOTE(review): encode() calls unwrap() and decode() calls wrap(), and the
    # parameter names (incoming/outgoing) follow the same crossed pattern.
    # Confirm against the caller that this direction is intended.
    def encode(self, incoming):
        """Return ``(True, self.unwrap(incoming))`` or ``(False, None)``."""
        with error_catcher(self):
            return True, self.unwrap(incoming)
        # else
        # Reached only when error_catcher suppressed an exception.
        return False, None

    def decode(self, outgoing):
        """Return ``(True, self.wrap(outgoing))`` or ``(False, None)``."""
        with error_catcher(self):
            return True, self.wrap(outgoing)
        # else
        # Reached only when error_catcher suppressed an exception.
        return False, None

    def step(self, challenge):
        """Return ``(True, self.process(challenge))`` or ``(False, None)``."""
        with error_catcher(self):
            return True, self.process(challenge)
        # else
        # Reached only when error_catcher suppressed an exception.
        return False, None

    def getError(self):
        """Return the message of the last failed operation, or None."""
        return self.error
Copyright 2014 Cloudera Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /impala/tests/compat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Cloudera Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # pylint: disable=unused-import,import-error 16 | 17 | import sys 18 | 19 | 20 | if sys.version_info[:2] <= (2, 6): 21 | import unittest2 as unittest 22 | else: 23 | import unittest # noqa 24 | -------------------------------------------------------------------------------- /impala/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Cloudera Inc. 
# set up some special cmd line options for test running


def pytest_addoption(parser):
    """Register the impyla-specific boolean command line flags."""
    flags = (
        ('--connect', 'Also run DB API 2.0 compliance tests'),
        ('--log-info', 'Enable INFO logging'),
        ('--log-debug', 'Enable DEBUG logging'),
    )
    for flag, description in flags:
        parser.addoption(flag, action='store_true', default=False,
                         help=description)


def pytest_configure(config):
    """Set up root logging from the flags and register the custom markers."""
    # if both --log-debug and --log-info are set, the DEBUG takes precedence
    if config.getoption('log_debug'):
        level = logging.DEBUG
    elif config.getoption('log_info'):
        level = logging.INFO
    else:
        level = None

    if level is not None:
        root_logger = logging.getLogger()
        root_logger.setLevel(level)
        root_logger.addHandler(logging.StreamHandler())

    for marker in (
            "connect",
            "params_neg: marks tests that verify invalid parameters are not allowed"):
        config.addinivalue_line("markers", marker)
def pytest_runtest_setup(item):
    """Skip tests marked ``connect`` unless ``--connect`` was requested."""
    closest_marker = getattr(item, 'get_closest_marker', None)
    if closest_marker is not None:
        needs_connection = closest_marker('connect') is not None
    else:
        # Older pytest releases have no get_closest_marker and exposed marks
        # as attributes of the test function instead.
        needs_connection = bool(getattr(item.obj, 'connect', None))

    if needs_connection and not item.config.getoption('connect'):
        skip('--connect not requested (for integration tests)')


# testing fixtures


ENV = ImpylaTestEnv()
# The PLAIN mechanism is taken to mean the tests run against Hive.
hive = ENV.auth_mech == 'PLAIN'


@fixture(scope='session')
def host():
    """Host name of the server under test."""
    return ENV.host


@fixture(scope='session')
def port():
    """Port of the server under test."""
    return ENV.port


@fixture(scope='session')
def auth_mech():
    """Authentication mechanism used for every test connection."""
    return ENV.auth_mech


@fixture(scope='session')
def tmp_db():
    """Randomly generated name of the temporary test database."""
    return _random_id('tmp_impyla_')


@fixture(scope='session')
def con(host, port, auth_mech, tmp_db):
    """Connection bound to a temporary database that is dropped afterwards."""
    # create the temporary database
    setup_con = connect(host=host, port=port, auth_mechanism=auth_mech)
    try:
        setup_cur = setup_con.cursor()
        try:
            setup_cur.execute('CREATE DATABASE {0}'.format(tmp_db))
        finally:
            setup_cur.close()
    finally:
        setup_con.close()

    # create the actual fixture
    test_con = connect(host=host, port=port, auth_mechanism=auth_mech,
                       database=tmp_db)
    try:
        yield test_con
    finally:
        try:
            test_con.close()
        finally:
            # cleanup the temporary database, even if closing the test
            # connection failed
            cleanup_con = connect(host=host, port=port,
                                  auth_mechanism=auth_mech)
            try:
                cleanup_cur = cleanup_con.cursor()
                try:
                    if hive:
                        force_drop_hive_database(cleanup_cur, tmp_db)
                    else:
                        force_drop_impala_database(cleanup_cur, tmp_db)
                finally:
                    cleanup_cur.close()
            finally:
                cleanup_con.close()


@fixture(scope='session')
def cur(con):
    """Session-wide cursor with the default conversions."""
    cur = con.cursor()
    try:
        yield cur
    finally:
        cur.close()


@fixture(scope='session')
def cur_no_string_conv(con):
    """Session-wide cursor with string-to-unicode conversion turned off."""
    cur = con.cursor(convert_types=True, convert_strings_to_unicode=False)
    try:
        yield cur
    finally:
        cur.close()
@fixture(scope='module')
def decimal_table(cur):
    """Create a table with three decimal columns; drop it after the module."""
    table_name = 'tmp_decimal_table'
    ddl = """CREATE TABLE {0} (
                f1 decimal(10, 2),
                f2 decimal(7, 5),
                f3 decimal(38, 17))""".format(table_name)
    cur.execute(ddl)
    try:
        yield table_name
    finally:
        cur.execute("DROP TABLE {0}".format(table_name))


@pytest.mark.connect
def test_cursor_description_precision_scale(cur, decimal_table):
    """cursor.description reports the declared precision and scale."""
    # According to the DBAPI 2.0, these are the 7 fields of cursor.description
    # - name
    # - type_code
    # - display_size
    # - internal_size
    # - precision
    # - scale
    # - null_ok
    expected = [(10, 2),
                (7, 5),
                (38, 17)]
    cur.execute('select * from {0} limit 0'.format(decimal_table))
    observed = [(t[4], t[5]) for t in cur.description]
    # Compare the whole lists: a pairwise zip() loop would silently pass when
    # the description is shorter than expected (or empty).
    assert observed == expected
@fixture(scope='module')
def decimal_table2(cur):
    """Create a decimal(18, 9) table with boundary values and one NULL."""
    table_name = 'tmp_decimal_table2'
    create_stmt = """CREATE TABLE {0} (val decimal(18, 9))""".format(table_name)
    insert_stmt = '''insert into {0}
                     values (cast(123456789.123456789 as decimal(18, 9))),
                            (cast(-123456789.123456789 as decimal(18, 9))),
                            (cast(0.000000001 as decimal(18, 9))),
                            (cast(-0.000000001 as decimal(18, 9))),
                            (cast(999999999.999999999 as decimal(18, 9))),
                            (cast(-999999999.999999999 as decimal(18, 9))),
                            (NULL)'''.format(table_name)
    cur.execute(create_stmt)
    cur.execute(insert_stmt)
    try:
        yield table_name
    finally:
        cur.execute("DROP TABLE {0}".format(table_name))


def common_test_decimal(cur, decimal_table):
    """Read back a few decimal values in a wide range."""
    cur.execute('select val from {0} order by val'.format(decimal_table))
    rows = cur.fetchall()
    ordered_values = ('-999999999.999999999',
                      '-123456789.123456789',
                      '-0.000000001',
                      '0.000000001',
                      '123456789.123456789',
                      '999999999.999999999')
    # The query result is expected to end with the NULL row.
    expected = [(Decimal(text),) for text in ordered_values] + [(None,)]
    assert rows == expected


@pytest.mark.connect
def test_decimal_basic(cur, decimal_table2):
    """Decimals round-trip through the default cursor."""
    common_test_decimal(cur, decimal_table2)


@pytest.mark.connect
def test_decimal_no_string_conv(cur_no_string_conv, decimal_table2):
    """Decimals round-trip with string conversion turned off."""
    common_test_decimal(cur_no_string_conv, decimal_table2)


@fixture(scope='module')
def date_table(cur):
    """Create a table holding two DATE values; drop it after the module."""
    table_name = 'tmp_date_table'
    create_stmt = """CREATE TABLE {0} (d date)""".format(table_name)
    insert_stmt = '''insert into {0}
                     values (date "0001-01-01"), (date "1999-9-9")'''.format(table_name)
    cur.execute(create_stmt)
    cur.execute(insert_stmt)
    try:
        yield table_name
    finally:
        cur.execute("DROP TABLE {0}".format(table_name))


def common_test_date(cur, date_table):
    """Read back a couple of date values in a wide range."""
    cur.execute('select d from {0} order by d'.format(date_table))
    rows = cur.fetchall()
    expected = [(datetime.date(1, 1, 1),), (datetime.date(1999, 9, 9),)]
    assert rows == expected


@pytest.mark.connect
def test_date_basic(cur, date_table):
    """Dates round-trip through the default cursor."""
    common_test_date(cur, date_table)
def common_test_timestamp(cur, timestamp_table):
    """Select every timestamp back in sorted order and compare with known values.

    ``cur`` is the cursor to query with and ``timestamp_table`` the name of
    the table to read.
    """
    cur.execute('select ts from {0} order by ts'.format(timestamp_table))
    rows = cur.fetchall()

    base = datetime.datetime(2014, 6, 23, 13, 30, 51)
    expected_values = [
        datetime.datetime(1400, 1, 1),
        base,
        base.replace(microsecond=123000),
        base.replace(microsecond=123456),
        # Listed twice on purpose: two rows are expected to compare equal at
        # microsecond resolution.
        base.replace(microsecond=123456),
        datetime.datetime(9999, 12, 31, 23, 59, 59),
    ]
    assert rows == [(value,) for value in expected_values]
@pytest.mark.connect
def test_string_conv(cur):
    """Check that a STRING literal is returned as text by the default cursor."""
    cur.execute('select "Test string"')
    result = cur.fetchone()
    # The isinstance() result used to be stored in an unused local and never
    # asserted, so this test could not fail. Assert the value, and on
    # Python 3 the type as well (there a bytes value would not compare equal
    # to the literal either).
    assert result[0] == "Test string"
    if sys.version_info[0] >= 3:
        assert isinstance(result[0], str)
@pytest.mark.connect
class ImpalaDBAPI20Test(_dbapi20_tests.DatabaseAPI20Test):
    """Run the generic DB API 2.0 suite from _dbapi20_tests against impala.dbapi.

    Connections made through ``connect_kw_args`` use the randomly named
    database ``tmp_db``, which setUpClass creates and tearDownClass
    force-drops.
    """
    driver = impala.dbapi
    connect_kw_args = {'host': ENV.host,
                       'port': ENV.port,
                       'auth_mechanism': ENV.auth_mech,
                       'database': tmp_db}

    # DDL statements built with the base suite's table prefix.
    ddl1 = 'create table {0}booze (name string)'.format(
        _dbapi20_tests.DatabaseAPI20Test.table_prefix)
    ddl2 = 'create table {0}barflys (name string)'.format(
        _dbapi20_tests.DatabaseAPI20Test.table_prefix)

    @classmethod
    def setUpClass(cls):
        """Create the temporary database before any test of the class runs."""
        # Connect without 'database': tmp_db does not exist yet.
        con = cls.driver.connect(host=ENV.host, port=ENV.port,
                                 auth_mechanism=ENV.auth_mech)
        # try/finally so the cursor and connection are released even when
        # the statement raises.
        try:
            cur = con.cursor()
            try:
                cur.execute('CREATE DATABASE {0}'.format(tmp_db))
            finally:
                cur.close()
        finally:
            con.close()

    @classmethod
    def tearDownClass(cls):
        """Force-drop the temporary database after the last test of the class."""
        con = cls.driver.connect(host=ENV.host, port=ENV.port,
                                 auth_mechanism=ENV.auth_mech)
        try:
            cur = con.cursor()
            try:
                # 'hive' is True when the configured auth mechanism is PLAIN.
                if hive:
                    force_drop_hive_database(cur, tmp_db)
                else:
                    force_drop_impala_database(cur, tmp_db)
            finally:
                cur.close()
        finally:
            con.close()

    def test_nextset(self):
        # Base class does not implement this.
        pytest.skip("Not implemented")

    def test_setoutputsize(self):
        # Base class does not implement this.
        pytest.skip("Not implemented")

    DDL_RETURNS_RESULTSET = 'DDL returns result set in Impala - issue #401'
    @pytest.mark.skipif(True, reason=DDL_RETURNS_RESULTSET)
    def test_description(self):
        super(ImpalaDBAPI20Test, self).test_description()

    @pytest.mark.skipif(True, reason=DDL_RETURNS_RESULTSET)
    def test_fetchone(self):
        super(ImpalaDBAPI20Test, self).test_fetchone()

    TEST_CLOSE_FAILING = 'test_close not raising error - issue #401'
    @pytest.mark.skipif(True, reason=TEST_CLOSE_FAILING)
    def test_close(self):
        super(ImpalaDBAPI20Test, self).test_close()
def test_hive_queries(cur):
    """Exercise DDL, inserts, selects and executemany() on a scratch table.

    Creates ``tmp_hive``, checks it is listed by SHOW TABLES, inserts rows,
    verifies several selects (including NULL handling and qmark parameter
    substitution), then drops the table and checks it is no longer listed.
    """
    cur.execute('CREATE TABLE tmp_hive (a STRING, b INT, c DOUBLE)')
    try:
        cur.execute('SHOW TABLES')
        tables = cur.fetchall()
        assert any(t[0] == 'tmp_hive' for t in tables)

        cur.execute("INSERT INTO tmp_hive "
                    "VALUES ('foo', 1, 0.5), ('bar', 2, NULL), ('baz', 3, 6.2)")

        cur.execute('SELECT b FROM tmp_hive LIMIT 2')
        assert len(cur.description) == 1
        assert cur.description[0][0] == 'b'
        results = cur.fetchall()
        assert len(results) == 2

        cur.execute('SELECT * FROM tmp_hive WHERE c IS NOT NULL')
        results = cur.fetchall()
        assert len(results) == 2

        cur.execute("SELECT c from tmp_hive WHERE a = 'foo'")
        results = cur.fetchall()
        assert len(results) == 1
        assert results[0][0] == 0.5

        cur.execute("SELECT c from tmp_hive WHERE a = 'bar'")
        results = cur.fetchall()
        assert len(results) == 1
        assert results[0][0] is None

        # Test executemany() with parameter substitution. The %s should be ignored
        # as paramstyle is "qmark".
        cur.executemany("INSERT INTO tmp_hive VALUES (?, ?, ?)",
                        [['a', 4, 1.0], ['%s', 5, None]],
                        {'paramstyle': 'qmark'})

        cur.execute("SELECT * from tmp_hive WHERE b = 4")
        results = cur.fetchall()
        assert results == [('a', 4, 1.0)]

        cur.execute("SELECT * from tmp_hive WHERE b = 5")
        results = cur.fetchall()
        assert results == [('%s', 5, None)]
    finally:
        # Drop the scratch table even when a check above fails, so a failed
        # run does not leave tmp_hive behind for whatever runs next.
        cur.execute('DROP TABLE tmp_hive')

    cur.execute('SHOW TABLES')
    tables = cur.fetchall()
    assert all(t[0] != 'tmp_hive' for t in tables)
def test_dict_cursor(cur2):
    """Exercise DDL, inserts and selects through a dict-returning cursor.

    ``cur2`` is a cursor created with ``dictify=True``, so rows are indexed
    by column name rather than by position.
    """
    cur = cur2
    cur.execute('CREATE TABLE tmp_hive (a STRING, b INT, c DOUBLE)')
    try:
        cur.execute('SHOW TABLES')
        tables = cur.fetchall()
        assert any(t['name'] == 'tmp_hive' for t in tables)

        cur.execute("INSERT INTO tmp_hive "
                    "VALUES ('foo', 1, 0.5), ('bar', 2, NULL), ('baz', 3, 6.2)")

        cur.execute('SELECT b FROM tmp_hive LIMIT 2')
        assert len(cur.description) == 1
        assert cur.description[0][0] == 'b'
        results = cur.fetchall()
        assert len(results) == 2

        cur.execute('SELECT * FROM tmp_hive WHERE c IS NOT NULL')
        results = cur.fetchall()
        assert len(results) == 2

        cur.execute("SELECT c from tmp_hive WHERE a = 'foo'")
        results = cur.fetchall()
        assert len(results) == 1
        assert results[0]['c'] == 0.5

        cur.execute("SELECT c from tmp_hive WHERE a = 'bar'")
        results = cur.fetchall()
        assert len(results) == 1
        assert results[0]['c'] is None
    finally:
        # Drop the scratch table even when a check above fails, so a failed
        # run does not leave tmp_hive behind for whatever runs next.
        cur.execute('DROP TABLE tmp_hive')

    cur.execute('SHOW TABLES')
    tables = cur.fetchall()
    # Use the same row key as the first SHOW TABLES check. The previous
    # 'tableName' key disagreed with it, so with any other table present this
    # line raised KeyError, and with none it passed without checking anything.
    # NOTE(review): 'name' is taken from the check above; confirm it is the
    # SHOW TABLES column name on the server under test.
    assert not any(t['name'] == 'tmp_hive' for t in tables)
-------------------------------------------------------------------------------- /impala/tests/test_http_connect.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Cloudera Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import socket 16 | import threading 17 | from contextlib import closing 18 | 19 | import pytest 20 | import requests 21 | import six 22 | from six.moves import SimpleHTTPServer 23 | from six.moves import http_client 24 | from six.moves import socketserver 25 | 26 | from impala.error import HttpError 27 | from impala.tests.util import ImpylaTestEnv, is_ipv6_only_host 28 | 29 | ENV = ImpylaTestEnv() 30 | 31 | IS_IPV6_ONLY_HOST = is_ipv6_only_host(ENV.host, ENV.port) 32 | LOCAL_HOST = "::1" if IS_IPV6_ONLY_HOST else "127.0.0.1" 33 | 34 | # socketserver.TCPServer cannot listen both on ipv4 and ipv6. Listen to ipv6 35 | # if the hs2-http server has only ipv6 address. 
class IPv4or6TcpServer(socketserver.TCPServer):
    """TCP server that uses IPv6 when IS_IPV6_ONLY_HOST is set, IPv4 otherwise."""

    # The address family is fixed once, when the class body is evaluated.
    if IS_IPV6_ONLY_HOST:
        address_family = socket.AF_INET6
    else:
        address_family = socket.AF_INET

    def __init__(self, host_port, req_handler):
        """Bind to the ``host_port`` pair and serve with ``req_handler``."""
        socketserver.TCPServer.__init__(self, host_port, req_handler)
# pytest.fixture handles yield-style fixtures itself; pytest.yield_fixture is
# only a deprecated alias, and the sibling http_503_server fixture already
# uses pytest.fixture.
@pytest.fixture
def http_proxy_server():
    """A fixture that creates a reverse http proxy.

    Yields a server object exposing ``HOST``, ``PORT`` and ``get_headers()``;
    the server is shut down once the test has finished.
    """

    class RequestHandlerProxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
        """A custom http handler that acts as a reverse http proxy. This proxy will forward
        http messages to Impala, and copy the responses back to the client. In addition, it
        will save the outgoing http message headers in a class variable so that they can be
        accessed by test code."""

        # This class variable is used to store the most recently seen outgoing http
        # message headers.
        saved_headers = None

        def __init__(self, request, client_address, server):
            SimpleHTTPServer.SimpleHTTPRequestHandler.__init__(self, request, client_address,
                                                               server)

        def do_POST(self):
            # Read the body of the incoming http post message.
            data_string = self.rfile.read(int(self.headers['Content-Length']))
            # Save the http headers from the message in a class variable.
            RequestHandlerProxy.saved_headers = self.decode_raw_headers()
            # Forward the http post message to Impala and get a response message.
            # A host containing ':' is wrapped in brackets before being put in the URL.
            host = "[%s]" % ENV.host if ":" in ENV.host else ENV.host
            response = requests.post(
                url="http://{0}:{1}/cliservice".format(host, ENV.http_port),
                headers=self.headers, data=data_string)
            # Send the response message back to the client.
            self.send_response(code=response.status_code)
            # Send the http headers.
            # In python3 response.headers is a CaseInsensitiveDict
            # In python2 response.headers is a dict
            for key, value in response.headers.items():
                self.send_header(keyword=key, value=value)
            self.end_headers()
            # Send the message body.
            self.wfile.write(response.content)

        def decode_raw_headers(self):
            """Decode a list of header strings into a list of tuples, each tuple containing a
            key-value pair. The details of how to get the headers differs between Python2
            and Python3"""
            if six.PY2:
                header_list = []
                # In Python2 self.headers is an instance of mimetools.Message and
                # self.headers.headers is a list of raw header strings.
                # An example header string: 'Accept-Encoding: identity\\r\\n'
                for header in self.headers.headers:
                    stripped = header.strip()
                    key, value = stripped.split(':', 1)
                    header_list.append((key.strip(), value.strip()))
                return header_list
            if six.PY3:
                # In Python 3 self.headers._headers is what we need
                return self.headers._headers

    class TestHTTPServerProxy(object):
        """Runs the proxy on LOCAL_HOST and an unused port in a background thread."""

        def __init__(self, clazz):
            self.clazz = clazz
            self.HOST = LOCAL_HOST
            self.PORT = get_unused_port()
            self.httpd = IPv4or6TcpServer((self.HOST, self.PORT), clazz)
            self.http_server_thread = threading.Thread(target=self.httpd.serve_forever)
            self.http_server_thread.start()

        def get_headers(self):
            """Return the most recently seen outgoing http message headers."""
            return self.clazz.saved_headers

    server = TestHTTPServerProxy(RequestHandlerProxy)
    yield server

    # Cleanup after test.
    shutdown_server(server)
173 | Check that there is an HttpError exception when the server returns a 503 error.""" 174 | con = connect(ENV.host, http_503_server.PORT, use_http_transport=True) 175 | try: 176 | con.cursor() 177 | assert False, "Should have got exception" 178 | except HttpError as e: 179 | assert str(e) == "HTTP code 503: Service Unavailable" 180 | assert e.code == http_client.SERVICE_UNAVAILABLE 181 | assert e.body.decode("utf-8") == "extra text" 182 | 183 | def test_duplicate_headers(self, http_proxy_server): 184 | """Test that we can use 'connect' with the get_user_custom_headers_func parameter 185 | to add duplicate http message headers to outgoing messages.""" 186 | con = connect(ENV.host, http_proxy_server.PORT, use_http_transport=True, 187 | get_user_custom_headers_func=get_user_custom_headers_func) 188 | cur = con.cursor() 189 | cur.execute('select 1') 190 | rows = cur.fetchall() 191 | assert rows == [(1,)] 192 | 193 | # Get the outgoing message headers from the last outgoing http message. 194 | headers = http_proxy_server.get_headers() 195 | # For sanity test the count of a few simple expected headers. 196 | assert count_tuples_with_key(headers, "Host") == 1 197 | assert count_tuples_with_key(headers, "User-Agent") == 1 198 | # Check that the custom headers are present. 
def count_tuples_with_key(tuple_list, key_to_count):
    """Count the tuples in a list whose first element equals a given key.

    Args:
        tuple_list: A list of key-value tuples.
        key_to_count: The key to count occurrences of.
    Returns:
        The number of tuples with the specified key.
    """
    return sum(1 for key, _ in tuple_list if key == key_to_count)
@fixture(scope='function')
def empty_table(cur):
    """Create a table with a single int column, yield its name, then drop it."""
    name = 'tmp_empty_table'
    cur.execute("CREATE TABLE {0} (i int)".format(name))
    try:
        yield name
    finally:
        drop_stmt = "DROP TABLE {0}".format(name)
        cur.execute(drop_stmt)
def validate_log(cur):
    """Assert that log, exec summary and runtime profile can be read from ``cur``.

    Checks that the cursor reports no active operation, that the log contains
    "100% Complete", that every node of the summary has the expected
    attributes and a consistent host count, and that a profile is returned.
    """
    # The query should be closed at this point.
    assert not cur._last_operation_active
    assert "100% Complete" in cur.get_log()

    # Also check that summary and runtime profile are available
    summary = cur.get_summary()
    assert summary is not None
    required_attrs = ('node_id', 'fragment_idx', 'label', 'label_detail',
                      'num_children', 'estimated_stats', 'exec_stats',
                      'is_broadcast', 'num_hosts')
    for node in summary.nodes:
        for attr in required_attrs:
            assert hasattr(node, attr)
        assert node.num_hosts > 0
        assert len(node.exec_stats) >= node.num_hosts

    assert cur.get_profile() is not None
def create_test_engine(diealect):
    """Create a SQLAlchemy engine for the host and port configured in TEST_ENV.

    ``diealect`` (sic) is used as the scheme of the engine URL.
    """
    host = TEST_ENV.host
    if ":" in host:
        # A host containing ':' is wrapped in brackets so that the port
        # separator in the URL stays unambiguous.
        host = "[%s]" % host
    url = '{0}://{1}:{2}'.format(diealect, host, TEST_ENV.port)
    return create_engine(url)
def test_sqlalchemy_impala4_compilation():
    """Compile CREATE TABLE for the partitioned test table with the impala4 dialect."""
    impala4_engine = create_test_engine("impala4")
    create_stmt = CreateTable(create_partitioned_test_table())
    compiled_ddl = str(create_stmt.compile(impala4_engine))
    # The DATE column type of 'col5' will be left as is.
    expected_ddl = ''.join([
        '\nCREATE TABLE mytable (\n\tcol1 STRING, \n\tcol2 TINYINT, ',
        '\n\tcol3 INT, \n\tcol4 DOUBLE, \n\tcol5 DATE, \n\tcol6 VARCHAR(10)\n)',
        '\nPARTITIONED BY (part_col STRING)\nSTORED AS PARQUET\n',
        "TBLPROPERTIES ('transactional' = 'true', ",
        "'transactional_properties' = 'insert_only')\n\n",
    ])
    assert expected_ddl == compiled_ddl
def test_pandas_dataframe_to_sql():
    """Write a small DataFrame with DataFrame.to_sql and check the created columns."""
    engine = create_test_engine("impala")
    # Creating a sample dataframe to push to the DB.
    df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=['a', 'b', 'c'])

    with engine.connect() as conn:
        try:
            df.to_sql('test_table', conn, if_exists='replace', index=False)
            table = pd.read_sql('DESCRIBE test_table', conn)
            columns = table['name'].tolist()
            assert ['a', 'b', 'c'] == columns

        finally:
            # IF EXISTS: when to_sql() fails before the table exists, a plain
            # DROP TABLE would raise here and replace the real failure.
            conn.execute(text('DROP TABLE IF EXISTS test_table'))
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import six
import socket


identity = lambda x: x

# Spellings (compared case-insensitively, after stripping whitespace) that
# turn a boolean environment flag off. Everything else turns it on, so values
# that used to enable a flag ("1", "true", "yes", ...) keep doing so.
_FALSE_FLAG_VALUES = frozenset(('', '0', 'false', 'f', 'no', 'n', 'off'))


def _env_flag(value):
    """Interpret the string value of an environment variable as a boolean.

    Plain ``bool`` is not usable as a coercer for environment variables:
    ``bool("false")`` and ``bool("0")`` are both ``True`` because any
    non-empty string is truthy.
    """
    return value.strip().lower() not in _FALSE_FLAG_VALUES


def get_env_var(name, coercer, default):
    """Return environment variable *name* converted with *coercer*.

    When the variable is not set, a note is written to stderr and *default*
    is returned unchanged (the coercer is not applied to the default).
    Exceptions raised by *coercer* propagate to the caller.
    """
    try:
        raw_value = os.environ[name]
    except KeyError:
        sys.stderr.write("{0} not set; using {1!r}\n".format(name, default))
        return default
    return coercer(raw_value)


def is_ipv6_only_host(host, port):
    """Return True if *host* resolves to IPv6 addresses and to no IPv4 address.

    Returns False as soon as an IPv4 address is found, and also when the
    lookup yields neither family. Resolution errors raised by
    ``socket.getaddrinfo`` are not caught.
    """
    saw_ipv6 = False
    for family, _, _, _, _ in socket.getaddrinfo(host, port, socket.AF_UNSPEC,
                                                 socket.SOCK_STREAM,
                                                 socket.IPPROTO_TCP):
        if family == socket.AF_INET:
            return False  # found ipv4
        if family == socket.AF_INET6:
            saw_ipv6 = True
    return saw_ipv6


def _resolve(explicit, env_name, coercer, default):
    """Return *explicit* unless it is None, else the env var / default."""
    if explicit is not None:
        return explicit
    return get_env_var(env_name, coercer, default)


class ImpylaTestEnv(object):
    """Connection settings for the test suite.

    Each constructor argument overrides the matching ``IMPYLA_TEST_*``
    environment variable; when neither is given a built-in default is used
    (and ``get_env_var`` reports that on stderr).
    """

    def __init__(self, host=None, port=None, hive_port=None, auth_mech=None,
                 http_port=None):
        self.host = _resolve(host, 'IMPYLA_TEST_HOST', identity, 'localhost')
        self.port = _resolve(port, 'IMPYLA_TEST_PORT', int, 21050)
        self.http_port = _resolve(http_port, 'IMPYLA_TEST_HTTP_PORT', int,
                                  28000)
        self.hive_port = _resolve(hive_port, 'IMPYLA_TEST_HIVE_PORT', int,
                                  10000)

        self.hive_user = get_env_var('IMPYLA_TEST_HIVE_USER', identity,
                                     'cloudera')

        # Parsed as a flag rather than with bool(): bool("false") is True.
        self.skip_hive_tests = get_env_var('IMPYLA_SKIP_HIVE_TESTS',
                                           _env_flag, False)

        self.auth_mech = _resolve(auth_mech, 'IMPYLA_TEST_AUTH_MECH',
                                  identity, 'NOSASL')

        self.ssl_cert = get_env_var('IMPYLA_SSL_CERT', identity, "")

    def __repr__(self):
        kvs = ['{0}={1}'.format(k, v)
               for (k, v) in six.iteritems(self.__dict__)]
        return 'ImpylaTestEnv(\n {0})'.format(',\n '.join(kvs))


class SocketTracker(object):
    """Context manager that records which sockets are currently open.

    While active it replaces ``socket.socket.__init__`` and
    ``socket.socket.close`` with wrappers that add to / remove from
    ``open_sockets``; the originals captured at construction time are put
    back on exit. The patch is process-wide, not per-thread.
    """

    def __init__(self):
        self.open_sockets = set()
        self.socket_constructor = socket.socket.__init__
        self.socket_close = socket.socket.close

    def __enter__(self):
        def constructor(*args, **kwargs):
            # Record the socket only once the real constructor succeeded, so
            # a failed construction is not reported as a leaked socket.
            result = self.socket_constructor(*args, **kwargs)
            self.open_sockets.add(args[0])
            return result

        def close(*args, **kwargs):
            # discard, not remove: close() may legitimately be called more
            # than once, or on a socket created before tracking started.
            self.open_sockets.discard(args[0])
            return self.socket_close(*args, **kwargs)

        socket.socket.__init__ = constructor
        socket.socket.close = close
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        socket.socket.__init__ = self.socket_constructor
        socket.socket.close = self.socket_close
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | namespace py impala._thrift_gen.ExecStats 19 | namespace cpp impala 20 | namespace java org.apache.impala.thrift 21 | 22 | include "Status.thrift" 23 | include "Types.thrift" 24 | 25 | // NOTE: The definitions in this file are part of the binary format of the Impala query 26 | // profiles. They should preserve backwards compatibility and as such some rules apply 27 | // when making changes. Please see RuntimeProfile.thrift for more details. 28 | 29 | enum TExecState { 30 | REGISTERED = 0 31 | PLANNING = 1 32 | QUEUED = 2 33 | RUNNING = 3 34 | FINISHED = 4 35 | 36 | CANCELLED = 5 37 | FAILED = 6 38 | } 39 | 40 | // Execution stats for a single plan node. 41 | struct TExecStats { 42 | // The wall clock time spent on the "main" thread. This is the user perceived 43 | // latency. This value indicates the current bottleneck. 44 | // Note: anywhere we have a queue between operators, this time can fluctuate 45 | // significantly without the overall query time changing much (i.e. the bottleneck 46 | // moved to another operator). This is unavoidable though. 47 | 1: optional i64 latency_ns 48 | 49 | // Total CPU time spent across all threads. For operators that have an async 50 | // component (e.g. multi-threaded) this will be >= latency_ns. 51 | 2: optional i64 cpu_time_ns 52 | 53 | // Number of rows returned. 54 | 3: optional i64 cardinality 55 | 56 | // Peak memory used (in bytes). 57 | 4: optional i64 memory_used 58 | } 59 | 60 | // Summary for a single plan node or data sink. 
This includes labels for how to display 61 | // the node as well as per instance stats. 62 | struct TPlanNodeExecSummary { 63 | // The plan node ID or -1 if this is a data sink at the root of a fragment. 64 | 1: required Types.TPlanNodeId node_id 65 | 2: required Types.TFragmentIdx fragment_idx 66 | 3: required string label 67 | 4: optional string label_detail 68 | 5: required i32 num_children 69 | 70 | // Estimated stats generated by the planner 71 | 6: optional TExecStats estimated_stats 72 | 73 | // One entry for each fragment instance executing this plan node or data sink. 74 | 7: optional list exec_stats 75 | 76 | // If true, this is an exchange node that is the receiver of a broadcast. 77 | 8: optional bool is_broadcast 78 | 79 | // The number of hosts. It cannot be inferred from exec_stats, since the length of the 80 | // list can be greater when mt_dop > 0. 81 | 9: optional i32 num_hosts 82 | } 83 | 84 | // Progress counters for an in-flight query. 85 | struct TExecProgress { 86 | 1: optional i64 total_scan_ranges 87 | 2: optional i64 num_completed_scan_ranges 88 | 3: optional i64 total_fragment_instances; 89 | 4: optional i64 num_completed_fragment_instances; 90 | } 91 | 92 | // Execution summary of an entire query. 93 | struct TExecSummary { 94 | // State of the query. 95 | 1: required TExecState state 96 | 97 | // Contains the error if state is FAILED. 98 | 2: optional Status.TStatus status 99 | 100 | // Flattened execution summary of the plan tree. 101 | 3: optional list nodes 102 | 103 | // For each node in 'nodes' that consumes input from the root of a different fragment, 104 | // i.e. an exchange or join node with a separate build, contains the index to the root 105 | // node of the source fragment. Both the key and value are indices into 'nodes'. 106 | 4: optional map exch_to_sender_map 107 | 108 | // List of errors that were encountered during execution. 
This can be non-empty 109 | // even if status is okay, in which case it contains errors that impala skipped 110 | // over. 111 | 5: optional list error_logs 112 | 113 | // Optional record indicating the query progress 114 | 6: optional TExecProgress progress 115 | 116 | // Set to true if the query is currently queued by admission control. 117 | 7: optional bool is_queued 118 | 119 | // Contains the latest queuing reason if the query is currently queued by admission 120 | // control. 121 | 8: optional string queued_reason 122 | } 123 | -------------------------------------------------------------------------------- /impala/thrift/Metrics.thrift: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | namespace py impala._thrift_gen.Metrics 19 | namespace cpp impala 20 | namespace java org.apache.impala.thrift 21 | 22 | // NOTE: The definitions in this file are part of the binary format of the Impala query 23 | // profiles. They should preserve backwards compatibility and as such some rules apply 24 | // when making changes. Please see RuntimeProfile.thrift for more details. 
25 | 26 | 27 | // Metric and counter data types. 28 | // 29 | // WARNING (IMPALA-8236): Adding new values to TUnit and using them in TCounter will break 30 | // old decoders of thrift profiles. The workaround is to only use the following units in 31 | // anything that is serialised into a TCounter: 32 | // UNIT, UNIT_PER_SECOND, CPU_TICKS, BYTES, BYTES_PER_SECOND, TIME_NS, DOUBLE_VALUE 33 | enum TUnit { 34 | // A dimensionless numerical quantity 35 | UNIT = 0 36 | // Rate of a dimensionless numerical quantity 37 | UNIT_PER_SECOND = 1 38 | CPU_TICKS = 2 39 | BYTES = 3 40 | BYTES_PER_SECOND = 4 41 | TIME_NS = 5 42 | DOUBLE_VALUE = 6 43 | // No units at all, may not be a numerical quantity 44 | NONE = 7 45 | TIME_MS = 8 46 | TIME_S = 9 47 | TIME_US = 10 48 | // 100th of a percent, used to express ratios etc., range from 0 to 10000, pretty 49 | // printed as integer percentages from 0 to 100. 50 | BASIS_POINTS = 11 51 | } 52 | 53 | // The kind of value that a metric represents. 54 | enum TMetricKind { 55 | // May go up or down over time 56 | GAUGE = 0 57 | // A strictly increasing value 58 | COUNTER = 1 59 | // Fixed; will never change 60 | PROPERTY = 2 61 | STATS = 3 62 | SET = 4 63 | HISTOGRAM = 5 64 | } 65 | -------------------------------------------------------------------------------- /impala/thrift/Status.thrift: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | namespace py impala._thrift_gen.Status 19 | namespace cpp impala 20 | namespace java org.apache.impala.thrift 21 | 22 | include "ErrorCodes.thrift" 23 | 24 | // NOTE: The definitions in this file are part of the binary format of the Impala query 25 | // profiles. They should preserve backwards compatibility and as such some rules apply 26 | // when making changes. Please see RuntimeProfile.thrift for more details. 27 | 28 | struct TStatus { 29 | 1: required ErrorCodes.TErrorCode status_code 30 | 2: list error_msgs 31 | } 32 | -------------------------------------------------------------------------------- /impala/thrift/Types.thrift: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | namespace py impala._thrift_gen.Types 19 | namespace cpp impala 20 | namespace java org.apache.impala.thrift 21 | 22 | // NOTE: The definitions in this file are part of the binary format of the Impala query 23 | // profiles. They should preserve backwards compatibility and as such some rules apply 24 | // when making changes. Please see RuntimeProfile.thrift for more details. 25 | 26 | typedef i64 TTimestamp 27 | typedef i32 TFragmentIdx 28 | typedef i32 TPlanNodeId 29 | typedef i32 TDataSinkId 30 | typedef i32 TTupleId 31 | typedef i32 TSlotId 32 | typedef i32 TTableId 33 | 34 | // TODO: Consider moving unrelated enums to better locations. 35 | 36 | enum TPrimitiveType { 37 | INVALID_TYPE = 0 38 | NULL_TYPE = 1 39 | BOOLEAN = 2 40 | TINYINT = 3 41 | SMALLINT = 4 42 | INT = 5 43 | BIGINT = 6 44 | FLOAT = 7 45 | DOUBLE = 8 46 | DATE = 9 47 | DATETIME = 10 48 | TIMESTAMP = 11 49 | STRING = 12 50 | BINARY = 13 51 | DECIMAL = 14 52 | CHAR = 15 53 | VARCHAR = 16 54 | FIXED_UDA_INTERMEDIATE = 17 55 | } 56 | 57 | enum TTypeNodeType { 58 | SCALAR = 0 59 | ARRAY = 1 60 | MAP = 2 61 | STRUCT = 3 62 | } 63 | 64 | struct TScalarType { 65 | 1: required TPrimitiveType type 66 | 67 | // Only set if type == CHAR or type == VARCHAR 68 | 2: optional i32 len 69 | 70 | // Only set for DECIMAL 71 | 3: optional i32 precision 72 | 4: optional i32 scale 73 | } 74 | 75 | // Represents a field in a STRUCT type. 76 | // TODO: Model column stats for struct fields. 
77 | struct TStructField { 78 | 1: required string name 79 | 2: optional string comment 80 | // Valid for Iceberg tables 81 | 3: optional i32 field_id 82 | } 83 | 84 | struct TTypeNode { 85 | 1: required TTypeNodeType type 86 | 87 | // only set for scalar types 88 | 2: optional TScalarType scalar_type 89 | 90 | // only used for structs; has struct_fields.size() corresponding child types 91 | 3: optional list struct_fields 92 | } 93 | 94 | // A flattened representation of a tree of column types obtained by depth-first 95 | // traversal. Complex types such as map, array and struct have child types corresponding 96 | // to the map key/value, array item type, and struct fields, respectively. 97 | // For scalar types the list contains only a single node. 98 | // Note: We cannot rename this to TType because it conflicts with Thrift's internal TType 99 | // and the generated Python thrift files will not work. 100 | struct TColumnType { 101 | 1: list types 102 | } 103 | 104 | enum TStmtType { 105 | QUERY = 0 106 | DDL = 1 107 | DML = 2 108 | EXPLAIN = 3 109 | LOAD = 4 110 | SET = 5 111 | ADMIN_FN = 6 112 | TESTCASE = 7 113 | CONVERT = 8 114 | UNKNOWN = 9 115 | KILL = 10 116 | } 117 | 118 | enum TIcebergOperation { 119 | INSERT = 0 120 | DELETE = 1 121 | UPDATE = 2 122 | OPTIMIZE = 3 123 | MERGE = 4 124 | } 125 | 126 | // Level of verboseness for "explain" output. 127 | enum TExplainLevel { 128 | MINIMAL = 0 129 | STANDARD = 1 130 | EXTENDED = 2 131 | VERBOSE = 3 132 | } 133 | 134 | enum TRuntimeFilterMode { 135 | // No filters are computed in the FE or the BE. 136 | OFF = 0 137 | 138 | // Only broadcast filters are computed in the BE, and are only published to the local 139 | // fragment. 140 | LOCAL = 1 141 | 142 | // All filters are computed in the BE, and are published globally. 143 | GLOBAL = 2 144 | } 145 | 146 | enum TPrefetchMode { 147 | // No prefetching at all. 148 | NONE = 0 149 | 150 | // Prefetch the hash table buckets.
151 | HT_BUCKET = 1 152 | } 153 | 154 | // A TNetworkAddress is the standard host, port representation of a 155 | // network address. The hostname field must be resolvable to an IPv4 156 | // address. 157 | // uds_address is Unix Domain Socket address. UDS is limited to KRPC. 158 | // We use the unique name in "Abstract Namespace" as UDS address in the form of 159 | // "@impala-krpc:". This field is optional. It is only used for KRPC 160 | // bind/listen/connect when FLAGS_rpc_use_unix_domain_socket is set as true. 161 | struct TNetworkAddress { 162 | 1: required string hostname 163 | 2: required i32 port 164 | 3: optional string uds_address 165 | } 166 | 167 | // A list of network addresses 168 | struct TAddressesList { 169 | 1: required list addresses; 170 | } 171 | 172 | // Wire format for UniqueId 173 | struct TUniqueId { 174 | 1: required i64 hi 175 | 2: required i64 lo 176 | } 177 | 178 | // Used to uniquely identify individual impalads. 179 | typedef TUniqueId TBackendId; 180 | 181 | enum TFunctionCategory { 182 | SCALAR = 0 183 | AGGREGATE = 1 184 | ANALYTIC = 2 185 | } 186 | 187 | enum TFunctionBinaryType { 188 | // Impala builtin. We can either run this interpreted or via codegen 189 | // depending on the query option. 190 | BUILTIN = 0 191 | 192 | // Java UDFs, loaded from *.jar 193 | JAVA = 1 194 | 195 | // Native-interface, precompiled UDFs loaded from *.so 196 | NATIVE = 2 197 | 198 | // Native-interface, precompiled to IR; loaded from *.ll 199 | IR = 3 200 | } 201 | 202 | // Represents a fully qualified function name. 203 | struct TFunctionName { 204 | // Name of the function's parent database. Not set if in global 205 | // namespace (e.g. 
builtins) 206 | 1: optional string db_name 207 | 208 | // Name of the function 209 | 2: required string function_name 210 | } 211 | 212 | struct TScalarFunction { 213 | 1: required string symbol; 214 | 2: optional string prepare_fn_symbol 215 | 3: optional string close_fn_symbol 216 | } 217 | 218 | struct TAggregateFunction { 219 | 1: required TColumnType intermediate_type 220 | 2: required bool is_analytic_only_fn 221 | 3: required string update_fn_symbol 222 | 4: required string init_fn_symbol 223 | 5: optional string serialize_fn_symbol 224 | 6: optional string merge_fn_symbol 225 | 7: optional string finalize_fn_symbol 226 | 8: optional string get_value_fn_symbol 227 | 9: optional string remove_fn_symbol 228 | 10: optional bool ignores_distinct 229 | } 230 | 231 | // Represents a function in the Catalog or a query plan, or may be used 232 | // in a minimal form in order to simply specify a function (e.g. when 233 | // included in a minimal catalog update or a TGetPartialCatalogInfo request). 234 | // 235 | // In the case of this latter 'specifier' use case, only the name must be 236 | // set. 237 | struct TFunction { 238 | // Fully qualified function name. 239 | 1: required TFunctionName name 240 | 241 | // ------------------------------------------------------------------------- 242 | // The following fields are always set, unless this TFunction is being used 243 | // as a name-only "specifier". 244 | // ------------------------------------------------------------------------- 245 | 246 | // Type of the udf. e.g. hive, native, ir 247 | 2: optional TFunctionBinaryType binary_type 248 | 249 | // The types of the arguments to the function 250 | 3: optional list arg_types 251 | 252 | // Return type for the function. 253 | 4: optional TColumnType ret_type 254 | 255 | // If true, this function takes var args. 
256 | 5: optional bool has_var_args 257 | 258 | // ------------------------------------------------------------------------- 259 | // The following fields are truly optional, even in "full" function objects. 260 | // 261 | // Note that TFunction objects are persisted in the user's metastore, so 262 | // in many cases these fields are optional because they have been added 263 | // incrementally across releases of Impala. 264 | // ------------------------------------------------------------------------- 265 | 266 | // Optional comment to attach to the function 267 | 6: optional string comment 268 | 269 | 7: optional string signature 270 | 271 | // HDFS path for the function binary. This binary must exist at the time the 272 | // function is created. 273 | 8: optional string hdfs_location 274 | 275 | // One of these should be set. 276 | 9: optional TScalarFunction scalar_fn 277 | 10: optional TAggregateFunction aggregate_fn 278 | 279 | // True for builtins or user-defined functions persisted by the catalog 280 | 11: optional bool is_persistent 281 | 282 | // Last modified time of the 'hdfs_location'. Set by the coordinator to record 283 | // the mtime its aware of for the lib. Executors expect that the lib they use 284 | // has the same mtime as the coordinator's. An mtime of -1 makes the mtime check 285 | // a no-op. 286 | // Not set when stored in the catalog. 287 | 12: optional i64 last_modified_time 288 | 289 | 290 | // NOTE: when adding fields to this struct, do not renumber the field IDs or 291 | // add new required fields. This struct is serialized into user metastores. 292 | } 293 | 294 | // The sorting order used in SORT BY queries. 295 | enum TSortingOrder { 296 | LEXICAL = 0 297 | ZORDER = 1 298 | } 299 | -------------------------------------------------------------------------------- /impala/thrift/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Cloudera Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # This package exists purely to simplify the distribution of the thrift files, 16 | # which are required for py3 support. 17 | 18 | from __future__ import absolute_import 19 | -------------------------------------------------------------------------------- /impala/thrift/beeswax.thrift: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.
17 | 18 | // Interface for interacting with Beeswax Server 19 | 20 | namespace py impala._thrift_gen.beeswax 21 | namespace java com.cloudera.beeswax.api 22 | namespace cpp beeswax 23 | 24 | include "hive_metastore.thrift" 25 | 26 | // A Query 27 | struct Query { 28 | 1: string query; 29 | // A list of HQL commands to execute before the query. 30 | // This is typically defining UDFs, setting settings, and loading resources. 31 | 3: list configuration; 32 | 33 | // User and groups to "act as" for purposes of Hadoop. 34 | 4: string hadoop_user; 35 | } 36 | 37 | typedef string LogContextId 38 | 39 | enum QueryState { 40 | CREATED = 0 41 | INITIALIZED = 1 42 | COMPILED = 2 43 | RUNNING = 3 44 | FINISHED = 4 45 | EXCEPTION = 5 46 | } 47 | 48 | struct QueryHandle { 49 | 1: string id; 50 | 2: LogContextId log_context; 51 | } 52 | 53 | struct QueryExplanation { 54 | 1: string textual 55 | } 56 | 57 | struct Results { 58 | // If set, data is valid. Otherwise, results aren't ready yet. 59 | 1: bool ready, 60 | // Columns for the results 61 | 2: list columns, 62 | // A set of results 63 | 3: list data, 64 | // The starting row of the results 65 | 4: i64 start_row, 66 | // Whether there are more results to fetch 67 | 5: bool has_more 68 | } 69 | 70 | /** 71 | * Metadata information about the results. 72 | * Applicable only for SELECT. 73 | */ 74 | struct ResultsMetadata { 75 | /** The schema of the results */ 76 | 1: hive_metastore.Schema schema, 77 | /** The directory containing the results. Not applicable for partition table. */ 78 | 2: string table_dir, 79 | /** If the results are straight from an existing table, the table name. 
*/ 80 | 3: string in_tablename, 81 | /** Field delimiter */ 82 | 4: string delim, 83 | } 84 | 85 | exception BeeswaxException { 86 | 1: string message, 87 | // Use get_log(log_context) to retrieve any log related to this exception 88 | 2: LogContextId log_context, 89 | // (Optional) The QueryHandle that caused this exception 90 | 3: QueryHandle handle, 91 | 4: optional i32 errorCode = 0, 92 | 5: optional string SQLState = " " 93 | } 94 | 95 | exception QueryNotFoundException { 96 | } 97 | 98 | // Impala extension: 99 | // Levels to use when displaying query options from Impala shell. REMOVED options should 100 | // not be displayed in the shell, but setting them is a warning rather than an error. 101 | enum TQueryOptionLevel { 102 | REGULAR = 0 103 | ADVANCED = 1 104 | DEVELOPMENT = 2 105 | DEPRECATED = 3 106 | REMOVED = 4 107 | } 108 | 109 | /** Represents a Hadoop-style configuration variable. */ 110 | struct ConfigVariable { 111 | 1: string key, 112 | 2: string value, 113 | 3: string description, 114 | // For displaying purposes in Impala shell 115 | 4: optional TQueryOptionLevel level 116 | } 117 | 118 | service BeeswaxService { 119 | /** 120 | * Submit a query and return a handle (QueryHandle). The query runs asynchronously. 121 | */ 122 | QueryHandle query(1:Query query) throws(1:BeeswaxException error), 123 | 124 | /** 125 | * run a query synchronously and return a handle (QueryHandle). 126 | */ 127 | QueryHandle executeAndWait(1:Query query, 2:LogContextId clientCtx) 128 | throws(1:BeeswaxException error), 129 | 130 | /** 131 | * Get the query plan for a query. 132 | */ 133 | QueryExplanation explain(1:Query query) 134 | throws(1:BeeswaxException error), 135 | 136 | /** 137 | * Get the results of a query. This is non-blocking. Caller should check 138 | * Results.ready to determine if the results are in yet. The call requests 139 | * the batch size of fetch. 
140 | */ 141 | Results fetch(1:QueryHandle query_id, 2:bool start_over, 3:i32 fetch_size=-1) 142 | throws(1:QueryNotFoundException error, 2:BeeswaxException error2), 143 | 144 | /** 145 | * Get the state of the query 146 | */ 147 | QueryState get_state(1:QueryHandle handle) throws(1:QueryNotFoundException error), 148 | 149 | /** 150 | * Get the result metadata 151 | */ 152 | ResultsMetadata get_results_metadata(1:QueryHandle handle) 153 | throws(1:QueryNotFoundException error), 154 | 155 | /** 156 | * Used to test connection to server. A "noop" command. 157 | */ 158 | string echo(1:string s) 159 | 160 | /** 161 | * Returns a string representation of the configuration object being used. 162 | * Handy for debugging. 163 | */ 164 | string dump_config() 165 | 166 | /** 167 | * Get the log messages related to the given context. 168 | */ 169 | string get_log(1:LogContextId context) throws(1:QueryNotFoundException error) 170 | 171 | /* 172 | * Returns "default" configuration. 173 | */ 174 | list get_default_configuration(1:bool include_hadoop) 175 | 176 | /* 177 | * closes the query with given handle 178 | */ 179 | void close(1:QueryHandle handle) throws(1:QueryNotFoundException error, 180 | 2:BeeswaxException error2) 181 | 182 | /* 183 | * clean the log context for given id 184 | */ 185 | void clean(1:LogContextId log_context) 186 | } 187 | -------------------------------------------------------------------------------- /impala/thrift/fb303.thrift: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * fb303.thrift 22 | */ 23 | 24 | namespace py impala._thrift_gen.fb303 25 | namespace java com.facebook.fb303 26 | namespace cpp facebook.fb303 27 | namespace perl Facebook.FB303 28 | 29 | /** 30 | * Common status reporting mechanism across all services 31 | */ 32 | enum fb_status { 33 | DEAD = 0, 34 | STARTING = 1, 35 | ALIVE = 2, 36 | STOPPING = 3, 37 | STOPPED = 4, 38 | WARNING = 5, 39 | } 40 | 41 | /** 42 | * Standard base service 43 | */ 44 | service FacebookService { 45 | 46 | /** 47 | * Returns a descriptive name of the service 48 | */ 49 | string getName(), 50 | 51 | /** 52 | * Returns the version of the service 53 | */ 54 | string getVersion(), 55 | 56 | /** 57 | * Gets the status of this service 58 | */ 59 | fb_status getStatus(), 60 | 61 | /** 62 | * User friendly description of status, such as why the service is in 63 | * the dead or warning state, or what is being started or stopped. 64 | */ 65 | string getStatusDetails(), 66 | 67 | /** 68 | * Gets the counters for this service 69 | */ 70 | map getCounters(), 71 | 72 | /** 73 | * Gets the value of a single counter 74 | */ 75 | i64 getCounter(1: string key), 76 | 77 | /** 78 | * Sets an option 79 | */ 80 | void setOption(1: string key, 2: string value), 81 | 82 | /** 83 | * Gets an option 84 | */ 85 | string getOption(1: string key), 86 | 87 | /** 88 | * Gets all options 89 | */ 90 | map getOptions(), 91 | 92 | /** 93 | * Returns a CPU profile over the given time interval (client and server 94 | * must agree on the profile format). 
95 | */ 96 | string getCpuProfile(1: i32 profileDurationInSec), 97 | 98 | /** 99 | * Returns the unix time that the server has been running since 100 | */ 101 | i64 aliveSince(), 102 | 103 | /** 104 | * Tell the server to reload its configuration, reopen log files, etc 105 | */ 106 | oneway void reinitialize(), 107 | 108 | /** 109 | * Suggest a shutdown to the server 110 | */ 111 | oneway void shutdown(), 112 | 113 | } 114 | -------------------------------------------------------------------------------- /impala/thrift/process_thrift.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2019 Cloudera Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
set -euxo pipefail

# Print the given message to stderr and abort the script with status 1.
function die() {
  echo "$1" >&2
  exit 1
}

[ -n "${IMPALA_REPO:-}" ] || die "Need to set IMPALA_REPO"
[ -n "${IMPYLA_REPO:-}" ] || die "Need to set IMPYLA_REPO"

# impala-config.sh checks (and sets) some unset variables
set +u
source "$IMPALA_REPO/bin/impala-config.sh"
set -u

# Destination directory for the .thrift sources kept inside impyla.
THRIFT_DIR="$IMPYLA_REPO/impala/thrift"

SYNC_IMPYLA_THRIFT_FILES="${SYNC_IMPYLA_THRIFT_FILES:-false}"
if [ "$SYNC_IMPYLA_THRIFT_FILES" = true ] ; then
  echo "Copying thrift files from the main Impala repo at $IMPALA_REPO"
  cp "$IMPALA_REPO/common/thrift/hive-3-api/TCLIService.thrift" "$THRIFT_DIR"
  # ImpalaService.thrift require hand edit to exclude files unrelated to query profile
  # such as Frontend.thrift, BackendGflags.thrift, and Query.thrift.
  # cp $IMPALA_REPO/common/thrift/ImpalaService.thrift $IMPYLA_REPO/impala/thrift
  cp "$IMPALA_REPO/common/thrift/ErrorCodes.thrift" "$THRIFT_DIR"
  cp "$IMPALA_REPO/common/thrift/ExecStats.thrift" "$THRIFT_DIR"
  cp "$IMPALA_REPO/common/thrift/Metrics.thrift" "$THRIFT_DIR"
  cp "$IMPALA_REPO/common/thrift/RuntimeProfile.thrift" "$THRIFT_DIR"
  cp "$IMPALA_REPO/common/thrift/Status.thrift" "$THRIFT_DIR"
  cp "$IMPALA_REPO/common/thrift/Types.thrift" "$THRIFT_DIR"
  cp "$IMPALA_TOOLCHAIN_PACKAGES_HOME/thrift-$IMPALA_THRIFT_VERSION/share/fb303/if/fb303.thrift" \
    "$THRIFT_DIR"


  # beeswax.thrift already includes a namespace py declaration, which breaks my
  # directory structure, so here I delete it (in preparation for adding the proper
  # namespace declaration below)
  grep -v 'namespace py beeswaxd' "$IMPALA_REPO/common/thrift/beeswax.thrift" \
    > "$THRIFT_DIR/beeswax.thrift"

  # hive_metastore.thrift assumes a directory structure for fb303.thrift, so we
  # change the include statement here
  sed 's/share\/fb303\/if\///g' \
    "$HIVE_SRC_DIR/standalone-metastore/src/main/thrift/hive_metastore.thrift" \
    > "$THRIFT_DIR/hive_metastore.thrift"


  # We add "namespace py" statements to all the thrift files so we can get the
  # appropriate directory structure
  echo "Adding namespace py lines to thrift files"
  for THRIFT_FILE in "$THRIFT_DIR"/*.thrift; do
    FILE_NAME=$(basename "$THRIFT_FILE")
    BASE_NAME=${FILE_NAME%.*}
    echo "  $BASE_NAME"
    # Insert the python namespace immediately before the first existing
    # "namespace" line; every input line is still printed unchanged.
    awk -v ns="impala._thrift_gen.$BASE_NAME" '
      /^namespace/ && n == 0 {
        print "namespace py " ns
        n += 1
      }
      { print $0 }
    ' "$THRIFT_FILE" > "$THRIFT_DIR/temp.thrift"
    mv "$THRIFT_DIR/temp.thrift" "$THRIFT_FILE"
  done
fi

echo "Generating thrift python modules"
THRIFT_BIN="$IMPALA_TOOLCHAIN_PACKAGES_HOME/thrift-$IMPALA_THRIFT_PY_VERSION/bin/thrift"
"$THRIFT_BIN" -r --gen py:new_style,no_utf8strings -out "$IMPYLA_REPO" \
  "$THRIFT_DIR/ImpalaService.thrift"

echo "Removing extraneous $IMPYLA_REPO/__init__.py"
rm -f "$IMPYLA_REPO/__init__.py"
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | 17 | import base64 18 | import sys 19 | import warnings 20 | import logging 21 | import string 22 | import random 23 | import six 24 | import datetime 25 | import os.path 26 | from six.moves import http_cookies 27 | 28 | 29 | try: 30 | from logging import NullHandler 31 | except ImportError: 32 | # py 2.6 compat 33 | class NullHandler(logging.Handler): 34 | def emit(self, record): 35 | pass 36 | 37 | 38 | def get_logger_and_init_null(logger_name): 39 | logger = logging.getLogger(logger_name) 40 | logger.addHandler(NullHandler()) 41 | return logger 42 | 43 | 44 | log = get_logger_and_init_null(__name__) 45 | 46 | 47 | def as_pandas(cursor, coerce_float=False): 48 | """Return a pandas `DataFrame` out of an impyla cursor. 49 | 50 | This will pull the entire result set into memory. For richer pandas-like 51 | functionality on distributed data sets, see the Ibis project. 52 | 53 | Parameters 54 | ---------- 55 | cursor : `HiveServer2Cursor` 56 | The cursor object that has a result set waiting to be fetched. 57 | 58 | coerce_float : bool, optional 59 | Attempt to convert values of non-string, non-numeric objects to floating 60 | point. 
61 | 62 | Returns 63 | ------- 64 | DataFrame 65 | """ 66 | from pandas import DataFrame # pylint: disable=import-error 67 | names = [metadata[0] for metadata in cursor.description] 68 | return DataFrame.from_records(cursor.fetchall(), columns=names, 69 | coerce_float=coerce_float) 70 | 71 | 72 | def _random_id(prefix='', length=8): 73 | return prefix + ''.join(random.sample(string.ascii_uppercase, length)) 74 | 75 | 76 | def _get_table_schema_hack(cursor, table): 77 | """Get the schema of table by talking to Impala 78 | 79 | table must be a string (incl possible db name) 80 | """ 81 | # get the schema of the query result via a LIMIT 0 hack 82 | cursor.execute('SELECT * FROM %s LIMIT 0' % table) 83 | schema = [tup[:2] for tup in cursor.description] 84 | cursor.fetchall() # resets the state of the cursor and closes operation 85 | return schema 86 | 87 | 88 | def _gen_safe_random_table_name(cursor, prefix='tmp'): 89 | # unlikely but can be problematic if generated table name is taken in the 90 | # interim 91 | tries_left = 3 92 | while tries_left > 0: 93 | name = _random_id(prefix, 8) 94 | if not cursor.table_exists(name): 95 | return name 96 | tries_left -= 1 97 | raise ValueError("Failed to generate a safe table name") 98 | 99 | 100 | def compute_result_schema(cursor, query_string): 101 | temp_name = _random_id(prefix="tmp_crs_") 102 | try: 103 | cursor.execute("CREATE VIEW %s AS %s" % (temp_name, query_string)) 104 | cursor.execute("SELECT * FROM %s LIMIT 0" % temp_name) 105 | schema = cursor.description 106 | finally: 107 | cursor.execute("DROP VIEW %s" % temp_name) 108 | return schema 109 | 110 | 111 | def force_drop_impala_database(cursor, database): 112 | cursor.execute('USE %s' % database) 113 | cursor.execute('SHOW TABLES') 114 | tables = [x[0] for x in cursor.fetchall()] 115 | for table in tables: 116 | cursor.execute('DROP TABLE IF EXISTS %s.%s' % (database, table)) 117 | cursor.execute('SHOW FUNCTIONS') 118 | udfs = [x[1] for x in cursor.fetchall()] 119 | 
for udf in udfs: 120 | cursor.execute('DROP FUNCTION IF EXISTS %s.%s' % (database, udf)) 121 | cursor.execute('SHOW AGGREGATE FUNCTIONS') 122 | udas = [x[1] for x in cursor.fetchall()] 123 | for uda in udas: 124 | cursor.execute('DROP AGGREGATE FUNCTION IF EXISTS %s.%s' % ( 125 | database, uda)) 126 | cursor.execute('USE default') 127 | cursor.execute('DROP DATABASE IF EXISTS %s' % database) 128 | 129 | 130 | def force_drop_hive_database(cursor, database): 131 | cursor.execute('USE default') 132 | cursor.execute('DROP DATABASE IF EXISTS {0} CASCADE'.format(database)) 133 | 134 | 135 | def _escape(s): 136 | e = s 137 | e = e.replace('\\', '\\\\') 138 | e = e.replace('\n', '\\n') 139 | e = e.replace('\r', '\\r') 140 | e = e.replace("'", "\\'") 141 | e = e.replace('"', '\\"') 142 | log.debug('%s => %s', s, e) 143 | return e 144 | 145 | 146 | def _py_to_sql_string(value): 147 | if value is None: 148 | return 'NULL' 149 | elif isinstance(value, six.string_types): 150 | return "'" + _escape(value) + "'" 151 | else: 152 | return str(value) 153 | 154 | 155 | # Logging-related utils 156 | 157 | 158 | def warn_protocol_param(): 159 | msg = ("Specifying the protocol argument is no longer necessary because " 160 | "impyla only supports HiveServer2.") 161 | warnings.warn(msg, Warning) 162 | 163 | 164 | def warn_deprecate(functionality='This', alternative=None): 165 | msg = ("{0} functionality in impyla is now deprecated and will be removed " 166 | "in a future release".format(functionality)) 167 | if alternative: 168 | msg += "; Please use {0} instead.".format(alternative) 169 | warnings.warn(msg, Warning) 170 | 171 | 172 | def warn_nontls_jwt(): 173 | msg = ("JWT authentication is running without SSL/TLS. 
This is not a secure " 174 | "configuration unless other layers are providing transport security.") 175 | warnings.warn(msg, Warning) 176 | 177 | # Cookie-related utils 178 | 179 | 180 | def cookie_matches_path(c, path): 181 | if 'path' not in c or not c['path']: 182 | return True 183 | cookie_path = c['path'].strip() 184 | if not cookie_path.startswith('/'): 185 | cookie_path = '/' + cookie_path 186 | cookie_path = os.path.normpath(cookie_path) 187 | if cookie_path == '/': 188 | return True 189 | if not path.startswith('/'): 190 | path = '/' + path 191 | path = os.path.normpath(path) 192 | return path == cookie_path or path.startswith(cookie_path + '/') 193 | 194 | 195 | def get_cookie_expiry(c): 196 | if 'max-age' in c and c['max-age']: 197 | try: 198 | max_age_sec = int(c['max-age']) 199 | return datetime.datetime.now() + datetime.timedelta(seconds=max_age_sec) 200 | except: 201 | pass 202 | # TODO: implement support for 'expires' cookie attribute as well. 203 | return None 204 | 205 | 206 | def get_cookies(resp_headers): 207 | """Returns a SimpleCookie containing all Set-Cookie entries in resp_headers.""" 208 | if 'Set-Cookie' not in resp_headers: 209 | return None 210 | 211 | cookies = http_cookies.SimpleCookie() 212 | try: 213 | if sys.version_info.major == 2: 214 | cookies.load(resp_headers['Set-Cookie']) 215 | else: 216 | cookie_headers = resp_headers.get_all('Set-Cookie') 217 | for header in cookie_headers: 218 | cookies.load(header) 219 | return cookies 220 | except Exception: 221 | return None 222 | 223 | 224 | def get_all_cookies(path, resp_headers): 225 | """Return cookies that match path. 
226 | 227 | Returns a list of Morsel objects representing cookie key/value pairs for all cookies 228 | in resp_headers matching path.""" 229 | cookies = get_cookies(resp_headers) 230 | if not cookies: 231 | return None 232 | 233 | matching_cookies = [] 234 | for c in cookies.values(): 235 | if c and cookie_matches_path(c, path): 236 | matching_cookies.append(c) 237 | return matching_cookies 238 | 239 | 240 | def get_all_matching_cookies(cookie_names, path, resp_headers): 241 | """Return cookies in cookie_names that match path. 242 | 243 | Returns a list of Morsel objects representing cookie key/value pairs for cookies 244 | in resp_headers matching path where the cookie is also listed in cookie_names.""" 245 | cookies = get_cookies(resp_headers) 246 | if not cookies: 247 | return None 248 | 249 | matching_cookies = [] 250 | for cn in cookie_names: 251 | if cn in cookies: 252 | c = cookies[cn] 253 | if c and cookie_matches_path(c, path): 254 | matching_cookies.append(c) 255 | return matching_cookies 256 | 257 | 258 | def get_basic_credentials_for_request_headers(user, password): 259 | """Returns base64 encoded credentials for HTTP request headers 260 | 261 | This function produces RFC 2617-compliant basic credentials: 262 | - RFC 2045 encoding of username:password without limitations to 76 chars 263 | per line (and without trailing newline) 264 | - No translation of characters (+,/) for URL-safety 265 | """ 266 | user_password = '%s:%s' % (user, password) 267 | return base64.b64encode(user_password.encode()).decode() 268 | -------------------------------------------------------------------------------- /io/manylinux/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 Cloudera Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eu -o pipefail
set -x

# Called inside the manylinux1 image
echo "Started $0 $@"

PIP_DISTS_BUILD_DIR="$1"
GIT_VERSION_TAG="$2"
GITHUB_ACCOUNT="$3"

PKG_NAME="impyla"
GIT_REPO="impyla"
GIT_URL="https://github.com/${GITHUB_ACCOUNT}/${GIT_REPO}.git"

WHEELHOUSE_DIR="${PIP_DISTS_BUILD_DIR}/wheelhouse"
SDIST_DIR="${PIP_DISTS_BUILD_DIR}/sdist"

# krb5-libs & krb5-devel are required by kerberos package
# python27 is required for testing
SYSTEM_REQUIREMENTS=(krb5-libs krb5-devel python27)

# Install system packages, clone the tagged sources into /tmp and reset the
# output directory.  Leaves the shell inside the cloned repository.
prepare_system() {
  # Install system packages required by kerberos.
  yum install -y "${SYSTEM_REQUIREMENTS[@]}"

  # Add "krb5-config" to path if necessary
  if ! type krb5-config >/dev/null 2>&1; then
    export PATH="/usr/kerberos/bin:$PATH"
  fi

  cd /tmp
  git clone -b "$GIT_VERSION_TAG" --single-branch "$GIT_URL"
  cd "$GIT_REPO"
  echo "Build directory: $(pwd)"

  # Clean up dists directory
  rm -rf "$PIP_DISTS_BUILD_DIR" || true
  mkdir -p "$PIP_DISTS_BUILD_DIR"

  echo "Python versions found: $(cd /opt/python && echo cp* | sed -e 's|[^ ]*-||g')"
  g++ --version
}

# Succeeds when the given /opt/python directory name denotes CPython 2.
is_cpython2() {
  local pyver_abi="$1"
  [[ "$pyver_abi" =~ ^cp2 ]]
}

# Build one universal wheel with the first Python 3 interpreter found.
build_wheel() {
  local pydir=""
  local wheel_path=""
  for pydir in /opt/python/*; do
    # Build universal wheel with python3
    local pyver_abi="$(basename "$pydir")"
    if is_cpython2 "$pyver_abi"; then continue; fi

    echo "Building universal wheel with $("${pydir}/bin/python" -V 2>&1)"
    "${pydir}/bin/python" setup.py bdist_wheel --universal -d "$WHEELHOUSE_DIR"
    wheel_path="$(ls "${WHEELHOUSE_DIR}"/*.whl)"
    break
  done

  # Test the variable, not the literal string "wheel_path" (which is never
  # empty and would make this check unreachable).
  if [ -z "$wheel_path" ]; then
    echo "Failed building wheels. Couldn't find python>=3.0"
    exit 1
  fi
}

show_wheel() {
  ls -l "${WHEELHOUSE_DIR}/"*.whl
}

# Build the source distribution with the first Python 3 interpreter found.
build_sdist() {
  local pydir=""
  local sdist_path=""
  for pydir in /opt/python/*; do
    # Build sdist with python3
    local pyver_abi="$(basename "$pydir")"
    if is_cpython2 "$pyver_abi"; then continue; fi

    echo "Building sdist with $("${pydir}/bin/python" -V 2>&1)"
    "${pydir}/bin/python" setup.py sdist -d "$SDIST_DIR"
    sdist_path="$(ls "${SDIST_DIR}"/*.tar.gz)"
    break
  done

  if [ -z "$sdist_path" ]; then
    echo "Failed building sdist. Couldn't find python>=3.0"
    exit 1
  fi
}

show_sdist() {
  ls -l "$SDIST_DIR"
}

# Create and activate ./impyla_test_env using the interpreter under $1.
set_up_virt_env() {
  local pydir="$1"
  local pyver_abi="$(basename "$pydir")"

  if is_cpython2 "$pyver_abi"; then
    "${pydir}/bin/python" -m virtualenv impyla_test_env
  else
    "${pydir}/bin/python" -m venv impyla_test_env
  fi

  # set -eu must be disabled temporarily for activating the env.
  set +e +u
  source impyla_test_env/bin/activate
  set -eu
}

# Deactivate and delete ./impyla_test_env.
tear_down_virt_env() {
  # set -eu must be disabled temporarily for deactivating the env.
  set +e +u
  deactivate
  set -eu

  rm -rf impyla_test_env
}

# Like set_up_virt_env, but for the separately installed python27 package:
# $1 is its library directory, $2 the interpreter path.
set_up_virt_env_py27() {
  local py27lib="$1"
  local py27="$2"
  LD_LIBRARY_PATH="$py27lib" "$py27" -m virtualenv impyla_test_env

  # set -eu must be disabled temporarily for activating the env.
  set +e +u
  source impyla_test_env/bin/activate
  set -eu

  LD_LIBRARY_PATH="$py27lib" easy_install -U setuptools
}

# Install the built sdist and wheel into fresh environments for every
# available interpreter and check that impala.dbapi can be imported.
sanity_check() {
  cat <<EOF >/tmp/sanity_check.py
import impala.dbapi
EOF

  cd /tmp

  # Install sdist with different python versions and run sanity_check.
  local sdistfn="$(ls "${SDIST_DIR}/${PKG_NAME}"-*.tar.gz)"
  local pydir=""
  for pydir in /opt/python/*; do
    set_up_virt_env "$pydir"
    pip install --no-cache-dir --no-binary "$PKG_NAME" "${sdistfn}[kerberos]"
    python /tmp/sanity_check.py
    tear_down_virt_env
  done

  # Install universal wheel with different python versions and run sanity_check.
  local whlfn="$(ls "${WHEELHOUSE_DIR}/${PKG_NAME}"-*-py2.py3-none-any.whl)"
  for pydir in /opt/python/*; do
    set_up_virt_env "$pydir"
    pip install --no-cache-dir --only-binary "$PKG_NAME" "${whlfn}[kerberos]"
    python /tmp/sanity_check.py
    tear_down_virt_env
  done

  # Test with separately installed python27
  local py27lib="/opt/rh/python27/root/usr/lib64"
  local py27="/opt/rh/python27/root/usr/bin/python"
  for pkgfn in "$sdistfn" "$whlfn"; do
    set_up_virt_env_py27 "$py27lib" "$py27"
    LD_LIBRARY_PATH="$py27lib" pip install --no-cache-dir "${pkgfn}[kerberos]"
    LD_LIBRARY_PATH="$py27lib" python /tmp/sanity_check.py
    tear_down_virt_env
  done
}

prepare_system

build_wheel
show_wheel

build_sdist
show_sdist

sanity_check
#! /usr/bin/env python
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Read a Jenkins build JSON document on stdin and print its ``result``."""

import sys

try:
    import json
except ImportError:
    import simplejson as json  # pylint: disable=import-error


def extract_result(payload):
    """Return the ``result`` field of the JSON document in `payload`.

    A JSON ``null`` result is reported as ``'UNKNOWN'`` so the caller always
    receives a string it can compare against ``SUCCESS``.

    Raises KeyError if the document has no ``result`` field, and whatever
    ``json.loads`` raises if `payload` is not valid JSON.
    """
    data = json.loads(payload)
    result = data['result']
    if result is None:
        # Concatenating None with '\n' would raise TypeError.
        return 'UNKNOWN'
    return result


def main():
    """Print the build result of the JSON document read from stdin."""
    sys.stdout.write(extract_result(sys.stdin.read()) + '\n')


if __name__ == '__main__':
    main()
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
set -x

# Check for necessary environment variables
: ${IMPYLA_TEST_HOST:?"IMPYLA_TEST_HOST is unset"}
: ${IMPYLA_TEST_PORT:?"IMPYLA_TEST_PORT is unset"}
: ${IMPYLA_TEST_AUTH_MECH:?"IMPYLA_TEST_AUTH_MECH is unset"}
: ${PYTHON_VERSION:?"PYTHON_VERSION is unset"}
# the following are set by jenkins and are only needed if WORKSPACE not set
#     GIT_URL
#     GIT_BRANCH
# and for pulling in a pull request
#     GITHUB_PR
# For reporting to codecov.io, set
#     CODECOV_TOKEN
#
# If testing against Hive, make sure that the HDFS dir /user/<username> exists
# for whichever user is running the Hive queries.  Otherwise, the MapReduce
# jobs will fail.

# NOTE(review): printenv (and set -x) write every variable, including
# CODECOV_TOKEN, to the build log -- confirm that this is acceptable.
printenv

mkdir -p /tmp/impyla-dbapi
TMP_DIR=$(mktemp -d -p /tmp/impyla-dbapi tmpXXXX)

function cleanup_tmp_dir {
    cd ~
    rm -rf "$TMP_DIR"
}
trap cleanup_tmp_dir EXIT

cd "$TMP_DIR"

# checkout impyla if necessary
# this is necessary when run via SSH on a kerberized node
if [ -z "$WORKSPACE" ]; then
    : ${GIT_URL:?"GIT_URL is unset"}
    : ${GIT_BRANCH:?"GIT_BRANCH is unset"}
    git clone "$GIT_URL"
    pushd impyla && git checkout "origin/$GIT_BRANCH" && popd
    IMPYLA_HOME="$TMP_DIR/impyla"
else
    # WORKSPACE is set, so I must be on a Jenkins slave
    IMPYLA_HOME="$WORKSPACE"
fi

# pull in PR if necessary
if [ -z "$WORKSPACE" ] && [ -n "$GITHUB_PR" ]; then
    pushd "$IMPYLA_HOME"
    git clean -d -f
    git fetch origin "pull/$GITHUB_PR/head:pr_$GITHUB_PR"
    git checkout "pr_$GITHUB_PR"
    popd
fi

# Setup Python
curl https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p "$TMP_DIR/miniconda"
export PATH="$TMP_DIR/miniconda/bin:$PATH"
conda update -y -q conda
conda info -a

# Install impyla and deps into new environment
CONDA_ENV_NAME=pyenv-impyla-dbapi-test
conda create -y -q -n "$CONDA_ENV_NAME" "python=$PYTHON_VERSION" pip
source activate "$CONDA_ENV_NAME"
pip install sqlalchemy
pip install unittest2 pytest-cov

# build impyla
pip install "$IMPYLA_HOME"

python --version
which python

if [ "$IMPYLA_TEST_AUTH_MECH" != "NOSASL" ]; then
    # Hive and Kerberos all need sasl installed
    sudo yum install -y cyrus-sasl-devel
    pip install sasl
fi

if [ "$IMPYLA_TEST_AUTH_MECH" = "GSSAPI" ] || [ "$IMPYLA_TEST_AUTH_MECH" = "LDAP" ]; then
    # CLOUDERA INTERNAL JENKINS/KERBEROS CONFIG
    # impyla tests create databases, so we need to give systest the requisite
    # privileges
    kinit -l 4h -kt /cdep/keytabs/hive.keytab hive
    sudo -u hive PYTHON_EGG_CACHE=/dev/null impala-shell -k -q "GRANT ALL ON SERVER TO ROLE cdep_default_admin WITH GRANT OPTION"
    kdestroy

    function cleanup_sentry_roles {
        cleanup_tmp_dir  # only one command per trapped signal
        kinit -l 4h -kt /cdep/keytabs/hive.keytab hive
        sudo -u hive PYTHON_EGG_CACHE=/dev/null impala-shell -k -q "REVOKE ALL ON SERVER FROM ROLE cdep_default_admin"
        kdestroy
    }
    # Replaces the earlier EXIT trap; cleanup_sentry_roles calls
    # cleanup_tmp_dir itself so the temp dir is still removed.
    trap cleanup_sentry_roles EXIT

    kinit -l 4h -kt /cdep/keytabs/systest.keytab systest
fi

python -c "from impala.tests.util import ImpylaTestEnv; print(ImpylaTestEnv())"

cd "$IMPYLA_HOME"

# Run PEP 249 testing suite
py.test --connect \
    --cov impala \
    --cov-report xml --cov-report term \
    --cov-config .coveragerc \
    impala

# Enforce PEP 8 etc
if [ "$PYTHON_VERSION" != "2.6" ]; then
    # Quoted so the brackets are not treated as a shell glob.
    pip install "prospector[with_pyroma]"
    prospector
fi

# Report code coverage to codecov.io
# The expansion must be quoted: an unquoted empty value collapses the test to
# `[ -n ]`, which is always true.
if [ -n "$CODECOV_TOKEN" ]; then
    bash <(curl -s https://codecov.io/bash) -t "$CODECOV_TOKEN"
fi
14 | 15 | from __future__ import absolute_import 16 | 17 | import ez_setup 18 | ez_setup.use_setuptools() 19 | 20 | from setuptools import setup, find_packages 21 | 22 | 23 | def readme(): 24 | with open('README.md', 'r') as ip: 25 | return ip.read() 26 | 27 | setup( 28 | name='impyla', 29 | # impala/__init__.py also contains the version - the two should have the same value! 30 | version='v0.21.0', 31 | description='Python client for the Impala distributed query engine', 32 | long_description_content_type='text/markdown', 33 | long_description=readme(), 34 | maintainer='Wes McKinney', 35 | maintainer_email='wes.mckinney@twosigma.com', 36 | author='Uri Laserson', 37 | author_email='laserson@cloudera.com', 38 | url='https://github.com/cloudera/impyla', 39 | packages=find_packages(), 40 | install_package_data=True, 41 | package_data={'impala.thrift': ['*.thrift']}, 42 | install_requires=['six', 'bitarray<3', 'thrift==0.16.0', 'thrift_sasl==0.4.3'], 43 | extras_require={ 44 | "kerberos": ['kerberos>=1.3.0'], 45 | }, 46 | keywords=('cloudera impala python hadoop sql hdfs mpp spark pydata ' 47 | 'pandas distributed db api pep 249 hive hiveserver2 hs2'), 48 | license='Apache License, Version 2.0', 49 | classifiers=[ 50 | 'Programming Language :: Python :: 2', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3', 53 | 'Programming Language :: Python :: 3.6', 54 | 'Programming Language :: Python :: 3.7', 55 | 'Programming Language :: Python :: 3.8', 56 | 'Programming Language :: Python :: 3.9', 57 | 'Programming Language :: Python :: 3.10', 58 | 'Programming Language :: Python :: 3.11', 59 | 'Programming Language :: Python :: 3.12', 60 | 'Programming Language :: Python :: 3.13', 61 | ], 62 | entry_points={ 63 | 'sqlalchemy.dialects': ['impala = impala.sqlalchemy:ImpalaDialect', 64 | 'impala4 = impala.sqlalchemy:Impala4Dialect'] 65 | }, 66 | zip_safe=False) 67 | 
-------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | requires = 3 | tox>=4 4 | virtualenv<20.22.0 5 | env_list = py{27,36,37,38,39,310,311,312,313} 6 | 7 | [testenv] 8 | description = Run unit tests assuming local impala dev environment 9 | deps = 10 | pytest>=6,<7 11 | sqlalchemy>=2 12 | requests 13 | pandas 14 | setenv = 15 | IMPYLA_TEST_HIVE_PORT = 11050 16 | IMPYLA_TEST_HIVE_USER = hive 17 | commands = 18 | pytest --connect impala/tests {posargs} 19 | 20 | [testenv:py27] 21 | deps = 22 | pytest>=4,<5 23 | sqlalchemy>=1,<2 24 | requests 25 | pandas --------------------------------------------------------------------------------