├── MANIFEST.in ├── yarn_api_client ├── __main__.py ├── errors.py ├── __init__.py ├── auth.py ├── constants.py ├── base.py ├── hadoop_conf.py ├── node_manager.py ├── history_server.py ├── application_master.py ├── main.py └── resource_manager.py ├── docs ├── node_manager.rst ├── base.rst ├── history_server.rst ├── resource_manager.rst ├── application_master.rst ├── index.rst ├── Makefile └── conf.py ├── itests ├── __init__.py └── integration_test_resource_manager.py ├── tests ├── __init__.py ├── test_main.py ├── test_constants.py ├── test_node_manager.py ├── test_base.py ├── test_application_master.py ├── test_history_server.py ├── test_hadoop_conf.py └── test_resource_manager.py ├── requirements.yml ├── .gitattributes ├── tox.ini ├── setup.cfg ├── .gitignore ├── .github └── workflows │ └── build.yml ├── LICENSE ├── Makefile ├── setup.py └── README.md /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md -------------------------------------------------------------------------------- /yarn_api_client/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .main import main 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /docs/node_manager.rst: -------------------------------------------------------------------------------- 1 | NodeManager API's. 2 | ======================= 3 | 4 | .. automodule:: yarn_api_client.node_manager 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/base.rst: -------------------------------------------------------------------------------- 1 | Base Response class 2 | ========================== 3 | 4 | .. 
autoclass:: yarn_api_client.base.Response 5 | :members: data 6 | -------------------------------------------------------------------------------- /docs/history_server.rst: -------------------------------------------------------------------------------- 1 | History Server API's. 2 | ========================== 3 | 4 | .. automodule:: yarn_api_client.history_server 5 | :members: 6 | -------------------------------------------------------------------------------- /itests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | try: 3 | from unittest2 import TestCase 4 | except ImportError: 5 | from unittest import TestCase 6 | -------------------------------------------------------------------------------- /docs/resource_manager.rst: -------------------------------------------------------------------------------- 1 | ResourceManager API's. 2 | =========================== 3 | 4 | .. automodule:: yarn_api_client.resource_manager 5 | :members: 6 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | try: 3 | from unittest2 import TestCase # NOQA 4 | except ImportError: 5 | from unittest import TestCase # NOQA 6 | -------------------------------------------------------------------------------- /docs/application_master.rst: -------------------------------------------------------------------------------- 1 | MapReduce Application Master API's. 2 | ======================================== 3 | 4 | .. 
automodule:: yarn_api_client.application_master 5 | :members: 6 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from tests import TestCase 3 | 4 | import yarn_api_client.main as m 5 | 6 | 7 | class MainTestCase(TestCase): 8 | def test_get_parser(self): 9 | m.get_parser() 10 | -------------------------------------------------------------------------------- /yarn_api_client/errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | class APIError(Exception): 5 | pass 6 | 7 | 8 | class ConfigurationError(APIError): 9 | pass 10 | 11 | 12 | class IllegalArgumentError(APIError): 13 | pass 14 | -------------------------------------------------------------------------------- /yarn_api_client/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __version__ = '1.0.4.dev0' 3 | __all__ = ['ApplicationMaster', 'HistoryServer', 'NodeManager', 'ResourceManager'] 4 | 5 | from .application_master import ApplicationMaster 6 | from .history_server import HistoryServer 7 | from .node_manager import NodeManager 8 | from .resource_manager import ResourceManager 9 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | dependencies: 5 | - pip 6 | - requests>=2.7,<3.0 7 | 8 | # Test Requirements 9 | - mock 10 | - nose 11 | - tox 12 | - pip: 13 | - requests_mock 14 | 15 | # Code Style 16 | - flake8 17 | 18 | # Documentation Requirements 19 | - recommonmark 20 | - sphinx=1.8.3 21 | - sphinx_rtd_theme 22 | -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | # Set the default behavior to have all files normalized to Unix-style 2 | # line endings upon check-in. 3 | * text=auto 4 | # Declare files that will always have CRLF line endings on checkout. 5 | *.bat text eol=crlf 6 | # Denote all files that are truly binary and should not be modified. 7 | *.dll binary 8 | *.exp binary 9 | *.lib binary 10 | *.pdb binary 11 | *.exe binary 12 | 13 | -------------------------------------------------------------------------------- /tests/test_constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from tests import TestCase 3 | 4 | from yarn_api_client import constants 5 | 6 | 7 | class ConstantsTestCase(TestCase): 8 | def test_stats_len(self): 9 | self.assertEqual(8, len(constants.YarnApplicationState)) 10 | self.assertEqual(6, len(constants.ApplicationState)) 11 | self.assertEqual(4, len(constants.FinalApplicationStatus)) 12 | self.assertEqual(14, len(constants.JobStateInternal)) 13 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | 2 | [tox] 3 | envlist = py36, py37, py38, py39, py310 4 | 5 | [gh-actions] 6 | python = 7 | 3.6: py36 8 | 3.7: py37 9 | 3.8: py38 10 | 3.9: py39 11 | 3.10: py310 12 | 13 | [testenv] 14 | deps = 15 | coverage 16 | mock 17 | py36: cryptography<=3.2.2 # requests-kerberos pulls in newer crypt that requires rust compiler on 3.6 18 | requests 19 | pywinrm[kerberos] 20 | requests-kerberos 21 | requests_mock 22 | commands = coverage run --source=yarn_api_client setup.py test 23 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=0 3 | 4 | [metadata] 5 | 
description-file=README.md 6 | license_file = LICENSE 7 | 8 | [flake8] 9 | # References: 10 | # https://flake8.readthedocs.io/en/latest/user/configuration.html 11 | # https://flake8.readthedocs.io/en/latest/user/error-codes.html 12 | exclude = __init__.py 13 | ignore = 14 | # Import formatting 15 | E4, 16 | # Comparing types instead of isinstance 17 | E721, 18 | # Assigning lambda expression 19 | E731, 20 | # Ambiguous variable names 21 | E741, 22 | # Allow breaks after binary operators 23 | W504 24 | max-line-length = 120 25 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. yarn-api-client documentation master file, created by 2 | sphinx-quickstart on Thu Jul 31 22:07:17 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to yarn-api-client's documentation! 7 | =========================================== 8 | 9 | Contents: 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | 14 | base 15 | resource_manager 16 | node_manager 17 | application_master 18 | history_server 19 | 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # -*- mode: gitignore; -*- 2 | *~ 3 | \#*\# 4 | 5 | # Mac 6 | .DS_Store 7 | 8 | # Eclipse 9 | .classpath 10 | .project 11 | .settings/ 12 | target/ 13 | 14 | # Intellij 15 | .idea/ 16 | .idea_modules/ 17 | *.iml 18 | *.iws 19 | *.class 20 | *.log 21 | 22 | # Others 23 | .checkstyle 24 | .fbExcludeFilterFile 25 | 26 | # Byte-compiled / optimized / DLL files 27 | __pycache__/ 28 | *.py[cod] 29 | 30 | # C extensions 31 | *.so 32 | 33 | # Distribution / packaging 34 | .Python 35 | env/ 36 | bin/ 37 | build/ 38 | develop-eggs/ 39 | dist/ 40 | eggs/ 41 | lib/ 42 | lib64/ 43 | parts/ 44 | sdist/ 45 | var/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | 62 | # Translations 63 | *.mo 64 | 65 | # Mr Developer 66 | .mr.developer.cfg 67 | .project 68 | .pydevproject 69 | 70 | # Rope 71 | .ropeproject 72 | 73 | # Django stuff: 74 | *.log 75 | *.pot 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | -------------------------------------------------------------------------------- /yarn_api_client/auth.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | class SimpleAuth(requests.auth.AuthBase): 4 | def __init__(self, username="yarn"): 5 | self.username = username 6 | self.auth_token = None 7 | self.auth_done = False 8 | 9 | def __call__(self, request): 10 | if not 
self.auth_done: 11 | _session = requests.Session() 12 | r = _session.get(request.url, params={"user.name": self.username}, allow_redirects=False) 13 | r.raise_for_status() 14 | 15 | if 'This is standby RM.' not in r.text: 16 | self.auth_token = _session.cookies.get_dict()['hadoop.auth'] 17 | self.auth_done = True 18 | 19 | # Borrowed from https://github.com/psf/requests/issues/2532#issuecomment-90126896 20 | if 'Cookie' in request.headers: 21 | old_cookies = request.headers['Cookie'] 22 | all_cookies = '; '.join([old_cookies, "{0}={1}".format("hadoop.auth", self.auth_token)]) 23 | request.headers['Cookie'] = all_cookies 24 | else: 25 | request.prepare_cookies({"hadoop.auth": self.auth_token}) 26 | return request 27 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Builds 2 | on: 3 | push: 4 | branches: '*' 5 | pull_request: 6 | branches: '*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ${{ matrix.os }} 11 | env: 12 | TOX_ENV: py${{ matrix.python-version}} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest, macos-latest] 17 | python-version: [ '3.6', '3.7', '3.8', '3.9' ] 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v2 21 | with: 22 | clean: true 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v1 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | architecture: 'x64' 28 | - name: Install Dependencies (Linux) 29 | run: sudo apt-get install libkrb5-dev 30 | if: matrix.os == 'ubuntu-latest' 31 | - name: Install dependencies 32 | run: | 33 | pip install --upgrade setuptools pip tox tox-gh-actions coveralls 34 | pip freeze 35 | - name: Run the tests 36 | run: tox 37 | - name: Upload coverage data to coveralls.io 38 | env: 39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | COVERALLS_PARALLEL: true 41 | run: coveralls --service=github 
42 | - name: Code coverage 43 | env: 44 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 45 | run: | 46 | coveralls --finish 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Eduard Iskandarov 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the {organization} nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-docs clean-test clean-pyc clean-build dist docs help 2 | .DEFAULT_GOAL := help 3 | 4 | SA:=source activate 5 | ENV:=hadoop-yarn-api-python-client 6 | 7 | help: 8 | @grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 9 | 10 | 11 | ## Setup conda environments 12 | env: ## Make a dev environment 13 | -conda env create --file requirements.yml --name $(ENV) 14 | 15 | activate: ## Activate the virtualenv (default: hadoop-yarn-api-python-client) 16 | @echo "$(SA) $(ENV)" 17 | 18 | nuke: ## Make clean + remove conda env 19 | -conda env remove -n $(ENV) -y 20 | 21 | ## Clean different build artifacts from multiple build phases 22 | 23 | clean: clean-build clean-pyc clean-test clean-docs ## remove all build, test, coverage and Python artifacts 24 | 25 | clean-build: 26 | rm -fr build/ 27 | rm -fr dist/ 28 | rm -fr .eggs/ 29 | find . -name '*.egg-info' -exec rm -fr {} + 30 | find . -name '*.egg' -exec rm -f {} + 31 | 32 | clean-pyc: 33 | find . -name '*.pyc' -exec rm -f {} + 34 | find . -name '*.pyo' -exec rm -f {} + 35 | find . -name '*~' -exec rm -f {} + 36 | find . 
-name '__pycache__' -exec rm -fr {} + 37 | 38 | clean-test: 39 | rm -fr .tox/ 40 | rm -f .coverage 41 | rm -fr htmlcov/ 42 | rm -fr .pytest_cache 43 | 44 | clean-docs: 45 | $(MAKE) -C docs clean 46 | 47 | lint: ## check style with flake8 48 | $(SA) $(ENV) && flake8 yarn-api-client itests tests 49 | 50 | test: ## run tests quickly with the default Python 51 | $(SA) $(ENV) && nosetests -v tests 52 | 53 | docs: clean-docs ## generate Sphinx HTML documentation, including API docs 54 | $(SA) $(ENV) && $(MAKE) -C docs html 55 | 56 | release: dist ## package and upload a release 57 | twine upload dist/* 58 | 59 | dist: clean ## builds source and wheel package 60 | $(SA) $(ENV) && python setup.py bdist_wheel 61 | $(SA) $(ENV) && python setup.py sdist 62 | ls -l dist 63 | 64 | install: clean ## install the package to the active Python's site-packages 65 | $(SA) $(ENV) && python setup.py install 66 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import codecs 3 | import os 4 | import re 5 | from setuptools import setup, find_packages 6 | 7 | 8 | def read(*parts): 9 | filename = os.path.join(os.path.dirname(__file__), *parts) 10 | with codecs.open(filename, encoding='utf-8') as fp: 11 | return fp.read() 12 | 13 | 14 | def find_version(*file_paths): 15 | version_file = read(*file_paths) 16 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", 17 | version_file, re.M) 18 | if version_match: 19 | return version_match.group(1) 20 | raise RuntimeError("Unable to find version string.") 21 | 22 | setup( 23 | name = 'yarn-api-client', 24 | version = find_version('yarn_api_client', '__init__.py'), 25 | description='Python client for Hadoop® YARN API', 26 | long_description=read('README.md'), 27 | long_description_content_type='text/markdown', 28 | packages = find_packages(exclude=['tests','itests']), 29 | 30 | 
install_requires = [ 31 | 'requests>=2.7,<3.0', 32 | ], 33 | 34 | entry_points = { 35 | 'console_scripts': [ 36 | 'yarn_client = yarn_api_client.main:main', 37 | ], 38 | }, 39 | 40 | tests_require = ['mock', 'flake8'], 41 | test_suite = 'tests', 42 | 43 | author = 'Iskandarov Eduard', 44 | author_email = 'eduard.iskandarov@ya.ru', 45 | maintainer = 'Dmitry Romanenko', 46 | maintainer_email = 'dmitry@romanenko.in', 47 | license = 'BSD', 48 | url = 'https://github.com/CODAIT/hadoop-yarn-api-python-client', 49 | classifiers = [ 50 | 'Intended Audience :: Developers', 51 | 'License :: OSI Approved :: BSD License', 52 | 'Operating System :: OS Independent', 53 | 'Programming Language :: Python :: 3.6', 54 | 'Programming Language :: Python :: 3.7', 55 | 'Programming Language :: Python :: 3.8', 56 | 'Programming Language :: Python :: 3.9', 57 | 'Programming Language :: Python :: 3.10', 58 | 'Topic :: System :: Distributed Computing', 59 | ], 60 | ) 61 | -------------------------------------------------------------------------------- /yarn_api_client/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ACCEPTED = 'ACCEPTED' 3 | FAILED = 'FAILED' 4 | FINISHED = 'FINISHED' 5 | KILLED = 'KILLED' 6 | NEW = 'NEW' 7 | NEW_SAVING = 'NEW_SAVING' 8 | RUNNING = 'RUNNING' 9 | SUBMITTED = 'SUBMITTED' 10 | SUCCEEDED = 'SUCCEEDED' 11 | UNDEFINED = 'UNDEFINED' 12 | INITING = 'INITING' 13 | INITED = 'INITED' 14 | FINISHING_CONTAINERS_WAIT = 'FINISHING_CONTAINERS_WAIT' 15 | APPLICATION_RESOURCES_CLEANINGUP = 'APPLICATION_RESOURCES_CLEANINGUP' 16 | SETUP = 'SETUP' 17 | COMMITTING = 'COMMITTING' 18 | FAIL_WAIT = 'FAIL_WAIT' 19 | FAIL_ABORT = 'FAIL_ABORT' 20 | KILL_WAIT = 'KILL_WAIT' 21 | KILL_ABORT = 'KILL_ABORT' 22 | ERROR = 'ERROR' 23 | REBOOT = 'REBOOT' 24 | OUTPUT_THREAD_DUMP = 'OUTPUT_THREAD_DUMP' 25 | GRACEFUL_SHUTDOWN = 'GRACEFUL_SHUTDOWN' 26 | FORCEFUL_SHUTDOWN = 'FORCEFUL_SHUTDOWN' 27 | 28 | 
YarnApplicationState = ( 29 | (ACCEPTED, 'Application has been accepted by the scheduler.'), 30 | (FAILED, 'Application which failed.'), 31 | (FINISHED, 'Application which finished successfully.'), 32 | (KILLED, 'Application which was terminated by a user or admin.'), 33 | (NEW, 'Application which was just created.'), 34 | (NEW_SAVING, 'Application which is being saved.'), 35 | (RUNNING, 'Application which is currently running.'), 36 | (SUBMITTED, 'Application which has been submitted.'), 37 | ) 38 | 39 | 40 | ApplicationState = ( 41 | (NEW, NEW), 42 | (INITING, INITING), 43 | (RUNNING, RUNNING), 44 | (FINISHING_CONTAINERS_WAIT, FINISHING_CONTAINERS_WAIT), 45 | (APPLICATION_RESOURCES_CLEANINGUP, APPLICATION_RESOURCES_CLEANINGUP), 46 | (FINISHED, FINISHED), 47 | ) 48 | 49 | 50 | FinalApplicationStatus = ( 51 | (FAILED, 'Application which failed.'), 52 | (KILLED, 'Application which was terminated by a user or admin.'), 53 | (SUCCEEDED, 'Application which finished successfully.'), 54 | (UNDEFINED, 'Undefined state when either the application has not yet finished.') 55 | ) 56 | 57 | 58 | JobStateInternal = ( 59 | (NEW, NEW), 60 | (SETUP, SETUP), 61 | (INITED, INITED), 62 | (RUNNING, RUNNING), 63 | (COMMITTING, COMMITTING), 64 | (SUCCEEDED, SUCCEEDED), 65 | (FAIL_WAIT, FAIL_WAIT), 66 | (FAIL_ABORT, FAIL_ABORT), 67 | (FAILED, FAILED), 68 | (KILL_WAIT, KILL_WAIT), 69 | (KILL_ABORT, KILL_ABORT), 70 | (KILLED, KILLED), 71 | (ERROR, ERROR), 72 | (REBOOT, REBOOT), 73 | ) 74 | 75 | ClusterContainerSignal = ( 76 | (OUTPUT_THREAD_DUMP, OUTPUT_THREAD_DUMP), 77 | (GRACEFUL_SHUTDOWN, GRACEFUL_SHUTDOWN), 78 | (FORCEFUL_SHUTDOWN, FORCEFUL_SHUTDOWN) 79 | ) 80 | -------------------------------------------------------------------------------- /tests/test_node_manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from mock import patch 3 | from tests import TestCase 4 | 5 | from yarn_api_client.node_manager import 
NodeManager 6 | from yarn_api_client.errors import IllegalArgumentError 7 | 8 | 9 | @patch('yarn_api_client.node_manager.NodeManager.request') 10 | class NodeManagerTestCase(TestCase): 11 | def setUp(self): 12 | self.nm = NodeManager('localhost') 13 | 14 | def test_node_information(self, request_mock): 15 | self.nm.node_information() 16 | request_mock.assert_called_with('/ws/v1/node/info') 17 | 18 | def test_node_applications(self, request_mock): 19 | self.nm.node_applications('RUNNING', 'root') 20 | request_mock.assert_called_with('/ws/v1/node/apps', 21 | params={"state": 'RUNNING', "user": 'root'}) 22 | 23 | self.nm.node_applications() 24 | request_mock.assert_called_with('/ws/v1/node/apps', params={}) 25 | 26 | with self.assertRaises(IllegalArgumentError): 27 | self.nm.node_applications('ololo', 'root') 28 | 29 | def test_node_application(self, request_mock): 30 | self.nm.node_application('app_1') 31 | request_mock.assert_called_with('/ws/v1/node/apps/app_1') 32 | 33 | def test_node_containers(self, request_mock): 34 | self.nm.node_containers() 35 | request_mock.assert_called_with('/ws/v1/node/containers') 36 | 37 | def test_node_container(self, request_mock): 38 | self.nm.node_container('container_1') 39 | request_mock.assert_called_with('/ws/v1/node/containers/container_1') 40 | 41 | def test_auxiliary_services(self, request_mock): 42 | self.nm.auxiliary_services() 43 | request_mock.assert_called_with('/ws/v1/node/auxiliaryservices') 44 | 45 | def test_auxiliary_services_update(self, request_mock): 46 | self.nm.auxiliary_services_update({ 47 | "services": [ 48 | { 49 | "name": "mapreduce_shuffle", 50 | "version": "2", 51 | "configuration": { 52 | "properties": { 53 | "class.name": "org.apache.hadoop.mapred.ShuffleHandler", 54 | "mapreduce.shuffle.transfer.buffer.size": "102400", 55 | "mapreduce.shuffle.port": "13563" 56 | } 57 | } 58 | } 59 | ] 60 | }) 61 | request_mock.assert_called_with('/ws/v1/node/auxiliaryservices', 'PUT', json={ 62 | "services": [ 63 | { 
64 | "name": "mapreduce_shuffle", 65 | "version": "2", 66 | "configuration": { 67 | "properties": { 68 | "class.name": "org.apache.hadoop.mapred.ShuffleHandler", 69 | "mapreduce.shuffle.transfer.buffer.size": "102400", 70 | "mapreduce.shuffle.port": "13563" 71 | } 72 | } 73 | } 74 | ] 75 | }) 76 | -------------------------------------------------------------------------------- /itests/integration_test_resource_manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | 5 | from pprint import pprint 6 | from unittest import TestCase 7 | from urllib.parse import urlparse 8 | from yarn_api_client.resource_manager import ResourceManager 9 | 10 | 11 | class ResourceManagerTestCase(TestCase): 12 | """ 13 | Integration test that, given a provided YARN ENDPOINT, 14 | execute some real scenario test against that server. 15 | 16 | Note that, if no YARN ENDPOINT is provided, the tests 17 | are ignored. 18 | """ 19 | @classmethod 20 | def setUpClass(self): 21 | self.configured = False 22 | if os.getenv('YARN_ENDPOINT'): 23 | yarn_endpoint = os.getenv('YARN_ENDPOINT') 24 | yarn_endpoint_uri = urlparse(yarn_endpoint) 25 | 26 | if yarn_endpoint_uri.hostname and yarn_endpoint_uri.port: 27 | self.configured = True 28 | self.resource_manager = ResourceManager([yarn_endpoint_uri.hostname + ":" + 29 | str(yarn_endpoint_uri.port)]) 30 | 31 | def test_cluster_information(self): 32 | if self.configured: 33 | info = self.resource_manager.cluster_information() 34 | pprint(info.data) 35 | self.assertEqual(info.data['clusterInfo']['state'], 'STARTED') 36 | 37 | def test_cluster_metrics(self): 38 | if self.configured: 39 | metrics = self.resource_manager.cluster_metrics() 40 | pprint(metrics.data) 41 | self.assertGreater(metrics.data['clusterMetrics']['activeNodes'], 0) 42 | self.assertIsNotNone(metrics.data['clusterMetrics']['totalNodes']) 43 | 44 | def test_cluster_scheduler(self): 45 | if self.configured: 46 
| scheduler = self.resource_manager.cluster_scheduler() 47 | pprint(scheduler.data) 48 | self.assertIsNotNone(scheduler.data['scheduler']['schedulerInfo']) 49 | 50 | def test_cluster_applications(self): 51 | if self.configured: 52 | apps = self.resource_manager.cluster_applications() 53 | pprint(apps.data) 54 | self.assertIsNotNone(apps.data['apps']) 55 | 56 | def test_cluster_application_state(self): 57 | if self.configured: 58 | apps = self.resource_manager.cluster_applications() 59 | appid = apps.data['apps']['app'][0]['id'] 60 | print(appid) 61 | response = self.resource_manager.cluster_application_state(appid) 62 | pprint(response.data) 63 | pprint(response.data['state']) 64 | self.assertIsNotNone(apps.data['apps']) 65 | 66 | def test_cluster_application_statistics(self): 67 | if self.configured: 68 | appstats = self.resource_manager.cluster_application_statistics() 69 | pprint(appstats.data) 70 | self.assertIsNotNone(appstats.data['appStatInfo']) 71 | 72 | def test_cluster_nodes(self): 73 | if self.configured: 74 | nodes = self.resource_manager.cluster_nodes() 75 | pprint(nodes.data) 76 | self.assertIsNotNone(nodes.data['nodes']) 77 | 78 | running_nodes = self.resource_manager.cluster_nodes(state='RUNNING', healthy='true') 79 | pprint(running_nodes.data) 80 | self.assertIsNotNone(nodes.data['nodes']) 81 | -------------------------------------------------------------------------------- /tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | import requests_mock 4 | 5 | from tests import TestCase 6 | from yarn_api_client import base 7 | from yarn_api_client.errors import APIError, ConfigurationError 8 | 9 | 10 | class BaseYarnAPITestCase(TestCase): 11 | @staticmethod 12 | def success_response(): 13 | return { 14 | 'status': 'success' 15 | } 16 | 17 | def test_valid_request(self): 18 | with requests_mock.mock() as requests_get_mock: 19 | requests_get_mock.get('/ololo', 
text=json.dumps(BaseYarnAPITestCase.success_response())) 20 | 21 | client = self.get_client() 22 | response = client.request('/ololo', params={"foo": 'bar'}) 23 | 24 | assert requests_get_mock.called 25 | self.assertIn(response.data['status'], 'success') 26 | 27 | def test_valid_request_with_parameters(self): 28 | with requests_mock.mock() as requests_get_mock: 29 | requests_get_mock.get('/ololo?foo=bar', text=json.dumps(BaseYarnAPITestCase.success_response())) 30 | 31 | client = self.get_client() 32 | response = client.request('/ololo', params={"foo": 'bar'}) 33 | 34 | assert requests_get_mock.called 35 | self.assertIn(response.data['status'], 'success') 36 | 37 | def test_bad_request(self): 38 | with requests_mock.mock() as requests_get_mock: 39 | requests_get_mock.get('/ololo', status_code=404) 40 | 41 | client = self.get_client() 42 | with self.assertRaises(APIError): 43 | client.request('/ololo') 44 | 45 | def test_http_configuration(self): 46 | with requests_mock.mock() as requests_get_mock: 47 | requests_get_mock.get('/ololo', text=json.dumps(BaseYarnAPITestCase.success_response())) 48 | 49 | client = self.get_client() 50 | client.service_uri = None 51 | 52 | with self.assertRaises(ConfigurationError): 53 | client.request('/ololo') 54 | 55 | def test_uri_parsing(self): 56 | result_uri = base.Uri('localhost') 57 | self.assertEqual(result_uri.scheme, 'http') 58 | self.assertEqual(result_uri.hostname, 'localhost') 59 | self.assertEqual(result_uri.port, None) 60 | self.assertEqual(result_uri.is_https, False) 61 | 62 | result_uri = base.Uri('test-domain.com:1234') 63 | self.assertEqual(result_uri.scheme, 'http') 64 | self.assertEqual(result_uri.hostname, 'test-domain.com') 65 | self.assertEqual(result_uri.port, 1234) 66 | self.assertEqual(result_uri.is_https, False) 67 | 68 | result_uri = base.Uri('http://123.45.67.89:1234') 69 | self.assertEqual(result_uri.scheme, 'http') 70 | self.assertEqual(result_uri.hostname, '123.45.67.89') 71 | 
self.assertEqual(result_uri.port, 1234) 72 | self.assertEqual(result_uri.is_https, False) 73 | 74 | result_uri = base.Uri('https://test-domain.com:1234') 75 | self.assertEqual(result_uri.scheme, 'https') 76 | self.assertEqual(result_uri.hostname, 'test-domain.com') 77 | self.assertEqual(result_uri.port, 1234) 78 | self.assertEqual(result_uri.is_https, True) 79 | 80 | def get_client(self): 81 | client = base.BaseYarnAPI() 82 | client.service_uri = base.Uri('example.com:80') 83 | client.timeout = 0 84 | client.auth = None 85 | client.verify = True 86 | return client 87 | -------------------------------------------------------------------------------- /tests/test_application_master.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from mock import patch 3 | from tests import TestCase 4 | 5 | from yarn_api_client.application_master import ApplicationMaster 6 | 7 | 8 | @patch('yarn_api_client.application_master.ApplicationMaster.request') 9 | class AppMasterTestCase(TestCase): 10 | def setUp(self): 11 | self.app = ApplicationMaster('localhost') 12 | 13 | @patch('yarn_api_client.application_master.get_webproxy_endpoint') 14 | def test__init__(self, get_config_mock, request_mock): 15 | get_config_mock.return_value = None 16 | ApplicationMaster() 17 | get_config_mock.assert_called_with(30, None, True, None) 18 | 19 | def test_application_information(self, request_mock): 20 | self.app.application_information('app_100500') 21 | request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/info') 22 | 23 | def test_jobs(self, request_mock): 24 | self.app.jobs('app_100500') 25 | request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/jobs') 26 | 27 | def test_job(self, request_mock): 28 | self.app.job('app_100500', 'job_100500') 29 | request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/jobs/job_100500') 30 | 31 | def test_job_attempts(self, request_mock): 32 | 
self.app.job_attempts('app_1', 'job_2') 33 | 34 | def test_job_counters(self, request_mock): 35 | self.app.job_counters('app_1', 'job_2') 36 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/counters') 37 | 38 | def test_job_conf(self, request_mock): 39 | self.app.job_conf('app_1', 'job_2') 40 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/conf') 41 | 42 | def test_job_tasks(self, request_mock): 43 | self.app.job_tasks('app_1', 'job_2') 44 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks') 45 | 46 | def test_job_task(self, request_mock): 47 | self.app.job_task('app_1', 'job_2', 'task_3') 48 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3') 49 | 50 | def test_task_counters(self, request_mock): 51 | self.app.task_counters('app_1', 'job_2', 'task_3') 52 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/counters') 53 | 54 | def test_task_attempts(self, request_mock): 55 | self.app.task_attempts('app_1', 'job_2', 'task_3') 56 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts') 57 | 58 | def test_task_attempt(self, request_mock): 59 | self.app.task_attempt('app_1', 'job_2', 'task_3', 'attempt_4') 60 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4') 61 | 62 | def test_task_attempt_state(self, request_mock): 63 | self.app.task_attempt_state('app_1', 'job_2', 'task_3', 'attempt_4') 64 | request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/state') 65 | 66 | def test_task_attempt_state_kill(self, request_mock): 67 | self.app.task_attempt_state_kill('app_1', 'job_2', 'task_3', 'attempt_4') 68 | request_mock.assert_called_with( 69 | '/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/state', 70 | 'PUT', json={'state': 'KILLED'} 71 | ) 72 | 73 
| def test_task_attempt_counters(self, request_mock): 74 | self.app.task_attempt_counters('app_1', 'job_2', 'task_3', 'attempt_4') 75 | request_mock.assert_called_with( 76 | '/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/counters' 77 | ) 78 | -------------------------------------------------------------------------------- /yarn_api_client/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import logging 5 | import os 6 | import requests 7 | 8 | from datetime import datetime 9 | from urllib.parse import urlparse, urlunparse 10 | 11 | from .errors import APIError, ConfigurationError 12 | 13 | 14 | def get_logger(logger_name): 15 | logger = logging.getLogger(logger_name) 16 | return logger 17 | 18 | 19 | log = get_logger(__name__) 20 | 21 | 22 | class Response(object): 23 | """ 24 | Basic container for response dictionary 25 | 26 | :param requests.Response response: Response for call via requests lib 27 | """ 28 | def __init__(self, response): 29 | #: Dictionary with response data. 
Handle cases where content is empty 30 | # to prevent JSON decode issues 31 | if response.content: 32 | self.data = response.json() 33 | else: 34 | self.data = {} 35 | 36 | 37 | class Uri(object): 38 | def __init__(self, service_endpoint): 39 | if not (service_endpoint.startswith("http://") or service_endpoint.startswith("https://")): 40 | service_endpoint = "http://" + service_endpoint 41 | 42 | service_uri = urlparse(service_endpoint) 43 | self.scheme = service_uri.scheme or 'http' 44 | self.hostname = service_uri.hostname or service_uri.path 45 | self.port = service_uri.port 46 | self.is_https = service_uri.scheme == 'https' or False 47 | 48 | def to_url(self, api_path=None): 49 | path = api_path or '' 50 | if self.port: 51 | result_url = urlunparse((self.scheme, self.hostname + ":" + str(self.port), path, None, None, None)) 52 | else: 53 | result_url = urlunparse((self.scheme, self.hostname, path, None, None, None)) 54 | 55 | return result_url 56 | 57 | 58 | class BaseYarnAPI(object): 59 | response_class = Response 60 | 61 | def __init__(self, service_endpoint=None, timeout=None, auth=None, verify=True, proxies=None): 62 | self.timeout = timeout 63 | 64 | if service_endpoint: 65 | self.service_uri = Uri(service_endpoint) 66 | else: 67 | self.service_uri = None 68 | 69 | self.session = requests.Session() 70 | self.session.auth = auth 71 | self.session.verify = verify 72 | self.session.proxies = proxies 73 | 74 | def _validate_configuration(self): 75 | if not self.service_uri: 76 | raise ConfigurationError('API endpoint is not set') 77 | 78 | def request(self, api_path, method='GET', **kwargs): 79 | self._validate_configuration() 80 | api_endpoint = self.service_uri.to_url(api_path) 81 | 82 | if method == 'GET': 83 | headers = {} 84 | else: 85 | headers = {"Content-Type": "application/json"} 86 | 87 | if 'headers' in kwargs and kwargs['headers']: 88 | headers.update(kwargs['headers']) 89 | 90 | begin = datetime.now() 91 | response = 
self.session.request(method=method, url=api_endpoint, headers=headers, timeout=self.timeout, **kwargs) 92 | end = datetime.now() 93 | log.debug( 94 | "'{method}' request against endpoint '{endpoint}' took {duration} ms".format( 95 | method=method, 96 | endpoint=api_endpoint, 97 | duration=round((end-begin).total_seconds()*1000,3) 98 | ) 99 | ) 100 | 101 | if response.status_code in (200, 202): 102 | return self.response_class(response) 103 | else: 104 | msg = "Response finished with status: {status}. Details: {msg}".format( 105 | status=response.status_code, 106 | msg=response.text 107 | ) 108 | raise APIError(msg) 109 | 110 | def construct_parameters(self, arguments): 111 | params = dict((key, value) for key, value in arguments if value is not None) 112 | return params 113 | -------------------------------------------------------------------------------- /tests/test_history_server.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from mock import patch 3 | from tests import TestCase 4 | 5 | from yarn_api_client.history_server import HistoryServer 6 | from yarn_api_client.errors import IllegalArgumentError 7 | 8 | 9 | @patch('yarn_api_client.history_server.HistoryServer.request') 10 | class HistoryServerTestCase(TestCase): 11 | def setUp(self): 12 | self.hs = HistoryServer('localhost') 13 | 14 | @patch('yarn_api_client.history_server.get_jobhistory_endpoint') 15 | def test__init__(self, get_config_mock, request_mock): 16 | get_config_mock.return_value = None 17 | HistoryServer() 18 | get_config_mock.assert_called_with() 19 | 20 | def test_application_information(self, request_mock): 21 | self.hs.application_information() 22 | request_mock.assert_called_with('/ws/v1/history/info') 23 | 24 | def test_jobs(self, request_mock): 25 | self.hs.jobs() 26 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs', params={}) 27 | 28 | self.hs.jobs(state='NEW', user='root', queue='high', limit=100, 29 
| started_time_begin=1, started_time_end=2, 30 | finished_time_begin=3, finished_time_end=4) 31 | 32 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs', 33 | params={"queue": 'high', 34 | "state": 'NEW', 35 | "user": 'root', 36 | "limit": 100, 37 | "startedTimeBegin": 1, 38 | "startedTimeEnd": 2, 39 | "finishedTimeBegin": 3, 40 | "finishedTimeEnd": 4}) 41 | 42 | with self.assertRaises(IllegalArgumentError): 43 | self.hs.jobs(state='ololo') 44 | 45 | def test_job(self, request_mock): 46 | self.hs.job('job_100500') 47 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_100500') 48 | 49 | def test_job_attempts(self, request_mock): 50 | self.hs.job_attempts('job_1') 51 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_1/jobattempts') 52 | 53 | def test_job_counters(self, request_mock): 54 | self.hs.job_counters('job_2') 55 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/counters') 56 | 57 | def test_job_conf(self, request_mock): 58 | self.hs.job_conf('job_2') 59 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/conf') 60 | 61 | def test_job_tasks(self, request_mock): 62 | self.hs.job_tasks('job_2') 63 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks', params={}) 64 | self.hs.job_tasks('job_2', job_type='m') 65 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks', params={"type": 'm'}) 66 | 67 | with self.assertRaises(IllegalArgumentError): 68 | self.hs.job_tasks('job_2', job_type='ololo') 69 | 70 | def test_job_task(self, request_mock): 71 | self.hs.job_task('job_2', 'task_3') 72 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3') 73 | 74 | def test_task_counters(self, request_mock): 75 | self.hs.task_counters('job_2', 'task_3') 76 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/counters') 77 | 78 | def test_task_attempts(self, request_mock): 79 | 
self.hs.task_attempts('job_2', 'task_3') 80 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts') 81 | 82 | def test_task_attempt(self, request_mock): 83 | self.hs.task_attempt('job_2', 'task_3', 'attempt_4') 84 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4') 85 | 86 | def test_task_attempt_counters(self, request_mock): 87 | self.hs.task_attempt_counters('job_2', 'task_3', 'attempt_4') 88 | request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/counters') 89 | -------------------------------------------------------------------------------- /yarn_api_client/hadoop_conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import xml.etree.ElementTree as ET 4 | import requests 5 | 6 | from .base import get_logger 7 | 8 | log = get_logger(__name__) 9 | 10 | CONF_DIR = os.getenv('YARN_CONF_DIR', os.getenv('HADOOP_CONF_DIR', '/etc/hadoop/conf')) 11 | 12 | 13 | def _get_rm_ids(hadoop_conf_path): 14 | rm_ids = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 'yarn.resourcemanager.ha.rm-ids') 15 | if rm_ids is not None: 16 | rm_ids = rm_ids.split(',') 17 | return rm_ids 18 | 19 | 20 | def _get_maximum_container_memory(hadoop_conf_path): 21 | container_memory = int(parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 22 | 'yarn.nodemanager.resource.memory-mb')) 23 | return container_memory 24 | 25 | 26 | def _is_https_only(): 27 | # determine if HTTPS_ONLY is the configured policy, else use http 28 | hadoop_conf_path = CONF_DIR 29 | http_policy = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 'yarn.http.policy') 30 | if http_policy == 'HTTPS_ONLY': 31 | return True 32 | return False 33 | 34 | 35 | def _get_resource_manager(hadoop_conf_path, rm_id=None): 36 | # compose property name based on policy (and rm_id) 37 | is_https_only = 
_is_https_only() 38 | 39 | if is_https_only: 40 | prop_name = 'yarn.resourcemanager.webapp.https.address' 41 | else: 42 | prop_name = 'yarn.resourcemanager.webapp.address' 43 | 44 | # Adjust prop_name if rm_id is set 45 | if rm_id: 46 | prop_name = "{name}.{rm_id}".format(name=prop_name, rm_id=rm_id) 47 | 48 | rm_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name) 49 | 50 | return ('https://' if is_https_only else 'http://') + rm_address if rm_address else None 51 | 52 | 53 | def check_is_active_rm(url, timeout=30, auth=None, verify=True, proxies=None): 54 | try: 55 | response = requests.get(url + "/cluster", timeout=timeout, auth=auth, verify=verify, proxies=proxies) 56 | except requests.RequestException as e: 57 | log.warning("Exception encountered accessing RM '{url}': '{err}', continuing...".format(url=url, err=e)) 58 | return False 59 | 60 | if response.status_code != 200: 61 | log.warning("Failed to access RM '{url}' - HTTP Code '{status}', continuing...".format(url=url, status=response.status_code)) 62 | return False 63 | else: 64 | return True 65 | 66 | 67 | def get_resource_manager_endpoint(timeout=30, auth=None, verify=True, proxies=None): 68 | log.info('Getting resource manager endpoint from config: {config_path}'.format(config_path=os.path.join(CONF_DIR, 'yarn-site.xml'))) 69 | hadoop_conf_path = CONF_DIR 70 | rm_ids = _get_rm_ids(hadoop_conf_path) 71 | if rm_ids: 72 | for rm_id in rm_ids: 73 | ret = _get_resource_manager(hadoop_conf_path, rm_id) 74 | if ret: 75 | if check_is_active_rm(ret, timeout, auth, verify, proxies): 76 | return ret 77 | return None 78 | else: 79 | return _get_resource_manager(hadoop_conf_path, None) 80 | 81 | 82 | def get_jobhistory_endpoint(): 83 | config_path = os.path.join(CONF_DIR, 'mapred-site.xml') 84 | log.info('Getting jobhistory endpoint from config: {config_path}'.format(config_path=config_path)) 85 | prop_name = 'mapreduce.jobhistory.webapp.address' 86 | return parse(config_path, prop_name) 87 | 
88 | 89 | def get_nodemanager_endpoint(): 90 | config_path = os.path.join(CONF_DIR, 'yarn-site.xml') 91 | log.info('Getting nodemanager endpoint from config: {config_path}'.format(config_path=config_path)) 92 | prop_name = 'yarn.nodemanager.webapp.address' 93 | return parse(config_path, prop_name) 94 | 95 | 96 | def get_webproxy_endpoint(timeout=30, auth=None, verify=True, proxies=None): 97 | config_path = os.path.join(CONF_DIR, 'yarn-site.xml') 98 | log.info('Getting webproxy endpoint from config: {config_path}'.format(config_path=config_path)) 99 | prop_name = 'yarn.web-proxy.address' 100 | value = parse(config_path, prop_name) 101 | return value or get_resource_manager_endpoint(timeout, auth, verify, proxies) 102 | 103 | 104 | def parse(config_path, key): 105 | tree = ET.parse(config_path) 106 | root = tree.getroot() 107 | # Construct list with profit values 108 | ph1 = [dict((el.tag, el.text) for el in p) for p in root.findall('./property')] 109 | # Construct dict with property key values 110 | ph2 = dict((obj['name'], obj['value']) for obj in ph1) 111 | 112 | value = ph2.get(key, None) 113 | return value 114 | -------------------------------------------------------------------------------- /yarn_api_client/node_manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .base import BaseYarnAPI, get_logger 3 | from .constants import ApplicationState 4 | from .errors import IllegalArgumentError 5 | from .hadoop_conf import get_nodemanager_endpoint 6 | 7 | log = get_logger(__name__) 8 | 9 | LEGAL_APPLICATION_STATES = {s for s, _ in ApplicationState} 10 | 11 | 12 | def validate_application_state(state, required=False): 13 | if state: 14 | if state not in LEGAL_APPLICATION_STATES: 15 | msg = 'Application State %s is illegal' % (state,) 16 | raise IllegalArgumentError(msg) 17 | else: 18 | if required: 19 | msg = "state argument is required to be provided" 20 | raise IllegalArgumentError(msg) 21 | 
22 | 23 | class NodeManager(BaseYarnAPI): 24 | """ 25 | The NodeManager REST API's allow the user to get status on the node and 26 | information about applications and containers running on that node. 27 | 28 | If `service_endpoint` argument is `None` client will try to extract it from 29 | Hadoop configuration files. 30 | 31 | :param str service_endpoint: NodeManager HTTP(S) address 32 | :param int timeout: API connection timeout in seconds 33 | :param AuthBase auth: Auth to use for requests 34 | :param boolean verify: Either a boolean, in which case it controls whether 35 | we verify the server's TLS certificate, or a string, in which case it must 36 | be a path to a CA bundle to use. Defaults to ``True`` 37 | """ 38 | def __init__(self, service_endpoint=None, timeout=30, auth=None, verify=True, proxies=None): 39 | if not service_endpoint: 40 | service_endpoint = get_nodemanager_endpoint() 41 | 42 | super(NodeManager, self).__init__(service_endpoint, timeout, auth, verify, proxies) 43 | 44 | def node_information(self): 45 | """ 46 | The node information resource provides overall information about that 47 | particular node. 48 | 49 | :returns: API response object with JSON data 50 | :rtype: :py:class:`yarn_api_client.base.Response` 51 | """ 52 | path = '/ws/v1/node/info' 53 | return self.request(path) 54 | 55 | def node_applications(self, state=None, user=None): 56 | """ 57 | With the Applications API, you can obtain a collection of resources, 58 | each of which represents an application. 
59 | 60 | :param str state: application state 61 | :param str user: user name 62 | :returns: API response object with JSON data 63 | :rtype: :py:class:`yarn_api_client.base.Response` 64 | :raises yarn_api_client.errors.IllegalArgumentError: if `state` 65 | incorrect 66 | """ 67 | path = '/ws/v1/node/apps' 68 | 69 | validate_application_state(state) 70 | 71 | loc_args = ( 72 | ('state', state), 73 | ('user', user)) 74 | 75 | params = self.construct_parameters(loc_args) 76 | 77 | return self.request(path, params=params) 78 | 79 | def node_application(self, application_id): 80 | """ 81 | An application resource contains information about a particular 82 | application that was run or is running on this NodeManager. 83 | 84 | :param str application_id: The application id 85 | :returns: API response object with JSON data 86 | :rtype: :py:class:`yarn_api_client.base.Response` 87 | """ 88 | path = '/ws/v1/node/apps/{appid}'.format(appid=application_id) 89 | 90 | return self.request(path) 91 | 92 | def node_containers(self): 93 | """ 94 | With the containers API, you can obtain a collection of resources, 95 | each of which represents a container. 96 | 97 | :returns: API response object with JSON data 98 | :rtype: :py:class:`yarn_api_client.base.Response` 99 | """ 100 | path = '/ws/v1/node/containers' 101 | 102 | return self.request(path) 103 | 104 | def node_container(self, container_id): 105 | """ 106 | A container resource contains information about a particular container 107 | that is running on this NodeManager. 
108 | 109 | :param str container_id: The container id 110 | :returns: API response object with JSON data 111 | :rtype: :py:class:`yarn_api_client.base.Response` 112 | """ 113 | path = '/ws/v1/node/containers/{containerid}'.format( 114 | containerid=container_id) 115 | 116 | return self.request(path) 117 | 118 | def auxiliary_services(self): 119 | """ 120 | With the auxiliary services API, you can obtain a collection of resources, 121 | each of which represents an auxiliary service. When you run a GET operation 122 | on this resource, you obtain a collection of auxiliary service information objects. 123 | 124 | :returns: API response object with JSON data 125 | :rtype: :py:class:`yarn_api_client.base.Response` 126 | """ 127 | path = '/ws/v1/node/auxiliaryservices' 128 | return self.request(path) 129 | 130 | def auxiliary_services_update(self, data): 131 | """ 132 | A YARN admin can use a PUT operation to update the auxiliary services running 133 | on the NodeManager. The body of the request should be of the same format as 134 | an auxiliary services manifest file. 
135 | 136 | :param dict data: auxiliary services manifest file 137 | :returns: API response object with JSON data 138 | :rtype: :py:class:`yarn_api_client.base.Response` 139 | """ 140 | path = '/ws/v1/node/auxiliaryservices' 141 | return self.request(path, 'PUT', json=data) 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hadoop-yarn-api-python-client 2 | 3 | Python client for Apache Hadoop® YARN API 4 | 5 | [![Latest Version](https://img.shields.io/pypi/v/yarn-api-client.svg)](https://pypi.python.org/pypi/yarn-api-client/) 6 | [![Downloads](https://pepy.tech/badge/yarn-api-client/month)](https://pepy.tech/project/yarn-api-client/month) 7 | [![Travis CI build status](https://travis-ci.org/CODAIT/hadoop-yarn-api-python-client.svg?branch=master)](https://travis-ci.org/CODAIT/hadoop-yarn-api-python-client) 8 | [![Latest documentation status](https://readthedocs.org/projects/yarn-api-client-python/badge/?version=latest)](https://yarn-api-client-python.readthedocs.org/en/latest/?badge=latest) 9 | [![Test coverage](https://coveralls.io/repos/toidi/hadoop-yarn-api-python-client/badge.png)](https://coveralls.io/r/toidi/hadoop-yarn-api-python-client) 10 | 11 | Package documentation: 12 | [yarn-api-client-python.readthedocs.org](https://yarn-api-client-python.readthedocs.org/en/latest/) 13 | 14 | REST API documentation: [hadoop.apache.org](http://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html) 15 | 16 | **Warning**: CLI is outdated & broken. Please don't use CLI. This will be resolved in future releases. 

---
## Compatibility Matrix

| yarn-api-client-python | Apache Hadoop |
| ------------- | ------------- |
| 1.0.2 | 3.2.1 |
| 1.0.3 | 3.3.0, 3.3.1 |

If you have a version other than those mentioned (or a vendored variant such as Hortonworks), certain APIs might not work or might differ in
implementation. If you plan to use a certain API long-term, you might want to make sure it is not in Alpha stage in the documentation.

## Installation

From PyPI
```
pip install yarn-api-client
```

From Anaconda (conda forge)
```
conda install -c conda-forge yarn-api-client
```

From source code
```
pip install git+https://github.com/CODAIT/hadoop-yarn-api-python-client.git
```

## Enabling support for SimpleAuth

See example below:
```
from yarn_api_client.auth import SimpleAuth
from yarn_api_client.history_server import HistoryServer
auth = SimpleAuth('impersonated_account_name')
history_server = HistoryServer('https://127.0.0.2:5678', auth=auth)
```

## Enabling support for Kerberos/SPNEGO Security
1. First option - using `requests_kerberos` package

To avoid deployment issues on a non-Kerberized environment, the `requests_kerberos`
dependency is optional and needs to be explicitly installed in order to enable access
to a YARN console protected by Kerberos/SPNEGO.

`pip install requests_kerberos`

From python code
```
from yarn_api_client.history_server import HistoryServer
from requests_kerberos import HTTPKerberosAuth
history_server = HistoryServer('https://127.0.0.2:5678', auth=HTTPKerberosAuth())
```

PS: You **need** to obtain a valid Kerberos ticket in the systemwide Kerberos cache before running your code, otherwise calls to a kerberized environment won't go through (run kinit before proceeding to run your code)

2.
Second option - using `gssapi` package 75 | 76 | If you want to avoid using terminal calls, you have to perform SPNEGO handshake to retrieve ticket yourself. Full API documentation: https://pythongssapi.github.io/python-gssapi/latest/ 77 | 78 | # Usage 79 | 80 | ### CLI interface 81 | 82 | **Warning**: CLI is outdated & broken. Please don't use CLI. This will be resolved in future releases. 83 | 84 | 1. First way 85 | ``` 86 | bin/yarn_client --help 87 | ``` 88 | 89 | 2. Alternative way 90 | ``` 91 | python -m yarn_api_client --help 92 | ``` 93 | 94 | ### Programmatic interface 95 | 96 | ``` 97 | from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager 98 | am = ApplicationMaster('https://127.0.0.2:5678') 99 | app_information = am.application_information('application_id') 100 | ``` 101 | 102 | ### Changelog 103 | 104 | 1.0.3 Release 105 | - Drop support of Python 2.7 (if you still need it for extreme emergency, look into reverting ab4f71582f8c69e908db93905485ba4d00562dfd) 106 | - Update of supported hadoop version to 3.3.1 107 | - Add support for YARN_CONF_DIR and HADOOP_CONF_DIR 108 | - Add class for native SimpleAuth (#106) 109 | - Add constructor argument for proxies (#109) 110 | 111 | 1.0.2 Release 112 | - Add support for Python 3.8.x 113 | - Fix HTTPS url parsing 114 | - Fix JSON body request APIs 115 | - Handle YARN response with empty contents 116 | - Better logging support 117 | 118 | 1.0.1 Release 119 | - Passes the authorization instance to the Active RM check 120 | - Establishes a new (working) documentation site in readthedocs.io: yarn-api-client-python.readthedocs.io 121 | - Adds more python version (3.7 and 3.8) to test matrix and removes 2.6. 122 | 123 | 1.0.0 Release 124 | - Major cleanup of API. 125 | - Address/port parameters have been replaced with complete 126 | endpoints (includes scheme [e.g., http or https]). 127 | - ResourceManager has been updated to take a list of endpoints for 128 | improved HA support. 
129 | - ResourceManager, ApplicationMaster, HistoryServer and NodeManager 130 | have been updated with methods corresponding to the latest REST API. 131 | - pytest support on Windows has been provided. 132 | - Documentation has been updated. 133 | 134 | **NOTE:** Applications using APIs relative to releases prior to 1.0 should 135 | pin their dependency on yarn-api-client to _less than_ 1.0 and are encouraged 136 | to update to 1.0 as soon as possible. 137 | 138 | 0.3.7 Release 139 | - Honor configured HTTP Policy when no address is provided - enabling 140 | using of HTTPS in these cases. 141 | 142 | 0.3.6 Release 143 | - Extend ResourceManager to allow applications to determine 144 | resource availability prior to submission. 145 | 146 | 0.3.5 Release 147 | - Hotfix release to fix internal signature mismatch 148 | 149 | 0.3.4 Release 150 | - More flexible support for discovering Hadoop configuration 151 | including multiple Resource Managers when HA is configured 152 | - Properly support YARN post response codes 153 | 154 | 0.3.3 Release 155 | - Properly set Content-Type in PUT requests 156 | - Check for HADOOP_CONF_DIR env variable 157 | 158 | 0.3.2 Release 159 | - Make Kerberos/SPNEGO dependency optional 160 | 161 | 0.3.1 Release 162 | - Fix cluster_application_kill API 163 | 164 | 0.3.0 Release 165 | - Add support for YARN endpoints protected by Kerberos/SPNEGO 166 | - Moved to `requests` package for REST API invocation 167 | - Remove `http_con` property, as connections are now managed by `requests` package 168 | 169 | 0.2.5 Release 170 | - Fixed History REST API 171 | 172 | 0.2.4 Release 173 | - Added compatibility with HA enabled Resource Manager 174 | 175 | ### Team 176 | 177 | YARN API client is developed by an open community, and the current maintainers 178 | are listed below in alphabetical order: 179 | 180 | - [Dmitry Romanenko](https://github.com/dimon222) 181 | - [Eduard Iskandarov](https://github.com/toidi) 182 | - [Kevin 
Bates](https://github.com/kevin-bates) 183 | - [Luciano Resende](https://github.com/lresende) 184 | 185 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/yarn-api-client.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/yarn-api-client.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/yarn-api-client" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/yarn-api-client" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 
112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # yarn-api-client documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Jul 31 22:07:17 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | sys.path.append(os.pardir) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 
# -- General configuration ---------------------------------------------------

# Sphinx extension modules to enable for this build.
extensions = [
    'sphinx.ext.autodoc',
]

# Directories (relative to this file) searched for templates.
templates_path = ['_templates']

# Filename suffix of reST sources.
source_suffix = '.rst'

# Document holding the root toctree.
master_doc = 'index'

# Project metadata shown in titles and the copyright footer.
project = u'yarn-api-client'
copyright = u'2014, Iskandarov Eduard'

# Short X.Y version and the full release string (kept in sync here).
version = '0.2.4'
release = '0.2.4'

# Patterns, relative to the source dir, ignored when collecting sources.
exclude_patterns = ['_build']

# Pygments syntax-highlighting style.
pygments_style = 'sphinx'


# -- Options for HTML output -------------------------------------------------

# Builtin theme for HTML and HTML Help pages.
html_theme = 'default'

# Custom static files; copied after the builtin static files, so a file
# named "default.css" here overrides the builtin one.
html_static_path = ['_static']

# Output file base name for the HTML help builder.
htmlhelp_basename = 'yarn-api-clientdoc'


# -- Options for LaTeX output ------------------------------------------------

# Overrides for paper size, point size and preamble; defaults are kept.
latex_elements = {
}

# Grouping of the document tree into LaTeX files: tuples of
# (source start file, target name, title, author, documentclass).
latex_documents = [
    ('index', 'yarn-api-client.tex', u'yarn-api-client Documentation',
     u'Iskandarov Eduard', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page: tuples of
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'yarn-api-client', u'yarn-api-client Documentation',
     [u'Iskandarov Eduard'], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files.
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from .base import BaseYarnAPI, get_logger
from .constants import JobStateInternal
from .errors import IllegalArgumentError
from .hadoop_conf import get_jobhistory_endpoint

log = get_logger(__name__)


class HistoryServer(BaseYarnAPI):
    """
    The history server REST API's allow the user to get status on finished
    applications. Currently it only supports MapReduce and provides
    information on finished jobs.

    If `service_endpoint` argument is `None` client will try to extract it
    from Hadoop configuration files.

    :param str service_endpoint: HistoryServer HTTP(S) address
    :param int timeout: API connection timeout in seconds
    :param AuthBase auth: Auth to use for requests
    :param boolean verify: Either a boolean, in which case it controls whether
        we verify the server's TLS certificate, or a string, in which case it
        must be a path to a CA bundle to use. Defaults to ``True``
    :param dict proxies: Dictionary mapping protocol to the URL of the proxy
        to be used for requests
    """
    def __init__(self, service_endpoint=None, timeout=30, auth=None,
                 verify=True, proxies=None):
        if not service_endpoint:
            # NOTE(review): unlike ApplicationMaster, no timeout/auth/verify/
            # proxies are forwarded to the conf lookup here -- confirm that
            # get_jobhistory_endpoint() takes no arguments.
            service_endpoint = get_jobhistory_endpoint()

        super(HistoryServer, self).__init__(service_endpoint, timeout, auth,
                                            verify, proxies)

    def application_information(self):
        """
        The history server information resource provides overall information
        about the history server.

        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/info'

        return self.request(path)

    def jobs(self, state=None, user=None, queue=None, limit=None,
             started_time_begin=None, started_time_end=None,
             finished_time_begin=None, finished_time_end=None):
        """
        The jobs resource provides a list of the MapReduce jobs that have
        finished. It does not currently return a full list of parameters.

        :param str state: the job state
        :param str user: user name
        :param str queue: queue name
        :param str limit: total number of app objects to be returned
        :param str started_time_begin: jobs with start time beginning with
            this time, specified in ms since epoch
        :param str started_time_end: jobs with start time ending with this
            time, specified in ms since epoch
        :param str finished_time_begin: jobs with finish time beginning with
            this time, specified in ms since epoch
        :param str finished_time_end: jobs with finish time ending with this
            time, specified in ms since epoch
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        :raises yarn_api_client.errors.IllegalArgumentError: if `state`
            incorrect
        """
        path = '/ws/v1/history/mapreduce/jobs'

        # JobStateInternal is unpacked as two-element pairs; the first element
        # is assumed to be the legal state name -- confirm in constants.py.
        legal_states = {s for s, _ in JobStateInternal}
        if state is not None and state not in legal_states:
            msg = 'Job Internal State %s is illegal' % (state,)
            raise IllegalArgumentError(msg)

        loc_args = (
            ('state', state),
            ('user', user),
            ('queue', queue),
            ('limit', limit),
            ('startedTimeBegin', started_time_begin),
            ('startedTimeEnd', started_time_end),
            ('finishedTimeBegin', finished_time_begin),
            ('finishedTimeEnd', finished_time_end))

        params = self.construct_parameters(loc_args)

        return self.request(path, params=params)

    def job(self, job_id):
        """
        A Job resource contains information about a particular job identified
        by jobid.

        :param str job_id: The job id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}'.format(jobid=job_id)

        return self.request(path)

    def job_attempts(self, job_id):
        """
        With the job attempts API, you can obtain a collection of resources
        that represent a job attempt.

        :param str job_id: The job id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/jobattempts'.format(
            jobid=job_id)

        return self.request(path)

    def job_counters(self, job_id):
        """
        With the job counters API, you can obtain a collection of resources
        that represent all the counters for that job.

        :param str job_id: The job id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/counters'.format(
            jobid=job_id)

        return self.request(path)

    def job_conf(self, job_id):
        """
        A job configuration resource contains information about the job
        configuration for this job.

        :param str job_id: The job id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/conf'.format(jobid=job_id)

        return self.request(path)

    def job_tasks(self, job_id, job_type=None):
        """
        With the tasks API, you can obtain a collection of resources that
        represent a task within a job.

        :param str job_id: The job id
        :param str job_type: type of task, valid values are m or r. m for map
            task or r for reduce task
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        :raises yarn_api_client.errors.IllegalArgumentError: if `job_type`
            incorrect
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks'.format(
            jobid=job_id)

        # m - for map
        # r - for reduce
        valid_types = ['m', 'r']
        if job_type is not None and job_type not in valid_types:
            msg = 'Job type %s is illegal' % (job_type,)
            raise IllegalArgumentError(msg)

        params = {}
        if job_type is not None:
            params['type'] = job_type

        return self.request(path, params=params)

    def job_task(self, job_id, task_id):
        """
        A Task resource contains information about a particular task
        within a job.

        :param str job_id: The job id
        :param str task_id: The task id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}'.format(
            jobid=job_id, taskid=task_id)

        return self.request(path)

    def task_counters(self, job_id, task_id):
        """
        With the task counters API, you can obtain a collection of resources
        that represent all the counters for that task.

        :param str job_id: The job id
        :param str task_id: The task id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/counters'.format(
            jobid=job_id, taskid=task_id)

        return self.request(path)

    def task_attempts(self, job_id, task_id):
        """
        With the task attempts API, you can obtain a collection of resources
        that represent a task attempt within a job.

        :param str job_id: The job id
        :param str task_id: The task id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts'.format(
            jobid=job_id, taskid=task_id)

        return self.request(path)

    def task_attempt(self, job_id, task_id, attempt_id):
        """
        A Task Attempt resource contains information about a particular task
        attempt within a job.

        :param str job_id: The job id
        :param str task_id: The task id
        :param str attempt_id: The attempt id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}'.format(
            jobid=job_id, taskid=task_id, attemptid=attempt_id)

        return self.request(path)

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """
        With the task attempt counters API, you can obtain a collection of
        resources that represent all the counters for that task attempt.

        :param str job_id: The job id
        :param str task_id: The task id
        :param str attempt_id: The attempt id
        :returns: API response object with JSON data
        :rtype: :py:class:`yarn_api_client.base.Response`
        """
        path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/counters'.format(
            jobid=job_id, taskid=task_id, attemptid=attempt_id)

        return self.request(path)
class HadoopConfTestCase(TestCase):
    """Unit tests for the yarn_api_client.hadoop_conf helper functions."""

    def test_parse(self):
        # A populated yarn-site.xml yields the configured value for each key.
        with NamedTemporaryFile(delete=False) as f:
            f.write(yarn_site_xml)
            f.flush()
            f.close()
            temp_filename = f.name

        for key, expected in (
                ('yarn.resourcemanager.webapp.address', 'localhost:8022'),
                ('yarn.resourcemanager.webapp.https.address', 'localhost:8024'),
                ('yarn.http.policy', 'HTTPS_ONLY')):
            self.assertEqual(expected, hadoop_conf.parse(temp_filename, key))
        os.remove(temp_filename)

        # An empty configuration file yields None for every key.
        with NamedTemporaryFile(delete=False) as f:
            f.write(empty_config)
            f.flush()
            f.close()
            temp_filename = f.name

        for key in ('yarn.resourcemanager.webapp.address',
                    'yarn.resourcemanager.webapp.https.address',
                    'yarn.http.policy'):
            self.assertEqual(None, hadoop_conf.parse(temp_filename, key))
        os.remove(temp_filename)

    def test_get_resource_endpoint(self):
        # Non-HA setup: the single configured webapp address is used.
        with patch('yarn_api_client.hadoop_conf.parse') as parse_mock, \
                patch('yarn_api_client.hadoop_conf._get_rm_ids') as rm_ids_mock:
            parse_mock.return_value = 'example.com:8022'
            rm_ids_mock.return_value = None

            self.assertEqual('http://example.com:8022',
                             hadoop_conf.get_resource_manager_endpoint())
            parse_mock.assert_called_with(
                hadoop_conf_path + 'yarn-site.xml',
                'yarn.resourcemanager.webapp.address')

            # Missing configuration value -> no endpoint.
            parse_mock.reset_mock()
            parse_mock.return_value = None
            self.assertIsNone(hadoop_conf.get_resource_manager_endpoint())

    @mock.patch('yarn_api_client.hadoop_conf._get_rm_ids')
    @mock.patch('yarn_api_client.hadoop_conf.parse')
    @mock.patch('yarn_api_client.hadoop_conf.check_is_active_rm')
    def test_get_resource_endpoint_with_ha(self, check_is_active_rm_mock,
                                           parse_mock, get_rm_ids_mock):
        # HA setup: the first RM id whose endpoint is active wins.
        get_rm_ids_mock.return_value = ['rm1', 'rm2']
        parse_mock.return_value = 'example.com:8022'
        check_is_active_rm_mock.return_value = True

        self.assertEqual('http://example.com:8022',
                         hadoop_conf.get_resource_manager_endpoint())
        parse_mock.assert_called_with(
            hadoop_conf_path + 'yarn-site.xml',
            'yarn.resourcemanager.webapp.address.rm1')

        # Missing per-RM configuration value -> no endpoint.
        parse_mock.reset_mock()
        parse_mock.return_value = None
        self.assertIsNone(hadoop_conf.get_resource_manager_endpoint())

    def test_get_rm_ids(self):
        with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
            # Comma-separated id list is split into a Python list.
            parse_mock.return_value = 'rm1,rm2'
            self.assertEqual(['rm1', 'rm2'],
                             hadoop_conf._get_rm_ids(hadoop_conf.CONF_DIR))
            parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
                                          'yarn.resourcemanager.ha.rm-ids')

            # No HA ids configured -> None.
            parse_mock.reset_mock()
            parse_mock.return_value = None
            self.assertIsNone(hadoop_conf._get_rm_ids(hadoop_conf.CONF_DIR))

    @mock.patch('yarn_api_client.hadoop_conf._is_https_only')
    def test_check_is_active_rm(self, is_https_only_mock):
        is_https_only_mock.return_value = False

        # An active RM answers the /cluster probe with HTTP 200.
        with requests_mock.mock() as req_mock:
            req_mock.get('https://example2:8022/cluster', status_code=200)
            self.assertTrue(
                hadoop_conf.check_is_active_rm('https://example2:8022'))

        # Any non-success answer (outage 500, wrong URL 404, missing or
        # invalid auth 401) means the RM is not considered active.
        for status_code in (500, 404, 401):
            with requests_mock.mock() as req_mock:
                req_mock.get('https://example2:8022/cluster',
                             status_code=status_code)
                self.assertFalse(
                    hadoop_conf.check_is_active_rm('https://example2:8022'))

        # A requests-level failure (socket timeout, DNS error, ...) is
        # treated the same way as an inactive RM.
        with requests_mock.mock() as req_mock:
            req_mock.get('example2:8022/cluster', exc=RequestException)
            self.assertFalse(hadoop_conf.check_is_active_rm('example2:8022'))

    def test_get_resource_manager(self):
        with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
            parse_mock.return_value = 'example.com:8022'

            # Without an RM id the plain webapp address key is consulted.
            self.assertEqual(
                'http://example.com:8022',
                hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, None))
            parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
                                          'yarn.resourcemanager.webapp.address')

            # With an RM id the id-suffixed key is consulted.
            self.assertEqual(
                'http://example.com:8022',
                hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, 'rm1'))
            parse_mock.assert_called_with(
                hadoop_conf_path + 'yarn-site.xml',
                'yarn.resourcemanager.webapp.address.rm1')

            # Missing configuration value -> None.
            parse_mock.reset_mock()
            parse_mock.return_value = None
            self.assertIsNone(
                hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, 'rm1'))

    def _check_simple_endpoint(self, getter, conf_file, conf_key):
        # Shared scenario for the simple endpoint getters: the value returned
        # by parse() is passed through verbatim, and a missing value gives
        # None.
        with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
            parse_mock.return_value = 'example.com:8022'
            self.assertEqual('example.com:8022', getter())
            parse_mock.assert_called_with(hadoop_conf_path + conf_file,
                                          conf_key)

            parse_mock.reset_mock()
            parse_mock.return_value = None
            self.assertIsNone(getter())

    def test_get_jobhistory_endpoint(self):
        self._check_simple_endpoint(hadoop_conf.get_jobhistory_endpoint,
                                    'mapred-site.xml',
                                    'mapreduce.jobhistory.webapp.address')

    def test_get_nodemanager_endpoint(self):
        self._check_simple_endpoint(hadoop_conf.get_nodemanager_endpoint,
                                    'yarn-site.xml',
                                    'yarn.nodemanager.webapp.address')

    def test_get_webproxy_endpoint(self):
        self._check_simple_endpoint(hadoop_conf.get_webproxy_endpoint,
                                    'yarn-site.xml',
                                    'yarn.web-proxy.address')
log = get_logger(__name__) 9 | 10 | 11 | class ApplicationMaster(BaseYarnAPI): 12 | """ 13 | The MapReduce Application Master REST API's allow the user to get status 14 | on the running MapReduce application master. Currently this is the 15 | equivalent to a running MapReduce job. The information includes the jobs 16 | the app master is running and all the job particulars like tasks, 17 | counters, configuration, attempts, etc. 18 | 19 | If `address` argument is `None` client will try to extract `address` and 20 | `port` from Hadoop configuration files. 21 | 22 | :param str service_endpoint: ApplicationMaster HTTP(S) address 23 | :param int timeout: API connection timeout in seconds 24 | :param AuthBase auth: Auth to use for requests 25 | :param boolean verify: Either a boolean, in which case it controls whether 26 | we verify the server's TLS certificate, or a string, in which case it must 27 | be a path to a CA bundle to use. Defaults to ``True`` 28 | """ 29 | def __init__(self, service_endpoint=None, timeout=30, auth=None, verify=True, proxies=None): 30 | if not service_endpoint: 31 | service_endpoint = get_webproxy_endpoint(timeout, auth, verify, proxies) 32 | 33 | super(ApplicationMaster, self).__init__(service_endpoint, timeout, auth, verify, proxies) 34 | 35 | def application_information(self, application_id): 36 | """ 37 | The MapReduce application master information resource provides overall 38 | information about that mapreduce application master. 39 | This includes application id, time it was started, user, name, etc. 40 | 41 | :returns: API response object with JSON data 42 | :rtype: :py:class:`yarn_api_client.base.Response` 43 | """ 44 | path = '/proxy/{appid}/ws/v1/mapreduce/info'.format( 45 | appid=application_id) 46 | 47 | return self.request(path) 48 | 49 | def jobs(self, application_id): 50 | """ 51 | The jobs resource provides a list of the jobs running on this 52 | application master. 
53 | 54 | :param str application_id: The application id 55 | :returns: API response object with JSON data 56 | :rtype: :py:class:`yarn_api_client.base.Response` 57 | """ 58 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs'.format( 59 | appid=application_id) 60 | 61 | return self.request(path) 62 | 63 | def job(self, application_id, job_id): 64 | """ 65 | A job resource contains information about a particular job that was 66 | started by this application master. Certain fields are only accessible 67 | if user has permissions - depends on acl settings. 68 | 69 | :param str application_id: The application id 70 | :param str job_id: The job id 71 | :returns: API response object with JSON data 72 | :rtype: :py:class:`yarn_api_client.base.Response` 73 | """ 74 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}'.format( 75 | appid=application_id, jobid=job_id) 76 | 77 | return self.request(path) 78 | 79 | def job_attempts(self, application_id, job_id): 80 | """ 81 | With the job attempts API, you can obtain a collection of resources 82 | that represent the job attempts. 83 | 84 | :param str application_id: The application id 85 | :param str job_id: The job id 86 | :returns: API response object with JSON data 87 | :rtype: :py:class:`yarn_api_client.base.Response` 88 | """ 89 | 90 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/jobattempts'.format( 91 | appid=application_id, jobid=job_id) 92 | 93 | return self.request(path) 94 | 95 | def job_counters(self, application_id, job_id): 96 | """ 97 | With the job counters API, you can object a collection of resources 98 | that represent all the counters for that job. 
99 | 100 | :param str application_id: The application id 101 | :param str job_id: The job id 102 | :returns: API response object with JSON data 103 | :rtype: :py:class:`yarn_api_client.base.Response` 104 | """ 105 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/counters'.format( 106 | appid=application_id, jobid=job_id) 107 | 108 | return self.request(path) 109 | 110 | def job_conf(self, application_id, job_id): 111 | """ 112 | A job configuration resource contains information about the job 113 | configuration for this job. 114 | 115 | :param str application_id: The application id 116 | :param str job_id: The job id 117 | :returns: API response object with JSON data 118 | :rtype: :py:class:`yarn_api_client.base.Response` 119 | """ 120 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/conf'.format( 121 | appid=application_id, jobid=job_id) 122 | 123 | return self.request(path) 124 | 125 | def job_tasks(self, application_id, job_id): 126 | """ 127 | With the tasks API, you can obtain a collection of resources that 128 | represent all the tasks for a job. 129 | 130 | :param str application_id: The application id 131 | :param str job_id: The job id 132 | :returns: API response object with JSON data 133 | :rtype: :py:class:`yarn_api_client.base.Response` 134 | """ 135 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks'.format( 136 | appid=application_id, jobid=job_id) 137 | 138 | return self.request(path) 139 | 140 | def job_task(self, application_id, job_id, task_id): 141 | """ 142 | A Task resource contains information about a particular 143 | task within a job. 
144 | 145 | :param str application_id: The application id 146 | :param str job_id: The job id 147 | :param str task_id: The task id 148 | :returns: API response object with JSON data 149 | :rtype: :py:class:`yarn_api_client.base.Response` 150 | """ 151 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}'.format( 152 | appid=application_id, jobid=job_id, taskid=task_id) 153 | 154 | return self.request(path) 155 | 156 | def task_counters(self, application_id, job_id, task_id): 157 | """ 158 | With the task counters API, you can object a collection of resources 159 | that represent all the counters for that task. 160 | 161 | :param str application_id: The application id 162 | :param str job_id: The job id 163 | :param str task_id: The task id 164 | :returns: API response object with JSON data 165 | :rtype: :py:class:`yarn_api_client.base.Response` 166 | """ 167 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/counters'.format( 168 | appid=application_id, jobid=job_id, taskid=task_id) 169 | 170 | return self.request(path) 171 | 172 | def task_attempts(self, application_id, job_id, task_id): 173 | """ 174 | With the task attempts API, you can obtain a collection of resources 175 | that represent a task attempt within a job. 176 | 177 | :param str application_id: The application id 178 | :param str job_id: The job id 179 | :param str task_id: The task id 180 | :returns: API response object with JSON data 181 | :rtype: :py:class:`yarn_api_client.base.Response` 182 | """ 183 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts'.format( 184 | appid=application_id, jobid=job_id, taskid=task_id) 185 | 186 | return self.request(path) 187 | 188 | def task_attempt(self, application_id, job_id, task_id, attempt_id): 189 | """ 190 | A Task Attempt resource contains information about a particular task 191 | attempt within a job. 
192 | 193 | :param str application_id: The application id 194 | :param str job_id: The job id 195 | :param str task_id: The task id 196 | :param str attempt_id: The attempt id 197 | :returns: API response object with JSON data 198 | :rtype: :py:class:`yarn_api_client.base.Response` 199 | """ 200 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}'.format( 201 | appid=application_id, jobid=job_id, taskid=task_id, 202 | attemptid=attempt_id) 203 | 204 | return self.request(path) 205 | 206 | def task_attempt_state(self, application_id, job_id, task_id, attempt_id): 207 | """ 208 | With the task attempt state API, you can query the state of a submitted 209 | task attempt. 210 | 211 | :param str application_id: The application id 212 | :param str job_id: The job id 213 | :param str task_id: The task id 214 | :param str attempt_id: The attempt id 215 | :returns: API response object with JSON data 216 | :rtype: :py:class:`yarn_api_client.base.Response` 217 | """ 218 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/state'.format( 219 | appid=application_id, jobid=job_id, taskid=task_id, 220 | attemptid=attempt_id) 221 | 222 | return self.request(path) 223 | 224 | def task_attempt_state_kill(self, application_id, job_id, task_id, attempt_id): 225 | """ 226 | Kill specific attempt using task attempt state API. 
227 | 228 | :param str application_id: The application id 229 | :param str job_id: The job id 230 | :param str task_id: The task id 231 | :param str attempt_id: The attempt id 232 | :returns: API response object with JSON data 233 | :rtype: :py:class:`yarn_api_client.base.Response` 234 | """ 235 | data = {"state": "KILLED"} 236 | 237 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/state'.format( 238 | appid=application_id, jobid=job_id, taskid=task_id, 239 | attemptid=attempt_id) 240 | 241 | return self.request(path, 'PUT', json=data) 242 | 243 | def task_attempt_counters(self, application_id, job_id, task_id, attempt_id): 244 | """ 245 | With the task attempt counters API, you can object a collection 246 | of resources that represent al the counters for that task attempt. 247 | 248 | :param str application_id: The application id 249 | :param str job_id: The job id 250 | :param str task_id: The task id 251 | :param str attempt_id: The attempt id 252 | :returns: API response object with JSON data 253 | :rtype: :py:class:`yarn_api_client.base.Response` 254 | """ 255 | path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/counters'.format( 256 | appid=application_id, jobid=job_id, taskid=task_id, 257 | attemptid=attempt_id) 258 | 259 | return self.request(path) 260 | -------------------------------------------------------------------------------- /yarn_api_client/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | import argparse 4 | from pprint import pprint 5 | 6 | from .base import get_logger 7 | from .constants import (YarnApplicationState, FinalApplicationStatus, 8 | ApplicationState, JobStateInternal) 9 | from . 
import ResourceManager, NodeManager, HistoryServer, ApplicationMaster 10 | 11 | log = get_logger(__name__) 12 | 13 | 14 | def get_parser(): 15 | parser = argparse.ArgumentParser( 16 | description='Client for Hadoop® YARN API') 17 | 18 | parser.add_argument('--endpoint', help='API endpoint (https://test.cluster.com:8090)') 19 | 20 | subparsers = parser.add_subparsers() 21 | populate_resource_manager_arguments(subparsers) 22 | populate_node_manager_arguments(subparsers) 23 | populate_application_master_arguments(subparsers) 24 | populate_history_server_arguments(subparsers) 25 | 26 | return parser 27 | 28 | 29 | def populate_resource_manager_arguments(subparsers): 30 | rm_parser = subparsers.add_parser( 31 | 'rm', help='ResourceManager REST API\'s') 32 | rm_parser.set_defaults(api_class=ResourceManager) 33 | 34 | rm_subparsers = rm_parser.add_subparsers() 35 | 36 | ci_parser = rm_subparsers.add_parser( 37 | 'info', help='Cluster Information API') 38 | ci_parser.set_defaults(method='cluster_information') 39 | 40 | cm_parser = rm_subparsers.add_parser( 41 | 'metrics', help='Cluster Metrics API') 42 | cm_parser.set_defaults(method='cluster_metrics') 43 | 44 | cs_parser = rm_subparsers.add_parser( 45 | 'scheduler', help='Cluster Scheduler API') 46 | cs_parser.set_defaults(method='cluster_scheduler') 47 | 48 | cas_parser = rm_subparsers.add_parser( 49 | 'apps', help='Cluster Applications API') 50 | cas_parser.add_argument('--state', 51 | help='states of the applications', 52 | choices=dict(YarnApplicationState).keys()) 53 | cas_parser.add_argument('--final-status', 54 | choices=dict(FinalApplicationStatus).keys()) 55 | cas_parser.add_argument('--user') 56 | cas_parser.add_argument('--queue') 57 | cas_parser.add_argument('--limit') 58 | cas_parser.add_argument('--started-time-begin') 59 | cas_parser.add_argument('--started-time-end') 60 | cas_parser.add_argument('--finished-time-begin') 61 | cas_parser.add_argument('--finished-time-end') 62 | 
cas_parser.set_defaults(method='cluster_applications') 63 | cas_parser.set_defaults(method_kwargs=[ 64 | 'state', 'user', 'queue', 'limit', 65 | 'started_time_begin', 'started_time_end', 'finished_time_begin', 66 | 'finished_time_end', 'final_status']) 67 | 68 | ca_parser = rm_subparsers.add_parser( 69 | 'app', help='Cluster Application API') 70 | ca_parser.add_argument('application_id') 71 | ca_parser.set_defaults(method='cluster_application') 72 | ca_parser.set_defaults(method_args=['application_id']) 73 | 74 | caa_parser = rm_subparsers.add_parser( 75 | 'app_attempts', help='Cluster Application Attempts API') 76 | caa_parser.add_argument('application_id') 77 | caa_parser.set_defaults(method='cluster_application_attempts') 78 | caa_parser.set_defaults(method_args=['application_id']) 79 | 80 | cns_parser = rm_subparsers.add_parser( 81 | 'nodes', help='Cluster Nodes API') 82 | cns_parser.add_argument('--state', help='the state of the node') 83 | cns_parser.add_argument('--healthy', help='true or false') 84 | cns_parser.set_defaults(method='cluster_nodes') 85 | cns_parser.set_defaults(method_kargs=['state', 'healthy']) 86 | 87 | cn_parser = rm_subparsers.add_parser( 88 | 'node', help='Cluster Node API') 89 | cn_parser.add_argument('node_id') 90 | cn_parser.set_defaults(method='cluster_node') 91 | cn_parser.set_defaults(method_args=['node_id']) 92 | 93 | 94 | def populate_node_manager_arguments(subparsers): 95 | nm_parser = subparsers.add_parser( 96 | 'nm', help='NodeManager REST API\'s') 97 | nm_parser.set_defaults(api_class=NodeManager) 98 | 99 | nm_subparsers = nm_parser.add_subparsers() 100 | 101 | ni_parser = nm_subparsers.add_parser( 102 | 'info', help='NodeManager Information API') 103 | ni_parser.set_defaults(method='node_information') 104 | 105 | nas_parser = nm_subparsers.add_parser( 106 | 'apps', help='Applications API') 107 | nas_parser.add_argument('--state', 108 | help='application state', 109 | choices=dict(ApplicationState).keys()) 110 | 
nas_parser.add_argument('--user', 111 | help='user name') 112 | nas_parser.set_defaults(method='node_applications') 113 | nas_parser.set_defaults(method_kwargs=['state', 'user']) 114 | 115 | na_parser = nm_subparsers.add_parser( 116 | 'app', help='Application API') 117 | na_parser.add_argument('application_id') 118 | na_parser.set_defaults(method='node_application') 119 | na_parser.set_defaults(method_args=['application_id']) 120 | 121 | ncs_parser = nm_subparsers.add_parser( 122 | 'containers', help='Containers API') 123 | ncs_parser.set_defaults(method='node_containers') 124 | 125 | nc_parser = nm_subparsers.add_parser( 126 | 'container', help='Container API') 127 | nc_parser.add_argument('container_id') 128 | nc_parser.set_defaults(method='node_container') 129 | nc_parser.set_defaults(method_args=['container_id']) 130 | 131 | 132 | def populate_application_master_arguments(subparsers): 133 | am_parser = subparsers.add_parser( 134 | 'am', help='MapReduce Application Master REST API\'s') 135 | am_parser.set_defaults(api_class=ApplicationMaster) 136 | am_parser.add_argument('application_id') 137 | 138 | # TODO: not implemented 139 | 140 | 141 | def populate_history_server_arguments(subparsers): 142 | hs_parser = subparsers.add_parser( 143 | 'hs', help='History Server REST API\'s') 144 | hs_parser.set_defaults(api_class=HistoryServer) 145 | 146 | hs_subparsers = hs_parser.add_subparsers() 147 | 148 | hi_parser = hs_subparsers.add_parser( 149 | 'info', help='History Server Information API') 150 | hi_parser.set_defaults(method='application_information') 151 | 152 | hjs_parser = hs_subparsers.add_parser( 153 | 'jobs', help='Jobs API') 154 | hjs_parser.add_argument('--state', 155 | help='states of the applications', 156 | choices=dict(JobStateInternal).keys()) 157 | hjs_parser.add_argument('--user') 158 | hjs_parser.add_argument('--queue') 159 | hjs_parser.add_argument('--limit') 160 | hjs_parser.add_argument('--started-time-begin') 161 | 
hjs_parser.add_argument('--started-time-end') 162 | hjs_parser.add_argument('--finished-time-begin') 163 | hjs_parser.add_argument('--finished-time-end') 164 | hjs_parser.set_defaults(method='jobs') 165 | hjs_parser.set_defaults(method_kwargs=[ 166 | 'state', 'user', 'queue', 'limit', 167 | 'started_time_begin', 'started_time_end', 'finished_time_begin', 168 | 'finished_time_end']) 169 | 170 | hj_parser = hs_subparsers.add_parser('job', help='Job API') 171 | hj_parser.add_argument('job_id') 172 | hj_parser.set_defaults(method='job') 173 | hj_parser.set_defaults(method_args=['job_id']) 174 | 175 | hja_parser = hs_subparsers.add_parser( 176 | 'job_attempts', help='Job Attempts API') 177 | hja_parser.add_argument('job_id') 178 | hja_parser.set_defaults(method='job_attempts') 179 | hja_parser.set_defaults(method_args=['job_id']) 180 | 181 | hjc_parser = hs_subparsers.add_parser( 182 | 'job_counters', help='Job Counters API') 183 | hjc_parser.add_argument('job_id') 184 | hjc_parser.set_defaults(method='job_counters') 185 | hjc_parser.set_defaults(method_args=['job_id']) 186 | 187 | hjcn_parser = hs_subparsers.add_parser( 188 | 'job_conf', help='Job Conf API') 189 | hjcn_parser.add_argument('job_id') 190 | hjcn_parser.set_defaults(method='job_conf') 191 | hjcn_parser.set_defaults(method_args=['job_id']) 192 | 193 | hts_parser = hs_subparsers.add_parser( 194 | 'tasks', help='Tasks API') 195 | hts_parser.add_argument('job_id') 196 | hts_parser.add_argument('--type', choices=['m', 'r'], 197 | help=('type of task, m for map task ' 198 | 'or r for reduce task.')) 199 | hts_parser.set_defaults(method='job_tasks') 200 | hts_parser.set_defaults(method_args=['job_id']) 201 | hts_parser.set_defaults(method_kwargs=['type']) 202 | 203 | ht_parser = hs_subparsers.add_parser( 204 | 'task', help='Task API') 205 | ht_parser.add_argument('job_id') 206 | ht_parser.add_argument('task_id') 207 | ht_parser.set_defaults(method='job_task') 208 | ht_parser.set_defaults(method_args=['job_id', 
'task_id']) 209 | 210 | htc_parser = hs_subparsers.add_parser( 211 | 'task_counters', help='Task Counters API') 212 | htc_parser.add_argument('job_id') 213 | htc_parser.add_argument('task_id') 214 | htc_parser.set_defaults(method='task_counters') 215 | htc_parser.set_defaults(method_args=['job_id', 'task_id']) 216 | 217 | htas_parser = hs_subparsers.add_parser( 218 | 'task_attempts', help='Task Attempts API') 219 | htas_parser.add_argument('job_id') 220 | htas_parser.add_argument('task_id') 221 | htas_parser.set_defaults(method='task_attempts') 222 | htas_parser.set_defaults(method_args=['job_id', 'task_id']) 223 | 224 | hta_parser = hs_subparsers.add_parser( 225 | 'task_attempt', help='Task Attempt API') 226 | hta_parser.add_argument('job_id') 227 | hta_parser.add_argument('task_id') 228 | hta_parser.add_argument('attempt_id') 229 | hta_parser.set_defaults(method='task_attempt') 230 | hta_parser.set_defaults(method_args=['job_id', 'task_id', 'attempt_id']) 231 | 232 | htac_parser = hs_subparsers.add_parser( 233 | 'task_attempt_counters', help='Task Attempt Counters API') 234 | htac_parser.add_argument('job_id') 235 | htac_parser.add_argument('task_id') 236 | htac_parser.add_argument('attempt_id') 237 | htac_parser.set_defaults(method='task_attempt_counters') 238 | htac_parser.set_defaults(method_args=['job_id', 'task_id', 'attempt_id']) 239 | 240 | 241 | def main(): 242 | parser = get_parser() 243 | opts = parser.parse_args() 244 | 245 | class_kwargs = {} 246 | if not hasattr(opts, 'api_class'): 247 | raise Exception("Please provide api class - rm, hs, nm, am") 248 | # Only ResourceManager supports HA 249 | elif opts.endpoint: 250 | if opts.api_class == ResourceManager: 251 | class_kwargs['service_endpoints'] = opts.endpoint.split(",") 252 | else: 253 | class_kwargs['service_endpoint'] = opts.endpoint 254 | 255 | api = opts.api_class(**class_kwargs) 256 | # Construct positional arguments for method 257 | if 'method_args' in opts: 258 | method_args = [getattr(opts, 
arg) for arg in opts.method_args] 259 | else: 260 | method_args = [] 261 | # Construct key arguments for method 262 | if 'method_kwargs' in opts: 263 | method_kwargs = dict((key, getattr(opts, key)) for key in opts.method_kwargs) 264 | else: 265 | method_kwargs = {} 266 | response = getattr(api, opts.method)(*method_args, **method_kwargs) 267 | pprint(response.data) 268 | -------------------------------------------------------------------------------- /tests/test_resource_manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from mock import patch 3 | from tests import TestCase 4 | 5 | from yarn_api_client.resource_manager import ResourceManager 6 | from yarn_api_client.errors import IllegalArgumentError 7 | 8 | 9 | @patch('yarn_api_client.resource_manager.ResourceManager.request') 10 | class ResourceManagerTestCase(TestCase): 11 | @patch('yarn_api_client.resource_manager.check_is_active_rm') 12 | def setUp(self, check_is_active_rm_mock): 13 | check_is_active_rm_mock.return_value = True 14 | self.rm = ResourceManager(['localhost']) 15 | 16 | @patch('yarn_api_client.resource_manager.get_resource_manager_endpoint') 17 | def test__init__(self, get_config_mock, request_mock): 18 | get_config_mock.return_value = "https://localhost" 19 | rm = ResourceManager() 20 | get_config_mock.assert_called_with(30, None, True) 21 | self.assertEqual(rm.service_uri.is_https, True) 22 | 23 | def test_cluster_information(self, request_mock): 24 | self.rm.cluster_information() 25 | request_mock.assert_called_with('/ws/v1/cluster/info') 26 | 27 | def test_cluster_metrics(self, request_mock): 28 | self.rm.cluster_metrics() 29 | request_mock.assert_called_with('/ws/v1/cluster/metrics') 30 | 31 | def test_cluster_scheduler(self, request_mock): 32 | self.rm.cluster_scheduler() 33 | request_mock.assert_called_with('/ws/v1/cluster/scheduler') 34 | 35 | def test_cluster_applications(self, request_mock): 36 | 
        # No filters: the params dict sent to the API must be empty.
        self.rm.cluster_applications()
        request_mock.assert_called_with('/ws/v1/cluster/apps', params={})

        # Every supported filter maps to its camelCase query parameter.
        self.rm.cluster_applications(state='KILLED', states=['KILLED'],
                                     final_status='FAILED', user='root',
                                     queue='low', limit=10,
                                     started_time_begin=1, started_time_end=2,
                                     finished_time_begin=3, finished_time_end=4,
                                     application_types=['YARN'],
                                     application_tags=['apptag'],
                                     name="wordcount",
                                     de_selects=['resourceRequests'])
        request_mock.assert_called_with('/ws/v1/cluster/apps', params={
            'state': 'KILLED',
            'states': 'KILLED',
            'finalStatus': 'FAILED',
            'user': 'root',
            'queue': 'low',
            'limit': 10,
            'startedTimeBegin': 1,
            'startedTimeEnd': 2,
            'finishedTimeBegin': 3,
            'finishedTimeEnd': 4,
            'applicationTypes': 'YARN',
            'applicationTags': 'apptag',
            'name': 'wordcount',
            'deSelects': 'resourceRequests'
        })

        # Illegal enum values are rejected client-side before any request.
        with self.assertRaises(IllegalArgumentError):
            self.rm.cluster_applications(states=['ololo'])

        with self.assertRaises(IllegalArgumentError):
            self.rm.cluster_applications(final_status='ololo')

    def test_cluster_application_statistics(self, request_mock):
        self.rm.cluster_application_statistics()
        request_mock.assert_called_with('/ws/v1/cluster/appstatistics', params={})
        # TODO: test arguments

    def test_cluster_application(self, request_mock):
        self.rm.cluster_application('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1')

    def test_cluster_application_attempts(self, request_mock):
        self.rm.cluster_application_attempts('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts')

    def test_cluster_application_attempt_info(self, request_mock):
        self.rm.cluster_application_attempt_info('app_1', 'attempt_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1')

    def test_cluster_application_attempt_containers(self, request_mock):
        self.rm.cluster_application_attempt_containers('app_1', 'attempt_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1/containers')

    def test_cluster_application_attempt_container_info(self, request_mock):
        self.rm.cluster_application_attempt_container_info('app_1', 'attempt_1', 'container_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1/containers/container_1')

    def test_cluster_application_state(self, request_mock):
        self.rm.cluster_application_state('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/state')

    def test_cluster_application_kill(self, request_mock):
        # Killing an application is a state transition via PUT.
        self.rm.cluster_application_kill('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/state', 'PUT', json={
            "state": 'KILLED'
        })

    def test_cluster_nodes(self, request_mock):
        self.rm.cluster_nodes()
        request_mock.assert_called_with('/ws/v1/cluster/nodes', params={})

        self.rm.cluster_nodes(states=['NEW'])
        request_mock.assert_called_with('/ws/v1/cluster/nodes', params={
            "states": 'NEW'
        })

        with self.assertRaises(IllegalArgumentError):
            self.rm.cluster_nodes(states=['ololo'])

    def test_cluster_node(self, request_mock):
        self.rm.cluster_node('node_1')
        request_mock.assert_called_with('/ws/v1/cluster/nodes/node_1')

    def test_cluster_submit_application(self, request_mock):
        self.rm.cluster_submit_application({"application-name": "dummy_application"})
        request_mock.assert_called_with('/ws/v1/cluster/apps', 'POST', json={
            "application-name": "dummy_application"
        })

    def test_cluster_new_application(self, request_mock):
        self.rm.cluster_new_application()
        request_mock.assert_called_with('/ws/v1/cluster/apps/new-application', 'POST')

    def test_cluster_get_application_queue(self, request_mock):
        self.rm.cluster_get_application_queue('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/queue')

    def test_cluster_change_application_queue(self, request_mock):
        self.rm.cluster_change_application_queue('app_1', 'queue_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/queue', 'PUT', json={
            "queue": 'queue_1'
        })

    def test_cluster_get_application_priority(self, request_mock):
        self.rm.cluster_get_application_priority('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/priority')

    def test_cluster_change_application_priority(self, request_mock):
        self.rm.cluster_change_application_priority('app_1', 'priority_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/priority', 'PUT', json={
            "priority": 'priority_1'
        })

    @patch('yarn_api_client.hadoop_conf.parse')
    def test_cluster_node_container_memory(self, parse_mock, request_mock):
        # Value comes from parsing Hadoop config, not from an HTTP request.
        parse_mock.return_value = 1024
        value = self.rm.cluster_node_container_memory()
        self.assertEqual(value, 1024)

    # TODO
    # def test_cluster_scheduler_queue(self, request_mock):
    #     class ResponseMock():
    #         def __init__(self, status, data):
    #             self.status = status
    #             self.data = data

    #     request_mock.return_value = ResponseMock(
    #         'OK',
    #         {
    #             'scheduler': {
    #                 'schedulerInfo': {
    #                     "queues": {
    #                         "queue": [
    #                             {
    #                                 'queueName': 'queue_1',
    #                                 'queues': {
    #                                     'queue': [
    #                                         {
    #                                             "queueName": 'queue_2',
    #                                             'queues': {
    #                                                 'queue': [
    #                                                     {
    #                                                         'queueName': 'queue_3'
    #                                                     }
    #                                                 ]
    #                                             }
    #                                         }
    #                                     ]
    #                                 }
    #                             }
    #                         ]
    #                     }
    #                 }
    #             }
    #         }
    #     )
    #     value = self.rm.cluster_scheduler_queue('queue_1')
    #     self.assertIsNotNone(value)

    #     request_mock.return_value = ResponseMock(
    #         'OK',
    #         {
    #             'scheduler': {
    #                 'schedulerInfo': {
    #                     'queueName': 'queue_1'
    #                 }
    #             }
    #         }
    #     )
    #     value = self.rm.cluster_scheduler_queue('queue_2')
    #     self.assertIsNone(value)

    def test_cluster_scheduler_queue_availability(self, request_mock):
        # Queue is "available" when used capacity is below the threshold.
        value = self.rm.cluster_scheduler_queue_availability({'absoluteUsedCapacity': 90}, 70)
        self.assertEqual(value, False)

        value = self.rm.cluster_scheduler_queue_availability({'absoluteUsedCapacity': 50}, 70)
        self.assertEqual(value, True)

    def test_cluster_queue_partition(self, request_mock):
        # Matching partition label is found among queueCapacitiesByPartition.
        value = self.rm.cluster_queue_partition(
            {
                'capacities': {
                    'queueCapacitiesByPartition': [
                        {
                            'partitionName': 'label_1'
                        },
                        {
                            'partitionName': 'label_2'
                        }
                    ]
                },
            },
            'label_1'
        )
        self.assertIsNotNone(value)

        value = self.rm.cluster_queue_partition(
            {
                'capacities': {
                    'queueCapacitiesByPartition': [
                        {
                            'partitionName': 'label_1'
                        },
                        {
                            'partitionName': 'label_2'
                        }
                    ]
                },
            },
            'label_3'
        )
        self.assertIsNone(value)

    def test_cluster_reservations(self, request_mock):
        self.rm.cluster_reservations('queue_1', 'reservation_1', 0, 5, True)
        request_mock.assert_called_with('/ws/v1/cluster/reservation/list', params={
            "queue": "queue_1",
            "reservation-id": "reservation_1",
            "start-time": 0,
            "end-time": 5,
            "include-resource-allocations": True
        })

    def test_cluster_new_delegation_token(self, request_mock):
        self.rm.cluster_new_delegation_token('renewer_1')
        request_mock.assert_called_with('/ws/v1/cluster/delegation-token', 'POST', json={
            "renewer": "renewer_1"
        })

    def test_cluster_renew_delegation_token(self, request_mock):
        # Renewal passes the token via a dedicated header, not the body.
        self.rm.cluster_renew_delegation_token('delegation_token_1')
        request_mock.assert_called_with('/ws/v1/cluster/delegation-token/expiration', 'POST', headers={
            "Hadoop-YARN-RM-Delegation-Token": 'delegation_token_1'
        })

    def test_cluster_cancel_delegation_token(self, request_mock):
        self.rm.cluster_cancel_delegation_token('delegation_token_1')
        request_mock.assert_called_with('/ws/v1/cluster/delegation-token', 'DELETE', headers={
            "Hadoop-YARN-RM-Delegation-Token": 'delegation_token_1'
        })

    def test_cluster_new_reservation(self, request_mock):
        self.rm.cluster_new_reservation()
        request_mock.assert_called_with('/ws/v1/cluster/reservation/new-reservation', 'POST')

    def test_cluster_submit_reservation(self, request_mock):
        self.rm.cluster_submit_reservation({'reservation-id': 'reservation_1'})
        request_mock.assert_called_with('/ws/v1/cluster/reservation/submit', 'POST', json={
            'reservation-id': 'reservation_1'
        })

    def test_cluster_update_reservation(self, request_mock):
        self.rm.cluster_update_reservation({
            'reservation-id': 'reservation_1'
        })
        request_mock.assert_called_with('/ws/v1/cluster/reservation/update', 'POST', json={
            'reservation-id': 'reservation_1'
        })

    def test_cluster_delete_reservation(self, request_mock):
        self.rm.cluster_delete_reservation('reservation_1')
        request_mock.assert_called_with('/ws/v1/cluster/reservation/delete', 'POST', json={
            'reservation-id': 'reservation_1'
        })

    def test_cluster_application_timeouts(self, request_mock):
        self.rm.cluster_application_timeouts('app_1')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeouts')

    def test_cluster_application_timeout(self, request_mock):
        self.rm.cluster_application_timeout('app_1', 'LIFETIME')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeouts/LIFETIME')

    def test_cluster_update_application_timeout(self, request_mock):
        self.rm.cluster_update_application_timeout('app_1', 'LIFETIME', '2016-12-05T22:51:00.104+0530')
        request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeout', 'PUT', json={
            'timeout': {'type': 'LIFETIME', 'expiryTime': '2016-12-05T22:51:00.104+0530'}
        })

    def test_cluster_scheduler_conf_mutation(self, request_mock):
        self.rm.cluster_scheduler_conf_mutation()
        request_mock.assert_called_with('/ws/v1/cluster/scheduler-conf')

    def test_cluster_modify_scheduler_conf_mutation(self, request_mock):
        self.rm.cluster_modify_scheduler_conf_mutation({
            'queue-name': 'queue_1',
            'params': {
                'test': 'test'
            }
        })
        request_mock.assert_called_with('/ws/v1/cluster/scheduler-conf', 'PUT', json={
            'queue-name': 'queue_1',
            'params': {
                'test': 'test'
            }
        })

    def test_cluster_node_update_resource(self, request_mock):
        self.rm.cluster_node_update_resource('node_1', {
            "resource":
            {
                "memory": 1024,
                "vCores": 3
            },
            "overCommitTimeout": -1
        })
        request_mock.assert_called_with('/ws/v1/cluster/nodes/node_1/resource', 'POST', json={
            "resource":
            {
                "memory": 1024,
                "vCores": 3
            },
            "overCommitTimeout": -1
        })

    def test_cluster_container_signal(self, request_mock):
        self.rm.cluster_container_signal('container_1', 'OUTPUT_THREAD_DUMP')
        request_mock.assert_called_with(
            '/ws/v1/cluster/containers/container_1/signal/OUTPUT_THREAD_DUMP',
            'POST'
        )

    def test_scheduler_activities(self, request_mock):
        self.rm.scheduler_activities(node_id='node_1', group_by='diagnostic')
        request_mock.assert_called_with('/ws/v1/cluster/scheduler/activities', params={
            "nodeId": 'node_1',
            "groupBy": 'diagnostic'
        })

    def test_application_activities(self, request_mock):
        # List-valued arguments are serialized as comma-joined strings.
        self.rm.application_activities('app_1', max_time=4,
                                       request_priorities=["1","2"],
                                       allocation_request_ids=["-1", "1"], group_by="diagnostic",
                                       limit=5, actions=['refresh', 'get'], summarize=True)
        request_mock.assert_called_with('/ws/v1/cluster/scheduler/app-activities/app_1', params={
            "maxTime": 4,
            "requestPriorities": "1,2",
            "allocationRequestIds": "-1,1",
            "groupBy": "diagnostic",
            "limit": 5,
            "actions": "refresh,get",
            "summarize": True
        })
--------------------------------------------------------------------------------
/yarn_api_client/resource_manager.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .base import BaseYarnAPI, get_logger
from .constants import YarnApplicationState, FinalApplicationStatus, ClusterContainerSignal
from .errors import IllegalArgumentError
from .hadoop_conf import get_resource_manager_endpoint, check_is_active_rm, CONF_DIR, _get_maximum_container_memory
from collections import deque

log = get_logger(__name__)
# Legal enum values, extracted from the (name, description) constant pairs.
LEGAL_STATES = {s for s, _ in YarnApplicationState}
LEGAL_FINAL_STATUSES = {s for s, _ in FinalApplicationStatus}
LEGAL_CLUSTER_CONTAINER_STATUSES = {s for s, _ in ClusterContainerSignal}


def validate_yarn_application_state(state, required=False):
    """
    Raise IllegalArgumentError when `state` is not a legal
    YarnApplicationState value, or when it is missing but `required` is True.
    """
    if state:
        if state not in LEGAL_STATES:
            msg = 'Yarn Application State %s is illegal' % (state,)
            raise IllegalArgumentError(msg)
    else:
        if required:
            msg = "state argument is required to be provided"
            raise IllegalArgumentError(msg)


def validate_yarn_application_states(states, required=False):
    if states:
        if not isinstance(states, list):
            msg = "States should be 
def _validate_enum_member(value, legal_values, illegal_msg, required_msg,
                          required):
    """Shared guard used by the module's validate_* helpers.

    Falsy values count as "not provided": they pass silently unless
    ``required`` is True.

    :param str value: candidate value to check
    :param set legal_values: the allowed values
    :param str illegal_msg: %-format template for the illegal-value error
    :param str required_msg: message raised when a required value is missing
    :param bool required: when True, a missing value is also an error
    :raises yarn_api_client.errors.IllegalArgumentError: on an illegal
        value, or on a missing value while ``required`` is True
    """
    if value:
        if value not in legal_values:
            raise IllegalArgumentError(illegal_msg % (value,))
    elif required:
        raise IllegalArgumentError(required_msg)


def validate_final_application_status(final_status, required=False):
    """Validate a final application status string.

    :param str final_status: the final status reported by the application
    :param bool required: when True, a missing value is also rejected
    :raises yarn_api_client.errors.IllegalArgumentError: if the value is
        illegal, or missing while required
    """
    _validate_enum_member(
        final_status, LEGAL_FINAL_STATUSES,
        'Final Application Status %s is illegal',
        'final_status argument is required to be provided',
        required)


def validate_cluster_container_status(cluster_container_status, required=False):
    """Validate a cluster container signal command string.

    :param str cluster_container_status: the signal command to check
    :param bool required: when True, a missing value is also rejected
    :raises yarn_api_client.errors.IllegalArgumentError: if the value is
        illegal, or missing while required
    """
    _validate_enum_member(
        cluster_container_status, LEGAL_CLUSTER_CONTAINER_STATUSES,
        'Cluster Container Status %s is illegal',
        'cluster_container_status argument is required to be provided',
        required)
77 | 78 | :param List[str] service_endpoints: List of ResourceManager HTTP(S) 79 | addresses 80 | :param int timeout: API connection timeout in seconds 81 | :param AuthBase auth: Auth to use for requests configurations 82 | :param boolean verify: Either a boolean, in which case it controls whether 83 | we verify the server's TLS certificate, or a string, in which case it must 84 | be a path to a CA bundle to use. Defaults to ``True`` 85 | """ 86 | def __init__(self, service_endpoints=None, timeout=30, auth=None, verify=True, proxies=None): 87 | active_service_endpoint = None 88 | if not service_endpoints: 89 | active_service_endpoint = get_resource_manager_endpoint(timeout, auth, verify) 90 | else: 91 | for endpoint in service_endpoints: 92 | if check_is_active_rm(endpoint, timeout, auth, verify): 93 | active_service_endpoint = endpoint 94 | break 95 | 96 | if active_service_endpoint: 97 | super(ResourceManager, self).__init__(active_service_endpoint, timeout, auth, verify, proxies) 98 | else: 99 | raise Exception("No active RMs found") 100 | 101 | def get_active_endpoint(self): 102 | """ 103 | The active address, port tuple to which this instance is associated. 104 | :return: str service_endpoint: Service endpoint URL corresponding to 105 | the active address of RM 106 | """ 107 | return self.service_uri.to_url() 108 | 109 | def cluster_information(self): 110 | """ 111 | The cluster information resource provides overall information about 112 | the cluster. 113 | 114 | :returns: API response object with JSON data 115 | :rtype: :py:class:`yarn_api_client.base.Response` 116 | """ 117 | path = '/ws/v1/cluster/info' 118 | return self.request(path) 119 | 120 | def cluster_metrics(self): 121 | """ 122 | The cluster metrics resource provides some overall metrics about the 123 | cluster. More detailed metrics should be retrieved from the jmx 124 | interface. 
125 | 126 | :returns: API response object with JSON data 127 | :rtype: :py:class:`yarn_api_client.base.Response` 128 | """ 129 | path = '/ws/v1/cluster/metrics' 130 | return self.request(path) 131 | 132 | def cluster_scheduler(self): 133 | """ 134 | A scheduler resource contains information about the current scheduler 135 | configured in a cluster. It currently supports both the Fifo and 136 | Capacity Scheduler. You will get different information depending on 137 | which scheduler is configured so be sure to look at the type 138 | information. 139 | 140 | :returns: API response object with JSON data 141 | :rtype: :py:class:`yarn_api_client.base.Response` 142 | """ 143 | path = '/ws/v1/cluster/scheduler' 144 | return self.request(path) 145 | 146 | def cluster_applications(self, state=None, states=None, 147 | final_status=None, user=None, 148 | queue=None, limit=None, 149 | started_time_begin=None, started_time_end=None, 150 | finished_time_begin=None, finished_time_end=None, 151 | application_types=None, application_tags=None, 152 | name=None, de_selects=None): 153 | """ 154 | With the Applications API, you can obtain a collection of resources, 155 | each of which represents an application. 
156 | 157 | :param str state: state of the application [deprecated] 158 | :param List[str] states: applications matching the given application 159 | states 160 | :param str final_status: the final status of the application - 161 | reported by the application itself 162 | :param str user: user name 163 | :param str queue: queue name 164 | :param str limit: total number of app objects to be returned 165 | :param str started_time_begin: applications with start time beginning 166 | with this time, specified in ms since epoch 167 | :param str started_time_end: applications with start time ending with 168 | this time, specified in ms since epoch 169 | :param str finished_time_begin: applications with finish time 170 | beginning with this time, specified in ms since epoch 171 | :param str finished_time_end: applications with finish time ending 172 | with this time, specified in ms since epoch 173 | :param List[str] application_types: applications matching the given 174 | application types, specified as a comma-separated list 175 | :param List[str] application_tags: applications matching any of the 176 | given application tags, specified as a comma-separated list 177 | :param str name: name of the application 178 | :param List[str] de_selects: a generic fields which will be skipped in 179 | the result 180 | :returns: API response object with JSON data 181 | :rtype: :py:class:`yarn_api_client.base.Response` 182 | :raises yarn_api_client.errors.IllegalArgumentError: if `state` or 183 | `final_status` incorrect 184 | """ 185 | path = '/ws/v1/cluster/apps' 186 | 187 | validate_yarn_application_state(state) 188 | validate_yarn_application_states(states) 189 | validate_final_application_status(final_status) 190 | 191 | loc_args = ( 192 | ('state', state), 193 | ('states', ','.join(states) if states else None), 194 | ('finalStatus', final_status), 195 | ('user', user), 196 | ('queue', queue), 197 | ('limit', limit), 198 | ('startedTimeBegin', started_time_begin), 199 | 
('startedTimeEnd', started_time_end), 200 | ('finishedTimeBegin', finished_time_begin), 201 | ('finishedTimeEnd', finished_time_end), 202 | ('applicationTypes', ','.join(application_types) if application_types else None), 203 | ('applicationTags', ','.join(application_tags) if application_tags else None), 204 | ('name', name), 205 | ('deSelects', ','.join(de_selects) if de_selects else None) 206 | ) 207 | 208 | params = self.construct_parameters(loc_args) 209 | 210 | return self.request(path, params=params) 211 | 212 | def cluster_application_statistics(self, states=None, 213 | application_types=None): 214 | """ 215 | With the Application Statistics API, you can obtain a collection of 216 | triples, each of which contains the application type, the application 217 | state and the number of applications of this type and this state in 218 | ResourceManager context. 219 | 220 | This method only works in Hadoop > 2.0.0 221 | 222 | :param List[str] states: states of the applications. If states is not 223 | provided, the API will enumerate all application states and 224 | return the counts of them. 225 | :param List[str] application_types: types of the applications, 226 | specified as a comma-separated list. If application_types is not 227 | provided, the API will count the applications of any application 228 | type. In this case, the response shows * to indicate any 229 | application type. Note that we only support at most one 230 | applicationType temporarily. Otherwise, users will expect 231 | an BadRequestException. 
232 | :returns: API response object with JSON data 233 | :rtype: :py:class:`yarn_api_client.base.Response` 234 | """ 235 | path = '/ws/v1/cluster/appstatistics' 236 | 237 | validate_yarn_application_states(states) 238 | 239 | loc_args = ( 240 | ('states', ','.join(states) if states else None), 241 | ('applicationTypes', ','.join(application_types) if application_types else None) 242 | ) 243 | params = self.construct_parameters(loc_args) 244 | 245 | return self.request(path, params=params) 246 | 247 | def cluster_application(self, application_id): 248 | """ 249 | An application resource contains information about a particular 250 | application that was submitted to a cluster. 251 | 252 | :param str application_id: The application id 253 | :returns: API response object with JSON data 254 | :rtype: :py:class:`yarn_api_client.base.Response` 255 | """ 256 | path = '/ws/v1/cluster/apps/{appid}'.format(appid=application_id) 257 | 258 | return self.request(path) 259 | 260 | def cluster_application_attempts(self, application_id): 261 | """ 262 | With the application attempts API, you can obtain a collection of 263 | resources that represent an application attempt. 264 | 265 | :param str application_id: The application id 266 | :returns: API response object with JSON data 267 | :rtype: :py:class:`yarn_api_client.base.Response` 268 | """ 269 | path = '/ws/v1/cluster/apps/{appid}/appattempts'.format( 270 | appid=application_id) 271 | 272 | return self.request(path) 273 | 274 | def cluster_application_attempt_info(self, application_id, attempt_id): 275 | """ 276 | With the application attempts API, you can obtain an extended info about 277 | an application attempt. 
278 | 279 | :param str application_id: The application id 280 | :param str attempt_id: The attempt id 281 | :returns: API response object with JSON data 282 | :rtype: :py:class:`yarn_api_client.base.Response` 283 | """ 284 | path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}'.format( 285 | appid=application_id, attemptid=attempt_id) 286 | 287 | return self.request(path) 288 | 289 | def cluster_application_attempt_containers(self, application_id, attempt_id): 290 | """ 291 | With the application attempts API, you can obtain an information 292 | about container related to an application attempt. 293 | 294 | :param str application_id: The application id 295 | :param str attempt_id: The attempt id 296 | :returns: API response object with JSON data 297 | :rtype: :py:class:`yarn_api_client.base.Response` 298 | """ 299 | path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}/containers'.format( 300 | appid=application_id, attemptid=attempt_id) 301 | 302 | return self.request(path) 303 | 304 | def cluster_application_attempt_container_info(self, application_id, attempt_id, container_id): 305 | """ 306 | With the application attempts API, you can obtain an information 307 | about container related to an application attempt. 308 | 309 | :param str application_id: The application id 310 | :param str attempt_id: The attempt id 311 | :param str container_id: The container id 312 | :returns: API response object with JSON data 313 | :rtype: :py:class:`yarn_api_client.base.Response` 314 | """ 315 | path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}/containers/{containerid}'.format( 316 | appid=application_id, attemptid=attempt_id, containerid=container_id) 317 | 318 | return self.request(path) 319 | 320 | def cluster_application_state(self, application_id): 321 | """ 322 | (This feature is currently in the alpha stage and may change in the 323 | future) 324 | 325 | With the application state API, you can obtain the current 326 | state of an application. 
327 | 328 | :param str application_id: The application id 329 | :returns: API response object with JSON data 330 | :rtype: :py:class:`yarn_api_client.base.Response` 331 | """ 332 | path = '/ws/v1/cluster/apps/{appid}/state'.format( 333 | appid=application_id) 334 | 335 | return self.request(path) 336 | 337 | def cluster_application_kill(self, application_id): 338 | """ 339 | (This feature is currently in the alpha stage and may change in the 340 | future) 341 | 342 | With the application kill API, you can kill an application 343 | that is not in FINISHED or FAILED state. 344 | 345 | :param str application_id: The application id 346 | :returns: API response object with JSON data 347 | :rtype: :py:class:`yarn_api_client.base.Response` 348 | """ 349 | 350 | data = {"state": "KILLED"} 351 | path = '/ws/v1/cluster/apps/{appid}/state'.format( 352 | appid=application_id) 353 | 354 | return self.request(path, 'PUT', json=data) 355 | 356 | def cluster_nodes(self, states=None): 357 | """ 358 | With the Nodes API, you can obtain a collection of resources, each of 359 | which represents a node. 360 | 361 | :param List[str] states: the states of the node, specified as a 362 | comma-separated list valid values are: NEW, RUNNING, UNHEALTHY, 363 | DECOMMISSIONING, DECOMMISSIONED, LOST, REBOOTED, SHUTDOWN 364 | :returns: API response object with JSON data 365 | :rtype: :py:class:`yarn_api_client.base.Response` 366 | :raises yarn_api_client.errors.IllegalArgumentError: if `healthy` 367 | incorrect 368 | """ 369 | path = '/ws/v1/cluster/nodes' 370 | 371 | validate_yarn_application_states(states) 372 | 373 | loc_args = ( 374 | ('states', ','.join(states) if states else None), 375 | ) 376 | params = self.construct_parameters(loc_args) 377 | 378 | return self.request(path, params=params) 379 | 380 | def cluster_node(self, node_id): 381 | """ 382 | A node resource contains information about a node in the cluster. 
383 | 384 | :param str node_id: The node id 385 | :returns: API response object with JSON data 386 | :rtype: :py:class:`yarn_api_client.base.Response` 387 | """ 388 | path = '/ws/v1/cluster/nodes/{nodeid}'.format(nodeid=node_id) 389 | 390 | return self.request(path) 391 | 392 | def cluster_node_update_resource(self, node_id, data): 393 | """ 394 | Update the total resources in a node. 395 | 396 | For data body definition refer to: 397 | (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Node_Update_Resource_API) 398 | 399 | :param dict data: resourceOption details 400 | :returns: API response object with JSON data 401 | :rtype: :py:class:`yarn_api_client.base.Response` 402 | """ 403 | path = '/ws/v1/cluster/nodes/{nodeid}/resource'.format(nodeid=node_id) 404 | 405 | return self.request(path, 'POST', json=data) 406 | 407 | def cluster_submit_application(self, data): 408 | """ 409 | (This feature is currently in the alpha stage and may change in the 410 | future) 411 | 412 | With the New Application API, you can obtain an application-id which 413 | can then be used as part of the Cluster Submit Applications API to 414 | submit applications. The response also includes the maximum resource 415 | capabilities available on the cluster. 
416 | 417 | For data body definition refer to: 418 | (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Applications_API.28Submit_Application.29) 419 | 420 | :param dict data: Application details 421 | :returns: API response object with JSON data 422 | :rtype: :py:class:`yarn_api_client.base.Response` 423 | """ 424 | path = '/ws/v1/cluster/apps' 425 | 426 | return self.request(path, 'POST', json=data) 427 | 428 | def cluster_new_application(self): 429 | """ 430 | (This feature is currently in the alpha stage and may change in the 431 | future) 432 | 433 | With the New Application API, you can obtain an application-id which 434 | can then be used as part of the Cluster Submit Applications API to 435 | submit applications. The response also includes the maximum resource 436 | capabilities available on the cluster. 437 | 438 | :returns: API response object with JSON data 439 | :rtype: :py:class:`yarn_api_client.base.Response` 440 | """ 441 | path = '/ws/v1/cluster/apps/new-application' 442 | 443 | return self.request(path, 'POST') 444 | 445 | def cluster_get_application_queue(self, application_id): 446 | """ 447 | (This feature is currently in the alpha stage and may change in the 448 | future) 449 | 450 | With the application queue API, you can query the queue of a 451 | submitted app 452 | 453 | :param str application_id: The application id 454 | :returns: API response object with JSON data 455 | :rtype: :py:class:`yarn_api_client.base.Response` 456 | """ 457 | path = '/ws/v1/cluster/apps/{appid}/queue'.format(appid=application_id) 458 | 459 | return self.request(path) 460 | 461 | def cluster_change_application_queue(self, application_id, queue): 462 | """ 463 | (This feature is currently in the alpha stage and may change in the 464 | future) 465 | 466 | Move a running app to another queue using a PUT request specifying the 467 | target queue. 
468 | 469 | To perform the PUT operation, authentication has to be 470 | setup for the RM web services. In addition, you must be authorized to 471 | move the app. Currently you can only move the app if you’re using the 472 | Capacity scheduler or the Fair scheduler. 473 | 474 | Please note that in order to move an app, you must have an 475 | authentication filter setup for the HTTP interface. The functionality 476 | requires that a username is set in the HttpServletRequest. If no filter 477 | is setup, the response will be an “UNAUTHORIZED” response. 478 | 479 | :param str application_id: The application id 480 | :param str queue: queue name 481 | :returns: API response object with JSON data 482 | :rtype: :py:class:`yarn_api_client.base.Response` 483 | """ 484 | path = '/ws/v1/cluster/apps/{appid}/queue'.format(appid=application_id) 485 | 486 | return self.request(path, 'PUT', json={"queue": queue}) 487 | 488 | def cluster_get_application_priority(self, application_id): 489 | """ 490 | (This feature is currently in the alpha stage and may change in the 491 | future) 492 | 493 | With the application priority API, you can query the priority of a 494 | submitted app 495 | 496 | :param str application_id: The application id 497 | :returns: API response object with JSON data 498 | :rtype: :py:class:`yarn_api_client.base.Response` 499 | """ 500 | path = '/ws/v1/cluster/apps/{appid}/priority'.format(appid=application_id) 501 | 502 | return self.request(path) 503 | 504 | def cluster_change_application_priority(self, application_id, priority): 505 | """ 506 | (This feature is currently in the alpha stage and may change in the 507 | future) 508 | 509 | Update priority of a running or accepted app using a PUT request 510 | specifying the target priority. 511 | 512 | To perform the PUT operation, authentication has to be 513 | setup for the RM web services. In addition, you must be authorized to 514 | move the app. 
Currently you can only move the app if you’re using the 515 | Capacity scheduler or the Fair scheduler. 516 | 517 | Please note that in order to move an app, you must have an 518 | authentication filter setup for the HTTP interface. The functionality 519 | requires that a username is set in the HttpServletRequest. If no filter 520 | is setup, the response will be an “UNAUTHORIZED” response. 521 | 522 | :param str application_id: The application id 523 | :param int priority: application priority 524 | :returns: API response object with JSON data 525 | :rtype: :py:class:`yarn_api_client.base.Response` 526 | """ 527 | path = '/ws/v1/cluster/apps/{appid}/priority'.format(appid=application_id) 528 | 529 | return self.request(path, 'PUT', json={"priority": priority}) 530 | 531 | def cluster_node_container_memory(self): 532 | """ 533 | This endpoint allows clients to gather info on the maximum memory that 534 | can be allocated per container in the cluster. 535 | :returns: integer specifying the maximum memory that can be allocated in 536 | a container in the cluster 537 | """ 538 | 539 | return _get_maximum_container_memory(CONF_DIR) 540 | 541 | def cluster_scheduler_queue(self, yarn_queue_name): 542 | """ 543 | Given a queue name, this function tries to locate the given queue in 544 | the object returned by scheduler endpoint. 545 | 546 | The queue can be present inside a multilevel structure. This solution 547 | tries to locate the queue using breadth-first-search algorithm. 
548 | 549 | :param str yarn_queue_name: case sensitive queue name 550 | :return: queue, None if not found 551 | :rtype: dict 552 | """ 553 | scheduler = self.cluster_scheduler().data 554 | scheduler_info = scheduler['scheduler']['schedulerInfo'] 555 | 556 | bfs_deque = deque([scheduler_info]) 557 | while bfs_deque: 558 | vertex = bfs_deque.popleft() 559 | if vertex['queueName'] == yarn_queue_name: 560 | return vertex 561 | elif 'queues' in vertex: 562 | for queue in vertex['queues']['queue']: 563 | bfs_deque.append(queue) 564 | 565 | return None 566 | 567 | def cluster_scheduler_queue_availability(self, candidate_partition, availability_threshold): 568 | """ 569 | Checks whether the requested memory satisfies the available space of the queue 570 | This solution takes into consideration the node label concept in cluster. 571 | Following node labelling, the resources can be available in various partition. 572 | Given the partition data it tells you if the used capacity of this partition is spilling 573 | the threshold specified. 574 | 575 | :param str candidate_parition: node label partition (case sensitive) 576 | :param float availability_threshold: value can range between 0 - 100 . 577 | :return: Boolean 578 | """ 579 | 580 | if candidate_partition['absoluteUsedCapacity'] > availability_threshold: 581 | return False 582 | return True 583 | 584 | def cluster_queue_partition(self, candidate_queue, cluster_node_label): 585 | """ 586 | A queue can be divided into multiple partitions having different node labels. 587 | Given the candidate queue and parition node label, this extracts the partition 588 | we are interested in. 589 | 590 | :param dict candidate_queue: queue dictionary 591 | :param str cluster_node_label: case sensitive node label name 592 | :return: partition, None if not Found. 
593 | :rtype: dict 594 | """ 595 | for partition in candidate_queue['capacities']['queueCapacitiesByPartition']: 596 | if partition['partitionName'] == cluster_node_label: 597 | return partition 598 | return None 599 | 600 | def cluster_reservations(self, queue=None, reservation_id=None, 601 | start_time=None, end_time=None, 602 | include_resource_allocations=None): 603 | """ 604 | The Cluster Reservation API can be used to list reservations. When listing reservations 605 | the user must specify the constraints in terms of a queue, reservation-id, start time or 606 | end time. The user must also specify whether or not to include the full resource allocations 607 | of the reservations being listed. The resulting page returns a response containing 608 | information related to the reservation such as the acceptance time, the user, the resource 609 | allocations, the reservation-id, as well as the reservation definition. 610 | 611 | :param str queue: the queue name containing the reservations to be listed. if not set, this 612 | value will default to “default” 613 | :param str reservation_id: the reservation-id of the reservation which will be listed. If 614 | this parameter is present, start-time and end-time will be ignored. 615 | :param str start_time: reservations that end after this start-time will be listed. If 616 | unspecified or invalid, this will default to 0. 617 | :param str end_time: reservations that start after this end-time will be listed. If 618 | unspecified or invalid, this will default to Long.MaxValue. 619 | :param str include_resource_allocations: true or false. If true, the resource allocations 620 | of the reservation will be included in the response. If false, no resource allocations 621 | will be included in the response. This will default to false. 
622 | :returns: API response object with JSON data 623 | :rtype: :py:class:`yarn_api_client.base.Response` 624 | """ 625 | path = '/ws/v1/cluster/reservation/list' 626 | 627 | loc_args = ( 628 | ('queue', queue), 629 | ('reservation-id', reservation_id), 630 | ('start-time', start_time), 631 | ('end-time', end_time), 632 | ('include-resource-allocations', include_resource_allocations) 633 | ) 634 | 635 | params = self.construct_parameters(loc_args) 636 | 637 | return self.request(path, params=params) 638 | 639 | def cluster_new_delegation_token(self, renewer): 640 | """ 641 | (This feature is currently in the alpha stage and may change in the 642 | future) 643 | 644 | API to create delegation token. 645 | 646 | All delegation token requests must be carried out on a Kerberos 647 | authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos 648 | connection will result in a FORBIDDEN response. In case of renewing a token, only 649 | the renewer specified when creating the token can renew the token. Other users(including 650 | the owner) are forbidden from renewing tokens. 651 | 652 | :param str renewer: The user who is allowed to renew the delegation token 653 | :returns: API response object with JSON data 654 | :rtype: :py:class:`yarn_api_client.base.Response` 655 | """ 656 | path = '/ws/v1/cluster/delegation-token' 657 | 658 | return self.request(path, 'POST', json={"renewer": renewer}) 659 | 660 | def cluster_renew_delegation_token(self, delegation_token): 661 | """ 662 | (This feature is currently in the alpha stage and may change in the 663 | future) 664 | 665 | API to renew delegation token. 666 | 667 | All delegation token requests must be carried out on a Kerberos 668 | authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos 669 | connection will result in a FORBIDDEN response. In case of renewing a token, only 670 | the renewer specified when creating the token can renew the token. 
Other users(including 671 | the owner) are forbidden from renewing tokens. 672 | 673 | :param str delegation_token: Delegation token 674 | :returns: API response object with JSON data 675 | :rtype: :py:class:`yarn_api_client.base.Response` 676 | """ 677 | path = '/ws/v1/cluster/delegation-token/expiration' 678 | 679 | return self.request(path, 'POST', headers={ 680 | "Hadoop-YARN-RM-Delegation-Token": delegation_token 681 | }) 682 | 683 | def cluster_cancel_delegation_token(self, delegation_token): 684 | """ 685 | (This feature is currently in the alpha stage and may change in the 686 | future) 687 | 688 | API to cancel delegation token. 689 | 690 | All delegation token requests must be carried out on a Kerberos 691 | authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos 692 | connection will result in a FORBIDDEN response. 693 | 694 | :param str delegation_token: Delegation token 695 | :returns: API response object with JSON data 696 | :rtype: :py:class:`yarn_api_client.base.Response` 697 | """ 698 | path = '/ws/v1/cluster/delegation-token' 699 | 700 | return self.request(path, 'DELETE', headers={ 701 | "Hadoop-YARN-RM-Delegation-Token": delegation_token 702 | }) 703 | 704 | def cluster_new_reservation(self): 705 | """ 706 | (This feature is currently in the alpha stage and may change in the 707 | future) 708 | 709 | Use the New Reservation API, to obtain a reservation-id which can then be used as part of 710 | the Cluster Reservation API Submit to submit reservations. 711 | 712 | :returns: API response object with JSON data 713 | :rtype: :py:class:`yarn_api_client.base.Response` 714 | """ 715 | path = '/ws/v1/cluster/reservation/new-reservation' 716 | 717 | return self.request(path, 'POST') 718 | 719 | def cluster_submit_reservation(self, data): 720 | """ 721 | The Cluster Reservation API can be used to submit reservations. 
When submitting a 722 | reservation the user specifies the constraints in terms of resources, and time that is 723 | required. The resulting response is successful if the reservation can be made. If a 724 | reservation-id is used to submit a reservation multiple times, the request will succeed 725 | if the reservation definition is the same, but only one reservation will be created. If 726 | the reservation definition is different, the server will respond with an error response. 727 | When the reservation is made, the user can use the reservation-id used to submit the 728 | reservation to get access to the resources by specifying it as part of Cluster Submit 729 | Applications API. 730 | 731 | For data body definition refer to: 732 | (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Reservation_API_Submit) 733 | 734 | :param dict data: Reservation details 735 | :returns: API response object with JSON data 736 | :rtype: :py:class:`yarn_api_client.base.Response` 737 | """ 738 | path = '/ws/v1/cluster/reservation/submit' 739 | 740 | return self.request(path, 'POST', json=data) 741 | 742 | def cluster_update_reservation(self, data): 743 | """ 744 | The Cluster Reservation API Update can be used to update existing reservations.Update of a 745 | Reservation works similarly to submit described above, but the user submits the 746 | reservation-id of an existing reservation to be updated. The semantics is a try-and-swap, 747 | successful operation will modify the existing reservation based on the requested update 748 | parameter, while a failed execution will leave the existing reservation unchanged. 
749 | 750 | For data body definition refer to: 751 | (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Reservation_API_Update) 752 | 753 | :param dict data: Reservation details 754 | :returns: API response object with JSON data 755 | :rtype: :py:class:`yarn_api_client.base.Response` 756 | """ 757 | path = '/ws/v1/cluster/reservation/update' 758 | 759 | return self.request(path, 'POST', json=data) 760 | 761 | def cluster_delete_reservation(self, reservation_id): 762 | """ 763 | The Cluster Reservation API Update can be used to update existing reservations.Update of a 764 | Reservation works similarly to submit described above, but the user submits the 765 | reservation-id of an existing reservation to be updated. The semantics is a try-and-swap, 766 | successful operation will modify the existing reservation based on the requested update 767 | parameter, while a failed execution will leave the existing reservation unchanged. 768 | 769 | :param str reservation_id: The id of the reservation to be deleted (the system automatically 770 | looks up the right queue from this) 771 | :returns: API response object with JSON data 772 | :rtype: :py:class:`yarn_api_client.base.Response` 773 | """ 774 | path = '/ws/v1/cluster/reservation/delete' 775 | 776 | return self.request(path, 'POST', json={'reservation-id': reservation_id}) 777 | 778 | def cluster_application_timeouts(self, application_id): 779 | """ 780 | Cluster Application Timeouts API can be used to get all configured timeouts of an 781 | application. When you run a GET operation on this resource, a collection of timeout objects 782 | is returned. Each timeout object is composed of a timeout type, expiry-time and remaining 783 | time in seconds. 
784 | 785 | :param str application_id: The application id 786 | :returns: API response object with JSON data 787 | :rtype: :py:class:`yarn_api_client.base.Response` 788 | """ 789 | path = '/ws/v1/cluster/apps/{appid}/timeouts'.format( 790 | appid=application_id) 791 | 792 | return self.request(path) 793 | 794 | def cluster_application_timeout(self, application_id, timeout_type): 795 | """ 796 | The Cluster Application Timeout resource contains information about timeout. 797 | 798 | :param str application_id: The application id 799 | :param str timeout_type: Timeout type. Valid values are the members of the 800 | ApplicationTimeoutType enum. LIFETIME is currently the only valid value. . 801 | :returns: API response object with JSON data 802 | :rtype: :py:class:`yarn_api_client.base.Response` 803 | """ 804 | path = '/ws/v1/cluster/apps/{appid}/timeouts/{timeout_type}'.format( 805 | appid=application_id, timeout_type=timeout_type) 806 | 807 | return self.request(path) 808 | 809 | def cluster_update_application_timeout(self, application_id, timeout_type, expiry_time): 810 | """ 811 | Update timeout of an application for given timeout type. 812 | 813 | :param str application_id: The application id 814 | :param str timeout_type: Timeout type. Valid values are the members of the 815 | ApplicationTimeoutType enum. LIFETIME is currently the only valid value. 816 | :param str expiry_time: Time at which the application will expire in 817 | ISO8601 yyyy-MM-dd’T’HH:mm:ss.SSSZ format. 
818 | :returns: API response object with JSON data 819 | :rtype: :py:class:`yarn_api_client.base.Response` 820 | """ 821 | path = '/ws/v1/cluster/apps/{appid}/timeout'.format(appid=application_id) 822 | 823 | return self.request(path, 'PUT', json={ 824 | "timeout": {"type": timeout_type, "expiryTime": expiry_time} 825 | }) 826 | 827 | def cluster_scheduler_conf_mutation(self): 828 | """ 829 | (This feature is currently in the alpha stage and may change in the 830 | future) 831 | 832 | API to retrieve the scheduler’s configuration that is currently loaded into 833 | scheduler’s context. 834 | 835 | :returns: API response object with JSON data 836 | :rtype: :py:class:`yarn_api_client.base.Response` 837 | """ 838 | path = '/ws/v1/cluster/scheduler-conf' 839 | 840 | return self.request(path) 841 | 842 | def cluster_modify_scheduler_conf_mutation(self, data): 843 | """ 844 | (This feature is currently in the alpha stage and may change in the 845 | future) 846 | 847 | API to modify the scheduler configuration 848 | 849 | For data body definition refer to: 850 | (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Scheduler_Configuration_Mutation_API) 851 | 852 | :param dict data: sched-conf dictionary object 853 | :returns: API response object with JSON data 854 | :rtype: :py:class:`yarn_api_client.base.Response` 855 | """ 856 | path = '/ws/v1/cluster/scheduler-conf' 857 | 858 | return self.request(path, 'PUT', json=data) 859 | 860 | def cluster_container_signal(self, container_id, command): 861 | """ 862 | With the Container Signal API, you can send a signal to a specified container 863 | with one of the following commands: 864 | OUTPUT_THREAD_DUMP, GRACEFUL_SHUTDOWN and FORCEFUL_SHUTDOWN. 
865 | 866 | :param str container_id: container id 867 | :param str command: signal command 868 | :returns: API response object with JSON data 869 | :rtype: :py:class:`yarn_api_client.base.Response` 870 | """ 871 | 872 | validate_cluster_container_status(command, True) 873 | 874 | path = '/ws/v1/cluster/containers/{containerid}/signal/{command}'.format( 875 | containerid=container_id, 876 | command=command 877 | ) 878 | 879 | return self.request(path, 'POST') 880 | 881 | def scheduler_activities(self, node_id=None, group_by=None): 882 | """ 883 | The scheduler activities RESTful API is available if you are using capacity scheduler 884 | and can fetch scheduler activities info recorded in a scheduling cycle. 885 | 886 | The API returns a message that includes important scheduling activities info which 887 | has a hierarchical layout with following fields: 888 | 889 | * Activities - Activities is the root object of scheduler activities. 890 | * Allocations - Allocations are allocation attempts based on partition or reservation. 891 | * Hierarchical Queues - Hierarchical Queues where the scheduler have been tried to allocate 892 | containers to, each of them contains queue name, allocation state, optional diagnostic and 893 | optional children. 894 | * Applications - Applications are shown as children of leaf queue, each of them contains the 895 | basic info about the application. 896 | * Requests - Requests are shown as children of application, each of them contains the basic 897 | info about the request. 898 | * Nodes - Nodes are shown as children of request, each of them contains node id, allocation 899 | state, optional name which should appear after allocating or reserving a container on the 900 | node, and optional diagnostic which should present if failed to allocate or reserve a 901 | container on this node. For aggregated nodes grouped by allocation state and diagnostic, 902 | each of them contains allocation state, aggregated node IDs and optional diagnostic. 
903 | 904 | :param str node_id: specified node ID, if not specified, the scheduler will record the 905 | scheduling activities info for the next scheduling cycle on all nodes. 906 | :param str group_by: aggregation type of application activities, currently only support 907 | “diagnostic” with which user can query aggregated activities grouped by allocation 908 | state and diagnostic 909 | :returns: API response object with JSON data 910 | :rtype: :py:class:`yarn_api_client.base.Response` 911 | """ 912 | path = '/ws/v1/cluster/scheduler/activities' 913 | 914 | loc_args = ( 915 | ('nodeId', node_id), 916 | ('groupBy', group_by) 917 | ) 918 | 919 | params = self.construct_parameters(loc_args) 920 | 921 | return self.request(path, params=params) 922 | 923 | def application_activities(self, application_id, max_time=None, 924 | request_priorities=None, 925 | allocation_request_ids=None, group_by=None, 926 | limit=None, actions=None, summarize=None): 927 | """ 928 | Application activities RESTful API is available if you are using capacity scheduler and can 929 | fetch useful scheduling info for a specified application, the response has a hierarchical 930 | layout with following fields: 931 | 932 | * AppActivities - AppActivities are root element of application activities within basic 933 | information. 934 | * Allocations - Allocations are allocation attempts at app level queried from the cache. 935 | * Requests - Requests are shown as children of allocation, each of them contains request 936 | name, request priority, allocation request id, allocation state and optional children. 937 | * Nodes - Nodes are shown as children of request, each of them contains node id, allocation 938 | state, optional name which should appear after allocating or reserving a container on the 939 | node, and optional diagnostic which should appear if failed to allocate or reserve a 940 | container on the node. 
For aggregated nodes grouped by allocation state and diagnostic, each 941 | of them contains allocation state, aggregated node IDs and optional diagnostic. 942 | 943 | :param int maxTime: the max duration in seconds from now on for recording application 944 | activities. If not specified, this will default to 3 (seconds). 945 | :param List[int] requestPriorities: the priorities of request, used to filter application 946 | activities 947 | :param List[int] allocationRequestIds: the allocation request IDs of request, used to filter 948 | application activities 949 | :param str groupBy: the aggregation type of application activities, currently only 950 | support “diagnostic” with which user can query aggregated activities grouped by 951 | allocation state and diagnostic. 952 | :param str limit: the limit of application activities which can reduce the cost for both 953 | server and client side. 954 | :param List[str] actions: the required actions of app activities including “refresh” and 955 | “get” 956 | :param boolean summarize: whether app activities in multiple scheduling processes need to be 957 | summarized, specified as boolean, it’s useful when multi-node placement disabled, because 958 | only one node can be considered in a single scheduling process, enabling this can give us a 959 | summary with diagnostics on all nodes. 
960 | :returns: API response object with JSON data 961 | :rtype: :py:class:`yarn_api_client.base.Response` 962 | """ 963 | path = '/ws/v1/cluster/scheduler/app-activities/{appid}'.format(appid=application_id) 964 | 965 | loc_args = ( 966 | ('maxTime', max_time), 967 | ('requestPriorities', ','.join(request_priorities) if request_priorities else None), 968 | ('allocationRequestIds', ','.join(allocation_request_ids) if allocation_request_ids else None), 969 | ('groupBy', group_by), 970 | ('limit', limit), 971 | ('actions', ','.join(actions) if actions else None), 972 | ('summarize', summarize) 973 | ) 974 | 975 | params = self.construct_parameters(loc_args) 976 | 977 | return self.request(path, params=params) 978 | --------------------------------------------------------------------------------