├── MANIFEST.in
├── yarn_api_client
    ├── __main__.py
    ├── errors.py
    ├── __init__.py
    ├── auth.py
    ├── constants.py
    ├── base.py
    ├── hadoop_conf.py
    ├── node_manager.py
    ├── history_server.py
    ├── application_master.py
    ├── main.py
    └── resource_manager.py
├── docs
    ├── node_manager.rst
    ├── base.rst
    ├── history_server.rst
    ├── resource_manager.rst
    ├── application_master.rst
    ├── index.rst
    ├── Makefile
    └── conf.py
├── itests
    ├── __init__.py
    └── integration_test_resource_manager.py
├── tests
    ├── __init__.py
    ├── test_main.py
    ├── test_constants.py
    ├── test_node_manager.py
    ├── test_base.py
    ├── test_application_master.py
    ├── test_history_server.py
    ├── test_hadoop_conf.py
    └── test_resource_manager.py
├── requirements.yml
├── .gitattributes
├── tox.ini
├── setup.cfg
├── .gitignore
├── .github
    └── workflows
    │   └── build.yml
├── LICENSE
├── Makefile
├── setup.py
└── README.md


/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md


--------------------------------------------------------------------------------
/yarn_api_client/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from .main import main
3 | 
4 | if __name__ == '__main__':
5 |     main()
6 | 


--------------------------------------------------------------------------------
/docs/node_manager.rst:
--------------------------------------------------------------------------------
1 | NodeManager API's.
2 | =======================
3 | 
4 | .. automodule:: yarn_api_client.node_manager
5 |    :members:
6 | 


--------------------------------------------------------------------------------
/docs/base.rst:
--------------------------------------------------------------------------------
1 | Base Response class
2 | ==========================
3 | 
4 | .. autoclass:: yarn_api_client.base.Response
5 |    :members: data
6 | 


--------------------------------------------------------------------------------
/docs/history_server.rst:
--------------------------------------------------------------------------------
1 | History Server API's.
2 | ==========================
3 | 
4 | .. automodule:: yarn_api_client.history_server
5 |    :members:
6 | 


--------------------------------------------------------------------------------
/itests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | try:
3 |     from unittest2 import TestCase
4 | except ImportError:
5 |     from unittest import TestCase
6 | 


--------------------------------------------------------------------------------
/docs/resource_manager.rst:
--------------------------------------------------------------------------------
1 | ResourceManager API's.
2 | ===========================
3 | 
4 | .. automodule:: yarn_api_client.resource_manager
5 |    :members:
6 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | try:
3 |     from unittest2 import TestCase # NOQA
4 | except ImportError:
5 |     from unittest import TestCase # NOQA
6 | 


--------------------------------------------------------------------------------
/docs/application_master.rst:
--------------------------------------------------------------------------------
1 | MapReduce Application Master API's.
2 | ========================================
3 | 
4 | .. automodule:: yarn_api_client.application_master
5 |    :members:
6 | 


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from tests import TestCase
 3 | 
 4 | import yarn_api_client.main as m
 5 | 
 6 | 
 7 | class MainTestCase(TestCase):
 8 |     def test_get_parser(self):
 9 |         m.get_parser()
10 | 


--------------------------------------------------------------------------------
/yarn_api_client/errors.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | 
 4 | class APIError(Exception):
 5 |     pass
 6 | 
 7 | 
 8 | class ConfigurationError(APIError):
 9 |     pass
10 | 
11 | 
12 | class IllegalArgumentError(APIError):
13 |     pass
14 | 


--------------------------------------------------------------------------------
/yarn_api_client/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __version__ = '1.0.4.dev0'
3 | __all__ = ['ApplicationMaster', 'HistoryServer', 'NodeManager', 'ResourceManager']
4 | 
5 | from .application_master import ApplicationMaster
6 | from .history_server import HistoryServer
7 | from .node_manager import NodeManager
8 | from .resource_manager import ResourceManager
9 | 


--------------------------------------------------------------------------------
/requirements.yml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - conda-forge
 3 |   - defaults
 4 | dependencies:
 5 |   - pip
 6 |   - requests>=2.7,<3.0
 7 | 
 8 |   # Test Requirements
 9 |   - mock
10 |   - nose
11 |   - tox
12 |   - pip:
13 |       - requests_mock
14 | 
15 |   # Code Style
16 |   - flake8
17 | 
18 |   # Documentation Requirements
19 |   - recommonmark
20 |   - sphinx=1.8.3
21 |   - sphinx_rtd_theme
22 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Set the default behavior to have all files normalized to Unix-style
 2 | # line endings upon check-in.
 3 | * text=auto
 4 |  # Declare files that will always have CRLF line endings on checkout.
 5 | *.bat text eol=crlf
 6 |  # Denote all files that are truly binary and should not be modified.
 7 | *.dll binary
 8 | *.exp binary
 9 | *.lib binary
10 | *.pdb binary
11 | *.exe binary
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/test_constants.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from tests import TestCase
 3 | 
 4 | from yarn_api_client import constants
 5 | 
 6 | 
 7 | class ConstantsTestCase(TestCase):
 8 |     def test_stats_len(self):
 9 |         self.assertEqual(8, len(constants.YarnApplicationState))
10 |         self.assertEqual(6, len(constants.ApplicationState))
11 |         self.assertEqual(4, len(constants.FinalApplicationStatus))
12 |         self.assertEqual(14, len(constants.JobStateInternal))
13 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | 
 2 | [tox]
 3 | envlist = py36, py37, py38, py39, py310
 4 | 
 5 | [gh-actions]
 6 | python =
 7 |     3.6: py36
 8 |     3.7: py37
 9 |     3.8: py38
10 |     3.9: py39
11 |     3.10: py310
12 | 
13 | [testenv]
14 | deps =
15 |     coverage
16 |     mock
17 |     py36: cryptography<=3.2.2  # requests-kerberos pulls in newer crypt that requires rust compiler on 3.6
18 |     requests
19 |     pywinrm[kerberos]
20 |     requests-kerberos
21 |     requests_mock
22 | commands = coverage run --source=yarn_api_client setup.py test
23 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bdist_wheel]
 2 | universal=0
 3 | 
 4 | [metadata]
 5 | description-file=README.rst
 6 | license_file = LICENSE
 7 | 
 8 | [flake8]
 9 | # References:
10 | # https://flake8.readthedocs.io/en/latest/user/configuration.html
11 | # https://flake8.readthedocs.io/en/latest/user/error-codes.html
12 | exclude = __init__.py
13 | ignore =
14 |     # Import formatting
15 |     E4,
16 |     # Comparing types instead of isinstance
17 |     E721,
18 |     # Assigning lambda expression
19 |     E731,
20 |     # Ambiguous variable names
21 |     E741,
22 |     # Allow breaks after binary operators
23 |     W504
24 | max-line-length = 120
25 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. yarn-api-client documentation master file, created by
 2 |    sphinx-quickstart on Thu Jul 31 22:07:17 2014.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to yarn-api-client's documentation!
 7 | ===========================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |     :maxdepth: 2
13 | 
14 |     base
15 |     resource_manager
16 |     node_manager
17 |     application_master
18 |     history_server
19 | 
20 | 
21 | Indices and tables
22 | ==================
23 | 
24 | * :ref:`genindex`
25 | * :ref:`modindex`
26 | * :ref:`search`
27 | 
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # -*- mode: gitignore; -*-
 2 | *~
 3 | \#*\#
 4 | 
 5 | # Mac
 6 | .DS_Store
 7 | 
 8 | # Eclipse
 9 | .classpath
10 | .project
11 | .settings/
12 | target/
13 | 
14 | # Intellij
15 | .idea/
16 | .idea_modules/
17 | *.iml
18 | *.iws
19 | *.class
20 | *.log
21 | 
22 | # Others
23 | .checkstyle
24 | .fbExcludeFilterFile
25 | 
26 | # Byte-compiled / optimized / DLL files
27 | __pycache__/
28 | *.py[cod]
29 | 
30 | # C extensions
31 | *.so
32 | 
33 | # Distribution / packaging
34 | .Python
35 | env/
36 | bin/
37 | build/
38 | develop-eggs/
39 | dist/
40 | eggs/
41 | lib/
42 | lib64/
43 | parts/
44 | sdist/
45 | var/
46 | *.egg-info/
47 | .installed.cfg
48 | *.egg
49 | 
50 | # Installer logs
51 | pip-log.txt
52 | pip-delete-this-directory.txt
53 | 
54 | # Unit test / coverage reports
55 | htmlcov/
56 | .tox/
57 | .coverage
58 | .cache
59 | nosetests.xml
60 | coverage.xml
61 | 
62 | # Translations
63 | *.mo
64 | 
65 | # Mr Developer
66 | .mr.developer.cfg
67 | .project
68 | .pydevproject
69 | 
70 | # Rope
71 | .ropeproject
72 | 
73 | # Django stuff:
74 | *.log
75 | *.pot
76 | 
77 | # Sphinx documentation
78 | docs/_build/
79 | 


--------------------------------------------------------------------------------
/yarn_api_client/auth.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | class SimpleAuth(requests.auth.AuthBase):
 4 |     def __init__(self, username="yarn"):
 5 |         self.username = username
 6 |         self.auth_token = None
 7 |         self.auth_done = False
 8 | 
 9 |     def __call__(self, request):
10 |         if not self.auth_done:
11 |             _session = requests.Session()
12 |             r = _session.get(request.url, params={"user.name": self.username}, allow_redirects=False)
13 |             r.raise_for_status()
14 | 
15 |             if 'This is standby RM.' not in r.text:
16 |                 self.auth_token = _session.cookies.get_dict()['hadoop.auth']
17 |                 self.auth_done = True
18 | 
19 |         # Borrowed from https://github.com/psf/requests/issues/2532#issuecomment-90126896
20 |         if 'Cookie' in request.headers:
21 |             old_cookies = request.headers['Cookie']
22 |             all_cookies = '; '.join([old_cookies, "{0}={1}".format("hadoop.auth", self.auth_token)])
23 |             request.headers['Cookie'] = all_cookies
24 |         else:
25 |             request.prepare_cookies({"hadoop.auth": self.auth_token})
26 |         return request
27 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Builds
 2 | on:
 3 |   push:
 4 |     branches: '*'
 5 |   pull_request:
 6 |     branches: '*'
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ${{ matrix.os }}
11 |     env:
12 |       TOX_ENV: py${{ matrix.python-version}}
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         os: [ubuntu-latest, macos-latest]
17 |         python-version: [ '3.6', '3.7', '3.8', '3.9' ]
18 |     steps:
19 |     - name: Checkout
20 |       uses: actions/checkout@v2
21 |       with:
22 |         clean: true
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v1
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |         architecture: 'x64'
28 |     - name: Install Dependencies (Linux)
29 |       run: sudo apt-get install libkrb5-dev
30 |       if: matrix.os == 'ubuntu-latest'
31 |     - name: Install dependencies
32 |       run: |
33 |         pip install --upgrade setuptools pip tox tox-gh-actions coveralls
34 |         pip freeze
35 |     - name: Run the tests
36 |       run: tox
37 |     - name: Upload coverage data to coveralls.io
38 |       env:
39 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40 |         COVERALLS_PARALLEL: true
41 |       run: coveralls --service=github
42 |     - name: Code coverage
43 |       env:
44 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45 |       run: |
46 |         coveralls --finish
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Eduard Iskandarov
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of the {organization} nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean clean-docs clean-test clean-pyc clean-build dist docs help
 2 | .DEFAULT_GOAL := help
 3 | 
 4 | SA:=source activate
 5 | ENV:=hadoop-yarn-api-python-client
 6 | 
 7 | help:
 8 | 	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
 9 | 
10 | 
11 | ## Setup conda environments
12 | env: ## Make a dev environment
13 | 	-conda env create --file requirements.yml --name $(ENV)
14 | 
15 | activate: ## Activate the virtualenv (default: hadoop-yarn-api-python-client)
16 | 	@echo "$(SA) $(ENV)"
17 | 
18 | nuke: ## Make clean + remove conda env
19 | 	-conda env remove -n $(ENV) -y
20 | 
21 | ## Clean different build artifacts from multiple build phases
22 | 
23 | clean: clean-build clean-pyc clean-test clean-docs ## remove all build, test, coverage and Python artifacts
24 | 
25 | clean-build:
26 | 	rm -fr build/
27 | 	rm -fr dist/
28 | 	rm -fr .eggs/
29 | 	find . -name '*.egg-info' -exec rm -fr {} +
30 | 	find . -name '*.egg' -exec rm -f {} +
31 | 
32 | clean-pyc:
33 | 	find . -name '*.pyc' -exec rm -f {} +
34 | 	find . -name '*.pyo' -exec rm -f {} +
35 | 	find . -name '*~' -exec rm -f {} +
36 | 	find . -name '__pycache__' -exec rm -fr {} +
37 | 
38 | clean-test:
39 | 	rm -fr .tox/
40 | 	rm -f .coverage
41 | 	rm -fr htmlcov/
42 | 	rm -fr .pytest_cache
43 | 
44 | clean-docs:
45 | 	$(MAKE) -C docs clean
46 | 
47 | lint: ## check style with flake8
48 | 	$(SA) $(ENV) && flake8 yarn-api-client itests tests
49 | 
50 | test: ## run tests quickly with the default Python
51 | 	$(SA) $(ENV) && nosetests -v tests
52 | 
53 | docs: clean-docs ## generate Sphinx HTML documentation, including API docs
54 | 	$(SA) $(ENV) && $(MAKE) -C docs html
55 | 
56 | release: dist ## package and upload a release
57 | 	twine upload dist/*
58 | 
59 | dist: clean ## builds source and wheel package
60 | 	$(SA) $(ENV) && python setup.py bdist_wheel
61 | 	$(SA) $(ENV) && python setup.py sdist
62 | 	ls -l dist
63 | 
64 | install: clean ## install the package to the active Python's site-packages
65 | 	$(SA) $(ENV) && python setup.py install
66 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import codecs
 3 | import os
 4 | import re
 5 | from setuptools import setup, find_packages
 6 | 
 7 | 
 8 | def read(*parts):
 9 |     filename = os.path.join(os.path.dirname(__file__), *parts)
10 |     with codecs.open(filename, encoding='utf-8') as fp:
11 |         return fp.read()
12 | 
13 | 
14 | def find_version(*file_paths):
15 |     version_file = read(*file_paths)
16 |     version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
17 |                               version_file, re.M)
18 |     if version_match:
19 |         return version_match.group(1)
20 |     raise RuntimeError("Unable to find version string.")
21 | 
22 | setup(
23 |     name = 'yarn-api-client',
24 |     version = find_version('yarn_api_client', '__init__.py'),
25 |     description='Python client for Hadoop® YARN API',
26 |     long_description=read('README.md'),
27 |     long_description_content_type='text/markdown',
28 |     packages = find_packages(exclude=['tests','itests']),
29 | 
30 |     install_requires = [
31 |         'requests>=2.7,<3.0',
32 |     ],
33 | 
34 |     entry_points = {
35 |         'console_scripts': [
36 |             'yarn_client = yarn_api_client.main:main',
37 |         ],
38 |     },
39 | 
40 |     tests_require = ['mock', 'flake8'],
41 |     test_suite = 'tests',
42 | 
43 |     author = 'Iskandarov Eduard',
44 |     author_email = 'eduard.iskandarov@ya.ru',
45 |     maintainer = 'Dmitry Romanenko',
46 |     maintainer_email = 'dmitry@romanenko.in',
47 |     license = 'BSD',
48 |     url = 'https://github.com/CODAIT/hadoop-yarn-api-python-client',
49 |     classifiers = [
50 |         'Intended Audience :: Developers',
51 |         'License :: OSI Approved :: BSD License',
52 |         'Operating System :: OS Independent',
53 |         'Programming Language :: Python :: 3.6',
54 |         'Programming Language :: Python :: 3.7',
55 |         'Programming Language :: Python :: 3.8',
56 |         'Programming Language :: Python :: 3.9',
57 |         'Programming Language :: Python :: 3.10',
58 |         'Topic :: System :: Distributed Computing',
59 |     ],
60 | )
61 | 


--------------------------------------------------------------------------------
/yarn_api_client/constants.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | ACCEPTED = 'ACCEPTED'
 3 | FAILED = 'FAILED'
 4 | FINISHED = 'FINISHED'
 5 | KILLED = 'KILLED'
 6 | NEW = 'NEW'
 7 | NEW_SAVING = 'NEW_SAVING'
 8 | RUNNING = 'RUNNING'
 9 | SUBMITTED = 'SUBMITTED'
10 | SUCCEEDED = 'SUCCEEDED'
11 | UNDEFINED = 'UNDEFINED'
12 | INITING = 'INITING'
13 | INITED = 'INITED'
14 | FINISHING_CONTAINERS_WAIT = 'FINISHING_CONTAINERS_WAIT'
15 | APPLICATION_RESOURCES_CLEANINGUP = 'APPLICATION_RESOURCES_CLEANINGUP'
16 | SETUP = 'SETUP'
17 | COMMITTING = 'COMMITTING'
18 | FAIL_WAIT = 'FAIL_WAIT'
19 | FAIL_ABORT = 'FAIL_ABORT'
20 | KILL_WAIT = 'KILL_WAIT'
21 | KILL_ABORT = 'KILL_ABORT'
22 | ERROR = 'ERROR'
23 | REBOOT = 'REBOOT'
24 | OUTPUT_THREAD_DUMP = 'OUTPUT_THREAD_DUMP'
25 | GRACEFUL_SHUTDOWN = 'GRACEFUL_SHUTDOWN'
26 | FORCEFUL_SHUTDOWN = 'FORCEFUL_SHUTDOWN'
27 | 
28 | YarnApplicationState = (
29 |     (ACCEPTED, 'Application has been accepted by the scheduler.'),
30 |     (FAILED, 'Application which failed.'),
31 |     (FINISHED, 'Application which finished successfully.'),
32 |     (KILLED, 'Application which was terminated by a user or admin.'),
33 |     (NEW, 'Application which was just created.'),
34 |     (NEW_SAVING, 'Application which is being saved.'),
35 |     (RUNNING, 'Application which is currently running.'),
36 |     (SUBMITTED, 'Application which has been submitted.'),
37 | )
38 | 
39 | 
40 | ApplicationState = (
41 |     (NEW, NEW),
42 |     (INITING, INITING),
43 |     (RUNNING, RUNNING),
44 |     (FINISHING_CONTAINERS_WAIT, FINISHING_CONTAINERS_WAIT),
45 |     (APPLICATION_RESOURCES_CLEANINGUP, APPLICATION_RESOURCES_CLEANINGUP),
46 |     (FINISHED, FINISHED),
47 | )
48 | 
49 | 
50 | FinalApplicationStatus = (
51 |     (FAILED, 'Application which failed.'),
52 |     (KILLED, 'Application which was terminated by a user or admin.'),
53 |     (SUCCEEDED, 'Application which finished successfully.'),
54 |     (UNDEFINED, 'Undefined state when either the application has not yet finished.')
55 | )
56 | 
57 | 
58 | JobStateInternal = (
59 |     (NEW, NEW),
60 |     (SETUP, SETUP),
61 |     (INITED, INITED),
62 |     (RUNNING, RUNNING),
63 |     (COMMITTING, COMMITTING),
64 |     (SUCCEEDED, SUCCEEDED),
65 |     (FAIL_WAIT, FAIL_WAIT),
66 |     (FAIL_ABORT, FAIL_ABORT),
67 |     (FAILED, FAILED),
68 |     (KILL_WAIT, KILL_WAIT),
69 |     (KILL_ABORT, KILL_ABORT),
70 |     (KILLED, KILLED),
71 |     (ERROR, ERROR),
72 |     (REBOOT, REBOOT),
73 | )
74 | 
75 | ClusterContainerSignal = (
76 |     (OUTPUT_THREAD_DUMP, OUTPUT_THREAD_DUMP),
77 |     (GRACEFUL_SHUTDOWN, GRACEFUL_SHUTDOWN),
78 |     (FORCEFUL_SHUTDOWN, FORCEFUL_SHUTDOWN)
79 | )
80 | 


--------------------------------------------------------------------------------
/tests/test_node_manager.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from mock import patch
 3 | from tests import TestCase
 4 | 
 5 | from yarn_api_client.node_manager import NodeManager
 6 | from yarn_api_client.errors import IllegalArgumentError
 7 | 
 8 | 
 9 | @patch('yarn_api_client.node_manager.NodeManager.request')
10 | class NodeManagerTestCase(TestCase):
11 |     def setUp(self):
12 |         self.nm = NodeManager('localhost')
13 | 
14 |     def test_node_information(self, request_mock):
15 |         self.nm.node_information()
16 |         request_mock.assert_called_with('/ws/v1/node/info')
17 | 
18 |     def test_node_applications(self, request_mock):
19 |         self.nm.node_applications('RUNNING', 'root')
20 |         request_mock.assert_called_with('/ws/v1/node/apps',
21 |                                         params={"state": 'RUNNING', "user": 'root'})
22 | 
23 |         self.nm.node_applications()
24 |         request_mock.assert_called_with('/ws/v1/node/apps', params={})
25 | 
26 |         with self.assertRaises(IllegalArgumentError):
27 |             self.nm.node_applications('ololo', 'root')
28 | 
29 |     def test_node_application(self, request_mock):
30 |         self.nm.node_application('app_1')
31 |         request_mock.assert_called_with('/ws/v1/node/apps/app_1')
32 | 
33 |     def test_node_containers(self, request_mock):
34 |         self.nm.node_containers()
35 |         request_mock.assert_called_with('/ws/v1/node/containers')
36 | 
37 |     def test_node_container(self, request_mock):
38 |         self.nm.node_container('container_1')
39 |         request_mock.assert_called_with('/ws/v1/node/containers/container_1')
40 | 
41 |     def test_auxiliary_services(self, request_mock):
42 |         self.nm.auxiliary_services()
43 |         request_mock.assert_called_with('/ws/v1/node/auxiliaryservices')
44 | 
45 |     def test_auxiliary_services_update(self, request_mock):
46 |         self.nm.auxiliary_services_update({
47 |           "services": [
48 |             {
49 |               "name": "mapreduce_shuffle",
50 |               "version": "2",
51 |               "configuration": {
52 |                 "properties": {
53 |                   "class.name": "org.apache.hadoop.mapred.ShuffleHandler",
54 |                   "mapreduce.shuffle.transfer.buffer.size": "102400",
55 |                   "mapreduce.shuffle.port": "13563"
56 |                 }
57 |               }
58 |             }
59 |           ]
60 |         })
61 |         request_mock.assert_called_with('/ws/v1/node/auxiliaryservices', 'PUT', json={
62 |           "services": [
63 |             {
64 |               "name": "mapreduce_shuffle",
65 |               "version": "2",
66 |               "configuration": {
67 |                 "properties": {
68 |                   "class.name": "org.apache.hadoop.mapred.ShuffleHandler",
69 |                   "mapreduce.shuffle.transfer.buffer.size": "102400",
70 |                   "mapreduce.shuffle.port": "13563"
71 |                 }
72 |               }
73 |             }
74 |           ]
75 |         })
76 | 


--------------------------------------------------------------------------------
/itests/integration_test_resource_manager.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | 
 5 | from pprint import pprint
 6 | from unittest import TestCase
 7 | from urllib.parse import urlparse
 8 | from yarn_api_client.resource_manager import ResourceManager
 9 | 
10 | 
11 | class ResourceManagerTestCase(TestCase):
12 |     """
13 |     Integration test that, given a provided YARN ENDPOINT,
14 |     execute some real scenario test against that server.
15 | 
16 |     Note that, if no YARN ENDPOINT is provided, the tests
17 |     are ignored.
18 |     """
19 |     @classmethod
20 |     def setUpClass(self):
21 |         self.configured = False
22 |         if os.getenv('YARN_ENDPOINT'):
23 |             yarn_endpoint = os.getenv('YARN_ENDPOINT')
24 |             yarn_endpoint_uri = urlparse(yarn_endpoint)
25 | 
26 |             if yarn_endpoint_uri.hostname and yarn_endpoint_uri.port:
27 |                 self.configured = True
28 |                 self.resource_manager = ResourceManager([yarn_endpoint_uri.hostname + ":" +
29 |                                                          str(yarn_endpoint_uri.port)])
30 | 
31 |     def test_cluster_information(self):
32 |         if self.configured:
33 |             info = self.resource_manager.cluster_information()
34 |             pprint(info.data)
35 |             self.assertEqual(info.data['clusterInfo']['state'], 'STARTED')
36 | 
37 |     def test_cluster_metrics(self):
38 |         if self.configured:
39 |             metrics = self.resource_manager.cluster_metrics()
40 |             pprint(metrics.data)
41 |             self.assertGreater(metrics.data['clusterMetrics']['activeNodes'], 0)
42 |             self.assertIsNotNone(metrics.data['clusterMetrics']['totalNodes'])
43 | 
44 |     def test_cluster_scheduler(self):
45 |         if self.configured:
46 |             scheduler = self.resource_manager.cluster_scheduler()
47 |             pprint(scheduler.data)
48 |             self.assertIsNotNone(scheduler.data['scheduler']['schedulerInfo'])
49 | 
50 |     def test_cluster_applications(self):
51 |         if self.configured:
52 |             apps = self.resource_manager.cluster_applications()
53 |             pprint(apps.data)
54 |             self.assertIsNotNone(apps.data['apps'])
55 | 
56 |     def test_cluster_application_state(self):
57 |         if self.configured:
58 |             apps = self.resource_manager.cluster_applications()
59 |             appid = apps.data['apps']['app'][0]['id']
60 |             print(appid)
61 |             response = self.resource_manager.cluster_application_state(appid)
62 |             pprint(response.data)
63 |             pprint(response.data['state'])
64 |             self.assertIsNotNone(apps.data['apps'])
65 | 
66 |     def test_cluster_application_statistics(self):
67 |         if self.configured:
68 |             appstats = self.resource_manager.cluster_application_statistics()
69 |             pprint(appstats.data)
70 |             self.assertIsNotNone(appstats.data['appStatInfo'])
71 | 
72 |     def test_cluster_nodes(self):
73 |         if self.configured:
74 |             nodes = self.resource_manager.cluster_nodes()
75 |             pprint(nodes.data)
76 |             self.assertIsNotNone(nodes.data['nodes'])
77 | 
78 |             running_nodes = self.resource_manager.cluster_nodes(state='RUNNING', healthy='true')
79 |             pprint(running_nodes.data)
80 |             self.assertIsNotNone(nodes.data['nodes'])
81 | 


--------------------------------------------------------------------------------
/tests/test_base.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import json
 3 | import requests_mock
 4 | 
 5 | from tests import TestCase
 6 | from yarn_api_client import base
 7 | from yarn_api_client.errors import APIError, ConfigurationError
 8 | 
 9 | 
10 | class BaseYarnAPITestCase(TestCase):
11 |     @staticmethod
12 |     def success_response():
13 |         return {
14 |             'status': 'success'
15 |         }
16 | 
17 |     def test_valid_request(self):
18 |         with requests_mock.mock() as requests_get_mock:
19 |             requests_get_mock.get('/ololo', text=json.dumps(BaseYarnAPITestCase.success_response()))
20 | 
21 |             client = self.get_client()
22 |             response = client.request('/ololo', params={"foo": 'bar'})
23 | 
24 |             assert requests_get_mock.called
25 |             self.assertIn(response.data['status'], 'success')
26 | 
27 |     def test_valid_request_with_parameters(self):
28 |         with requests_mock.mock() as requests_get_mock:
29 |             requests_get_mock.get('/ololo?foo=bar', text=json.dumps(BaseYarnAPITestCase.success_response()))
30 | 
31 |             client = self.get_client()
32 |             response = client.request('/ololo', params={"foo": 'bar'})
33 | 
34 |             assert requests_get_mock.called
35 |             self.assertIn(response.data['status'], 'success')
36 | 
37 |     def test_bad_request(self):
38 |         with requests_mock.mock() as requests_get_mock:
39 |             requests_get_mock.get('/ololo', status_code=404)
40 | 
41 |             client = self.get_client()
42 |             with self.assertRaises(APIError):
43 |                 client.request('/ololo')
44 | 
45 |     def test_http_configuration(self):
46 |         with requests_mock.mock() as requests_get_mock:
47 |             requests_get_mock.get('/ololo', text=json.dumps(BaseYarnAPITestCase.success_response()))
48 | 
49 |             client = self.get_client()
50 |             client.service_uri = None
51 | 
52 |             with self.assertRaises(ConfigurationError):
53 |                 client.request('/ololo')
54 | 
55 |     def test_uri_parsing(self):
56 |         result_uri = base.Uri('localhost')
57 |         self.assertEqual(result_uri.scheme, 'http')
58 |         self.assertEqual(result_uri.hostname, 'localhost')
59 |         self.assertEqual(result_uri.port, None)
60 |         self.assertEqual(result_uri.is_https, False)
61 | 
62 |         result_uri = base.Uri('test-domain.com:1234')
63 |         self.assertEqual(result_uri.scheme, 'http')
64 |         self.assertEqual(result_uri.hostname, 'test-domain.com')
65 |         self.assertEqual(result_uri.port, 1234)
66 |         self.assertEqual(result_uri.is_https, False)
67 | 
68 |         result_uri = base.Uri('http://123.45.67.89:1234')
69 |         self.assertEqual(result_uri.scheme, 'http')
70 |         self.assertEqual(result_uri.hostname, '123.45.67.89')
71 |         self.assertEqual(result_uri.port, 1234)
72 |         self.assertEqual(result_uri.is_https, False)
73 | 
74 |         result_uri = base.Uri('https://test-domain.com:1234')
75 |         self.assertEqual(result_uri.scheme, 'https')
76 |         self.assertEqual(result_uri.hostname, 'test-domain.com')
77 |         self.assertEqual(result_uri.port, 1234)
78 |         self.assertEqual(result_uri.is_https, True)
79 | 
80 |     def get_client(self):
81 |         client = base.BaseYarnAPI()
82 |         client.service_uri = base.Uri('example.com:80')
83 |         client.timeout = 0
84 |         client.auth = None
85 |         client.verify = True
86 |         return client
87 | 


--------------------------------------------------------------------------------
/tests/test_application_master.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from mock import patch
 3 | from tests import TestCase
 4 | 
 5 | from yarn_api_client.application_master import ApplicationMaster
 6 | 
 7 | 
 8 | @patch('yarn_api_client.application_master.ApplicationMaster.request')
 9 | class AppMasterTestCase(TestCase):
10 |     def setUp(self):
11 |         self.app = ApplicationMaster('localhost')
12 | 
13 |     @patch('yarn_api_client.application_master.get_webproxy_endpoint')
14 |     def test__init__(self, get_config_mock, request_mock):
15 |         get_config_mock.return_value = None
16 |         ApplicationMaster()
17 |         get_config_mock.assert_called_with(30, None, True, None)
18 | 
19 |     def test_application_information(self, request_mock):
20 |         self.app.application_information('app_100500')
21 |         request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/info')
22 | 
23 |     def test_jobs(self, request_mock):
24 |         self.app.jobs('app_100500')
25 |         request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/jobs')
26 | 
27 |     def test_job(self, request_mock):
28 |         self.app.job('app_100500', 'job_100500')
29 |         request_mock.assert_called_with('/proxy/app_100500/ws/v1/mapreduce/jobs/job_100500')
30 | 
31 |     def test_job_attempts(self, request_mock):
32 |         self.app.job_attempts('app_1', 'job_2')
33 | 
34 |     def test_job_counters(self, request_mock):
35 |         self.app.job_counters('app_1', 'job_2')
36 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/counters')
37 | 
38 |     def test_job_conf(self, request_mock):
39 |         self.app.job_conf('app_1', 'job_2')
40 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/conf')
41 | 
42 |     def test_job_tasks(self, request_mock):
43 |         self.app.job_tasks('app_1', 'job_2')
44 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks')
45 | 
46 |     def test_job_task(self, request_mock):
47 |         self.app.job_task('app_1', 'job_2', 'task_3')
48 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3')
49 | 
50 |     def test_task_counters(self, request_mock):
51 |         self.app.task_counters('app_1', 'job_2', 'task_3')
52 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/counters')
53 | 
54 |     def test_task_attempts(self, request_mock):
55 |         self.app.task_attempts('app_1', 'job_2', 'task_3')
56 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts')
57 | 
58 |     def test_task_attempt(self, request_mock):
59 |         self.app.task_attempt('app_1', 'job_2', 'task_3', 'attempt_4')
60 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4')
61 | 
62 |     def test_task_attempt_state(self, request_mock):
63 |         self.app.task_attempt_state('app_1', 'job_2', 'task_3', 'attempt_4')
64 |         request_mock.assert_called_with('/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/state')
65 | 
66 |     def test_task_attempt_state_kill(self, request_mock):
67 |         self.app.task_attempt_state_kill('app_1', 'job_2', 'task_3', 'attempt_4')
68 |         request_mock.assert_called_with(
69 |             '/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/state',
70 |             'PUT', json={'state': 'KILLED'}
71 |         )
72 | 
73 |     def test_task_attempt_counters(self, request_mock):
74 |         self.app.task_attempt_counters('app_1', 'job_2', 'task_3', 'attempt_4')
75 |         request_mock.assert_called_with(
76 |             '/proxy/app_1/ws/v1/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/counters'
77 |         )
78 | 


--------------------------------------------------------------------------------
/yarn_api_client/base.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import unicode_literals
  3 | 
  4 | import logging
  5 | import os
  6 | import requests
  7 | 
  8 | from datetime import datetime
  9 | from urllib.parse import urlparse, urlunparse
 10 | 
 11 | from .errors import APIError, ConfigurationError
 12 | 
 13 | 
 14 | def get_logger(logger_name):
 15 |     logger = logging.getLogger(logger_name)
 16 |     return logger
 17 | 
 18 | 
 19 | log = get_logger(__name__)
 20 | 
 21 | 
 22 | class Response(object):
 23 |     """
 24 |     Basic container for response dictionary
 25 | 
 26 |     :param requests.Response response: Response for call via requests lib
 27 |     """
 28 |     def __init__(self, response):
 29 |         #: Dictionary with response data.  Handle cases where content is empty
 30 |         # to prevent JSON decode issues
 31 |         if response.content:
 32 |             self.data = response.json()
 33 |         else:
 34 |             self.data = {}
 35 | 
 36 | 
 37 | class Uri(object):
 38 |     def __init__(self, service_endpoint):
 39 |         if not (service_endpoint.startswith("http://") or service_endpoint.startswith("https://")):
 40 |             service_endpoint = "http://" + service_endpoint
 41 | 
 42 |         service_uri = urlparse(service_endpoint)
 43 |         self.scheme = service_uri.scheme or 'http'
 44 |         self.hostname = service_uri.hostname or service_uri.path
 45 |         self.port = service_uri.port
 46 |         self.is_https = service_uri.scheme == 'https' or False
 47 | 
 48 |     def to_url(self, api_path=None):
 49 |         path = api_path or ''
 50 |         if self.port:
 51 |             result_url = urlunparse((self.scheme, self.hostname + ":" + str(self.port), path, None, None, None))
 52 |         else:
 53 |             result_url = urlunparse((self.scheme, self.hostname, path, None, None, None))
 54 | 
 55 |         return result_url
 56 | 
 57 | 
 58 | class BaseYarnAPI(object):
 59 |     response_class = Response
 60 | 
 61 |     def __init__(self, service_endpoint=None, timeout=None, auth=None, verify=True, proxies=None):
 62 |         self.timeout = timeout
 63 | 
 64 |         if service_endpoint:
 65 |             self.service_uri = Uri(service_endpoint)
 66 |         else:
 67 |             self.service_uri = None
 68 | 
 69 |         self.session = requests.Session()
 70 |         self.session.auth = auth
 71 |         self.session.verify = verify
 72 |         self.session.proxies = proxies
 73 | 
 74 |     def _validate_configuration(self):
 75 |         if not self.service_uri:
 76 |             raise ConfigurationError('API endpoint is not set')
 77 | 
 78 |     def request(self, api_path, method='GET', **kwargs):
 79 |         self._validate_configuration()
 80 |         api_endpoint = self.service_uri.to_url(api_path)
 81 | 
 82 |         if method == 'GET':
 83 |             headers = {}
 84 |         else:
 85 |             headers = {"Content-Type": "application/json"}
 86 | 
 87 |         if 'headers' in kwargs and kwargs['headers']:
 88 |             headers.update(kwargs['headers'])
 89 | 
 90 |         begin = datetime.now()
 91 |         response = self.session.request(method=method, url=api_endpoint, headers=headers, timeout=self.timeout, **kwargs)
 92 |         end = datetime.now()
 93 |         log.debug(
 94 |             "'{method}' request against endpoint '{endpoint}' took {duration} ms".format(
 95 |                 method=method, 
 96 |                 endpoint=api_endpoint,
 97 |                 duration=round((end-begin).total_seconds()*1000,3)
 98 |             )
 99 |         )
100 | 
101 |         if response.status_code in (200, 202):
102 |             return self.response_class(response)
103 |         else:
104 |             msg = "Response finished with status: {status}. Details: {msg}".format(
105 |                 status=response.status_code, 
106 |                 msg=response.text
107 |             )
108 |             raise APIError(msg)
109 | 
110 |     def construct_parameters(self, arguments):
111 |         params = dict((key, value) for key, value in arguments if value is not None)
112 |         return params
113 | 


--------------------------------------------------------------------------------
/tests/test_history_server.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from mock import patch
 3 | from tests import TestCase
 4 | 
 5 | from yarn_api_client.history_server import HistoryServer
 6 | from yarn_api_client.errors import IllegalArgumentError
 7 | 
 8 | 
 9 | @patch('yarn_api_client.history_server.HistoryServer.request')
10 | class HistoryServerTestCase(TestCase):
11 |     def setUp(self):
12 |         self.hs = HistoryServer('localhost')
13 | 
14 |     @patch('yarn_api_client.history_server.get_jobhistory_endpoint')
15 |     def test__init__(self, get_config_mock, request_mock):
16 |         get_config_mock.return_value = None
17 |         HistoryServer()
18 |         get_config_mock.assert_called_with()
19 | 
20 |     def test_application_information(self, request_mock):
21 |         self.hs.application_information()
22 |         request_mock.assert_called_with('/ws/v1/history/info')
23 | 
24 |     def test_jobs(self, request_mock):
25 |         self.hs.jobs()
26 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs', params={})
27 | 
28 |         self.hs.jobs(state='NEW', user='root', queue='high', limit=100,
29 |                      started_time_begin=1, started_time_end=2,
30 |                      finished_time_begin=3, finished_time_end=4)
31 | 
32 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs',
33 |                                         params={"queue": 'high',
34 |                                                 "state": 'NEW',
35 |                                                 "user": 'root',
36 |                                                 "limit": 100,
37 |                                                 "startedTimeBegin": 1,
38 |                                                 "startedTimeEnd": 2,
39 |                                                 "finishedTimeBegin": 3,
40 |                                                 "finishedTimeEnd": 4})
41 | 
42 |         with self.assertRaises(IllegalArgumentError):
43 |             self.hs.jobs(state='ololo')
44 | 
45 |     def test_job(self, request_mock):
46 |         self.hs.job('job_100500')
47 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_100500')
48 | 
49 |     def test_job_attempts(self, request_mock):
50 |         self.hs.job_attempts('job_1')
51 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_1/jobattempts')
52 | 
53 |     def test_job_counters(self, request_mock):
54 |         self.hs.job_counters('job_2')
55 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/counters')
56 | 
57 |     def test_job_conf(self, request_mock):
58 |         self.hs.job_conf('job_2')
59 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/conf')
60 | 
61 |     def test_job_tasks(self, request_mock):
62 |         self.hs.job_tasks('job_2')
63 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks', params={})
64 |         self.hs.job_tasks('job_2', job_type='m')
65 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks', params={"type": 'm'})
66 | 
67 |         with self.assertRaises(IllegalArgumentError):
68 |             self.hs.job_tasks('job_2', job_type='ololo')
69 | 
70 |     def test_job_task(self, request_mock):
71 |         self.hs.job_task('job_2', 'task_3')
72 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3')
73 | 
74 |     def test_task_counters(self, request_mock):
75 |         self.hs.task_counters('job_2', 'task_3')
76 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/counters')
77 | 
78 |     def test_task_attempts(self, request_mock):
79 |         self.hs.task_attempts('job_2', 'task_3')
80 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts')
81 | 
82 |     def test_task_attempt(self, request_mock):
83 |         self.hs.task_attempt('job_2', 'task_3', 'attempt_4')
84 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4')
85 | 
86 |     def test_task_attempt_counters(self, request_mock):
87 |         self.hs.task_attempt_counters('job_2', 'task_3', 'attempt_4')
88 |         request_mock.assert_called_with('/ws/v1/history/mapreduce/jobs/job_2/tasks/task_3/attempts/attempt_4/counters')
89 | 


--------------------------------------------------------------------------------
/yarn_api_client/hadoop_conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import xml.etree.ElementTree as ET
  4 | import requests
  5 | 
  6 | from .base import get_logger
  7 | 
  8 | log = get_logger(__name__)
  9 | 
 10 | CONF_DIR = os.getenv('YARN_CONF_DIR', os.getenv('HADOOP_CONF_DIR', '/etc/hadoop/conf'))
 11 | 
 12 | 
 13 | def _get_rm_ids(hadoop_conf_path):
 14 |     rm_ids = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 'yarn.resourcemanager.ha.rm-ids')
 15 |     if rm_ids is not None:
 16 |         rm_ids = rm_ids.split(',')
 17 |     return rm_ids
 18 | 
 19 | 
 20 | def _get_maximum_container_memory(hadoop_conf_path):
 21 |     container_memory = int(parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'),
 22 |                                  'yarn.nodemanager.resource.memory-mb'))
 23 |     return container_memory
 24 | 
 25 | 
 26 | def _is_https_only():
 27 |     # determine if HTTPS_ONLY is the configured policy, else use http
 28 |     hadoop_conf_path = CONF_DIR
 29 |     http_policy = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), 'yarn.http.policy')
 30 |     if http_policy == 'HTTPS_ONLY':
 31 |         return True
 32 |     return False
 33 | 
 34 | 
 35 | def _get_resource_manager(hadoop_conf_path, rm_id=None):
 36 |     # compose property name based on policy (and rm_id)
 37 |     is_https_only = _is_https_only()
 38 | 
 39 |     if is_https_only:
 40 |         prop_name = 'yarn.resourcemanager.webapp.https.address'
 41 |     else:
 42 |         prop_name = 'yarn.resourcemanager.webapp.address'
 43 | 
 44 |     # Adjust prop_name if rm_id is set
 45 |     if rm_id:
 46 |         prop_name = "{name}.{rm_id}".format(name=prop_name, rm_id=rm_id)
 47 | 
 48 |     rm_address = parse(os.path.join(hadoop_conf_path, 'yarn-site.xml'), prop_name)
 49 | 
 50 |     return ('https://' if is_https_only else 'http://') + rm_address if rm_address else None
 51 | 
 52 | 
 53 | def check_is_active_rm(url, timeout=30, auth=None, verify=True, proxies=None):
 54 |     try:
 55 |         response = requests.get(url + "/cluster", timeout=timeout, auth=auth, verify=verify, proxies=proxies)
 56 |     except requests.RequestException as e:
 57 |         log.warning("Exception encountered accessing RM '{url}': '{err}', continuing...".format(url=url, err=e))
 58 |         return False
 59 | 
 60 |     if response.status_code != 200:
 61 |         log.warning("Failed to access RM '{url}' - HTTP Code '{status}', continuing...".format(url=url, status=response.status_code))
 62 |         return False
 63 |     else:
 64 |         return True
 65 | 
 66 | 
 67 | def get_resource_manager_endpoint(timeout=30, auth=None, verify=True, proxies=None):
 68 |     log.info('Getting resource manager endpoint from config: {config_path}'.format(config_path=os.path.join(CONF_DIR, 'yarn-site.xml')))
 69 |     hadoop_conf_path = CONF_DIR
 70 |     rm_ids = _get_rm_ids(hadoop_conf_path)
 71 |     if rm_ids:
 72 |         for rm_id in rm_ids:
 73 |             ret = _get_resource_manager(hadoop_conf_path, rm_id)
 74 |             if ret:
 75 |                 if check_is_active_rm(ret, timeout, auth, verify, proxies):
 76 |                     return ret
 77 |         return None
 78 |     else:
 79 |         return _get_resource_manager(hadoop_conf_path, None)
 80 | 
 81 | 
 82 | def get_jobhistory_endpoint():
 83 |     config_path = os.path.join(CONF_DIR, 'mapred-site.xml')
 84 |     log.info('Getting jobhistory endpoint from config: {config_path}'.format(config_path=config_path))
 85 |     prop_name = 'mapreduce.jobhistory.webapp.address'
 86 |     return parse(config_path, prop_name)
 87 | 
 88 | 
 89 | def get_nodemanager_endpoint():
 90 |     config_path = os.path.join(CONF_DIR, 'yarn-site.xml')
 91 |     log.info('Getting nodemanager endpoint from config: {config_path}'.format(config_path=config_path))
 92 |     prop_name = 'yarn.nodemanager.webapp.address'
 93 |     return parse(config_path, prop_name)
 94 | 
 95 | 
 96 | def get_webproxy_endpoint(timeout=30, auth=None, verify=True, proxies=None):
 97 |     config_path = os.path.join(CONF_DIR, 'yarn-site.xml')
 98 |     log.info('Getting webproxy endpoint from config: {config_path}'.format(config_path=config_path))
 99 |     prop_name = 'yarn.web-proxy.address'
100 |     value = parse(config_path, prop_name)
101 |     return value or get_resource_manager_endpoint(timeout, auth, verify, proxies)
102 | 
103 | 
104 | def parse(config_path, key):
105 |     tree = ET.parse(config_path)
106 |     root = tree.getroot()
107 |     # Construct list with profit values
108 |     ph1 = [dict((el.tag, el.text) for el in p) for p in root.findall('./property')]
109 |     # Construct dict with property key values
110 |     ph2 = dict((obj['name'], obj['value']) for obj in ph1)
111 | 
112 |     value = ph2.get(key, None)
113 |     return value
114 | 


--------------------------------------------------------------------------------
/yarn_api_client/node_manager.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from .base import BaseYarnAPI, get_logger
  3 | from .constants import ApplicationState
  4 | from .errors import IllegalArgumentError
  5 | from .hadoop_conf import get_nodemanager_endpoint
  6 | 
  7 | log = get_logger(__name__)
  8 | 
  9 | LEGAL_APPLICATION_STATES = {s for s, _ in ApplicationState}
 10 | 
 11 | 
 12 | def validate_application_state(state, required=False):
 13 |     if state:
 14 |         if state not in LEGAL_APPLICATION_STATES:
 15 |             msg = 'Application State %s is illegal' % (state,)
 16 |             raise IllegalArgumentError(msg)
 17 |     else:
 18 |         if required:
 19 |             msg = "state argument is required to be provided"
 20 |             raise IllegalArgumentError(msg)
 21 | 
 22 | 
 23 | class NodeManager(BaseYarnAPI):
 24 |     """
 25 |     The NodeManager REST API's allow the user to get status on the node and
 26 |     information about applications and containers running on that node.
 27 | 
 28 |     If `service_endpoint` argument is `None` client will try to extract it from
 29 |     Hadoop configuration files.
 30 | 
 31 |     :param str service_endpoint: NodeManager HTTP(S) address
 32 |     :param int timeout: API connection timeout in seconds
 33 |     :param AuthBase auth: Auth to use for requests
 34 |     :param boolean verify: Either a boolean, in which case it controls whether
 35 |         we verify the server's TLS certificate, or a string, in which case it must
 36 |         be a path to a CA bundle to use. Defaults to ``True``
 37 |     """
 38 |     def __init__(self, service_endpoint=None, timeout=30, auth=None, verify=True, proxies=None):
 39 |         if not service_endpoint:
 40 |             service_endpoint = get_nodemanager_endpoint()
 41 | 
 42 |         super(NodeManager, self).__init__(service_endpoint, timeout, auth, verify, proxies)
 43 | 
 44 |     def node_information(self):
 45 |         """
 46 |         The node information resource provides overall information about that
 47 |         particular node.
 48 | 
 49 |         :returns: API response object with JSON data
 50 |         :rtype: :py:class:`yarn_api_client.base.Response`
 51 |         """
 52 |         path = '/ws/v1/node/info'
 53 |         return self.request(path)
 54 | 
 55 |     def node_applications(self, state=None, user=None):
 56 |         """
 57 |         With the Applications API, you can obtain a collection of resources,
 58 |         each of which represents an application.
 59 | 
 60 |         :param str state: application state
 61 |         :param str user: user name
 62 |         :returns: API response object with JSON data
 63 |         :rtype: :py:class:`yarn_api_client.base.Response`
 64 |         :raises yarn_api_client.errors.IllegalArgumentError: if `state`
 65 |             incorrect
 66 |         """
 67 |         path = '/ws/v1/node/apps'
 68 | 
 69 |         validate_application_state(state)
 70 | 
 71 |         loc_args = (
 72 |             ('state', state),
 73 |             ('user', user))
 74 | 
 75 |         params = self.construct_parameters(loc_args)
 76 | 
 77 |         return self.request(path, params=params)
 78 | 
 79 |     def node_application(self, application_id):
 80 |         """
 81 |         An application resource contains information about a particular
 82 |         application that was run or is running on this NodeManager.
 83 | 
 84 |         :param str application_id: The application id
 85 |         :returns: API response object with JSON data
 86 |         :rtype: :py:class:`yarn_api_client.base.Response`
 87 |         """
 88 |         path = '/ws/v1/node/apps/{appid}'.format(appid=application_id)
 89 | 
 90 |         return self.request(path)
 91 | 
 92 |     def node_containers(self):
 93 |         """
 94 |         With the containers API, you can obtain a collection of resources,
 95 |         each of which represents a container.
 96 | 
 97 |         :returns: API response object with JSON data
 98 |         :rtype: :py:class:`yarn_api_client.base.Response`
 99 |         """
100 |         path = '/ws/v1/node/containers'
101 | 
102 |         return self.request(path)
103 | 
104 |     def node_container(self, container_id):
105 |         """
106 |         A container resource contains information about a particular container
107 |         that is running on this NodeManager.
108 | 
109 |         :param str container_id: The container id
110 |         :returns: API response object with JSON data
111 |         :rtype: :py:class:`yarn_api_client.base.Response`
112 |         """
113 |         path = '/ws/v1/node/containers/{containerid}'.format(
114 |             containerid=container_id)
115 | 
116 |         return self.request(path)
117 | 
118 |     def auxiliary_services(self):
119 |         """
120 |         With the auxiliary services API, you can obtain a collection of resources,
121 |         each of which represents an auxiliary service. When you run a GET operation
122 |         on this resource, you obtain a collection of auxiliary service information objects.
123 | 
124 |         :returns: API response object with JSON data
125 |         :rtype: :py:class:`yarn_api_client.base.Response`
126 |         """
127 |         path = '/ws/v1/node/auxiliaryservices'
128 |         return self.request(path)
129 | 
130 |     def auxiliary_services_update(self, data):
131 |         """
132 |         A YARN admin can use a PUT operation to update the auxiliary services running
133 |         on the NodeManager. The body of the request should be of the same format as
134 |         an auxiliary services manifest file.
135 | 
136 |         :param dict data: auxiliary services manifest file
137 |         :returns: API response object with JSON data
138 |         :rtype: :py:class:`yarn_api_client.base.Response`
139 |         """
140 |         path = '/ws/v1/node/auxiliaryservices'
141 |         return self.request(path, 'PUT', json=data)
142 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # hadoop-yarn-api-python-client
  2 | 
  3 | Python client for Apache Hadoop® YARN API
  4 | 
  5 | [![Latest Version](https://img.shields.io/pypi/v/yarn-api-client.svg)](https://pypi.python.org/pypi/yarn-api-client/)
  6 | [![Downloads](https://pepy.tech/badge/yarn-api-client/month)](https://pepy.tech/project/yarn-api-client/month)
  7 | [![Travis CI build status](https://travis-ci.org/CODAIT/hadoop-yarn-api-python-client.svg?branch=master)](https://travis-ci.org/CODAIT/hadoop-yarn-api-python-client)
  8 | [![Latest documentation status](https://readthedocs.org/projects/yarn-api-client-python/badge/?version=latest)](https://yarn-api-client-python.readthedocs.org/en/latest/?badge=latest)
  9 | [![Test coverage](https://coveralls.io/repos/toidi/hadoop-yarn-api-python-client/badge.png)](https://coveralls.io/r/toidi/hadoop-yarn-api-python-client)
 10 | 
 11 | Package documentation:
 12 | [yarn-api-client-python.readthedocs.org](https://yarn-api-client-python.readthedocs.org/en/latest/)
 13 | 
 14 | REST API documentation: [hadoop.apache.org](http://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html)
 15 | 
 16 | **Warning**: CLI is outdated & broken. Please don't use CLI. This will be resolved in future releases.
 17 | 
 18 | ---
 19 | ## Compatibility Matrix
 20 | 
 21 | | yarn-api-client-python | Apache Hadoop |
 22 | | ------------- | ------------- |
 23 | | 1.0.2  | 3.2.1  |
 24 | | 1.0.3  | 3.3.0, 3.3.1  |
 25 | 
 26 | If u have version other than mentioned (or vendored variant like Hortonworks), certain APIs might be not working or have differences in
 27 | implementation. If u plan to use certain API long-term, you might want to make sure its not in Alpha stage in documentation.
 28 | 
 29 | ## Installation
 30 | 
 31 | From PyPI
 32 | ```
 33 | pip install yarn-api-client
 34 | ```
 35 | 
 36 | From Anaconda (conda forge)
 37 | ```
 38 | conda install -c conda-forge yarn-api-client
 39 | ```
 40 | 
 41 | From source code
 42 | ```
 43 | pip install git+https://github.com/CODAIT/hadoop-yarn-api-python-client.git
 44 | ```
 45 | 
 46 | ## Enabling support for SimpleAuth
 47 | 
 48 | See example below:
 49 | ```
 50 | from yarn_api_client.auth import SimpleAuth
 51 | from yarn_api_client.history_server import HistoryServer
 52 | auth = SimpleAuth('impersonated_account_name')
 53 | history_server = HistoryServer('https://127.0.0.2:5678', auth=auth)
 54 | ```
 55 | 
 56 | ## Enabling support for Kerberos/SPNEGO Security
 57 | 1. First option - using `requests_kerberos` package  
 58 | 
 59 | To avoid deployment issues on a non Kerberized environment, the `requests_kerberos`
 60 | dependency is optional and needs to be explicit installed in order to enable access
 61 | to YARN console protected by Kerberos/SPNEGO.
 62 | 
 63 | `pip install requests_kerberos`
 64 | 
 65 | From python code
 66 | ```
 67 | from yarn_api_client.history_server import HistoryServer
 68 | from requests_kerberos import HTTPKerberosAuth
 69 | history_server = HistoryServer('https://127.0.0.2:5678', auth=HTTPKerberosAuth())
 70 | ```
 71 | 
 72 | PS: You **need** to get valid kerberos ticket in systemwide kerberos cache before running your code, otherwise calls to kerberized environment won't go through (run kinit before proceeding to run code)
 73 | 
 74 | 2. Second option - using `gssapi` package  
 75 | 
 76 | If you want to avoid using terminal calls, you have to perform SPNEGO handshake to retrieve ticket yourself. Full API documentation: https://pythongssapi.github.io/python-gssapi/latest/
 77 | 
 78 | # Usage
 79 | 
 80 | ### CLI interface
 81 | 
 82 | **Warning**: CLI is outdated & broken. Please don't use CLI. This will be resolved in future releases.
 83 | 
 84 | 1. First way
 85 | ```
 86 | bin/yarn_client --help
 87 | ```
 88 | 
 89 | 2. Alternative way
 90 | ```
 91 | python -m yarn_api_client --help
 92 | ```
 93 | 
 94 | ### Programmatic interface
 95 | 
 96 | ```
 97 | from yarn_api_client import ApplicationMaster, HistoryServer, NodeManager, ResourceManager
 98 | am = ApplicationMaster('https://127.0.0.2:5678')
 99 | app_information = am.application_information('application_id')
100 | ```
101 | 
102 | ### Changelog
103 | 
104 | 1.0.3 Release
105 |    - Drop support of Python 2.7 (if you still need it for extreme emergency, look into reverting ab4f71582f8c69e908db93905485ba4d00562dfd)
106 |    - Update of supported hadoop version to 3.3.1
107 |    - Add support for YARN_CONF_DIR and HADOOP_CONF_DIR
108 |    - Add class for native SimpleAuth (#106)
109 |    - Add constructor argument for proxies (#109)
110 | 
111 | 1.0.2 Release
112 |    - Add support for Python 3.8.x
113 |    - Fix HTTPS url parsing
114 |    - Fix JSON body request APIs
115 |    - Handle YARN response with empty contents
116 |    - Better logging support
117 | 
118 | 1.0.1 Release
119 |    - Passes the authorization instance to the Active RM check
120 |    - Establishes a new (working) documentation site in readthedocs.io: yarn-api-client-python.readthedocs.io
121 |    - Adds more python version (3.7 and 3.8) to test matrix and removes 2.6.
122 | 
123 | 1.0.0 Release
124 |    - Major cleanup of API.  
125 |      - Address/port parameters have been replaced with complete
126 |        endpoints (includes scheme [e.g., http or https]).
127 |      - ResourceManager has been updated to take a list of endpoints for
128 |        improved HA support.
129 |      - ResourceManager, ApplicationMaster, HistoryServer and NodeManager
130 |        have been updated with methods corresponding to the latest REST API.
131 |    - pytest support on Windows has been provided.
132 |    - Documentation has been updated.
133 | 
134 |    **NOTE:** Applications using APIs relative to releases prior to 1.0 should
135 |    pin their dependency on yarn-api-client to _less than_ 1.0 and are encouraged
136 |    to update to 1.0 as soon as possible.
137 | 
138 | 0.3.7 Release  
139 |    - Honor configured HTTP Policy when no address is provided - enabling
140 |      using of HTTPS in these cases.
141 | 
142 | 0.3.6 Release  
143 |    - Extend ResourceManager to allow applications to determine
144 |      resource availability prior to submission.
145 | 
146 | 0.3.5 Release  
147 |    - Hotfix release to fix internal signature mismatch
148 | 
149 | 0.3.4 Release  
150 |    - More flexible support for discovering Hadoop configuration
151 |      including multiple Resource Managers when HA is configured
152 |    - Properly support YARN post response codes
153 | 
154 | 0.3.3 Release  
155 |    - Properly set Content-Type in PUT requests
156 |    - Check for HADOOP_CONF_DIR env variable
157 | 
158 | 0.3.2 Release  
159 |    - Make Kerberos/SPNEGO dependency optional
160 | 
161 | 0.3.1 Release  
162 |    - Fix cluster_application_kill API
163 | 
164 | 0.3.0 Release  
165 |    - Add support for YARN endpoints protected by Kerberos/SPNEGO
166 |    - Moved to `requests` package for REST API invocation
167 |    - Remove `http_con` property, as connections are now managed by `requests` package
168 | 
169 | 0.2.5 Release  
170 |   - Fixed History REST API
171 | 
172 | 0.2.4 Release  
173 |   - Added compatibility with HA enabled Resource Manager
174 | 
175 | ### Team
176 | 
177 | YARN API client is developed by an open community, and the current maintainers 
178 | are listed below in alphabetical order:
179 | 
180 | - [Dmitry Romanenko](https://github.com/dimon222)
181 | - [Eduard Iskandarov](https://github.com/toidi)
182 | - [Kevin Bates](https://github.com/kevin-bates)
183 | - [Luciano Resende](https://github.com/lresende)
184 | 
185 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/yarn-api-client.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/yarn-api-client.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/yarn-api-client"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/yarn-api-client"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
106 | latex:
107 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | 	@echo
109 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | 	      "(use \`make latexpdf' here to do that automatically)."
112 | 
113 | latexpdf:
114 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | 	@echo "Running LaTeX files through pdflatex..."
116 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
119 | latexpdfja:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
122 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
125 | text:
126 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | 	@echo
128 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
130 | man:
131 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | 	@echo
133 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
135 | texinfo:
136 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | 	@echo
138 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
140 | 	      "(use \`make info' here to do that automatically)."
141 | 
142 | info:
143 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | 	@echo "Running Texinfo files through makeinfo..."
145 | 	make -C $(BUILDDIR)/texinfo info
146 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
148 | gettext:
149 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | 	@echo
151 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
153 | changes:
154 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | 	@echo
156 | 	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
158 | linkcheck:
159 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | 	@echo
161 | 	@echo "Link check complete; look for any errors in the above output " \
162 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
164 | doctest:
165 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | 	@echo "Testing of doctests in the sources finished, look at the " \
167 | 	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
169 | xml:
170 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | 	@echo
172 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
174 | pseudoxml:
175 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | 	@echo
177 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # yarn-api-client documentation build configuration file, created by
  4 | # sphinx-quickstart on Thu Jul 31 22:07:17 2014.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | import os
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | #sys.path.insert(0, os.path.abspath('.'))
 22 | sys.path.append(os.pardir)
 23 | 
 24 | # -- General configuration ------------------------------------------------
 25 | 
 26 | # If your documentation needs a minimal Sphinx version, state it here.
 27 | #needs_sphinx = '1.0'
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     'sphinx.ext.autodoc',
 34 | ]
 35 | 
 36 | # Add any paths that contain templates here, relative to this directory.
 37 | templates_path = ['_templates']
 38 | 
 39 | # The suffix of source filenames.
 40 | source_suffix = '.rst'
 41 | 
 42 | # The encoding of source files.
 43 | #source_encoding = 'utf-8-sig'
 44 | 
 45 | # The master toctree document.
 46 | master_doc = 'index'
 47 | 
 48 | # General information about the project.
 49 | project = u'yarn-api-client'
 50 | copyright = u'2014, Iskandarov Eduard'
 51 | 
 52 | # The version info for the project you're documenting, acts as replacement for
 53 | # |version| and |release|, also used in various other places throughout the
 54 | # built documents.
 55 | #
 56 | # The short X.Y version.
 57 | version = '0.2.4'
 58 | # The full version, including alpha/beta/rc tags.
 59 | release = '0.2.4'
 60 | 
 61 | # The language for content autogenerated by Sphinx. Refer to documentation
 62 | # for a list of supported languages.
 63 | #language = None
 64 | 
 65 | # There are two options for replacing |today|: either, you set today to some
 66 | # non-false value, then it is used:
 67 | #today = ''
 68 | # Else, today_fmt is used as the format for a strftime call.
 69 | #today_fmt = '%B %d, %Y'
 70 | 
 71 | # List of patterns, relative to source directory, that match files and
 72 | # directories to ignore when looking for source files.
 73 | exclude_patterns = ['_build']
 74 | 
 75 | # The reST default role (used for this markup: `text`) to use for all
 76 | # documents.
 77 | #default_role = None
 78 | 
 79 | # If true, '()' will be appended to :func: etc. cross-reference text.
 80 | #add_function_parentheses = True
 81 | 
 82 | # If true, the current module name will be prepended to all description
 83 | # unit titles (such as .. function::).
 84 | #add_module_names = True
 85 | 
 86 | # If true, sectionauthor and moduleauthor directives will be shown in the
 87 | # output. They are ignored by default.
 88 | #show_authors = False
 89 | 
 90 | # The name of the Pygments (syntax highlighting) style to use.
 91 | pygments_style = 'sphinx'
 92 | 
 93 | # A list of ignored prefixes for module index sorting.
 94 | #modindex_common_prefix = []
 95 | 
 96 | # If true, keep warnings as "system message" paragraphs in the built documents.
 97 | #keep_warnings = False
 98 | 
 99 | 
100 | # -- Options for HTML output ----------------------------------------------
101 | 
102 | # The theme to use for HTML and HTML Help pages.  See the documentation for
103 | # a list of builtin themes.
104 | html_theme = 'default'
105 | 
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further.  For a list of options available for each theme, see the
108 | # documentation.
109 | #html_theme_options = {}
110 | 
111 | # Add any paths that contain custom themes here, relative to this directory.
112 | #html_theme_path = []
113 | 
114 | # The name for this set of Sphinx documents.  If None, it defaults to
115 | # "<project> v<release> documentation".
116 | #html_title = None
117 | 
118 | # A shorter title for the navigation bar.  Default is the same as html_title.
119 | #html_short_title = None
120 | 
121 | # The name of an image file (relative to this directory) to place at the top
122 | # of the sidebar.
123 | #html_logo = None
124 | 
125 | # The name of an image file (within the static path) to use as favicon of the
126 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
127 | # pixels large.
128 | #html_favicon = None
129 | 
130 | # Add any paths that contain custom static files (such as style sheets) here,
131 | # relative to this directory. They are copied after the builtin static files,
132 | # so a file named "default.css" will overwrite the builtin "default.css".
133 | html_static_path = ['_static']
134 | 
135 | # Add any extra paths that contain custom files (such as robots.txt or
136 | # .htaccess) here, relative to this directory. These files are copied
137 | # directly to the root of the documentation.
138 | #html_extra_path = []
139 | 
140 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
141 | # using the given strftime format.
142 | #html_last_updated_fmt = '%b %d, %Y'
143 | 
144 | # If true, SmartyPants will be used to convert quotes and dashes to
145 | # typographically correct entities.
146 | #html_use_smartypants = True
147 | 
148 | # Custom sidebar templates, maps document names to template names.
149 | #html_sidebars = {}
150 | 
151 | # Additional templates that should be rendered to pages, maps page names to
152 | # template names.
153 | #html_additional_pages = {}
154 | 
155 | # If false, no module index is generated.
156 | #html_domain_indices = True
157 | 
158 | # If false, no index is generated.
159 | #html_use_index = True
160 | 
161 | # If true, the index is split into individual pages for each letter.
162 | #html_split_index = False
163 | 
164 | # If true, links to the reST sources are added to the pages.
165 | #html_show_sourcelink = True
166 | 
167 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
168 | #html_show_sphinx = True
169 | 
170 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
171 | #html_show_copyright = True
172 | 
173 | # If true, an OpenSearch description file will be output, and all pages will
174 | # contain a <link> tag referring to it.  The value of this option must be the
175 | # base URL from which the finished HTML is served.
176 | #html_use_opensearch = ''
177 | 
178 | # This is the file name suffix for HTML files (e.g. ".xhtml").
179 | #html_file_suffix = None
180 | 
181 | # Output file base name for HTML help builder.
182 | htmlhelp_basename = 'yarn-api-clientdoc'
183 | 
184 | 
185 | # -- Options for LaTeX output ---------------------------------------------
186 | 
187 | latex_elements = {
188 | # The paper size ('letterpaper' or 'a4paper').
189 | #'papersize': 'letterpaper',
190 | 
191 | # The font size ('10pt', '11pt' or '12pt').
192 | #'pointsize': '10pt',
193 | 
194 | # Additional stuff for the LaTeX preamble.
195 | #'preamble': '',
196 | }
197 | 
198 | # Grouping the document tree into LaTeX files. List of tuples
199 | # (source start file, target name, title,
200 | #  author, documentclass [howto, manual, or own class]).
201 | latex_documents = [
202 |   ('index', 'yarn-api-client.tex', u'yarn-api-client Documentation',
203 |    u'Iskandarov Eduard', 'manual'),
204 | ]
205 | 
206 | # The name of an image file (relative to this directory) to place at the top of
207 | # the title page.
208 | #latex_logo = None
209 | 
210 | # For "manual" documents, if this is true, then toplevel headings are parts,
211 | # not chapters.
212 | #latex_use_parts = False
213 | 
214 | # If true, show page references after internal links.
215 | #latex_show_pagerefs = False
216 | 
217 | # If true, show URL addresses after external links.
218 | #latex_show_urls = False
219 | 
220 | # Documents to append as an appendix to all manuals.
221 | #latex_appendices = []
222 | 
223 | # If false, no module index is generated.
224 | #latex_domain_indices = True
225 | 
226 | 
227 | # -- Options for manual page output ---------------------------------------
228 | 
229 | # One entry per manual page. List of tuples
230 | # (source start file, name, description, authors, manual section).
231 | man_pages = [
232 |     ('index', 'yarn-api-client', u'yarn-api-client Documentation',
233 |      [u'Iskandarov Eduard'], 1)
234 | ]
235 | 
236 | # If true, show URL addresses after external links.
237 | #man_show_urls = False
238 | 
239 | 
240 | # -- Options for Texinfo output -------------------------------------------
241 | 
242 | # Grouping the document tree into Texinfo files. List of tuples
243 | # (source start file, target name, title, author,
244 | #  dir menu entry, description, category)
245 | texinfo_documents = [
246 |   ('index', 'yarn-api-client', u'yarn-api-client Documentation',
247 |    u'Iskandarov Eduard', 'yarn-api-client', 'One line description of project.',
248 |    'Miscellaneous'),
249 | ]
250 | 
251 | # Documents to append as an appendix to all manuals.
252 | #texinfo_appendices = []
253 | 
254 | # If false, no module index is generated.
255 | #texinfo_domain_indices = True
256 | 
257 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
258 | #texinfo_show_urls = 'footnote'
259 | 
260 | # If true, do not generate a @detailmenu in the "Top" node's menu.
261 | #texinfo_no_detailmenu = False
262 | 


--------------------------------------------------------------------------------
/yarn_api_client/history_server.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import unicode_literals
  3 | 
  4 | from .base import BaseYarnAPI, get_logger
  5 | from .constants import JobStateInternal
  6 | from .errors import IllegalArgumentError
  7 | from .hadoop_conf import get_jobhistory_endpoint
  8 | 
  9 | log = get_logger(__name__)
 10 | 
 11 | 
 12 | class HistoryServer(BaseYarnAPI):
 13 |     """
 14 |     The history server REST API's allow the user to get status on finished
 15 |     applications. Currently it only supports MapReduce and provides
 16 |     information on finished jobs.
 17 | 
 18 |     If `service_endpoint` argument is `None` client will try to extract it from
 19 |     Hadoop configuration files.
 20 | 
 21 |     :param str service_endpoint: HistoryServer HTTP(S) address
 22 |     :param int timeout: API connection timeout in seconds
 23 |     :param AuthBase auth: Auth to use for requests
 24 |     :param boolean verify: Either a boolean, in which case it controls whether
 25 |         we verify the server's TLS certificate, or a string, in which case it must
 26 |         be a path to a CA bundle to use. Defaults to ``True``
 27 |     """
 28 |     def __init__(self, service_endpoint=None, timeout=30, auth=None, verify=True, proxies=None):
 29 |         if not service_endpoint:
 30 |             service_endpoint = get_jobhistory_endpoint()
 31 | 
 32 |         super(HistoryServer, self).__init__(service_endpoint, timeout, auth, verify, proxies)
 33 | 
 34 |     def application_information(self):
 35 |         """
 36 |         The history server information resource provides overall information
 37 |         about the history server.
 38 | 
 39 |         :returns: API response object with JSON data
 40 |         :rtype: :py:class:`yarn_api_client.base.Response`
 41 |         """
 42 |         path = '/ws/v1/history/info'
 43 | 
 44 |         return self.request(path)
 45 | 
 46 |     def jobs(self, state=None, user=None, queue=None, limit=None,
 47 |              started_time_begin=None, started_time_end=None,
 48 |              finished_time_begin=None, finished_time_end=None):
 49 |         """
 50 |         The jobs resource provides a list of the MapReduce jobs that have
 51 |         finished. It does not currently return a full list of parameters.
 52 | 
 53 |         :param str user: user name
 54 |         :param str state: the job state
 55 |         :param str queue: queue name
 56 |         :param str limit: total number of app objects to be returned
 57 |         :param str started_time_begin: jobs with start time beginning with
 58 |             this time, specified in ms since epoch
 59 |         :param str started_time_end: jobs with start time ending with this
 60 |             time, specified in ms since epoch
 61 |         :param str finished_time_begin: jobs with finish time beginning with
 62 |             this time, specified in ms since epoch
 63 |         :param str finished_time_end: jobs with finish time ending with this
 64 |             time, specified in ms since epoch
 65 |         :returns: API response object with JSON data
 66 |         :rtype: :py:class:`yarn_api_client.base.Response`
 67 |         :raises yarn_api_client.errors.IllegalArgumentError: if `state`
 68 |             incorrect
 69 |         """
 70 |         path = '/ws/v1/history/mapreduce/jobs'
 71 | 
 72 |         legal_states = {s for s, _ in JobStateInternal}
 73 |         if state is not None and state not in legal_states:
 74 |             msg = 'Job Internal State %s is illegal' % (state,)
 75 |             raise IllegalArgumentError(msg)
 76 | 
 77 |         loc_args = (
 78 |             ('state', state),
 79 |             ('user', user),
 80 |             ('queue', queue),
 81 |             ('limit', limit),
 82 |             ('startedTimeBegin', started_time_begin),
 83 |             ('startedTimeEnd', started_time_end),
 84 |             ('finishedTimeBegin', finished_time_begin),
 85 |             ('finishedTimeEnd', finished_time_end))
 86 | 
 87 |         params = self.construct_parameters(loc_args)
 88 | 
 89 |         return self.request(path, params=params)
 90 | 
 91 |     def job(self, job_id):
 92 |         """
 93 |         A Job resource contains information about a particular job identified
 94 |         by jobid.
 95 | 
 96 |         :param str job_id: The job id
 97 |         :returns: API response object with JSON data
 98 |         :rtype: :py:class:`yarn_api_client.base.Response`
 99 |         """
100 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}'.format(jobid=job_id)
101 | 
102 |         return self.request(path)
103 | 
104 |     def job_attempts(self, job_id):
105 |         """
106 |         With the job attempts API, you can obtain a collection of resources
107 |         that represent a job attempt.
108 | 
109 |         :param str job_id: The job id
110 |         :returns: API response object with JSON data
111 |         :rtype: :py:class:`yarn_api_client.base.Response`
112 |         """
113 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/jobattempts'.format(
114 |             jobid=job_id)
115 | 
116 |         return self.request(path)
117 | 
118 |     def job_counters(self, job_id):
119 |         """
120 |         With the job counters API, you can object a collection of resources
121 |         that represent al the counters for that job.
122 | 
123 |         :param str job_id: The job id
124 |         :returns: API response object with JSON data
125 |         :rtype: :py:class:`yarn_api_client.base.Response`
126 |         """
127 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/counters'.format(
128 |             jobid=job_id)
129 | 
130 |         return self.request(path)
131 | 
132 |     def job_conf(self, job_id):
133 |         """
134 |         A job configuration resource contains information about the job
135 |         configuration for this job.
136 | 
137 |         :param str job_id: The job id
138 |         :returns: API response object with JSON data
139 |         :rtype: :py:class:`yarn_api_client.base.Response`
140 |         """
141 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/conf'.format(jobid=job_id)
142 | 
143 |         return self.request(path)
144 | 
145 |     def job_tasks(self, job_id, job_type=None):
146 |         """
147 |         With the tasks API, you can obtain a collection of resources that
148 |         represent a task within a job.
149 | 
150 |         :param str job_id: The job id
151 |         :param str type: type of task, valid values are m or r. m for map
152 |             task or r for reduce task
153 |         :returns: API response object with JSON data
154 |         :rtype: :py:class:`yarn_api_client.base.Response`
155 |         """
156 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks'.format(
157 |             jobid=job_id)
158 | 
159 |         # m - for map
160 |         # r - for reduce
161 |         valid_types = ['m', 'r']
162 |         if job_type is not None and job_type not in valid_types:
163 |             msg = 'Job type %s is illegal' % (job_type,)
164 |             raise IllegalArgumentError(msg)
165 | 
166 |         params = {}
167 |         if job_type is not None:
168 |             params['type'] = job_type
169 | 
170 |         return self.request(path, params=params)
171 | 
172 |     def job_task(self, job_id, task_id):
173 |         """
174 |         A Task resource contains information about a particular task
175 |         within a job.
176 | 
177 |         :param str job_id: The job id
178 |         :param str task_id: The task id
179 |         :returns: API response object with JSON data
180 |         :rtype: :py:class:`yarn_api_client.base.Response`
181 |         """
182 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}'.format(
183 |             jobid=job_id, taskid=task_id)
184 | 
185 |         return self.request(path)
186 | 
187 |     def task_counters(self, job_id, task_id):
188 |         """
189 |         With the task counters API, you can object a collection of resources
190 |         that represent all the counters for that task.
191 | 
192 |         :param str job_id: The job id
193 |         :param str task_id: The task id
194 |         :returns: API response object with JSON data
195 |         :rtype: :py:class:`yarn_api_client.base.Response`
196 |         """
197 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/counters'.format(
198 |             jobid=job_id, taskid=task_id)
199 | 
200 |         return self.request(path)
201 | 
202 |     def task_attempts(self, job_id, task_id):
203 |         """
204 |         With the task attempts API, you can obtain a collection of resources
205 |         that represent a task attempt within a job.
206 | 
207 |         :param str job_id: The job id
208 |         :param str task_id: The task id
209 |         :returns: API response object with JSON data
210 |         :rtype: :py:class:`yarn_api_client.base.Response`
211 |         """
212 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts'.format(
213 |             jobid=job_id, taskid=task_id)
214 | 
215 |         return self.request(path)
216 | 
217 |     def task_attempt(self, job_id, task_id, attempt_id):
218 |         """
219 |         A Task Attempt resource contains information about a particular task
220 |         attempt within a job.
221 | 
222 |         :param str job_id: The job id
223 |         :param str task_id: The task id
224 |         :param str attempt_id: The attempt id
225 |         :returns: API response object with JSON data
226 |         :rtype: :py:class:`yarn_api_client.base.Response`
227 |         """
228 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}'.format(
229 |             jobid=job_id, taskid=task_id, attemptid=attempt_id)
230 | 
231 |         return self.request(path)
232 | 
233 |     def task_attempt_counters(self, job_id, task_id, attempt_id):
234 |         """
235 |         With the task attempt counters API, you can object a collection of
236 |         resources that represent al the counters for that task attempt.
237 | 
238 |         :param str job_id: The job id
239 |         :param str task_id: The task id
240 |         :param str attempt_id: The attempt id
241 |         :returns: API response object with JSON data
242 |         :rtype: :py:class:`yarn_api_client.base.Response`
243 |         """
244 |         path = '/ws/v1/history/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/counters'.format(
245 |             jobid=job_id, taskid=task_id, attemptid=attempt_id)
246 | 
247 |         return self.request(path)
248 | 


--------------------------------------------------------------------------------
/tests/test_hadoop_conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from tempfile import NamedTemporaryFile
  3 | 
  4 | import mock
  5 | from mock import patch
  6 | from requests import RequestException
  7 | from tests import TestCase
  8 | 
  9 | import requests_mock
 10 | from yarn_api_client import hadoop_conf
 11 | import platform
 12 | import os
 13 | import sys
 14 | 
 15 | if sys.version_info[0] == 2:
 16 |     _mock_exception_method = 'assertRaisesRegexp'
 17 | else:
 18 |     _mock_exception_method = 'assertRaisesRegex'
 19 | 
 20 | _http_request_method = ''
 21 | _http_getresponse_method = ''
 22 | 
 23 | try:
 24 |     from httplib import HTTPConnection, OK, NOT_FOUND # NOQA
 25 |     _http_request_method = 'httplib.HTTPConnection.request'
 26 |     _http_getresponse_method = 'httplib.HTTPConnection.getresponse'
 27 | except ImportError:
 28 |     from http.client import HTTPConnection, OK, NOT_FOUND # NOQA
 29 |     _http_request_method = 'http.client.HTTPConnection.request'
 30 |     _http_getresponse_method = 'http.client.HTTPConnection.getresponse'
 31 | 
 32 | if platform.system() == 'Windows':
 33 |     hadoop_conf_path = '/etc/hadoop/conf\\'
 34 | else:
 35 |     hadoop_conf_path = '/etc/hadoop/conf/'
 36 | 
 37 | empty_config = '<configuration></configuration>'.encode('latin1')
 38 | 
 39 | yarn_site_xml = """\
 40 | <configuration>
 41 |   <property>
 42 |     <name>yarn.resourcemanager.webapp.address</name>
 43 |     <value>localhost:8022</value>
 44 |   </property>
 45 |   <property>
 46 |     <name>yarn.resourcemanager.webapp.https.address</name>
 47 |     <value>localhost:8024</value>
 48 |   </property>
 49 |   <property>
 50 |     <name>yarn.http.policy</name>
 51 |     <value>HTTPS_ONLY</value>
 52 |   </property>
 53 | </configuration>
 54 | """.encode('latin1')
 55 | 
 56 | 
 57 | class HadoopConfTestCase(TestCase):
 58 |     def test_parse(self):
 59 |         temp_filename = None
 60 | 
 61 |         with NamedTemporaryFile(delete=False) as f:
 62 |             f.write(yarn_site_xml)
 63 |             f.flush()
 64 |             f.close()
 65 |             temp_filename = f.name
 66 | 
 67 |             key = 'yarn.resourcemanager.webapp.address'
 68 |             value = hadoop_conf.parse(f.name, key)
 69 |             self.assertEqual('localhost:8022', value)
 70 | 
 71 |             key = 'yarn.resourcemanager.webapp.https.address'
 72 |             value = hadoop_conf.parse(f.name, key)
 73 |             self.assertEqual('localhost:8024', value)
 74 | 
 75 |             key = 'yarn.http.policy'
 76 |             value = hadoop_conf.parse(f.name, key)
 77 |             self.assertEqual('HTTPS_ONLY', value)
 78 |         os.remove(temp_filename)
 79 | 
 80 |         with NamedTemporaryFile(delete=False) as f:
 81 |             f.write(empty_config)
 82 |             f.flush()
 83 |             f.close()
 84 |             temp_filename = f.name
 85 | 
 86 |             key = 'yarn.resourcemanager.webapp.address'
 87 |             value = hadoop_conf.parse(f.name, key)
 88 |             self.assertEqual(None, value)
 89 | 
 90 |             key = 'yarn.resourcemanager.webapp.https.address'
 91 |             value = hadoop_conf.parse(f.name, key)
 92 |             self.assertEqual(None, value)
 93 | 
 94 |             key = 'yarn.http.policy'
 95 |             value = hadoop_conf.parse(f.name, key)
 96 |             self.assertEqual(None, value)
 97 |         os.remove(temp_filename)
 98 | 
 99 |     def test_get_resource_endpoint(self):
100 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
101 |             with patch('yarn_api_client.hadoop_conf._get_rm_ids') as get_rm_ids_mock:
102 |                 parse_mock.return_value = 'example.com:8022'
103 |                 get_rm_ids_mock.return_value = None
104 | 
105 |                 endpoint = hadoop_conf.get_resource_manager_endpoint()
106 | 
107 |                 self.assertEqual('http://example.com:8022', endpoint)
108 |                 parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
109 |                                               'yarn.resourcemanager.webapp.address')
110 | 
111 |                 parse_mock.reset_mock()
112 |                 parse_mock.return_value = None
113 | 
114 |                 endpoint = hadoop_conf.get_resource_manager_endpoint()
115 |                 self.assertIsNone(endpoint)
116 | 
117 |     @mock.patch('yarn_api_client.hadoop_conf._get_rm_ids')
118 |     @mock.patch('yarn_api_client.hadoop_conf.parse')
119 |     @mock.patch('yarn_api_client.hadoop_conf.check_is_active_rm')
120 |     def test_get_resource_endpoint_with_ha(self, check_is_active_rm_mock, parse_mock, get_rm_ids_mock):
121 |         get_rm_ids_mock.return_value = ['rm1', 'rm2']
122 |         parse_mock.return_value = 'example.com:8022'
123 |         check_is_active_rm_mock.return_value = True
124 |         endpoint = hadoop_conf.get_resource_manager_endpoint()
125 | 
126 |         self.assertEqual('http://example.com:8022', endpoint)
127 |         parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
128 |                                       'yarn.resourcemanager.webapp.address.rm1')
129 | 
130 |         parse_mock.reset_mock()
131 |         parse_mock.return_value = None
132 | 
133 |         endpoint = hadoop_conf.get_resource_manager_endpoint()
134 |         self.assertIsNone(endpoint)
135 | 
136 |     def test_get_rm_ids(self):
137 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
138 |             parse_mock.return_value = 'rm1,rm2'
139 |             rm_list = hadoop_conf._get_rm_ids(hadoop_conf.CONF_DIR)
140 |             self.assertEqual(['rm1', 'rm2'], rm_list)
141 |             parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml', 'yarn.resourcemanager.ha.rm-ids')
142 | 
143 |             parse_mock.reset_mock()
144 |             parse_mock.return_value = None
145 | 
146 |             rm_list = hadoop_conf._get_rm_ids(hadoop_conf.CONF_DIR)
147 |             self.assertIsNone(rm_list)
148 | 
149 |     @mock.patch('yarn_api_client.hadoop_conf._is_https_only')
150 |     def test_check_is_active_rm(self, is_https_only_mock):
151 |         is_https_only_mock.return_value = False
152 | 
153 |         # Success scenario
154 |         with requests_mock.mock() as requests_get_mock:
155 |             requests_get_mock.get('https://example2:8022/cluster', status_code=200)
156 |             self.assertTrue(hadoop_conf.check_is_active_rm('https://example2:8022'))
157 | 
158 |         # Outage scenario
159 |         with requests_mock.mock() as requests_get_mock:
160 |             requests_get_mock.get('https://example2:8022/cluster', status_code=500)
161 |             self.assertFalse(hadoop_conf.check_is_active_rm('https://example2:8022'))
162 | 
163 |         # Error scenario (URL is wrong - not found)
164 |         with requests_mock.mock() as requests_get_mock:
165 |             requests_get_mock.get('https://example2:8022/cluster', status_code=404)
166 |             self.assertFalse(hadoop_conf.check_is_active_rm('https://example2:8022'))
167 | 
168 |         # Error scenario (necessary Auth is not provided or invalid credentials)
169 |         with requests_mock.mock() as requests_get_mock:
170 |             requests_get_mock.get('https://example2:8022/cluster', status_code=401)
171 |             self.assertFalse(hadoop_conf.check_is_active_rm('https://example2:8022'))
172 | 
173 |         # Emulate requests library exception (socket timeout, etc)
174 |         with requests_mock.mock() as requests_get_mock:
175 |             requests_get_mock.get('example2:8022/cluster', exc=RequestException)
176 |             self.assertFalse(hadoop_conf.check_is_active_rm('example2:8022'))
177 | 
178 |     def test_get_resource_manager(self):
179 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
180 |             parse_mock.return_value = 'example.com:8022'
181 | 
182 |             endpoint = hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, None)
183 | 
184 |             self.assertEqual('http://example.com:8022', endpoint)
185 |             parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml', 'yarn.resourcemanager.webapp.address')
186 | 
187 |             endpoint = hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, 'rm1')
188 | 
189 |             self.assertEqual(('http://example.com:8022'), endpoint)
190 |             parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml', 'yarn.resourcemanager.webapp.address.rm1')
191 | 
192 |             parse_mock.reset_mock()
193 |             parse_mock.return_value = None
194 | 
195 |             endpoint = hadoop_conf._get_resource_manager(hadoop_conf.CONF_DIR, 'rm1')
196 |             self.assertIsNone(endpoint)
197 | 
198 |     def test_get_jobhistory_endpoint(self):
199 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
200 |             parse_mock.return_value = 'example.com:8022'
201 | 
202 |             endpoint = hadoop_conf.get_jobhistory_endpoint()
203 | 
204 |             self.assertEqual('example.com:8022', endpoint)
205 |             parse_mock.assert_called_with(hadoop_conf_path + 'mapred-site.xml',
206 |                                           'mapreduce.jobhistory.webapp.address')
207 | 
208 |             parse_mock.reset_mock()
209 |             parse_mock.return_value = None
210 | 
211 |             endpoint = hadoop_conf.get_jobhistory_endpoint()
212 |             self.assertIsNone(endpoint)
213 | 
214 |     def test_get_nodemanager_endpoint(self):
215 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
216 |             parse_mock.return_value = 'example.com:8022'
217 | 
218 |             endpoint = hadoop_conf.get_nodemanager_endpoint()
219 | 
220 |             self.assertEqual('example.com:8022', endpoint)
221 |             parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
222 |                                           'yarn.nodemanager.webapp.address')
223 | 
224 |             parse_mock.reset_mock()
225 |             parse_mock.return_value = None
226 | 
227 |             endpoint = hadoop_conf.get_nodemanager_endpoint()
228 |             self.assertIsNone(endpoint)
229 | 
230 |     def test_get_webproxy_endpoint(self):
231 |         with patch('yarn_api_client.hadoop_conf.parse') as parse_mock:
232 |             parse_mock.return_value = 'example.com:8022'
233 | 
234 |             endpoint = hadoop_conf.get_webproxy_endpoint()
235 | 
236 |             self.assertEqual('example.com:8022', endpoint)
237 |             parse_mock.assert_called_with(hadoop_conf_path + 'yarn-site.xml',
238 |                                           'yarn.web-proxy.address')
239 | 
240 |             parse_mock.reset_mock()
241 |             parse_mock.return_value = None
242 | 
243 |             endpoint = hadoop_conf.get_webproxy_endpoint()
244 |             self.assertIsNone(endpoint)
245 | 


--------------------------------------------------------------------------------
/yarn_api_client/application_master.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import unicode_literals
  3 | 
  4 | from .base import BaseYarnAPI, get_logger
  5 | from .hadoop_conf import get_webproxy_endpoint
  6 | 
  7 | 
  8 | log = get_logger(__name__)
  9 | 
 10 | 
 11 | class ApplicationMaster(BaseYarnAPI):
 12 |     """
 13 |     The MapReduce Application Master REST API's allow the user to get status
 14 |     on the running MapReduce application master. Currently this is the
 15 |     equivalent to a running MapReduce job. The information includes the jobs
 16 |     the app master is running and all the job particulars like tasks,
 17 |     counters, configuration, attempts, etc.
 18 | 
 19 |     If `address` argument is `None` client will try to extract `address` and
 20 |     `port` from Hadoop configuration files.
 21 | 
 22 |     :param str service_endpoint: ApplicationMaster HTTP(S) address
 23 |     :param int timeout: API connection timeout in seconds
 24 |     :param AuthBase auth: Auth to use for requests
 25 |     :param boolean verify: Either a boolean, in which case it controls whether
 26 |         we verify the server's TLS certificate, or a string, in which case it must
 27 |         be a path to a CA bundle to use. Defaults to ``True``
 28 |     """
 29 |     def __init__(self, service_endpoint=None, timeout=30, auth=None, verify=True, proxies=None):
 30 |         if not service_endpoint:
 31 |             service_endpoint = get_webproxy_endpoint(timeout, auth, verify, proxies)
 32 | 
 33 |         super(ApplicationMaster, self).__init__(service_endpoint, timeout, auth, verify, proxies)
 34 | 
 35 |     def application_information(self, application_id):
 36 |         """
 37 |         The MapReduce application master information resource provides overall
 38 |         information about that mapreduce application master.
 39 |         This includes application id, time it was started, user, name, etc.
 40 | 
 41 |         :returns: API response object with JSON data
 42 |         :rtype: :py:class:`yarn_api_client.base.Response`
 43 |         """
 44 |         path = '/proxy/{appid}/ws/v1/mapreduce/info'.format(
 45 |             appid=application_id)
 46 | 
 47 |         return self.request(path)
 48 | 
 49 |     def jobs(self, application_id):
 50 |         """
 51 |         The jobs resource provides a list of the jobs running on this
 52 |         application master.
 53 | 
 54 |         :param str application_id: The application id
 55 |         :returns: API response object with JSON data
 56 |         :rtype: :py:class:`yarn_api_client.base.Response`
 57 |         """
 58 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs'.format(
 59 |             appid=application_id)
 60 | 
 61 |         return self.request(path)
 62 | 
 63 |     def job(self, application_id, job_id):
 64 |         """
 65 |         A job resource contains information about a particular job that was
 66 |         started by this application master. Certain fields are only accessible
 67 |         if user has permissions - depends on acl settings.
 68 | 
 69 |         :param str application_id: The application id
 70 |         :param str job_id: The job id
 71 |         :returns: API response object with JSON data
 72 |         :rtype: :py:class:`yarn_api_client.base.Response`
 73 |         """
 74 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}'.format(
 75 |             appid=application_id, jobid=job_id)
 76 | 
 77 |         return self.request(path)
 78 | 
 79 |     def job_attempts(self, application_id, job_id):
 80 |         """
 81 |         With the job attempts API, you can obtain a collection of resources
 82 |         that represent the job attempts.
 83 | 
 84 |         :param str application_id: The application id
 85 |         :param str job_id: The job id
 86 |         :returns: API response object with JSON data
 87 |         :rtype: :py:class:`yarn_api_client.base.Response`
 88 |         """
 89 | 
 90 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/jobattempts'.format(
 91 |             appid=application_id, jobid=job_id)
 92 | 
 93 |         return self.request(path)
 94 | 
 95 |     def job_counters(self, application_id, job_id):
 96 |         """
 97 |         With the job counters API, you can object a collection of resources
 98 |         that represent all the counters for that job.
 99 | 
100 |         :param str application_id: The application id
101 |         :param str job_id: The job id
102 |         :returns: API response object with JSON data
103 |         :rtype: :py:class:`yarn_api_client.base.Response`
104 |         """
105 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/counters'.format(
106 |             appid=application_id, jobid=job_id)
107 | 
108 |         return self.request(path)
109 | 
110 |     def job_conf(self, application_id, job_id):
111 |         """
112 |         A job configuration resource contains information about the job
113 |         configuration for this job.
114 | 
115 |         :param str application_id: The application id
116 |         :param str job_id: The job id
117 |         :returns: API response object with JSON data
118 |         :rtype: :py:class:`yarn_api_client.base.Response`
119 |         """
120 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/conf'.format(
121 |             appid=application_id, jobid=job_id)
122 | 
123 |         return self.request(path)
124 | 
125 |     def job_tasks(self, application_id, job_id):
126 |         """
127 |         With the tasks API, you can obtain a collection of resources that
128 |         represent all the tasks for a job.
129 | 
130 |         :param str application_id: The application id
131 |         :param str job_id: The job id
132 |         :returns: API response object with JSON data
133 |         :rtype: :py:class:`yarn_api_client.base.Response`
134 |         """
135 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks'.format(
136 |             appid=application_id, jobid=job_id)
137 | 
138 |         return self.request(path)
139 | 
140 |     def job_task(self, application_id, job_id, task_id):
141 |         """
142 |         A Task resource contains information about a particular
143 |         task within a job.
144 | 
145 |         :param str application_id: The application id
146 |         :param str job_id: The job id
147 |         :param str task_id: The task id
148 |         :returns: API response object with JSON data
149 |         :rtype: :py:class:`yarn_api_client.base.Response`
150 |         """
151 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}'.format(
152 |             appid=application_id, jobid=job_id, taskid=task_id)
153 | 
154 |         return self.request(path)
155 | 
156 |     def task_counters(self, application_id, job_id, task_id):
157 |         """
158 |         With the task counters API, you can object a collection of resources
159 |         that represent all the counters for that task.
160 | 
161 |         :param str application_id: The application id
162 |         :param str job_id: The job id
163 |         :param str task_id: The task id
164 |         :returns: API response object with JSON data
165 |         :rtype: :py:class:`yarn_api_client.base.Response`
166 |         """
167 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/counters'.format(
168 |             appid=application_id, jobid=job_id, taskid=task_id)
169 | 
170 |         return self.request(path)
171 | 
172 |     def task_attempts(self, application_id, job_id, task_id):
173 |         """
174 |         With the task attempts API, you can obtain a collection of resources
175 |         that represent a task attempt within a job.
176 | 
177 |         :param str application_id: The application id
178 |         :param str job_id: The job id
179 |         :param str task_id: The task id
180 |         :returns: API response object with JSON data
181 |         :rtype: :py:class:`yarn_api_client.base.Response`
182 |         """
183 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts'.format(
184 |             appid=application_id, jobid=job_id, taskid=task_id)
185 | 
186 |         return self.request(path)
187 | 
188 |     def task_attempt(self, application_id, job_id, task_id, attempt_id):
189 |         """
190 |         A Task Attempt resource contains information about a particular task
191 |         attempt within a job.
192 | 
193 |         :param str application_id: The application id
194 |         :param str job_id: The job id
195 |         :param str task_id: The task id
196 |         :param str attempt_id: The attempt id
197 |         :returns: API response object with JSON data
198 |         :rtype: :py:class:`yarn_api_client.base.Response`
199 |         """
200 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}'.format(
201 |             appid=application_id, jobid=job_id, taskid=task_id,
202 |             attemptid=attempt_id)
203 | 
204 |         return self.request(path)
205 | 
206 |     def task_attempt_state(self, application_id, job_id, task_id, attempt_id):
207 |         """
208 |         With the task attempt state API, you can query the state of a submitted
209 |         task attempt.
210 | 
211 |         :param str application_id: The application id
212 |         :param str job_id: The job id
213 |         :param str task_id: The task id
214 |         :param str attempt_id: The attempt id
215 |         :returns: API response object with JSON data
216 |         :rtype: :py:class:`yarn_api_client.base.Response`
217 |         """
218 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/state'.format(
219 |             appid=application_id, jobid=job_id, taskid=task_id,
220 |             attemptid=attempt_id)
221 | 
222 |         return self.request(path)
223 | 
224 |     def task_attempt_state_kill(self, application_id, job_id, task_id, attempt_id):
225 |         """
226 |         Kill specific attempt using task attempt state API.
227 | 
228 |         :param str application_id: The application id
229 |         :param str job_id: The job id
230 |         :param str task_id: The task id
231 |         :param str attempt_id: The attempt id
232 |         :returns: API response object with JSON data
233 |         :rtype: :py:class:`yarn_api_client.base.Response`
234 |         """
235 |         data = {"state": "KILLED"}
236 | 
237 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/state'.format(
238 |             appid=application_id, jobid=job_id, taskid=task_id,
239 |             attemptid=attempt_id)
240 | 
241 |         return self.request(path, 'PUT', json=data)
242 | 
243 |     def task_attempt_counters(self, application_id, job_id, task_id, attempt_id):
244 |         """
245 |         With the task attempt counters API, you can object a collection
246 |         of resources that represent al the counters for that task attempt.
247 | 
248 |         :param str application_id: The application id
249 |         :param str job_id: The job id
250 |         :param str task_id: The task id
251 |         :param str attempt_id: The attempt id
252 |         :returns: API response object with JSON data
253 |         :rtype: :py:class:`yarn_api_client.base.Response`
254 |         """
255 |         path = '/proxy/{appid}/ws/v1/mapreduce/jobs/{jobid}/tasks/{taskid}/attempts/{attemptid}/counters'.format(
256 |             appid=application_id, jobid=job_id, taskid=task_id,
257 |             attemptid=attempt_id)
258 | 
259 |         return self.request(path)
260 | 


--------------------------------------------------------------------------------
/yarn_api_client/main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import unicode_literals
  3 | import argparse
  4 | from pprint import pprint
  5 | 
  6 | from .base import get_logger
  7 | from .constants import (YarnApplicationState, FinalApplicationStatus,
  8 |                         ApplicationState, JobStateInternal)
  9 | from . import ResourceManager, NodeManager, HistoryServer, ApplicationMaster
 10 | 
 11 | log = get_logger(__name__)
 12 | 
 13 | 
 14 | def get_parser():
 15 |     parser = argparse.ArgumentParser(
 16 |         description='Client for Hadoop® YARN API')
 17 | 
 18 |     parser.add_argument('--endpoint', help='API endpoint (https://test.cluster.com:8090)')
 19 | 
 20 |     subparsers = parser.add_subparsers()
 21 |     populate_resource_manager_arguments(subparsers)
 22 |     populate_node_manager_arguments(subparsers)
 23 |     populate_application_master_arguments(subparsers)
 24 |     populate_history_server_arguments(subparsers)
 25 | 
 26 |     return parser
 27 | 
 28 | 
 29 | def populate_resource_manager_arguments(subparsers):
 30 |     rm_parser = subparsers.add_parser(
 31 |         'rm', help='ResourceManager REST API\'s')
 32 |     rm_parser.set_defaults(api_class=ResourceManager)
 33 | 
 34 |     rm_subparsers = rm_parser.add_subparsers()
 35 | 
 36 |     ci_parser = rm_subparsers.add_parser(
 37 |         'info', help='Cluster Information API')
 38 |     ci_parser.set_defaults(method='cluster_information')
 39 | 
 40 |     cm_parser = rm_subparsers.add_parser(
 41 |         'metrics', help='Cluster Metrics API')
 42 |     cm_parser.set_defaults(method='cluster_metrics')
 43 | 
 44 |     cs_parser = rm_subparsers.add_parser(
 45 |         'scheduler', help='Cluster Scheduler API')
 46 |     cs_parser.set_defaults(method='cluster_scheduler')
 47 | 
 48 |     cas_parser = rm_subparsers.add_parser(
 49 |         'apps', help='Cluster Applications API')
 50 |     cas_parser.add_argument('--state',
 51 |                             help='states of the applications',
 52 |                             choices=dict(YarnApplicationState).keys())
 53 |     cas_parser.add_argument('--final-status',
 54 |                             choices=dict(FinalApplicationStatus).keys())
 55 |     cas_parser.add_argument('--user')
 56 |     cas_parser.add_argument('--queue')
 57 |     cas_parser.add_argument('--limit')
 58 |     cas_parser.add_argument('--started-time-begin')
 59 |     cas_parser.add_argument('--started-time-end')
 60 |     cas_parser.add_argument('--finished-time-begin')
 61 |     cas_parser.add_argument('--finished-time-end')
 62 |     cas_parser.set_defaults(method='cluster_applications')
 63 |     cas_parser.set_defaults(method_kwargs=[
 64 |         'state', 'user', 'queue', 'limit',
 65 |         'started_time_begin', 'started_time_end', 'finished_time_begin',
 66 |         'finished_time_end', 'final_status'])
 67 | 
 68 |     ca_parser = rm_subparsers.add_parser(
 69 |         'app', help='Cluster Application API')
 70 |     ca_parser.add_argument('application_id')
 71 |     ca_parser.set_defaults(method='cluster_application')
 72 |     ca_parser.set_defaults(method_args=['application_id'])
 73 | 
 74 |     caa_parser = rm_subparsers.add_parser(
 75 |         'app_attempts', help='Cluster Application Attempts API')
 76 |     caa_parser.add_argument('application_id')
 77 |     caa_parser.set_defaults(method='cluster_application_attempts')
 78 |     caa_parser.set_defaults(method_args=['application_id'])
 79 | 
 80 |     cns_parser = rm_subparsers.add_parser(
 81 |         'nodes', help='Cluster Nodes API')
 82 |     cns_parser.add_argument('--state', help='the state of the node')
 83 |     cns_parser.add_argument('--healthy', help='true or false')
 84 |     cns_parser.set_defaults(method='cluster_nodes')
 85 |     cns_parser.set_defaults(method_kargs=['state', 'healthy'])
 86 | 
 87 |     cn_parser = rm_subparsers.add_parser(
 88 |         'node', help='Cluster Node API')
 89 |     cn_parser.add_argument('node_id')
 90 |     cn_parser.set_defaults(method='cluster_node')
 91 |     cn_parser.set_defaults(method_args=['node_id'])
 92 | 
 93 | 
 94 | def populate_node_manager_arguments(subparsers):
 95 |     nm_parser = subparsers.add_parser(
 96 |         'nm', help='NodeManager REST API\'s')
 97 |     nm_parser.set_defaults(api_class=NodeManager)
 98 | 
 99 |     nm_subparsers = nm_parser.add_subparsers()
100 | 
101 |     ni_parser = nm_subparsers.add_parser(
102 |         'info', help='NodeManager Information API')
103 |     ni_parser.set_defaults(method='node_information')
104 | 
105 |     nas_parser = nm_subparsers.add_parser(
106 |         'apps', help='Applications API')
107 |     nas_parser.add_argument('--state',
108 |                             help='application state',
109 |                             choices=dict(ApplicationState).keys())
110 |     nas_parser.add_argument('--user',
111 |                             help='user name')
112 |     nas_parser.set_defaults(method='node_applications')
113 |     nas_parser.set_defaults(method_kwargs=['state', 'user'])
114 | 
115 |     na_parser = nm_subparsers.add_parser(
116 |         'app', help='Application API')
117 |     na_parser.add_argument('application_id')
118 |     na_parser.set_defaults(method='node_application')
119 |     na_parser.set_defaults(method_args=['application_id'])
120 | 
121 |     ncs_parser = nm_subparsers.add_parser(
122 |         'containers', help='Containers API')
123 |     ncs_parser.set_defaults(method='node_containers')
124 | 
125 |     nc_parser = nm_subparsers.add_parser(
126 |         'container', help='Container API')
127 |     nc_parser.add_argument('container_id')
128 |     nc_parser.set_defaults(method='node_container')
129 |     nc_parser.set_defaults(method_args=['container_id'])
130 | 
131 | 
132 | def populate_application_master_arguments(subparsers):
133 |     am_parser = subparsers.add_parser(
134 |         'am', help='MapReduce Application Master REST API\'s')
135 |     am_parser.set_defaults(api_class=ApplicationMaster)
136 |     am_parser.add_argument('application_id')
137 | 
138 |     # TODO: not implemented
139 | 
140 | 
141 | def populate_history_server_arguments(subparsers):
142 |     hs_parser = subparsers.add_parser(
143 |         'hs', help='History Server REST API\'s')
144 |     hs_parser.set_defaults(api_class=HistoryServer)
145 | 
146 |     hs_subparsers = hs_parser.add_subparsers()
147 | 
148 |     hi_parser = hs_subparsers.add_parser(
149 |         'info', help='History Server Information API')
150 |     hi_parser.set_defaults(method='application_information')
151 | 
152 |     hjs_parser = hs_subparsers.add_parser(
153 |         'jobs', help='Jobs API')
154 |     hjs_parser.add_argument('--state',
155 |                             help='states of the applications',
156 |                             choices=dict(JobStateInternal).keys())
157 |     hjs_parser.add_argument('--user')
158 |     hjs_parser.add_argument('--queue')
159 |     hjs_parser.add_argument('--limit')
160 |     hjs_parser.add_argument('--started-time-begin')
161 |     hjs_parser.add_argument('--started-time-end')
162 |     hjs_parser.add_argument('--finished-time-begin')
163 |     hjs_parser.add_argument('--finished-time-end')
164 |     hjs_parser.set_defaults(method='jobs')
165 |     hjs_parser.set_defaults(method_kwargs=[
166 |         'state', 'user', 'queue', 'limit',
167 |         'started_time_begin', 'started_time_end', 'finished_time_begin',
168 |         'finished_time_end'])
169 | 
170 |     hj_parser = hs_subparsers.add_parser('job', help='Job API')
171 |     hj_parser.add_argument('job_id')
172 |     hj_parser.set_defaults(method='job')
173 |     hj_parser.set_defaults(method_args=['job_id'])
174 | 
175 |     hja_parser = hs_subparsers.add_parser(
176 |         'job_attempts', help='Job Attempts API')
177 |     hja_parser.add_argument('job_id')
178 |     hja_parser.set_defaults(method='job_attempts')
179 |     hja_parser.set_defaults(method_args=['job_id'])
180 | 
181 |     hjc_parser = hs_subparsers.add_parser(
182 |         'job_counters', help='Job Counters API')
183 |     hjc_parser.add_argument('job_id')
184 |     hjc_parser.set_defaults(method='job_counters')
185 |     hjc_parser.set_defaults(method_args=['job_id'])
186 | 
187 |     hjcn_parser = hs_subparsers.add_parser(
188 |         'job_conf', help='Job Conf API')
189 |     hjcn_parser.add_argument('job_id')
190 |     hjcn_parser.set_defaults(method='job_conf')
191 |     hjcn_parser.set_defaults(method_args=['job_id'])
192 | 
193 |     hts_parser = hs_subparsers.add_parser(
194 |         'tasks', help='Tasks API')
195 |     hts_parser.add_argument('job_id')
196 |     hts_parser.add_argument('--type', choices=['m', 'r'],
197 |                             help=('type of task, m for map task '
198 |                                   'or r for reduce task.'))
199 |     hts_parser.set_defaults(method='job_tasks')
200 |     hts_parser.set_defaults(method_args=['job_id'])
201 |     hts_parser.set_defaults(method_kwargs=['type'])
202 | 
203 |     ht_parser = hs_subparsers.add_parser(
204 |         'task', help='Task API')
205 |     ht_parser.add_argument('job_id')
206 |     ht_parser.add_argument('task_id')
207 |     ht_parser.set_defaults(method='job_task')
208 |     ht_parser.set_defaults(method_args=['job_id', 'task_id'])
209 | 
210 |     htc_parser = hs_subparsers.add_parser(
211 |         'task_counters', help='Task Counters API')
212 |     htc_parser.add_argument('job_id')
213 |     htc_parser.add_argument('task_id')
214 |     htc_parser.set_defaults(method='task_counters')
215 |     htc_parser.set_defaults(method_args=['job_id', 'task_id'])
216 | 
217 |     htas_parser = hs_subparsers.add_parser(
218 |         'task_attempts', help='Task Attempts API')
219 |     htas_parser.add_argument('job_id')
220 |     htas_parser.add_argument('task_id')
221 |     htas_parser.set_defaults(method='task_attempts')
222 |     htas_parser.set_defaults(method_args=['job_id', 'task_id'])
223 | 
224 |     hta_parser = hs_subparsers.add_parser(
225 |         'task_attempt', help='Task Attempt API')
226 |     hta_parser.add_argument('job_id')
227 |     hta_parser.add_argument('task_id')
228 |     hta_parser.add_argument('attempt_id')
229 |     hta_parser.set_defaults(method='task_attempt')
230 |     hta_parser.set_defaults(method_args=['job_id', 'task_id', 'attempt_id'])
231 | 
232 |     htac_parser = hs_subparsers.add_parser(
233 |         'task_attempt_counters', help='Task Attempt Counters API')
234 |     htac_parser.add_argument('job_id')
235 |     htac_parser.add_argument('task_id')
236 |     htac_parser.add_argument('attempt_id')
237 |     htac_parser.set_defaults(method='task_attempt_counters')
238 |     htac_parser.set_defaults(method_args=['job_id', 'task_id', 'attempt_id'])
239 | 
240 | 
241 | def main():
242 |     parser = get_parser()
243 |     opts = parser.parse_args()
244 | 
245 |     class_kwargs = {}
246 |     if not hasattr(opts, 'api_class'):
247 |         raise Exception("Please provide api class - rm, hs, nm, am")
248 |     # Only ResourceManager supports HA
249 |     elif opts.endpoint:
250 |         if opts.api_class == ResourceManager:
251 |             class_kwargs['service_endpoints'] = opts.endpoint.split(",")
252 |         else:
253 |             class_kwargs['service_endpoint'] = opts.endpoint
254 | 
255 |     api = opts.api_class(**class_kwargs)
256 |     # Construct positional arguments for method
257 |     if 'method_args' in opts:
258 |         method_args = [getattr(opts, arg) for arg in opts.method_args]
259 |     else:
260 |         method_args = []
261 |     # Construct key arguments for method
262 |     if 'method_kwargs' in opts:
263 |         method_kwargs = dict((key, getattr(opts, key)) for key in opts.method_kwargs)
264 |     else:
265 |         method_kwargs = {}
266 |     response = getattr(api, opts.method)(*method_args, **method_kwargs)
267 |     pprint(response.data)
268 | 


--------------------------------------------------------------------------------
/tests/test_resource_manager.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from mock import patch
  3 | from tests import TestCase
  4 | 
  5 | from yarn_api_client.resource_manager import ResourceManager
  6 | from yarn_api_client.errors import IllegalArgumentError
  7 | 
  8 | 
  9 | @patch('yarn_api_client.resource_manager.ResourceManager.request')
 10 | class ResourceManagerTestCase(TestCase):
 11 |     @patch('yarn_api_client.resource_manager.check_is_active_rm')
 12 |     def setUp(self, check_is_active_rm_mock):
 13 |         check_is_active_rm_mock.return_value = True
 14 |         self.rm = ResourceManager(['localhost'])
 15 | 
 16 |     @patch('yarn_api_client.resource_manager.get_resource_manager_endpoint')
 17 |     def test__init__(self, get_config_mock, request_mock):
 18 |         get_config_mock.return_value = "https://localhost"
 19 |         rm = ResourceManager()
 20 |         get_config_mock.assert_called_with(30, None, True)
 21 |         self.assertEqual(rm.service_uri.is_https, True)
 22 | 
 23 |     def test_cluster_information(self, request_mock):
 24 |         self.rm.cluster_information()
 25 |         request_mock.assert_called_with('/ws/v1/cluster/info')
 26 | 
 27 |     def test_cluster_metrics(self, request_mock):
 28 |         self.rm.cluster_metrics()
 29 |         request_mock.assert_called_with('/ws/v1/cluster/metrics')
 30 | 
 31 |     def test_cluster_scheduler(self, request_mock):
 32 |         self.rm.cluster_scheduler()
 33 |         request_mock.assert_called_with('/ws/v1/cluster/scheduler')
 34 | 
 35 |     def test_cluster_applications(self, request_mock):
 36 |         self.rm.cluster_applications()
 37 |         request_mock.assert_called_with('/ws/v1/cluster/apps', params={})
 38 | 
 39 |         self.rm.cluster_applications(state='KILLED', states=['KILLED'],
 40 |                                      final_status='FAILED', user='root',
 41 |                                      queue='low', limit=10,
 42 |                                      started_time_begin=1, started_time_end=2,
 43 |                                      finished_time_begin=3, finished_time_end=4,
 44 |                                      application_types=['YARN'],
 45 |                                      application_tags=['apptag'],
 46 |                                      name="wordcount",
 47 |                                      de_selects=['resourceRequests'])
 48 |         request_mock.assert_called_with('/ws/v1/cluster/apps', params={
 49 |             'state': 'KILLED',
 50 |             'states': 'KILLED',
 51 |             'finalStatus': 'FAILED',
 52 |             'user': 'root',
 53 |             'queue': 'low',
 54 |             'limit': 10,
 55 |             'startedTimeBegin': 1,
 56 |             'startedTimeEnd': 2,
 57 |             'finishedTimeBegin': 3,
 58 |             'finishedTimeEnd': 4,
 59 |             'applicationTypes': 'YARN',
 60 |             'applicationTags': 'apptag',
 61 |             'name': 'wordcount',
 62 |             'deSelects': 'resourceRequests'
 63 |         })
 64 | 
 65 |         with self.assertRaises(IllegalArgumentError):
 66 |             self.rm.cluster_applications(states=['ololo'])
 67 | 
 68 |         with self.assertRaises(IllegalArgumentError):
 69 |             self.rm.cluster_applications(final_status='ololo')
 70 | 
 71 |     def test_cluster_application_statistics(self, request_mock):
 72 |         self.rm.cluster_application_statistics()
 73 |         request_mock.assert_called_with('/ws/v1/cluster/appstatistics', params={})
 74 |         # TODO: test arguments
 75 | 
 76 |     def test_cluster_application(self, request_mock):
 77 |         self.rm.cluster_application('app_1')
 78 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1')
 79 | 
 80 |     def test_cluster_application_attempts(self, request_mock):
 81 |         self.rm.cluster_application_attempts('app_1')
 82 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts')
 83 | 
 84 |     def test_cluster_application_attempt_info(self, request_mock):
 85 |         self.rm.cluster_application_attempt_info('app_1', 'attempt_1')
 86 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1')
 87 | 
 88 |     def test_cluster_application_attempt_containers(self, request_mock):
 89 |         self.rm.cluster_application_attempt_containers('app_1', 'attempt_1')
 90 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1/containers')
 91 | 
 92 |     def test_cluster_application_attempt_container_info(self, request_mock):
 93 |         self.rm.cluster_application_attempt_container_info('app_1', 'attempt_1', 'container_1')
 94 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/appattempts/attempt_1/containers/container_1')
 95 | 
 96 |     def test_cluster_application_state(self, request_mock):
 97 |         self.rm.cluster_application_state('app_1')
 98 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/state')
 99 | 
100 |     def test_cluster_application_kill(self, request_mock):
101 |         self.rm.cluster_application_kill('app_1')
102 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/state', 'PUT', json={
103 |             "state": 'KILLED'
104 |         })
105 | 
106 |     def test_cluster_nodes(self, request_mock):
107 |         self.rm.cluster_nodes()
108 |         request_mock.assert_called_with('/ws/v1/cluster/nodes', params={})
109 | 
110 |         self.rm.cluster_nodes(states=['NEW'])
111 |         request_mock.assert_called_with('/ws/v1/cluster/nodes', params={
112 |             "states": 'NEW'
113 |         })
114 | 
115 |         with self.assertRaises(IllegalArgumentError):
116 |             self.rm.cluster_nodes(states=['ololo'])
117 | 
118 |     def test_cluster_node(self, request_mock):
119 |         self.rm.cluster_node('node_1')
120 |         request_mock.assert_called_with('/ws/v1/cluster/nodes/node_1')
121 | 
122 |     def test_cluster_submit_application(self, request_mock):
123 |         self.rm.cluster_submit_application({"application-name": "dummy_application"})
124 |         request_mock.assert_called_with('/ws/v1/cluster/apps', 'POST', json={
125 |             "application-name": "dummy_application"
126 |         })
127 | 
128 |     def test_cluster_new_application(self, request_mock):
129 |         self.rm.cluster_new_application()
130 |         request_mock.assert_called_with('/ws/v1/cluster/apps/new-application', 'POST')
131 | 
132 |     def test_cluster_get_application_queue(self, request_mock):
133 |         self.rm.cluster_get_application_queue('app_1')
134 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/queue')
135 | 
136 |     def test_cluster_change_application_queue(self, request_mock):
137 |         self.rm.cluster_change_application_queue('app_1', 'queue_1')
138 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/queue', 'PUT', json={
139 |             "queue": 'queue_1'
140 |         })
141 | 
142 |     def test_cluster_get_application_priority(self, request_mock):
143 |         self.rm.cluster_get_application_priority('app_1')
144 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/priority')
145 | 
146 |     def test_cluster_change_application_priority(self, request_mock):
147 |         self.rm.cluster_change_application_priority('app_1', 'priority_1')
148 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/priority', 'PUT', json={
149 |             "priority": 'priority_1'
150 |         })
151 | 
152 |     @patch('yarn_api_client.hadoop_conf.parse')
153 |     def test_cluster_node_container_memory(self, parse_mock, request_mock):
154 |         parse_mock.return_value = 1024
155 |         value = self.rm.cluster_node_container_memory()
156 |         self.assertEqual(value, 1024)
157 | 
158 |     # TODO
159 |     # def test_cluster_scheduler_queue(self, request_mock):
160 |     #     class ResponseMock():
161 |     #         def __init__(self, status, data):
162 |     #             self.status = status
163 |     #             self.data = data
164 | 
165 |     #     request_mock.return_value = ResponseMock(
166 |     #         'OK',
167 |     #         {
168 |     #             'scheduler': {
169 |     #                 'schedulerInfo': {
170 |     #                     "queues": {
171 |     #                         "queue": [
172 |     #                             {
173 |     #                                 'queueName': 'queue_1',
174 |     #                                 'queues': {
175 |     #                                     'queue': [
176 |     #                                         {
177 |     #                                             "queueName": 'queue_2',
178 |     #                                             'queues': {
179 |     #                                                 'queue': [
180 |     #                                                     {
181 |     #                                                         'queueName': 'queue_3'
182 |     #                                                     }
183 |     #                                                 ]
184 |     #                                             }
185 |     #                                         }
186 |     #                                     ]
187 |     #                                 }
188 |     #                             }
189 |     #                         ]
190 |     #                     }
191 |     #                 }
192 |     #             }
193 |     #         }
194 |     #     )
195 |     #     value = self.rm.cluster_scheduler_queue('queue_1')
196 |     #     self.assertIsNotNone(value)
197 | 
198 |     #     request_mock.return_value = ResponseMock(
199 |     #         'OK',
200 |     #         {
201 |     #             'scheduler': {
202 |     #                 'schedulerInfo': {
203 |     #                     'queueName': 'queue_1'
204 |     #                 }
205 |     #             }
206 |     #         }
207 |     #     )
208 |     #     value = self.rm.cluster_scheduler_queue('queue_2')
209 |     #     self.assertIsNone(value)
210 | 
211 |     def test_cluster_scheduler_queue_availability(self, request_mock):
212 |         value = self.rm.cluster_scheduler_queue_availability({'absoluteUsedCapacity': 90}, 70)
213 |         self.assertEqual(value, False)
214 | 
215 |         value = self.rm.cluster_scheduler_queue_availability({'absoluteUsedCapacity': 50}, 70)
216 |         self.assertEqual(value, True)
217 | 
218 |     def test_cluster_queue_partition(self, request_mock):
219 |         value = self.rm.cluster_queue_partition(
220 |             {
221 |                 'capacities': {
222 |                     'queueCapacitiesByPartition': [
223 |                         {
224 |                             'partitionName': 'label_1'
225 |                         },
226 |                         {
227 |                             'partitionName': 'label_2'
228 |                         }
229 |                     ]
230 |                 },
231 |             },
232 |             'label_1'
233 |         )
234 |         self.assertIsNotNone(value)
235 | 
236 |         value = self.rm.cluster_queue_partition(
237 |             {
238 |                 'capacities': {
239 |                     'queueCapacitiesByPartition': [
240 |                         {
241 |                             'partitionName': 'label_1'
242 |                         },
243 |                         {
244 |                             'partitionName': 'label_2'
245 |                         }
246 |                     ]
247 |                 },
248 |             },
249 |             'label_3'
250 |         )
251 |         self.assertIsNone(value)
252 | 
253 |     def test_cluster_reservations(self, request_mock):
254 |         self.rm.cluster_reservations('queue_1', 'reservation_1', 0, 5, True)
255 |         request_mock.assert_called_with('/ws/v1/cluster/reservation/list', params={
256 |             "queue": "queue_1",
257 |             "reservation-id": "reservation_1",
258 |             "start-time": 0,
259 |             "end-time": 5,
260 |             "include-resource-allocations": True
261 |         })
262 | 
263 |     def test_cluster_new_delegation_token(self, request_mock):
264 |         self.rm.cluster_new_delegation_token('renewer_1')
265 |         request_mock.assert_called_with('/ws/v1/cluster/delegation-token', 'POST', json={
266 |             "renewer": "renewer_1"
267 |         })
268 | 
269 |     def test_cluster_renew_delegation_token(self, request_mock):
270 |         self.rm.cluster_renew_delegation_token('delegation_token_1')
271 |         request_mock.assert_called_with('/ws/v1/cluster/delegation-token/expiration', 'POST', headers={
272 |             "Hadoop-YARN-RM-Delegation-Token": 'delegation_token_1'
273 |         })
274 | 
275 |     def test_cluster_cancel_delegation_token(self, request_mock):
276 |         self.rm.cluster_cancel_delegation_token('delegation_token_1')
277 |         request_mock.assert_called_with('/ws/v1/cluster/delegation-token', 'DELETE', headers={
278 |             "Hadoop-YARN-RM-Delegation-Token": 'delegation_token_1'
279 |         })
280 | 
281 |     def test_cluster_new_reservation(self, request_mock):
282 |         self.rm.cluster_new_reservation()
283 |         request_mock.assert_called_with('/ws/v1/cluster/reservation/new-reservation', 'POST')
284 | 
285 |     def test_cluster_submit_reservation(self, request_mock):
286 |         self.rm.cluster_submit_reservation({'reservation-id': 'reservation_1'})
287 |         request_mock.assert_called_with('/ws/v1/cluster/reservation/submit', 'POST', json={
288 |             'reservation-id': 'reservation_1'
289 |         })
290 | 
291 |     def test_cluster_update_reservation(self, request_mock):
292 |         self.rm.cluster_update_reservation({
293 |             'reservation-id': 'reservation_1'
294 |         })
295 |         request_mock.assert_called_with('/ws/v1/cluster/reservation/update', 'POST', json={
296 |             'reservation-id': 'reservation_1'
297 |         })
298 | 
299 |     def test_cluster_delete_reservation(self, request_mock):
300 |         self.rm.cluster_delete_reservation('reservation_1')
301 |         request_mock.assert_called_with('/ws/v1/cluster/reservation/delete', 'POST', json={
302 |             'reservation-id': 'reservation_1'
303 |         })
304 | 
305 |     def test_cluster_application_timeouts(self, request_mock):
306 |         self.rm.cluster_application_timeouts('app_1')
307 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeouts')
308 | 
309 |     def test_cluster_application_timeout(self, request_mock):
310 |         self.rm.cluster_application_timeout('app_1', 'LIFETIME')
311 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeouts/LIFETIME')
312 | 
313 |     def test_cluster_update_application_timeout(self, request_mock):
314 |         self.rm.cluster_update_application_timeout('app_1', 'LIFETIME', '2016-12-05T22:51:00.104+0530')
315 |         request_mock.assert_called_with('/ws/v1/cluster/apps/app_1/timeout', 'PUT', json={
316 |             'timeout': {'type': 'LIFETIME', 'expiryTime': '2016-12-05T22:51:00.104+0530'}
317 |         })
318 | 
319 |     def test_cluster_scheduler_conf_mutation(self, request_mock):
320 |         self.rm.cluster_scheduler_conf_mutation()
321 |         request_mock.assert_called_with('/ws/v1/cluster/scheduler-conf')
322 | 
323 |     def test_cluster_modify_scheduler_conf_mutation(self, request_mock):
324 |         self.rm.cluster_modify_scheduler_conf_mutation({
325 |             'queue-name': 'queue_1',
326 |             'params': {
327 |                 'test': 'test'
328 |             }
329 |         })
330 |         request_mock.assert_called_with('/ws/v1/cluster/scheduler-conf', 'PUT', json={
331 |             'queue-name': 'queue_1',
332 |             'params': {
333 |                 'test': 'test'
334 |             }
335 |         })
336 | 
337 |     def test_cluster_node_update_resource(self, request_mock):
338 |         self.rm.cluster_node_update_resource('node_1', {
339 |           "resource":
340 |           {
341 |             "memory": 1024,
342 |             "vCores": 3
343 |           },
344 |           "overCommitTimeout": -1
345 |         })
346 |         request_mock.assert_called_with('/ws/v1/cluster/nodes/node_1/resource', 'POST', json={
347 |           "resource":
348 |           {
349 |             "memory": 1024,
350 |             "vCores": 3
351 |           },
352 |           "overCommitTimeout": -1
353 |         })
354 | 
355 |     def test_cluster_container_signal(self, request_mock):
356 |         self.rm.cluster_container_signal('container_1', 'OUTPUT_THREAD_DUMP')
357 |         request_mock.assert_called_with(
358 |             '/ws/v1/cluster/containers/container_1/signal/OUTPUT_THREAD_DUMP',
359 |             'POST'
360 |         )
361 | 
362 |     def test_scheduler_activities(self, request_mock):
363 |         self.rm.scheduler_activities(node_id='node_1', group_by='diagnostic')
364 |         request_mock.assert_called_with('/ws/v1/cluster/scheduler/activities', params={
365 |             "nodeId": 'node_1',
366 |             "groupBy": 'diagnostic'
367 |         })
368 | 
369 |     def test_application_activities(self, request_mock):
370 |         self.rm.application_activities('app_1', max_time=4,
371 |                                        request_priorities=["1","2"],
372 |                                        allocation_request_ids=["-1", "1"], group_by="diagnostic",
373 |                                        limit=5, actions=['refresh', 'get'], summarize=True)
374 |         request_mock.assert_called_with('/ws/v1/cluster/scheduler/app-activities/app_1', params={
375 |             "maxTime": 4,
376 |             "requestPriorities": "1,2",
377 |             "allocationRequestIds": "-1,1",
378 |             "groupBy": "diagnostic",
379 |             "limit": 5,
380 |             "actions": "refresh,get",
381 |             "summarize": True
382 |         })
383 | 


--------------------------------------------------------------------------------
/yarn_api_client/resource_manager.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import unicode_literals
  3 | from .base import BaseYarnAPI, get_logger
  4 | from .constants import YarnApplicationState, FinalApplicationStatus, ClusterContainerSignal
  5 | from .errors import IllegalArgumentError
  6 | from .hadoop_conf import get_resource_manager_endpoint, check_is_active_rm, CONF_DIR, _get_maximum_container_memory
  7 | from collections import deque
  8 | 
  9 | log = get_logger(__name__)
 10 | LEGAL_STATES = {s for s, _ in YarnApplicationState}
 11 | LEGAL_FINAL_STATUSES = {s for s, _ in FinalApplicationStatus}
 12 | LEGAL_CLUSTER_CONTAINER_STATUSES = {s for s, _ in ClusterContainerSignal}
 13 | 
 14 | 
 15 | def validate_yarn_application_state(state, required=False):
 16 |     if state:
 17 |         if state not in LEGAL_STATES:
 18 |             msg = 'Yarn Application State %s is illegal' % (state,)
 19 |             raise IllegalArgumentError(msg)
 20 |     else:
 21 |         if required:
 22 |             msg = "state argument is required to be provided"
 23 |             raise IllegalArgumentError(msg)
 24 | 
 25 | 
 26 | def validate_yarn_application_states(states, required=False):
 27 |     if states:
 28 |         if not isinstance(states, list):
 29 |             msg = "States should be list"
 30 |             raise IllegalArgumentError(msg)
 31 | 
 32 |         illegal_states = set(states) - LEGAL_STATES
 33 |         if illegal_states:
 34 |             msg = 'Yarn Application States %s are illegal' % (
 35 |                 ",".join(illegal_states),
 36 |             )
 37 |             raise IllegalArgumentError(msg)
 38 |     else:
 39 |         if required:
 40 |             msg = "states argument is required to be provided"
 41 |             raise IllegalArgumentError(msg)
 42 | 
 43 | 
 44 | def validate_final_application_status(final_status, required=False):
 45 |     if final_status:
 46 |         if final_status not in LEGAL_FINAL_STATUSES:
 47 |             msg = 'Final Application Status %s is illegal' % (final_status,)
 48 |             raise IllegalArgumentError(msg)
 49 |     else:
 50 |         if required:
 51 |             msg = "final_status argument is required to be provided"
 52 |             raise IllegalArgumentError(msg)
 53 | 
 54 | 
 55 | def validate_cluster_container_status(cluster_container_status, required=False):
 56 |     if cluster_container_status:
 57 |         if cluster_container_status not in LEGAL_CLUSTER_CONTAINER_STATUSES:
 58 |             msg = 'Cluster Container Status %s is illegal' % (cluster_container_status,)
 59 |             raise IllegalArgumentError(msg)
 60 |     else:
 61 |         if required:
 62 |             msg = "cluster_container_status argument is required to be provided"
 63 |             raise IllegalArgumentError(msg)
 64 | 
 65 | 
 66 | class ResourceManager(BaseYarnAPI):
 67 |     """
 68 |     The ResourceManager REST API's allow the user to get information about the
 69 |     cluster - status on the cluster, metrics on the cluster,
 70 |     scheduler information, information about nodes in the cluster,
 71 |     and information about applications on the cluster.
 72 | 
 73 |     If `service_endpoint` argument is `None` client will try to extract it from
 74 |     Hadoop configuration files.  If both `address` and `alt_address` are
 75 |     provided, the address corresponding to the ACTIVE HA Resource Manager will
 76 |     be used.
 77 | 
 78 |     :param List[str] service_endpoints: List of ResourceManager HTTP(S)
 79 |         addresses
 80 |     :param int timeout: API connection timeout in seconds
 81 |     :param AuthBase auth: Auth to use for requests configurations
 82 |     :param boolean verify: Either a boolean, in which case it controls whether
 83 |         we verify the server's TLS certificate, or a string, in which case it must
 84 |         be a path to a CA bundle to use. Defaults to ``True``
 85 |     """
 86 |     def __init__(self, service_endpoints=None, timeout=30, auth=None, verify=True, proxies=None):
 87 |         active_service_endpoint = None
 88 |         if not service_endpoints:
 89 |             active_service_endpoint = get_resource_manager_endpoint(timeout, auth, verify)
 90 |         else:
 91 |             for endpoint in service_endpoints:
 92 |                 if check_is_active_rm(endpoint, timeout, auth, verify):
 93 |                     active_service_endpoint = endpoint
 94 |                     break
 95 | 
 96 |         if active_service_endpoint:
 97 |             super(ResourceManager, self).__init__(active_service_endpoint, timeout, auth, verify, proxies)
 98 |         else:
 99 |             raise Exception("No active RMs found")
100 | 
101 |     def get_active_endpoint(self):
102 |         """
103 |         The active address, port tuple to which this instance is associated.
104 |         :return: str service_endpoint: Service endpoint URL corresponding to
105 |         the active address of RM
106 |         """
107 |         return self.service_uri.to_url()
108 | 
109 |     def cluster_information(self):
110 |         """
111 |         The cluster information resource provides overall information about
112 |         the cluster.
113 | 
114 |         :returns: API response object with JSON data
115 |         :rtype: :py:class:`yarn_api_client.base.Response`
116 |         """
117 |         path = '/ws/v1/cluster/info'
118 |         return self.request(path)
119 | 
120 |     def cluster_metrics(self):
121 |         """
122 |         The cluster metrics resource provides some overall metrics about the
123 |         cluster. More detailed metrics should be retrieved from the jmx
124 |         interface.
125 | 
126 |         :returns: API response object with JSON data
127 |         :rtype: :py:class:`yarn_api_client.base.Response`
128 |         """
129 |         path = '/ws/v1/cluster/metrics'
130 |         return self.request(path)
131 | 
132 |     def cluster_scheduler(self):
133 |         """
134 |         A scheduler resource contains information about the current scheduler
135 |         configured in a cluster. It currently supports both the Fifo and
136 |         Capacity Scheduler. You will get different information depending on
137 |         which scheduler is configured so be sure to look at the type
138 |         information.
139 | 
140 |         :returns: API response object with JSON data
141 |         :rtype: :py:class:`yarn_api_client.base.Response`
142 |         """
143 |         path = '/ws/v1/cluster/scheduler'
144 |         return self.request(path)
145 | 
146 |     def cluster_applications(self, state=None, states=None,
147 |                              final_status=None, user=None,
148 |                              queue=None, limit=None,
149 |                              started_time_begin=None, started_time_end=None,
150 |                              finished_time_begin=None, finished_time_end=None,
151 |                              application_types=None, application_tags=None,
152 |                              name=None, de_selects=None):
153 |         """
154 |         With the Applications API, you can obtain a collection of resources,
155 |         each of which represents an application.
156 | 
157 |         :param str state: state of the application [deprecated]
158 |         :param List[str] states: applications matching the given application
159 |             states
160 |         :param str final_status: the final status of the application -
161 |             reported by the application itself
162 |         :param str user: user name
163 |         :param str queue: queue name
164 |         :param str limit: total number of app objects to be returned
165 |         :param str started_time_begin: applications with start time beginning
166 |             with this time, specified in ms since epoch
167 |         :param str started_time_end: applications with start time ending with
168 |             this time, specified in ms since epoch
169 |         :param str finished_time_begin: applications with finish time
170 |             beginning with this time, specified in ms since epoch
171 |         :param str finished_time_end: applications with finish time ending
172 |             with this time, specified in ms since epoch
173 |         :param List[str] application_types: applications matching the given
174 |             application types, specified as a comma-separated list
175 |         :param List[str] application_tags: applications matching any of the
176 |             given application tags, specified as a comma-separated list
177 |         :param str name: name of the application
178 |         :param List[str] de_selects: a generic fields which will be skipped in
179 |             the result
180 |         :returns: API response object with JSON data
181 |         :rtype: :py:class:`yarn_api_client.base.Response`
182 |         :raises yarn_api_client.errors.IllegalArgumentError: if `state` or
183 |             `final_status` incorrect
184 |         """
185 |         path = '/ws/v1/cluster/apps'
186 | 
187 |         validate_yarn_application_state(state)
188 |         validate_yarn_application_states(states)
189 |         validate_final_application_status(final_status)
190 | 
191 |         loc_args = (
192 |             ('state', state),
193 |             ('states', ','.join(states) if states else None),
194 |             ('finalStatus', final_status),
195 |             ('user', user),
196 |             ('queue', queue),
197 |             ('limit', limit),
198 |             ('startedTimeBegin', started_time_begin),
199 |             ('startedTimeEnd', started_time_end),
200 |             ('finishedTimeBegin', finished_time_begin),
201 |             ('finishedTimeEnd', finished_time_end),
202 |             ('applicationTypes', ','.join(application_types) if application_types else None),
203 |             ('applicationTags', ','.join(application_tags) if application_tags else None),
204 |             ('name', name),
205 |             ('deSelects', ','.join(de_selects) if de_selects else None)
206 |         )
207 | 
208 |         params = self.construct_parameters(loc_args)
209 | 
210 |         return self.request(path, params=params)
211 | 
212 |     def cluster_application_statistics(self, states=None,
213 |                                        application_types=None):
214 |         """
215 |         With the Application Statistics API, you can obtain a collection of
216 |         triples, each of which contains the application type, the application
217 |         state and the number of applications of this type and this state in
218 |         ResourceManager context.
219 | 
220 |         This method only works in Hadoop > 2.0.0
221 | 
222 |         :param List[str] states: states of the applications. If states is not
223 |             provided, the API will enumerate all application states and
224 |             return the counts of them.
225 |         :param List[str] application_types: types of the applications,
226 |             specified as a comma-separated list. If application_types is not
227 |             provided, the API will count the applications of any application
228 |             type. In this case, the response shows * to indicate any
229 |             application type. Note that we only support at most one
230 |             applicationType temporarily. Otherwise, users will expect
231 |             an BadRequestException.
232 |         :returns: API response object with JSON data
233 |         :rtype: :py:class:`yarn_api_client.base.Response`
234 |         """
235 |         path = '/ws/v1/cluster/appstatistics'
236 | 
237 |         validate_yarn_application_states(states)
238 | 
239 |         loc_args = (
240 |             ('states', ','.join(states) if states else None),
241 |             ('applicationTypes', ','.join(application_types) if application_types else None)
242 |         )
243 |         params = self.construct_parameters(loc_args)
244 | 
245 |         return self.request(path, params=params)
246 | 
247 |     def cluster_application(self, application_id):
248 |         """
249 |         An application resource contains information about a particular
250 |         application that was submitted to a cluster.
251 | 
252 |         :param str application_id: The application id
253 |         :returns: API response object with JSON data
254 |         :rtype: :py:class:`yarn_api_client.base.Response`
255 |         """
256 |         path = '/ws/v1/cluster/apps/{appid}'.format(appid=application_id)
257 | 
258 |         return self.request(path)
259 | 
260 |     def cluster_application_attempts(self, application_id):
261 |         """
262 |         With the application attempts API, you can obtain a collection of
263 |         resources that represent an application attempt.
264 | 
265 |         :param str application_id: The application id
266 |         :returns: API response object with JSON data
267 |         :rtype: :py:class:`yarn_api_client.base.Response`
268 |         """
269 |         path = '/ws/v1/cluster/apps/{appid}/appattempts'.format(
270 |             appid=application_id)
271 | 
272 |         return self.request(path)
273 | 
274 |     def cluster_application_attempt_info(self, application_id, attempt_id):
275 |         """
276 |         With the application attempts API, you can obtain an extended info about
277 |         an application attempt.
278 | 
279 |         :param str application_id: The application id
280 |         :param str attempt_id: The attempt id
281 |         :returns: API response object with JSON data
282 |         :rtype: :py:class:`yarn_api_client.base.Response`
283 |         """
284 |         path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}'.format(
285 |             appid=application_id, attemptid=attempt_id)
286 | 
287 |         return self.request(path)
288 | 
289 |     def cluster_application_attempt_containers(self, application_id, attempt_id):
290 |         """
291 |         With the application attempts API, you can obtain an information
292 |         about container related to an application attempt.
293 | 
294 |         :param str application_id: The application id
295 |         :param str attempt_id: The attempt id
296 |         :returns: API response object with JSON data
297 |         :rtype: :py:class:`yarn_api_client.base.Response`
298 |         """
299 |         path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}/containers'.format(
300 |             appid=application_id, attemptid=attempt_id)
301 | 
302 |         return self.request(path)
303 | 
304 |     def cluster_application_attempt_container_info(self, application_id, attempt_id, container_id):
305 |         """
306 |         With the application attempts API, you can obtain an information
307 |         about container related to an application attempt.
308 | 
309 |         :param str application_id: The application id
310 |         :param str attempt_id: The attempt id
311 |         :param str container_id: The container id
312 |         :returns: API response object with JSON data
313 |         :rtype: :py:class:`yarn_api_client.base.Response`
314 |         """
315 |         path = '/ws/v1/cluster/apps/{appid}/appattempts/{attemptid}/containers/{containerid}'.format(
316 |             appid=application_id, attemptid=attempt_id, containerid=container_id)
317 | 
318 |         return self.request(path)
319 | 
320 |     def cluster_application_state(self, application_id):
321 |         """
322 |         (This feature is currently in the alpha stage and may change in the
323 |         future)
324 | 
325 |         With the application state API, you can obtain the current
326 |         state of an application.
327 | 
328 |         :param str application_id: The application id
329 |         :returns: API response object with JSON data
330 |         :rtype: :py:class:`yarn_api_client.base.Response`
331 |         """
332 |         path = '/ws/v1/cluster/apps/{appid}/state'.format(
333 |             appid=application_id)
334 | 
335 |         return self.request(path)
336 | 
337 |     def cluster_application_kill(self, application_id):
338 |         """
339 |         (This feature is currently in the alpha stage and may change in the
340 |         future)
341 | 
342 |         With the application kill API, you can kill an application
343 |         that is not in FINISHED or FAILED state.
344 | 
345 |         :param str application_id: The application id
346 |         :returns: API response object with JSON data
347 |         :rtype: :py:class:`yarn_api_client.base.Response`
348 |         """
349 | 
350 |         data = {"state": "KILLED"}
351 |         path = '/ws/v1/cluster/apps/{appid}/state'.format(
352 |             appid=application_id)
353 | 
354 |         return self.request(path, 'PUT', json=data)
355 | 
356 |     def cluster_nodes(self, states=None):
357 |         """
358 |         With the Nodes API, you can obtain a collection of resources, each of
359 |         which represents a node.
360 | 
361 |         :param List[str] states: the states of the node, specified as a
362 |             comma-separated list valid values are: NEW, RUNNING, UNHEALTHY,
363 |             DECOMMISSIONING, DECOMMISSIONED, LOST, REBOOTED, SHUTDOWN
364 |         :returns: API response object with JSON data
365 |         :rtype: :py:class:`yarn_api_client.base.Response`
366 |         :raises yarn_api_client.errors.IllegalArgumentError: if `healthy`
367 |             incorrect
368 |         """
369 |         path = '/ws/v1/cluster/nodes'
370 | 
371 |         validate_yarn_application_states(states)
372 | 
373 |         loc_args = (
374 |             ('states', ','.join(states) if states else None),
375 |         )
376 |         params = self.construct_parameters(loc_args)
377 | 
378 |         return self.request(path, params=params)
379 | 
380 |     def cluster_node(self, node_id):
381 |         """
382 |         A node resource contains information about a node in the cluster.
383 | 
384 |         :param str node_id: The node id
385 |         :returns: API response object with JSON data
386 |         :rtype: :py:class:`yarn_api_client.base.Response`
387 |         """
388 |         path = '/ws/v1/cluster/nodes/{nodeid}'.format(nodeid=node_id)
389 | 
390 |         return self.request(path)
391 | 
392 |     def cluster_node_update_resource(self, node_id, data):
393 |         """
394 |         Update the total resources in a node.
395 | 
396 |         For data body definition refer to:
397 |         (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Node_Update_Resource_API)
398 | 
399 |         :param dict data: resourceOption details
400 |         :returns: API response object with JSON data
401 |         :rtype: :py:class:`yarn_api_client.base.Response`
402 |         """
403 |         path = '/ws/v1/cluster/nodes/{nodeid}/resource'.format(nodeid=node_id)
404 | 
405 |         return self.request(path, 'POST', json=data)
406 | 
407 |     def cluster_submit_application(self, data):
408 |         """
409 |         (This feature is currently in the alpha stage and may change in the
410 |         future)
411 | 
412 |         With the New Application API, you can obtain an application-id which
413 |         can then be used as part of the Cluster Submit Applications API to
414 |         submit applications. The response also includes the maximum resource
415 |         capabilities available on the cluster.
416 | 
417 |         For data body definition refer to:
418 |         (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Applications_API.28Submit_Application.29)
419 | 
420 |         :param dict data: Application details
421 |         :returns: API response object with JSON data
422 |         :rtype: :py:class:`yarn_api_client.base.Response`
423 |         """
424 |         path = '/ws/v1/cluster/apps'
425 | 
426 |         return self.request(path, 'POST', json=data)
427 | 
428 |     def cluster_new_application(self):
429 |         """
430 |         (This feature is currently in the alpha stage and may change in the
431 |         future)
432 | 
433 |         With the New Application API, you can obtain an application-id which
434 |         can then be used as part of the Cluster Submit Applications API to
435 |         submit applications. The response also includes the maximum resource
436 |         capabilities available on the cluster.
437 | 
438 |         :returns: API response object with JSON data
439 |         :rtype: :py:class:`yarn_api_client.base.Response`
440 |         """
441 |         path = '/ws/v1/cluster/apps/new-application'
442 | 
443 |         return self.request(path, 'POST')
444 | 
445 |     def cluster_get_application_queue(self, application_id):
446 |         """
447 |         (This feature is currently in the alpha stage and may change in the
448 |         future)
449 | 
450 |         With the application queue API, you can query the queue of a
451 |         submitted app
452 | 
453 |         :param str application_id: The application id
454 |         :returns: API response object with JSON data
455 |         :rtype: :py:class:`yarn_api_client.base.Response`
456 |         """
457 |         path = '/ws/v1/cluster/apps/{appid}/queue'.format(appid=application_id)
458 | 
459 |         return self.request(path)
460 | 
461 |     def cluster_change_application_queue(self, application_id, queue):
462 |         """
463 |         (This feature is currently in the alpha stage and may change in the
464 |         future)
465 | 
466 |         Move a running app to another queue using a PUT request specifying the
467 |         target queue.
468 | 
469 |         To perform the PUT operation, authentication has to be
470 |         setup for the RM web services. In addition, you must be authorized to
471 |         move the app. Currently you can only move the app if you’re using the
472 |         Capacity scheduler or the Fair scheduler.
473 | 
474 |         Please note that in order to move an app, you must have an
475 |         authentication filter setup for the HTTP interface. The functionality
476 |         requires that a username is set in the HttpServletRequest. If no filter
477 |         is setup, the response will be an “UNAUTHORIZED” response.
478 | 
479 |         :param str application_id: The application id
480 |         :param str queue: queue name
481 |         :returns: API response object with JSON data
482 |         :rtype: :py:class:`yarn_api_client.base.Response`
483 |         """
484 |         path = '/ws/v1/cluster/apps/{appid}/queue'.format(appid=application_id)
485 | 
486 |         return self.request(path, 'PUT', json={"queue": queue})
487 | 
488 |     def cluster_get_application_priority(self, application_id):
489 |         """
490 |         (This feature is currently in the alpha stage and may change in the
491 |         future)
492 | 
493 |         With the application priority API, you can query the priority of a
494 |         submitted app
495 | 
496 |         :param str application_id: The application id
497 |         :returns: API response object with JSON data
498 |         :rtype: :py:class:`yarn_api_client.base.Response`
499 |         """
500 |         path = '/ws/v1/cluster/apps/{appid}/priority'.format(appid=application_id)
501 | 
502 |         return self.request(path)
503 | 
504 |     def cluster_change_application_priority(self, application_id, priority):
505 |         """
506 |         (This feature is currently in the alpha stage and may change in the
507 |         future)
508 | 
509 |         Update priority of a running or accepted app using a PUT request
510 |         specifying the target priority.
511 | 
512 |         To perform the PUT operation, authentication has to be
513 |         setup for the RM web services. In addition, you must be authorized to
514 |         move the app. Currently you can only move the app if you’re using the
515 |         Capacity scheduler or the Fair scheduler.
516 | 
517 |         Please note that in order to move an app, you must have an
518 |         authentication filter setup for the HTTP interface. The functionality
519 |         requires that a username is set in the HttpServletRequest. If no filter
520 |         is setup, the response will be an “UNAUTHORIZED” response.
521 | 
522 |         :param str application_id: The application id
523 |         :param int priority: application priority
524 |         :returns: API response object with JSON data
525 |         :rtype: :py:class:`yarn_api_client.base.Response`
526 |         """
527 |         path = '/ws/v1/cluster/apps/{appid}/priority'.format(appid=application_id)
528 | 
529 |         return self.request(path, 'PUT', json={"priority": priority})
530 | 
531 |     def cluster_node_container_memory(self):
532 |         """
533 |         This endpoint allows clients to gather info on the maximum memory that
534 |         can be allocated per container in the cluster.
535 |         :returns: integer specifying the maximum memory that can be allocated in
536 |         a container in the cluster
537 |         """
538 | 
539 |         return _get_maximum_container_memory(CONF_DIR)
540 | 
541 |     def cluster_scheduler_queue(self, yarn_queue_name):
542 |         """
543 |         Given a queue name, this function tries to locate the given queue in
544 |         the object returned by scheduler endpoint.
545 | 
546 |         The queue can be present inside a multilevel structure. This solution
547 |         tries to locate the queue using breadth-first-search algorithm.
548 | 
549 |         :param str yarn_queue_name: case sensitive queue name
550 |         :return: queue, None if not found
551 |         :rtype: dict
552 |         """
553 |         scheduler = self.cluster_scheduler().data
554 |         scheduler_info = scheduler['scheduler']['schedulerInfo']
555 | 
556 |         bfs_deque = deque([scheduler_info])
557 |         while bfs_deque:
558 |             vertex = bfs_deque.popleft()
559 |             if vertex['queueName'] == yarn_queue_name:
560 |                 return vertex
561 |             elif 'queues' in vertex:
562 |                 for queue in vertex['queues']['queue']:
563 |                     bfs_deque.append(queue)
564 | 
565 |         return None
566 | 
567 |     def cluster_scheduler_queue_availability(self, candidate_partition, availability_threshold):
568 |         """
569 |         Checks whether the requested memory satisfies the available space of the queue
570 |         This solution takes into consideration the node label concept in cluster.
571 |         Following node labelling, the resources can be available in various partition.
572 |         Given the partition data it tells you if the used capacity of this partition is spilling
573 |         the threshold specified.
574 | 
575 |         :param str candidate_parition: node label partition (case sensitive)
576 |         :param float availability_threshold: value can range between 0 - 100 .
577 |         :return: Boolean
578 |         """
579 | 
580 |         if candidate_partition['absoluteUsedCapacity'] > availability_threshold:
581 |             return False
582 |         return True
583 | 
584 |     def cluster_queue_partition(self, candidate_queue, cluster_node_label):
585 |         """
586 |         A queue can be divided into multiple partitions having different node labels.
587 |         Given the candidate queue and parition node label, this extracts the partition
588 |         we are interested in.
589 | 
590 |         :param dict candidate_queue: queue dictionary
591 |         :param str cluster_node_label: case sensitive node label name
592 |         :return: partition, None if not Found.
593 |         :rtype: dict
594 |         """
595 |         for partition in candidate_queue['capacities']['queueCapacitiesByPartition']:
596 |             if partition['partitionName'] == cluster_node_label:
597 |                 return partition
598 |         return None
599 | 
600 |     def cluster_reservations(self, queue=None, reservation_id=None,
601 |                              start_time=None, end_time=None,
602 |                              include_resource_allocations=None):
603 |         """
604 |         The Cluster Reservation API can be used to list reservations. When listing reservations
605 |         the user must specify the constraints in terms of a queue, reservation-id, start time or
606 |         end time. The user must also specify whether or not to include the full resource allocations
607 |         of the reservations being listed. The resulting page returns a response containing
608 |         information related to the reservation such as the acceptance time, the user, the resource
609 |         allocations, the reservation-id, as well as the reservation definition.
610 | 
611 |         :param str queue: the queue name containing the reservations to be listed. if not set, this
612 |             value will default to “default”
613 |         :param str reservation_id: the reservation-id of the reservation which will be listed. If
614 |             this parameter is present, start-time and end-time will be ignored.
615 |         :param str start_time:  reservations that end after this start-time will be listed. If
616 |             unspecified or invalid, this will default to 0.
617 |         :param str end_time: reservations that start after this end-time will be listed. If
618 |             unspecified or invalid, this will default to Long.MaxValue.
619 |         :param str include_resource_allocations: true or false. If true, the resource allocations
620 |             of the reservation will be included in the response. If false, no resource allocations
621 |             will be included in the response. This will default to false.
622 |         :returns: API response object with JSON data
623 |         :rtype: :py:class:`yarn_api_client.base.Response`
624 |         """
625 |         path = '/ws/v1/cluster/reservation/list'
626 | 
627 |         loc_args = (
628 |             ('queue', queue),
629 |             ('reservation-id', reservation_id),
630 |             ('start-time', start_time),
631 |             ('end-time', end_time),
632 |             ('include-resource-allocations', include_resource_allocations)
633 |         )
634 | 
635 |         params = self.construct_parameters(loc_args)
636 | 
637 |         return self.request(path, params=params)
638 | 
639 |     def cluster_new_delegation_token(self, renewer):
640 |         """
641 |         (This feature is currently in the alpha stage and may change in the
642 |         future)
643 | 
644 |         API to create delegation token.
645 | 
646 |         All delegation token requests must be carried out on a Kerberos
647 |         authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos
648 |         connection will result in a FORBIDDEN response. In case of renewing a token, only
649 |         the renewer specified when creating the token can renew the token. Other users(including
650 |         the owner) are forbidden from renewing tokens.
651 | 
652 |         :param str renewer: The user who is allowed to renew the delegation token
653 |         :returns: API response object with JSON data
654 |         :rtype: :py:class:`yarn_api_client.base.Response`
655 |         """
656 |         path = '/ws/v1/cluster/delegation-token'
657 | 
658 |         return self.request(path, 'POST', json={"renewer": renewer})
659 | 
660 |     def cluster_renew_delegation_token(self, delegation_token):
661 |         """
662 |         (This feature is currently in the alpha stage and may change in the
663 |         future)
664 | 
665 |         API to renew delegation token.
666 | 
667 |         All delegation token requests must be carried out on a Kerberos
668 |         authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos
669 |         connection will result in a FORBIDDEN response. In case of renewing a token, only
670 |         the renewer specified when creating the token can renew the token. Other users(including
671 |         the owner) are forbidden from renewing tokens.
672 | 
673 |         :param str delegation_token: Delegation token
674 |         :returns: API response object with JSON data
675 |         :rtype: :py:class:`yarn_api_client.base.Response`
676 |         """
677 |         path = '/ws/v1/cluster/delegation-token/expiration'
678 | 
679 |         return self.request(path, 'POST', headers={
680 |             "Hadoop-YARN-RM-Delegation-Token": delegation_token
681 |         })
682 | 
683 |     def cluster_cancel_delegation_token(self, delegation_token):
684 |         """
685 |         (This feature is currently in the alpha stage and may change in the
686 |         future)
687 | 
688 |         API to cancel delegation token.
689 | 
690 |         All delegation token requests must be carried out on a Kerberos
691 |         authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos
692 |         connection will result in a FORBIDDEN response.
693 | 
694 |         :param str delegation_token: Delegation token
695 |         :returns: API response object with JSON data
696 |         :rtype: :py:class:`yarn_api_client.base.Response`
697 |         """
698 |         path = '/ws/v1/cluster/delegation-token'
699 | 
700 |         return self.request(path, 'DELETE', headers={
701 |             "Hadoop-YARN-RM-Delegation-Token": delegation_token
702 |         })
703 | 
704 |     def cluster_new_reservation(self):
705 |         """
706 |         (This feature is currently in the alpha stage and may change in the
707 |         future)
708 | 
709 |         Use the New Reservation API, to obtain a reservation-id which can then be used as part of
710 |         the Cluster Reservation API Submit to submit reservations.
711 | 
712 |         :returns: API response object with JSON data
713 |         :rtype: :py:class:`yarn_api_client.base.Response`
714 |         """
715 |         path = '/ws/v1/cluster/reservation/new-reservation'
716 | 
717 |         return self.request(path, 'POST')
718 | 
719 |     def cluster_submit_reservation(self, data):
720 |         """
721 |         The Cluster Reservation API can be used to submit reservations. When submitting a
722 |         reservation the user specifies the constraints in terms of resources, and time that is
723 |         required. The resulting response is successful if the reservation can be made. If a
724 |         reservation-id is used to submit a reservation multiple times, the request will succeed
725 |         if the reservation definition is the same, but only one reservation will be created. If
726 |         the reservation definition is different, the server will respond with an error response.
727 |         When the reservation is made, the user can use the reservation-id used to submit the
728 |         reservation to get access to the resources by specifying it as part of Cluster Submit
729 |         Applications API.
730 | 
731 |         For data body definition refer to:
732 |         (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Reservation_API_Submit)
733 | 
734 |         :param dict data: Reservation details
735 |         :returns: API response object with JSON data
736 |         :rtype: :py:class:`yarn_api_client.base.Response`
737 |         """
738 |         path = '/ws/v1/cluster/reservation/submit'
739 | 
740 |         return self.request(path, 'POST', json=data)
741 | 
742 |     def cluster_update_reservation(self, data):
743 |         """
744 |         The Cluster Reservation API Update can be used to update existing reservations.Update of a
745 |         Reservation works similarly to submit described above, but the user submits the
746 |         reservation-id of an existing reservation to be updated. The semantics is a try-and-swap,
747 |         successful operation will modify the existing reservation based on the requested update
748 |         parameter, while a failed execution will leave the existing reservation unchanged.
749 | 
750 |         For data body definition refer to:
751 |         (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Reservation_API_Update)
752 | 
753 |         :param dict data: Reservation details
754 |         :returns: API response object with JSON data
755 |         :rtype: :py:class:`yarn_api_client.base.Response`
756 |         """
757 |         path = '/ws/v1/cluster/reservation/update'
758 | 
759 |         return self.request(path, 'POST', json=data)
760 | 
761 |     def cluster_delete_reservation(self, reservation_id):
762 |         """
763 |         The Cluster Reservation API Update can be used to update existing reservations.Update of a
764 |         Reservation works similarly to submit described above, but the user submits the
765 |         reservation-id of an existing reservation to be updated. The semantics is a try-and-swap,
766 |         successful operation will modify the existing reservation based on the requested update
767 |         parameter, while a failed execution will leave the existing reservation unchanged.
768 | 
769 |         :param str reservation_id: The id of the reservation to be deleted (the system automatically
770 |             looks up the right queue from this)
771 |         :returns: API response object with JSON data
772 |         :rtype: :py:class:`yarn_api_client.base.Response`
773 |         """
774 |         path = '/ws/v1/cluster/reservation/delete'
775 | 
776 |         return self.request(path, 'POST', json={'reservation-id': reservation_id})
777 | 
778 |     def cluster_application_timeouts(self, application_id):
779 |         """
780 |         Cluster Application Timeouts API can be used to get all configured timeouts of an
781 |         application. When you run a GET operation on this resource, a collection of timeout objects
782 |         is returned. Each timeout object is composed of a timeout type, expiry-time and remaining
783 |         time in seconds.
784 | 
785 |         :param str application_id: The application id
786 |         :returns: API response object with JSON data
787 |         :rtype: :py:class:`yarn_api_client.base.Response`
788 |         """
789 |         path = '/ws/v1/cluster/apps/{appid}/timeouts'.format(
790 |             appid=application_id)
791 | 
792 |         return self.request(path)
793 | 
794 |     def cluster_application_timeout(self, application_id, timeout_type):
795 |         """
796 |         The Cluster Application Timeout resource contains information about timeout.
797 | 
798 |         :param str application_id: The application id
799 |         :param str timeout_type: Timeout type. Valid values are the members of the
800 |             ApplicationTimeoutType enum. LIFETIME is currently the only valid value. .
801 |         :returns: API response object with JSON data
802 |         :rtype: :py:class:`yarn_api_client.base.Response`
803 |         """
804 |         path = '/ws/v1/cluster/apps/{appid}/timeouts/{timeout_type}'.format(
805 |             appid=application_id, timeout_type=timeout_type)
806 | 
807 |         return self.request(path)
808 | 
809 |     def cluster_update_application_timeout(self, application_id, timeout_type, expiry_time):
810 |         """
811 |         Update timeout of an application for given timeout type.
812 | 
813 |         :param str application_id: The application id
814 |         :param str timeout_type: Timeout type. Valid values are the members of the
815 |             ApplicationTimeoutType enum. LIFETIME is currently the only valid value.
816 |         :param str expiry_time: Time at which the application will expire in
817 |             ISO8601 yyyy-MM-dd’T’HH:mm:ss.SSSZ format.
818 |         :returns: API response object with JSON data
819 |         :rtype: :py:class:`yarn_api_client.base.Response`
820 |         """
821 |         path = '/ws/v1/cluster/apps/{appid}/timeout'.format(appid=application_id)
822 | 
823 |         return self.request(path, 'PUT', json={
824 |             "timeout": {"type": timeout_type, "expiryTime": expiry_time}
825 |         })
826 | 
827 |     def cluster_scheduler_conf_mutation(self):
828 |         """
829 |         (This feature is currently in the alpha stage and may change in the
830 |         future)
831 | 
832 |         API to retrieve the scheduler’s configuration that is currently loaded into
833 |         scheduler’s context.
834 | 
835 |         :returns: API response object with JSON data
836 |         :rtype: :py:class:`yarn_api_client.base.Response`
837 |         """
838 |         path = '/ws/v1/cluster/scheduler-conf'
839 | 
840 |         return self.request(path)
841 | 
842 |     def cluster_modify_scheduler_conf_mutation(self, data):
843 |         """
844 |         (This feature is currently in the alpha stage and may change in the
845 |         future)
846 | 
847 |         API to modify the scheduler configuration
848 | 
849 |         For data body definition refer to:
850 |         (https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Scheduler_Configuration_Mutation_API)
851 | 
852 |         :param dict data: sched-conf dictionary object
853 |         :returns: API response object with JSON data
854 |         :rtype: :py:class:`yarn_api_client.base.Response`
855 |         """
856 |         path = '/ws/v1/cluster/scheduler-conf'
857 | 
858 |         return self.request(path, 'PUT', json=data)
859 | 
860 |     def cluster_container_signal(self, container_id, command):
861 |         """
862 |         With the Container Signal API, you can send a signal to a specified container
863 |         with one of the following commands:
864 |         OUTPUT_THREAD_DUMP, GRACEFUL_SHUTDOWN and FORCEFUL_SHUTDOWN.
865 | 
866 |         :param str container_id: container id
867 |         :param str command: signal command
868 |         :returns: API response object with JSON data
869 |         :rtype: :py:class:`yarn_api_client.base.Response`
870 |         """
871 | 
872 |         validate_cluster_container_status(command, True)
873 | 
874 |         path = '/ws/v1/cluster/containers/{containerid}/signal/{command}'.format(
875 |             containerid=container_id,
876 |             command=command
877 |         )
878 | 
879 |         return self.request(path, 'POST')
880 | 
881 |     def scheduler_activities(self, node_id=None, group_by=None):
882 |         """
883 |         The scheduler activities RESTful API is available if you are using capacity scheduler
884 |         and can fetch scheduler activities info recorded in a scheduling cycle.
885 | 
886 |         The API returns a message that includes important scheduling activities info which
887 |         has a hierarchical layout with following fields:
888 | 
889 |         * Activities - Activities is the root object of scheduler activities.
890 |         * Allocations - Allocations are allocation attempts based on partition or reservation.
891 |         * Hierarchical Queues - Hierarchical Queues where the scheduler have been tried to allocate
892 |         containers to, each of them contains queue name, allocation state, optional diagnostic and
893 |         optional children.
894 |         * Applications - Applications are shown as children of leaf queue, each of them contains the
895 |         basic info about the application.
896 |         * Requests - Requests are shown as children of application, each of them contains the basic
897 |         info about the request.
898 |         * Nodes - Nodes are shown as children of request, each of them contains node id, allocation
899 |         state, optional name which should appear after allocating or reserving a container on the
900 |         node, and optional diagnostic which should present if failed to allocate or reserve a
901 |         container on this node. For aggregated nodes grouped by allocation state and diagnostic,
902 |         each of them contains allocation state, aggregated node IDs and optional diagnostic.
903 | 
904 |         :param str node_id: specified node ID, if not specified, the scheduler will record the
905 |         scheduling activities info for the next scheduling cycle on all nodes.
906 |         :param str group_by: aggregation type of application activities, currently only support
907 |         “diagnostic” with which user can query aggregated activities grouped by allocation
908 |         state and diagnostic
909 |         :returns: API response object with JSON data
910 |         :rtype: :py:class:`yarn_api_client.base.Response`
911 |         """
912 |         path = '/ws/v1/cluster/scheduler/activities'
913 | 
914 |         loc_args = (
915 |             ('nodeId', node_id),
916 |             ('groupBy', group_by)
917 |         )
918 | 
919 |         params = self.construct_parameters(loc_args)
920 | 
921 |         return self.request(path, params=params)
922 | 
923 |     def application_activities(self, application_id, max_time=None,
924 |                                       request_priorities=None,
925 |                                       allocation_request_ids=None, group_by=None,
926 |                                       limit=None, actions=None, summarize=None):
927 |         """
928 |         Application activities RESTful API is available if you are using capacity scheduler and can
929 |         fetch useful scheduling info for a specified application, the response has a hierarchical
930 |         layout with following fields:
931 | 
932 |         * AppActivities - AppActivities are root element of application activities within basic
933 |         information.
934 |         * Allocations - Allocations are allocation attempts at app level queried from the cache.
935 |         * Requests - Requests are shown as children of allocation, each of them contains request
936 |         name, request priority, allocation request id, allocation state and optional children.
937 |         * Nodes - Nodes are shown as children of request, each of them contains node id, allocation
938 |         state, optional name which should appear after allocating or reserving a container on the
939 |         node, and optional diagnostic which should appear if failed to allocate or reserve a
940 |         container on the node. For aggregated nodes grouped by allocation state and diagnostic, each
941 |         of them contains allocation state, aggregated node IDs and optional diagnostic.
942 | 
943 |         :param int maxTime: the max duration in seconds from now on for recording application
944 |         activities. If not specified, this will default to 3 (seconds).
945 |         :param List[int] requestPriorities: the priorities of request, used to filter application
946 |         activities
947 |         :param List[int] allocationRequestIds: the allocation request IDs of request, used to filter
948 |         application activities
949 |         :param str groupBy: the aggregation type of application activities, currently only
950 |         support “diagnostic” with which user can query aggregated activities grouped by
951 |         allocation state and diagnostic.
952 |         :param str limit: the limit of application activities which can reduce the cost for both
953 |         server and client side.
954 |         :param List[str] actions: the required actions of app activities including “refresh” and
955 |         “get”
956 |         :param boolean summarize: whether app activities in multiple scheduling processes need to be
957 |         summarized, specified as boolean, it’s useful when multi-node placement disabled, because
958 |         only one node can be considered in a single scheduling process, enabling this can give us a
959 |         summary with diagnostics on all nodes.
960 |         :returns: API response object with JSON data
961 |         :rtype: :py:class:`yarn_api_client.base.Response`
962 |         """
963 |         path = '/ws/v1/cluster/scheduler/app-activities/{appid}'.format(appid=application_id)
964 | 
965 |         loc_args = (
966 |             ('maxTime', max_time),
967 |             ('requestPriorities', ','.join(request_priorities) if request_priorities else None),
968 |             ('allocationRequestIds', ','.join(allocation_request_ids) if allocation_request_ids else None),
969 |             ('groupBy', group_by),
970 |             ('limit', limit),
971 |             ('actions', ','.join(actions) if actions else None),
972 |             ('summarize', summarize)
973 |         )
974 | 
975 |         params = self.construct_parameters(loc_args)
976 | 
977 |         return self.request(path, params=params)
978 | 


--------------------------------------------------------------------------------