├── kale ├── tests │ ├── __init__.py │ ├── test_queue_config.yaml │ ├── test_settings.py │ ├── test_consumer.py │ ├── test_utils.py │ ├── test_sqs.py │ ├── test_message.py │ ├── test_queue_info.py │ ├── test_crypt.py │ ├── test_queue_selector.py │ ├── test_publisher.py │ └── test_task.py ├── scripts │ ├── __init__.py │ ├── README.md │ ├── benchmark_settings.py │ ├── sample_queue_config.yaml │ ├── benchmark_queue_info.py │ └── benchmark_queue_selector.py ├── version.py ├── utils.py ├── timeout.py ├── exceptions.py ├── default_settings.py ├── __init__.py ├── test_utils.py ├── consumer.py ├── publisher.py ├── sqs.py ├── queue_selector.py ├── crypt.py ├── queue_info.py ├── message.py ├── task.py └── worker.py ├── example ├── taskworker │ ├── __init__.py │ ├── queue_config.yaml │ ├── settings.py │ └── tasks.py ├── __init__.py ├── run_worker.sh ├── run_elasticmq.sh ├── run_publisher.sh ├── worker.py ├── publisher.py └── README.md ├── setup.cfg ├── docs ├── modules.rst ├── index.rst ├── example.rst ├── example.taskworker.rst ├── kale.rst ├── Makefile ├── make.bat └── conf.py ├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── setup.py └── README.md /kale/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kale/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/taskworker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'wenbin' 2 | -------------------------------------------------------------------------------- /setup.cfg: 
-------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [bdist_wheel] 5 | universal=1 6 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | . 2 | = 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | example 8 | kale 9 | -------------------------------------------------------------------------------- /example/run_worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export KALE_SETTINGS_MODULE=taskworker.settings 3 | python worker.py 4 | -------------------------------------------------------------------------------- /kale/scripts/README.md: -------------------------------------------------------------------------------- 1 | # Scripts to assist development 2 | 3 | Scripts under this folder are just to assist development. -------------------------------------------------------------------------------- /kale/scripts/benchmark_settings.py: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID = 'x' 2 | AWS_SECRET_ACCESS_KEY = 'x' 3 | AWS_REGION = 'us-west-2' 4 | -------------------------------------------------------------------------------- /example/run_elasticmq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo docker pull pakohan/elasticmq 4 | sudo docker run -p 9324:9324 pakohan/elasticmq 5 | -------------------------------------------------------------------------------- /example/run_publisher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | INPUT=${1:-10} 4 | export KALE_SETTINGS_MODULE=taskworker.settings 5 | python publisher.py -n $INPUT 6 | -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | /ndkale.egg-info/ 2 | /build/ 3 | /dist/ 4 | /.venv/ 5 | /.wheelhouse/ 6 | /.idea 7 | /docs/_build 8 | /.eggs/ 9 | *.egg 10 | *.pyc 11 | -------------------------------------------------------------------------------- /kale/tests/test_queue_config.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | name: default 3 | priority: 75 4 | batch_size: 10 5 | visibility_timeout_sec: 600 6 | long_poll_time_sec: 5 7 | num_iterations: 5 8 | -------------------------------------------------------------------------------- /example/worker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | from kale import worker 5 | 6 | 7 | if __name__ == '__main__': 8 | print('Task worker is running ...') 9 | worker.Worker().run() 10 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. kale documentation master file, created by 2 | sphinx-quickstart on Wed Feb 4 15:15:06 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to kale's documentation! 7 | ================================ 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | kale 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | 24 | -------------------------------------------------------------------------------- /example/publisher.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import argparse 5 | 6 | from taskworker import tasks 7 | 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('n', type=int, default=10, 12 | help='The input of fibonacci task. Default: 10') 13 | args = parser.parse_args() 14 | 15 | tasks.FibonacciTask.publish(None, args.n) 16 | print('A FibonacciTask is scheduled to run. With input %d.' % args.n) 17 | -------------------------------------------------------------------------------- /kale/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | """Module for kale settings for unit tests.""" 2 | from __future__ import absolute_import 3 | 4 | import os 5 | 6 | QUEUE_CONFIG = os.path.join(os.path.split(os.path.abspath(__file__))[0], 7 | 'test_queue_config.yaml') 8 | QUEUE_CLASS = 'kale.test_utils.TestQueueClass' 9 | QUEUE_SELECTOR = 'kale.test_utils.TestQueueSelector' 10 | AWS_REGION = 'us-east-1' 11 | 12 | 13 | def queue_name_to_tags(queue_name): 14 | return {'is_dlq': 'dlq' in queue_name} 15 | 16 | 17 | SQS_QUEUE_TAG_FUNCTION = queue_name_to_tags 18 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Example implementation of task worker 2 | 3 | ## How to run? 4 | 5 | ### Prerequisites 6 | 7 | You must have docker installed. 
We need docker to run elasticmq container, 8 | where elasticmq is to emulate Amazon SQS on your local machine. 9 | 10 | ### Run ElasticMQ 11 | 12 | ./run_elasticmq.sh 13 | 14 | ### Run Worker process 15 | 16 | # You can run multiple such processes 17 | ./run_worker.sh 18 | 19 | ### Publish a task 20 | 21 | # The example task is to calculate fibonacci number 22 | # The script takes an integer argument $n as input, 23 | # so at the worker side, it'll calculate fibonacci($n) 24 | ./run_publisher.sh 7 25 | 26 | -------------------------------------------------------------------------------- /docs/example.rst: -------------------------------------------------------------------------------- 1 | example package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | example.taskworker 10 | 11 | Submodules 12 | ---------- 13 | 14 | example.publisher module 15 | ------------------------ 16 | 17 | .. automodule:: example.publisher 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | example.worker module 23 | --------------------- 24 | 25 | .. automodule:: example.worker 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: example 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /kale/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Nextdoor.com, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = '2.2.4' # http://semver.org/ 16 | -------------------------------------------------------------------------------- /docs/example.taskworker.rst: -------------------------------------------------------------------------------- 1 | example.taskworker package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | example.taskworker.settings module 8 | ---------------------------------- 9 | 10 | .. automodule:: example.taskworker.settings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | example.taskworker.tasks module 16 | ------------------------------- 17 | 18 | .. automodule:: example.taskworker.tasks 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: example.taskworker 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /example/taskworker/queue_config.yaml: -------------------------------------------------------------------------------- 1 | # max tasks per queue select: 10 (batch size) * 10 (iterations) = 100 2 | # task SLA: 60/10 = 6 seconds 3 | high_priority: 4 | name: high_priority 5 | priority: 100 6 | batch_size: 10 7 | visibility_timeout_sec: 60 8 | long_poll_time_sec: 1 9 | num_iterations: 10 10 | 11 | # max tasks per queue select: 10 (batch size) * 5 (iterations) = 50 12 | # task SLA: 60 / 10 = 6 seconds 13 | default: 14 | name: default 15 | priority: 40 16 | batch_size: 10 17 | visibility_timeout_sec: 60 18 | long_poll_time_sec: 1 19 | num_iterations: 5 20 | 21 | # max tasks per queue select: 10 (batch size) * 5 (iterations) = 50 22 | # task SLA: 60 / 10 = 6 seconds 23 | low_priority: 24 | name: low_priority 25 | priority: 5 26 | batch_size: 10 27 | visibility_timeout_sec: 60 28 | long_poll_time_sec: 5 29 | num_iterations: 5 30 | -------------------------------------------------------------------------------- /kale/scripts/sample_queue_config.yaml: -------------------------------------------------------------------------------- 1 | default: 2 | name: default 3 | priority: 75 4 | batch_size: 10 5 | visibility_timeout_sec: 600 6 | long_poll_time_sec: 5 7 | num_iterations: 5 8 | 9 | large: 10 | name: large 11 | priority: 30 12 | batch_size: 1 13 | visibility_timeout_sec: 600 14 | long_poll_time_sec: 1 15 | num_iterations: 2 16 | 17 | digest: 18 | name: digest 19 | priority: 65 20 | batch_size: 10 21 | visibility_timeout_sec: 120 22 | long_poll_time_sec: 2 23 | num_iterations: 2 24 | 25 | low_priority: 26 | name: low_priority 27 | priority: 5 28 | batch_size: 10 29 | visibility_timeout_sec: 120 30 | long_poll_time_sec: 1 31 | num_iterations: 1 32 | 33 | high_priority: 34 | name: high_priority 35 | 
priority: 100 36 | batch_size: 10 37 | visibility_timeout_sec: 600 38 | long_poll_time_sec: 1 39 | num_iterations: 10 40 | -------------------------------------------------------------------------------- /kale/tests/test_consumer.py: -------------------------------------------------------------------------------- 1 | """Module testing the kale.consumer module.""" 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | 6 | from moto import mock_sqs 7 | 8 | from kale import consumer 9 | from kale import settings 10 | from kale import sqs 11 | 12 | 13 | class ConsumerTestCase(unittest.TestCase): 14 | """Test consumer logic.""" 15 | 16 | _previous_region = None 17 | 18 | def setUp(self): 19 | self.mock_sqs = mock_sqs() 20 | self.mock_sqs.start() 21 | sqs.SQSTalk._queues = {} 22 | 23 | def tearDown(self): 24 | self.mock_sqs.stop() 25 | 26 | def test_fetch_batch(self): 27 | c = consumer.Consumer() 28 | 29 | self.assertIsNotNone(c.fetch_batch( 30 | settings.QUEUE_CLASS, 10, 60)) 31 | self.assertIsNotNone(c.fetch_batch( 32 | settings.QUEUE_CLASS, 10, 60, 2)) 33 | -------------------------------------------------------------------------------- /kale/utils.py: -------------------------------------------------------------------------------- 1 | """Module containing utility functions for kale.""" 2 | import resource 3 | import sys 4 | 5 | 6 | def class_import_from_path(path_to_class): 7 | """Import a class from a path string. 8 | 9 | :param str path_to_class: class path, e.g., kale.consumer.Consumer 10 | :return: class object 11 | :rtype: class 12 | """ 13 | 14 | components = path_to_class.split('.') 15 | module = __import__('.'.join(components[:-1])) 16 | for comp in components[1:-1]: 17 | module = getattr(module, comp) 18 | return getattr(module, components[-1]) 19 | 20 | 21 | def ru_maxrss_mb(): 22 | """Gets memory residence set size in megabytes. 23 | 24 | Returns: 25 | Integer, megabytes. 
26 | """ 27 | resource_data = resource.getrusage(resource.RUSAGE_SELF) 28 | if sys.platform == 'darwin': 29 | return resource_data.ru_maxrss / (1024 * 1024) 30 | else: 31 | return resource_data.ru_maxrss / 1024 32 | -------------------------------------------------------------------------------- /example/taskworker/settings.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import os 4 | 5 | # 6 | # Production settings 7 | # 8 | 9 | # MESSAGE_QUEUE_USE_PROXY = False 10 | # AWS_ACCESS_KEY_ID = 'KEY_ID' 11 | # AWS_SECRET_ACCESS_KEY = 'ACCESS_KEY' 12 | # AWS_REGION = 'us-west-2' 13 | 14 | # 15 | # Development settings 16 | # 17 | 18 | # Using elasticmq to emulate SQS locally 19 | MESSAGE_QUEUE_USE_PROXY = True 20 | MESSAGE_QUEUE_PROXY_PORT = 9324 21 | MESSAGE_QUEUE_PROXY_HOST = os.getenv('MESSAGE_QUEUE_PROXY_HOST', '0.0.0.0') 22 | AWS_ACCESS_KEY_ID = 'x' 23 | AWS_SECRET_ACCESS_KEY = 'x' 24 | 25 | # Queue config file path 26 | QUEUE_CONFIG = 'taskworker/queue_config.yaml' 27 | 28 | # SQS limits per message size, bytes 29 | # It can be set anywhere from 1024 bytes (1KB), up to 262144 bytes (256KB). 30 | # See http://aws.amazon.com/sqs/faqs/ 31 | SQS_TASK_SIZE_LIMIT = 256000 32 | 33 | # The class for queue selection algorithm 34 | QUEUE_SELECTOR = 'kale.queue_selector.ReducedLottery' 35 | -------------------------------------------------------------------------------- /example/taskworker/tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import logging 5 | 6 | from kale import task 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class FibonacciTask(task.Task): 12 | 13 | # How many times should taskworker retry if it fails.
14 | # If this task shouldn't be retried, set it to None 15 | max_retries = 3 16 | 17 | # The hard limit for max task running time. 18 | # This value should be set between max actual running time and 19 | # queue visibility timeout. 20 | time_limit = 5 # seconds 21 | 22 | # The queue name 23 | queue = 'default' 24 | 25 | @staticmethod 26 | def fibonacci(n): 27 | if n == 0: 28 | return 0 29 | elif n == 1: 30 | return 1 31 | else: 32 | return FibonacciTask.fibonacci(n - 1) + FibonacciTask.fibonacci(n - 2) 33 | 34 | def run_task(self, n, *args, **kwargs): 35 | print('fibonacci(%d) = %d' % (n, self.fibonacci(n))) 36 | -------------------------------------------------------------------------------- /kale/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Module testing the kale.utils module.""" 2 | from __future__ import absolute_import 3 | import resource 4 | import sys 5 | 6 | import mock 7 | import unittest 8 | 9 | from kale import utils 10 | 11 | 12 | class TestRuMaxrssMb(unittest.TestCase): 13 | 14 | def test_linux(self): 15 | with mock.patch.object(sys, 'platform', 'posix'), \ 16 | mock.patch.object( 17 | resource, 'getrusage', return_value=mock.Mock(ru_maxrss=2048)): 18 | self.assertEqual(utils.ru_maxrss_mb(), 2) 19 | 20 | def test_osx(self): 21 | with mock.patch.object(sys, 'platform', 'darwin'), \ 22 | mock.patch.object( 23 | resource, 'getrusage', return_value=mock.Mock(ru_maxrss=1024 * 1024)): 24 | self.assertEqual(utils.ru_maxrss_mb(), 1) 25 | 26 | def test_other(self): 27 | with mock.patch.object(sys, 'platform', ''), \ 28 | mock.patch.object( 29 | resource, 'getrusage', return_value=mock.Mock(ru_maxrss=2048)): 30 | self.assertEqual(utils.ru_maxrss_mb(), 2) 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | env: 3 | global: 4 | # pypi api token 
split in 2 because of travisci issue https://travis-ci.community/t/travis-encrypt-data-too-large-for-pypi-tokens-with-older-repos/5792/5 5 | secure: "CFbLgwgs4M0yROBMl0vrUF3ELkR85CRQm1s9IzOAroHFxLqBPHsm7ofVzmS+1pYqTTJunJiVUD0655AdnlbyO9x8T0exlcDowlwgTnAMozSlJo72B1mjdqZW26+VIgGcPiZiB3iRBJcOiONOPKo04SzS4EG8jdP+JBc2IwS6VtQ=" 6 | secure: "j8x37uSIozGhdCEyWXonOIK9MDalFwxfhavUQAnEh4mtgfGshntqs7JkE0gemt+HPAd3jigNQijm/a+8IOjhfuGZPxd/+vhyL7/fT16F3TAOL2o2N+Vz9m9ybxKuO+JFgv4QJOCZ5KGumz2sVfogFiT0Nr4+ow/XU8nhVAAGnSY=" 7 | python: 8 | - '2.7' 9 | - '3.5' 10 | - '3.6' 11 | - '3.7' 12 | before_install: 13 | - export BOTO_CONFIG=/dev/null 14 | install: 15 | - python setup.py install 16 | - pip freeze 17 | before_script: 18 | - pip install flake8 && flake8 kale example --max-line-length 100 --ignore=E731,E402 19 | script: 20 | - python setup.py test 21 | deploy: 22 | provider: pypi 23 | user: __token__ 24 | password: 25 | secure: $TOKEN_1$TOKEN_2 26 | on: 27 | tags: true 28 | all_branches: true 29 | condition: "$TRAVIS_TAG =~ ^version-[0-9]+.[0-9]+.[0-9]+[a-z]?$" 30 | distributions: sdist bdist_wheel 31 | -------------------------------------------------------------------------------- /kale/timeout.py: -------------------------------------------------------------------------------- 1 | """Module containing timeout methods for monitoring tasks. 2 | 3 | Modeled after: 4 | http://stackoverflow.com/a/601168/854976 5 | """ 6 | from __future__ import absolute_import 7 | 8 | import errno 9 | import os 10 | import signal 11 | from contextlib import contextmanager 12 | 13 | from kale import exceptions 14 | 15 | 16 | @contextmanager 17 | def time_limit(seconds, error_message=os.strerror(errno.ETIME)): 18 | """Context manager for handling method timeouts. 19 | 20 | Usage: 21 | try: 22 | with time_limit(10): 23 | some_function() 24 | except TimeoutException: 25 | # error handling here 26 | 27 | :param int seconds: seconds before timeout. 28 | :param str error_message: error message. 
29 | :raises: kale.exceptions.TimeoutException 30 | """ 31 | 32 | def _handle_timeout(signum, frame): 33 | """Handle timeout signal.""" 34 | raise exceptions.TimeoutException(error_message) 35 | 36 | original_handler = signal.signal(signal.SIGALRM, _handle_timeout) 37 | signal.alarm(seconds) 38 | try: 39 | yield 40 | finally: 41 | # Reset original handler. 42 | signal.signal(signal.SIGALRM, original_handler) 43 | # Remove signal. 44 | signal.alarm(0) 45 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Nextdoor, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT 17 | SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 21 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 22 | THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /kale/exceptions.py: -------------------------------------------------------------------------------- 1 | """Exceptions raised by the kale package.""" 2 | 3 | 4 | class ImproperlyConfiguredException(Exception): 5 | """Exception raised when Kale is improperly configured.""" 6 | 7 | 8 | class TaskException(Exception): 9 | """Base class for task exceptions.""" 10 | 11 | 12 | class ChubbyTaskException(TaskException): 13 | """Exception raised when a task is too chubby for SQS.""" 14 | 15 | 16 | class TimeoutException(TaskException): 17 | """Exception raised when a task exceeds its time limit.""" 18 | pass 19 | 20 | 21 | class InvalidTimeLimitTaskException(TaskException): 22 | """Exception raised when a task's time_limit exceeds its queue's 23 | visibility timeout. 24 | """ 25 | 26 | 27 | class InvalidTaskDelayException(TaskException): 28 | """Exception raised when a task is published with an invalid delay time.""" 29 | 30 | 31 | class BlacklistedException(TaskException): 32 | """Exception raised when a task has been blacklisted.""" 33 | 34 | 35 | class QueueException(Exception): 36 | """Base class for queue exceptions.""" 37 | 38 | 39 | class SendMessagesException(QueueException): 40 | """Exception raised when a queue returns a non-zero number 41 | of failures on send. 
42 | """ 43 | def __init__(self, msg_count): 44 | super().__init__("{} messages failed to be delivered to SQS".format(msg_count)) 45 | 46 | 47 | class DeleteMessagesException(QueueException): 48 | """Exception raised when a queue returns a non-zero number 49 | of failures on delete. 50 | """ 51 | def __init__(self, msg_count): 52 | super().__init__("{} messages failed to be deleted".format(msg_count)) 53 | 54 | 55 | class ChangeMessagesVisibilityException(QueueException): 56 | """Exception raised when a queue returns a non-zero number 57 | of failures on change message visibility. 58 | """ 59 | def __init__(self, msg_count): 60 | super().__init__("{} messages failed to change visibility in SQS".format(msg_count)) 61 | -------------------------------------------------------------------------------- /kale/tests/test_sqs.py: -------------------------------------------------------------------------------- 1 | """Module testing the kale.sqs module.""" 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | 6 | import boto3 7 | import mock 8 | from moto import mock_sqs 9 | 10 | from kale import exceptions 11 | from kale import sqs 12 | 13 | 14 | class SQSTestCase(unittest.TestCase): 15 | """Test SQSTalk logic""" 16 | 17 | _previous_region = None 18 | 19 | def setUp(self): 20 | self.mock_sqs = mock_sqs() 21 | self.mock_sqs.start() 22 | sqs.SQSTalk._queues = {} 23 | 24 | def tearDown(self): 25 | self.mock_sqs.stop() 26 | 27 | def test_create_queue(self): 28 | boto3.setup_default_session() 29 | 30 | sqs_inst = sqs.SQSTalk() 31 | 32 | sqs_inst._get_or_create_queue('LowPriorityTest1') 33 | sqs_inst._get_or_create_queue('HighPriorityTest2') 34 | sqs_inst._get_or_create_queue('HighPriorityTest2-dlq') 35 | 36 | expected_low_queue = sqs_inst._sqs.Queue('https://queue.amazonaws.com/123456789012/' 37 | 'LowPriorityTest1') 38 | expected_hi_queue = sqs_inst._sqs.Queue('https://queue.amazonaws.com/123456789012/' 39 | 'HighPriorityTest2') 40 | 41 | expected_hi_dlq_queue 
= sqs_inst._sqs.Queue('https://queue.amazonaws.com/123456789012/' 42 | 'HighPriorityTest2-dlq') 43 | 44 | 45 | self.assertEqual(len(sqs_inst._queues), 3) 46 | 47 | self.assertEqual(expected_low_queue, sqs_inst._queues['LowPriorityTest1']) 48 | self.assertEqual(expected_hi_queue, sqs_inst._queues['HighPriorityTest2']) 49 | 50 | def test_get_queues(self): 51 | boto3.setup_default_session() 52 | sqs_inst = sqs.SQSTalk() 53 | 54 | sqs_inst._get_or_create_queue('LowPriorityTest3') 55 | sqs_inst._get_or_create_queue('HighPriorityTest4') 56 | 57 | queues = sqs_inst.get_all_queues() 58 | print(queues) 59 | self.assertEqual(len(queues), 2) 60 | 61 | queues = sqs_inst.get_all_queues('Low') 62 | self.assertEqual(len(queues), 1) 63 | 64 | sqs_inst.get_all_queues('High') 65 | self.assertEqual(len(queues), 1) 66 | 67 | def test_get_improperly_configured(self): 68 | with mock.patch('kale.sqs.settings') as mock_settings: 69 | mock_settings.PROPERLY_CONFIGURED = False 70 | with self.assertRaises(exceptions.ImproperlyConfiguredException): 71 | sqs.SQSTalk() 72 | -------------------------------------------------------------------------------- /kale/default_settings.py: -------------------------------------------------------------------------------- 1 | """Default settings for kale.""" 2 | from __future__ import absolute_import 3 | 4 | import os 5 | import pickle 6 | import platform 7 | import time 8 | import zlib 9 | 10 | import six 11 | 12 | # The default settings are inadequate for actual use 13 | # due to the need for a queue. When the settings are 14 | # overridden this will be set to True. 15 | # Note: You are not supposed to overwrite this property. 16 | PROPERLY_CONFIGURED = False 17 | 18 | # Optional functions to be called when the worker is 19 | # started and stopped. 20 | ON_WORKER_STARTUP = lambda: None 21 | ON_WORKER_SHUTDOWN = lambda: None 22 | 23 | # Note: This makes it possible to move modules and remain 24 | # backwards compatible. 
25 | TASK_MAPPER = {} 26 | 27 | # The function to (de)compress a message string, which takes a string as input. 28 | COMPRESSOR = zlib.compress 29 | DECOMPRESSOR = zlib.decompress 30 | 31 | # Set to a specific endpoint url to change how the Boto3 client will do the connection. 32 | # For example, use http://{ELASTICMQ_HOST}:{ELASTICMQ_PORT} to use ElasticMQ in Dev. 33 | # Or, use these AWS docs to pass a VPC-compatible endpoint url to send SQS traffic through a VPC: 34 | # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-sending-messages-from-vpc.html 35 | MESSAGE_QUEUE_ENDPOINT_URL = '' 36 | 37 | # AWS credential for connecting to SQS 38 | AWS_ACCESS_KEY_ID = '' 39 | AWS_SECRET_ACCESS_KEY = '' 40 | AWS_REGION = '' 41 | 42 | # Use max size (in bytes) as of March 2014 as the default. 43 | SQS_TASK_SIZE_LIMIT = 256000 44 | 45 | SQS_QUEUE_TAG_FUNCTION = lambda queue_name: {} 46 | 47 | RETRY_DELAY_MULTIPLE_SEC = 60 48 | SQS_MAX_TASK_DELAY_SEC = 900 49 | TIMESTAMP_FUNC = time.time 50 | PUBLISHER_STR_FUNC = lambda: '%s[%d]' % (platform.node(), os.getpid()) 51 | 52 | RESET_TIMEOUT_THRESHOLD = 1 53 | 54 | USE_DEAD_LETTER_QUEUE = True 55 | 56 | # Path for queue config 57 | QUEUE_CONFIG = 'queue_config.yaml' 58 | QUEUE_CLASS = 'kale.queue_info.TaskQueue' 59 | QUEUE_SELECTOR = 'kale.queue_selector.ReducedLottery' 60 | 61 | # We will gracefully stop this process if memory usage 62 | # exceeds this amount (in MB). 63 | DIE_ON_RESIDENT_SET_SIZE_MB = 256 64 | 65 | # CIPHER used by kale.crypt, must be 16-, 24-, or 36-byte string 66 | UTIL_CRYPT_CIPHER = '1234567890123456' 67 | 68 | ENABLE_DEAD_LETTER_QUEUE = True 69 | 70 | # Manually specify pickle protocol used for writing Pickle files 71 | # Py2/Py3 have different default protocols. Reproduce those defaults 72 | # here for backwards compatibility. 
73 | # Note: If Python 2 & Python 3 need to co-exist, override PICKLE_PROTOCOL=2 74 | if six.PY2: 75 | PICKLE_PROTOCOL = 0 76 | else: 77 | PICKLE_PROTOCOL = pickle.DEFAULT_PROTOCOL 78 | -------------------------------------------------------------------------------- /docs/kale.rst: -------------------------------------------------------------------------------- 1 | kale Package 2 | ============ 3 | 4 | :mod:`kale` Package 5 | ------------------- 6 | 7 | .. automodule:: kale 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`consumer` Module 13 | ---------------------- 14 | 15 | .. automodule:: kale.consumer 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | :mod:`crypt` Module 21 | ------------------- 22 | 23 | .. automodule:: kale.crypt 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | :mod:`default_settings` Module 29 | ------------------------------ 30 | 31 | .. automodule:: kale.default_settings 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | :mod:`exceptions` Module 37 | ------------------------ 38 | 39 | .. automodule:: kale.exceptions 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | :mod:`message` Module 45 | --------------------- 46 | 47 | .. automodule:: kale.message 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | :mod:`publisher` Module 53 | ----------------------- 54 | 55 | .. automodule:: kale.publisher 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | :mod:`queue_info` Module 61 | ------------------------ 62 | 63 | .. automodule:: kale.queue_info 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | :mod:`queue_selector` Module 69 | ---------------------------- 70 | 71 | .. automodule:: kale.queue_selector 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | :mod:`sqs` Module 77 | ----------------- 78 | 79 | .. 
automodule:: kale.sqs 80 | :members: 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | :mod:`task` Module 85 | ------------------ 86 | 87 | .. automodule:: kale.task 88 | :members: 89 | :undoc-members: 90 | :show-inheritance: 91 | 92 | :mod:`timeout` Module 93 | --------------------- 94 | 95 | .. automodule:: kale.timeout 96 | :members: 97 | :undoc-members: 98 | :show-inheritance: 99 | 100 | :mod:`utils` Module 101 | ------------------- 102 | 103 | .. automodule:: kale.utils 104 | :members: 105 | :undoc-members: 106 | :show-inheritance: 107 | 108 | :mod:`version` Module 109 | --------------------- 110 | 111 | .. automodule:: kale.version 112 | :members: 113 | :undoc-members: 114 | :show-inheritance: 115 | 116 | :mod:`worker` Module 117 | -------------------- 118 | 119 | .. automodule:: kale.worker 120 | :members: 121 | :undoc-members: 122 | :show-inheritance: 123 | 124 | Subpackages 125 | ----------- 126 | 127 | .. toctree:: 128 | 129 | kale.scripts 130 | kale.tests 131 | 132 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import absolute_import 5 | 6 | import multiprocessing # To make python setup.py test happy 7 | import os 8 | import shutil 9 | import subprocess 10 | 11 | from distutils.command.clean import clean 12 | from setuptools import setup 13 | 14 | multiprocessing 15 | 16 | PACKAGE = 'ndkale' 17 | __version__ = None 18 | with open(os.path.join('kale', 'version.py')) as f: 19 | source = f.read() 20 | code = compile(source, os.path.join('kale', 'version.py'), 'exec') 21 | exec(code) # set __version__ 22 | 23 | 24 | # -*- Hooks -*- 25 | 26 | class CleanHook(clean): 27 | 28 | def run(self): 29 | clean.run(self) 30 | 31 | def maybe_rm(path): 32 | if os.path.exists(path): 33 | shutil.rmtree(path) 34 | 35 | if self.all: 36 | maybe_rm('ndkale.egg-info') 37 
| maybe_rm('build') 38 | maybe_rm('dist') 39 | subprocess.call('rm -rf *.egg', shell=True) 40 | subprocess.call('find . -name "*.pyc" -exec rm -rf {} \;', 41 | shell=True) 42 | 43 | # -*- Classifiers -*- 44 | 45 | classes = """ 46 | Development Status :: 5 - Production/Stable 47 | License :: OSI Approved :: BSD License 48 | Topic :: System :: Distributed Computing 49 | Topic :: Software Development :: Object Brokering 50 | Programming Language :: Python 51 | Programming Language :: Python 52 | Programming Language :: Python :: 2.7 53 | Programming Language :: Python :: 3.5 54 | Programming Language :: Python :: 3.6 55 | Programming Language :: Python :: 3.7 56 | Programming Language :: Python :: Implementation :: CPython 57 | Operating System :: OS Independent 58 | """ 59 | classifiers = [s.strip() for s in classes.split('\n') if s] 60 | 61 | # -*- %%% -*- 62 | 63 | setup( 64 | name=PACKAGE, 65 | version=__version__, 66 | description='Kale: A Task Worker Library from Nextdoor', 67 | long_description=open('README.md').read(), 68 | author='Nextdoor', 69 | author_email='eng@nextdoor.com', 70 | url='https://github.com/Nextdoor/ndkale', 71 | download_url='http://pypi.python.org/pypi/ndkale#downloads', 72 | license='Apache License, Version 2', 73 | keywords='kale nextdoor taskworker sqs python', 74 | packages=['kale'], 75 | tests_require=[ 76 | 'mock==2.0.0', 77 | 'nose==1.3.7', 78 | 'moto==1.3.15.dev122', 79 | 'MarkupSafe==1.1.1', 80 | 'Jinja2==2.10.3', 81 | 'zipp==0.6.0', 82 | 'ecdsa==0.14.1', 83 | ], 84 | test_suite='nose.collector', 85 | install_requires=[ 86 | 'boto3>=1.10.36,<1.15', 87 | 'pycryptodome>=3.6.6,<4.0', 88 | 'pyyaml>=5.2,<6.0', 89 | 'setuptools>=41.0.1,<50', 90 | 'six>=1.11.0,<2.0', 91 | 'future>=0.18.2,<1.0', 92 | ], 93 | classifiers=classifiers, 94 | cmdclass={'clean': CleanHook}, 95 | ) 96 | -------------------------------------------------------------------------------- /kale/__init__.py: 
class Settings(object):
    """Singleton holding kale configuration.

    On construction it copies every ALL_CAPS attribute from
    kale.default_settings onto the instance, then overrides them with the
    ALL_CAPS attributes of the module named by the KALE_SETTINGS_MODULE
    environment variable. PROPERLY_CONFIGURED is set to True only when the
    override module was found and imported successfully.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        """Set this class up as a singleton."""

        if not cls._instance:
            # BUG FIX: object.__new__ must not receive extra arguments in
            # Python 3 when __new__ is overridden; forwarding *args/**kwargs
            # raised TypeError for any call like Settings(x).
            cls._instance = super(Settings, cls).__new__(cls)
        return cls._instance

    def __init__(self, *args, **kwargs):
        """Instantiate the settings.

        Called on every Settings() call (including re-initialization by
        setup_package/teardown_package), so it re-reads the environment.
        """

        # update this dict from default settings (but only for ALL_CAPS
        # settings)
        for setting in dir(default_settings):
            if setting == setting.upper():
                setattr(self, setting, getattr(default_settings, setting))

        try:
            settings_module_path = os.environ[ENVIRONMENT_VARIABLE]
        except KeyError:
            # NOTE: This is arguably an EnvironmentError, but that causes
            # problems with Python's interactive help.
            logger.error(
                ('Settings cannot be imported, because environment '
                 'variable %s is undefined.') % ENVIRONMENT_VARIABLE)
            return

        try:
            settings_module = importlib.import_module(settings_module_path)
        except ImportError as e:
            error = ImportError(
                'Could not import settings "%s" (Is it on sys.path?): %s' %
                (settings_module_path, e))
            logger.error(error)
            return

        # Apply overrides from the user-supplied settings module.
        for setting in dir(settings_module):
            if setting == setting.upper():
                setting_value = getattr(settings_module, setting)
                setattr(self, setting, setting_value)

        # This setting lets the application know that settings
        # have been properly configured.
        self.PROPERLY_CONFIGURED = True
class SlowButNotTooSlowTask(task.Task):
    """Test task that appears to run for target_runtime seconds.

    Backdates its recorded start time instead of sleeping, so tests of
    runtime accounting stay fast.
    """

    # Runtime budget (seconds) and the target the fake runtime simulates
    # (presumably the soft runtime target checked by kale.task -- confirm).
    time_limit = 100
    target_runtime = 90

    @classmethod
    def _get_task_id(cls, *args, **kwargs):
        # `cls`, not `self`: this is a classmethod (matches the convention
        # used by ShouldNotRunTask in this module).
        return 'moderately_slow_task'

    def run_task(self, *args, **kwargs):
        # Ugly trick: shift the start time back so the task looks like it
        # ran for target_runtime seconds without actually sleeping.
        self._start_time = self._start_time - self.target_runtime
class MockConsumer(object):
    """Test double for a consumer: consume never yields any messages."""

    def consume(self, *args, **kwargs):
        """Accept any arguments and return an empty message list.

        The original signature omitted an explicit ``self`` (it was
        silently swallowed by ``*args``); declaring it keeps the method
        conventional without changing how callers invoke it.
        """
        return []
class Consumer(sqs.SQSTalk):
    """SQS utility class for consuming tasks."""

    def fetch_batch(self, queue_name, batch_size, visibility_timeout_sec,
                    long_poll_time_sec=None):
        """Fetches a batch of messages from a queue.

        :param str queue_name: queue name.
        :param int batch_size: maximum number of messages to fetch.
        :param int visibility_timeout_sec: visibility timeout in seconds.
        :param int long_poll_time_sec: long poll time in seconds
            (defaults to 20 when None).
        :returns: a list of KaleMessage objects, or None if no messages
            were found.
        :rtype: list[KaleMessage]
        """
        sqs_queue = self._get_or_create_queue(queue_name)

        sqs_messages = sqs_queue.receive_messages(
            MaxNumberOfMessages=batch_size,
            VisibilityTimeout=visibility_timeout_sec,
            WaitTimeSeconds=long_poll_time_sec or 20
        )

        # NOTE(review): boto3's receive_messages normally returns a list,
        # so this None guard looks purely defensive -- confirm before
        # relying on the None return value.
        if sqs_messages is None:
            return None

        return [KaleMessage.decode_sqs(msg) for msg in sqs_messages]

    def delete_messages(self, messages, queue_name):
        """Remove messages from the queue.

        :param list[KaleMessage] messages: messages to delete.
        :param str queue_name: queue name.
        :raises: DeleteMessagesException: SQS responded with a partial
            success. Some messages were not deleted.
        """
        # No-op on an empty batch; SQS batch calls reject empty Entries.
        if not messages:
            return
        queue = self._get_or_create_queue(queue_name)

        response = queue.delete_messages(
            Entries=[{
                'Id': message.id,
                'ReceiptHandle': message.sqs_receipt_handle
            } for message in messages]
        )

        # Batch deletes can partially fail; log every failed entry before
        # raising so the caller still sees the details.
        failures = response.get('Failed', [])
        for failure in failures:
            logger.warning('delete of %s failed with code %s due to %s',
                           failure['Id'],
                           failure['Code'],
                           failure['Message']
                           )

        if len(failures) > 0:
            raise exceptions.DeleteMessagesException(len(failures))

    def release_messages(self, messages, queue_name):
        """Releases messages to SQS queues so other workers can pick them up.

        :param list[KaleMessage] messages: messages to release to SQS.
        :param str queue_name: queue name.
        :raises: ChangeMessagesVisibilityException: SQS responded with a
            partial success. Some messages were not released.
        """
        if not messages:
            return

        queue = self._get_or_create_queue(queue_name)

        # VisibilityTimeout=0 makes each message immediately visible to
        # other consumers again.
        response = queue.change_message_visibility_batch(
            Entries=[{
                'Id': message.id,
                'ReceiptHandle': message.sqs_receipt_handle,
                'VisibilityTimeout': 0
            } for message in messages]
        )

        failures = response.get('Failed', [])
        for failure in failures:
            logger.warning('change visibility of %s failed with code %s due to %s',
                           failure['Id'],
                           failure['Code'],
                           failure['Message']
                           )

        if len(failures) > 0:
            raise exceptions.ChangeMessagesVisibilityException(len(failures))
class Benchmark(object):
    """Manages entire benchmark.

    Spawns worker threads that repeatedly time
    get_highest_priority_queue_that_needs_work() and reports aggregate
    statistics over the collected check times.
    """

    def __init__(self, config_file, workers, iterations):
        """
        :param str config_file: path to the queue config YAML file.
        :param int workers: number of worker threads to spawn.
        :param int iterations: SQS checks performed by each worker.
        """
        self.config_file = config_file
        self.workers = workers
        self.iterations = iterations

    def run(self):
        """Main function of benchmarking."""
        log.info('Start benchmarking ...')
        log.info('Spawning %d worker threads ...' % self.workers)
        sqs_talk = sqs.SQSTalk()
        queue_info_obj = queue_info.QueueInfo(self.config_file, sqs_talk)
        worker_threads = []
        for i in range(self.workers):
            worker_thread = WorkerThread(queue_info_obj, self.iterations)
            worker_threads.append(worker_thread)
            # Daemonize so a hung worker cannot block interpreter exit.
            # (Thread.setDaemon() is deprecated; assign the attribute.)
            worker_thread.daemon = True
        for worker_thread in worker_threads:
            worker_thread.start()
        for worker_thread in worker_threads:
            worker_thread.join()

        # Drain the shared result queue into a plain list.
        all_check_time = []
        while not checking_sqs_time.empty():
            check_time = checking_sqs_time.get()
            all_check_time.append(check_time)

        log.info('=== Benchmark results ===')
        count = len(all_check_time)
        sum_time = sum(all_check_time)
        avg_time = sum_time / count
        # BUG FIX: `count / 2` is a float on Python 3 and cannot be used as
        # a list index (TypeError); use integer division.
        median_time = sorted(all_check_time)[count // 2]
        max_time = max(all_check_time)
        log.info('Average Check Time: %f' % avg_time)
        log.info('Median Check time: %f' % median_time)
        log.info('Max Check Time: %f' % max_time)
    def publish(self, task_class, task_id, payload,
                current_retry_num=None, current_failure_num=None, delay_sec=None):
        """Publish the given task type to the queue with the provided payload.

        :param obj task_class: class of the task that we are publishing.
        :param str task_id: unique identifying string for this task.
        :param dict payload: dictionary for the task payload.
        :param int current_retry_num: current task retry count. If 0, this is
            the first attempt to run the task.
        :param int current_failure_num: current task failure count.
        :param int delay_sec: time (in seconds) that a task should stay
            in the queue before being released to consumers.
        :raises: InvalidTaskDelayException: delay_sec exceeds
            settings.SQS_MAX_TASK_DELAY_SEC.
        :raises: InvalidTimeLimitTaskException: the task's time limit is not
            shorter than the queue's visibility timeout.
        :raises: TaskTooChubbyException: This task is outrageously chubby.
            The publisher of the task should handle this exception and
            determine how to proceed.
        """

        if delay_sec is not None and delay_sec > settings.SQS_MAX_TASK_DELAY_SEC:
            raise exceptions.InvalidTaskDelayException(
                'Invalid task delay_sec (%d > %d).' % (
                    delay_sec, settings.SQS_MAX_TASK_DELAY_SEC))

        # The queue class is configurable; resolve it from its dotted path
        # and look up the queue this task class is bound to.
        queue_class = utils.class_import_from_path(settings.QUEUE_CLASS)
        q_info = queue_info.QueueInfo(settings.QUEUE_CONFIG, self, queue_class)
        queue_obj = q_info.get_queue(task_class.queue)

        # The task must be able to finish before SQS re-releases the message,
        # otherwise a second consumer could run it concurrently.
        if task_class.time_limit >= queue_obj.visibility_timeout_sec:
            raise exceptions.InvalidTimeLimitTaskException(
                'Invalid task time limit: %d >= %d from %s' % (
                    task_class.time_limit, queue_obj.visibility_timeout_sec,
                    settings.QUEUE_CONFIG))

        sqs_queue = self._get_or_create_queue(queue_obj.name)

        kale_msg = message.KaleMessage(
            task_class=task_class,
            task_id=task_id,
            payload=payload,
            current_retry_num=current_retry_num,
            current_failure_num=current_failure_num)

        # NOTE(review): `delay_sec or 1` maps both None and 0 to a 1-second
        # delay, so an immediate (0s) publish is impossible; if that is
        # unintended this wants `delay_sec or 0` -- confirm intent.
        sqs_queue.send_message(
            MessageBody=kale_msg.encode(),
            DelaySeconds=delay_sec or 1
        )

        logger.debug('Published task. Task id: %s; Task name: %s' % (
            task_id, '%s.%s' % (task_class.__module__, task_class.__name__)))
76 | """ 77 | sqs_dead_letter_queue = self._get_or_create_queue(dlq_name) 78 | 79 | response = sqs_dead_letter_queue.send_messages( 80 | Entries=[{ 81 | 'Id': m.id, 82 | 'MessageBody': m.encode(), 83 | 'DelaySeconds': 0 84 | } for m in messages] 85 | ) 86 | 87 | failures = response.get('Failed', []) 88 | for failure in failures: 89 | logger.warning('failed to send %s with code %s due to %s', 90 | failure['Id'], 91 | failure['Code'], 92 | failure['Message'] 93 | ) 94 | 95 | if len(failures) > 0: 96 | raise exceptions.SendMessagesException(len(failures)) 97 | -------------------------------------------------------------------------------- /kale/sqs.py: -------------------------------------------------------------------------------- 1 | """Base class for SQS utility classes.""" 2 | from __future__ import absolute_import 3 | 4 | import logging 5 | 6 | import boto3 7 | import botocore 8 | from kale import exceptions 9 | from kale import settings 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class SQSTalk(object): 15 | """Base class for SQS utility classes.""" 16 | 17 | _client = None 18 | _session = None 19 | _sqs = None 20 | 21 | # queue name to SQS.Queue object mapping 22 | _queues = {} 23 | 24 | def __init__(self, *args, **kwargs): 25 | """Constructor. 26 | :raises: exceptions.ImproperlyConfiguredException: Raised if the 27 | settings are not adequately configured. 
28 | """ 29 | 30 | if not settings.PROPERLY_CONFIGURED: 31 | raise exceptions.ImproperlyConfiguredException( 32 | 'Settings are not properly configured.') 33 | 34 | aws_region = None 35 | if settings.AWS_REGION != '': 36 | aws_region = settings.AWS_REGION 37 | 38 | aws_access_key_id = None 39 | if settings.AWS_ACCESS_KEY_ID != '': 40 | aws_access_key_id = settings.AWS_ACCESS_KEY_ID 41 | 42 | aws_secret_access_key = None 43 | if settings.AWS_SECRET_ACCESS_KEY != '': 44 | aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY 45 | 46 | # If None is passed, Boto3 uses its default behavior to determine the URL 47 | endpoint_url = None 48 | if settings.MESSAGE_QUEUE_ENDPOINT_URL: 49 | endpoint_url = settings.MESSAGE_QUEUE_ENDPOINT_URL 50 | 51 | self._session = boto3.Session(region_name=aws_region, 52 | aws_access_key_id=aws_access_key_id, 53 | aws_secret_access_key=aws_secret_access_key) 54 | 55 | self._client = self._session.client('sqs', endpoint_url=endpoint_url) 56 | self._sqs = self._session.resource('sqs', endpoint_url=endpoint_url) 57 | 58 | self._sqs_queue_name_to_tag = settings.SQS_QUEUE_TAG_FUNCTION 59 | 60 | def _get_or_create_queue(self, queue_name): 61 | """Fetch or create a queue. 62 | 63 | :param str queue_name: string for queue name. 64 | :return: Queue 65 | :rtype: boto3.resources.factory.sqs.Queue 66 | """ 67 | 68 | # Check local cache first. 
69 | if queue_name in self._queues: 70 | return self._queues[queue_name] 71 | 72 | # get or create queue 73 | try: 74 | resp = self._client.get_queue_url(QueueName=queue_name) 75 | queue_url = resp.get('QueueUrl') 76 | except botocore.exceptions.ClientError as e: 77 | if e.response['Error']['Code'] != 'AWS.SimpleQueueService.NonExistentQueue': 78 | raise e 79 | tags = self._get_sqs_queue_tags(queue_name) 80 | 81 | logger.info('Creating new SQS queue: %s' % queue_name) 82 | queue = self._client.create_queue(QueueName=queue_name, tags=tags) 83 | queue_url = queue.get('QueueUrl') 84 | 85 | # create queue object 86 | queue = self._sqs.Queue(queue_url) 87 | 88 | self._queues[queue_name] = queue 89 | return queue 90 | 91 | def get_all_queues(self, prefix=''): 92 | """Returns all queues, filtered by prefix. 93 | 94 | :param str prefix: string for queue prefix. 95 | :return: a list of queue objects. 96 | :rtype: list[boto3.resources.factory.sqs.Queue] 97 | """ 98 | 99 | # QueueNamePrefix is optional and can not be None. 
100 | resp = self._client.list_queues(QueueNamePrefix=prefix) 101 | 102 | queue_urls = resp.get('QueueUrls', []) 103 | 104 | queues = [] 105 | for queue_url in queue_urls: 106 | queues.append(self._sqs.Queue(queue_url)) 107 | 108 | return queues 109 | 110 | def _get_sqs_queue_tags(self, queue_name): 111 | try: 112 | tags = self._sqs_queue_name_to_tag(queue_name) or {} 113 | if tags: 114 | # Tags must be a Dict[str, str] 115 | tags = { 116 | str(k): str(v) 117 | for k, v in tags.items() 118 | } 119 | return tags 120 | except Exception as e: 121 | logger.warning('Failed to extract SQS Queue tags %s' % queue_name, exc_info=True) 122 | return {} 123 | -------------------------------------------------------------------------------- /kale/tests/test_message.py: -------------------------------------------------------------------------------- 1 | """Module testing the kale.message module.""" 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | 6 | from kale import message 7 | from kale import task 8 | 9 | import mock 10 | 11 | 12 | def _time_function(): 13 | return 123 14 | 15 | 16 | def _get_publisher_data(): 17 | return 'test_publisher' 18 | 19 | 20 | class MessageTestCase(unittest.TestCase): 21 | """Test KaleMessage.""" 22 | 23 | def test_validate_message(self): 24 | payload = {'args': [], 'kwargs': {}} 25 | message.KaleMessage._validate_task_payload(payload) 26 | 27 | def test_validate_message_with_appdata(self): 28 | payload = {'args': [], 'kwargs': {}, 'app_data': {}} 29 | message.KaleMessage._validate_task_payload(payload) 30 | 31 | @mock.patch('kale.message._get_current_timestamp') 32 | def test_message(self, mock_get_current_timestamp): 33 | mock_get_current_timestamp.return_value = 123 34 | payload = {'args': [], 'kwargs': {}} 35 | 36 | # Test create 37 | kale_msg = message.KaleMessage( 38 | task_class=task.Task, 39 | task_id=1, 40 | payload=payload, 41 | current_retry_num=None) 42 | 43 | self.assertIsNotNone(kale_msg) 44 | 
self.assertEqual('kale.task.Task', kale_msg.task_name) 45 | self.assertEqual(123, kale_msg._enqueued_time) 46 | self.assertEqual(0, kale_msg.task_retry_num) 47 | self.assertEqual(0, kale_msg.task_failure_num) 48 | self.assertEqual(1, kale_msg.task_id) 49 | self.assertEqual([], kale_msg.task_args) 50 | self.assertEqual({}, kale_msg.task_kwargs) 51 | 52 | def test_message_with_appdata(self): 53 | payload = {'args': [], 'kwargs': {}, 'app_data': {}} 54 | 55 | # Test create 56 | kale_msg = message.KaleMessage( 57 | task_class=task.Task, 58 | task_id=1, 59 | payload=payload, 60 | current_retry_num=None) 61 | self.assertIsNotNone(kale_msg) 62 | self.assertEqual({}, kale_msg.task_app_data) 63 | 64 | @mock.patch('kale.message._get_current_timestamp') 65 | @mock.patch('kale.message._get_publisher_data') 66 | def test_encode(self, mock_get_current_timestamp, mock__get_publisher_data): 67 | payload = {'args': [], 'kwargs': {}, 'app_data': {}} 68 | 69 | mock_get_current_timestamp.return_value = 123 70 | mock__get_publisher_data.return_value = 'test_publisher' 71 | 72 | kale_msg = message.KaleMessage( 73 | task_class=task.Task, 74 | task_id=1, 75 | payload=payload, 76 | current_retry_num=None) 77 | 78 | with mock.patch('kale.message.pickle') as pickle: 79 | pickle.dumps.return_value = b'\x80' 80 | actual = kale_msg.encode() 81 | 82 | expected = 'OKyZrDvbdIV4hnAi07xWGg==' 83 | self.assertEqual(expected, actual) 84 | 85 | def test_decode(self): 86 | mock_sqs_msg = mock.MagicMock() 87 | 88 | message_body = { 89 | 'id': 1, 90 | 'task': 'kale.task.Task', 91 | 'payload': {'args': [], 'kwargs': {}}, 92 | '_enqueued_time': 123, 93 | 'publisher_data': '', 94 | 'current_retry_num': 0 95 | } 96 | 97 | mock_sqs_msg.body = 'Qx2KhutzbmsCC8NaLkKMXjtMKox/HlpwGz+IM0jzMElyptGsyBQald2EL' \ 98 | 'qADXqyiJCu0RvD6sDnOKYITIfHz1qSl5qeSZrbslvFJeVXTF4PYaEz69g' \ 99 | 'ASICeunTWkCMNla0wnpiJvu4QMEWmubi+RFgFBkTYSnQXG5NtgUCB0ifD' \ 100 | 'PDgoKDtzSIC354LxZjCBmRg1kpjfZ+zNGJ8DMw6YabQ==' 101 | 
mock_sqs_msg.delete = None 102 | mock_sqs_msg.queue_url = 'https://queue.amazonaws.com/123456789012/' \ 103 | 'LowPriorityTest1' 104 | 105 | with mock.patch('kale.message.pickle') as pickle: 106 | pickle.loads.return_value = message_body 107 | kale_msg = message.KaleMessage.decode_sqs(mock_sqs_msg) 108 | 109 | self.assertIsNotNone(kale_msg) 110 | self.assertEqual('kale.task.Task', kale_msg.task_name) 111 | self.assertEqual(123, kale_msg._enqueued_time) 112 | self.assertEqual(0, kale_msg.task_retry_num) 113 | self.assertEqual(0, kale_msg.task_failure_num) 114 | self.assertEqual(1, kale_msg.task_id) 115 | self.assertEqual([], kale_msg.task_args) 116 | self.assertEqual({}, kale_msg.task_kwargs) 117 | self.assertEqual('LowPriorityTest1', kale_msg.sqs_queue_name) 118 | 119 | def test_delete(self): 120 | payload = {'args': [], 'kwargs': {}} 121 | mock_delete = mock.MagicMock() 122 | kale_msg = message.KaleMessage( 123 | task_class=task.Task, 124 | task_id=1, 125 | payload=payload, 126 | current_retry_num=None, 127 | delete_func=mock_delete 128 | ) 129 | 130 | kale_msg.delete() 131 | mock_delete.assert_called() 132 | -------------------------------------------------------------------------------- /kale/tests/test_queue_info.py: -------------------------------------------------------------------------------- 1 | """Tests queue_info.py""" 2 | from __future__ import absolute_import 3 | 4 | import tempfile 5 | import unittest 6 | 7 | import mock 8 | from botocore.exceptions import ClientError 9 | 10 | from kale import queue_info 11 | from kale import settings 12 | from kale import sqs 13 | 14 | 15 | class QueueInfoTest(unittest.TestCase): 16 | """Tests for QueueInfo class.""" 17 | 18 | _previous_region = None 19 | 20 | def setUp(self): 21 | self._previous_region = settings.AWS_REGION 22 | settings.AWS_REGION = 'us-east-1' 23 | 24 | def tearDown(self): 25 | settings.AWS_REGION = self._previous_region 26 | 27 | test_string = ('default: \n' 28 | ' name: default\n' 29 | ' 
priority: 10\n' 30 | ' batch_size: 10\n' 31 | ' visibility_timeout_sec: 5\n' 32 | ' long_poll_time_sec: 5\n' 33 | ' num_iterations: 2\n' 34 | 'digest:\n' 35 | ' name: digest\n' 36 | ' priority: 22\n' 37 | ' batch_size: 11\n' 38 | ' visibility_timeout_sec: 55\n' 39 | ' long_poll_time_sec: 65\n' 40 | ' num_iterations: 13\n' 41 | 'lowp:\n' 42 | ' name: lowp\n' 43 | ' priority: 1\n' 44 | ' batch_size: 10\n' 45 | ' visibility_timeout_sec: 5\n' 46 | ' long_poll_time_sec: 5\n' 47 | ' num_iterations: 2\n') 48 | 49 | def test_get_queues_from_config(self): 50 | """Success case for get_queues_from_config. 51 | Don't have failure case. If fails, fails loudly. 52 | """ 53 | queue_config = tempfile.NamedTemporaryFile(delete=True) 54 | queue_config.write(self.test_string.encode('utf8')) 55 | queue_config.seek(0) 56 | queues = queue_info.QueueInfo._get_queues_from_config( 57 | queue_config.name, queue_info.TaskQueue) 58 | queue_config.close() 59 | self.assertEquals(len(queues), 3) 60 | self.assertEquals(queues[0].name, 'digest') 61 | self.assertEquals(queues[0].priority, 22) 62 | self.assertEquals(queues[0].batch_size, 11) 63 | self.assertEquals(queues[0].visibility_timeout_sec, 55) 64 | self.assertEquals(queues[0].long_poll_time_sec, 65) 65 | self.assertEquals(queues[0].num_iterations, 13) 66 | self.assertEquals(queues[1].name, 'default') 67 | self.assertEquals(queues[2].name, 'lowp') 68 | 69 | def _build_queue_info(self): 70 | sqs_inst = sqs.SQSTalk() 71 | 72 | queue_config = tempfile.NamedTemporaryFile(delete=True) 73 | queue_config.write(self.test_string.encode('utf8')) 74 | queue_config.seek(0) 75 | queue_info.QueueInfo._queues = None 76 | queue_info.QueueInfo._simple_name_queues_map = None 77 | qinfo = queue_info.QueueInfo(queue_config.name, sqs_inst, 78 | queue_info.TaskQueue) 79 | return qinfo 80 | 81 | def test_queues(self): 82 | qinfo = self._build_queue_info() 83 | queues = qinfo.get_queues() 84 | self.assertEquals(len(queues), 3) 85 | 86 | # TODO (wenbin): add a 
separate test case for 87 | # get_highest_priority_non_empty_queue. 88 | 89 | def test_not_implemented_ops(self): 90 | queue_info_base = queue_info.QueueInfoBase() 91 | 92 | with self.assertRaises(NotImplementedError): 93 | queue_info_base.get_queues() 94 | 95 | with self.assertRaises(NotImplementedError): 96 | queue_info_base.get_highest_priority_queue_that_needs_work() 97 | 98 | with self.assertRaises(NotImplementedError): 99 | queue_info_base.is_queue_empty(mock.MagicMock()) 100 | 101 | with self.assertRaises(NotImplementedError): 102 | queue_info_base.does_queue_need_work(mock.MagicMock()) 103 | 104 | def test_does_queue_need_work_empty(self): 105 | with mock.patch.object(queue_info.QueueInfo, 'is_queue_empty', return_value=True): 106 | qinfo = self._build_queue_info() 107 | self.assertFalse(qinfo.does_queue_need_work(None)) 108 | 109 | def test_does_queue_need_work_non_empty(self): 110 | with mock.patch.object(queue_info.QueueInfo, 'is_queue_empty', return_value=False): 111 | qinfo = self._build_queue_info() 112 | self.assertTrue(qinfo.does_queue_need_work(None)) 113 | 114 | def test_does_queue_need_work_rate_limited(self): 115 | rate_limit_exception = ClientError( 116 | {'Error': {'Code': 'ThrottlingException'}}, 'get_queue_url') 117 | with mock.patch.object( 118 | queue_info.QueueInfo, 'is_queue_empty', side_effect=rate_limit_exception): 119 | qinfo = self._build_queue_info() 120 | self.assertTrue(qinfo.does_queue_need_work(None)) 121 | -------------------------------------------------------------------------------- /kale/queue_selector.py: -------------------------------------------------------------------------------- 1 | """Module containing queue selection algorithms. 2 | 3 | 4 | How to implement your own queue selection algorithm? 5 | 6 | class MyQueueSelector(SelectQueueBase): 7 | 8 | def get_queue(self): 9 | 10 | # Get a list of all queues defined in the YAML file that is 11 | # specified at QUEUE_CONFIG in settings file. 
class SelectQueueBase(object):
    """Base class for queue-selection strategies.

    Subclasses implement get_queue, which is called once per task
    processing cycle on the task worker.
    """

    def __init__(self, queue_info):
        # Object exposing get_queues() (and, for some strategies,
        # does_queue_need_work / get_highest_priority_queue_that_needs_work).
        self.queue_info = queue_info

    def get_queue(self, *args, **kwargs):
        """Returns a TaskQueue object."""
        raise NotImplementedError('Base class cannot be used directly.')


class Random(SelectQueueBase):
    """Randomly selects a queue without considering priority."""

    def get_queue(self):
        queues = self.queue_info.get_queues()
        return random.choice(queues)


class Lottery(SelectQueueBase):
    """Use lottery scheduling algorithm to select a queue based on priority."""

    @staticmethod
    def _run_lottery(queues):
        """Draw lottery from a list of candidate queues.

        Each queue with a valid priority (1 to 100 inclusive) gets a
        contiguous range of tickets proportional to its priority;
        queues with an out-of-range priority are excluded from the
        draw.

        :param list[TaskQueue] queues: a list of candidate queues.

        :return: A TaskQueue object that wins lottery, or None when no
            queue holds a valid priority (including an empty list).
        :rtype: TaskQueue
        """
        tickets = {}
        total_tickets = 0
        for queue in queues:
            # Queue priority should be within 1 to 100.
            if not 1 <= queue.priority <= 100:
                continue
            low = total_tickets
            total_tickets += queue.priority
            tickets[queue.name] = (low, total_tickets)

        if total_tickets == 0:
            # Every queue was excluded, or queues was empty. (The old
            # code reached this state via random.randrange raising
            # ValueError on an empty range.)
            return None

        # Winning ticket in [0, total_tickets).
        number = random.randrange(0, total_tickets)
        for queue in queues:
            interval = tickets.get(queue.name)
            if interval is None:
                # Excluded for invalid priority. Bug fix: the previous
                # implementation indexed tickets[queue.name]
                # unconditionally and raised an uncaught KeyError when
                # a mixed list held an invalid-priority queue ahead of
                # the winner.
                continue
            if interval[0] <= number < interval[1]:
                return queue

        # Unreachable in practice: the winning ticket always falls in
        # some included queue's range.
        return None

    def get_queue(self, *args, **kwargs):
        return self._run_lottery(self.queue_info.get_queues())


class HighestPriorityFirst(SelectQueueBase):
    """Highest priority first.

    Get the highest priority non-empty queue first.
    If all queues are empty, get the highest priority empty queue.
    """

    def get_queue(self, *args, **kwargs):
        queue = self.queue_info.get_highest_priority_queue_that_needs_work()
        if queue:
            return queue
        # All queues empty: fall back to the highest-priority one.
        queues = self.queue_info.get_queues()
        queues.sort(key=lambda x: x.priority, reverse=True)
        return queues[0]


class HighestPriorityLottery(Lottery):
    """Highest priority first + lottery.

    Get highest priority non-empty queue first.
    If all queues are empty, run lottery on empty queues.
    """

    def get_queue(self, *args, **kwargs):
        queue = self.queue_info.get_highest_priority_queue_that_needs_work()
        if queue:
            return queue

        return self._run_lottery(self.queue_info.get_queues())


class LotteryLottery(Lottery):
    """Run lottery on both non-empty and empty queues.

    Run lottery on all queues. When we get a non-empty queue, return
    immediately. If we get 10 empty queues in a row, run lottery once
    more and long poll on whatever queue we get.
    """

    def get_queue(self, *args, **kwargs):
        retry_empty_queue_count = 10

        for _ in range(retry_empty_queue_count):
            queue = self._run_lottery(self.queue_info.get_queues())
            if self.queue_info.does_queue_need_work(queue):
                return queue
        # Gave up finding a non-empty queue; settle for any winner.
        return self._run_lottery(self.queue_info.get_queues())


class ReducedLottery(Lottery):
    """Improved lottery scheduling.

    Limiting the lottery pool by removing known empty queues. When we
    get a non-empty queue, return immediately. If we get an empty
    queue, remove it from the lottery pool and rerun the lottery. If
    all queues are empty, run lottery on all queues, and long poll on
    whatever queue we get.
    """

    def get_queue(self, *args, **kwargs):
        # Copy the list so removals don't mutate queue_info's queues.
        candidate_queues = list(self.queue_info.get_queues())

        while candidate_queues:
            queue = self._run_lottery(candidate_queues)
            if self.queue_info.does_queue_need_work(queue):
                return queue
            candidate_queues.remove(queue)
        # Every queue was empty; settle for any winner.
        return self._run_lottery(self.queue_info.get_queues())
'1lcHEDp3HufBy14CHkSlVw=='), 19 | (b'1616161616161616', 'wqx+H/v7iFtW7acYb9QoQ9ZXBxA6dx7nwcteAh5EpVc='), 20 | (b'\x8a\x004\xaf\xff', 'cVz2i16U1c3ztu2GwMhtvg=='), 21 | (b'\xff\xfe\xfa\xee\x014\xab\xff\xfe\xfa\xee\x014\xab\xff\xfe\xfa\xee\x014\xab', 22 | 'I6XMloWUi3GQwjq5kP15Z3EI+LIV3Ygsda2FiSIgtM8='), 23 | (b'\xe9\x80\x99\xe6\x98\xaf\xe4\xb8\x80\xe5\x80\x8bUnicode\xe6\xb8\xac\xe8\xa9\xa6', 24 | 'XwjwdL+rHdD6pHr/eUckPJUC7jBbc0LUr8g7f7FffBI='), 25 | (b'\x00\x00\x00\x00\x00\x00\x00', '/b/++aGOO0Kj2e2+YWadsQ==') 26 | ] 27 | 28 | @property 29 | def msgs(self): 30 | return [x[0] for x in self.msgs_encrypt] 31 | 32 | def test_get_padding_bytes(self): 33 | padding_bytes = crypt._get_padding_bytes(3) 34 | self.assertEqual(13, len(padding_bytes)) 35 | self.assertEqual(13, int(padding_bytes[12])) 36 | 37 | padding_bytes = crypt._get_padding_bytes(12) 38 | self.assertEqual(4, len(padding_bytes)) 39 | self.assertEqual(4, int(padding_bytes[3])) 40 | 41 | padding_bytes = crypt._get_padding_bytes(24) 42 | self.assertEqual(8, len(padding_bytes)) 43 | self.assertEqual(8, int(padding_bytes[7])) 44 | 45 | # msgs that are multiples of 16 long are padded with 16 bytes 46 | padding_bytes = crypt._get_padding_bytes(16) 47 | self.assertEqual(16, len(padding_bytes)) 48 | self.assertEqual(16, int(padding_bytes[15])) 49 | 50 | padding_bytes = crypt._get_padding_bytes(32) 51 | self.assertEqual(16, len(padding_bytes)) 52 | self.assertEqual(16, int(padding_bytes[15])) 53 | 54 | def test_pad(self): 55 | # This would probably be nicer with those nose test generators 56 | [self._assert_padded_right(msg) for msg in self.msgs] 57 | 58 | def _assert_padded_right(self, test_msg): 59 | padded_msg = crypt._pad(test_msg) 60 | self.assertEqual(0, len(padded_msg) % crypt.BLOCK_SIZE) 61 | 62 | def test_unpad(self): 63 | for msg in self.msgs: 64 | padded_msg = crypt._pad(msg) 65 | self.assertEqual(msg, crypt._unpad(padded_msg)) 66 | 67 | def test_crypt(self): 68 | for msg in self.msgs: 69 | 
self.assertNotEqual(msg, crypt.encrypt(msg)) 70 | 71 | def test_decrypt(self): 72 | for msg in self.msgs: 73 | crypted_msg = crypt.encrypt(msg) 74 | self.assertEqual(msg, crypt.decrypt(crypted_msg)) 75 | 76 | def test_urlsafe_crypt(self): 77 | [self.assertNotEqual(msg, crypt.urlsafe_encrypt(msg)) for msg in self.msgs] 78 | 79 | def test_urlsafe_decrypt(self): 80 | for msg in self.msgs: 81 | crypted_msg = crypt.urlsafe_encrypt(msg) 82 | self.assertEqual(msg, crypt.urlsafe_decrypt(crypted_msg)) 83 | 84 | def test_hex_crypt(self): 85 | [self.assertNotEqual(msg, crypt.hex_encrypt(msg)) for msg in self.msgs] 86 | 87 | def text_hex_decrypt(self): 88 | for msg in self.msgs: 89 | crypted_msg = crypt.hex_encrypt(msg) 90 | self.assertEqual(msg, crypt.hex_decrypt(crypted_msg)) 91 | 92 | def test_crypt_order_doesnt_matter(self): 93 | # This is effectively a test that we are using ECB (or some other non-chained) 94 | # mode of operation, because as long as we are it's fine to keep using the same 95 | # cipher object over and over. If we switched to a chained mode, this test would 96 | # break and we would need to create a new cipher object (and IV and/or nonce, 97 | # depending on mode) for every message. 
98 | crypted_msgs = {msg: crypt.encrypt(msg) for msg in self.msgs} 99 | # Reset to a new cipher 100 | crypt._set_cipher() 101 | self.msgs.reverse() 102 | crypted_in_reverse = {msg: crypt.encrypt(msg) for msg in self.msgs} 103 | [self.assertEqual(crypted_msgs[msg], 104 | crypted_in_reverse[msg]) for msg in self.msgs] 105 | 106 | def test_bad_decryption_input(self): 107 | """Test that we get CryptException when we pass bad input to decrypt functions.""" 108 | bad_messages = ['#$##$#$#$#$#', # Just garbage 109 | 'abcdefABCDEF00==', # base64 encoded garbage 110 | 'BYx3EzvPWTMLkBA5FF4xWw==', # message with the padding byte > BLOCK_SIZE 111 | 'Hn+Ou2DpitXxWD47glRtJw==', # message with the padding byte > message size 112 | # more base64 encoded garbage 113 | 'QkJCQkJCQkJBQUFBQUFBQWFiY19lZkFCQ0RFRiUlAAE=' 114 | ] 115 | 116 | for f in [crypt.decrypt, crypt.hex_decrypt, crypt.urlsafe_decrypt]: 117 | for msg in bad_messages: 118 | with self.assertRaises(crypt.CryptException): 119 | f(msg) 120 | 121 | def test_bad_encryption_input(self): 122 | """Test that we raise ValueError when we are passed bad input to encrypt functions.""" 123 | bad_input = [crypt.CryptException(), # not a str 124 | 123456, # also not a str 125 | u'測試測試', # also not a str 126 | ] 127 | 128 | for f in [crypt.encrypt, crypt.hex_encrypt, crypt.urlsafe_encrypt]: 129 | for msg in bad_input: 130 | with self.assertRaises(ValueError): 131 | f(msg) 132 | 133 | def test_backwards_compatible(self): 134 | """Test that we still decrypt any existing messages correctly.""" 135 | for msg, encrypted in self.msgs_encrypt: 136 | self.assertEqual(msg, crypt.decrypt(encrypted)) 137 | -------------------------------------------------------------------------------- /kale/tests/test_queue_selector.py: -------------------------------------------------------------------------------- 1 | """Tests queue_selector.py""" 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | 6 | from kale import queue_info 7 | from 
class MultiQueueInfo(queue_info.QueueInfoBase):
    """Queue-info stub with three queues; only 'queue2' is non-empty."""

    def get_queues(self):
        return [queue_info.TaskQueue(name='queue1', priority=100),
                queue_info.TaskQueue(name='queue2', priority=50),
                queue_info.TaskQueue(name='queue3', priority=1)]

    def does_queue_need_work(self, queue):
        return not self.is_queue_empty(queue)

    def is_queue_empty(self, queue):
        # Only queue2 has work.
        return queue.name != 'queue2'

    def get_highest_priority_queue_that_needs_work(self):
        return self.get_queues()[0]


class SingleQueueInfo(queue_info.QueueInfoBase):
    """Queue-info stub with a single queue."""

    def get_queues(self):
        return [queue_info.TaskQueue(name='queue1', priority=99)]


class NoQueueInfo(queue_info.QueueInfoBase):
    """Queue-info stub with no queues at all."""

    def get_queues(self):
        return []


class MultiQueueNoPriorityInfo(MultiQueueInfo):
    """Like MultiQueueInfo, but reports no queue as needing work."""

    def get_highest_priority_queue_that_needs_work(self):
        return None


class BadQueueInfo(queue_info.QueueInfoBase):
    """Queue-info stub whose priorities fall outside the valid 1-100 range."""

    def get_queues(self):
        return [queue_info.TaskQueue(name='queue1', priority=101),
                queue_info.TaskQueue(name='queue2', priority=0)]


# Bug fix throughout this module: test methods were named
# 'get_queue_test' / 'run_lottery_test', which unittest/pytest
# discovery never picks up, so none of these tests ever ran. They
# must be prefixed with 'test'.
class SelectQueueBaseTest(unittest.TestCase):
    """Tests for SelectQueueBase class."""

    def test_get_queue(self):
        # The base strategy is abstract.
        selector = queue_selector.SelectQueueBase(MultiQueueInfo())
        with self.assertRaises(NotImplementedError):
            selector.get_queue()


class RandomQueueTest(unittest.TestCase):
    """Tests for Random class."""

    def test_get_queue(self):
        # Get any one queue from multiple queues
        queue = queue_selector.Random(MultiQueueInfo()).get_queue()
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])


class HighestPriorityFirstTest(unittest.TestCase):
    """Tests for HighestPriorityFirst class."""

    def test_get_queue(self):
        # MultiQueueInfo reports queue1 as the queue that needs work.
        queue = queue_selector.HighestPriorityFirst(
            MultiQueueInfo()).get_queue()
        self.assertEqual(queue.name, 'queue1')

    def test_get_queue_no_priority(self):
        # No queue needs work: falls back to highest-priority queue.
        queue = queue_selector.HighestPriorityFirst(
            MultiQueueNoPriorityInfo()).get_queue()
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])


class LotteryTest(unittest.TestCase):
    """Tests for Lottery class."""

    def test_run_lottery(self):
        queue = queue_selector.Lottery._run_lottery(
            MultiQueueInfo().get_queues())
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])

        queue = queue_selector.Lottery._run_lottery(
            SingleQueueInfo().get_queues())
        self.assertEqual(queue.name, 'queue1')

        # No candidates: no winner.
        queue = queue_selector.Lottery._run_lottery(NoQueueInfo().get_queues())
        self.assertIsNone(queue)

        # All priorities out of range: no winner.
        queue = queue_selector.Lottery._run_lottery(
            BadQueueInfo().get_queues())
        self.assertIsNone(queue)

    def test_get_queue(self):
        # Get any one queue from multiple queues
        selector = queue_selector.Lottery(MultiQueueInfo())
        queue = selector.get_queue()
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])


class ReducedLotteryTest(unittest.TestCase):
    """Tests for ReducedLottery class."""

    def test_get_queue(self):
        # queue2 is the only non-empty queue, so it must win.
        selector = queue_selector.ReducedLottery(MultiQueueInfo())
        queue = selector.get_queue()
        self.assertEqual(queue.name, 'queue2')


class HighestPriorityLotteryTest(unittest.TestCase):
    """Tests for HighestPriorityLottery class."""

    def test_run_lottery(self):
        queue = queue_selector.HighestPriorityLottery._run_lottery(
            MultiQueueInfo().get_queues())
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])

        queue = queue_selector.HighestPriorityLottery._run_lottery(
            SingleQueueInfo().get_queues())
        self.assertEqual(queue.name, 'queue1')

        queue = queue_selector.HighestPriorityLottery._run_lottery(
            NoQueueInfo().get_queues())
        self.assertIsNone(queue)

        queue = queue_selector.HighestPriorityLottery._run_lottery(
            BadQueueInfo().get_queues())
        self.assertIsNone(queue)

    def test_get_queue(self):
        # Get any one queue from multiple queues
        selector = queue_selector.HighestPriorityLottery(MultiQueueInfo())
        queue = selector.get_queue()
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])


class LotteryLotteryTest(unittest.TestCase):
    """Tests for LotteryLottery class."""

    def test_run_lottery(self):
        queue = queue_selector.LotteryLottery._run_lottery(
            MultiQueueInfo().get_queues())
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])

        queue = queue_selector.LotteryLottery._run_lottery(
            SingleQueueInfo().get_queues())
        self.assertEqual(queue.name, 'queue1')

        queue = queue_selector.LotteryLottery._run_lottery(
            NoQueueInfo().get_queues())
        self.assertIsNone(queue)

        queue = queue_selector.LotteryLottery._run_lottery(
            BadQueueInfo().get_queues())
        self.assertIsNone(queue)

    def test_get_queue(self):
        # Get any one queue from multiple queues
        selector = queue_selector.LotteryLottery(MultiQueueInfo())
        queue = selector.get_queue()
        self.assertIn(queue.name, ['queue1', 'queue2', 'queue3'])
class PublisherTestCase(unittest.TestCase):
    """Test publisher logic."""

    def _publish_task(self, visibility_timeout_sec, time_limit, payload):
        """Publish one task through a Publisher wired to mocked SQS pieces.

        Shared by the publish tests below, which previously duplicated
        this mock wiring verbatim.

        :param int visibility_timeout_sec: timeout reported by the mocked
            queue.
        :param int time_limit: time limit set on the mocked task class.
        :param dict payload: task payload to publish.
        """
        sqs_inst = sqs.SQSTalk()

        with mock.patch(
                'kale.queue_info.QueueInfo.get_queue') as mock_get_queue:
            mock_queue = mock.MagicMock()
            mock_queue.visibility_timeout_sec = visibility_timeout_sec
            mock_get_queue.return_value = mock_queue
            test_publisher = publisher.Publisher(sqs_inst)
            test_publisher._get_or_create_queue = mock.MagicMock()
            mock_task_class = mock.MagicMock()
            mock_task_class.time_limit = time_limit
            mock_task_class.__name__ = 'task'
            with mock.patch('kale.message.KaleMessage') as mock_message:
                mock_message.create_message.return_value = mock.MagicMock()
                test_publisher.publish(mock_task_class, 1, payload)

    def test_publish(self):
        """Publish succeeds when time_limit < visibility timeout."""
        self._publish_task(10, 2, {'args': [], 'kwargs': {}})

    def test_publish_with_app_data(self):
        """Publish succeeds when the payload carries app_data."""
        self._publish_task(10, 2, {'args': [], 'kwargs': {}, 'app_data': {}})

    def test_publish_messages_to_dead_letter_queue(self):
        """Messages are batched to the DLQ via send_messages."""
        sqs_inst = sqs.SQSTalk()
        test_publisher = publisher.Publisher(sqs_inst)
        mock_queue = mock.MagicMock()
        test_publisher._get_or_create_queue = mock.MagicMock(
            return_value=mock_queue)

        payload = {'args': [], 'kwargs': {}}
        kale_msg = message.KaleMessage(
            task_class=test_utils.MockTask,
            task_id=test_utils.MockTask._get_task_id(),
            payload=payload,
            current_retry_num=5)
        kale_msg.id = 'test-id'
        test_body = 'test-body'
        kale_msg.encode = mock.MagicMock(return_value=test_body)
        mock_messages = [kale_msg]

        with mock.patch.object(mock_queue, 'send_messages') as mock_write:
            test_publisher.publish_messages_to_dead_letter_queue(
                'dlq_name', mock_messages)
            expected_args = [{'Id': kale_msg.id,
                              'MessageBody': test_body,
                              'DelaySeconds': 0}]
            mock_write.assert_called_once_with(Entries=expected_args)

    def test_publish_bad_time_limit_equal(self):
        """A time limit equal to the visibility timeout is rejected."""
        with self.assertRaises(exceptions.InvalidTimeLimitTaskException):
            self._publish_task(600, 600, {'args': [], 'kwargs': {}})

    def test_publish_bad_time_limit_greater(self):
        """A time limit greater than the visibility timeout is rejected."""
        with self.assertRaises(exceptions.InvalidTimeLimitTaskException):
            self._publish_task(600, 601, {'args': [], 'kwargs': {}})

    def test_publish_invalid_delay_sec(self):
        """A delay_sec above SQS_MAX_TASK_DELAY_SEC is rejected."""
        sqs_inst = sqs.SQSTalk()

        test_publisher = publisher.Publisher(sqs_inst)
        test_publisher._get_or_create_queue = mock.MagicMock()
        payload = {'args': [], 'kwargs': {}}

        mock_task_class = mock.MagicMock()
        mock_task_class.time_limit = 2

        delay_sec = settings.SQS_MAX_TASK_DELAY_SEC + 1
        with mock.patch('kale.message.KaleMessage') as mock_message:
            mock_message.create_message.return_value = mock.MagicMock()
            with self.assertRaises(exceptions.InvalidTaskDelayException):
                test_publisher.publish(mock_task_class, 1, payload,
                                       delay_sec=delay_sec)
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make 
pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kale.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kale.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 
96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/kale" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kale" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 
145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 
11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 
58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\kale.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\kale.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 
181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /kale/crypt.py: -------------------------------------------------------------------------------- 1 | """Simple package for encrypting/decrypting strings. 2 | 3 | This package provides a simple mechanism for encrypting and decrypting strings 4 | very quickly using a private key. For simplicity, this key is stored in the 5 | settings module and is used globally here -- that is, every message encrypted 6 | with these functions will be encrypted with the same key. 7 | 8 | This package is very simple. If you choose to encrypt data one day, and not 9 | encrypt it the next day, you need to handle the failure scenarios. 10 | 11 | Usage :: 12 | 13 | from kale import crypt 14 | 15 | encrypted_message = crypt.encrypt("foo") 16 | decrypted_message = crypt.decrypt(encrypted_message) 17 | 18 | """ 19 | from __future__ import absolute_import 20 | 21 | import base64 22 | import builtins 23 | import codecs 24 | import logging 25 | import struct 26 | 27 | from Crypto.Cipher import AES 28 | import six 29 | 30 | from kale import settings 31 | 32 | log = logging.getLogger(__name__) 33 | 34 | # Set up our Crypto hashing object 35 | BLOCK_SIZE = 16 36 | 37 | 38 | def _set_cipher(): 39 | global cipher 40 | cipher = AES.new(builtins.bytes(settings.UTIL_CRYPT_CIPHER, 'ascii'), AES.MODE_ECB) 41 | 42 | 43 | # Get our cipher key from the settings file 44 | cipher = None 45 | if settings.UTIL_CRYPT_CIPHER: 46 | _set_cipher() 47 | 48 | 49 | class CryptException(Exception): 50 | """General encryption/decryption failure exception.""" 51 | 52 | 53 | def encrypt(msg): 54 | """Encrypts a message. 55 | 56 | :param msg: byte string message to be encrypted. 57 | :return: bytes for an encrypted version of msg. 58 | :raises: ValueError if passed anything other than bytes. 59 | CryptException if the encryption fails. 
60 | """ 61 | if not cipher: 62 | return msg 63 | if not isinstance(msg, six.binary_type): 64 | raise ValueError('only bytes can be encrypted') 65 | if six.PY2: 66 | msg = builtins.bytes(msg) 67 | 68 | msg = _pad(msg) 69 | msg = cipher.encrypt(msg) 70 | msg = base64.b64encode(msg) 71 | 72 | return msg 73 | 74 | 75 | def decrypt(msg): 76 | """Decrypts a message. 77 | 78 | :param bytes msg: string of message to be decrypted. 79 | :return: bytes for original message. 80 | :rtype: bytes 81 | """ 82 | if not cipher: 83 | return msg 84 | 85 | if isinstance(msg, six.text_type): 86 | # This should be a base64 string, so it 87 | # should encode to ascii without any problems. 88 | msg = msg.encode('ascii') 89 | 90 | if not isinstance(msg, six.binary_type): 91 | raise ValueError('Only bytes(or unicodes) can be decrypted') 92 | 93 | if six.PY2: 94 | msg = builtins.bytes(msg) 95 | 96 | try: 97 | msg = _unpad(cipher.decrypt(base64.b64decode(msg))) 98 | except (ValueError, TypeError) as e: 99 | # We can get struct.error if we end up passing an empty string 100 | # to _unpad. We get 101 | raise CryptException(e) 102 | 103 | return msg 104 | 105 | 106 | def urlsafe_encrypt(msg): 107 | """Urlsafe encrypts a message. 108 | 109 | :param bytes msg: string message to be encrypted. 110 | :return: string of encrypted version of msg. 111 | :rtype: bytes 112 | """ 113 | if not cipher: 114 | return msg 115 | if not isinstance(msg, six.binary_type): 116 | raise ValueError('only bytes can be encrypted') 117 | 118 | msg = base64.urlsafe_b64encode(cipher.encrypt(_pad(msg))) 119 | 120 | return msg 121 | 122 | 123 | def hex_encrypt(msg): 124 | """Hex encrypts a message. 125 | 126 | :param bytes msg: string message to be encrypted. 127 | :return: string for encrypted version of msg in hex. 
128 | :rtype: bytes 129 | """ 130 | if not cipher: 131 | return msg 132 | if not isinstance(msg, six.binary_type): 133 | raise ValueError('only bytes can be encrypted') 134 | 135 | msg = cipher.encrypt(_pad(msg)) 136 | msg = codecs.encode(msg, 'hex') 137 | 138 | return msg 139 | 140 | 141 | def hex_decrypt(msg): 142 | """Decrypts a message. 143 | 144 | :param bytes msg: string for the message to be decrypted. 145 | :return: string for the original message. 146 | :rtype: bytes 147 | """ 148 | if not cipher: 149 | return msg 150 | 151 | if isinstance(msg, six.text_type): 152 | # This should be a hex encoded string, so it 153 | # should encode to ascii without any problems. 154 | msg = msg.encode('ascii') 155 | 156 | if not isinstance(msg, six.binary_type): 157 | raise ValueError('Only bytes or unicodes can be decrypted') 158 | 159 | try: 160 | msg = _unpad(cipher.decrypt(codecs.decode(msg, 'hex'))) 161 | except (ValueError, TypeError) as e: 162 | raise CryptException(e) 163 | 164 | return msg 165 | 166 | 167 | def urlsafe_decrypt(msg): 168 | """Urlsafe decrypts a message. 169 | 170 | :param bytes msg: string for the message to be decrypted. 171 | :return: string for the original message. 172 | :rtype: bytes 173 | """ 174 | 175 | if not cipher: 176 | return msg 177 | 178 | if isinstance(msg, six.text_type): 179 | # This should be a base64 encoded string, so it 180 | # should encode to ascii without any problems. 181 | msg = msg.encode('ascii') 182 | 183 | if not isinstance(msg, six.binary_type): 184 | raise ValueError('Only bytes(or unicodes) can be decrypted') 185 | 186 | try: 187 | msg = _unpad(cipher.decrypt(base64.urlsafe_b64decode(msg))) 188 | except (ValueError, TypeError) as e: 189 | raise CryptException(e) 190 | 191 | return msg 192 | 193 | 194 | def _pad(msg): 195 | """Pad the message with enough bytes to be a multiple of BLOCK_SIZE. 196 | 197 | :param bytes msg: bytes message to be padded. 198 | :return: the msg with padding added. 
199 | :rtype: bytes 200 | """ 201 | 202 | padding_bytes = _get_padding_bytes(len(msg)) 203 | return msg + padding_bytes 204 | 205 | 206 | def _get_padding_bytes(msg_length): 207 | """Gets the bytes to pad a message of a given length with. 208 | 209 | We pad out with null bytes until we are one short of being 210 | a multiple of BLOCK_SIZE. The last byte is always the number 211 | of bytes we padded out(including the last one). We always apply 212 | some padding in order to be able to consistently remove the padding, 213 | so if we would apply no padding, we add padding of 214 | BLOCK_SIZE. 215 | 216 | :param msg_length: the length of the message to pad. 217 | :return: the bytes that should be appended to the message to pad it. 218 | """ 219 | 220 | bytes_to_pad = BLOCK_SIZE - msg_length % BLOCK_SIZE 221 | if bytes_to_pad == 0: 222 | bytes_to_pad = BLOCK_SIZE 223 | pack_format = '%sB' % ('x' * (bytes_to_pad - 1)) 224 | msg = struct.pack(pack_format, bytes_to_pad) 225 | return builtins.bytes(msg) 226 | 227 | 228 | def _unpad(msg): 229 | """Removes the padding we applied to the message with _pad. 230 | 231 | We read the last byte of the message as an integer 232 | and strip that many bytes off the end of the message. Note that 233 | reading as a regular integer(i.e. struct.unpack('i')) would 234 | read multiple bytes off the end which would be bad if, 235 | for instance, we only had one byte of padding. 236 | 237 | :param msg: bytes or unicode msg that needs to have padding stripped from it. 238 | :return: bytes for the message with the padding removed. 239 | """ 240 | if not msg or len(msg) < BLOCK_SIZE: 241 | raise ValueError('decrypted message was not padded correctly') 242 | msg = builtins.bytes(msg) 243 | bytes_to_strip = int(msg[-1]) 244 | # If we are trying to strip off more than BLOCK_SIZE bytes, 245 | # or more bytes than there are in the msg, 246 | # something has gone wrong, likely the msg was corrupt. 
247 | if bytes_to_strip > BLOCK_SIZE or bytes_to_strip > len(msg): 248 | raise ValueError('decrypted message was not padded correctly') 249 | return msg[:-bytes_to_strip] 250 | -------------------------------------------------------------------------------- /kale/queue_info.py: -------------------------------------------------------------------------------- 1 | """Get information of queues. 2 | 3 | We can have different ways of getting queue information, e.g., from Zookeeper, 4 | from hardcoded data, from config file, ... 5 | 6 | Thus, we need to extend the base class QueueInfoBase for different 7 | implementations and use the right implementation in different cases. 8 | """ 9 | from __future__ import absolute_import 10 | 11 | import botocore.exceptions 12 | 13 | import six 14 | import yaml 15 | 16 | 17 | class TaskQueue(object): 18 | """Represents a task queue. Always created via QueueInfo. 19 | 20 | 21 | :param str name: string for queue name. 22 | :param int priority: integer for queue priority. 23 | :param int num_iterations: integer for number of iterations to process 24 | tasks for a select_queue() call. Number of iterations to process 25 | tasks for a select_queue() call. Two reasons: 26 | 1. Calling select_queue() has overhead, so we want to reuse 27 | the chosen queue for several rounds of tasks processing. 28 | 2. SQS has limit of 10 tasks per batch. Using multiple 29 | iterations here simulates a larger batch, e.g., 20. 30 | :param int long_poll_time_sec: integer for number of seconds waiting for 31 | long poll. For SQS, if a queue is empty when task worker wants to 32 | fetch tasks from it, the task worker can wait for quite a while, 33 | hoping tasks would appear in the queue. This is to avoid costly 34 | connection reestablishment. 35 | :param int batch_size: integer for number of tasks to fetch in a batch. 36 | :param int visibility_timeout_sec: integer for the max time for a task that 37 | are invisible in an SQS queue. How to decide this property? 
38 | - T: Average running time of a task. 39 | - N: Number of tasks in a batch that are fetched at once by 40 | the worker. 41 | - F: Just to give us more headroom. E.g., 1.2 42 | => visibility_timeout = F * (T * N) 43 | """ 44 | 45 | def __init__(self, name='default', priority=5, num_iterations=2, 46 | long_poll_time_sec=5, batch_size=5, 47 | visibility_timeout_sec=600): 48 | self.simple_name = name 49 | (self.name, self.dlq_name) = self._decorate_name(name) 50 | self.priority = priority 51 | self.num_iterations = num_iterations 52 | self.batch_size = batch_size 53 | self.long_poll_time_sec = long_poll_time_sec 54 | self.visibility_timeout_sec = visibility_timeout_sec 55 | 56 | def _decorate_name(self, name): 57 | """Decorate queue name. 58 | 59 | For example, we want to prefix RELEASE version on each queue name. 60 | The default implementation is to do nothing. 61 | 62 | :param str name: string for queue name. 63 | :return: A 2-tuple (queue name, dead letter queue name). 64 | :rtype: tuple 65 | """ 66 | return (name, 'dlq-' + name) 67 | 68 | 69 | class QueueInfoBase(object): 70 | """Base class to represent Queue information. 71 | 72 | Any concrete class should implement get_queues method. 73 | """ 74 | 75 | def get_queues(self): 76 | """Returns a list of TaskQueue objects.""" 77 | raise NotImplementedError('Base class cannot be used directly.') 78 | 79 | def get_highest_priority_queue_that_needs_work(self): 80 | """Returns the highest-priority non-empty queue.""" 81 | raise NotImplementedError('Base class cannot be used directly.') 82 | 83 | def is_queue_empty(self, queue): 84 | """Check if a queue is empty. 85 | 86 | :param TaskQueue queue: A TaskQueue object. 87 | :return: True if the queue is empty; otherwise, queue is non-empty. 88 | :rtype: bool 89 | """ 90 | raise NotImplementedError('Base class cannot be used directly.') 91 | 92 | def does_queue_need_work(self, queue): 93 | """Checks if a queue should be worked on. 
94 | 95 | :param TaskQueue queue: a TaskQueue object. 96 | :return: True if the queue needs work; False otherwise. 97 | :rtype: bool 98 | """ 99 | raise NotImplementedError('Base class cannot be used directly.') 100 | 101 | 102 | class QueueInfo(QueueInfoBase): 103 | """Provides task queue info.""" 104 | 105 | _queues = None 106 | _simple_name_queues_map = None 107 | 108 | def __init__(self, config_file, sqs_talk, queue_cls=TaskQueue): 109 | """Instantiate new QueueInfo object. 110 | 111 | :param str config_file: String of config file path. 112 | :param SQSTalk sqs_talk: An SQSTalk object. 113 | :param TaskQueue queue_cls: Class (or subclass) of TaskQueue. 114 | """ 115 | 116 | # Initialize singleton if needed 117 | if not QueueInfo._queues: 118 | QueueInfo._queues = self._get_queues_from_config(config_file, 119 | queue_cls) 120 | 121 | if not QueueInfo._simple_name_queues_map: 122 | QueueInfo._simple_name_queues_map = {} 123 | for queue in QueueInfo._queues: 124 | QueueInfo._simple_name_queues_map[queue.simple_name] = queue 125 | 126 | self._sqs_talk = sqs_talk 127 | 128 | def get_queue(self, simple_name): 129 | """Get queue object by simple name. 130 | 131 | :param str simple_name: string of queue simple name. 132 | :return: a TaskQueue object. 133 | :rtype: TaskQueue 134 | """ 135 | return self._simple_name_queues_map[simple_name] 136 | 137 | def get_queues(self): 138 | """Returns a list of TaskQueue objs sorted by priority (highest first). 139 | """ 140 | return self._queues 141 | 142 | def get_highest_priority_queue_that_needs_work(self): 143 | """Returns the highest-priority queue that needs work. 144 | 145 | If all queues are empty, then None is returned. 146 | """ 147 | for queue in self._queues: 148 | if self.does_queue_need_work(queue): 149 | return queue 150 | return None 151 | 152 | def is_queue_empty(self, queue): 153 | """Check if a queue is empty. 154 | 155 | :param TaskQueue queue: a TaskQueue object. 
156 | :return: True if the queue is empty; otherwise, queue is non-empty. 157 | :rtype: bool 158 | """ 159 | sqs_queue = self._sqs_talk._get_or_create_queue(queue.name) 160 | 161 | # Call load to get updated attributes. Note this makes a 162 | # network call. 163 | sqs_queue.load() 164 | 165 | count_str = sqs_queue.attributes.get('ApproximateNumberOfMessages') 166 | if int(count_str) > 0: 167 | return False 168 | return True 169 | 170 | def does_queue_need_work(self, queue): 171 | """Checks if a queue should be worked on. 172 | 173 | This basically checks whether the queue is empty. However, 174 | if we hit a SQS rate limit, this will assume the queue needs work. 175 | 176 | :param TaskQueue queue: a TaskQueue object. 177 | :return: True if the queue needs work; False otherwise. 178 | :rtype: bool 179 | """ 180 | try: 181 | return not self.is_queue_empty(queue) 182 | except botocore.exceptions.ClientError as e: 183 | if e.response['Error']['Code'] == 'ThrottlingException': 184 | return True 185 | raise e 186 | 187 | @classmethod 188 | def _get_queues_from_config(cls, config_file, queue_cls): 189 | """Parses config file and returns queues. 190 | 191 | :param str config_file: String for the path of yaml config file for 192 | queues. 193 | :param TaskQueue queue_cls: Class (or subclass) of TaskQueue. 194 | :return: A list of TaskQueue queue objects sorted by priority. 195 | Highest priority first. 196 | :rtype: list[TaskQueue] 197 | """ 198 | with open(config_file, 'r') as fp: 199 | queues_from_config = yaml.safe_load(fp) 200 | queues = [] 201 | for queue_name, queue in six.iteritems(queues_from_config): 202 | q = queue_cls(name=queue['name'], priority=queue['priority'], 203 | num_iterations=queue['num_iterations'], 204 | long_poll_time_sec=queue['long_poll_time_sec'], 205 | batch_size=queue['batch_size'], 206 | visibility_timeout_sec=queue[ 207 | 'visibility_timeout_sec']) 208 | queues.append(q) 209 | # Sort by priority, highest priority first. 
210 | queues.sort(key=lambda x: x.priority, reverse=True) 211 | return queues 212 | -------------------------------------------------------------------------------- /kale/message.py: -------------------------------------------------------------------------------- 1 | """Custom message type for SQS messages.""" 2 | from __future__ import absolute_import 3 | 4 | import pickle 5 | 6 | import six 7 | 8 | from kale import crypt 9 | from kale import exceptions 10 | from kale import settings 11 | from kale import utils 12 | 13 | _compressor = settings.COMPRESSOR 14 | _decompressor = settings.DECOMPRESSOR 15 | _task_size_limit = settings.SQS_TASK_SIZE_LIMIT 16 | _get_current_timestamp = settings.TIMESTAMP_FUNC 17 | _get_publisher_data = settings.PUBLISHER_STR_FUNC 18 | 19 | 20 | class KaleMessage: 21 | """Kale message representing the data stored in an SQS queue.""" 22 | 23 | # _task_mapper is a class cache mapping task paths to classes. 24 | # It will initially be populated with keys provided and will lazily 25 | # create other mappings. 26 | _task_mapper = None 27 | 28 | def __init__(self, 29 | sqs_message_id=None, 30 | sqs_receipt_handle=None, 31 | sqs_queue_name=None, 32 | task_class=None, 33 | task_name=None, 34 | task_id=None, 35 | payload=None, 36 | current_retry_num=None, 37 | current_failure_num=None, 38 | enqueued_time=None, 39 | publisher_data=None, 40 | instantiate_task=False, 41 | delete_func=None 42 | ): 43 | """Constructor. 44 | 45 | :param task_class: Class of task. 46 | :param task_name: Name of task. Required if task_class is not set. 47 | :param task_id: Id of task. 48 | :param payload: Payload holds the data that the task's run_task method will be called with. 49 | :param current_retry_num: Current task retry. This will be 0 from new tasks and will be 50 | incremented for each retry. 51 | :param current_failure_num: Current task failure. This will be 0 from new tasks and will be 52 | incremented for each failure. 
53 | :param enqueued_time: Timestamp of when message was queued. If not provided then value set 54 | from setting's timestamp function. 55 | :param publisher_data: Str containing information about the publisher. If not provided the 56 | value from settings used. 57 | :param instantiate_task: Whether create instance of task_class. Default is false. 58 | :param delete_func: Delete function from the SQS message. 59 | 60 | """ 61 | 62 | self._validate_task_payload(payload) 63 | retry_count = current_retry_num or 0 64 | failure_count = current_failure_num or 0 65 | 66 | self.id = sqs_message_id 67 | self.sqs_queue_name = sqs_queue_name 68 | self.sqs_receipt_handle = sqs_receipt_handle 69 | 70 | # This represents the path to the task. The consumer will have a 71 | # dictionary mapping these values to task classes. 72 | if task_class is not None: 73 | self.task_name = '.'.join([task_class.__module__, task_class.__name__]) 74 | else: 75 | self.task_name = task_name 76 | 77 | self.task_id = task_id 78 | self.task_args = payload.get('args') 79 | self.task_kwargs = payload.get('kwargs') 80 | self.task_app_data = payload.get('app_data') 81 | self.task_retry_num = retry_count 82 | self.task_failure_num = failure_count 83 | self._enqueued_time = enqueued_time or _get_current_timestamp() 84 | self._publisher_data = publisher_data or _get_publisher_data() 85 | 86 | # Lazily instantiate the task mapper. 87 | if not self._task_mapper: 88 | self._task_mapper = {k: utils.class_import_from_path(v) 89 | for k, v in six.iteritems(settings.TASK_MAPPER)} 90 | 91 | # This will instantiate the task. 92 | if instantiate_task: 93 | self.task_inst = self._class_from_path(self.task_name)(self._get_message_body()) 94 | 95 | self.delete_func = delete_func 96 | 97 | @staticmethod 98 | def _validate_task_payload(payload): 99 | """Validate that this is a valid task. 100 | 101 | :param payload: dictionary that will be submitted to the queue. 102 | :raises: AssertionError if payload is invalid. 
103 | """ 104 | 105 | assert 'args' in payload, 'args is required to be in the payload' 106 | assert 'kwargs' in payload, 'kwargs is required to be in the payload' 107 | 108 | def _get_message_body(self): 109 | message_body = { 110 | 'id': self.task_id, 111 | 'task': self.task_name, 112 | # Payload holds the data that the task's run_task method will be 113 | # called with. 114 | # Ex: mytask.ThisTask().run_task( 115 | # *payload['args'], **payload['kwargs']) 116 | 'payload': { 117 | 'args': self.task_args, 118 | 'kwargs': self.task_kwargs, 119 | 'app_data': self.task_app_data, 120 | }, 121 | '_enqueued_time': self._enqueued_time, 122 | '_publisher': self._publisher_data, 123 | 'retry_num': self.task_retry_num, 124 | 'failure_num': self.task_failure_num, 125 | } 126 | return message_body 127 | 128 | def encode(self): 129 | """Custom encoding for Kale tasks. 130 | 131 | :return: string for encoded message. 132 | :rtype: str 133 | """ 134 | 135 | compressed_msg = _compressor( 136 | pickle.dumps(self._get_message_body(), protocol=settings.PICKLE_PROTOCOL)) 137 | compressed_msg = crypt.encrypt(compressed_msg) 138 | # Check compressed task size. 139 | if len(compressed_msg) >= _task_size_limit: 140 | raise exceptions.ChubbyTaskException( 141 | 'Task %s is over the limit of %d bytes.' % (self.task_id, 142 | _task_size_limit)) 143 | 144 | return compressed_msg.decode("utf-8") 145 | 146 | @classmethod 147 | def decode_sqs(cls, sqs_message): 148 | """Custom decoding for Kale tasks from sqs messages 149 | 150 | :param boto3.resources.factory.sqs.Message sqs_message: message to decode. 
151 | 152 | :return: a kale message 153 | :rtype: KaleMessage 154 | """ 155 | 156 | message_body = crypt.decrypt(sqs_message.body) 157 | message_body = pickle.loads(_decompressor(message_body)) 158 | # queue_url format is https://queue.amazonaws.com// 159 | sqs_queue_name = sqs_message.queue_url.rsplit('/', 1)[1] 160 | 161 | msg = cls( 162 | sqs_queue_name=sqs_queue_name, 163 | sqs_message_id=sqs_message.message_id, 164 | sqs_receipt_handle=sqs_message.receipt_handle, 165 | task_id=message_body.get('id'), 166 | task_name=message_body.get('task'), 167 | payload=message_body.get('payload'), 168 | enqueued_time=message_body.get('_enqueued_time'), 169 | publisher_data=message_body.get('_publisher'), 170 | current_retry_num=message_body.get('retry_num'), 171 | current_failure_num=message_body.get('failure_num'), 172 | instantiate_task=True, 173 | delete_func=sqs_message.delete 174 | ) 175 | 176 | return msg 177 | 178 | @classmethod 179 | def decode_str(cls, message_str): 180 | """Custom decoding for Kale tasks from strings 181 | 182 | :param str message_str: message to decode. 183 | 184 | :return: a kale message 185 | :rtype: KaleMessage 186 | """ 187 | 188 | message_body = crypt.decrypt(message_str) 189 | message_body = pickle.loads(_decompressor(message_body)) 190 | 191 | msg = cls( 192 | task_id=message_body.get('id'), 193 | task_name=message_body.get('task'), 194 | payload=message_body.get('payload'), 195 | enqueued_time=message_body.get('_enqueued_time'), 196 | publisher_data=message_body.get('_publisher'), 197 | current_retry_num=message_body.get('retry_num'), 198 | current_failure_num=message_body.get('failure_num') 199 | ) 200 | 201 | return msg 202 | 203 | def _class_from_path(self, task_path): 204 | """Return the task class given a task's path. 
205 | 206 | :param task_path: string for a class, e.g., mytask.MyTask 207 | :return: 208 | """ 209 | if task_path not in self._task_mapper: 210 | task_class = utils.class_import_from_path(task_path) 211 | self._task_mapper[task_path] = task_class 212 | return self._task_mapper[task_path] 213 | 214 | def delete(self): 215 | if self.delete_func is not None: 216 | self.delete_func() 217 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kale: Distributed task worker from Nextdoor 2 | 3 | ![Apache](https://img.shields.io/hexpm/l/plug.svg) 4 | [![Build Status](https://travis-ci.org/Nextdoor/ndkale.svg?branch=master)](https://travis-ci.org/Nextdoor/ndkale) 5 | 6 | Kale is a python task worker library that supports priority queues on Amazon SQS. 7 | 8 | Check out our blog post - [Nextdoor Taskworker: Simple, Efficient & Scalable](https://engblog.nextdoor.com/ac4f7886957b) 9 | 10 | ## How does it work? 11 | 12 | ![Kale-based Taskworker](https://cloud.githubusercontent.com/assets/1719237/16959018/e4fe6378-4d97-11e6-9903-d4f4f576524d.png) 13 | 14 | Like other distributed task queue system, publishers send task messages to queues and workers fetch messages from queues. For now, Kale supports only Amazon SQS for the queue. 15 | 16 | ### Publisher 17 | 18 | A publisher can be any python program that imports a Kale-based task class and invokes the publish function of this class. For example, if a task class looks like this: 19 | 20 | # tasks.py 21 | 22 | class MyTask: 23 | def run_task(self, arg1, arg2, *args, **kwargs): 24 | # Do something 25 | 26 | 27 | Then the publisher publishes a task to Amazon SQS, which normally takes 10s miliseconds to return: 28 | 29 | import tasks 30 | tasks.MyTask.publish(None, arg1, arg2) 31 | 32 | The publish() function is a static method of a task class. 
Other than the first parameter, which can usually be `None`, it has the same signature as the `run_task()` method. A worker process, which may run on a different machine, will pick up the message and execute the `run_task()` method of the task.
33 | 
34 | While ndkale is usable out of the box, the first parameter in `publish(app_data, *args, **kwargs)` is designed for more complex situations where certain state may need to be passed outside the context of the actual task parameters. One example of this might be to pass the environment. The `app_data` must be pickleable so that it can be encoded and inserted into the SQS message.
35 | 
36 | The default task object will be populated with an `app_data` attribute, but the default worker will not use it. You will need to extend the default Worker or Task class to take advantage of `app_data`.
37 | 
38 | ### Worker
39 | 
40 | ![task lifecycle](https://cloud.githubusercontent.com/assets/1719237/16958964/b1a1c38a-4d97-11e6-9ea3-abdc86630732.png)
41 | 
42 | A worker process runs an infinite loop. For each iteration, it does the following things:
43 | 
44 | 1. It runs a **queue selection algorithm** (select_queue) to decide which queue to fetch tasks from;
45 | 2. It fetches a batch of tasks from a queue (get_messages);
46 | 3. It runs tasks one by one in the same batch (run_task);
47 | 4. Finish up.
48 |     1. If a task succeeded, it'll be deleted from the queue;
49 |     2. If a task runs too long (exceeding **time_limit** that is a per task property for task SLA) or it fails,
50 |        it'll be put back to the queue and other workers will pick it up in the future (if retry is allowed);
51 |     3. If a batch of tasks runs too long, exceeding **visibility timeout** that is a per queue property for task
52 |        batch SLA, then unfinished tasks will be put back to the queue and other workers will pick them up in the
53 |        future.
54 | 5. It exits this iteration and enters the next iteration and repeats the above steps.
55 | 56 | #### Queue Selection Algorithm 57 | 58 | Code: kale/queue_selector.py 59 | 60 | A good queue selection algorithm has these requirements: 61 | 62 | 1. higher priority queues should have more chances to be selected than lower priority queues; 63 | 2. it should not starve low priority queues; 64 | 3. it should not send too many requests to SQS while retrieving no task, which is waste of 65 | compute resource and Amazon charges us by the number of requests. 66 | 4. it should not wait on empty queues for too long, avoiding waste of compute resources. 67 | 68 | We experimented and benchmarked quite a few queue selection algorithms. We end up using an 69 | improved version of lottery algorithm, **ReducedLottery**, which fulfill the above requirements. 70 | 71 | ReducedLottery works like this: 72 | 73 | Initialize the lottery pool with all queues 74 | while lottery pool is not empty: 75 | Run lottery based on queue priority to get a queue who wins the jackpot 76 | Short poll SQS to see if the selected queue is empty 77 | if the selected queue is not empty: 78 | return queue 79 | else: 80 | Remove this queue from the lottery pool 81 | Reset the lottery pool with all queues 82 | Return whatever queue who wins the jackpot 83 | 84 | The beauty of **ReducedLottery**: 85 | 86 | * It prefers higher priority queues, as higher priority queues get more lottery tickets and have 87 | higher chances to win the jackpot. Thus, requirement 1 is fulfilled. 88 | * It uses randomness to avoid starvation. Lower priority queues still have chance to win the jackpot. 89 | Thus, requirement 2 is fulfilled. 90 | * If the selected queue is empty, SQS will automatically let task worker long poll on the queue, 91 | avoiding sending too many requests (short polls). Thus, requirement 3 is fulfilled. 92 | * It excludes known empty queues from the lottery pool. Only when all queues are empty can it returns 93 | an empty queue. So, it's unlikely to long poll on an empty queue. 
Thus, requirement 4 is fulfilled. 94 | 95 | #### Settings 96 | 97 | There are two types of settings, worker config and queue config. 98 | 99 | ##### Worker config 100 | 101 | Settings are specified in settings modules, including AWS confidentials, queues config, queue selection 102 | algorithm, ... 103 | 104 | Settings modules are loaded in such order: 105 | 106 | * kale.default_settings 107 | * the module specified via KALE\_SETTINGS\_MODULE environment variable 108 | 109 | Here's an example 110 | 111 | import os 112 | 113 | AWS_REGION = 'us-west-2' 114 | 115 | # 116 | # Production settings 117 | # (use this for prod to talk to Amazon SQS) 118 | 119 | # MESSAGE_QUEUE_USE_PROXY = False 120 | # AWS_ACCESS_KEY_ID = 'AWS KEY ID' 121 | # AWS_SECRET_ACCESS_KEY = ''AWS SECRET KEY 122 | 123 | # 124 | # Development settings 125 | # (use this for dev to talk to ElasticMQ, which is SQS emulator) 126 | 127 | # Using elasticmq to emulate SQS locally 128 | MESSAGE_QUEUE_USE_PROXY = True 129 | MESSAGE_QUEUE_PROXY_PORT = 9324 130 | MESSAGE_QUEUE_PROXY_HOST = os.getenv('MESSAGE_QUEUE_PROXY_HOST', '0.0.0.0') 131 | AWS_ACCESS_KEY_ID = 'x' 132 | AWS_SECRET_ACCESS_KEY = 'x' 133 | 134 | QUEUE_CONFIG = 'taskworker/queue_config.yaml' 135 | 136 | # SQS limits per message size, bytes 137 | # It can be set anywhere from 1024 bytes (1KB), up to 262144 bytes (256KB). 138 | # See http://aws.amazon.com/sqs/faqs/ 139 | SQS_TASK_SIZE_LIMIT = 256000 140 | 141 | QUEUE_SELECTOR = 'kale.queue_selector.ReducedLottery' 142 | 143 | 144 | Settings in the later modules overwrite those in the early-loaded modules. 145 | 146 | ##### Queue config 147 | 148 | All queues and their properties are in a queues config yaml file whose path is specified in the above 149 | settings modules. 
150 | 151 | Here's an example 152 | 153 | # task SLA: 60/10 = 6 seconds 154 | high_priority: 155 | name: high_priority 156 | priority: 100 157 | batch_size: 10 158 | visibility_timeout_sec: 60 159 | long_poll_time_sec: 1 160 | num_iterations: 10 161 | 162 | # task SLA: 60 / 10 = 6 seconds 163 | default: 164 | name: default 165 | priority: 40 166 | batch_size: 10 167 | visibility_timeout_sec: 60 168 | long_poll_time_sec: 1 169 | num_iterations: 5 170 | 171 | # task SLA: 60 / 10 = 6 seconds 172 | low_priority: 173 | name: low_priority 174 | priority: 5 175 | batch_size: 10 176 | visibility_timeout_sec: 60 177 | long_poll_time_sec: 5 178 | num_iterations: 5 179 | 180 | ## How to implement a distributed task worker system using Kale 181 | 182 | ### Install kale 183 | 184 | From source code 185 | 186 | python setup.py install 187 | 188 | Using pip (from github repo) 189 | 190 | # 191 | # Put this in requirements.txt, then run 192 | # pip install -r requirements.txt 193 | # 194 | 195 | # If you want the latest build 196 | git+https://github.com/Nextdoor/ndkale.git#egg=ndkale 197 | 198 | # Or put this if you want a specific commit 199 | git+https://github.com/Nextdoor/ndkale.git@67f873ed7b0a8131cc8d72453d749ffb389d695f 200 | 201 | # 202 | # Run from command line 203 | # 204 | 205 | pip install -e git+https://github.com/Nextdoor/ndkale.git#egg=ndkale 206 | 207 | (We'll upload the package to PyPI soon.) 208 | 209 | ### Example implementation 210 | 211 | See code in the example/ directory. 212 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | # 4 | # kale documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Feb 4 15:15:06 2015. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 
9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | #sys.path.insert(0, os.path.abspath('.')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.intersphinx', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix of source filenames. 41 | source_suffix = '.rst' 42 | 43 | # The encoding of source files. 44 | #source_encoding = 'utf-8-sig' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project. 50 | project = u'kale' 51 | copyright = u'2015, Nextdoor' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '0.1.0' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '0.1.0' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 
64 | #language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | #today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | #today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ['_build'] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | #default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | #add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | #add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | #show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = 'sphinx' 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | #modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | #keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = 'default' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | #html_theme_options = {} 111 | 112 | # Add any paths that contain custom themes here, relative to this directory. 113 | #html_theme_path = [] 114 | 115 | # The name for this set of Sphinx documents. If None, it defaults to 116 | # " v documentation". 
117 | #html_title = None 118 | 119 | # A shorter title for the navigation bar. Default is the same as html_title. 120 | #html_short_title = None 121 | 122 | # The name of an image file (relative to this directory) to place at the top 123 | # of the sidebar. 124 | #html_logo = None 125 | 126 | # The name of an image file (within the static path) to use as favicon of the 127 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 128 | # pixels large. 129 | #html_favicon = None 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['_static'] 135 | 136 | # Add any extra paths that contain custom files (such as robots.txt or 137 | # .htaccess) here, relative to this directory. These files are copied 138 | # directly to the root of the documentation. 139 | #html_extra_path = [] 140 | 141 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 142 | # using the given strftime format. 143 | #html_last_updated_fmt = '%b %d, %Y' 144 | 145 | # If true, SmartyPants will be used to convert quotes and dashes to 146 | # typographically correct entities. 147 | #html_use_smartypants = True 148 | 149 | # Custom sidebar templates, maps document names to template names. 150 | #html_sidebars = {} 151 | 152 | # Additional templates that should be rendered to pages, maps page names to 153 | # template names. 154 | #html_additional_pages = {} 155 | 156 | # If false, no module index is generated. 157 | #html_domain_indices = True 158 | 159 | # If false, no index is generated. 160 | #html_use_index = True 161 | 162 | # If true, the index is split into individual pages for each letter. 163 | #html_split_index = False 164 | 165 | # If true, links to the reST sources are added to the pages. 
166 | #html_show_sourcelink = True 167 | 168 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 169 | #html_show_sphinx = True 170 | 171 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 172 | #html_show_copyright = True 173 | 174 | # If true, an OpenSearch description file will be output, and all pages will 175 | # contain a tag referring to it. The value of this option must be the 176 | # base URL from which the finished HTML is served. 177 | #html_use_opensearch = '' 178 | 179 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 180 | #html_file_suffix = None 181 | 182 | # Output file base name for HTML help builder. 183 | htmlhelp_basename = 'kaledoc' 184 | 185 | 186 | # -- Options for LaTeX output --------------------------------------------- 187 | 188 | latex_elements = { 189 | # The paper size ('letterpaper' or 'a4paper'). 190 | #'papersize': 'letterpaper', 191 | 192 | # The font size ('10pt', '11pt' or '12pt'). 193 | #'pointsize': '10pt', 194 | 195 | # Additional stuff for the LaTeX preamble. 196 | #'preamble': '', 197 | } 198 | 199 | # Grouping the document tree into LaTeX files. List of tuples 200 | # (source start file, target name, title, 201 | # author, documentclass [howto, manual, or own class]). 202 | latex_documents = [ 203 | ('index', 'kale.tex', u'kale Documentation', 204 | u'Nextdoor', 'manual'), 205 | ] 206 | 207 | # The name of an image file (relative to this directory) to place at the top of 208 | # the title page. 209 | #latex_logo = None 210 | 211 | # For "manual" documents, if this is true, then toplevel headings are parts, 212 | # not chapters. 213 | #latex_use_parts = False 214 | 215 | # If true, show page references after internal links. 216 | #latex_show_pagerefs = False 217 | 218 | # If true, show URL addresses after external links. 219 | #latex_show_urls = False 220 | 221 | # Documents to append as an appendix to all manuals. 
222 | #latex_appendices = [] 223 | 224 | # If false, no module index is generated. 225 | #latex_domain_indices = True 226 | 227 | 228 | # -- Options for manual page output --------------------------------------- 229 | 230 | # One entry per manual page. List of tuples 231 | # (source start file, name, description, authors, manual section). 232 | man_pages = [ 233 | ('index', 'kale', u'kale Documentation', 234 | [u'Nextdoor'], 1) 235 | ] 236 | 237 | # If true, show URL addresses after external links. 238 | #man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ('index', 'kale', u'kale Documentation', 248 | u'Nextdoor', 'kale', 'One line description of project.', 249 | 'Miscellaneous'), 250 | ] 251 | 252 | # Documents to append as an appendix to all manuals. 253 | #texinfo_appendices = [] 254 | 255 | # If false, no module index is generated. 256 | #texinfo_domain_indices = True 257 | 258 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 259 | #texinfo_show_urls = 'footnote' 260 | 261 | # If true, do not generate a @detailmenu in the "Top" node's menu. 262 | #texinfo_no_detailmenu = False 263 | 264 | 265 | # Example configuration for intersphinx: refer to the Python standard library. 
266 | intersphinx_mapping = {'http://docs.python.org/': None} 267 | -------------------------------------------------------------------------------- /kale/tests/test_task.py: -------------------------------------------------------------------------------- 1 | """Module testing the kale.task module.""" 2 | from __future__ import absolute_import 3 | 4 | import mock 5 | import unittest 6 | 7 | from kale import exceptions 8 | from kale import task 9 | from kale import test_utils 10 | 11 | from six.moves import range 12 | 13 | 14 | class TaskFailureTestCase(unittest.TestCase): 15 | """Test handle_failure logic.""" 16 | 17 | def _create_patch(self, name): 18 | """Helper method for creating scoped mocks.""" 19 | patcher = mock.patch(name) 20 | patch = patcher.start() 21 | self.addCleanup(patcher.stop) 22 | return patch 23 | 24 | def testRunWorker(self): 25 | """Test running a task.""" 26 | setup_env = self._create_patch( 27 | 'kale.task.Task._setup_task_environment') 28 | pre_run = self._create_patch('kale.task.Task._pre_run') 29 | post_run = self._create_patch('kale.task.Task._post_run') 30 | clean_env = self._create_patch( 31 | 'kale.task.Task._clean_task_environment') 32 | 33 | task_inst = test_utils.new_mock_task(task_class=test_utils.MockTask) 34 | task_args = [1, 'a'] 35 | task_inst.run(*task_args) 36 | 37 | setup_env.assert_called_once_with() 38 | pre_run.assert_called_once_with(*task_args) 39 | post_run.assert_called_once_with(*task_args) 40 | clean_env.assert_called_once_with( 41 | task_id='mock_task', task_name='kale.test_utils.MockTask') 42 | 43 | def testRunWorkerFailTask(self): 44 | """Test running a task.""" 45 | setup_env = self._create_patch( 46 | 'kale.task.Task._setup_task_environment') 47 | pre_run = self._create_patch('kale.task.Task._pre_run') 48 | post_run = self._create_patch('kale.task.Task._post_run') 49 | clean_env = self._create_patch( 50 | 'kale.task.Task._clean_task_environment') 51 | 52 | task_inst = 
test_utils.new_mock_task(task_class=test_utils.FailTask) 53 | task_inst._start_time = 1 54 | task_args = [1, 'a'] 55 | 56 | with self.assertRaises(exceptions.TaskException) as exc_ctxt_mngr: 57 | task_inst.run(*task_args) 58 | 59 | setup_env.assert_called_once_with() 60 | pre_run.assert_called_once_with(*task_args) 61 | assert not post_run.called, '_post_run should not have been called.' 62 | clean_env.assert_called_once_with( 63 | task_id='fail_task', task_name='kale.test_utils.FailTask', 64 | exc=exc_ctxt_mngr.exception) 65 | self.assertTrue(task_inst._end_time > 0) 66 | self.assertTrue(task_inst._task_latency_sec > 0) 67 | 68 | def testTaskUnrecoverableException(self): 69 | """Task task failing with unrecoverable exception.""" 70 | 71 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 72 | message = test_utils.MockMessage(task_inst) 73 | 74 | with mock.patch( 75 | 'kale.task.Task._report_permanent_failure') as fail_func: 76 | exc = SyntaxError('Unrecoverable Error') 77 | retried = test_utils.FailTask.handle_failure(message, exc) 78 | self.assertFalse(retried) 79 | fail_func.assert_called_once_with( 80 | message, exc, task.PERMANENT_FAILURE_UNRECOVERABLE, True) 81 | 82 | def testDelayedPublish(self): 83 | task_inst = test_utils.new_mock_task(task_class=test_utils.MockTask) 84 | delay_sec = 60 85 | random_arg = 99 86 | random_kwarg = 100 87 | payload = { 88 | 'args': (random_arg,), 89 | 'kwargs': {'random_kwarg': random_kwarg, 'delay_sec': delay_sec}, 90 | 'app_data': {}} 91 | with mock.patch( 92 | 'kale.publisher.Publisher.publish') as publish_func: 93 | task_inst.publish({}, random_arg, delay_sec=delay_sec, random_kwarg=random_kwarg) 94 | message = test_utils.MockMessage(task_inst) 95 | 96 | publish_func.assert_called_once_with(test_utils.MockTask, message.task_id, payload, 97 | delay_sec=delay_sec) 98 | 99 | def testTaskNoRetries(self): 100 | """Task task failing with retries disabled.""" 101 | 102 | task_inst = test_utils.new_mock_task( 103 
| task_class=test_utils.FailTaskNoRetries) 104 | message = test_utils.MockMessage(task_inst) 105 | 106 | with mock.patch( 107 | 'kale.task.Task._report_permanent_failure') as fail_func: 108 | exc = exceptions.TaskException('Exception') 109 | retried = test_utils.FailTaskNoRetries.handle_failure(message, exc) 110 | self.assertFalse(retried) 111 | fail_func.assert_called_once_with( 112 | message, exc, task.PERMANENT_FAILURE_NO_RETRY, True) 113 | 114 | def testTaskRetriesExceeded(self): 115 | """Task task failing with retries exceeded.""" 116 | 117 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 118 | message = test_utils.MockMessage( 119 | task_inst, failure_num=test_utils.FailTask.max_retries) 120 | 121 | with mock.patch( 122 | 'kale.task.Task._report_permanent_failure') as fail_func: 123 | exc = exceptions.TaskException('Exception') 124 | retried = test_utils.FailTask.handle_failure(message, exc) 125 | self.assertFalse(retried) 126 | fail_func.assert_called_once_with( 127 | message, exc, task.PERMANENT_FAILURE_RETRIES_EXCEEDED, False) 128 | 129 | def testTaskRetries(self): 130 | """Task task failing with retries exceeded.""" 131 | 132 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 133 | message = test_utils.MockMessage( 134 | task_inst, failure_num=test_utils.FailTask.max_retries) 135 | 136 | with mock.patch( 137 | 'kale.task.Task._report_permanent_failure') as fail_func: 138 | exc = exceptions.TaskException('Exception') 139 | retried = test_utils.FailTask.handle_failure(message, exc) 140 | self.assertFalse(retried) 141 | fail_func.assert_called_once_with( 142 | message, exc, task.PERMANENT_FAILURE_RETRIES_EXCEEDED, False) 143 | 144 | def testTaskRetryDelayWithoutFailure(self): 145 | """Task task failing with delay without failure""" 146 | 147 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 148 | sample_values = [ 149 | (i, test_utils.FailTask._get_delay_sec_for_retry(i)) for i in 150 | 
range(task_inst.max_retries)] 151 | payload = { 152 | 'args': [], 153 | 'kwargs': {}, 154 | 'app_data': {}} 155 | 156 | for retry, delay_sec in sample_values: 157 | with mock.patch( 158 | 'kale.publisher.Publisher.publish') as publish_func: 159 | message = test_utils.MockMessage(task_inst, retry_num=retry) 160 | 161 | retried = test_utils.FailTask.handle_failure( 162 | message, exceptions.TaskException('Exception'), increment_failure_num=False) 163 | self.assertTrue(retried) 164 | publish_func.assert_called_once_with( 165 | test_utils.FailTask, message.task_id, payload, 166 | current_failure_num=0, current_retry_num=(retry + 1), 167 | delay_sec=delay_sec) 168 | 169 | def testTaskRetryDelayWithFailure(self): 170 | """Task task retrying with delay with failure""" 171 | 172 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 173 | sample_values = [ 174 | (i, test_utils.FailTask._get_delay_sec_for_retry(i)) for i in 175 | range(task_inst.max_retries)] 176 | payload = { 177 | 'args': [], 178 | 'kwargs': {}, 179 | 'app_data': {}} 180 | 181 | for failure, delay_sec in sample_values: 182 | with mock.patch( 183 | 'kale.publisher.Publisher.publish') as publish_func: 184 | message = test_utils.MockMessage(task_inst, failure_num=failure, retry_num=failure) 185 | 186 | retried = test_utils.FailTask.handle_failure( 187 | message, exceptions.TaskException('Exception'), increment_failure_num=True) 188 | self.assertTrue(retried) 189 | publish_func.assert_called_once_with( 190 | test_utils.FailTask, message.task_id, payload, 191 | current_failure_num=(failure + 1), current_retry_num=(failure + 1), 192 | delay_sec=delay_sec) 193 | 194 | def testTaskRuntimeExceeded(self): 195 | """Task task failing from timeout.""" 196 | 197 | task_inst = test_utils.new_mock_task(task_class=test_utils.FailTask) 198 | 199 | with mock.patch( 200 | 'kale.task.Task._report_permanent_failure') as fail_func: 201 | exc = exceptions.TaskException('Exception') 202 | message = 
test_utils.MockMessage(task_inst, retry_num=0, 203 | failure_num=task_inst.max_retries + 1) 204 | retried = test_utils.FailTask.handle_failure(message, exc) 205 | self.assertFalse(retried) 206 | fail_func.assert_called_once_with( 207 | message, exc, task.PERMANENT_FAILURE_RETRIES_EXCEEDED, False) 208 | 209 | def testTargetRuntimeExceeded(self): 210 | """Task task target runtime exceeded.""" 211 | 212 | task_inst = test_utils.new_mock_task( 213 | task_class=test_utils.SlowButNotTooSlowTask) 214 | 215 | with mock.patch( 216 | 'kale.task.Task._alert_runtime_exceeded') as time_exceeded: 217 | task_inst.run() 218 | self.assertTrue(time_exceeded.called) 219 | 220 | def testBlacklistedTaskFails(self): 221 | """Test that a blacklisted task raises an exception.""" 222 | setup_env = self._create_patch( 223 | 'kale.task.Task._setup_task_environment') 224 | pre_run = self._create_patch('kale.task.Task._pre_run') 225 | run_task = self._create_patch('kale.task.Task.run_task') 226 | clean_env = self._create_patch( 227 | 'kale.task.Task._clean_task_environment') 228 | check_blacklist = self._create_patch('kale.task.Task._check_blacklist') 229 | raised_exc = exceptions.BlacklistedException() 230 | check_blacklist.side_effect = raised_exc 231 | 232 | task_inst = test_utils.new_mock_task(task_class=test_utils.MockTask) 233 | task_inst._start_time = 1 234 | task_args = [1, 'a'] 235 | 236 | with self.assertRaises(exceptions.BlacklistedException): 237 | task_inst.run(*task_args) 238 | 239 | setup_env.assert_called_once_with() 240 | pre_run.assert_called_once_with(*task_args) 241 | self.assertFalse(run_task.called) 242 | clean_env.assert_called_once_with( 243 | task_id='mock_task', task_name='kale.test_utils.MockTask', 244 | exc=raised_exc) 245 | 246 | def testBlacklistedTaskNoRetries(self): 247 | """Test that a blacklisted task raises an exception.""" 248 | setup_env = self._create_patch( 249 | 'kale.task.Task._setup_task_environment') 250 | pre_run = 
self._create_patch('kale.task.Task._pre_run') 251 | run_task = self._create_patch('kale.task.Task.run_task') 252 | clean_env = self._create_patch( 253 | 'kale.task.Task._clean_task_environment') 254 | check_blacklist = self._create_patch('kale.task.Task._check_blacklist') 255 | raised_exc = exceptions.BlacklistedException() 256 | check_blacklist.side_effect = raised_exc 257 | 258 | mock_message = test_utils.new_mock_message( 259 | task_class=test_utils.MockTask) 260 | task_inst = mock_message.task_inst 261 | task_inst._start_time = 1 262 | task_args = [1, 'a'] 263 | 264 | with self.assertRaises(exceptions.BlacklistedException): 265 | task_inst.run(*task_args) 266 | 267 | setup_env.assert_called_once_with() 268 | pre_run.assert_called_once_with(*task_args) 269 | self.assertFalse(run_task.called) 270 | clean_env.assert_called_once_with( 271 | task_id='mock_task', task_name='kale.test_utils.MockTask', 272 | exc=raised_exc) 273 | 274 | # Check that task 275 | permanent_failure = not task_inst.__class__.handle_failure( 276 | mock_message, raised_exc) 277 | self.assertTrue(permanent_failure) 278 | -------------------------------------------------------------------------------- /kale/task.py: -------------------------------------------------------------------------------- 1 | """Module containing the base class for tasks.""" 2 | from __future__ import absolute_import 3 | 4 | import logging 5 | import time 6 | import uuid 7 | 8 | from kale import exceptions 9 | from kale import publisher 10 | from kale import settings 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | PERMANENT_FAILURE_UNRECOVERABLE = 'unrecoverable' 15 | PERMANENT_FAILURE_NO_RETRY = 'no_retries' 16 | PERMANENT_FAILURE_RETRIES_EXCEEDED = 'max_retries' 17 | 18 | publisher_inst = None 19 | 20 | 21 | class Task(object): 22 | """Base class for kale tasks.""" 23 | 24 | # Exponential retry settings. Overridable on a per-task basis based on 25 | # the task type, but these are our defaults. 
26 | # Each time a task is retried its given a timeout before it will be 27 | # visible to consumers. 28 | # This is calculated with the following: 29 | # timeout = (1 << current_retries) * retry_delay_multiple 30 | _retry_delay_multiple = settings.RETRY_DELAY_MULTIPLE_SEC 31 | 32 | # Number of times a task can be retried after it fails the first time. 33 | # Set to None or 0 for no retries. 34 | max_retries = 4 35 | 36 | # Alert on tasks that exceed this time. 37 | target_runtime = 50 38 | 39 | # Default time until task is killed. 40 | time_limit = 60 41 | 42 | # Blacklist of exceptions to never 43 | # retry on (unlikely to be transient). 44 | EXCEPTION_LIST = ( 45 | exceptions.BlacklistedException, 46 | KeyError, 47 | NotImplementedError, 48 | SyntaxError, 49 | UnboundLocalError) 50 | 51 | # String representation the a task queue, each task must override 52 | # this value. 53 | queue = None 54 | 55 | def __init__(self, message_body=None, *args, **kwargs): 56 | """Initialize an instance of a task. 57 | 58 | :param message_payload: Payload this task is being created from 59 | (optionally None for testability). 60 | """ 61 | # global mocking 62 | message_body = message_body or {} 63 | self.task_id = message_body.get('id') 64 | self.task_name = '%s.%s' % (self.__class__.__module__, 65 | self.__class__.__name__) 66 | self.app_data = None 67 | payload = message_body.get('payload') 68 | if payload: 69 | self.app_data = payload.get('app_data', None) 70 | 71 | # Used for tracking/diagnostics. 
        # Publisher metadata and timing bookkeeping.  The timing fields are
        # populated over the task lifecycle by _dequeued(), _pre_run() and
        # _post_run() (or by the failure path inside run()).
        self._publisher_data = message_body.get('_publisher')
        self._enqueued_time = message_body.get('_enqueued_time', 0)
        self._dequeued_time = None
        self._start_time = None
        self._end_time = None
        self._task_latency_sec = 0

        self._dequeued()

    @staticmethod
    def _get_publisher():
        """Return the module-level Publisher singleton, creating it lazily.

        :return: the shared publisher instance.
        :rtype: publisher.Publisher
        """
        global publisher_inst
        if publisher_inst is None:
            publisher_inst = publisher.Publisher()
        return publisher_inst

    @classmethod
    def _get_task_id(cls, *args, **kwargs):
        """Return a unique task identifier.

        This can be overridden in sub-classes to create task ids that
        are more descriptive.

        :return: the unique identifier for a task.
        :rtype: str
        """
        return '%s_uuid_%s' % (cls.__name__, uuid.uuid1())

    @classmethod
    def publish(cls, app_data, *args, **kwargs):
        """Class method to publish a task given instance specific arguments.

        :param app_data: application-provided payload forwarded to the task
            unchanged inside the message payload.
        :return: the id of the published task.
        :rtype: str
        """
        # delay_sec is read here but deliberately left inside kwargs as
        # well, so it is also visible to the task when it runs.
        delay_sec = kwargs.get('delay_sec')
        task_id = cls._get_task_id(*args, **kwargs)
        payload = {
            'args': args,
            'kwargs': kwargs,
            'app_data': app_data}
        pub = cls._get_publisher()
        pub.publish(cls, task_id, payload, delay_sec=delay_sec)
        return task_id

    @classmethod
    def _get_delay_sec_for_retry(cls, current_retry_num):
        """Generate a delay based on the number of times the task has retried.

        :param int current_retry_num: Task retry count for the task that is
            about to be published.
        :return: Number of seconds this task should wait before running again.
        :rtype: int
        """
        # Exponentially backoff the wait time for task attempts, capped at
        # the maximum SQS task delay from settings.
        return min(((1 << current_retry_num) * cls._retry_delay_multiple),
                   settings.SQS_MAX_TASK_DELAY_SEC)

    @classmethod
    def handle_failure(cls, message, raised_exception, increment_failure_num=True):
        """Logic to respond to task failure.

        :param KaleMessage message: instance of KaleMessage containing the
            task that failed.
        :param Exception raised_exception: exception that the failed task
            raised.
        :param increment_failure_num: boolean whether the failure should increment
            the retry count.
        :return: True if the task will be retried, False otherwise.
        :rtype: boolean
        """

        logger.warning('Task %s failed: %s.' % (message.task_id,
                                                raised_exception))

        # Give the task a chance to react to being killed by the timeout.
        if isinstance(raised_exception, exceptions.TimeoutException):
            message.task_inst._kill_runtime_exceeded()

        # If our exception falls into a specific list, we bail out completely
        # and do not retry.
        if isinstance(raised_exception, cls.EXCEPTION_LIST):
            cls._report_permanent_failure(
                message, raised_exception,
                PERMANENT_FAILURE_UNRECOVERABLE, True)
            return False

        # See if retries are enabled at all. If is <= 0, then just return.
        if cls.max_retries is None or cls.max_retries <= 0:
            cls._report_permanent_failure(
                message, raised_exception, PERMANENT_FAILURE_NO_RETRY, True)
            return False

        # Monitor retries and dropped tasks
        if message.task_failure_num >= cls.max_retries:
            cls._report_permanent_failure(
                message, raised_exception,
                PERMANENT_FAILURE_RETRIES_EXCEEDED, False)
            return False

        failure_count = message.task_failure_num
        if increment_failure_num:
            failure_count = failure_count + 1
        cls.republish(message, failure_count)
        return True

    @classmethod
    def republish(cls, message, failure_count):
        """Re-publish a task with an incremented retry count.

        :param KaleMessage message: message whose task is being re-published.
        :param int failure_count: failure count to publish the task with.
        """
        payload = {
            'args': message.task_args,
            'kwargs': message.task_kwargs,
            'app_data': message.task_app_data}
        retry_count = message.task_retry_num + 1
        # The delay is derived from the *current* retry number, not the
        # incremented one.
        delay_sec = cls._get_delay_sec_for_retry(message.task_retry_num)
        pub = cls._get_publisher()
        pub.publish(
            cls, message.task_id, payload,
            current_retry_num=retry_count, current_failure_num=failure_count, delay_sec=delay_sec)

    def run(self, *args, **kwargs):
        """Wrap the run_task method of tasks.

        We use this instead of a decorator to protect against the case where
        a subclass may not call super().

        The order of operations in a subclass should look like this:
            1) Subclass's override _pre_run logic.
            2) call super() at the end of the override.
            3) run_task() will run.
            4) call super() at the start of _post_run override.
            5) Subclass's override _post_run logic.
        """

        self._setup_task_environment()
        self._pre_run(*args, **kwargs)

        try:
            # This raises an exception if the task should not be attempted.
            # This enables tasks to be blacklisted by ID or type.
            self._check_blacklist(*args, **kwargs)
            self.run_task(*args, **kwargs)
        except Exception as exc:
            # Record latency here.
            self._end_time = time.time()
            self._task_latency_sec = self._end_time - self._start_time

            # Cleanup the environment this task was running in right away.
            self._clean_task_environment(task_id=self.task_id,
                                         task_name=self.task_name, exc=exc)
            raise

        self._post_run(*args, **kwargs)
        self._clean_task_environment(task_id=self.task_id,
                                     task_name=self.task_name)

    def run_task(self, *args, **kwargs):
        """Run the task, this must be implemented by subclasses."""
        raise NotImplementedError()

    def should_run_task(self, *args, **kwargs):
        """Hook letting subclasses veto a task right before it runs.

        :return: True to run the task; False to skip it (the worker
            re-publishes a skipped task — see worker._run_single_message).
        :rtype: bool
        """
        return True

    def _check_blacklist(self, *args, **kwargs):
        """Raises an exception if a task should not run.

        This enables a subclass to blacklist tasks by ID or type. This needs
        to be handled outside of kale since it requires a datastore.
234 | 235 | Ex: 236 | if task_id in cache.get(BLACKLISTED_IDS) or task_name in 237 | cache.get(BLACKLISTED_TASK_TYPES): 238 | raise exceptions.BlacklistedException() 239 | """ 240 | return 241 | 242 | def _dequeued(self, *args, **kwargs): 243 | """Method called when a task is pulled from the queue and instantiated. 244 | 245 | This does not mean that this task instance will necessarily run. It 246 | was most likely pulled from the queue in a batch and will still sit 247 | idle until its turn, if it's visibility timeout runs out before that 248 | point it will be released back to the queue. 249 | 250 | Note: We do funny things in celery to determine time spent enqueued, 251 | in SQS we may have alternative options here that require 252 | the message but no args/kwargs. 253 | """ 254 | self._dequeued_time = time.time() 255 | 256 | @staticmethod 257 | def _setup_task_environment(): 258 | """Setup the environment for this task.""" 259 | pass 260 | 261 | @staticmethod 262 | def _clean_task_environment(task_id=None, task_name=None, exc=None): 263 | """Cleans the environment for this task. 264 | 265 | Args: 266 | task_id: string of task id. 267 | task_name: string of task name. 268 | exc: The exception raised by the task, None if the task succeeded. 269 | """ 270 | pass 271 | 272 | def _pre_run(self, *args, **kwargs): 273 | """Called immediately prior to a task running.""" 274 | self._start_time = time.time() 275 | 276 | def _post_run(self, *args, **kwargs): 277 | """Called immediately after a task finishes. 278 | 279 | This will not be called if the task fails. 280 | """ 281 | self._end_time = time.time() 282 | self._task_latency_sec = self._end_time - self._start_time 283 | 284 | if self.target_runtime: 285 | if self._task_latency_sec >= self.target_runtime: 286 | self._alert_runtime_exceeded() 287 | 288 | def _alert_runtime_exceeded(self, *args, **kwargs): 289 | """Handle the case where a task exceeds its alert runtime. 
290 | 291 | This will be called during task post_run. 292 | """ 293 | pass 294 | 295 | def _kill_runtime_exceeded(self, *args, **kwargs): 296 | """Handle the case where a task is killed due to timing out. 297 | 298 | This will be called by the task's onfailure 299 | """ 300 | pass 301 | 302 | @classmethod 303 | def _report_permanent_failure(cls, message, exception, failure_type, 304 | log_exception): 305 | """Handles reporting of a permanent failure. 306 | 307 | :param KaleMessage message: A kale.message instance that contains the 308 | task. 309 | :param Exception exception: The error info that contains the stacktrace 310 | :param str failure_type: A string used for logging to denote the type 311 | of permanent failure. 312 | :param bool log_exception: A bool denoting if we should include the 313 | exception stacktrace. 314 | """ 315 | message_str = ('PERMANENT_TASK_FAILURE: FAILURE_TYPE=%s TASK_TYPE=%s ' 316 | 'TASK_ID=%s, TRACEBACK=%s' % 317 | (failure_type, message.task_name, message.task_id, 318 | exception)) 319 | logger.error(message_str) 320 | -------------------------------------------------------------------------------- /kale/scripts/benchmark_queue_selector.py: -------------------------------------------------------------------------------- 1 | """Benchmarks queue_selector implementations. 2 | 3 | queue_selector is used to decide which queue a task worker should process 4 | in a task processing cycle. 5 | 6 | This provides a framework to simulate production task logs and to evaluate 7 | different queue_selector implementation. 8 | 9 | How it works 10 | ------------ 11 | It spawns N threads to simulate N tasks workers, and 1 thread to simulate 12 | a task publisher. 13 | 14 | Tasks load 15 | ---------- 16 | It needs a csv file with the following format to simulate tasks load: 17 | 18 | QUEUE_NAME,running_time_in_seconds 19 | QUEUE_NAME,running_time_in_seconds 20 | ... 
21 | 22 | For example: 23 | 24 | default,0.24 25 | default,0.02 26 | highp,2.46 27 | highp,0.11 28 | ... 29 | 30 | The path of the tasks load file is specified via --tasks_load_file argument. 31 | 32 | Example 33 | ------- 34 | python benchmark_queue_selector.py --tasks_load_file ~/sample_logs \ 35 | --speedup 1 \ 36 | --publish_interval 0.01 \ 37 | --queue_selector_class HighestPriorityLottery 38 | 39 | python benchmark_queue_selector.py --tasks_load_file ~/sample_logs \ 40 | --speedup 1 \ 41 | --publish_interval 0.01 \ 42 | --queue_selector_class Lottery 43 | 44 | python benchmark_queue_selector.py --tasks_load_file ~/sample_logs \ 45 | --speedup 1 \ 46 | --publish_interval 0.01 \ 47 | --queue_selector_class Random 48 | """ 49 | from __future__ import absolute_import 50 | 51 | import argparse 52 | import csv 53 | import logging 54 | import os 55 | import threading 56 | import time 57 | 58 | from six.moves import range 59 | import six 60 | import six.moves.queue 61 | 62 | from kale import queue_info 63 | from kale import queue_selector 64 | 65 | 66 | parser = argparse.ArgumentParser() 67 | parser.add_argument('queue_selector_class', type=str, default='Random', 68 | help='The class for implementing queue_selector.') 69 | parser.add_argument('tasks_load_file', type=str, default='', 70 | help='The tasks load file path.') 71 | parser.add_argument('workers', type=int, default=5 * 8, 72 | help='Number of task workers.') 73 | parser.add_argument('speedup', type=int, default=1, 74 | help='Speedup task processing by [speedup] times.') 75 | parser.add_argument('publish_interval', type=float, default=0.01, 76 | help='Interval (seconds) between two task publishings.') 77 | 78 | 79 | logging.basicConfig(level='INFO') 80 | log = logging.getLogger('kale.benchmark') 81 | 82 | PRINT_STATS_INTERVAL = 2 # seconds 83 | 84 | all_done = False 85 | 86 | 87 | class StaticTaskQueue(queue_info.TaskQueue): 88 | """The queue class used to assist benchmarking.""" 89 | default_priority = 5 90 
| 91 | def __init__(self, name='default', priority=5, num_iterations=2, 92 | long_poll_time_sec=1, batch_size=5, 93 | visibility_timeout_sec=600, default_priority=5): 94 | super(StaticTaskQueue, self).__init__( 95 | name=name, priority=priority, 96 | num_iterations=num_iterations, 97 | long_poll_time_sec=long_poll_time_sec, 98 | batch_size=batch_size, 99 | visibility_timeout_sec=visibility_timeout_sec) 100 | 101 | # We want to experiment dynamic priorities here, so should have a 102 | # variable to keep track of the initial priority 103 | self.default_priority = default_priority 104 | 105 | # What tasks are left in this queue for processing 106 | self.tasks = six.moves.queue.Queue(maxsize=0) 107 | 108 | # What tasks are finished 109 | self.finished_tasks = six.moves.queue.Queue(maxsize=0) 110 | 111 | # How many times we need to wait for long polling. 112 | # That is, how often we hit an empty queue. 113 | self.long_polling_count = 0 114 | 115 | 116 | class StaticQueueInfo(queue_info.QueueInfoBase): 117 | """Hardcoded queue information.""" 118 | 119 | def __init__(self): 120 | self.queues = { 121 | 'default': StaticTaskQueue(name='default', priority=75, 122 | default_priority=75), 123 | 'large': StaticTaskQueue(name='large', priority=75, 124 | default_priority=75), 125 | 'highp': StaticTaskQueue(name='highp', priority=100, 126 | default_priority=100), 127 | 'lowp': StaticTaskQueue(name='lowp', priority=1, 128 | default_priority=1), 129 | 'digest': StaticTaskQueue(name='digest', priority=70, 130 | default_priority=70)} 131 | 132 | def get_queues(self): 133 | """Returns all queues.""" 134 | return list(self.queues.values()) 135 | 136 | def is_empty(self): 137 | """Are all queues empty?""" 138 | for (queue_name, queue) in six.iteritems(self.queues): 139 | if not self.is_queue_empty(queue): 140 | return False 141 | return True 142 | 143 | def get_highest_priority_queue_that_needs_work(self): 144 | """Returns a list of non-empty queues.""" 145 | non_empty_queues = [] 146 
| for (queue_name, queue) in six.iteritems(self.queues): 147 | if not self.is_queue_empty(queue): 148 | non_empty_queues.append(self.queues[queue_name]) 149 | if len(non_empty_queues) == 0: 150 | return None 151 | non_empty_queues.sort(key=lambda x: x.priority, reverse=True) 152 | return non_empty_queues[0] 153 | 154 | def is_queue_empty(self, queue): 155 | """Check if a queue is empty.""" 156 | the_queue = self.queues[queue.name] 157 | if not the_queue.tasks.empty(): 158 | return False 159 | return True 160 | 161 | def does_queue_need_work(self, queue): 162 | """Checks if a queue should be worked on.""" 163 | return not self.is_queue_empty(queue) 164 | 165 | 166 | class WorkerThread(threading.Thread): 167 | """Consuming tasks.""" 168 | 169 | def __init__(self, speedup, queue_selector_class, queue_info): 170 | """ 171 | Args: 172 | speedup: Integer for how much faster we want to simulate tasks. 173 | If speedup is 10, then we process tasks at 10x slower speed. 174 | queue_selector_class: String for queue_selector class in the 175 | nd.kale.queue_selector module. 176 | """ 177 | super(WorkerThread, self).__init__() 178 | SelectQueueClass = getattr(queue_selector, queue_selector_class) 179 | self.queue_info = queue_info 180 | self.queue_selector = SelectQueueClass(self.queue_info) 181 | self.speedup = speedup 182 | 183 | def run(self): 184 | """The main class for emulating a task worker.""" 185 | log.info('Running worker thread %d ...' 
% self.ident) 186 | 187 | while not all_done: 188 | queue = self.queue_selector.get_queue() 189 | try: 190 | task_entry = self.queue_info.queues[queue.name].tasks.get( 191 | block=True, timeout=queue.long_poll_time_sec) 192 | task_entry['queue'] = queue.name 193 | task_running_time = task_entry['running_time'] 194 | task_entry['start_consumption_time'] = time.time() 195 | time.sleep(float(task_running_time) / self.speedup) 196 | self.queue_info.queues[queue.name].finished_tasks.put( 197 | task_entry) 198 | except six.moves.queue.Empty: 199 | # We want to keep track of long polling occurrences, which is 200 | # a waste of compute resource. 201 | self.queue_info.queues[queue.name].long_polling_count += 1 202 | 203 | 204 | class PublisherThread(threading.Thread): 205 | """Publishing tasks.""" 206 | 207 | def __init__(self, tasks, publish_interval, queue_info): 208 | """ 209 | Args: 210 | tasks: A list of tasks, each is a 2-tuple 211 | (queue_name, running_time). 212 | publish_interval: Integer for task publishing interval in secs. 213 | """ 214 | self.tasks = tasks 215 | self.queue_info = queue_info 216 | self.publish_interval = publish_interval 217 | super(PublisherThread, self).__init__() 218 | 219 | def run(self): 220 | """The main function for publisher.""" 221 | global all_done 222 | log.info('Running publisher thread %d ...' % self.ident) 223 | while len(self.tasks) > 0: 224 | task = self.tasks.pop() 225 | task_entry = { 226 | 'publish_time': time.time(), 227 | 'finished_consumption_time': 0.0, 228 | 'running_time': task[1]} 229 | self.queue_info.queues[task[0]].tasks.put(task_entry) 230 | time.sleep(self.publish_interval) 231 | log.info('Finished publishing tasks.') 232 | 233 | # Use global variale to signal all workers we finish task publishing. 
234 | all_done = True 235 | 236 | 237 | class PrintStatsThread(threading.Thread): 238 | """Print queue stats.""" 239 | 240 | def __init__(self, tasks, queue_info): 241 | self.tasks = tasks 242 | self.total_num_tasks = len(tasks) 243 | self.queue_info = queue_info 244 | super(PrintStatsThread, self).__init__() 245 | 246 | def run(self): 247 | """Main function for printing stats and benchmark results.""" 248 | while not all_done: 249 | time.sleep(PRINT_STATS_INTERVAL) 250 | self._print_queue_stats() 251 | self._print_benchmark_results() 252 | 253 | def _print_queue_stats(self): 254 | """Print out queue stats.""" 255 | string = '' 256 | for queue_name in six.iterkeys(self.queue_info.queues): 257 | string += '%s=%d; ' % ( 258 | queue_name, self.queue_info.queues[queue_name].tasks.qsize()) 259 | log.info(string) 260 | 261 | def _print_benchmark_results(self): 262 | """Print final benchmark results. 263 | 264 | Metrics we care about: 265 | 1. Task-in-queue time for each queue. 266 | 2. Number of tasks processed within limited time. 
267 | """ 268 | log.info('=== Benchmark Results ===') 269 | total_processed_tasks = 0 270 | finished_count_breakdown = {} 271 | for queue_name in six.iterkeys(self.queue_info.queues): 272 | finished_tasks = self.queue_info.queues[queue_name].finished_tasks 273 | all_queue_latencies = [] 274 | while not finished_tasks.empty(): 275 | task_entry = finished_tasks.get() 276 | all_queue_latencies.append( 277 | task_entry[ 278 | 'start_consumption_time'] - task_entry['publish_time']) 279 | total_processed_tasks += 1 280 | if task_entry['queue'] not in finished_count_breakdown: 281 | finished_count_breakdown[task_entry['queue']] = 1 282 | else: 283 | finished_count_breakdown[task_entry['queue']] += 1 284 | 285 | if len(all_queue_latencies) > 0: 286 | total_latency = sum(all_queue_latencies) 287 | num_tasks = len(all_queue_latencies) 288 | max_latency = max(all_queue_latencies) 289 | log.info(('Queue %s: average latency %f secs, median ' 290 | 'latency %f secs, max latency %f secs, ' 291 | 'long_polling count %d') % ( 292 | queue_name, 293 | total_latency / num_tasks, 294 | sorted(all_queue_latencies)[num_tasks / 2], 295 | max_latency, 296 | self.queue_info.queues[queue_name].long_polling_count)) 297 | 298 | log.info('Total processed tasks: %d / %d' % ( 299 | total_processed_tasks, self.total_num_tasks)) 300 | for queue_name in six.iterkeys(finished_count_breakdown): 301 | log.info('Queue %s: %d tasks finished.' 
% ( 302 | queue_name, finished_count_breakdown[queue_name])) 303 | 304 | 305 | class Benchmark(object): 306 | """Manages benchmarking and emulates tasks workers.""" 307 | 308 | def __init__(self, workers, speedup, tasks_load_file, 309 | queue_selector_class, publish_interval): 310 | log.info('To terminate this process:') 311 | log.info('\tkill -9 %d' % os.getpid()) 312 | log.info(('workers=[%d], speedup=[%d], tasks_load_file=[%s], ' 313 | 'queue_selector_class=[%s]') % ( 314 | workers, speedup, tasks_load_file, queue_selector_class)) 315 | self.workers = workers 316 | self.speedup = speedup 317 | self.tasks_load_file = tasks_load_file 318 | self.queue_selector_class = queue_selector_class 319 | self.publish_interval = publish_interval 320 | 321 | def run(self): 322 | """Main function for doing benchmark. 323 | 324 | Three types of threads are running concurrently: 325 | 1. A publisher thread: publishing tasks periodically. 326 | 2. Multiple Worker threads: consuming tasks. 327 | 3. A print-stats thread: print out queue stats and benchmark results. 
328 | """ 329 | tasks = self._load_tasks() 330 | 331 | queue_info = StaticQueueInfo() 332 | # Start a publisher thread 333 | publisher_thread = PublisherThread(tasks, self.publish_interval, 334 | queue_info) 335 | publisher_thread.setDaemon(True) 336 | publisher_thread.start() 337 | 338 | # Start a print-stats thread 339 | print_stats_thread = PrintStatsThread(tasks, queue_info) 340 | print_stats_thread.setDaemon(True) 341 | print_stats_thread.start() 342 | 343 | # Start worker threads 344 | worker_threads = [] 345 | for i in range(self.workers): 346 | worker_thread = WorkerThread( 347 | self.speedup, self.queue_selector_class, queue_info) 348 | worker_threads.append(worker_thread) 349 | worker_thread.setDaemon(True) 350 | for worker_thread in worker_threads: 351 | worker_thread.start() 352 | 353 | # Wait for all threads 354 | for worker_thread in worker_threads: 355 | worker_thread.join() 356 | publisher_thread.join() 357 | print_stats_thread.join() 358 | 359 | def _load_tasks(self): 360 | """Load tasks from tasks_load_file.""" 361 | tasks = [] 362 | with open(self.tasks_load_file, 'r') as f: 363 | reader = csv.reader(f, delimiter=',') 364 | for row in reader: 365 | try: 366 | queue_name = row[0].strip() 367 | running_time = row[1].strip() 368 | if queue_name and running_time: 369 | tasks.append((queue_name, running_time)) 370 | except IndexError: 371 | continue 372 | return tasks 373 | 374 | 375 | def main(): 376 | """Main function for this script.""" 377 | args = parser.parse_args() 378 | 379 | benchmark = Benchmark(args.workers, args.speedup, args.tasks_load_file, 380 | args.queue_selector_class, args.publish_interval) 381 | benchmark.run() 382 | 383 | 384 | if __name__ == '__main__': 385 | main() 386 | -------------------------------------------------------------------------------- /kale/worker.py: -------------------------------------------------------------------------------- 1 | """Module for running the worker process. 2 | 3 | It's an infinite loop. 
"""
from __future__ import absolute_import

import logging
import signal
import sys
import time

from six.moves import range

from kale import consumer
from kale import publisher
from kale import queue_info
from kale import settings
from kale import timeout
from kale import utils

logger = logging.getLogger(__name__)

SIGNALS_TO_HANDLE = (
    signal.SIGABRT,
    # This will catch Ctrl-C interrupts.
    signal.SIGINT,
    signal.SIGQUIT,
    signal.SIGTERM,
    # Handle Ctrl-Z/suspend.
    signal.SIGTSTP)

# Logging constants.
LOG_TASK_RESULT_DEFERRED = 'deferred'
LOG_TASK_RESULT_ERROR = 'error'
LOG_TASK_RESULT_SUCCESS = 'success'

# Process-wide Publisher singleton, created lazily by _get_publisher().
publisher_inst = None


class Worker(object):

    def __init__(self):
        """Initialize a worker instance."""

        self._consumer = consumer.Consumer()

        queue_class = utils.class_import_from_path(settings.QUEUE_CLASS)
        q_info = queue_info.QueueInfo(
            config_file=settings.QUEUE_CONFIG,
            sqs_talk=self._consumer,
            queue_cls=queue_class)

        queue_selector_class = utils.class_import_from_path(
            settings.QUEUE_SELECTOR)
        self._queue_selector = queue_selector_class(q_info)

        # The worker will publish permanently failed tasks to a
        # dead-letter-queue.
        self._publisher = self._get_publisher()

        # Track total messages processed.
        self._total_messages_processed = 0

        # Initialize queue variables used by each batch.
        self._incomplete_messages = []
        self._successful_messages = []
        self._failed_messages = []
        self._permanent_failures = []
        self._batch_stop_time = time.time()
        self._batch_queue = None
        # Monitors whether the worker has been exposed to tasks and may
        # have bloated in memory.
        self._dirty = False

        # Setup signal handling for cleanup.
        # Registered last so handlers never run against a half-built worker.
        for sig in SIGNALS_TO_HANDLE:
            signal.signal(sig, self._cleanup_worker)

        # Allow the client of this library to do any setup before
        # starting the worker.
        settings.ON_WORKER_STARTUP()

    @staticmethod
    def _get_publisher():
        """Return the module-level Publisher singleton, creating it lazily."""
        global publisher_inst
        if publisher_inst is None:
            publisher_inst = publisher.Publisher()
        return publisher_inst

    def _on_pre_run_worker(self):
        """Callback function right at the beginning of starting the worker. """
        logger.info('Starting run loop for task worker.')

    def _on_exceeding_memory_limit(self, ru_maxrss_mb):
        """Callback function when the process exceeds memory limit.

        :param int ru_maxrss_mb: maximum resident set size used (in MB).
        """
        logger.info('Memory usage of %d MB exceeds max of %s MB. Exiting.' % (
            ru_maxrss_mb, settings.DIE_ON_RESIDENT_SET_SIZE_MB))

    def _on_sigtstp(self, num_completed, num_incomplete):
        """Callback function when SIGTSTP is triggered.

        :param int num_completed: the number of tasks completed in this batch.
        :param int num_incomplete: the number of tasks incomplete in the batch.
        """
        logger.info(
            ('Taskworker process suspended. Completed tasks: %d;'
             ' Incomplete: %d') % (
                num_completed, num_incomplete))

    def _on_shutdown(self, num_completed, num_incomplete):
        """Callback function when we shutdown the worker process.

        :param int num_completed: the number of tasks completed in this batch.
        :param int num_incomplete: the number of tasks incomplete in the batch.
        """
        logger.info(('Taskworker process shutting down. Completed tasks: %d;'
                     ' Incomplete: %d') % (num_completed, num_incomplete))

    def _on_pre_batch_run(self, messages):
        """Callback function before running a batch of tasks.

        :param list[KaleMessage] messages: a list of task messages to process.
126 | """ 127 | logger.debug('Start processing %d tasks in a batch in queue %s ...' % ( 128 | len(messages), self._batch_queue.name)) 129 | 130 | def _on_post_batch_run(self, num_completed, num_incomplete, messages): 131 | """Callback function after running a batch of tasks. 132 | 133 | :param int num_completed: the number of tasks completed in this batch. 134 | :param int num_incomplete: the number of tasks incomplete in the batch. 135 | :param list[KaleMessage] messages: a list of messages in this batch. 136 | """ 137 | logger.debug(('Finish processing message batch. Completed tasks: %d;' 138 | ' Incomplete: %d') % ( 139 | num_completed, num_incomplete)) 140 | 141 | def _on_permanent_failure_batch(self): 142 | """Callback when there are permanently failed tasks in this batch.""" 143 | logger.info(('Moving permamently %d failed tasks to the ' 144 | 'dead-letter-queue %s.') % ( 145 | len(self._permanent_failures), self._batch_queue.dlq_name)) 146 | 147 | def _on_task_deferred(self, message, time_remaining_sec): 148 | """Callback function when a task is deferred. 149 | 150 | :param list[KaleMessage] message: a list of task messages. 151 | :param int time_remaining_sec: integer for seconds remained in the 152 | budget of running this batch of tasks. 153 | """ 154 | task = message.task_inst 155 | logger.info(('Task deferred. Task id: %s; Queue: %s; ' 156 | 'Time remaining: %d sec') % ( 157 | task.task_id, self._batch_queue.name, time_remaining_sec)) 158 | 159 | def _on_task_failed(self, message, time_remaining_sec, err, 160 | permanent_failure): 161 | """Callback function when a task fails. 162 | 163 | :param KaleMessage message: an object of kale.message.KaleMessage. 164 | :param int time_remaining_sec: integer for seconds remained in the 165 | budget of running this batch of tasks. 166 | :param Exception err: object of Exception. 167 | :param bool permanent_failure: whether this task permanently fails. 
168 | """ 169 | task = message.task_inst 170 | logger.debug(('Task failed. Task id: %s; Queue: %s; ' 171 | 'Time remaining: %d sec') % ( 172 | task.task_id, self._batch_queue.name, time_remaining_sec)) 173 | 174 | def _on_task_succeeded(self, message, time_remaining_sec): 175 | """Callback function when a task succeeds. 176 | 177 | :param KaleMessage message: an object of kale.message.KaleMessage. 178 | :param int time_remaining_sec: integer for seconds remained in the 179 | budget of running this batch of tasks. 180 | """ 181 | task = message.task_inst 182 | logger.debug(('Task succeeded. Task id: %s; Queue: %s; ' 183 | 'Time remaining: %d sec') % ( 184 | task.task_id, self._batch_queue.name, time_remaining_sec)) 185 | 186 | def run(self): 187 | """This method starts the task processing loop.""" 188 | 189 | self._on_pre_run_worker() 190 | 191 | while self._check_process_resources(): 192 | self._batch_queue = self._queue_selector.get_queue() 193 | for i in range(self._batch_queue.num_iterations): 194 | if not self._run_single_iteration(): 195 | # If the iteration didn't process any tasks break out 196 | # of this loop and move on to another queue. 197 | break 198 | 199 | def _check_process_resources(self): 200 | """Check if the process is still is abusing resources & should continue 201 | 202 | This will check the processes memory usage and gracefully exit if 203 | it exceeds the maximum value in settings. 204 | 205 | :return: True if still operational, otherwise it will exit the process. 206 | :rtype: bool 207 | """ 208 | ru_maxrss_mb = utils.ru_maxrss_mb() 209 | 210 | if ru_maxrss_mb < settings.DIE_ON_RESIDENT_SET_SIZE_MB: 211 | 212 | if self._dirty: 213 | # We only log when the worker has been infected by tasks. 214 | logger.debug('Worker process data.') 215 | return True 216 | 217 | # Allow the client of this library to do any setup before 218 | # shutting down the worker. 
        settings.ON_WORKER_SHUTDOWN()

        self._on_exceeding_memory_limit(ru_maxrss_mb)

        # Use non-zero exit code.
        sys.exit(1)

    def _cleanup_worker(self, signum, frame):
        """Handle cleanup when the process is sent a signal.

        This will handle releasing tasks in flight and deleting tasks that have
        been completed.

        :param int signum: number of the signal that was received.
        :param frame: current stack frame (unused; required signature).
        """
        logger.info('Process sent signal %d. Cleaning up tasks...' % signum)

        num_completed, num_incomplete = self._release_batch()

        # When the process is suspended we release tasks and then return to the
        # main loop.
        if signum == signal.SIGTSTP:
            self._on_sigtstp(num_completed, num_incomplete)
            return
        else:
            # Allow the client of this library to do any setup before
            # shutting down the worker.
            settings.ON_WORKER_SHUTDOWN()
            self._on_shutdown(num_completed, num_incomplete)
            sys.exit(0)

    def _run_single_iteration(self):
        """Run a single iteration of the task processing loop.

        :return: True if we were able to process a batch, False if there were
            no messages.
        :rtype: bool
        """
        message_batch = self._consumer.fetch_batch(
            self._batch_queue.name,
            self._batch_queue.batch_size,
            self._batch_queue.visibility_timeout_sec,
            long_poll_time_sec=self._batch_queue.long_poll_time_sec)

        self._dirty = bool(message_batch)
        if not message_batch:
            # No messages in this queue; go back and re-select a queue.
            return False

        self._on_pre_batch_run(message_batch)

        # Deadline by which the whole batch must be finished (SQS visibility).
        self._batch_stop_time = time.time() + \
            self._batch_queue.visibility_timeout_sec

        self._run_batch(message_batch)
        num_completed, num_incomplete = self._release_batch()

        self._on_post_batch_run(num_completed, num_incomplete, message_batch)

        return True

    def _release_batch(self):
        """Release the most recent batch back to SQS.

        This will delete tasks that succeeded and reset the
        visibility timeout for incomplete tasks.

        :return: A two-tuple of the count of tasks that were completed and
            the count of tasks that were incomplete.
        :rtype: tuple
        """
        # Delete from queues (failed messages are re-published as new tasks)
        messages_to_be_deleted = self._successful_messages + \
            self._failed_messages
        # Set timeout to 0 (if there is time left)
        # As an enhancement, we reset the timeout on tasks that didn't get
        # attempted if the remaining timeout is greater than some threshold.
        # An example of this being helpful is if a 5 minute task was given the
        # opportunity to run with 4 minutes left (and declined). This will
        # release the task 4 minutes before it previously would have.

        if (self._batch_stop_time - time.time()) > \
                settings.RESET_TIMEOUT_THRESHOLD:
            messages_to_be_released = self._incomplete_messages
        else:
            messages_to_be_released = []

        if messages_to_be_deleted:
            # Note: This includes failed tasks.
            self._consumer.delete_messages(messages_to_be_deleted,
                                           self._batch_queue.name)

        if messages_to_be_released:
            # This is only tasks that we didn't get the chance to attempt.
            self._consumer.release_messages(messages_to_be_released,
                                            self._batch_queue.name)

        # Send permanently failed tasks to the dead-letter-queue.
        if self._permanent_failures and settings.ENABLE_DEAD_LETTER_QUEUE:
            self._publisher.publish_messages_to_dead_letter_queue(
                self._batch_queue.dlq_name, self._permanent_failures)
            self._on_permanent_failure_batch()

        # Reset the per-batch bookkeeping lists for the next batch.
        self._incomplete_messages = []
        self._successful_messages = []
        self._failed_messages = []
        self._permanent_failures = []

        return len(messages_to_be_deleted), len(messages_to_be_released)

    def _run_batch(self, message_batch):
        """Consume as many tasks as possible in visibility timeout.

        :param list[KaleMessage] message_batch: list of consumable messages.
        """

        # All messages start as incomplete.
        self._incomplete_messages = list(message_batch)
        self._successful_messages = []
        self._failed_messages = []

        # These messages will be sent to the dead-letter-queue.
        self._permanent_failures = []

        for message in message_batch:
            # Time left before the batch's visibility deadline.
            time_remaining_sec = self._batch_stop_time - time.time()

            if message.task_inst.time_limit >= time_remaining_sec:
                # Greedily continue to process tasks, this task
                # is already in the incomplete_messages list
                self._on_task_deferred(message, time_remaining_sec)
                continue

            # Add cleanup method when tasks are timed out?

            self._run_single_message(message, time_remaining_sec)

    def _run_single_message(self, message, time_remaining_sec):
        """Call run_task on a single message and handle failures.

        This method handles time limiting, and calls subsequent commands if the task succeeds/fails

        :param message: KaleMessage
        :param time_remaining_sec: int seconds left for the batch of messages. Used mainly for logs
        """
        task_inst = message.task_inst
        try:
            with timeout.time_limit(task_inst.time_limit):
                # A vetoed task is re-published as-is and counted as
                # successful so it is deleted from the current queue.
                if not self.should_run_task(message):
                    task_inst.__class__.republish(message, message.task_failure_num)
                    self._successful_messages.append(message)
                    return
                self.run_task(message)
        except Exception as err:
            # Re-publish failed tasks.
            # As an optimization we could run all of the failures from a
            # batch together.
            permanent_failure = not task_inst.__class__.handle_failure(
                message, err)
            # NOTE(review): _release_batch gates the dead-letter publish on
            # settings.ENABLE_DEAD_LETTER_QUEUE while this uses
            # settings.USE_DEAD_LETTER_QUEUE -- confirm both settings exist
            # and whether one of the two names is stale.
            if permanent_failure and settings.USE_DEAD_LETTER_QUEUE:
                self._permanent_failures.append(message)

            # Failed messages are still deleted from the queue by
            # _release_batch; retries travel as newly published tasks.
            self._failed_messages.append(message)

            self._on_task_failed(message, time_remaining_sec, err,
                                 permanent_failure)
        else:
            self._successful_messages.append(message)
            self._on_task_succeeded(message, time_remaining_sec)
        finally:
            self.remove_message_or_exit(message)
            # Increment total messages counter.
            self._total_messages_processed += 1

    def remove_message_or_exit(self, message):
        """Drop the message from the incomplete list, exiting if it is gone.

        :param message: KaleMessage that just finished (or failed).
        """
        try:
            self._incomplete_messages.remove(message)
        except ValueError:
            # Cleanup happened due to the signal handler - make sure we exit immediately.
            sys.exit(0)

    def should_run_task(self, message):
        """Delegate the run/skip decision to the task instance.

        :param message: KaleMessage whose task is about to run.
        :return: result of the task's should_run_task hook.
        :rtype: bool
        """
        return message.task_inst.should_run_task(*message.task_args, **message.task_kwargs)

    def run_task(self, message):
        """Run the task contained in the message.
        :param message: message.KaleMessage containing the task and arguments to run.
        """
        message.task_inst.run(*message.task_args, **message.task_kwargs)