├── katsdpdatawriter ├── test │ ├── __init__.py │ ├── test_queue_space.py │ ├── test_writer.py │ ├── test_vis_writer.py │ ├── test_flag_writer.py │ ├── test_rechunk.py │ └── test_spead_write.py ├── __init__.py ├── queue_space.py ├── dashboard.py ├── flag_writer.py ├── vis_writer.py ├── rechunk.py └── spead_write.py ├── .flake8 ├── pyproject.toml ├── mypy.ini ├── .gitignore ├── test-requirements.txt ├── .pre-commit-config.yaml ├── Jenkinsfile ├── requirements.txt ├── setup.py ├── Dockerfile ├── LICENSE └── scripts ├── vis_writer.py └── flag_writer.py /katsdpdatawriter/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "katversion"] 3 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | files = katsdpdatawriter, scripts 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .mypy_cache 2 | *.py[cod] 3 | __pycache__ 4 | *.egg-info 5 | pip-wheel-metadata 6 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | -c https://raw.githubusercontent.com/ska-sa/katsdpdockerbase/master/docker-base-build/base-requirements.txt 2 | 3 | asynctest 4 | coverage 5 | nose 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 3.9.2 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/pre-commit/mirrors-mypy 7 | rev: v0.780 8 | hooks: 9 | - id: mypy 10 | args: [] 11 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy 2 | 3 | @Library('katsdpjenkins@master') _ 4 | 5 | katsdp.killOldJobs() 6 | katsdp.setDependencies(['ska-sa/katsdpdockerbase/master', 7 | 'ska-sa/katdal/master', 8 | 'ska-sa/katsdpservices/master', 9 | 'ska-sa/katsdptelstate/master']) 10 | katsdp.standardBuild(push_external: true) 11 | katsdp.mail('sdpdev+katsdpdatawriter@ska.ac.za') 12 | -------------------------------------------------------------------------------- /katsdpdatawriter/__init__.py: -------------------------------------------------------------------------------- 1 | # BEGIN VERSION CHECK 2 | # Get package version when locally imported from repo or via -e develop install 3 | try: 4 | import katversion as _katversion 5 | except ImportError: # pragma: no cover 6 | import time as _time 7 | __version__ = "0.0+unknown.{}".format(_time.strftime('%Y%m%d%H%M')) 8 | else: # pragma: no cover 9 | __version__ = _katversion.get_version(__path__[0]) # type: ignore 10 | # END VERSION 
CHECK 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -c https://raw.githubusercontent.com/ska-sa/katsdpdockerbase/master/docker-base-build/base-requirements.txt 2 | 3 | aiokatcp 4 | attrs 5 | bokeh 6 | hiredis # Speeds up katsdptelstate 7 | numpy 8 | spead2 9 | 10 | katdal[s3credentials] @ git+https://github.com/ska-sa/katdal 11 | katpoint @ git+https://github.com/ska-sa/katpoint 12 | katsdpservices[argparse,aiomonitor] @ git+https://github.com/ska-sa/katsdpservices 13 | katsdptelstate @ git+https://github.com/ska-sa/katsdptelstate 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | 4 | 5 | tests_require = ['asynctest', 'nose'] 6 | 7 | setup( 8 | name="katsdpdatawriter", 9 | description="MeerKAT data writer", 10 | author="MeerKAT SDP team", 11 | author_email="sdpdev+katsdpdatawriter@ska.ac.za", 12 | packages=find_packages(), 13 | scripts=[ 14 | "scripts/flag_writer.py", 15 | "scripts/vis_writer.py" 16 | ], 17 | setup_requires=["katversion"], 18 | install_requires=[ 19 | "aiokatcp>=0.7.0", # Needed for auto_strategy 20 | "spead2>=3.0.0", 21 | "katsdptelstate", 22 | "katsdpservices[argparse,aiomonitor]", 23 | "katdal[s3credentials]", 24 | "attrs", 25 | "numpy", 26 | "bokeh" 27 | ], 28 | extras_require={"test": tests_require}, 29 | tests_require=tests_require, 30 | use_katversion=True) 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG KATSDPDOCKERBASE_REGISTRY=harbor.sdp.kat.ac.za/dpp 2 | 3 | FROM $KATSDPDOCKERBASE_REGISTRY/docker-base-build as build 4 | 5 | # Switch to Python 3 environment 6 | ENV PATH="$PATH_PYTHON3" VIRTUAL_ENV="$VIRTUAL_ENV_PYTHON3" 7 | 8 | # Install dependencies 9 | COPY --chown=kat:kat requirements.txt /tmp/install/requirements.txt 10 | RUN install_pinned.py -r /tmp/install/requirements.txt 11 | 12 | # Install the current package 13 | COPY --chown=kat:kat . /tmp/install/katsdpdatawriter 14 | WORKDIR /tmp/install/katsdpdatawriter 15 | RUN python ./setup.py clean 16 | RUN pip install --no-deps . 17 | RUN pip check 18 | 19 | ####################################################################### 20 | 21 | FROM $KATSDPDOCKERBASE_REGISTRY/docker-base-runtime 22 | LABEL maintainer="sdpdev+katsdpdatawriter@ska.ac.za" 23 | 24 | COPY --from=build --chown=kat:kat /home/kat/ve3 /home/kat/ve3 25 | ENV PATH="$PATH_PYTHON3" VIRTUAL_ENV="$VIRTUAL_ENV_PYTHON3" 26 | 27 | # katcp for vis_writer 28 | EXPOSE 2046 29 | # katcp for flag_writer 30 | EXPOSE 2052 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2019, National Research Foundation (SARAO) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_queue_space.py: -------------------------------------------------------------------------------- 1 | import asynctest 2 | from nose.tools import assert_true, assert_false 3 | 4 | from ..queue_space import QueueSpace 5 | 6 | 7 | class TestQueueSpace(asynctest.TestCase): 8 | def setUp(self): 9 | self.qs = QueueSpace(value=100, loop=self.loop) 10 | 11 | async def test_immediate(self): 12 | result = await self.qs.acquire(100) 13 | assert_true(result) 14 | 15 | async def test_block(self): 16 | task = self.loop.create_task(self.qs.acquire(200)) 17 | await asynctest.exhaust_callbacks(self.loop) 18 | assert_false(task.done()) 19 | self.qs.release(120) 20 | await asynctest.exhaust_callbacks(self.loop) 21 | assert_true(task.done()) 22 | assert_true(await task) 23 | 24 | async def test_cancel(self): 25 | task1 = self.loop.create_task(self.qs.acquire(200)) 26 | task2 = self.loop.create_task(self.qs.acquire(100)) 27 | await asynctest.exhaust_callbacks(self.loop) 28 | assert_false(task1.done()) 29 | task1.cancel() 30 | await asynctest.exhaust_callbacks(self.loop) 31 | assert_true(task2.done()) 32 | assert_true(await task2) 33 | 34 | async def test_release_multiple(self): 35 | task1 = self.loop.create_task(self.qs.acquire(200)) 36 | task2 = self.loop.create_task(self.qs.acquire(100)) 37 | await asynctest.exhaust_callbacks(self.loop) 38 | assert_false(task1.done()) 39 | self.qs.release(200) 40 | await asynctest.exhaust_callbacks(self.loop) 41 | assert_true(task1.done()) 42 | assert_true(task2.done()) 43 | assert_true(await task1) 44 | assert_true(await task2) 45 | -------------------------------------------------------------------------------- /katsdpdatawriter/queue_space.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import deque 3 | 4 | 5 | class QueueSpace: 6 | """Manage space in a queue. 7 | 8 | This is logically similar to a semaphore, but allows the user to specify 9 | how much to acquire and release, rather than 1. It is first-come, 10 | first-served, so a large acquire will block the queue until there is 11 | space, even if there are later smaller acquires that could have been 12 | satisfied. 
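    A minimal usage sketch (the byte counts and the producer/consumer split
    are illustrative, not part of this module)::

        space = QueueSpace(value=1000)

        async def producer(nbytes: int) -> None:
            await space.acquire(nbytes)   # blocks until nbytes of space is free
            ...                           # enqueue the item

        def consumer_done(nbytes: int) -> None:
            space.release(nbytes)         # hand the space back once consumed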
13 | """ 14 | def __init__(self, value: int = 0, *, loop: asyncio.AbstractEventLoop = None) -> None: 15 | self._loop = loop if loop is not None else asyncio.get_event_loop() 16 | self._value = value 17 | self._waiters = deque() # type: deque 18 | 19 | @property 20 | def value(self) -> int: 21 | """Currently available space.""" 22 | return self._value 23 | 24 | async def acquire(self, value: int) -> bool: 25 | if value <= self._value: 26 | self._value -= value 27 | return True 28 | future = self._loop.create_future() 29 | future.add_done_callback(self._cancel_handler) 30 | self._waiters.append((future, value)) 31 | await future 32 | return True 33 | 34 | def _wakeup(self): 35 | while self._waiters: 36 | if self._waiters[0][0].done(): 37 | # Can happen if it was cancelled 38 | self._waiters.popleft() 39 | elif self._waiters[0][1] <= self._value: 40 | future, req = self._waiters.popleft() 41 | self._value -= req 42 | future.set_result(None) 43 | else: 44 | break 45 | 46 | def _cancel_handler(self, future): 47 | if future.cancelled(): 48 | self._wakeup() # Give next requester a chance 49 | 50 | def release(self, value: int) -> None: 51 | self._value += value 52 | self._wakeup() 53 | 54 | def locked(self, value: int) -> bool: 55 | return value > self._value 56 | -------------------------------------------------------------------------------- /scripts/vis_writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | import signal 5 | import logging 6 | 7 | import katsdpservices 8 | import katsdptelstate 9 | 10 | from katsdpdatawriter.vis_writer import VisibilityWriterServer 11 | from katsdpdatawriter.spead_write import add_common_args, chunk_store_from_args, ChunkParams 12 | from katsdpdatawriter.dashboard import make_dashboard, start_dashboard 13 | 14 | 15 | def on_shutdown(loop: asyncio.AbstractEventLoop, server: VisibilityWriterServer) -> None: 16 | # in case the exit code below borks, we allow shutdown via traditional means 17 | loop.remove_signal_handler(signal.SIGINT) 18 | loop.remove_signal_handler(signal.SIGTERM) 19 | server.halt() 20 | 21 | 22 | async def run(loop: asyncio.AbstractEventLoop, server: VisibilityWriterServer) -> None: 23 | await server.start() 24 | for sig in [signal.SIGINT, signal.SIGTERM]: 25 | loop.add_signal_handler(sig, lambda: on_shutdown(loop, server)) 26 | logger.info("Started visibility writer server.") 27 | await server.join() 28 | 29 | 30 | if __name__ == '__main__': 31 | katsdpservices.setup_logging() 32 | logger = logging.getLogger('vis_writer') 33 | katsdpservices.setup_restart() 34 | 35 | parser = katsdpservices.ArgumentParser() 36 | add_common_args(parser) 37 | parser.add_argument('--l0-spead', default=':7200', metavar='ENDPOINTS', 38 | type=katsdptelstate.endpoint.endpoint_list_parser(7200), 39 | help='Source port/multicast groups for L0 SPEAD stream. ' 40 | '[default=%(default)s]') 41 | parser.add_argument('--l0-interface', metavar='INTERFACE', 42 | help='Network interface to subscribe to for L0 stream. 
' 43 | '[default=auto]') 44 | parser.add_argument('--l0-name', default='sdp_l0', metavar='NAME', 45 | help='Name of L0 stream from ingest [default=%(default)s]') 46 | parser.add_argument('--l0-ibv', action='store_true', 47 | help='Use ibverbs acceleration to receive L0 stream [default=no]') 48 | parser.set_defaults(telstate='localhost', port=2046) 49 | args = parser.parse_args() 50 | 51 | if args.l0_ibv and args.l0_interface is None: 52 | parser.error('--l0-ibv requires --l0-interface') 53 | if args.rename_src and args.new_name is None: 54 | parser.error('--rename-src requires --new-name') 55 | 56 | # Connect to object store 57 | chunk_store = chunk_store_from_args(parser, args) 58 | loop = asyncio.get_event_loop() 59 | server = VisibilityWriterServer(args.host, args.port, loop, args.l0_spead, 60 | args.l0_interface, args.l0_ibv, 61 | chunk_store, ChunkParams.from_args(args), 62 | args.telstate, 63 | args.l0_name, 64 | args.new_name if args.new_name is not None else args.l0_name, 65 | args.rename_src, 66 | args.s3_endpoint_url, 67 | args.workers, args.buffer_dumps) 68 | if args.dashboard_port is not None: 69 | dashboard = make_dashboard(server.sensors) 70 | start_dashboard(dashboard, args) 71 | 72 | with katsdpservices.start_aiomonitor(loop, args, locals()): 73 | loop.run_until_complete(run(loop, server)) 74 | loop.close() 75 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_writer.py: -------------------------------------------------------------------------------- 1 | """Base functionality for :mod:`test_vis_writer` and :mod:`test_flag_writer`""" 2 | 3 | from unittest import mock 4 | import asyncio 5 | 6 | import asynctest 7 | 8 | import katsdptelstate 9 | from katsdptelstate.endpoint import Endpoint 10 | import aiokatcp 11 | import spead2 12 | import spead2.recv.asyncio 13 | import spead2.send.asyncio 14 | from nose.tools import assert_equal, assert_in 15 | 16 | 17 | class BaseTestWriterServer(asynctest.TestCase): 18 | @classmethod 19 | def setup_telstate(cls, namespace: str) -> katsdptelstate.TelescopeState: 20 | telstate = katsdptelstate.TelescopeState().view(namespace) 21 | n_ants = 3 22 | telstate['n_chans'] = 4096 23 | telstate['n_chans_per_substream'] = 1024 24 | telstate['n_bls'] = n_ants * (n_ants + 1) * 2 25 | return telstate 26 | 27 | def setup_sleep(self) -> None: 28 | """Patch loop.call_later so that delayed callbacks run immediately. 29 | 30 | This speeds up the tests where the code under test has a 5s timeout. 
31 | """ 32 | def call_later(delay, callback, *args): 33 | return self.loop.call_soon(callback, *args) 34 | 35 | patcher = mock.patch.object(self.loop, 'call_later', call_later) 36 | patcher.start() 37 | self.addCleanup(patcher.stop) 38 | 39 | def setup_spead(self) -> None: 40 | def add_udp_reader(stream, host: str, port: int, *args, **kwargs) -> None: 41 | queue = self.inproc_queues[Endpoint(host, port)] 42 | stream.add_inproc_reader(queue) 43 | 44 | self.endpoints = [Endpoint('239.102.254.{}'.format(i), 7148) for i in range(4)] 45 | self.inproc_queues = {endpoint: spead2.InprocQueue() for endpoint in self.endpoints} 46 | tx_pool = spead2.ThreadPool() 47 | self.tx = [spead2.send.asyncio.InprocStream(tx_pool, [self.inproc_queues[endpoint]]) 48 | for endpoint in self.endpoints] 49 | patcher = mock.patch('spead2.recv.asyncio.Stream.add_udp_reader', add_udp_reader) 50 | patcher.start() 51 | self.addCleanup(patcher.stop) 52 | 53 | async def get(stream): 54 | heap = await orig_get(stream) 55 | self.received_heaps.release() 56 | return heap 57 | 58 | self.received_heaps = asyncio.Semaphore(value=0, loop=self.loop) 59 | orig_get = spead2.recv.asyncio.Stream.get 60 | patcher = mock.patch('spead2.recv.asyncio.Stream.get', get) # type: ignore 61 | patcher.start() 62 | self.addCleanup(patcher.stop) 63 | 64 | async def setup_client(self, server: aiokatcp.DeviceServer) -> aiokatcp.Client: 65 | assert server.server is not None, "Server has not been started" 66 | # mypy doesn't know about asyncio.base_events.Server, which has the 'sockets' member 67 | port = server.server.sockets[0].getsockname()[1] # type: ignore 68 | client = await aiokatcp.Client.connect('localhost', port) 69 | self.addCleanup(client.wait_closed) 70 | self.addCleanup(client.close) 71 | return client 72 | 73 | def assert_sensor_equals(self, name, value, status=frozenset([aiokatcp.Sensor.Status.NOMINAL])): 74 | assert_equal(self.server.sensors[name].value, value) 75 | assert_in(self.server.sensors[name].status, status) 76 | 77 | async def send_heap(self, tx, heap): 78 | """Send a heap and wait for it to be received. 79 | 80 | .. note:: This only works if all heaps are sent through this interface. 81 | """ 82 | assert self.received_heaps.locked() 83 | await tx.async_send_heap(heap) 84 | # The above just waits until it's been transmitted into the inproc 85 | # queue, but we want to wait until it's come out the other end. 86 | await self.received_heaps.acquire() 87 | -------------------------------------------------------------------------------- /scripts/flag_writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Capture L1 flags from the SPEAD stream(s) produced by cal. 3 | 4 | We adopt a naive strategy and store the flags for each heap in a single 5 | object. These objects will be later picked up by the trawler process 6 | and inserted into the appropriate bucket in S3 from where they will be 7 | picked up by katdal. 
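A typical invocation looks something like the following (endpoints, interface
and store path are placeholders; the remaining options come from
add_common_args in katsdpdatawriter.spead_write)::

    flag_writer.py --telstate localhost:6379 \
        --flags-spead 239.102.255.0+3:7202 --flags-interface eth0 \
        --npy-path /data/flags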
8 | 9 | """ 10 | 11 | import logging 12 | import signal 13 | import asyncio 14 | 15 | import katsdptelstate 16 | import katsdpservices 17 | 18 | from katsdpdatawriter.flag_writer import FlagWriterServer 19 | from katsdpdatawriter.spead_write import add_common_args, chunk_store_from_args, ChunkParams 20 | from katsdpdatawriter.dashboard import make_dashboard, start_dashboard 21 | 22 | 23 | def on_shutdown(loop: asyncio.AbstractEventLoop, server: FlagWriterServer) -> None: 24 | # in case the exit code below borks, we allow shutdown via traditional means 25 | loop.remove_signal_handler(signal.SIGINT) 26 | loop.remove_signal_handler(signal.SIGTERM) 27 | server.halt() 28 | 29 | 30 | async def run(loop: asyncio.AbstractEventLoop, server: FlagWriterServer) -> None: 31 | await server.start() 32 | for sig in [signal.SIGINT, signal.SIGTERM]: 33 | loop.add_signal_handler(sig, lambda: on_shutdown(loop, server)) 34 | logger.info("Started flag writer server.") 35 | await server.join() 36 | 37 | 38 | if __name__ == '__main__': 39 | katsdpservices.setup_logging() 40 | logger = logging.getLogger("flag_writer") 41 | katsdpservices.setup_restart() 42 | 43 | parser = katsdpservices.ArgumentParser() 44 | add_common_args(parser) 45 | parser.add_argument('--flags-spead', default=':7202', metavar='ENDPOINTS', 46 | type=katsdptelstate.endpoint.endpoint_list_parser(7202), 47 | help='Source port/multicast groups for flags SPEAD streams. ' 48 | '[default=%(default)s]') 49 | parser.add_argument('--flags-interface', metavar='INTERFACE', 50 | help='Network interface to subscribe to for flag streams. ' 51 | '[default=auto]') 52 | parser.add_argument('--flags-name', type=str, default='sdp_l1_flags', 53 | help='name for the flags stream. [default=%(default)s]', metavar='NAME') 54 | parser.add_argument('--flags-ibv', action='store_true', 55 | help='Use ibverbs acceleration to receive flags') 56 | parser.set_defaults(telstate='localhost', port=2052) 57 | 58 | args = parser.parse_args() 59 | if args.telstate is None: 60 | parser.error('--telstate is required') 61 | if args.flags_ibv and args.flags_interface is None: 62 | parser.error("--flags-ibv requires --flags-interface") 63 | if args.rename_src and args.new_name is None: 64 | parser.error('--rename-src requires --new-name') 65 | 66 | chunk_store = chunk_store_from_args(parser, args) 67 | loop = asyncio.get_event_loop() 68 | # The type: ignore below is because mypy doesn't know that parser.error never returns 69 | server = FlagWriterServer(args.host, args.port, loop, args.flags_spead, 70 | args.flags_interface, args.flags_ibv, 71 | chunk_store, ChunkParams.from_args(args), 72 | args.telstate, # type: ignore 73 | args.flags_name, 74 | args.new_name if args.new_name is not None else args.flags_name, 75 | args.rename_src, args.s3_endpoint_url, 76 | args.workers, args.buffer_dumps) 77 | if args.dashboard_port is not None: 78 | dashboard = make_dashboard(server.sensors) 79 | start_dashboard(dashboard, args) 80 | 81 | with katsdpservices.start_aiomonitor(loop, args, locals()): 82 | loop.run_until_complete(run(loop, server)) 83 | loop.close() 84 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_vis_writer.py: -------------------------------------------------------------------------------- 1 | """Tests for :mod:`katsdpdatawriter.vis_writer`.""" 2 | 3 | import tempfile 4 | import shutil 5 | from unittest import mock 6 | 7 | import numpy as np 8 | import katdal.chunkstore_npy 9 | import spead2.send.asyncio 10 | from 
aiokatcp import FailReply, Sensor 11 | from nose.tools import assert_equal, assert_raises_regex, assert_true, assert_in 12 | 13 | from ..vis_writer import VisibilityWriterServer, Status 14 | from ..spead_write import DeviceStatus, ChunkParams 15 | from .test_writer import BaseTestWriterServer 16 | 17 | 18 | class TestVisWriterServer(BaseTestWriterServer): 19 | async def setup_server(self, **arg_overrides) -> VisibilityWriterServer: 20 | args = dict( 21 | host='127.0.0.1', port=0, loop=self.loop, endpoints=self.endpoints, 22 | interface='lo', ibv=False, chunk_store=self.chunk_store, 23 | chunk_params=ChunkParams(10000), 24 | telstate=self.telstate.root(), 25 | input_name='sdp_l0', output_name='sdp_l0', rename_src={}, 26 | s3_endpoint_url=None, max_workers=4, buffer_dumps=2) 27 | args.update(arg_overrides) 28 | server = VisibilityWriterServer(**args) 29 | await server.start() 30 | self.addCleanup(server.stop) 31 | return server 32 | 33 | def setup_ig(self) -> spead2.send.ItemGroup: 34 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 35 | n_bls = self.telstate['n_bls'] 36 | shape = (n_chans_per_substream, n_bls) 37 | vis = np.zeros(shape, np.complex64) 38 | flags = np.random.randint(0, 256, shape, np.uint8) 39 | weights = np.random.randint(0, 256, shape, np.uint8) 40 | weights_channel = np.random.random(n_chans_per_substream).astype(np.float32) 41 | # Adapted from katsdpingest/sender.py 42 | ig = spead2.send.ItemGroup() 43 | ig.add_item(id=None, name='correlator_data', 44 | description="Visibilities", 45 | shape=(n_chans_per_substream, n_bls), dtype=np.complex64, 46 | value=vis) 47 | ig.add_item(id=None, name='flags', 48 | description="Flags for visibilities", 49 | shape=(n_chans_per_substream, n_bls), dtype=np.uint8, 50 | value=flags) 51 | ig.add_item(id=None, name='weights', 52 | description="Detailed weights, to be scaled by weights_channel", 53 | shape=(n_chans_per_substream, n_bls), dtype=np.uint8, 54 | value=weights) 55 | ig.add_item(id=None, name='weights_channel', 56 | description="Coarse (per-channel) weights", 57 | shape=(n_chans_per_substream,), dtype=np.float32, 58 | value=weights_channel) 59 | ig.add_item(id=None, name='timestamp', 60 | description="Seconds since CBF sync time", 61 | shape=(), dtype=None, format=[('f', 64)], 62 | value=100.0) 63 | ig.add_item(id=None, name='dump_index', 64 | description='Index in time', 65 | shape=(), dtype=None, format=[('u', 64)], 66 | value=1) 67 | ig.add_item(id=0x4103, name='frequency', 68 | description="Channel index of first channel in the heap", 69 | shape=(), dtype=np.uint32, 70 | value=0) 71 | return ig 72 | 73 | async def setUp(self) -> None: 74 | npy_path = tempfile.mkdtemp() 75 | self.addCleanup(shutil.rmtree, npy_path) 76 | self.chunk_store = katdal.chunkstore_npy.NpyFileChunkStore(npy_path) 77 | self.telstate = self.setup_telstate('sdp_l0') 78 | self.telstate['src_streams'] = ['i0_baseline_correlation_products'] 79 | self.setup_sleep() 80 | self.setup_spead() 81 | self.server = await self.setup_server() 82 | self.client = await self.setup_client(self.server) 83 | self.ig = self.setup_ig() 84 | 85 | async def test_capture(self, output_name: str = 'sdp_l0') -> None: 86 | cbid = '1234567890' 87 | self.assert_sensor_equals('status', Status.IDLE) 88 | await self.client.request('capture-init', cbid) 89 | self.assert_sensor_equals('status', Status.WAIT_DATA) 90 | for tx in self.tx: 91 | await self.send_heap(tx, self.ig.get_start()) 92 | await self.send_heap(self.tx[0], self.ig.get_heap()) 93 | 
self.assert_sensor_equals('status', Status.CAPTURING) 94 | self.assert_sensor_equals('input-heaps-total', 1) 95 | for tx in self.tx: 96 | await self.send_heap(tx, self.ig.get_end()) 97 | # The writes to chunkstore happen in other threads, so the state here 98 | # depends on timing. 99 | assert_in(self.server.sensors['status'].value, {Status.FINALISING, Status.COMPLETE}) 100 | await self.client.request('capture-done') 101 | self.assert_sensor_equals('status', Status.IDLE) 102 | capture_stream = '{}_{}'.format(cbid, output_name) 103 | prefix = capture_stream.replace('_', '-') 104 | assert_true(self.chunk_store.is_complete(prefix)) 105 | 106 | async def test_new_name(self) -> None: 107 | # Replace the client+server to use new arguments 108 | output_name = 'sdp_l0_new' 109 | s3_endpoint_url = 'http://sdp_l0_new.invalid/' 110 | await self.server.stop() 111 | self.server = await self.setup_server(output_name=output_name, 112 | s3_endpoint_url=s3_endpoint_url) 113 | self.client = await self.setup_client(self.server) 114 | # Run the test 115 | await self.test_capture(output_name) 116 | telstate_output = self.telstate.root().view(output_name) 117 | assert_equal(telstate_output['s3_endpoint_url'], s3_endpoint_url) 118 | assert_equal(telstate_output['inherit'], 'sdp_l0') 119 | 120 | async def test_failed_write(self) -> None: 121 | cbid = '1234567890' 122 | with mock.patch.object(katdal.chunkstore_npy.NpyFileChunkStore, 'put_chunk', 123 | side_effect=katdal.chunkstore.StoreUnavailable): 124 | await self.client.request('capture-init', cbid) 125 | for tx in self.tx: 126 | await self.send_heap(tx, self.ig.get_start()) 127 | await self.send_heap(self.tx[0], self.ig.get_heap()) 128 | await self.client.request('capture-done') 129 | self.assert_sensor_equals('device-status', DeviceStatus.FAIL, {Sensor.Status.ERROR}) 130 | 131 | async def test_missing_stop_item(self) -> None: 132 | cbid = '1234567890' 133 | await self.client.request('capture-init', cbid) 134 | for tx in self.tx: 135 | await self.send_heap(tx, self.ig.get_start()) 136 | await self.send_heap(self.tx[0], self.ig.get_heap()) 137 | for tx in self.tx[:-1]: 138 | await self.send_heap(tx, self.ig.get_end()) 139 | await self.client.request('capture-done') 140 | 141 | async def test_double_init(self) -> None: 142 | await self.client.request('capture-init', '1234567890') 143 | with assert_raises_regex(FailReply, '(?i)already capturing'): 144 | await self.client.request('capture-init', '9876543210') 145 | 146 | async def test_done_without_init(self) -> None: 147 | with assert_raises_regex(FailReply, '(?i)not capturing'): 148 | await self.client.request('capture-done') 149 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_flag_writer.py: -------------------------------------------------------------------------------- 1 | """Tests for :mod:`katsdpdatawriter.flag_writer`.""" 2 | 3 | import tempfile 4 | import shutil 5 | from unittest import mock 6 | from typing import Dict, Any 7 | 8 | import numpy as np 9 | from nose.tools import (assert_equal, assert_true, 10 | assert_regex, assert_raises_regex, assert_logs) 11 | 12 | import aiokatcp 13 | from aiokatcp import Sensor 14 | import spead2 15 | import spead2.send.asyncio 16 | import katdal.chunkstore 17 | from katdal.chunkstore_npy import NpyFileChunkStore 18 | 19 | from ..flag_writer import FlagWriterServer, Status 20 | from ..spead_write import DeviceStatus, ChunkParams 21 | from .test_writer import BaseTestWriterServer 22 | 23 | 24 | class 
TestFlagWriterServer(BaseTestWriterServer): 25 | async def setup_server(self, **arg_overrides) -> FlagWriterServer: 26 | args = dict( 27 | host='127.0.0.1', port=0, loop=self.loop, endpoints=self.endpoints, 28 | flag_interface='lo', flags_ibv=False, 29 | chunk_store=self.chunk_store, chunk_params=self.chunk_params, 30 | telstate=self.telstate.root(), 31 | input_name='sdp_l1_flags', output_name='sdp_l1_flags', rename_src={}, 32 | s3_endpoint_url=None, max_workers=4, buffer_dumps=2) 33 | args.update(arg_overrides) 34 | server = FlagWriterServer(**args) 35 | await server.start() 36 | self.addCleanup(server.stop) 37 | return server 38 | 39 | def setup_ig(self) -> spead2.send.ItemGroup: 40 | self.cbid = '1234567890' 41 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 42 | n_bls = self.telstate['n_bls'] 43 | flags = np.random.randint(0, 256, (n_chans_per_substream, n_bls), np.uint8) 44 | 45 | ig = spead2.send.ItemGroup() 46 | # This is copied and adapted from katsdpcal 47 | ig.add_item(id=None, name='flags', description="Flags for visibilities", 48 | shape=(self.telstate['n_chans_per_substream'], self.telstate['n_bls']), 49 | dtype=None, format=[('u', 8)], value=flags) 50 | ig.add_item(id=None, name='timestamp', description="Seconds since sync time", 51 | shape=(), dtype=None, format=[('f', 64)], value=100.0) 52 | ig.add_item(id=None, name='dump_index', description='Index in time', 53 | shape=(), dtype=None, format=[('u', 64)], value=0) 54 | ig.add_item(id=0x4103, name='frequency', 55 | description="Channel index of first channel in the heap", 56 | shape=(), dtype=np.uint32, value=0) 57 | ig.add_item(id=None, name='capture_block_id', description='SDP capture block ID', 58 | shape=(None,), dtype=None, format=[('c', 8)], value=self.cbid) 59 | return ig 60 | 61 | async def stop_server(self) -> None: 62 | for queue in self.inproc_queues.values(): 63 | queue.stop() 64 | await self.server.stop() 65 | 66 | async def setUp(self) -> None: 67 | self.npy_path = tempfile.mkdtemp() 68 | self.addCleanup(shutil.rmtree, self.npy_path) 69 | self.chunk_store = NpyFileChunkStore(self.npy_path) 70 | self.telstate = self.setup_telstate('sdp_l1_flags') 71 | self.telstate['src_streams'] = ['sdp_l0'] 72 | self.chunk_channels = 128 73 | self.chunk_params = ChunkParams(self.telstate['n_bls'] * self.chunk_channels, 74 | self.chunk_channels) 75 | self.setup_sleep() 76 | self.setup_spead() 77 | self.server = await self.setup_server() 78 | self.client = await self.setup_client(self.server) 79 | self.ig = self.setup_ig() 80 | 81 | def _check_chunk_info(self, output_name: str = 'sdp_l1_flags') -> Dict[str, Any]: 82 | n_chans = self.telstate['n_chans'] 83 | n_bls = self.telstate['n_bls'] 84 | capture_stream = '{}_{}'.format(self.cbid, output_name) 85 | 86 | view = self.telstate.root().view(capture_stream) 87 | chunk_info = view['chunk_info'] 88 | n_chunks = n_chans // self.chunk_channels 89 | assert_equal( 90 | chunk_info, 91 | { 92 | 'flags': { 93 | 'prefix': capture_stream.replace('_', '-'), 94 | 'shape': (1, n_chans, n_bls), 95 | 'chunks': ((1,), (self.chunk_channels,) * n_chunks, (n_bls,)), 96 | 'dtype': np.dtype(np.uint8) 97 | } 98 | }) 99 | return chunk_info['flags'] 100 | 101 | async def test_capture(self, output_name: str = 'sdp_l1_flags') -> None: 102 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 103 | self.assert_sensor_equals('status', Status.WAIT_DATA) 104 | self.assert_sensor_equals('capture-block-state', '{}') 105 | 106 | await self.client.request('capture-init', self.cbid) 107 | 
self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 108 | 109 | await self.send_heap(self.tx[0], self.ig.get_heap()) 110 | self.assert_sensor_equals('status', Status.CAPTURING) 111 | 112 | await self.client.request('capture-done', self.cbid) 113 | self.assert_sensor_equals('status', Status.CAPTURING) # Should still be capturing 114 | self.assert_sensor_equals('capture-block-state', '{}') 115 | await self.stop_server() 116 | capture_stream = '{}_{}'.format(self.cbid, output_name) 117 | prefix = capture_stream.replace('_', '-') 118 | assert_true(self.chunk_store.is_complete(prefix)) 119 | 120 | # Validate the data written 121 | chunk_info = self._check_chunk_info(output_name) 122 | data = self.chunk_store.get_dask_array( 123 | self.chunk_store.join(chunk_info['prefix'], 'flags'), 124 | chunk_info['chunks'], chunk_info['dtype']).compute() 125 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 126 | np.testing.assert_array_equal(self.ig['flags'].value[np.newaxis], 127 | data[:, :n_chans_per_substream, :]) 128 | np.testing.assert_equal(0, data[:, n_chans_per_substream:, :]) 129 | 130 | async def test_new_name(self) -> None: 131 | # Replace client and server with different args 132 | output_name = 'sdp_l1_flags_new' 133 | rename_src = {'sdp_l0': 'sdp_l0_new'} 134 | s3_endpoint_url = 'http://new.invalid/' 135 | await self.server.stop() 136 | self.server = await self.setup_server(output_name=output_name, 137 | rename_src=rename_src, 138 | s3_endpoint_url=s3_endpoint_url) 139 | self.client = await self.setup_client(self.server) 140 | await self.test_capture(output_name) 141 | telstate_output = self.telstate.root().view(output_name) 142 | assert_equal(telstate_output['inherit'], 'sdp_l1_flags') 143 | assert_equal(telstate_output['s3_endpoint_url'], s3_endpoint_url) 144 | assert_equal(telstate_output['src_streams'], ['sdp_l0_new']) 145 | 146 | async def test_failed_write(self) -> None: 147 | with mock.patch.object(NpyFileChunkStore, 'put_chunk', 148 | side_effect=katdal.chunkstore.StoreUnavailable): 149 | await self.client.request('capture-init', self.cbid) 150 | await self.send_heap(self.tx[0], self.ig.get_heap()) 151 | await self.client.request('capture-done', self.cbid) 152 | self._check_chunk_info() 153 | self.assert_sensor_equals('device-status', DeviceStatus.FAIL, {Sensor.Status.ERROR}) 154 | 155 | async def test_double_init(self) -> None: 156 | await self.client.request('capture-init', self.cbid) 157 | with assert_raises_regex(aiokatcp.FailReply, 'already active'): 158 | await self.client.request('capture-init', self.cbid) 159 | self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 160 | 161 | async def test_done_without_init(self) -> None: 162 | with assert_raises_regex(aiokatcp.FailReply, 'unknown'): 163 | await self.client.request('capture-done', self.cbid) 164 | 165 | async def test_no_data(self) -> None: 166 | self.assert_sensor_equals('capture-block-state', '{}') 167 | await self.client.request('capture-init', self.cbid) 168 | self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 169 | with assert_logs('katsdpdatawriter.flag_writer', 'WARNING'): 170 | await self.client.request('capture-done', self.cbid) 171 | self.assert_sensor_equals('capture-block-state', '{}') 172 | 173 | async def test_data_after_done(self) -> None: 174 | await self.client.request('capture-init', self.cbid) 175 | await self.client.request('capture-done', self.cbid) 176 | with 
assert_logs('katsdpdatawriter.flag_writer', 'WARNING') as cm: 177 | await self.send_heap(self.tx[0], self.ig.get_heap()) 178 | assert_regex(cm.output[0], 'outside of init/done') 179 | -------------------------------------------------------------------------------- /katsdpdatawriter/dashboard.py: -------------------------------------------------------------------------------- 1 | """Bokeh dashboard showing real-time metrics""" 2 | 3 | from datetime import datetime, timedelta 4 | import logging 5 | import functools 6 | from collections import deque 7 | from weakref import WeakSet 8 | import argparse 9 | from typing import Mapping, MutableSet, List, Callable, Iterable # noqa: F401 10 | 11 | import numpy as np 12 | 13 | from aiokatcp import Sensor, Reading 14 | 15 | from bokeh.document import Document 16 | from bokeh.application.handlers.handler import Handler 17 | from bokeh.models import ColumnDataSource, DataRange1d 18 | from bokeh.layouts import gridplot 19 | from bokeh.plotting import figure 20 | from bokeh.palettes import Category10 21 | from bokeh.server.server import Server 22 | from bokeh.application.application import Application 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | PALETTE = Category10[10] 27 | 28 | 29 | def _convert_timestamp(posix_timestamp: float) -> datetime: 30 | return datetime.utcfromtimestamp(posix_timestamp) 31 | 32 | 33 | class Watcher: 34 | """Observe and collect data for a single sensor 35 | 36 | Refer to :class:`Dashboard` for the meaning of `window` and `rollover`. 37 | """ 38 | def __init__(self, dashboard: 'Dashboard', sensor: Sensor, 39 | window: float, rollover: int) -> None: 40 | self.dashboard = dashboard 41 | self.sensor = sensor 42 | self.window = window 43 | self.rollover = rollover 44 | # TODO: use typing.Deque in type hint after migration to Python 3.6 45 | self._readings = deque() # type: deque 46 | self.sensor.attach(self._update) 47 | self._update(self.sensor, self.sensor.reading) 48 | 49 | def close(self) -> None: 50 | self.sensor.detach(self._update) 51 | 52 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 53 | self._readings.append(reading) 54 | if (self._readings[-1].timestamp - self._readings[0].timestamp > self.window 55 | or len(self._readings) > self.rollover): 56 | self._readings.popleft() 57 | 58 | 59 | class LineWatcher(Watcher): 60 | """Watcher for drawing line graphs""" 61 | def make_data_source(self) -> ColumnDataSource: 62 | data = { 63 | 'time': [_convert_timestamp(reading.timestamp) for reading in self._readings], 64 | 'value': [reading.value for reading in self._readings] 65 | } 66 | return ColumnDataSource(data, name='data_source ' + self.sensor.name) 67 | 68 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 69 | super()._update(sensor, reading) 70 | update = { 71 | 'time': [_convert_timestamp(reading.timestamp)], 72 | 'value': [reading.value] 73 | } 74 | name = 'data_source ' + sensor.name 75 | 76 | def doc_update(doc): 77 | data_source = doc.get_model_by_name(name) 78 | data_source.stream(update, rollover=len(self._readings)) 79 | 80 | self.dashboard.update_documents(doc_update) 81 | 82 | 83 | class HistogramWatcher(Watcher): 84 | def make_data_source(self) -> ColumnDataSource: 85 | return ColumnDataSource(self._data, name='data_source ' + self.sensor.name) 86 | 87 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 88 | super()._update(sensor, reading) 89 | values = [reading.value for reading in self._readings] 90 | # Based on 
https://bokeh.pydata.org/en/latest/docs/gallery/histogram.html 91 | hist, edges = np.histogram(values, bins='auto') 92 | self._data = { 93 | 'top': hist, 94 | 'bottom': [0] * len(hist), 95 | 'left': edges[:-1], 96 | 'right': edges[1:] 97 | } 98 | name = 'data_source ' + sensor.name 99 | 100 | def doc_update(doc): 101 | data_source = doc.get_model_by_name(name) 102 | data_source.data = self._data 103 | 104 | self.dashboard.update_documents(doc_update) 105 | 106 | 107 | class Dashboard(Handler): 108 | """Bokeh dashboard showing sensor values. 109 | 110 | Sensor values are recorded and displayed through graphs. To keep the 111 | graph size down (more to avoid overloading the browser/network than for 112 | memory constraints), old values are discarded once either they are 113 | older than `window` or there are more than `rollover` samples. 114 | 115 | Parameters 116 | ---------- 117 | line_sensors 118 | Sensors to display as line graphs. Each element is a list of sensors 119 | to plot on a single graph. 120 | histogram_sensors 121 | Sensors to display as histograms. Each sensor update contributes one 122 | entry on the histogram. 123 | window 124 | Maximum length of time (in seconds) to keep samples. 125 | rollover 126 | Maximum number of samples to keep (per sensor). 127 | """ 128 | def __init__(self, 129 | line_sensors: Iterable[Iterable[Sensor]], 130 | histogram_sensors: Iterable[Sensor], 131 | window: float = 1200.0, rollover: int = 10000) -> None: 132 | super().__init__() 133 | self._line_watchers = [] # type: List[List[LineWatcher]] 134 | self._histogram_watchers = [] # type: List[HistogramWatcher] 135 | self._docs = WeakSet() # type: MutableSet[Document] 136 | for sensors in line_sensors: 137 | watchers = [LineWatcher(self, sensor, window, rollover) for sensor in sensors] 138 | self._line_watchers.append(watchers) 139 | for sensor in histogram_sensors: 140 | watcher = HistogramWatcher(self, sensor, window, rollover) 141 | self._histogram_watchers.append(watcher) 142 | 143 | def modify_document(self, doc: Document) -> None: 144 | plots = [] 145 | line_renderers = [] # type: List 146 | for watchers in self._line_watchers: 147 | plot = figure(plot_width=350, plot_height=350, 148 | x_axis_label='time', x_axis_type='datetime', y_axis_label='value') 149 | for i, line_watcher in enumerate(watchers): 150 | data_source = line_watcher.make_data_source() 151 | plot.step('time', 'value', source=data_source, mode='after', 152 | legend=line_watcher.sensor.name, 153 | color=PALETTE[i]) 154 | plot.legend.location = 'top_left' 155 | plots.append(plot) 156 | line_renderers.extend(plot.x_range.renderers) 157 | # Create a single data range so that all line plots show the same time window 158 | data_range = DataRange1d() 159 | data_range.renderers = line_renderers 160 | data_range.follow = 'end' 161 | data_range.default_span = timedelta(seconds=1) 162 | data_range.follow_interval = timedelta(seconds=120) 163 | for plot in plots: 164 | plot.x_range = data_range 165 | 166 | for histogram_watcher in self._histogram_watchers: 167 | plot = figure(plot_width=350, plot_height=350, 168 | x_axis_label=histogram_watcher.sensor.name, 169 | y_axis_label='frequency') 170 | data_source = histogram_watcher.make_data_source() 171 | plot.quad(top='top', bottom='bottom', left='left', right='right', 172 | source=data_source) 173 | plots.append(plot) 174 | 175 | doc.add_root(gridplot(plots, ncols=3)) 176 | logger.debug('Created document with %d plots', len(plots)) 177 | self._docs.add(doc) 178 | 179 | def 
on_server_unloaded(self, server_context) -> None: 180 | for watchers in self._line_watchers: 181 | for line_watcher in watchers: 182 | line_watcher.close() 183 | for histogram_watcher in self._histogram_watchers: 184 | histogram_watcher.close() 185 | self._line_watchers.clear() 186 | self._histogram_watchers.clear() 187 | 188 | def update_documents(self, callback: Callable[[Document], None]) -> None: 189 | for doc in self._docs: 190 | doc.add_next_tick_callback(functools.partial(callback, doc)) 191 | 192 | 193 | def make_dashboard(sensors: Mapping[str, Sensor]) -> Dashboard: 194 | """Build a dashboard using a standard set of sensors""" 195 | line_sensors = [ 196 | [sensors['active-chunks']], 197 | [sensors['queued-bytes']], 198 | [sensors['output-seconds-total']], 199 | [sensors['output-chunks-total']], 200 | [sensors['input-bytes-total'], sensors['output-bytes-total']], 201 | [sensors['input-heaps-total']], 202 | [sensors['input-incomplete-heaps-total'], sensors['input-missing-heaps-total']] 203 | ] 204 | histogram_sensors = [sensors['output-seconds']] 205 | return Dashboard(line_sensors, histogram_sensors) 206 | 207 | 208 | def start_dashboard(dashboard: Dashboard, args: argparse.Namespace) -> None: 209 | app = Application() 210 | app.add(dashboard) 211 | if args.dashboard_allow_websocket_origin: 212 | allow_websocket_origin = args.dashboard_allow_websocket_origin 213 | else: 214 | allow_websocket_origin = [ 215 | 'localhost:{}'.format(args.dashboard_port), 216 | '{}:{}'.format(args.external_hostname, args.dashboard_port) 217 | ] 218 | server = Server(app, port=args.dashboard_port, 219 | allow_websocket_origin=allow_websocket_origin) 220 | server.start() 221 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_rechunk.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | from typing import List, Tuple # noqa: F401 3 | 4 | import numpy as np 5 | from nose.tools import assert_equal, assert_raises 6 | import asynctest 7 | 8 | from .. 
import rechunk 9 | from ..rechunk import Chunks, Offset # noqa: F401 10 | 11 | 12 | def test_offset_to_size_1d() -> None: 13 | out = rechunk._offset_to_size_1d((1, 5, 7, 4, 2)) 14 | assert_equal(out, {0: 1, 1: 5, 6: 7, 13: 4, 17: 2}) 15 | 16 | 17 | def test_split_chunks_1d() -> None: 18 | out = rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2)) 19 | assert_equal( 20 | out, 21 | { 22 | 0: (slice(0, 1), slice(1, 4)), 23 | 4: (slice(0, 2), slice(2, 4), slice(4, 6)), 24 | 10: (slice(0, 2),) 25 | }) 26 | 27 | 28 | def test_split_chunks_1d_out_chunks_too_short() -> None: 29 | with assert_raises(ValueError): 30 | rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 1)) 31 | 32 | 33 | def test_split_chunks_1d_out_chunks_too_long() -> None: 34 | with assert_raises(ValueError): 35 | rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2, 4)) 36 | 37 | 38 | def test_split_chunks_1d_misaligned() -> None: 39 | with assert_raises(ValueError): 40 | # out_chunks not aligned 41 | rechunk._split_chunks_1d((4, 6, 2), (1, 4, 1, 2, 2, 2)) 42 | 43 | 44 | class MockRechunker(rechunk.Rechunker): 45 | def __init__(self, *args, **kwargs) -> None: 46 | super().__init__(*args, **kwargs) 47 | self.calls = [] # type: List[Tuple[Offset, np.ndarray, np.ndarray]] 48 | 49 | async def output(self, offset: Tuple[int, ...], value: np.ndarray, present: np.ndarray) -> None: 50 | self.calls.append((offset, value.copy(), present.copy())) 51 | 52 | 53 | class _BaseTestRechunker(asynctest.TestCase): 54 | def setup_data(self, in_chunks: Chunks, out_chunks: Chunks) -> None: 55 | self.r = MockRechunker('flags', in_chunks, out_chunks, 253, np.uint8) 56 | self.data = np.arange(64).reshape(4, 8, 2).astype(np.uint8) 57 | self.expected = np.full_like(self.data, 253, np.uint8) 58 | self.present = np.zeros_like(self.data, np.bool_) 59 | 60 | async def send_chunk(self, offset: Tuple[int, ...]) -> None: 61 | idx = tuple(slice(ofs, ofs + size) for ofs, size in zip(offset, (1, 4, 2))) 62 | value = self.data[idx] 63 | await self.r.add(offset, value) 64 | self.expected[idx] = self.data[idx] 65 | self.present[idx] = True 66 | 67 | def check_values(self) -> None: 68 | # Checks that the calls contain the expected values given the 69 | # data send. Does NOT check that the offsets and sizes correspond 70 | # correctly to chunks. 
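        # Each recorded call is (offset, value, present): the slices rebuilt
        # below from the offset and the chunk's shape select the region of
        # self.expected / self.present that the output chunk should cover.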
71 | for call in self.r.calls: 72 | idx = tuple(slice(ofs, ofs + size) for ofs, size in zip(call[0], call[1].shape)) 73 | expected = self.expected[idx] 74 | np.testing.assert_array_equal(expected, call[1]) 75 | present = self.present[idx] 76 | present = present[(slice(None),) + (0,) * (present.ndim - 1)] 77 | np.testing.assert_array_equal(present, call[2]) 78 | 79 | async def test_add_bad_offset(self) -> None: 80 | with assert_raises(KeyError): 81 | await self.r.add((0, 2, 0), np.zeros((1, 2, 2), np.uint8)) 82 | with assert_raises(ValueError): 83 | await self.r.add((0, 0), np.zeros((1, 2, 2), np.uint8)) 84 | 85 | async def test_add_bad_shape(self) -> None: 86 | with assert_raises(ValueError): 87 | await self.r.add((0, 0, 0), np.zeros((1, 2, 2), np.uint8)) 88 | with assert_raises(ValueError): 89 | await self.r.add((0, 0, 0), np.zeros((2, 4, 2), np.uint8)) 90 | 91 | 92 | class TestRechunker(_BaseTestRechunker): 93 | def setUp(self) -> None: 94 | self.setup_data(((1,), (4, 4), (2,)), ((2,), (2, 2, 4), (2,))) 95 | 96 | async def test_end_partial(self, reorder: bool = False) -> None: 97 | if reorder: 98 | for i in range(3): 99 | await self.send_chunk((i, 0, 0)) 100 | for i in range(3): 101 | await self.send_chunk((i, 4, 0)) 102 | else: 103 | for i in range(3): 104 | await self.send_chunk((i, 0, 0)) 105 | await self.send_chunk((i, 4, 0)) 106 | await self.r.close() 107 | offsets = [call[0] for call in self.r.calls] 108 | shapes = [call[1].shape for call in self.r.calls] 109 | assert_equal( 110 | offsets, 111 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 112 | (2, 0, 0), (2, 2, 0), (2, 4, 0)]) 113 | assert_equal( 114 | shapes, 115 | [(2, 2, 2), (2, 2, 2), (2, 4, 2), 116 | (1, 2, 2), (1, 2, 2), (1, 4, 2)]) 117 | self.check_values() 118 | assert_equal( 119 | self.r.get_chunk_info('flags'), 120 | { 121 | 'prefix': 'flags', 122 | 'chunks': ((2, 1), (2, 2, 4), (2,)), 123 | 'shape': (3, 8, 2), 124 | 'dtype': '|u1' 125 | }) 126 | 127 | async def test_end_full(self) -> None: 128 | for i in range(4): 129 | await self.send_chunk((i, 0, 0)) 130 | await self.send_chunk((i, 4, 0)) 131 | await self.r.close() 132 | offsets = [call[0] for call in self.r.calls] 133 | shapes = [call[1].shape for call in self.r.calls] 134 | assert_equal( 135 | offsets, 136 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 137 | (2, 0, 0), (2, 2, 0), (2, 4, 0)]) 138 | assert_equal( 139 | shapes, 140 | [(2, 2, 2), (2, 2, 2), (2, 4, 2), 141 | (2, 2, 2), (2, 2, 2), (2, 4, 2)]) 142 | self.check_values() 143 | assert_equal( 144 | self.r.get_chunk_info('flags'), 145 | { 146 | 'prefix': 'flags', 147 | 'chunks': ((2, 2), (2, 2, 4), (2,)), 148 | 'shape': (4, 8, 2), 149 | 'dtype': '|u1' 150 | }) 151 | 152 | async def test_reorder(self) -> None: 153 | await self.test_end_partial(reorder=True) 154 | 155 | async def test_out_of_order(self) -> None: 156 | with mock.patch.object(self.r, 'out_of_order'): 157 | await self.send_chunk((2, 0, 0)) 158 | await self.send_chunk((0, 0, 0)) 159 | self.r.out_of_order.assert_called_with(0, 2) # type: ignore 160 | 161 | async def test_missing(self) -> None: 162 | await self.send_chunk((1, 0, 0)) 163 | await self.send_chunk((2, 4, 0)) 164 | await self.r.close() 165 | 166 | offsets = [call[0] for call in self.r.calls] 167 | shapes = [call[1].shape for call in self.r.calls] 168 | assert_equal( 169 | offsets, 170 | [(0, 0, 0), (0, 2, 0), (2, 4, 0)]) 171 | assert_equal( 172 | shapes, 173 | [(2, 2, 2), (2, 2, 2), (1, 4, 2)]) 174 | self.check_values() 175 | assert_equal( 176 | self.r.get_chunk_info('flags'), 177 | { 178 | 'prefix': 'flags', 
179 | 'chunks': ((2, 1), (2, 2, 4), (2,)), 180 | 'shape': (3, 8, 2), 181 | 'dtype': '|u1' 182 | }) 183 | 184 | def test_bad_in_chunks(self) -> None: 185 | with assert_raises(ValueError): 186 | # in_chunks does not start with (1,) 187 | MockRechunker('foo', ((2,), (4, 4)), ((2,), (4, 4)), 253, np.uint8) 188 | with assert_raises(ValueError): 189 | # zero-sized chunks 190 | MockRechunker('foo', ((1,), (4, 4, 0)), ((2,), (4, 4)), 253, np.uint8) 191 | 192 | def test_bad_out_chunks(self) -> None: 193 | with assert_raises(ValueError): 194 | # does not start with singleton 195 | MockRechunker('foo', ((1,), (4, 4)), ((2, 2), (4, 4)), 253, np.uint8) 196 | 197 | def test_mismatched_chunks(self) -> None: 198 | with assert_raises(ValueError): 199 | # Dimensions don't match 200 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (4, 4), (2,)), 253, np.uint8) 201 | with assert_raises(ValueError): 202 | # Lengths don't match 203 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (4, 4, 1)), 253, np.uint8) 204 | with assert_raises(ValueError): 205 | # Chunks don't align 206 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (3, 5)), 253, np.uint8) 207 | 208 | 209 | class TestRechunkerNoAccum(_BaseTestRechunker): 210 | def setUp(self) -> None: 211 | self.setup_data(((1,), (4, 4), (2,)), ((1,), (2, 2, 4), (2,))) 212 | 213 | async def test(self) -> None: 214 | for i in range(2): 215 | await self.send_chunk((i, 0, 0)) 216 | await self.send_chunk((i, 4, 0)) 217 | await self.r.close() 218 | offsets = [call[0] for call in self.r.calls] 219 | shapes = [call[1].shape for call in self.r.calls] 220 | assert_equal( 221 | offsets, 222 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 223 | (1, 0, 0), (1, 2, 0), (1, 4, 0)]) 224 | assert_equal( 225 | shapes, 226 | [(1, 2, 2), (1, 2, 2), (1, 4, 2), 227 | (1, 2, 2), (1, 2, 2), (1, 4, 2)]) 228 | self.check_values() 229 | assert_equal( 230 | self.r.get_chunk_info('flags'), 231 | { 232 | 'prefix': 'flags', 233 | 'chunks': ((1, 1), (2, 2, 4), (2,)), 234 | 'shape': (2, 8, 2), 235 | 'dtype': '|u1' 236 | }) 237 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_spead_write.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | import numpy as np 5 | from nose.tools import assert_equal, assert_count_equal, assert_is_instance, assert_raises 6 | import asynctest 7 | from aiokatcp import SensorSet 8 | from katdal.chunkstore import ChunkStore 9 | import katsdpservices 10 | 11 | from ..spead_write import (Array, RechunkerGroup, io_sensors, 12 | add_common_args, chunk_store_from_args) 13 | from ..rechunk import Offset 14 | from ..queue_space import QueueSpace 15 | 16 | 17 | class TestArray: 18 | def setup(self) -> None: 19 | self.array = Array( 20 | 'foo', 21 | in_chunks=((1,), (4, 4, 4), (2, 2)), 22 | out_chunks=((2,), (2, 2, 4, 4), (2, 2)), 23 | fill_value=253, 24 | dtype=np.float32) 25 | 26 | def test_dtype(self) -> None: 27 | # Check that the converter converted a dtype-like to a real dtype 28 | assert_equal(self.array.dtype, np.dtype(np.float32)) 29 | assert_is_instance(self.array.dtype, np.dtype) 30 | 31 | def test_substreams(self) -> None: 32 | assert_equal(self.array.substreams, 6) 33 | 34 | def test_shape(self) -> None: 35 | assert_equal(self.array.shape, (1, 12, 4)) 36 | 37 | def test_nbytes(self) -> None: 38 | assert_equal(self.array.nbytes, 192) 39 | 40 | 41 | def _join(*args: str) -> str: 42 | return '/'.join(args) 43 | 44 | 
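# For reference, the figures asserted in TestArray above are consistent with
# the chunk specification it constructs (informal arithmetic only; the real
# definitions live in spead_write.Array):
#   shape  = per-axis sums of in_chunks      = (1, 4+4+4, 2+2) = (1, 12, 4)
#   nbytes = prod(shape) * itemsize(float32) = 12 * 4 * 4      = 192
#   substreams = 6, matching the 3 * 2 = 6 chunks over the channel and
#       baseline axes (the leading time axis contributes a single chunk).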
45 | class TestRechunkerGroup(asynctest.TestCase): 46 | def setUp(self) -> None: 47 | self.chunk_store = mock.create_autospec(spec=ChunkStore, spec_set=True, instance=True) 48 | self.chunk_store.join = _join 49 | 50 | self.sensors = SensorSet() 51 | for sensor in io_sensors(): 52 | self.sensors.add(sensor) 53 | 54 | self.arrays = [ 55 | Array('weights', 56 | ((1,), (4, 4), (2,)), 57 | ((1,), (2, 2, 2, 2), (2,)), 58 | 0, np.uint8), 59 | Array('weights_channel', 60 | ((1,), (4, 4)), 61 | ((2,), (2, 2, 2, 2)), 62 | 0, np.float32) 63 | ] 64 | 65 | self.weights = np.arange(32).reshape(2, 8, 2).astype(np.uint8) 66 | self.weights_channel = np.arange(16).reshape(2, 8).astype(np.float32) 67 | 68 | self.executor = ThreadPoolExecutor(4) 69 | self.executor_queue_space = QueueSpace(5 * sum(array.nbytes for array in self.arrays)) 70 | self.r = RechunkerGroup(self.executor, self.executor_queue_space, 71 | self.chunk_store, self.sensors, 'prefix', self.arrays) 72 | 73 | def tearDown(self): 74 | self.executor.shutdown(wait=True) 75 | 76 | async def add_chunks(self, offset: Offset) -> None: 77 | slices = np.s_[offset[0]:offset[0]+1, offset[1]:offset[1]+4, :] 78 | weights = self.weights[slices] 79 | weights_channel = self.weights_channel[slices[:2]] 80 | await self.r.add(offset, [weights, weights_channel]) 81 | 82 | async def test(self) -> None: 83 | for i in range(0, 8, 4): 84 | for j in range(2): 85 | await self.add_chunks((j, i)) 86 | chunk_info = await self.r.get_chunk_info() 87 | 88 | expected_calls = [] 89 | for i in range(0, 8, 4): 90 | for j in range(2): 91 | for k in range(i, i + 4, 2): 92 | expected_calls.append(mock.call( 93 | 'prefix/weights', np.s_[j:j+1, k:k+2, 0:2], mock.ANY)) 94 | for i in range(0, 8, 2): 95 | expected_calls.append(mock.call( 96 | 'prefix/weights_channel', np.s_[0:2, i:i+2], mock.ANY)) 97 | assert_count_equal(expected_calls, self.chunk_store.put_chunk.mock_calls) 98 | # Check the array values. assert_count_equal doesn't work well for this 99 | # because of how equality operators are implemented in numpy. 
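        # (numpy's elementwise == makes equality checks on calls that contain
        # arrays ambiguous, so the chunk values are compared explicitly below.)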
100 | for call in self.chunk_store.put_chunk.mock_calls: 101 | name, slices, value = call[1] 102 | if name == 'prefix/weights': 103 | np.testing.assert_array_equal(self.weights[slices], value) 104 | else: 105 | np.testing.assert_array_equal(self.weights_channel[slices], value) 106 | 107 | assert_equal( 108 | chunk_info, 109 | { 110 | 'weights': { 111 | 'prefix': 'prefix', 112 | 'chunks': ((1, 1), (2, 2, 2, 2), (2,)), 113 | 'shape': (2, 8, 2), 114 | 'dtype': '|u1' 115 | }, 116 | 'weights_channel': { 117 | 'prefix': 'prefix', 118 | 'chunks': ((2,), (2, 2, 2, 2)), 119 | 'shape': (2, 8), 120 | 'dtype': np.dtype(np.float32).str 121 | } 122 | }) 123 | 124 | async def test_accounting(self) -> None: 125 | qs = self.executor_queue_space 126 | qsize = qs.value 127 | sensor = self.sensors['queued-bytes'] 128 | await self.add_chunks((0, 0)) 129 | assert_equal(sensor.value, 24) # 8x uint8 + 4x float32 130 | assert_equal(qsize - qs.value, 8) # 8x uint8 (the 4x float32 is still accumulating) 131 | await self.add_chunks((1, 0)) 132 | await self.add_chunks((0, 4)) 133 | await self.r.get_chunk_info() 134 | # Everything should have been written 135 | assert_equal(sensor.value, 0) 136 | assert_equal(qsize - qs.value, 0) 137 | 138 | 139 | # SpeadWriter gets exercised via its derived classes 140 | 141 | 142 | class BadArguments(Exception): 143 | """Exception used in mock when replacing ArgumentParser.Error""" 144 | 145 | 146 | @mock.patch.object(katsdpservices.ArgumentParser, 'error', side_effect=BadArguments) 147 | class TestChunkStoreFromArgs: 148 | """Test both :meth:`.add_common_args` and :meth:`.chunk_store_from_args`""" 149 | def setup(self) -> None: 150 | self.parser = katsdpservices.ArgumentParser() 151 | add_common_args(self.parser) 152 | 153 | def test_missing_args(self, error): 154 | with assert_raises(BadArguments): 155 | chunk_store_from_args(self.parser, self.parser.parse_args([])) 156 | error.assert_called_with('--s3-endpoint-url is required if --npy-path is not given') 157 | with assert_raises(BadArguments): 158 | chunk_store_from_args(self.parser, self.parser.parse_args( 159 | ['--s3-endpoint-url', 'http://invalid/', '--s3-access-key', 'ACCESS'])) 160 | error.assert_called_with('--s3-secret-key is required if --npy-path is not given') 161 | 162 | def test_missing_path(self, error): 163 | with assert_raises(BadArguments): 164 | chunk_store_from_args(self.parser, self.parser.parse_args( 165 | ['--npy-path=/doesnotexist'])) 166 | error.assert_called_with('Specified --npy-path (/doesnotexist) does not exist.') 167 | 168 | def test_npy_and_s3_write(self, error): 169 | with assert_raises(BadArguments): 170 | chunk_store_from_args(self.parser, self.parser.parse_args( 171 | ['--npy-path=/', '--s3-write-url=https://s3.invalid/'])) 172 | error.assert_called_with('--s3-write-url and --npy-path cannot be used together') 173 | 174 | def test_npy(self, error): 175 | with mock.patch('katdal.chunkstore_npy.NpyFileChunkStore') as m: 176 | chunk_store_from_args(self.parser, self.parser.parse_args( 177 | ['--npy-path=/'])) 178 | m.assert_called_with('/', direct_write=False) 179 | 180 | def test_npy_direct_write(self, error): 181 | with mock.patch('katdal.chunkstore_npy.NpyFileChunkStore') as m: 182 | chunk_store_from_args(self.parser, self.parser.parse_args( 183 | ['--npy-path=/', '--direct-write'])) 184 | m.assert_called_with('/', direct_write=True) 185 | 186 | def test_s3(self, error): 187 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 188 | chunk_store_from_args(self.parser, 
self.parser.parse_args( 189 | ['--s3-endpoint-url=https://s3.invalid', 190 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS'])) 191 | m.assert_called_with('https://s3.invalid', credentials=('ACCESS', 'S3CR3T'), expiry_days=0) 192 | 193 | def test_s3_expire(self, error): 194 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 195 | chunk_store_from_args(self.parser, self.parser.parse_args( 196 | ['--s3-endpoint-url=https://s3.invalid', 197 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS', 198 | '--s3-expiry-days=7'])) 199 | m.assert_called_with('https://s3.invalid', credentials=('ACCESS', 'S3CR3T'), expiry_days=7) 200 | 201 | def test_s3_write_url(self, error): 202 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 203 | chunk_store_from_args(self.parser, self.parser.parse_args( 204 | ['--s3-endpoint-url=https://s3.invalid', 205 | '--s3-write-url=https://s3.write.invalid', 206 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS'])) 207 | m.assert_called_with('https://s3.write.invalid', credentials=('ACCESS', 'S3CR3T'), 208 | expiry_days=0) 209 | 210 | def test_rename_src(self, error): 211 | args = self.parser.parse_args([ 212 | '--rename-src=foo:bar', '--rename-src', 'x:y', 213 | '--new-name', 'xyz']) 214 | assert_equal(args.rename_src, {'foo': 'bar', 'x': 'y'}) 215 | 216 | def test_rename_src_bad_colons(self, error): 217 | with assert_raises(BadArguments): 218 | self.parser.parse_args(['--rename-src=foo:bar:baz', '--new-name', 'xyz']) 219 | -------------------------------------------------------------------------------- /katsdpdatawriter/flag_writer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import enum 3 | import json 4 | import asyncio 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import Any, Dict, List, Mapping, Optional 7 | 8 | import numpy as np 9 | import spead2 10 | import spead2.recv.asyncio 11 | import katsdpservices 12 | import katdal 13 | import katdal.chunkstore 14 | from katdal.visdatav4 import FLAG_NAMES 15 | from aiokatcp import DeviceServer, Sensor, SensorSet, FailReply 16 | import katsdptelstate 17 | from katsdptelstate.endpoint import Endpoint 18 | 19 | import katsdpdatawriter 20 | from . 
import spead_write 21 | from .spead_write import RechunkerGroup 22 | from .queue_space import QueueSpace 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class Status(enum.Enum): 29 | """Status of the whole process""" 30 | WAIT_DATA = 1 31 | CAPTURING = 2 32 | FINISHED = 3 33 | 34 | 35 | class State(enum.Enum): 36 | """State of a single capture block""" 37 | CAPTURING = 1 # capture-init has been called, but not capture-done 38 | COMPLETE = 2 # capture-done has been called 39 | 40 | 41 | class EnumEncoder(json.JSONEncoder): 42 | """JSON encoder that stringifies enums""" 43 | def default(self, obj: Any) -> Any: 44 | if isinstance(obj, enum.Enum): 45 | return obj.name 46 | return json.JSONEncoder.default(self, obj) 47 | 48 | 49 | class FlagWriter(spead_write.SpeadWriter): 50 | """Glue between :class:`~.SpeadWriter` and :class:`FlagWriterServer`.""" 51 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream, 52 | server: 'FlagWriterServer') -> None: 53 | super().__init__(sensors, rx) 54 | self._server = server 55 | 56 | def first_heap(self) -> None: 57 | logger.info("First flag heap received...") 58 | self.sensors['status'].value = Status.CAPTURING 59 | 60 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> Optional[RechunkerGroup]: 61 | cbid = updated['capture_block_id'].value 62 | return self._server.rechunker_group(cbid) 63 | 64 | 65 | class FlagWriterServer(DeviceServer): 66 | """Top-level device server for flag writer service""" 67 | 68 | VERSION = "sdp-flag-writer-0.2" 69 | BUILD_STATE = "katsdpdatawriter-" + katsdpdatawriter.__version__ 70 | 71 | def __init__(self, host: str, port: int, loop: asyncio.AbstractEventLoop, 72 | endpoints: List[Endpoint], flag_interface: Optional[str], flags_ibv: bool, 73 | chunk_store: katdal.chunkstore.ChunkStore, 74 | chunk_params: spead_write.ChunkParams, 75 | telstate: katsdptelstate.TelescopeState, 76 | input_name: str, output_name: str, rename_src: Mapping[str, str], 77 | s3_endpoint_url: Optional[str], 78 | max_workers: int, buffer_dumps: int) -> None: 79 | super().__init__(host, port, loop=loop) 80 | 81 | self._chunk_store = chunk_store 82 | self._telstate = telstate 83 | # track the status of each capture block we have seen to date 84 | self._capture_block_state = {} # type: Dict[str, State] 85 | self._input_name = input_name 86 | self._output_name = output_name 87 | # rechunker group for each CBID 88 | self._flag_streams = {} # type: Dict[str, RechunkerGroup] 89 | self._executor = ThreadPoolExecutor(max_workers=max_workers) 90 | 91 | self.sensors.add(Sensor( 92 | Status, "status", "The current status of the flag writer process.")) 93 | self.sensors.add(Sensor( 94 | str, "capture-block-state", 95 | "JSON dict with the state of each capture block seen in this session.", 96 | default='{}', initial_status=Sensor.Status.NOMINAL)) 97 | for sensor in spead_write.io_sensors(): 98 | self.sensors.add(sensor) 99 | self.sensors.add(spead_write.device_status_sensor()) 100 | 101 | telstate_input = telstate.view(input_name) 102 | in_chunks = spead_write.chunks_from_telstate(telstate_input) 103 | DATA_LOST = 1 << FLAG_NAMES.index('data_lost') 104 | self._arrays = [ 105 | spead_write.make_array('flags', in_chunks, DATA_LOST, np.uint8, chunk_params) 106 | ] 107 | dump_size = sum(array.nbytes for array in self._arrays) 108 | self._executor_queue_space = QueueSpace(buffer_dumps * dump_size, loop=self.loop) 109 | spead_write.write_telstate(telstate, input_name, output_name, rename_src, s3_endpoint_url) 110 | 111 | rx = 
spead_write.make_receiver( 112 | endpoints, self._arrays, 113 | katsdpservices.get_interface_address(flag_interface), flags_ibv) 114 | self._writer = FlagWriter(self.sensors, rx, self) 115 | self._capture_task = loop.create_task(self._do_capture()) 116 | 117 | def _set_capture_block_state(self, capture_block_id: str, state: State) -> None: 118 | if state == State.COMPLETE: 119 | # Remove if present 120 | self._capture_block_state.pop(capture_block_id, None) 121 | else: 122 | self._capture_block_state[capture_block_id] = state 123 | dumped = json.dumps(self._capture_block_state, sort_keys=True, cls=EnumEncoder) 124 | self.sensors['capture-block-state'].value = dumped 125 | 126 | def _get_capture_block_state(self, capture_block_id: str) -> Optional[State]: 127 | return self._capture_block_state.get(capture_block_id, None) 128 | 129 | def _get_capture_stream_name(self, capture_block_id: str) -> str: 130 | """Get the capture-stream name of the output stream""" 131 | return "{}_{}".format(capture_block_id, self._output_name) 132 | 133 | def _get_prefix(self, capture_block_id: str) -> str: 134 | """Get the prefix (aka bucket name) to use with the chunk store""" 135 | # S3 doesn't allow underscores in bucket names 136 | return self._get_capture_stream_name(capture_block_id).replace('_', '-') 137 | 138 | def rechunker_group(self, cbid: str) -> Optional[RechunkerGroup]: 139 | extra = dict(capture_block_id=cbid) 140 | if not self._get_capture_block_state(cbid): 141 | logger.error("Received flags for CBID %s outside of init/done. " 142 | "These flags will be *discarded*.", cbid, extra=extra) 143 | return None 144 | 145 | if cbid not in self._flag_streams: 146 | self._flag_streams[cbid] = RechunkerGroup( 147 | self._executor, self._executor_queue_space, 148 | self._chunk_store, self._writer.sensors, self._get_prefix(cbid), self._arrays) 149 | return self._flag_streams[cbid] 150 | 151 | async def _do_capture(self) -> None: 152 | """Run the entire capture process. 153 | 154 | This runs for the lifetime of the server. 155 | """ 156 | try: 157 | spead_write.clear_io_sensors(self.sensors) 158 | self.sensors['status'].value = Status.WAIT_DATA 159 | logger.info("Waiting for data...") 160 | await self._writer.run() 161 | except Exception: 162 | logger.exception("Error in SPEAD receiver") 163 | self.sensors['device-status'].value = spead_write.DeviceStatus.FAIL 164 | finally: 165 | spead_write.clear_io_sensors(self.sensors) 166 | self.sensors['status'].value = Status.FINISHED 167 | self._executor.shutdown() 168 | 169 | async def request_capture_init(self, ctx, capture_block_id: str) -> None: 170 | """Start an observation""" 171 | if capture_block_id in self._capture_block_state: 172 | raise FailReply("Capture block ID {} is already active".format(capture_block_id)) 173 | self._set_capture_block_state(capture_block_id, State.CAPTURING) 174 | 175 | def _mark_cbid_complete(self, capture_block_id: str) -> None: 176 | """Inform other users of the on disk data that we are finished with a 177 | particular capture_block_id. 
178 | """ 179 | extra = dict(capture_block_id=capture_block_id) 180 | logger.info("Capture block %s flag capture complete.", capture_block_id, extra=extra) 181 | self._chunk_store.mark_complete(self._get_prefix(capture_block_id)) 182 | self._set_capture_block_state(capture_block_id, State.COMPLETE) 183 | 184 | async def _write_telstate_meta(self, capture_block_id: str) -> None: 185 | """Write out chunk information for the specified CBID to telstate.""" 186 | extra = dict(capture_block_id=capture_block_id) 187 | if capture_block_id not in self._flag_streams: 188 | logger.warning("No flag data received for cbid %s. Flag stream will not be usable.", 189 | capture_block_id, extra=extra) 190 | return 191 | rechunker_group = self._flag_streams[capture_block_id] 192 | chunk_info = await rechunker_group.get_chunk_info() 193 | capture_stream_name = self._get_capture_stream_name(capture_block_id) 194 | telstate_capture = self._telstate.view(capture_stream_name) 195 | telstate_capture['chunk_info'] = chunk_info 196 | logger.info("Written chunk information to telstate.", extra=extra) 197 | 198 | async def request_capture_done(self, ctx, capture_block_id: str) -> None: 199 | """Mark specified capture_block_id as complete. 200 | 201 | It flushes the flag cache and writes chunk info into telstate. 202 | """ 203 | if capture_block_id not in self._capture_block_state: 204 | raise FailReply("Specified capture block ID {} is unknown.".format(capture_block_id)) 205 | # Allow some time for stragglers to appear 206 | await asyncio.sleep(5, loop=self.loop) 207 | await self._write_telstate_meta(capture_block_id) 208 | self._mark_cbid_complete(capture_block_id) 209 | 210 | async def stop(self, cancel: bool = True) -> None: 211 | self._writer.stop() 212 | await self._capture_task 213 | await super().stop(cancel) 214 | -------------------------------------------------------------------------------- /katsdpdatawriter/vis_writer.py: -------------------------------------------------------------------------------- 1 | """Capture L0 visibilities from a SPEAD stream and write to a local chunk store. 2 | 3 | This process lives across multiple capture blocks. It writes weights and flags 4 | as well. 5 | 6 | The status sensor has the following states (with typical transition events): 7 | 8 | - `idle`: ready to start capture 9 | -> ?capture-init -> 10 | - `wait-data`: waiting for first heap of L0 visibilities from SPEAD stream 11 | -> first SPEAD heap arrives -> 12 | - `capturing`: SPEAD data is being captured 13 | -> capture stops -> 14 | - `finalising`: metadata is being written to telstate 15 | - `complete`: both data and metadata capture completed 16 | - `error`: capture failed 17 | -> ?capture-done -> 18 | - `idle`: ready to start capture again 19 | 20 | Objects are stored in chunks split over time and frequency but not baseline. 21 | The chunking is chosen to produce objects with sizes on the order of 10 MB. 22 | Objects have the following naming scheme: 23 | 24 | //[_[_]] 25 | 26 | - : "file name"/bucket in store i.e. _ 27 | - : unique ID from capture_init (Unix timestamp at init) 28 | - : name of specific data product (associated with L0 SPEAD stream) 29 | - : 'correlator_data' / 'weights' / 'flags' / etc. 
30 | - : chunk start index along N'th dimension 31 | 32 | The following useful object parameters are stored in telstate: 33 | 34 | - _s3_endpoint_url: endpoint URL of S3 gateway to Ceph 35 | - _chunk_info: {prefix, dtype, shape, chunks} dict per array 36 | """ 37 | 38 | import asyncio 39 | import logging 40 | import enum 41 | from concurrent.futures import ThreadPoolExecutor 42 | from typing import List, Tuple, Dict, Any, Optional, Mapping # noqa: F401 43 | 44 | import numpy as np 45 | import aiokatcp 46 | from aiokatcp import DeviceServer, Sensor, SensorSet, FailReply 47 | from katdal.visdatav4 import FLAG_NAMES 48 | import katdal.chunkstore 49 | import katsdptelstate 50 | from katsdptelstate.endpoint import Endpoint 51 | import katsdpservices 52 | import spead2.recv.asyncio 53 | 54 | import katsdpdatawriter 55 | from . import spead_write 56 | from .queue_space import QueueSpace 57 | 58 | 59 | logger = logging.getLogger(__name__) 60 | 61 | 62 | class Status(enum.Enum): 63 | IDLE = 1 64 | WAIT_DATA = 2 65 | CAPTURING = 3 66 | FINALISING = 4 67 | COMPLETE = 5 68 | ERROR = 6 69 | 70 | 71 | def _status_status(value: Status) -> aiokatcp.Sensor.Status: 72 | if value == Status.ERROR: 73 | return Sensor.Status.ERROR 74 | else: 75 | return Sensor.Status.NOMINAL 76 | 77 | 78 | class VisibilityWriter(spead_write.SpeadWriter): 79 | """Glue between :class:`~.SpeadWriter` and :class:`VisibilityWriterServer`.""" 80 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream, 81 | rechunker_group: spead_write.RechunkerGroup) -> None: 82 | super().__init__(sensors, rx) 83 | self._rechunker_group = rechunker_group 84 | 85 | def first_heap(self) -> None: 86 | self.sensors['status'].value = Status.CAPTURING 87 | 88 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> spead_write.RechunkerGroup: 89 | return self._rechunker_group 90 | 91 | 92 | class VisibilityWriterServer(DeviceServer): 93 | VERSION = "sdp-vis-writer-0.2" 94 | BUILD_STATE = "katsdpdatawriter-" + katsdpdatawriter.__version__ 95 | 96 | def __init__(self, host: str, port: int, loop: asyncio.AbstractEventLoop, 97 | endpoints: List[Endpoint], interface: Optional[str], ibv: bool, 98 | chunk_store: katdal.chunkstore.ChunkStore, 99 | chunk_params: spead_write.ChunkParams, 100 | telstate: katsdptelstate.TelescopeState, 101 | input_name: str, output_name: str, rename_src: Mapping[str, str], 102 | s3_endpoint_url: Optional[str], 103 | max_workers: int, buffer_dumps: int) -> None: 104 | super().__init__(host, port, loop=loop) 105 | self._endpoints = endpoints 106 | self._interface_address = katsdpservices.get_interface_address(interface) 107 | self._ibv = ibv 108 | self._chunk_store = chunk_store 109 | self._input_name = input_name 110 | self._output_name = output_name 111 | self._telstate = telstate 112 | self._rx = None # type: Optional[spead2.recv.asyncio.Stream] 113 | self._max_workers = max_workers 114 | 115 | telstate_input = telstate.view(input_name) 116 | in_chunks = spead_write.chunks_from_telstate(telstate_input) 117 | DATA_LOST = 1 << FLAG_NAMES.index('data_lost') 118 | self._arrays = [ 119 | spead_write.make_array('correlator_data', in_chunks, 0, np.complex64, chunk_params), 120 | spead_write.make_array('flags', in_chunks, DATA_LOST, np.uint8, chunk_params), 121 | spead_write.make_array('weights', in_chunks, 0, np.uint8, chunk_params), 122 | spead_write.make_array('weights_channel', in_chunks[:2], 0, np.float32, chunk_params) 123 | ] 124 | dump_size = sum(array.nbytes for array in self._arrays) 125 | self._buffer_size = 
buffer_dumps * dump_size 126 | spead_write.write_telstate(telstate, input_name, output_name, rename_src, s3_endpoint_url) 127 | 128 | self._capture_task = None # type: Optional[asyncio.Task] 129 | self._n_substreams = len(in_chunks[1]) 130 | 131 | self.sensors.add(Sensor( 132 | Status, 'status', 'The current status of the capture process', 133 | default=Status.IDLE, initial_status=Sensor.Status.NOMINAL, 134 | status_func=_status_status)) 135 | for sensor in spead_write.io_sensors(): 136 | self.sensors.add(sensor) 137 | self.sensors.add(spead_write.device_status_sensor()) 138 | 139 | async def _do_capture(self, capture_stream_name: str, rx: spead2.recv.asyncio.Stream) -> None: 140 | """Capture data for a single capture block""" 141 | writer = None 142 | rechunker_group = None 143 | executor = ThreadPoolExecutor(self._max_workers) 144 | executor_queue_space = QueueSpace(self._buffer_size, loop=self.loop) 145 | try: 146 | spead_write.clear_io_sensors(self.sensors) 147 | prefix = capture_stream_name.replace('_', '-') # S3 doesn't allow underscores 148 | rechunker_group = spead_write.RechunkerGroup( 149 | executor, executor_queue_space, 150 | self._chunk_store, self.sensors, prefix, self._arrays) 151 | writer = VisibilityWriter(self.sensors, rx, rechunker_group) 152 | self.sensors['status'].value = Status.WAIT_DATA 153 | 154 | await writer.run(stops=self._n_substreams) 155 | 156 | self.sensors['status'].value = Status.FINALISING 157 | view = self._telstate.view(capture_stream_name) 158 | view['chunk_info'] = await rechunker_group.get_chunk_info() 159 | rechunker_group = None # Tells except block not to clean up 160 | self._chunk_store.mark_complete(prefix) 161 | self.sensors['status'].value = Status.COMPLETE 162 | except Exception: 163 | logger.exception('Exception in capture task') 164 | self.sensors['status'].value = Status.ERROR 165 | self.sensors['device-status'].value = spead_write.DeviceStatus.FAIL 166 | finally: 167 | spead_write.clear_io_sensors(self.sensors) 168 | if rechunker_group is not None: 169 | # Has the side effect of doing cleanup 170 | await rechunker_group.get_chunk_info() 171 | # Shouldn't be any pending tasks, because get_chunk_info should wait 172 | executor.shutdown() 173 | 174 | async def request_capture_init(self, ctx, capture_block_id: str) -> None: 175 | """Start listening for L0 data""" 176 | if self._capture_task is not None: 177 | logger.info("Ignoring capture_init: already capturing") 178 | raise FailReply('Already capturing') 179 | self.sensors['status'].value = Status.WAIT_DATA 180 | self.sensors['device-status'].value = spead_write.DeviceStatus.OK 181 | capture_stream_name = self._telstate.join(capture_block_id, self._output_name) 182 | self._rx = spead_write.make_receiver( 183 | self._endpoints, self._arrays, self._interface_address, self._ibv) 184 | self._capture_task = self.loop.create_task(self._do_capture(capture_stream_name, self._rx)) 185 | logger.info('Starting capture to %s', capture_stream_name) 186 | 187 | async def capture_done(self) -> None: 188 | """Implementation of :meth:`request_capture_done`. 189 | 190 | This is split out to allow it to be called on ``SIGINT``. 
191 | """ 192 | if self._capture_task is None: 193 | return 194 | capture_task = self._capture_task 195 | # Give it a chance to stop on its own from stop packets 196 | try: 197 | logger.info('Waiting for capture task (5s timeout)') 198 | await asyncio.wait_for(asyncio.shield(capture_task), timeout=5) 199 | except asyncio.TimeoutError: 200 | if self._capture_task is not capture_task: 201 | return # Someone else beat us to the cleanup 202 | logger.info('Stopping receiver and waiting for capture task') 203 | if self._rx: 204 | self._rx.stop() 205 | await capture_task 206 | 207 | if self._capture_task is not capture_task: 208 | return # Someone else beat us to the cleanup 209 | if self._rx: 210 | self._rx.stop() 211 | self._capture_task = None 212 | self.sensors['status'].value = Status.IDLE 213 | logger.info('Capture complete') 214 | 215 | async def request_capture_done(self, ctx) -> None: 216 | """Stop capturing, which cleans up the capturing task.""" 217 | if self._capture_task is None: 218 | logger.info("Ignoring capture_done: already explicitly stopped") 219 | raise FailReply('Not capturing') 220 | await self.capture_done() 221 | 222 | async def stop(self, cancel=True) -> None: 223 | await self.capture_done() 224 | await super().stop(cancel) 225 | -------------------------------------------------------------------------------- /katsdpdatawriter/rechunk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import itertools 3 | from typing import Tuple, Dict, Any, Optional 4 | 5 | import numpy as np 6 | import numpy.lib.format as _np_lib_format # noqa: F401 # Make mypy happy with np.lib.format 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | Offset = Tuple[int, ...] 13 | Shape = Tuple[int, ...] 14 | Chunks1D = Tuple[int, ...] 15 | Chunks = Tuple[Chunks1D, ...] 16 | Slices = Tuple[slice, ...] 17 | 18 | 19 | def _offset_to_size_1d(chunks: Chunks1D) -> Dict[int, int]: 20 | """Maps offset of start of each chunk to the size of that chunk 21 | 22 | Parameters 23 | ---------- 24 | chunks : tuple of int 25 | Chunk sizes 26 | 27 | Returns 28 | ------- 29 | dict 30 | """ 31 | out = {} 32 | cur = 0 33 | for c in chunks: 34 | if c <= 0: 35 | raise ValueError('Chunk sizes must be positive') 36 | out[cur] = c 37 | cur += c 38 | return out 39 | 40 | 41 | def _offset_to_size(chunks: Chunks) -> Tuple[Dict[int, int], ...]: 42 | """Multi-dimensional version of :func:`_offset_to_size_1d`.""" 43 | return tuple(_offset_to_size_1d(c) for c in chunks) 44 | 45 | 46 | def _split_chunks_1d(in_chunks: Chunks1D, out_chunks: Chunks1D) -> Dict[int, Slices]: 47 | """ 48 | Divide output chunks into groups that align to the input chunks. 49 | 50 | For each input chunk, a tuple of slices is generated to index within that 51 | input chunk. The result is a dictionary indexed by offset. 
52 | 53 | >>> _split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2)) 54 | { 55 | 0: (slice(0, 1), slice(1, 4)), 56 | 4: (slice(0, 2), slice(2, 4), slice(4, 6)), 57 | 10: (slice(0, 2),) 58 | } 59 | 60 | Raises 61 | ------ 62 | ValueError 63 | if an output chunk spans multiple input chunks 64 | ValueError 65 | if ``sum(in_chunks) != sum(out_chunks)`` 66 | """ 67 | out = {} 68 | pos = 0 69 | if sum(in_chunks) != sum(out_chunks): 70 | raise ValueError('chunks imply different shapes') 71 | offset = 0 72 | for c in in_chunks: 73 | slices = [] 74 | cur = 0 75 | while cur < c: 76 | oc = out_chunks[pos] 77 | pos += 1 78 | slices.append(slice(cur, cur + oc)) 79 | cur += oc 80 | if cur > c: 81 | raise ValueError('input and output chunks do not align') 82 | out[offset] = tuple(slices) 83 | offset += c 84 | return out 85 | 86 | 87 | def _split_chunks(in_chunks: Chunks, out_chunks: Chunks) -> Tuple[Dict[int, Slices], ...]: 88 | """Multi-dimensional version of :meth:`_split_chunks_1d`.""" 89 | if len(in_chunks) != len(out_chunks): 90 | raise ValueError('in_chunks and out_chunks have different length') 91 | return tuple(_split_chunks_1d(*item) for item in zip(in_chunks, out_chunks)) 92 | 93 | 94 | class Rechunker: 95 | """ 96 | Takes a stream of chunks and generates output with same data but new 97 | chunking scheme. 98 | 99 | This is similar in concept to dask's rechunk, but in a streaming fashion 100 | (with the assumption that time is on the first axis). It is more limited 101 | though: non-time axes can only be split, not re-combined. The time axis 102 | must be size-1 chunks on input, but can be larger on output (accumulation 103 | in time). 104 | 105 | Incoming chunks whose coordinates differ only in the time axis must be 106 | received in order (out-of-order chunks will be discarded). Chunks with 107 | different non-time coordinates are handled completely independently. This 108 | does not apply when no time accumulation is being done, in which case 109 | chunks can arrive in any order. 110 | 111 | Memory usage depends on whether accumulation-in-time is being done. If 112 | so, it stores data internally (enough for one complete output dump). If 113 | not, there is no internal data storage, and memory usage only scales with 114 | the metadata (number of chunks etc). 115 | 116 | Do not instantiate this class directly. Instead, subclass it and implement 117 | :meth:`output`. 118 | 119 | .. warning:: 120 | 121 | While this class has asynchronous methods, it is not safe to use it 122 | from more than one task at once i.e. wait for each async call to 123 | complete before making another one. 124 | 125 | Parameters 126 | ---------- 127 | name : str 128 | Name of this array (purely for logging) 129 | in_chunks : tuple of tuple of int 130 | Chunking scheme of the input. The first element must be ``(1,)`` 131 | out_chunks : tuple of tuple of int 132 | Chunking scheme of the output. The first element must be a 1-tuple, 133 | with the value indicating the size of each chunk (except possibly 134 | the last) in time. 135 | fill_value 136 | Value to store where no input is received for some of the input 137 | chunks that combine to form an output chunk. 138 | dtype : numpy dtype 139 | Data type of the array 140 | 141 | Raises 142 | ------ 143 | ValueError 144 | if the restrictions on the input and output chunks are not met 145 | """ 146 | 147 | class _Item: 148 | """Intermediate chunk under construction. 
149 | 150 | An intermediate chunk has the output chunk size in the time axis and 151 | the input chunk size in other axes. 152 | """ 153 | def __init__(self, offset: Offset, initial_value: np.ndarray, present: bool) -> None: 154 | self.offset = offset 155 | self.value: Optional[np.ndarray] = initial_value 156 | self.present = np.full(initial_value.shape[:1], present, np.bool_) 157 | 158 | def add(self, offset: Offset, value: np.ndarray) -> None: 159 | """Add a new input chunk.""" 160 | assert self.value is not None 161 | assert offset[1:] == self.offset[1:] 162 | if value.shape[1:] != self.value.shape[1:] or value.shape[0] != 1: 163 | raise ValueError('value has wrong shape') 164 | rel = offset[0] - self.offset[0] 165 | self.value[rel:rel+1] = value 166 | self.present[rel] = True 167 | 168 | def truncate(self, times: int) -> None: 169 | assert self.value is not None 170 | if times < self.value.shape[0]: 171 | self.value = self.value[:times] 172 | self.present = self.present[:times] 173 | 174 | def __init__(self, name: str, 175 | in_chunks: Chunks, 176 | out_chunks: Chunks, 177 | fill_value: Any, dtype: Any) -> None: 178 | if in_chunks[0] != (1,): 179 | raise ValueError('in_chunks does not start with (1,)') 180 | if len(out_chunks[0]) != 1: 181 | raise ValueError('out_chunks does not start with a singleton') 182 | 183 | self.name = name 184 | self.in_chunks = in_chunks 185 | self.out_chunks = out_chunks 186 | self.fill_value = fill_value 187 | self.dtype = np.dtype(dtype) 188 | self._items = {} # type: Dict[Tuple[int, ...], Rechunker._Item] # Indexed by offset[1:] 189 | self._sizes = _offset_to_size(in_chunks[1:]) 190 | self._split_chunks = _split_chunks(in_chunks[1:], out_chunks[1:]) 191 | self._time_accum = out_chunks[0][0] 192 | self._n_dumps = 0 193 | 194 | def out_of_order(self, received: int, seen: int) -> None: 195 | """Report a chunk received from the past. 196 | 197 | This can be overridden to change the reporting channel. 198 | """ 199 | logger.warning( 200 | "Received old chunk for array %s (%d < %d)", 201 | self.name, received, seen) # pragma: nocover 202 | 203 | def _item_shape(self, offset: Offset) -> Shape: 204 | """Expected shape for the :class:`Item` holding the input chunk starting at `offset`.""" 205 | sizes = tuple(s[ofs] for ofs, s in zip(offset[1:], self._sizes)) 206 | return (self._time_accum,) + sizes 207 | 208 | async def _flush(self, item: _Item) -> None: 209 | """Send `item` to :meth:`output`.""" 210 | slices = tuple(s[ofs] for ofs, s in zip(item.offset[1:], self._split_chunks)) 211 | for idx in itertools.product(*slices): 212 | assert item.value is not None 213 | full_idx = np.index_exp[0:len(item.value)] + idx 214 | offset = tuple(s.start + offset for s, offset in zip(full_idx, item.offset)) 215 | await self.output(offset, item.value[full_idx], item.present) 216 | item.value = None # Allow GC to reclaim memory now 217 | 218 | async def _get_item(self, offset: Offset) -> Optional['_Item']: 219 | """Get the item that should hold the input chunk starting at `offset`. 220 | 221 | It returns ``None`` if the offset is too far in the past to be captured. 
222 | """ 223 | key = offset[1:] 224 | # Round down to the start of the accumulation 225 | item_offset = (offset[0] // self._time_accum * self._time_accum,) + key 226 | item = self._items.get(key) 227 | if item is None or item.offset[0] < item_offset[0]: 228 | if item is not None: 229 | await self._flush(item) 230 | shape = self._item_shape(offset) 231 | initial_value = np.full(shape, self.fill_value, self.dtype) 232 | item = self._Item(item_offset, initial_value, False) 233 | self._items[key] = item 234 | elif item.offset[0] > item_offset[0]: 235 | self.out_of_order(offset[0], item.offset[0]) 236 | item = None 237 | return item 238 | 239 | async def add(self, offset: Offset, value: np.ndarray) -> None: 240 | """Add a new incoming chunk. 241 | 242 | The `value` is guaranteed to be copied, so it is safe for the caller 243 | to update it after the call returns (which means the coroutine 244 | *completing*, not just yielding). 245 | 246 | Parameters 247 | ---------- 248 | offset : tuple of int 249 | Start coordinates of the chunk. It must be aligned to the 250 | configured chunking scheme. 251 | values : array-like 252 | Values of the chunk. 253 | 254 | Raises 255 | ------ 256 | ValueError 257 | if `offset` has the wrong number of dimensions 258 | ValueError 259 | if `value` has the wrong shape for `offset` 260 | KeyError 261 | if `offset` does not match the input chunking scheme 262 | """ 263 | if len(offset) != len(self.in_chunks): 264 | raise ValueError('wrong number of dimensions') 265 | if self._time_accum > 1: 266 | item = await self._get_item(offset) 267 | if item is not None: 268 | item.add(offset, value) 269 | else: 270 | shape = self._item_shape(offset) 271 | # Ensure the dtype and force a copy at the same time 272 | value = np.asarray(value).astype(self.dtype, copy=True) 273 | if value.shape != shape: 274 | raise ValueError('value has wrong shape') 275 | item = self._Item(offset, value, True) 276 | await self._flush(item) 277 | self._n_dumps = max(self._n_dumps, offset[0] + 1) 278 | 279 | async def close(self) -> None: 280 | """Flush out any partially buffered items""" 281 | for item in self._items.values(): 282 | # Truncate to last seen dump 283 | times = self._n_dumps - item.offset[0] 284 | item.truncate(times) 285 | await self._flush(item) 286 | self._items.clear() 287 | 288 | def _get_shape(self) -> Shape: 289 | return (self._n_dumps,) + tuple(sum(c) for c in self.out_chunks[1:]) 290 | 291 | def _get_chunks(self) -> Chunks: 292 | c = self.out_chunks[0][0] 293 | full = self._n_dumps // c 294 | last = self._n_dumps % c 295 | if last > 0: 296 | time_chunks = (c,) * full + (last,) 297 | else: 298 | time_chunks = (c,) * full 299 | return (time_chunks,) + self.out_chunks[1:] 300 | 301 | def get_chunk_info(self, prefix: str) -> Dict[str, Any]: 302 | """Get chunk info to be placed into telstate to describe the output. 303 | 304 | Parameters 305 | ---------- 306 | prefix : str 307 | The array name prefix to retrieve the chunks from the chunk store 308 | """ 309 | return { 310 | 'prefix': prefix, 311 | 'dtype': np.lib.format.dtype_to_descr(self.dtype), 312 | 'shape': self._get_shape(), 313 | 'chunks': self._get_chunks() 314 | } 315 | 316 | async def output(self, offset: Offset, value: np.ndarray, present: np.ndarray) -> None: 317 | """Called with each output chunk. 318 | 319 | It is safe for the callee to save a reference to `value`: it is 320 | guaranteed that this class will not reuse the memory. 
321 | 322 | Parameters 323 | ---------- 324 | offset 325 | Position of the start of this chunk 326 | value 327 | Chunk data 328 | present 329 | 1D boolean array indexed by time, indicating which of the input 330 | chunks that were accumulated into the output chunks are actually 331 | present (rather than replaced by the fill value). 332 | """ 333 | raise NotImplementedError # pragma: nocover 334 | -------------------------------------------------------------------------------- /katsdpdatawriter/spead_write.py: -------------------------------------------------------------------------------- 1 | """ 2 | Receive heaps from a SPEAD stream and write corresponding data to a chunk store. 3 | """ 4 | 5 | import copy 6 | import argparse 7 | import os.path 8 | import time 9 | import enum 10 | import logging 11 | import concurrent.futures 12 | import asyncio 13 | import socket 14 | import functools 15 | from collections import Counter 16 | from typing import (Optional, Any, Sequence, Iterable, # noqa: F401 17 | Mapping, MutableMapping, Set, Dict, Tuple) 18 | 19 | import numpy as np 20 | import attr 21 | from aiokatcp import Sensor, SensorSet, SensorSampler 22 | import spead2 23 | import spead2.recv.asyncio 24 | import katdal.chunkstore 25 | import katdal.chunkstore_npy 26 | import katdal.chunkstore_s3 27 | import katsdpservices 28 | import katsdptelstate 29 | from katsdptelstate.endpoint import Endpoint 30 | 31 | from . import rechunk 32 | from .rechunk import Chunks, Offset 33 | from .queue_space import QueueSpace 34 | 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | 39 | @attr.s 40 | class ChunkParams: 41 | max_size = attr.ib() # type: int # maximum size in bytes 42 | max_channels = attr.ib(default=None) # type: Optional[int] 43 | max_dumps = attr.ib(default=None) # type: Optional[int] 44 | 45 | @staticmethod 46 | def from_args(args: argparse.Namespace): 47 | """Create from command-line arguments (see :func:`add_common_args`)""" 48 | if args.buffer_dumps < args.obj_max_dumps: 49 | logger.warning('Decreasing --obj-max-dumps to match --buffer-dumps (%d)', 50 | args.buffer_dumps) 51 | max_dumps = min(args.obj_max_dumps, args.buffer_dumps) 52 | return ChunkParams(args.obj_size_mb * 1e6, args.obj_max_channels, max_dumps) 53 | 54 | 55 | # TODO: move this into aiokatcp 56 | class DeviceStatus(enum.Enum): 57 | """Standard katcp device status""" 58 | OK = 0 59 | DEGRADED = 1 60 | FAIL = 2 61 | 62 | 63 | def _device_status_status(value: DeviceStatus) -> Sensor.Status: 64 | """Sets katcp status for device-status sensor from value""" 65 | if value == DeviceStatus.OK: 66 | return Sensor.Status.NOMINAL 67 | elif value == DeviceStatus.DEGRADED: 68 | return Sensor.Status.WARN 69 | else: 70 | return Sensor.Status.ERROR 71 | 72 | 73 | def _warn_if_positive(value: float) -> Sensor.Status: 74 | return Sensor.Status.WARN if value > 0 else Sensor.Status.NOMINAL 75 | 76 | 77 | # Just to work around https://github.com/python/mypy/issues/4729 78 | def _dtype_converter(dtype: Any) -> np.dtype: 79 | return np.dtype(dtype) 80 | 81 | 82 | def io_sensors() -> Sequence[Sensor]: 83 | """Create input and output counter sensors.""" 84 | def make_sensor(*args, **kwargs) -> Sensor: 85 | kwargs['auto_strategy'] = SensorSampler.Strategy.EVENT_RATE 86 | kwargs['auto_strategy_parameters'] = (0.05, 10.0) 87 | return Sensor(*args, **kwargs) 88 | 89 | return [ 90 | make_sensor( 91 | int, "input-incomplete-heaps-total", 92 | "Number of heaps dropped due to being incomplete. 
(prometheus: counter)", 93 | status_func=_warn_if_positive), 94 | make_sensor( 95 | int, "input-too-old-heaps-total", 96 | "Number of heaps dropped because they are too late. (prometheus: counter)", 97 | status_func=_warn_if_positive), 98 | make_sensor( 99 | int, "input-missing-heaps-total", 100 | "Number of gaps in the heaps seen. (prometheus: counter)", 101 | status_func=_warn_if_positive), 102 | make_sensor( 103 | int, "input-bytes-total", 104 | "Number of payload bytes received in this session. (prometheus: counter)", 105 | "B"), 106 | make_sensor( 107 | int, "input-heaps-total", 108 | "Number of input heaps captured in this session. (prometheus: counter)"), 109 | make_sensor( 110 | int, "input-dumps-total", 111 | "Number of complete input dumps captured in this session. (prometheus: counter)"), 112 | make_sensor( 113 | int, "output-bytes-total", 114 | "Number of payload bytes written to chunk store in this session. (prometheus: counter)", 115 | "B"), 116 | make_sensor( 117 | int, "output-chunks-total", 118 | "Number of chunks written to chunk store in this session. (prometheus: counter)"), 119 | make_sensor( 120 | float, "output-seconds-total", 121 | "Accumulated time spent writing chunks. (prometheus: counter)", 122 | "s"), 123 | make_sensor( 124 | float, "output-seconds", 125 | "Time spent on the last chunk write.", 126 | "s"), 127 | make_sensor( 128 | int, "active-chunks", 129 | "Number of chunks currently being written. (prometheus: gauge)"), 130 | make_sensor( 131 | int, "queued-bytes", 132 | "Number of bytes that have been received but not yet written. (prometheus: gauge)"), 133 | make_sensor( 134 | int, "max-queued-bytes", 135 | "Maximum value of queued-bytes sensor for this capture block. (prometheus: gauge)") 136 | ] 137 | 138 | 139 | def device_status_sensor() -> Sensor: 140 | """Create a sensor to track device status""" 141 | return Sensor(DeviceStatus, 'device-status', 'Health sensor', 142 | default=DeviceStatus.OK, initial_status=Sensor.Status.NOMINAL, 143 | status_func=_device_status_status) 144 | 145 | 146 | def clear_io_sensors(sensors: SensorSet) -> None: 147 | """Zero the input and output counters in a sensor set""" 148 | now = time.time() 149 | for name in ['input-incomplete-heaps-total', 150 | 'input-too-old-heaps-total', 151 | 'input-missing-heaps-total', 152 | 'input-bytes-total', 153 | 'input-heaps-total', 154 | 'input-dumps-total', 155 | 'output-bytes-total', 156 | 'output-chunks-total', 157 | 'output-seconds-total', 158 | 'active-chunks', 159 | 'queued-bytes', 160 | 'max-queued-bytes']: 161 | sensor = sensors[name] 162 | sensor.set_value(sensor.stype(0), timestamp=now) 163 | 164 | 165 | @attr.s(frozen=True) 166 | class Array: 167 | """A single array being received over SPEAD. See :class:`.Rechunker` for details.""" 168 | 169 | name = attr.ib() # type: str # Excludes the prefix 170 | in_chunks = attr.ib() # type: Tuple[Tuple[int, ...], ...] 171 | out_chunks = attr.ib() # type: Tuple[Tuple[int, ...], ...] 
172 | fill_value = attr.ib() # type: Any 173 | dtype = attr.ib(converter=_dtype_converter) # type: np.dtype 174 | 175 | @property 176 | def substreams(self): 177 | return int(np.product([len(c) for c in self.in_chunks])) 178 | 179 | @property 180 | def shape(self): 181 | """Shape of a single input dump""" 182 | return tuple(sum(c) for c in self.in_chunks) 183 | 184 | @property 185 | def nbytes(self): 186 | """Number of bytes in a single input dump""" 187 | return int(np.product(self.shape)) * self.dtype.itemsize 188 | 189 | 190 | def make_array(name, in_chunks: Tuple[Tuple[int, ...], ...], 191 | fill_value: Any, dtype: Any, chunk_params: ChunkParams) -> Array: 192 | """Create an :class:`Array` with computed output chunk scheme. 193 | 194 | The output chunks are determined by splitting the input chunks along axes 0 195 | and 1 (time and frequency in typical use) to produce chunks subject to the 196 | constraints of `chunk_params`. 197 | """ 198 | # Shape of a single input chunk 199 | assert in_chunks[0] == (1,) # Only one chunk in time, with one dump 200 | shape = tuple(c[0] for c in in_chunks) 201 | if chunk_params.max_channels is not None: 202 | max_dim_elements = {1: chunk_params.max_channels} 203 | else: 204 | max_dim_elements = {} 205 | # Compute the decomposition of each input chunk 206 | chunks = katdal.chunkstore.generate_chunks( 207 | shape, dtype, chunk_params.max_size, 208 | dims_to_split=(0, 1), power_of_two=True, 209 | max_dim_elements=max_dim_elements) # type: Tuple[Tuple[int, ...], ...] 210 | # Accumulate in time to make up the chunk size 211 | chunk_size = np.dtype(dtype).itemsize * np.prod([c[0] for c in chunks]) 212 | n_time = 1 213 | while (chunk_size * 2 <= chunk_params.max_size 214 | and (chunk_params.max_dumps is None or n_time * 2 <= chunk_params.max_dumps)): 215 | n_time *= 2 216 | chunk_size *= 2 217 | # the ignore is to suppress see https://github.com/python/mypy/issues/6337 218 | chunks = ((n_time,),) + chunks[1:] # type: ignore 219 | logger.info('Using chunks of shape %s (%.3fMB) for %s', 220 | tuple(c[0] for c in chunks), chunk_size / 1e6, name) 221 | # Repeat for each input chunk 222 | out_chunks = tuple(outc * len(inc) for inc, outc in zip(in_chunks, chunks)) 223 | return Array(name, in_chunks, out_chunks, fill_value, dtype) 224 | 225 | 226 | class ChunkStoreRechunker(rechunk.Rechunker): 227 | """Rechunker that outputs data to a chunk store via an executor. 228 | 229 | The name is used as the array name in the chunk store. 230 | 231 | .. note:: 232 | 233 | The :meth:`output` coroutine will return as soon as it has posted the 234 | chunk to the executor. It only blocks to acquire from the 235 | `executor_queue_space`. 236 | """ 237 | def __init__( 238 | self, 239 | executor: concurrent.futures.Executor, 240 | executor_queue_space: QueueSpace, 241 | chunk_store: katdal.chunkstore.ChunkStore, 242 | sensors: SensorSet, name: str, 243 | in_chunks: Chunks, out_chunks: Chunks, 244 | fill_value: Any, dtype: Any) -> None: 245 | super().__init__(name, in_chunks, out_chunks, fill_value, dtype) 246 | self.executor = executor 247 | self.executor_queue_space = executor_queue_space 248 | self.chunk_store = chunk_store 249 | self.chunk_store.create_array(self.name) 250 | self.sensors = sensors 251 | self._futures = set() # type: Set[asyncio.Future[float]] 252 | self._loop = asyncio.get_event_loop() 253 | 254 | def _put_chunk(self, slices: Tuple[slice, ...], value: np.ndarray) -> float: 255 | """Put a chunk into the chunk store and return statistics. 
256 | 257 | This is run in a separate thread, using an executor. 258 | """ 259 | def increment_active_chunks(): 260 | self.sensors['active-chunks'].value += 1 261 | 262 | def decrement_active_chunks(): 263 | self.sensors['active-chunks'].value -= 1 264 | 265 | start = time.monotonic() 266 | self._loop.call_soon_threadsafe(increment_active_chunks) 267 | try: 268 | self.chunk_store.put_chunk(self.name, slices, value) 269 | finally: 270 | self._loop.call_soon_threadsafe(decrement_active_chunks) 271 | end = time.monotonic() 272 | return end - start 273 | 274 | def _update_stats(self, nbytes: int, nbytes_present: int, 275 | future: 'asyncio.Future[float]') -> None: 276 | """Done callback for a future running :meth:`_put_chunk`. 277 | 278 | This is run on the event loop, so can safely update sensors. It also 279 | logs any errors. 280 | """ 281 | self._futures.remove(future) 282 | self.executor_queue_space.release(nbytes) 283 | self.sensors['queued-bytes'].value -= nbytes_present 284 | try: 285 | elapsed = future.result() 286 | except asyncio.CancelledError: 287 | pass 288 | except Exception: 289 | logger.exception('Failed to write a chunk to %s', self.name) 290 | self.sensors['device-status'].value = DeviceStatus.FAIL 291 | else: 292 | self.sensors['output-chunks-total'].value += 1 293 | self.sensors['output-bytes-total'].value += nbytes_present 294 | self.sensors['output-seconds-total'].value += elapsed 295 | self.sensors['output-seconds'].value = elapsed 296 | 297 | async def output(self, offset: Offset, value: np.ndarray, present: np.ndarray) -> None: 298 | slices = tuple(slice(ofs, ofs + size) for ofs, size in zip(offset, value.shape)) 299 | await self.executor_queue_space.acquire(value.nbytes) 300 | future = asyncio.ensure_future( 301 | self._loop.run_in_executor(self.executor, self._put_chunk, slices, value)) 302 | self._futures.add(future) 303 | nbytes_present = value.nbytes * np.sum(present) // len(present) 304 | callback = functools.partial(self._update_stats, value.nbytes, nbytes_present) 305 | future.add_done_callback(callback) 306 | 307 | def out_of_order(self, received: int, seen: int) -> None: 308 | self.sensors['input-too-old-heaps-total'].value += 1 309 | 310 | async def close(self) -> None: 311 | """Close and wait for all asynchronous writes to complete.""" 312 | await super().close() 313 | # asyncio.wait is implemented by adding a done callback to each 314 | # future. Done callbacks are run in order of addition, so when 315 | # wait returns, we are guaranteed that the done callbacks have 316 | # run. 317 | if self._futures: 318 | await asyncio.wait(self._futures) 319 | 320 | 321 | class RechunkerGroup: 322 | """Collects a number of rechunkers with common input chunk scheme. 323 | 324 | The arrays need not all have the same shape. However, there must be a 325 | prefix of the axes on which they all have the same chunking scheme, and 326 | on the remaining axes there can only be a single chunk. For example, the 327 | following chunking schemes could co-exist in a group. 328 | - ((2, 2), (3, 3, 3)) 329 | - ((2, 2), (3, 3, 3), (4,), (3,)) 330 | - ((2, 2), (3, 3, 3), (6,)) 331 | 332 | Parameters 333 | ---------- 334 | executor 335 | Executor used for asynchronous writes to the chunk store. 336 | executor_queue_space 337 | :class:`QueueSpace` bounding the number of bytes that can be in flight 338 | within `executor`. 339 | chunk_store 340 | Chunk-store into which output chunks are written. 
341 | sensors 342 | Sensor set containing an ``input-dumps-total`` sensor, which will 343 | be updated to reflect the highest dump index seen. 344 | prefix 345 | Prefix for naming arrays in the chunk store. It is prepended to the 346 | names given in `arrays` when storing the chunks. 347 | arrays 348 | Descriptions of the incoming arrays. 349 | """ 350 | def __init__(self, 351 | executor: concurrent.futures.Executor, 352 | executor_queue_space: QueueSpace, 353 | chunk_store: katdal.chunkstore.ChunkStore, 354 | sensors: SensorSet, prefix: str, 355 | arrays: Sequence[Array]) -> None: 356 | self.prefix = prefix 357 | self.arrays = list(arrays) 358 | self.sensors = sensors 359 | self._expected = Counter() # type: MutableMapping[Offset, int] 360 | self._seen = Counter() # type: MutableMapping[Offset, int] 361 | self._rechunkers = [ 362 | ChunkStoreRechunker(executor, executor_queue_space, 363 | chunk_store, sensors, 364 | chunk_store.join(prefix, a.name), 365 | a.in_chunks, a.out_chunks, 366 | a.fill_value, a.dtype) for a in arrays] 367 | 368 | async def add(self, offset_prefix: Offset, values: Iterable[np.ndarray]) -> None: 369 | """Add a value per array for rechunking. 370 | 371 | For each array passed to the constructor, there must be corresponding 372 | element in `values`. Each such value has an offset given by 373 | `offset_prefix` plus enough 0's to match the dimensionality. 374 | """ 375 | dump_index = offset_prefix[0] 376 | if dump_index >= self.sensors['input-dumps-total'].value: 377 | self.sensors['input-dumps-total'].value = dump_index + 1 378 | 379 | nbytes = sum(value.nbytes for value in values) 380 | self.sensors['input-heaps-total'].value += 1 381 | self.sensors['input-bytes-total'].value += nbytes 382 | queued_bytes = self.sensors['queued-bytes'].value + nbytes 383 | self.sensors['queued-bytes'].value = queued_bytes 384 | if queued_bytes > self.sensors['max-queued-bytes'].value: 385 | self.sensors['max-queued-bytes'].value = queued_bytes 386 | 387 | # Update our idea of how many heaps we've missed out on, assuming heaps 388 | # for each substream arrive in order. 389 | substream = offset_prefix[1:] 390 | old_missing = self._expected[substream] - self._seen[substream] 391 | if dump_index >= self._expected[substream]: 392 | self._expected[substream] = dump_index + 1 393 | self._seen[substream] += 1 394 | new_missing = self._expected[substream] - self._seen[substream] 395 | self.sensors['input-missing-heaps-total'].value += new_missing - old_missing 396 | 397 | for rechunker, value in zip(self._rechunkers, values): 398 | offset = offset_prefix + (0,) * (value.ndim - len(offset_prefix)) 399 | await rechunker.add(offset, value) 400 | 401 | async def get_chunk_info(self) -> Dict[str, Dict[str, Any]]: 402 | """Get the chunk information to place into telstate to describe the arrays. 403 | 404 | This closes the rechunkers (flushing partial output chunks), so no 405 | further calls to :meth:`add` should be made. 406 | """ 407 | for rechunker in self._rechunkers: 408 | await rechunker.close() 409 | return {array.name: rechunker.get_chunk_info(self.prefix) 410 | for array, rechunker in zip(self.arrays, self._rechunkers)} 411 | 412 | 413 | class SpeadWriter: 414 | """Base class to receive data over SPEAD and write it to a chunk store. 415 | 416 | It supports multiplexing between instances of :class:`RechunkerGroup` based 417 | on contents of the SPEAD heaps. This is implemented by subclassing and 418 | overriding :meth:`rechunker_group`. 
419 | 420 | Parameters 421 | ---------- 422 | sensors 423 | Server sensors including all those returned by :meth:`io_sensors`. 424 | These are updated as heaps are received. 425 | rx 426 | SPEAD receiver. It should be set up with :attr:`stop_on_stop_item` set 427 | to false. :meth:`make_receiver` returns a suitable receiver with 428 | optimised memory pool allocations. 429 | """ 430 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream) -> None: 431 | self.sensors = sensors 432 | self.rx = rx 433 | 434 | async def run(self, stops: int = None) -> None: 435 | """Run the receiver. 436 | 437 | Parameters 438 | ---------- 439 | stops 440 | If specified, this method will stop once it has seen `stops` stop 441 | items. Otherwise, it will run until cancelled or :meth:`stop` is 442 | called. 443 | """ 444 | first = True 445 | n_stop = 0 446 | ig = spead2.ItemGroup() 447 | async for heap in self.rx: 448 | if first: 449 | self.first_heap() 450 | first = False 451 | updated = {} # type: Dict[str, spead2.Item] 452 | if heap.is_end_of_stream(): 453 | n_stop += 1 454 | if stops is not None and n_stop == stops: 455 | self.rx.stop() 456 | break 457 | else: 458 | updated = {} 459 | elif isinstance(heap, spead2.recv.IncompleteHeap): 460 | self.sensors['input-incomplete-heaps-total'].value += 1 461 | else: 462 | try: 463 | updated = ig.update(heap) 464 | except Exception: 465 | logger.exception('Invalid heap') 466 | 467 | if 'timestamp' in updated: 468 | channel0 = int(updated['frequency'].value) 469 | dump_index = int(updated['dump_index'].value) 470 | group = self.rechunker_group(updated) 471 | # Check if subclass decided the heap was good 472 | if group is not None: 473 | # Get values and add time dimension 474 | values = [ig[array.name].value[np.newaxis, ...] for array in group.arrays] 475 | await group.add((dump_index, channel0), values) 476 | 477 | def stop(self) -> None: 478 | """Gracefully stop :meth:`run`.""" 479 | self.rx.stop() 480 | 481 | def first_heap(self): 482 | """Callback to notify about the first heap being received. 483 | 484 | The default does nothing, but may be overridden 485 | """ 486 | pass # pragma: no cover 487 | 488 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> Optional[RechunkerGroup]: 489 | """Obtain the rechunker group associated with a particular heap. 490 | 491 | This must be implemented in derived classes. 492 | """ 493 | raise NotImplementedError # pragma: no cover 494 | 495 | 496 | def chunks_from_telstate(telstate): 497 | """Determine input chunking scheme for visibility data from telescope state. 498 | 499 | The provided `telstate` must be a view of the appropriate stream. 500 | 501 | Raises 502 | ------ 503 | KeyError 504 | if any of the necessary telescope state keys are missing. 
505 | """ 506 | try: 507 | n_chans = telstate['n_chans'] 508 | n_bls = telstate['n_bls'] 509 | n_chans_per_substream = telstate['n_chans_per_substream'] 510 | except KeyError: 511 | logger.error("Unable to find sizing params (n_bls, n_chans, " 512 | "or n_chans_per_substream) in telstate.") 513 | raise 514 | 515 | n_substreams = n_chans // n_chans_per_substream 516 | return ((1,), (n_chans_per_substream,) * n_substreams, (n_bls,)) 517 | 518 | 519 | def write_telstate(telstate: katsdptelstate.TelescopeState, 520 | input_name: str, output_name: str, rename_src: Mapping[str, str], 521 | s3_endpoint_url: Optional[str]) -> None: 522 | """Write telstate information about output stream.""" 523 | telstate_out = telstate.view(output_name) 524 | if output_name != input_name: 525 | telstate_out['inherit'] = input_name 526 | if rename_src: 527 | telstate_in = telstate.view(input_name) 528 | src_streams_in = telstate_in['src_streams'] 529 | src_streams_out = [rename_src.get(stream, stream) for stream in src_streams_in] 530 | telstate_out['src_streams'] = src_streams_out 531 | if s3_endpoint_url is not None: 532 | telstate_out['s3_endpoint_url'] = s3_endpoint_url 533 | 534 | 535 | def make_receiver(endpoints: Sequence[Endpoint], 536 | arrays: Sequence[Array], 537 | interface_address: Optional[str], 538 | ibv: bool, 539 | max_heaps_per_substream: int = 2, 540 | ring_heaps_per_substream: int = 2) -> spead2.recv.asyncio.Stream: 541 | """Generate a SPEAD receiver suitable for :class:`SpeadWriter`. 542 | 543 | Parameters 544 | ---------- 545 | endpoints 546 | Multicast UDP endpoints to subscribe to 547 | arrays 548 | Arrays that will arrive in each heap 549 | interface_address 550 | If given, IP address of a local interface to bind to 551 | ibv 552 | If true, use ibverbs acceleration (see SPEAD documentation) 553 | max_heaps_per_substream 554 | Number of simultaneously incomplete SPEAD heaps allowed per substream 555 | ring_heaps_per_substream 556 | Number of complete heaps allowed in the SPEAD ringbuffer, per substream 557 | """ 558 | n_substreams = arrays[0].substreams 559 | 560 | max_heaps = max_heaps_per_substream * n_substreams 561 | ring_heaps = ring_heaps_per_substream * n_substreams 562 | n_memory_buffers = max_heaps + ring_heaps + 2 563 | heap_size = sum(a.nbytes // a.substreams for a in arrays) 564 | memory_pool = spead2.MemoryPool(heap_size, heap_size + 4096, 565 | n_memory_buffers, n_memory_buffers) 566 | rx = spead2.recv.asyncio.Stream( 567 | spead2.ThreadPool(), 568 | spead2.recv.StreamConfig( 569 | max_heaps=max_heaps, 570 | memory_allocator=memory_pool, 571 | memcpy=spead2.MEMCPY_NONTEMPORAL, 572 | stop_on_stop_item=False 573 | ), 574 | spead2.recv.RingStreamConfig( 575 | heaps=ring_heaps, 576 | contiguous_only=False 577 | ) 578 | ) 579 | if ibv: 580 | # The main scripts check this; the assert keeps mypy happy 581 | assert interface_address is not None, "Interface address is required when using ibverbs" 582 | endpoint_tuples = [(endpoint.host, endpoint.port) for endpoint in endpoints] 583 | rx.add_udp_ibv_reader( 584 | spead2.recv.UdpIbvConfig( 585 | endpoints=endpoint_tuples, 586 | interface_address=interface_address, 587 | buffer_size=64 * 1024**2 588 | ) 589 | ) 590 | else: 591 | for endpoint in endpoints: 592 | if interface_address is not None: 593 | rx.add_udp_reader(endpoint.host, endpoint.port, 594 | buffer_size=heap_size + 4096, 595 | interface_address=interface_address) 596 | else: 597 | rx.add_udp_reader(endpoint.port, bind_hostname=endpoint.host, 598 | buffer_size=heap_size + 4096) 
599 |     return rx
600 | 
601 | 
602 | class _DictAction(argparse.Action):
603 |     """Argparse action that takes an argument of the form KEY:VALUE and updates a dict with it.
604 | 
605 |     The input value is expected to be a 2-element sequence (for example, the
606 |     output of :func:`_split_colon`), so the type must produce such a sequence.
607 |     """
608 |     def __init__(self, option_strings, dest, nargs=None, const=None, default=None,
609 |                  type=None, choices=None, required=False, help=None, metavar=None):
610 |         # This code is somewhat cargo-culted from _AppendAction in the argparse
611 |         # source.
612 |         if nargs == 0:
613 |             raise ValueError('nargs for dict action must be > 0')
614 |         if const is not None:
615 |             raise ValueError('const is not supported for dict action')
616 |         super().__init__(
617 |             option_strings=option_strings,
618 |             dest=dest,
619 |             nargs=nargs,
620 |             const=const,
621 |             default=default,
622 |             type=type,
623 |             choices=choices,
624 |             required=required,
625 |             help=help,
626 |             metavar=metavar)
627 | 
628 |     def __call__(self, parser, namespace, values, option_string=None):
629 |         d = getattr(namespace, self.dest, None)
630 |         if d is None:
631 |             d = {}
632 |         else:
633 |             d = copy.copy(d)
634 |         d.update([values])
635 |         setattr(namespace, self.dest, d)
636 | 
637 | 
638 | def _split_colon(value):
639 |     """Split a KEY:VALUE string into its two parts."""
640 |     parts = value.split(':')
641 |     if len(parts) != 2:
642 |         raise argparse.ArgumentTypeError('Expected exactly one colon in {!r}'.format(value))
643 |     return parts
644 | 
645 | 
646 | def add_common_args(parser: katsdpservices.ArgumentParser) -> None:
647 |     """Inject command-line arguments that are common to the writers."""
648 |     group = parser.add_argument_group('Chunk store options')
649 |     group.add_argument('--npy-path', metavar='PATH',
650 |                        help='Write NPY files to this directory instead of '
651 |                             'directly to object store')
652 |     group.add_argument('--s3-endpoint-url', metavar='URL',
653 |                        help='URL of S3 endpoint')
654 |     group.add_argument('--s3-access-key', metavar='KEY',
655 |                        help='Access key for S3')
656 |     group.add_argument('--s3-secret-key', metavar='KEY',
657 |                        help='Secret key for S3')
658 |     group.add_argument('--s3-expiry-days', type=int, metavar='DAYS',
659 |                        help='Days after which to expire the data')
660 |     group.add_argument('--s3-write-url', metavar='URL',
661 |                        help='URL of S3 endpoint used for writing, overriding --s3-endpoint-url')
662 |     group.add_argument('--direct-write', action='store_true',
663 |                        help='Use O_DIRECT for writing to .npy files')
664 | 
665 |     group = parser.add_argument_group('Instrumentation options')
666 |     group.add_argument('--dashboard-port', type=int, metavar='PORT',
667 |                        help='Port for dashboard [disabled]')
668 |     group.add_argument('--external-hostname', default=socket.getfqdn(), metavar='HOSTNAME',
669 |                        help='Hostname through which the dashboard will be accessed [%(default)s]')
670 |     group.add_argument('--dashboard-allow-websocket-origin', action='append', metavar='ORIGIN',
671 |                        help='Origin at which the dashboard may be accessed'
672 |                             ' (may be repeated) [auto]')
673 | 
674 |     parser.add_argument('--new-name', metavar='NAME',
675 |                         help='Name for the output stream')
676 |     parser.add_argument('--rename-src', metavar='OLD-NAME:NEW-NAME',
677 |                         type=_split_colon, action=_DictAction,
678 |                         help='Rewrite src_streams for new name (repeat for each rename)')
679 |     parser.add_argument('--obj-size-mb', type=float, default=20., metavar='MB',
680 |                         help='Target object size in MB [%(default)s]')
681 |     parser.add_argument('--obj-max-channels', type=int, metavar='CHANNELS',
682 |                         help='Maximum number of channels per object [no limit]')
683 |     parser.add_argument('--obj-max-dumps', type=int, metavar='DUMPS', default=16,
684 |                         help='Maximum number of dumps per object [%(default)s]')
685 |     parser.add_argument('--workers', type=int, default=50,
686 |                         help='Threads to use for writing chunks [%(default)s]')
687 |     parser.add_argument('--buffer-dumps', type=int, default=20, metavar='DUMPS',
688 |                         help='Number of full dumps to buffer in write queue')
689 |     parser.add_aiomonitor_arguments()
690 |     parser.add_argument('-p', '--port', type=int, metavar='N',
691 |                         help='KATCP host port [%(default)s]')
692 |     parser.add_argument('-a', '--host', default="", metavar='HOST',
693 |                         help='KATCP host address [all hosts]')
694 | 
695 | 
696 | def chunk_store_from_args(parser: argparse.ArgumentParser,
697 |                           args: argparse.Namespace) -> katdal.chunkstore.ChunkStore:
698 |     """Create a chunk store from user-provided arguments.
699 | 
700 |     This checks that a consistent set of the arguments created by
701 |     :func:`add_common_args` was given by the user. If not, it calls
702 |     ``parser.error`` (which terminates the process). Otherwise, it returns a
703 |     new chunk store (any exceptions from the chunk store constructor are
704 |     passed through).
705 |     """
706 |     if not args.npy_path:
707 |         for arg_name in ['s3_endpoint_url', 's3_access_key', 's3_secret_key']:
708 |             if not getattr(args, arg_name):
709 |                 parser.error('--{} is required if --npy-path is not given'
710 |                              .format(arg_name.replace('_', '-')))
711 |                 # The real parser.error kills the process, but the unit tests mock
712 |                 # it, so we want to ensure that we don't carry on.
713 |     else:
714 |         if args.s3_write_url:
715 |             parser.error("--s3-write-url and --npy-path cannot be used together")
716 |         if not os.path.isdir(args.npy_path):
717 |             parser.error("Specified --npy-path ({}) does not exist.".format(args.npy_path))
718 | 
719 |     if args.npy_path:
720 |         chunk_store = katdal.chunkstore_npy.NpyFileChunkStore(
721 |             args.npy_path, direct_write=args.direct_write)
722 |     else:
723 |         chunk_store = katdal.chunkstore_s3.S3ChunkStore(
724 |             args.s3_write_url or args.s3_endpoint_url,
725 |             credentials=(args.s3_access_key, args.s3_secret_key),
726 |             expiry_days=args.s3_expiry_days or 0)
727 |     return chunk_store
728 | 
--------------------------------------------------------------------------------
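Illustrative usage (not a file in this repository): a minimal sketch of how the helpers above could be wired into a writer entry point. The functions and the module path katsdpdatawriter.spead_write come from the file above; the script itself, its main() flow and the print() call are assumptions for illustration only. The real entry points are scripts/vis_writer.py and scripts/flag_writer.py.

#!/usr/bin/env python3
# Hypothetical sketch -- not part of the repository.
import katsdpservices

from katsdpdatawriter import spead_write


def main() -> None:
    # add_common_args is annotated with katsdpservices.ArgumentParser and calls
    # parser.add_aiomonitor_arguments(), so a plain argparse parser would not do.
    parser = katsdpservices.ArgumentParser()
    spead_write.add_common_args(parser)
    args = parser.parse_args()
    # chunk_store_from_args validates the --npy-path / --s3-* combination
    # (calling parser.error on inconsistency) before constructing the store.
    chunk_store = spead_write.chunk_store_from_args(parser, args)
    print(chunk_store)


if __name__ == '__main__':
    main()

With such a parser, repeated --rename-src OLD:NEW options accumulate into a single dict via _DictAction and _split_colon (for example, --rename-src a:b --rename-src c:d gives {'a': 'b', 'c': 'd'}), which matches the rename_src mapping that write_telstate expects.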