├── katsdpdatawriter ├── test │ ├── __init__.py │ ├── test_queue_space.py │ ├── test_writer.py │ ├── test_vis_writer.py │ ├── test_flag_writer.py │ ├── test_rechunk.py │ └── test_spead_write.py ├── __init__.py ├── queue_space.py ├── dashboard.py ├── flag_writer.py ├── vis_writer.py ├── rechunk.py └── spead_write.py ├── .flake8 ├── pyproject.toml ├── mypy.ini ├── .gitignore ├── test-requirements.txt ├── .pre-commit-config.yaml ├── Jenkinsfile ├── requirements.txt ├── setup.py ├── Dockerfile ├── LICENSE └── scripts ├── vis_writer.py └── flag_writer.py /katsdpdatawriter/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "katversion"] 3 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | files = katsdpdatawriter, scripts 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .mypy_cache 2 | *.py[cod] 3 | __pycache__ 4 | *.egg-info 5 | pip-wheel-metadata 6 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | -c https://raw.githubusercontent.com/ska-sa/katsdpdockerbase/master/docker-base-build/base-requirements.txt 2 | 3 | asynctest 4 | coverage 5 | nose 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 3.9.2 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/pre-commit/mirrors-mypy 7 | rev: v0.780 8 | hooks: 9 | - id: mypy 10 | args: [] 11 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy 2 | 3 | @Library('katsdpjenkins@master') _ 4 | 5 | katsdp.killOldJobs() 6 | katsdp.setDependencies(['ska-sa/katsdpdockerbase/master', 7 | 'ska-sa/katdal/master', 8 | 'ska-sa/katsdpservices/master', 9 | 'ska-sa/katsdptelstate/master']) 10 | katsdp.standardBuild(push_external: true) 11 | katsdp.mail('sdpdev+katsdpdatawriter@ska.ac.za') 12 | -------------------------------------------------------------------------------- /katsdpdatawriter/__init__.py: -------------------------------------------------------------------------------- 1 | # BEGIN VERSION CHECK 2 | # Get package version when locally imported from repo or via -e develop install 3 | try: 4 | import katversion as _katversion 5 | except ImportError: # pragma: no cover 6 | import time as _time 7 | __version__ = "0.0+unknown.{}".format(_time.strftime('%Y%m%d%H%M')) 8 | else: # pragma: no cover 9 | __version__ = _katversion.get_version(__path__[0]) # type: ignore 10 | # END VERSION 
CHECK 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -c https://raw.githubusercontent.com/ska-sa/katsdpdockerbase/master/docker-base-build/base-requirements.txt 2 | 3 | aiokatcp 4 | attrs 5 | bokeh 6 | hiredis # Speeds up katsdptelstate 7 | numpy 8 | spead2 9 | 10 | katdal[s3credentials] @ git+https://github.com/ska-sa/katdal 11 | katpoint @ git+https://github.com/ska-sa/katpoint 12 | katsdpservices[argparse,aiomonitor] @ git+https://github.com/ska-sa/katsdpservices 13 | katsdptelstate @ git+https://github.com/ska-sa/katsdptelstate 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | 4 | 5 | tests_require = ['asynctest', 'nose'] 6 | 7 | setup( 8 | name="katsdpdatawriter", 9 | description="MeerKAT data writer", 10 | author="MeerKAT SDP team", 11 | author_email="sdpdev+katsdpdatawriter@ska.ac.za", 12 | packages=find_packages(), 13 | scripts=[ 14 | "scripts/flag_writer.py", 15 | "scripts/vis_writer.py" 16 | ], 17 | setup_requires=["katversion"], 18 | install_requires=[ 19 | "aiokatcp>=0.7.0", # Needed for auto_strategy 20 | "spead2>=3.0.0", 21 | "katsdptelstate", 22 | "katsdpservices[argparse,aiomonitor]", 23 | "katdal[s3credentials]", 24 | "attrs", 25 | "numpy", 26 | "bokeh" 27 | ], 28 | extras_require={"test": tests_require}, 29 | tests_require=tests_require, 30 | use_katversion=True) 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG KATSDPDOCKERBASE_REGISTRY=harbor.sdp.kat.ac.za/dpp 2 | 3 | FROM $KATSDPDOCKERBASE_REGISTRY/docker-base-build as build 4 | 5 | # Switch to Python 3 environment 6 | ENV PATH="$PATH_PYTHON3" VIRTUAL_ENV="$VIRTUAL_ENV_PYTHON3" 7 | 8 | # Install dependencies 9 | COPY --chown=kat:kat requirements.txt /tmp/install/requirements.txt 10 | RUN install_pinned.py -r /tmp/install/requirements.txt 11 | 12 | # Install the current package 13 | COPY --chown=kat:kat . /tmp/install/katsdpdatawriter 14 | WORKDIR /tmp/install/katsdpdatawriter 15 | RUN python ./setup.py clean 16 | RUN pip install --no-deps . 17 | RUN pip check 18 | 19 | ####################################################################### 20 | 21 | FROM $KATSDPDOCKERBASE_REGISTRY/docker-base-runtime 22 | LABEL maintainer="sdpdev+katsdpdatawriter@ska.ac.za" 23 | 24 | COPY --from=build --chown=kat:kat /home/kat/ve3 /home/kat/ve3 25 | ENV PATH="$PATH_PYTHON3" VIRTUAL_ENV="$VIRTUAL_ENV_PYTHON3" 26 | 27 | # katcp for vis_writer 28 | EXPOSE 2046 29 | # katcp for flag_writer 30 | EXPOSE 2052 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2019, National Research Foundation (SARAO) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_queue_space.py: -------------------------------------------------------------------------------- 1 | import asynctest 2 | from nose.tools import assert_true, assert_false 3 | 4 | from ..queue_space import QueueSpace 5 | 6 | 7 | class TestQueueSpace(asynctest.TestCase): 8 | def setUp(self): 9 | self.qs = QueueSpace(value=100, loop=self.loop) 10 | 11 | async def test_immediate(self): 12 | result = await self.qs.acquire(100) 13 | assert_true(result) 14 | 15 | async def test_block(self): 16 | task = self.loop.create_task(self.qs.acquire(200)) 17 | await asynctest.exhaust_callbacks(self.loop) 18 | assert_false(task.done()) 19 | self.qs.release(120) 20 | await asynctest.exhaust_callbacks(self.loop) 21 | assert_true(task.done()) 22 | assert_true(await task) 23 | 24 | async def test_cancel(self): 25 | task1 = self.loop.create_task(self.qs.acquire(200)) 26 | task2 = self.loop.create_task(self.qs.acquire(100)) 27 | await asynctest.exhaust_callbacks(self.loop) 28 | assert_false(task1.done()) 29 | task1.cancel() 30 | await asynctest.exhaust_callbacks(self.loop) 31 | assert_true(task2.done()) 32 | assert_true(await task2) 33 | 34 | async def test_release_multiple(self): 35 | task1 = self.loop.create_task(self.qs.acquire(200)) 36 | task2 = self.loop.create_task(self.qs.acquire(100)) 37 | await asynctest.exhaust_callbacks(self.loop) 38 | assert_false(task1.done()) 39 | self.qs.release(200) 40 | await asynctest.exhaust_callbacks(self.loop) 41 | assert_true(task1.done()) 42 | assert_true(task2.done()) 43 | assert_true(await task1) 44 | assert_true(await task2) 45 | -------------------------------------------------------------------------------- /katsdpdatawriter/queue_space.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import deque 3 | 4 | 5 | class QueueSpace: 6 | """Manage space in a queue. 7 | 8 | This is logically similar to a semaphore, but allows the user to specify 9 | how much to acquire and release, rather than 1. It is first-come, 10 | first-served, so a large acquire will block the queue until there is 11 | space, even if there are later smaller acquires that could have been 12 | satisfied. 
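    A minimal usage sketch (the byte counts and the producer/consumer split
    are illustrative, not part of this module)::

        space = QueueSpace(value=1000)

        async def producer(nbytes: int) -> None:
            await space.acquire(nbytes)   # blocks until nbytes of space is free
            ...                           # enqueue the item

        def consumer_done(nbytes: int) -> None:
            space.release(nbytes)         # hand the space back once consumed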
13 | """ 14 | def __init__(self, value: int = 0, *, loop: asyncio.AbstractEventLoop = None) -> None: 15 | self._loop = loop if loop is not None else asyncio.get_event_loop() 16 | self._value = value 17 | self._waiters = deque() # type: deque 18 | 19 | @property 20 | def value(self) -> int: 21 | """Currently available space.""" 22 | return self._value 23 | 24 | async def acquire(self, value: int) -> bool: 25 | if value <= self._value: 26 | self._value -= value 27 | return True 28 | future = self._loop.create_future() 29 | future.add_done_callback(self._cancel_handler) 30 | self._waiters.append((future, value)) 31 | await future 32 | return True 33 | 34 | def _wakeup(self): 35 | while self._waiters: 36 | if self._waiters[0][0].done(): 37 | # Can happen if it was cancelled 38 | self._waiters.popleft() 39 | elif self._waiters[0][1] <= self._value: 40 | future, req = self._waiters.popleft() 41 | self._value -= req 42 | future.set_result(None) 43 | else: 44 | break 45 | 46 | def _cancel_handler(self, future): 47 | if future.cancelled(): 48 | self._wakeup() # Give next requester a chance 49 | 50 | def release(self, value: int) -> None: 51 | self._value += value 52 | self._wakeup() 53 | 54 | def locked(self, value: int) -> bool: 55 | return value > self._value 56 | -------------------------------------------------------------------------------- /scripts/vis_writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | import signal 5 | import logging 6 | 7 | import katsdpservices 8 | import katsdptelstate 9 | 10 | from katsdpdatawriter.vis_writer import VisibilityWriterServer 11 | from katsdpdatawriter.spead_write import add_common_args, chunk_store_from_args, ChunkParams 12 | from katsdpdatawriter.dashboard import make_dashboard, start_dashboard 13 | 14 | 15 | def on_shutdown(loop: asyncio.AbstractEventLoop, server: VisibilityWriterServer) -> None: 16 | # in case the exit code below borks, we allow shutdown via traditional means 17 | loop.remove_signal_handler(signal.SIGINT) 18 | loop.remove_signal_handler(signal.SIGTERM) 19 | server.halt() 20 | 21 | 22 | async def run(loop: asyncio.AbstractEventLoop, server: VisibilityWriterServer) -> None: 23 | await server.start() 24 | for sig in [signal.SIGINT, signal.SIGTERM]: 25 | loop.add_signal_handler(sig, lambda: on_shutdown(loop, server)) 26 | logger.info("Started visibility writer server.") 27 | await server.join() 28 | 29 | 30 | if __name__ == '__main__': 31 | katsdpservices.setup_logging() 32 | logger = logging.getLogger('vis_writer') 33 | katsdpservices.setup_restart() 34 | 35 | parser = katsdpservices.ArgumentParser() 36 | add_common_args(parser) 37 | parser.add_argument('--l0-spead', default=':7200', metavar='ENDPOINTS', 38 | type=katsdptelstate.endpoint.endpoint_list_parser(7200), 39 | help='Source port/multicast groups for L0 SPEAD stream. ' 40 | '[default=%(default)s]') 41 | parser.add_argument('--l0-interface', metavar='INTERFACE', 42 | help='Network interface to subscribe to for L0 stream. 
' 43 | '[default=auto]') 44 | parser.add_argument('--l0-name', default='sdp_l0', metavar='NAME', 45 | help='Name of L0 stream from ingest [default=%(default)s]') 46 | parser.add_argument('--l0-ibv', action='store_true', 47 | help='Use ibverbs acceleration to receive L0 stream [default=no]') 48 | parser.set_defaults(telstate='localhost', port=2046) 49 | args = parser.parse_args() 50 | 51 | if args.l0_ibv and args.l0_interface is None: 52 | parser.error('--l0-ibv requires --l0-interface') 53 | if args.rename_src and args.new_name is None: 54 | parser.error('--rename-src requires --new-name') 55 | 56 | # Connect to object store 57 | chunk_store = chunk_store_from_args(parser, args) 58 | loop = asyncio.get_event_loop() 59 | server = VisibilityWriterServer(args.host, args.port, loop, args.l0_spead, 60 | args.l0_interface, args.l0_ibv, 61 | chunk_store, ChunkParams.from_args(args), 62 | args.telstate, 63 | args.l0_name, 64 | args.new_name if args.new_name is not None else args.l0_name, 65 | args.rename_src, 66 | args.s3_endpoint_url, 67 | args.workers, args.buffer_dumps) 68 | if args.dashboard_port is not None: 69 | dashboard = make_dashboard(server.sensors) 70 | start_dashboard(dashboard, args) 71 | 72 | with katsdpservices.start_aiomonitor(loop, args, locals()): 73 | loop.run_until_complete(run(loop, server)) 74 | loop.close() 75 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_writer.py: -------------------------------------------------------------------------------- 1 | """Base functionality for :mod:`test_vis_writer` and :mod:`test_flag_writer`""" 2 | 3 | from unittest import mock 4 | import asyncio 5 | 6 | import asynctest 7 | 8 | import katsdptelstate 9 | from katsdptelstate.endpoint import Endpoint 10 | import aiokatcp 11 | import spead2 12 | import spead2.recv.asyncio 13 | import spead2.send.asyncio 14 | from nose.tools import assert_equal, assert_in 15 | 16 | 17 | class BaseTestWriterServer(asynctest.TestCase): 18 | @classmethod 19 | def setup_telstate(cls, namespace: str) -> katsdptelstate.TelescopeState: 20 | telstate = katsdptelstate.TelescopeState().view(namespace) 21 | n_ants = 3 22 | telstate['n_chans'] = 4096 23 | telstate['n_chans_per_substream'] = 1024 24 | telstate['n_bls'] = n_ants * (n_ants + 1) * 2 25 | return telstate 26 | 27 | def setup_sleep(self) -> None: 28 | """Patch loop.call_later so that delayed callbacks run immediately. 29 | 30 | This speeds up the tests where the code under test has a 5s timeout. 
31 | """ 32 | def call_later(delay, callback, *args): 33 | return self.loop.call_soon(callback, *args) 34 | 35 | patcher = mock.patch.object(self.loop, 'call_later', call_later) 36 | patcher.start() 37 | self.addCleanup(patcher.stop) 38 | 39 | def setup_spead(self) -> None: 40 | def add_udp_reader(stream, host: str, port: int, *args, **kwargs) -> None: 41 | queue = self.inproc_queues[Endpoint(host, port)] 42 | stream.add_inproc_reader(queue) 43 | 44 | self.endpoints = [Endpoint('239.102.254.{}'.format(i), 7148) for i in range(4)] 45 | self.inproc_queues = {endpoint: spead2.InprocQueue() for endpoint in self.endpoints} 46 | tx_pool = spead2.ThreadPool() 47 | self.tx = [spead2.send.asyncio.InprocStream(tx_pool, [self.inproc_queues[endpoint]]) 48 | for endpoint in self.endpoints] 49 | patcher = mock.patch('spead2.recv.asyncio.Stream.add_udp_reader', add_udp_reader) 50 | patcher.start() 51 | self.addCleanup(patcher.stop) 52 | 53 | async def get(stream): 54 | heap = await orig_get(stream) 55 | self.received_heaps.release() 56 | return heap 57 | 58 | self.received_heaps = asyncio.Semaphore(value=0, loop=self.loop) 59 | orig_get = spead2.recv.asyncio.Stream.get 60 | patcher = mock.patch('spead2.recv.asyncio.Stream.get', get) # type: ignore 61 | patcher.start() 62 | self.addCleanup(patcher.stop) 63 | 64 | async def setup_client(self, server: aiokatcp.DeviceServer) -> aiokatcp.Client: 65 | assert server.server is not None, "Server has not been started" 66 | # mypy doesn't know about asyncio.base_events.Server, which has the 'sockets' member 67 | port = server.server.sockets[0].getsockname()[1] # type: ignore 68 | client = await aiokatcp.Client.connect('localhost', port) 69 | self.addCleanup(client.wait_closed) 70 | self.addCleanup(client.close) 71 | return client 72 | 73 | def assert_sensor_equals(self, name, value, status=frozenset([aiokatcp.Sensor.Status.NOMINAL])): 74 | assert_equal(self.server.sensors[name].value, value) 75 | assert_in(self.server.sensors[name].status, status) 76 | 77 | async def send_heap(self, tx, heap): 78 | """Send a heap and wait for it to be received. 79 | 80 | .. note:: This only works if all heaps are sent through this interface. 81 | """ 82 | assert self.received_heaps.locked() 83 | await tx.async_send_heap(heap) 84 | # The above just waits until it's been transmitted into the inproc 85 | # queue, but we want to wait until it's come out the other end. 86 | await self.received_heaps.acquire() 87 | -------------------------------------------------------------------------------- /scripts/flag_writer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Capture L1 flags from the SPEAD stream(s) produced by cal. 3 | 4 | We adopt a naive strategy and store the flags for each heap in a single 5 | object. These objects will be later picked up by the trawler process 6 | and inserted into the appropriate bucket in S3 from where they will be 7 | picked up by katdal. 
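A typical invocation looks something like the following (endpoints, interface
and store path are placeholders; the remaining options come from
add_common_args in katsdpdatawriter.spead_write)::

    flag_writer.py --telstate localhost:6379 \
        --flags-spead 239.102.255.0+3:7202 --flags-interface eth0 \
        --npy-path /data/flags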
8 | 9 | """ 10 | 11 | import logging 12 | import signal 13 | import asyncio 14 | 15 | import katsdptelstate 16 | import katsdpservices 17 | 18 | from katsdpdatawriter.flag_writer import FlagWriterServer 19 | from katsdpdatawriter.spead_write import add_common_args, chunk_store_from_args, ChunkParams 20 | from katsdpdatawriter.dashboard import make_dashboard, start_dashboard 21 | 22 | 23 | def on_shutdown(loop: asyncio.AbstractEventLoop, server: FlagWriterServer) -> None: 24 | # in case the exit code below borks, we allow shutdown via traditional means 25 | loop.remove_signal_handler(signal.SIGINT) 26 | loop.remove_signal_handler(signal.SIGTERM) 27 | server.halt() 28 | 29 | 30 | async def run(loop: asyncio.AbstractEventLoop, server: FlagWriterServer) -> None: 31 | await server.start() 32 | for sig in [signal.SIGINT, signal.SIGTERM]: 33 | loop.add_signal_handler(sig, lambda: on_shutdown(loop, server)) 34 | logger.info("Started flag writer server.") 35 | await server.join() 36 | 37 | 38 | if __name__ == '__main__': 39 | katsdpservices.setup_logging() 40 | logger = logging.getLogger("flag_writer") 41 | katsdpservices.setup_restart() 42 | 43 | parser = katsdpservices.ArgumentParser() 44 | add_common_args(parser) 45 | parser.add_argument('--flags-spead', default=':7202', metavar='ENDPOINTS', 46 | type=katsdptelstate.endpoint.endpoint_list_parser(7202), 47 | help='Source port/multicast groups for flags SPEAD streams. ' 48 | '[default=%(default)s]') 49 | parser.add_argument('--flags-interface', metavar='INTERFACE', 50 | help='Network interface to subscribe to for flag streams. ' 51 | '[default=auto]') 52 | parser.add_argument('--flags-name', type=str, default='sdp_l1_flags', 53 | help='name for the flags stream. [default=%(default)s]', metavar='NAME') 54 | parser.add_argument('--flags-ibv', action='store_true', 55 | help='Use ibverbs acceleration to receive flags') 56 | parser.set_defaults(telstate='localhost', port=2052) 57 | 58 | args = parser.parse_args() 59 | if args.telstate is None: 60 | parser.error('--telstate is required') 61 | if args.flags_ibv and args.flags_interface is None: 62 | parser.error("--flags-ibv requires --flags-interface") 63 | if args.rename_src and args.new_name is None: 64 | parser.error('--rename-src requires --new-name') 65 | 66 | chunk_store = chunk_store_from_args(parser, args) 67 | loop = asyncio.get_event_loop() 68 | # The type: ignore below is because mypy doesn't know that parser.error never returns 69 | server = FlagWriterServer(args.host, args.port, loop, args.flags_spead, 70 | args.flags_interface, args.flags_ibv, 71 | chunk_store, ChunkParams.from_args(args), 72 | args.telstate, # type: ignore 73 | args.flags_name, 74 | args.new_name if args.new_name is not None else args.flags_name, 75 | args.rename_src, args.s3_endpoint_url, 76 | args.workers, args.buffer_dumps) 77 | if args.dashboard_port is not None: 78 | dashboard = make_dashboard(server.sensors) 79 | start_dashboard(dashboard, args) 80 | 81 | with katsdpservices.start_aiomonitor(loop, args, locals()): 82 | loop.run_until_complete(run(loop, server)) 83 | loop.close() 84 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_vis_writer.py: -------------------------------------------------------------------------------- 1 | """Tests for :mod:`katsdpdatawriter.vis_writer`.""" 2 | 3 | import tempfile 4 | import shutil 5 | from unittest import mock 6 | 7 | import numpy as np 8 | import katdal.chunkstore_npy 9 | import spead2.send.asyncio 10 | from 
aiokatcp import FailReply, Sensor 11 | from nose.tools import assert_equal, assert_raises_regex, assert_true, assert_in 12 | 13 | from ..vis_writer import VisibilityWriterServer, Status 14 | from ..spead_write import DeviceStatus, ChunkParams 15 | from .test_writer import BaseTestWriterServer 16 | 17 | 18 | class TestVisWriterServer(BaseTestWriterServer): 19 | async def setup_server(self, **arg_overrides) -> VisibilityWriterServer: 20 | args = dict( 21 | host='127.0.0.1', port=0, loop=self.loop, endpoints=self.endpoints, 22 | interface='lo', ibv=False, chunk_store=self.chunk_store, 23 | chunk_params=ChunkParams(10000), 24 | telstate=self.telstate.root(), 25 | input_name='sdp_l0', output_name='sdp_l0', rename_src={}, 26 | s3_endpoint_url=None, max_workers=4, buffer_dumps=2) 27 | args.update(arg_overrides) 28 | server = VisibilityWriterServer(**args) 29 | await server.start() 30 | self.addCleanup(server.stop) 31 | return server 32 | 33 | def setup_ig(self) -> spead2.send.ItemGroup: 34 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 35 | n_bls = self.telstate['n_bls'] 36 | shape = (n_chans_per_substream, n_bls) 37 | vis = np.zeros(shape, np.complex64) 38 | flags = np.random.randint(0, 256, shape, np.uint8) 39 | weights = np.random.randint(0, 256, shape, np.uint8) 40 | weights_channel = np.random.random(n_chans_per_substream).astype(np.float32) 41 | # Adapted from katsdpingest/sender.py 42 | ig = spead2.send.ItemGroup() 43 | ig.add_item(id=None, name='correlator_data', 44 | description="Visibilities", 45 | shape=(n_chans_per_substream, n_bls), dtype=np.complex64, 46 | value=vis) 47 | ig.add_item(id=None, name='flags', 48 | description="Flags for visibilities", 49 | shape=(n_chans_per_substream, n_bls), dtype=np.uint8, 50 | value=flags) 51 | ig.add_item(id=None, name='weights', 52 | description="Detailed weights, to be scaled by weights_channel", 53 | shape=(n_chans_per_substream, n_bls), dtype=np.uint8, 54 | value=weights) 55 | ig.add_item(id=None, name='weights_channel', 56 | description="Coarse (per-channel) weights", 57 | shape=(n_chans_per_substream,), dtype=np.float32, 58 | value=weights_channel) 59 | ig.add_item(id=None, name='timestamp', 60 | description="Seconds since CBF sync time", 61 | shape=(), dtype=None, format=[('f', 64)], 62 | value=100.0) 63 | ig.add_item(id=None, name='dump_index', 64 | description='Index in time', 65 | shape=(), dtype=None, format=[('u', 64)], 66 | value=1) 67 | ig.add_item(id=0x4103, name='frequency', 68 | description="Channel index of first channel in the heap", 69 | shape=(), dtype=np.uint32, 70 | value=0) 71 | return ig 72 | 73 | async def setUp(self) -> None: 74 | npy_path = tempfile.mkdtemp() 75 | self.addCleanup(shutil.rmtree, npy_path) 76 | self.chunk_store = katdal.chunkstore_npy.NpyFileChunkStore(npy_path) 77 | self.telstate = self.setup_telstate('sdp_l0') 78 | self.telstate['src_streams'] = ['i0_baseline_correlation_products'] 79 | self.setup_sleep() 80 | self.setup_spead() 81 | self.server = await self.setup_server() 82 | self.client = await self.setup_client(self.server) 83 | self.ig = self.setup_ig() 84 | 85 | async def test_capture(self, output_name: str = 'sdp_l0') -> None: 86 | cbid = '1234567890' 87 | self.assert_sensor_equals('status', Status.IDLE) 88 | await self.client.request('capture-init', cbid) 89 | self.assert_sensor_equals('status', Status.WAIT_DATA) 90 | for tx in self.tx: 91 | await self.send_heap(tx, self.ig.get_start()) 92 | await self.send_heap(self.tx[0], self.ig.get_heap()) 93 | 
self.assert_sensor_equals('status', Status.CAPTURING) 94 | self.assert_sensor_equals('input-heaps-total', 1) 95 | for tx in self.tx: 96 | await self.send_heap(tx, self.ig.get_end()) 97 | # The writes to chunkstore happen in other threads, so the state here 98 | # depends on timing. 99 | assert_in(self.server.sensors['status'].value, {Status.FINALISING, Status.COMPLETE}) 100 | await self.client.request('capture-done') 101 | self.assert_sensor_equals('status', Status.IDLE) 102 | capture_stream = '{}_{}'.format(cbid, output_name) 103 | prefix = capture_stream.replace('_', '-') 104 | assert_true(self.chunk_store.is_complete(prefix)) 105 | 106 | async def test_new_name(self) -> None: 107 | # Replace the client+server to use new arguments 108 | output_name = 'sdp_l0_new' 109 | s3_endpoint_url = 'http://sdp_l0_new.invalid/' 110 | await self.server.stop() 111 | self.server = await self.setup_server(output_name=output_name, 112 | s3_endpoint_url=s3_endpoint_url) 113 | self.client = await self.setup_client(self.server) 114 | # Run the test 115 | await self.test_capture(output_name) 116 | telstate_output = self.telstate.root().view(output_name) 117 | assert_equal(telstate_output['s3_endpoint_url'], s3_endpoint_url) 118 | assert_equal(telstate_output['inherit'], 'sdp_l0') 119 | 120 | async def test_failed_write(self) -> None: 121 | cbid = '1234567890' 122 | with mock.patch.object(katdal.chunkstore_npy.NpyFileChunkStore, 'put_chunk', 123 | side_effect=katdal.chunkstore.StoreUnavailable): 124 | await self.client.request('capture-init', cbid) 125 | for tx in self.tx: 126 | await self.send_heap(tx, self.ig.get_start()) 127 | await self.send_heap(self.tx[0], self.ig.get_heap()) 128 | await self.client.request('capture-done') 129 | self.assert_sensor_equals('device-status', DeviceStatus.FAIL, {Sensor.Status.ERROR}) 130 | 131 | async def test_missing_stop_item(self) -> None: 132 | cbid = '1234567890' 133 | await self.client.request('capture-init', cbid) 134 | for tx in self.tx: 135 | await self.send_heap(tx, self.ig.get_start()) 136 | await self.send_heap(self.tx[0], self.ig.get_heap()) 137 | for tx in self.tx[:-1]: 138 | await self.send_heap(tx, self.ig.get_end()) 139 | await self.client.request('capture-done') 140 | 141 | async def test_double_init(self) -> None: 142 | await self.client.request('capture-init', '1234567890') 143 | with assert_raises_regex(FailReply, '(?i)already capturing'): 144 | await self.client.request('capture-init', '9876543210') 145 | 146 | async def test_done_without_init(self) -> None: 147 | with assert_raises_regex(FailReply, '(?i)not capturing'): 148 | await self.client.request('capture-done') 149 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_flag_writer.py: -------------------------------------------------------------------------------- 1 | """Tests for :mod:`katsdpdatawriter.flag_writer`.""" 2 | 3 | import tempfile 4 | import shutil 5 | from unittest import mock 6 | from typing import Dict, Any 7 | 8 | import numpy as np 9 | from nose.tools import (assert_equal, assert_true, 10 | assert_regex, assert_raises_regex, assert_logs) 11 | 12 | import aiokatcp 13 | from aiokatcp import Sensor 14 | import spead2 15 | import spead2.send.asyncio 16 | import katdal.chunkstore 17 | from katdal.chunkstore_npy import NpyFileChunkStore 18 | 19 | from ..flag_writer import FlagWriterServer, Status 20 | from ..spead_write import DeviceStatus, ChunkParams 21 | from .test_writer import BaseTestWriterServer 22 | 23 | 24 | class 
TestFlagWriterServer(BaseTestWriterServer): 25 | async def setup_server(self, **arg_overrides) -> FlagWriterServer: 26 | args = dict( 27 | host='127.0.0.1', port=0, loop=self.loop, endpoints=self.endpoints, 28 | flag_interface='lo', flags_ibv=False, 29 | chunk_store=self.chunk_store, chunk_params=self.chunk_params, 30 | telstate=self.telstate.root(), 31 | input_name='sdp_l1_flags', output_name='sdp_l1_flags', rename_src={}, 32 | s3_endpoint_url=None, max_workers=4, buffer_dumps=2) 33 | args.update(arg_overrides) 34 | server = FlagWriterServer(**args) 35 | await server.start() 36 | self.addCleanup(server.stop) 37 | return server 38 | 39 | def setup_ig(self) -> spead2.send.ItemGroup: 40 | self.cbid = '1234567890' 41 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 42 | n_bls = self.telstate['n_bls'] 43 | flags = np.random.randint(0, 256, (n_chans_per_substream, n_bls), np.uint8) 44 | 45 | ig = spead2.send.ItemGroup() 46 | # This is copied and adapted from katsdpcal 47 | ig.add_item(id=None, name='flags', description="Flags for visibilities", 48 | shape=(self.telstate['n_chans_per_substream'], self.telstate['n_bls']), 49 | dtype=None, format=[('u', 8)], value=flags) 50 | ig.add_item(id=None, name='timestamp', description="Seconds since sync time", 51 | shape=(), dtype=None, format=[('f', 64)], value=100.0) 52 | ig.add_item(id=None, name='dump_index', description='Index in time', 53 | shape=(), dtype=None, format=[('u', 64)], value=0) 54 | ig.add_item(id=0x4103, name='frequency', 55 | description="Channel index of first channel in the heap", 56 | shape=(), dtype=np.uint32, value=0) 57 | ig.add_item(id=None, name='capture_block_id', description='SDP capture block ID', 58 | shape=(None,), dtype=None, format=[('c', 8)], value=self.cbid) 59 | return ig 60 | 61 | async def stop_server(self) -> None: 62 | for queue in self.inproc_queues.values(): 63 | queue.stop() 64 | await self.server.stop() 65 | 66 | async def setUp(self) -> None: 67 | self.npy_path = tempfile.mkdtemp() 68 | self.addCleanup(shutil.rmtree, self.npy_path) 69 | self.chunk_store = NpyFileChunkStore(self.npy_path) 70 | self.telstate = self.setup_telstate('sdp_l1_flags') 71 | self.telstate['src_streams'] = ['sdp_l0'] 72 | self.chunk_channels = 128 73 | self.chunk_params = ChunkParams(self.telstate['n_bls'] * self.chunk_channels, 74 | self.chunk_channels) 75 | self.setup_sleep() 76 | self.setup_spead() 77 | self.server = await self.setup_server() 78 | self.client = await self.setup_client(self.server) 79 | self.ig = self.setup_ig() 80 | 81 | def _check_chunk_info(self, output_name: str = 'sdp_l1_flags') -> Dict[str, Any]: 82 | n_chans = self.telstate['n_chans'] 83 | n_bls = self.telstate['n_bls'] 84 | capture_stream = '{}_{}'.format(self.cbid, output_name) 85 | 86 | view = self.telstate.root().view(capture_stream) 87 | chunk_info = view['chunk_info'] 88 | n_chunks = n_chans // self.chunk_channels 89 | assert_equal( 90 | chunk_info, 91 | { 92 | 'flags': { 93 | 'prefix': capture_stream.replace('_', '-'), 94 | 'shape': (1, n_chans, n_bls), 95 | 'chunks': ((1,), (self.chunk_channels,) * n_chunks, (n_bls,)), 96 | 'dtype': np.dtype(np.uint8) 97 | } 98 | }) 99 | return chunk_info['flags'] 100 | 101 | async def test_capture(self, output_name: str = 'sdp_l1_flags') -> None: 102 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 103 | self.assert_sensor_equals('status', Status.WAIT_DATA) 104 | self.assert_sensor_equals('capture-block-state', '{}') 105 | 106 | await self.client.request('capture-init', self.cbid) 107 | 
self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 108 | 109 | await self.send_heap(self.tx[0], self.ig.get_heap()) 110 | self.assert_sensor_equals('status', Status.CAPTURING) 111 | 112 | await self.client.request('capture-done', self.cbid) 113 | self.assert_sensor_equals('status', Status.CAPTURING) # Should still be capturing 114 | self.assert_sensor_equals('capture-block-state', '{}') 115 | await self.stop_server() 116 | capture_stream = '{}_{}'.format(self.cbid, output_name) 117 | prefix = capture_stream.replace('_', '-') 118 | assert_true(self.chunk_store.is_complete(prefix)) 119 | 120 | # Validate the data written 121 | chunk_info = self._check_chunk_info(output_name) 122 | data = self.chunk_store.get_dask_array( 123 | self.chunk_store.join(chunk_info['prefix'], 'flags'), 124 | chunk_info['chunks'], chunk_info['dtype']).compute() 125 | n_chans_per_substream = self.telstate['n_chans_per_substream'] 126 | np.testing.assert_array_equal(self.ig['flags'].value[np.newaxis], 127 | data[:, :n_chans_per_substream, :]) 128 | np.testing.assert_equal(0, data[:, n_chans_per_substream:, :]) 129 | 130 | async def test_new_name(self) -> None: 131 | # Replace client and server with different args 132 | output_name = 'sdp_l1_flags_new' 133 | rename_src = {'sdp_l0': 'sdp_l0_new'} 134 | s3_endpoint_url = 'http://new.invalid/' 135 | await self.server.stop() 136 | self.server = await self.setup_server(output_name=output_name, 137 | rename_src=rename_src, 138 | s3_endpoint_url=s3_endpoint_url) 139 | self.client = await self.setup_client(self.server) 140 | await self.test_capture(output_name) 141 | telstate_output = self.telstate.root().view(output_name) 142 | assert_equal(telstate_output['inherit'], 'sdp_l1_flags') 143 | assert_equal(telstate_output['s3_endpoint_url'], s3_endpoint_url) 144 | assert_equal(telstate_output['src_streams'], ['sdp_l0_new']) 145 | 146 | async def test_failed_write(self) -> None: 147 | with mock.patch.object(NpyFileChunkStore, 'put_chunk', 148 | side_effect=katdal.chunkstore.StoreUnavailable): 149 | await self.client.request('capture-init', self.cbid) 150 | await self.send_heap(self.tx[0], self.ig.get_heap()) 151 | await self.client.request('capture-done', self.cbid) 152 | self._check_chunk_info() 153 | self.assert_sensor_equals('device-status', DeviceStatus.FAIL, {Sensor.Status.ERROR}) 154 | 155 | async def test_double_init(self) -> None: 156 | await self.client.request('capture-init', self.cbid) 157 | with assert_raises_regex(aiokatcp.FailReply, 'already active'): 158 | await self.client.request('capture-init', self.cbid) 159 | self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 160 | 161 | async def test_done_without_init(self) -> None: 162 | with assert_raises_regex(aiokatcp.FailReply, 'unknown'): 163 | await self.client.request('capture-done', self.cbid) 164 | 165 | async def test_no_data(self) -> None: 166 | self.assert_sensor_equals('capture-block-state', '{}') 167 | await self.client.request('capture-init', self.cbid) 168 | self.assert_sensor_equals('capture-block-state', '{"%s": "CAPTURING"}' % self.cbid) 169 | with assert_logs('katsdpdatawriter.flag_writer', 'WARNING'): 170 | await self.client.request('capture-done', self.cbid) 171 | self.assert_sensor_equals('capture-block-state', '{}') 172 | 173 | async def test_data_after_done(self) -> None: 174 | await self.client.request('capture-init', self.cbid) 175 | await self.client.request('capture-done', self.cbid) 176 | with 
assert_logs('katsdpdatawriter.flag_writer', 'WARNING') as cm: 177 | await self.send_heap(self.tx[0], self.ig.get_heap()) 178 | assert_regex(cm.output[0], 'outside of init/done') 179 | -------------------------------------------------------------------------------- /katsdpdatawriter/dashboard.py: -------------------------------------------------------------------------------- 1 | """Bokeh dashboard showing real-time metrics""" 2 | 3 | from datetime import datetime, timedelta 4 | import logging 5 | import functools 6 | from collections import deque 7 | from weakref import WeakSet 8 | import argparse 9 | from typing import Mapping, MutableSet, List, Callable, Iterable # noqa: F401 10 | 11 | import numpy as np 12 | 13 | from aiokatcp import Sensor, Reading 14 | 15 | from bokeh.document import Document 16 | from bokeh.application.handlers.handler import Handler 17 | from bokeh.models import ColumnDataSource, DataRange1d 18 | from bokeh.layouts import gridplot 19 | from bokeh.plotting import figure 20 | from bokeh.palettes import Category10 21 | from bokeh.server.server import Server 22 | from bokeh.application.application import Application 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | PALETTE = Category10[10] 27 | 28 | 29 | def _convert_timestamp(posix_timestamp: float) -> datetime: 30 | return datetime.utcfromtimestamp(posix_timestamp) 31 | 32 | 33 | class Watcher: 34 | """Observe and collect data for a single sensor 35 | 36 | Refer to :class:`Dashboard` for the meaning of `window` and `rollover`. 37 | """ 38 | def __init__(self, dashboard: 'Dashboard', sensor: Sensor, 39 | window: float, rollover: int) -> None: 40 | self.dashboard = dashboard 41 | self.sensor = sensor 42 | self.window = window 43 | self.rollover = rollover 44 | # TODO: use typing.Deque in type hint after migration to Python 3.6 45 | self._readings = deque() # type: deque 46 | self.sensor.attach(self._update) 47 | self._update(self.sensor, self.sensor.reading) 48 | 49 | def close(self) -> None: 50 | self.sensor.detach(self._update) 51 | 52 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 53 | self._readings.append(reading) 54 | if (self._readings[-1].timestamp - self._readings[0].timestamp > self.window 55 | or len(self._readings) > self.rollover): 56 | self._readings.popleft() 57 | 58 | 59 | class LineWatcher(Watcher): 60 | """Watcher for drawing line graphs""" 61 | def make_data_source(self) -> ColumnDataSource: 62 | data = { 63 | 'time': [_convert_timestamp(reading.timestamp) for reading in self._readings], 64 | 'value': [reading.value for reading in self._readings] 65 | } 66 | return ColumnDataSource(data, name='data_source ' + self.sensor.name) 67 | 68 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 69 | super()._update(sensor, reading) 70 | update = { 71 | 'time': [_convert_timestamp(reading.timestamp)], 72 | 'value': [reading.value] 73 | } 74 | name = 'data_source ' + sensor.name 75 | 76 | def doc_update(doc): 77 | data_source = doc.get_model_by_name(name) 78 | data_source.stream(update, rollover=len(self._readings)) 79 | 80 | self.dashboard.update_documents(doc_update) 81 | 82 | 83 | class HistogramWatcher(Watcher): 84 | def make_data_source(self) -> ColumnDataSource: 85 | return ColumnDataSource(self._data, name='data_source ' + self.sensor.name) 86 | 87 | def _update(self, sensor: Sensor, reading: Reading[float]) -> None: 88 | super()._update(sensor, reading) 89 | values = [reading.value for reading in self._readings] 90 | # Based on 
https://bokeh.pydata.org/en/latest/docs/gallery/histogram.html 91 | hist, edges = np.histogram(values, bins='auto') 92 | self._data = { 93 | 'top': hist, 94 | 'bottom': [0] * len(hist), 95 | 'left': edges[:-1], 96 | 'right': edges[1:] 97 | } 98 | name = 'data_source ' + sensor.name 99 | 100 | def doc_update(doc): 101 | data_source = doc.get_model_by_name(name) 102 | data_source.data = self._data 103 | 104 | self.dashboard.update_documents(doc_update) 105 | 106 | 107 | class Dashboard(Handler): 108 | """Bokeh dashboard showing sensor values. 109 | 110 | Sensor values are recorded and displayed through graphs. To keep the 111 | graph size down (more to avoid overloading the browser/network than for 112 | memory constraints), old values are discarded once either they are 113 | older than `window` or there are more than `rollover` samples. 114 | 115 | Parameters 116 | ---------- 117 | line_sensors 118 | Sensors to display as line graphs. Each element is a list of sensors 119 | to plot on a single graph. 120 | histogram_sensors 121 | Sensors to display as histograms. Each sensor update contributes one 122 | entry on the histogram. 123 | window 124 | Maximum length of time (in seconds) to keep samples. 125 | rollover 126 | Maximum number of samples to keep (per sensor). 127 | """ 128 | def __init__(self, 129 | line_sensors: Iterable[Iterable[Sensor]], 130 | histogram_sensors: Iterable[Sensor], 131 | window: float = 1200.0, rollover: int = 10000) -> None: 132 | super().__init__() 133 | self._line_watchers = [] # type: List[List[LineWatcher]] 134 | self._histogram_watchers = [] # type: List[HistogramWatcher] 135 | self._docs = WeakSet() # type: MutableSet[Document] 136 | for sensors in line_sensors: 137 | watchers = [LineWatcher(self, sensor, window, rollover) for sensor in sensors] 138 | self._line_watchers.append(watchers) 139 | for sensor in histogram_sensors: 140 | watcher = HistogramWatcher(self, sensor, window, rollover) 141 | self._histogram_watchers.append(watcher) 142 | 143 | def modify_document(self, doc: Document) -> None: 144 | plots = [] 145 | line_renderers = [] # type: List 146 | for watchers in self._line_watchers: 147 | plot = figure(plot_width=350, plot_height=350, 148 | x_axis_label='time', x_axis_type='datetime', y_axis_label='value') 149 | for i, line_watcher in enumerate(watchers): 150 | data_source = line_watcher.make_data_source() 151 | plot.step('time', 'value', source=data_source, mode='after', 152 | legend=line_watcher.sensor.name, 153 | color=PALETTE[i]) 154 | plot.legend.location = 'top_left' 155 | plots.append(plot) 156 | line_renderers.extend(plot.x_range.renderers) 157 | # Create a single data range so that all line plots show the same time window 158 | data_range = DataRange1d() 159 | data_range.renderers = line_renderers 160 | data_range.follow = 'end' 161 | data_range.default_span = timedelta(seconds=1) 162 | data_range.follow_interval = timedelta(seconds=120) 163 | for plot in plots: 164 | plot.x_range = data_range 165 | 166 | for histogram_watcher in self._histogram_watchers: 167 | plot = figure(plot_width=350, plot_height=350, 168 | x_axis_label=histogram_watcher.sensor.name, 169 | y_axis_label='frequency') 170 | data_source = histogram_watcher.make_data_source() 171 | plot.quad(top='top', bottom='bottom', left='left', right='right', 172 | source=data_source) 173 | plots.append(plot) 174 | 175 | doc.add_root(gridplot(plots, ncols=3)) 176 | logger.debug('Created document with %d plots', len(plots)) 177 | self._docs.add(doc) 178 | 179 | def 
on_server_unloaded(self, server_context) -> None: 180 | for watchers in self._line_watchers: 181 | for line_watcher in watchers: 182 | line_watcher.close() 183 | for histogram_watcher in self._histogram_watchers: 184 | histogram_watcher.close() 185 | self._line_watchers.clear() 186 | self._histogram_watchers.clear() 187 | 188 | def update_documents(self, callback: Callable[[Document], None]) -> None: 189 | for doc in self._docs: 190 | doc.add_next_tick_callback(functools.partial(callback, doc)) 191 | 192 | 193 | def make_dashboard(sensors: Mapping[str, Sensor]) -> Dashboard: 194 | """Build a dashboard using a standard set of sensors""" 195 | line_sensors = [ 196 | [sensors['active-chunks']], 197 | [sensors['queued-bytes']], 198 | [sensors['output-seconds-total']], 199 | [sensors['output-chunks-total']], 200 | [sensors['input-bytes-total'], sensors['output-bytes-total']], 201 | [sensors['input-heaps-total']], 202 | [sensors['input-incomplete-heaps-total'], sensors['input-missing-heaps-total']] 203 | ] 204 | histogram_sensors = [sensors['output-seconds']] 205 | return Dashboard(line_sensors, histogram_sensors) 206 | 207 | 208 | def start_dashboard(dashboard: Dashboard, args: argparse.Namespace) -> None: 209 | app = Application() 210 | app.add(dashboard) 211 | if args.dashboard_allow_websocket_origin: 212 | allow_websocket_origin = args.dashboard_allow_websocket_origin 213 | else: 214 | allow_websocket_origin = [ 215 | 'localhost:{}'.format(args.dashboard_port), 216 | '{}:{}'.format(args.external_hostname, args.dashboard_port) 217 | ] 218 | server = Server(app, port=args.dashboard_port, 219 | allow_websocket_origin=allow_websocket_origin) 220 | server.start() 221 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_rechunk.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | from typing import List, Tuple # noqa: F401 3 | 4 | import numpy as np 5 | from nose.tools import assert_equal, assert_raises 6 | import asynctest 7 | 8 | from .. 
import rechunk 9 | from ..rechunk import Chunks, Offset # noqa: F401 10 | 11 | 12 | def test_offset_to_size_1d() -> None: 13 | out = rechunk._offset_to_size_1d((1, 5, 7, 4, 2)) 14 | assert_equal(out, {0: 1, 1: 5, 6: 7, 13: 4, 17: 2}) 15 | 16 | 17 | def test_split_chunks_1d() -> None: 18 | out = rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2)) 19 | assert_equal( 20 | out, 21 | { 22 | 0: (slice(0, 1), slice(1, 4)), 23 | 4: (slice(0, 2), slice(2, 4), slice(4, 6)), 24 | 10: (slice(0, 2),) 25 | }) 26 | 27 | 28 | def test_split_chunks_1d_out_chunks_too_short() -> None: 29 | with assert_raises(ValueError): 30 | rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 1)) 31 | 32 | 33 | def test_split_chunks_1d_out_chunks_too_long() -> None: 34 | with assert_raises(ValueError): 35 | rechunk._split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2, 4)) 36 | 37 | 38 | def test_split_chunks_1d_misaligned() -> None: 39 | with assert_raises(ValueError): 40 | # out_chunks not aligned 41 | rechunk._split_chunks_1d((4, 6, 2), (1, 4, 1, 2, 2, 2)) 42 | 43 | 44 | class MockRechunker(rechunk.Rechunker): 45 | def __init__(self, *args, **kwargs) -> None: 46 | super().__init__(*args, **kwargs) 47 | self.calls = [] # type: List[Tuple[Offset, np.ndarray, np.ndarray]] 48 | 49 | async def output(self, offset: Tuple[int, ...], value: np.ndarray, present: np.ndarray) -> None: 50 | self.calls.append((offset, value.copy(), present.copy())) 51 | 52 | 53 | class _BaseTestRechunker(asynctest.TestCase): 54 | def setup_data(self, in_chunks: Chunks, out_chunks: Chunks) -> None: 55 | self.r = MockRechunker('flags', in_chunks, out_chunks, 253, np.uint8) 56 | self.data = np.arange(64).reshape(4, 8, 2).astype(np.uint8) 57 | self.expected = np.full_like(self.data, 253, np.uint8) 58 | self.present = np.zeros_like(self.data, np.bool_) 59 | 60 | async def send_chunk(self, offset: Tuple[int, ...]) -> None: 61 | idx = tuple(slice(ofs, ofs + size) for ofs, size in zip(offset, (1, 4, 2))) 62 | value = self.data[idx] 63 | await self.r.add(offset, value) 64 | self.expected[idx] = self.data[idx] 65 | self.present[idx] = True 66 | 67 | def check_values(self) -> None: 68 | # Checks that the calls contain the expected values given the 69 | # data send. Does NOT check that the offsets and sizes correspond 70 | # correctly to chunks. 
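        # Each recorded call is (offset, value, present): the slices rebuilt
        # below from the offset and the chunk's shape select the region of
        # self.expected / self.present that the output chunk should cover.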
71 | for call in self.r.calls: 72 | idx = tuple(slice(ofs, ofs + size) for ofs, size in zip(call[0], call[1].shape)) 73 | expected = self.expected[idx] 74 | np.testing.assert_array_equal(expected, call[1]) 75 | present = self.present[idx] 76 | present = present[(slice(None),) + (0,) * (present.ndim - 1)] 77 | np.testing.assert_array_equal(present, call[2]) 78 | 79 | async def test_add_bad_offset(self) -> None: 80 | with assert_raises(KeyError): 81 | await self.r.add((0, 2, 0), np.zeros((1, 2, 2), np.uint8)) 82 | with assert_raises(ValueError): 83 | await self.r.add((0, 0), np.zeros((1, 2, 2), np.uint8)) 84 | 85 | async def test_add_bad_shape(self) -> None: 86 | with assert_raises(ValueError): 87 | await self.r.add((0, 0, 0), np.zeros((1, 2, 2), np.uint8)) 88 | with assert_raises(ValueError): 89 | await self.r.add((0, 0, 0), np.zeros((2, 4, 2), np.uint8)) 90 | 91 | 92 | class TestRechunker(_BaseTestRechunker): 93 | def setUp(self) -> None: 94 | self.setup_data(((1,), (4, 4), (2,)), ((2,), (2, 2, 4), (2,))) 95 | 96 | async def test_end_partial(self, reorder: bool = False) -> None: 97 | if reorder: 98 | for i in range(3): 99 | await self.send_chunk((i, 0, 0)) 100 | for i in range(3): 101 | await self.send_chunk((i, 4, 0)) 102 | else: 103 | for i in range(3): 104 | await self.send_chunk((i, 0, 0)) 105 | await self.send_chunk((i, 4, 0)) 106 | await self.r.close() 107 | offsets = [call[0] for call in self.r.calls] 108 | shapes = [call[1].shape for call in self.r.calls] 109 | assert_equal( 110 | offsets, 111 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 112 | (2, 0, 0), (2, 2, 0), (2, 4, 0)]) 113 | assert_equal( 114 | shapes, 115 | [(2, 2, 2), (2, 2, 2), (2, 4, 2), 116 | (1, 2, 2), (1, 2, 2), (1, 4, 2)]) 117 | self.check_values() 118 | assert_equal( 119 | self.r.get_chunk_info('flags'), 120 | { 121 | 'prefix': 'flags', 122 | 'chunks': ((2, 1), (2, 2, 4), (2,)), 123 | 'shape': (3, 8, 2), 124 | 'dtype': '|u1' 125 | }) 126 | 127 | async def test_end_full(self) -> None: 128 | for i in range(4): 129 | await self.send_chunk((i, 0, 0)) 130 | await self.send_chunk((i, 4, 0)) 131 | await self.r.close() 132 | offsets = [call[0] for call in self.r.calls] 133 | shapes = [call[1].shape for call in self.r.calls] 134 | assert_equal( 135 | offsets, 136 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 137 | (2, 0, 0), (2, 2, 0), (2, 4, 0)]) 138 | assert_equal( 139 | shapes, 140 | [(2, 2, 2), (2, 2, 2), (2, 4, 2), 141 | (2, 2, 2), (2, 2, 2), (2, 4, 2)]) 142 | self.check_values() 143 | assert_equal( 144 | self.r.get_chunk_info('flags'), 145 | { 146 | 'prefix': 'flags', 147 | 'chunks': ((2, 2), (2, 2, 4), (2,)), 148 | 'shape': (4, 8, 2), 149 | 'dtype': '|u1' 150 | }) 151 | 152 | async def test_reorder(self) -> None: 153 | await self.test_end_partial(reorder=True) 154 | 155 | async def test_out_of_order(self) -> None: 156 | with mock.patch.object(self.r, 'out_of_order'): 157 | await self.send_chunk((2, 0, 0)) 158 | await self.send_chunk((0, 0, 0)) 159 | self.r.out_of_order.assert_called_with(0, 2) # type: ignore 160 | 161 | async def test_missing(self) -> None: 162 | await self.send_chunk((1, 0, 0)) 163 | await self.send_chunk((2, 4, 0)) 164 | await self.r.close() 165 | 166 | offsets = [call[0] for call in self.r.calls] 167 | shapes = [call[1].shape for call in self.r.calls] 168 | assert_equal( 169 | offsets, 170 | [(0, 0, 0), (0, 2, 0), (2, 4, 0)]) 171 | assert_equal( 172 | shapes, 173 | [(2, 2, 2), (2, 2, 2), (1, 4, 2)]) 174 | self.check_values() 175 | assert_equal( 176 | self.r.get_chunk_info('flags'), 177 | { 178 | 'prefix': 'flags', 
179 | 'chunks': ((2, 1), (2, 2, 4), (2,)), 180 | 'shape': (3, 8, 2), 181 | 'dtype': '|u1' 182 | }) 183 | 184 | def test_bad_in_chunks(self) -> None: 185 | with assert_raises(ValueError): 186 | # in_chunks does not start with (1,) 187 | MockRechunker('foo', ((2,), (4, 4)), ((2,), (4, 4)), 253, np.uint8) 188 | with assert_raises(ValueError): 189 | # zero-sized chunks 190 | MockRechunker('foo', ((1,), (4, 4, 0)), ((2,), (4, 4)), 253, np.uint8) 191 | 192 | def test_bad_out_chunks(self) -> None: 193 | with assert_raises(ValueError): 194 | # does not start with singleton 195 | MockRechunker('foo', ((1,), (4, 4)), ((2, 2), (4, 4)), 253, np.uint8) 196 | 197 | def test_mismatched_chunks(self) -> None: 198 | with assert_raises(ValueError): 199 | # Dimensions don't match 200 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (4, 4), (2,)), 253, np.uint8) 201 | with assert_raises(ValueError): 202 | # Lengths don't match 203 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (4, 4, 1)), 253, np.uint8) 204 | with assert_raises(ValueError): 205 | # Chunks don't align 206 | MockRechunker('foo', ((1,), (4, 4)), ((2,), (3, 5)), 253, np.uint8) 207 | 208 | 209 | class TestRechunkerNoAccum(_BaseTestRechunker): 210 | def setUp(self) -> None: 211 | self.setup_data(((1,), (4, 4), (2,)), ((1,), (2, 2, 4), (2,))) 212 | 213 | async def test(self) -> None: 214 | for i in range(2): 215 | await self.send_chunk((i, 0, 0)) 216 | await self.send_chunk((i, 4, 0)) 217 | await self.r.close() 218 | offsets = [call[0] for call in self.r.calls] 219 | shapes = [call[1].shape for call in self.r.calls] 220 | assert_equal( 221 | offsets, 222 | [(0, 0, 0), (0, 2, 0), (0, 4, 0), 223 | (1, 0, 0), (1, 2, 0), (1, 4, 0)]) 224 | assert_equal( 225 | shapes, 226 | [(1, 2, 2), (1, 2, 2), (1, 4, 2), 227 | (1, 2, 2), (1, 2, 2), (1, 4, 2)]) 228 | self.check_values() 229 | assert_equal( 230 | self.r.get_chunk_info('flags'), 231 | { 232 | 'prefix': 'flags', 233 | 'chunks': ((1, 1), (2, 2, 4), (2,)), 234 | 'shape': (2, 8, 2), 235 | 'dtype': '|u1' 236 | }) 237 | -------------------------------------------------------------------------------- /katsdpdatawriter/test/test_spead_write.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | import numpy as np 5 | from nose.tools import assert_equal, assert_count_equal, assert_is_instance, assert_raises 6 | import asynctest 7 | from aiokatcp import SensorSet 8 | from katdal.chunkstore import ChunkStore 9 | import katsdpservices 10 | 11 | from ..spead_write import (Array, RechunkerGroup, io_sensors, 12 | add_common_args, chunk_store_from_args) 13 | from ..rechunk import Offset 14 | from ..queue_space import QueueSpace 15 | 16 | 17 | class TestArray: 18 | def setup(self) -> None: 19 | self.array = Array( 20 | 'foo', 21 | in_chunks=((1,), (4, 4, 4), (2, 2)), 22 | out_chunks=((2,), (2, 2, 4, 4), (2, 2)), 23 | fill_value=253, 24 | dtype=np.float32) 25 | 26 | def test_dtype(self) -> None: 27 | # Check that the converter converted a dtype-like to a real dtype 28 | assert_equal(self.array.dtype, np.dtype(np.float32)) 29 | assert_is_instance(self.array.dtype, np.dtype) 30 | 31 | def test_substreams(self) -> None: 32 | assert_equal(self.array.substreams, 6) 33 | 34 | def test_shape(self) -> None: 35 | assert_equal(self.array.shape, (1, 12, 4)) 36 | 37 | def test_nbytes(self) -> None: 38 | assert_equal(self.array.nbytes, 192) 39 | 40 | 41 | def _join(*args: str) -> str: 42 | return '/'.join(args) 43 | 44 | 
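# For reference, the figures asserted in TestArray above are consistent with
# the chunk specification it constructs (informal arithmetic only; the real
# definitions live in spead_write.Array):
#   shape  = per-axis sums of in_chunks      = (1, 4+4+4, 2+2) = (1, 12, 4)
#   nbytes = prod(shape) * itemsize(float32) = 12 * 4 * 4      = 192
#   substreams = 6, matching the 3 * 2 = 6 chunks over the channel and
#       baseline axes (the leading time axis contributes a single chunk).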
45 | class TestRechunkerGroup(asynctest.TestCase): 46 | def setUp(self) -> None: 47 | self.chunk_store = mock.create_autospec(spec=ChunkStore, spec_set=True, instance=True) 48 | self.chunk_store.join = _join 49 | 50 | self.sensors = SensorSet() 51 | for sensor in io_sensors(): 52 | self.sensors.add(sensor) 53 | 54 | self.arrays = [ 55 | Array('weights', 56 | ((1,), (4, 4), (2,)), 57 | ((1,), (2, 2, 2, 2), (2,)), 58 | 0, np.uint8), 59 | Array('weights_channel', 60 | ((1,), (4, 4)), 61 | ((2,), (2, 2, 2, 2)), 62 | 0, np.float32) 63 | ] 64 | 65 | self.weights = np.arange(32).reshape(2, 8, 2).astype(np.uint8) 66 | self.weights_channel = np.arange(16).reshape(2, 8).astype(np.float32) 67 | 68 | self.executor = ThreadPoolExecutor(4) 69 | self.executor_queue_space = QueueSpace(5 * sum(array.nbytes for array in self.arrays)) 70 | self.r = RechunkerGroup(self.executor, self.executor_queue_space, 71 | self.chunk_store, self.sensors, 'prefix', self.arrays) 72 | 73 | def tearDown(self): 74 | self.executor.shutdown(wait=True) 75 | 76 | async def add_chunks(self, offset: Offset) -> None: 77 | slices = np.s_[offset[0]:offset[0]+1, offset[1]:offset[1]+4, :] 78 | weights = self.weights[slices] 79 | weights_channel = self.weights_channel[slices[:2]] 80 | await self.r.add(offset, [weights, weights_channel]) 81 | 82 | async def test(self) -> None: 83 | for i in range(0, 8, 4): 84 | for j in range(2): 85 | await self.add_chunks((j, i)) 86 | chunk_info = await self.r.get_chunk_info() 87 | 88 | expected_calls = [] 89 | for i in range(0, 8, 4): 90 | for j in range(2): 91 | for k in range(i, i + 4, 2): 92 | expected_calls.append(mock.call( 93 | 'prefix/weights', np.s_[j:j+1, k:k+2, 0:2], mock.ANY)) 94 | for i in range(0, 8, 2): 95 | expected_calls.append(mock.call( 96 | 'prefix/weights_channel', np.s_[0:2, i:i+2], mock.ANY)) 97 | assert_count_equal(expected_calls, self.chunk_store.put_chunk.mock_calls) 98 | # Check the array values. assert_count_equal doesn't work well for this 99 | # because of how equality operators are implemented in numpy. 
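        # (numpy's elementwise == makes equality checks on calls that contain
        # arrays ambiguous, so the chunk values are compared explicitly below.)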
100 | for call in self.chunk_store.put_chunk.mock_calls: 101 | name, slices, value = call[1] 102 | if name == 'prefix/weights': 103 | np.testing.assert_array_equal(self.weights[slices], value) 104 | else: 105 | np.testing.assert_array_equal(self.weights_channel[slices], value) 106 | 107 | assert_equal( 108 | chunk_info, 109 | { 110 | 'weights': { 111 | 'prefix': 'prefix', 112 | 'chunks': ((1, 1), (2, 2, 2, 2), (2,)), 113 | 'shape': (2, 8, 2), 114 | 'dtype': '|u1' 115 | }, 116 | 'weights_channel': { 117 | 'prefix': 'prefix', 118 | 'chunks': ((2,), (2, 2, 2, 2)), 119 | 'shape': (2, 8), 120 | 'dtype': np.dtype(np.float32).str 121 | } 122 | }) 123 | 124 | async def test_accounting(self) -> None: 125 | qs = self.executor_queue_space 126 | qsize = qs.value 127 | sensor = self.sensors['queued-bytes'] 128 | await self.add_chunks((0, 0)) 129 | assert_equal(sensor.value, 24) # 8x uint8 + 4x float32 130 | assert_equal(qsize - qs.value, 8) # 8x uint8 (the 4x float32 is still accumulating) 131 | await self.add_chunks((1, 0)) 132 | await self.add_chunks((0, 4)) 133 | await self.r.get_chunk_info() 134 | # Everything should have been written 135 | assert_equal(sensor.value, 0) 136 | assert_equal(qsize - qs.value, 0) 137 | 138 | 139 | # SpeadWriter gets exercised via its derived classes 140 | 141 | 142 | class BadArguments(Exception): 143 | """Exception used in mock when replacing ArgumentParser.Error""" 144 | 145 | 146 | @mock.patch.object(katsdpservices.ArgumentParser, 'error', side_effect=BadArguments) 147 | class TestChunkStoreFromArgs: 148 | """Test both :meth:`.add_common_args` and :meth:`.chunk_store_from_args`""" 149 | def setup(self) -> None: 150 | self.parser = katsdpservices.ArgumentParser() 151 | add_common_args(self.parser) 152 | 153 | def test_missing_args(self, error): 154 | with assert_raises(BadArguments): 155 | chunk_store_from_args(self.parser, self.parser.parse_args([])) 156 | error.assert_called_with('--s3-endpoint-url is required if --npy-path is not given') 157 | with assert_raises(BadArguments): 158 | chunk_store_from_args(self.parser, self.parser.parse_args( 159 | ['--s3-endpoint-url', 'http://invalid/', '--s3-access-key', 'ACCESS'])) 160 | error.assert_called_with('--s3-secret-key is required if --npy-path is not given') 161 | 162 | def test_missing_path(self, error): 163 | with assert_raises(BadArguments): 164 | chunk_store_from_args(self.parser, self.parser.parse_args( 165 | ['--npy-path=/doesnotexist'])) 166 | error.assert_called_with('Specified --npy-path (/doesnotexist) does not exist.') 167 | 168 | def test_npy_and_s3_write(self, error): 169 | with assert_raises(BadArguments): 170 | chunk_store_from_args(self.parser, self.parser.parse_args( 171 | ['--npy-path=/', '--s3-write-url=https://s3.invalid/'])) 172 | error.assert_called_with('--s3-write-url and --npy-path cannot be used together') 173 | 174 | def test_npy(self, error): 175 | with mock.patch('katdal.chunkstore_npy.NpyFileChunkStore') as m: 176 | chunk_store_from_args(self.parser, self.parser.parse_args( 177 | ['--npy-path=/'])) 178 | m.assert_called_with('/', direct_write=False) 179 | 180 | def test_npy_direct_write(self, error): 181 | with mock.patch('katdal.chunkstore_npy.NpyFileChunkStore') as m: 182 | chunk_store_from_args(self.parser, self.parser.parse_args( 183 | ['--npy-path=/', '--direct-write'])) 184 | m.assert_called_with('/', direct_write=True) 185 | 186 | def test_s3(self, error): 187 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 188 | chunk_store_from_args(self.parser, 
self.parser.parse_args( 189 | ['--s3-endpoint-url=https://s3.invalid', 190 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS'])) 191 | m.assert_called_with('https://s3.invalid', credentials=('ACCESS', 'S3CR3T'), expiry_days=0) 192 | 193 | def test_s3_expire(self, error): 194 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 195 | chunk_store_from_args(self.parser, self.parser.parse_args( 196 | ['--s3-endpoint-url=https://s3.invalid', 197 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS', 198 | '--s3-expiry-days=7'])) 199 | m.assert_called_with('https://s3.invalid', credentials=('ACCESS', 'S3CR3T'), expiry_days=7) 200 | 201 | def test_s3_write_url(self, error): 202 | with mock.patch('katdal.chunkstore_s3.S3ChunkStore') as m: 203 | chunk_store_from_args(self.parser, self.parser.parse_args( 204 | ['--s3-endpoint-url=https://s3.invalid', 205 | '--s3-write-url=https://s3.write.invalid', 206 | '--s3-secret-key=S3CR3T', '--s3-access-key', 'ACCESS'])) 207 | m.assert_called_with('https://s3.write.invalid', credentials=('ACCESS', 'S3CR3T'), 208 | expiry_days=0) 209 | 210 | def test_rename_src(self, error): 211 | args = self.parser.parse_args([ 212 | '--rename-src=foo:bar', '--rename-src', 'x:y', 213 | '--new-name', 'xyz']) 214 | assert_equal(args.rename_src, {'foo': 'bar', 'x': 'y'}) 215 | 216 | def test_rename_src_bad_colons(self, error): 217 | with assert_raises(BadArguments): 218 | self.parser.parse_args(['--rename-src=foo:bar:baz', '--new-name', 'xyz']) 219 | -------------------------------------------------------------------------------- /katsdpdatawriter/flag_writer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import enum 3 | import json 4 | import asyncio 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import Any, Dict, List, Mapping, Optional 7 | 8 | import numpy as np 9 | import spead2 10 | import spead2.recv.asyncio 11 | import katsdpservices 12 | import katdal 13 | import katdal.chunkstore 14 | from katdal.visdatav4 import FLAG_NAMES 15 | from aiokatcp import DeviceServer, Sensor, SensorSet, FailReply 16 | import katsdptelstate 17 | from katsdptelstate.endpoint import Endpoint 18 | 19 | import katsdpdatawriter 20 | from . 
import spead_write 21 | from .spead_write import RechunkerGroup 22 | from .queue_space import QueueSpace 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class Status(enum.Enum): 29 | """Status of the whole process""" 30 | WAIT_DATA = 1 31 | CAPTURING = 2 32 | FINISHED = 3 33 | 34 | 35 | class State(enum.Enum): 36 | """State of a single capture block""" 37 | CAPTURING = 1 # capture-init has been called, but not capture-done 38 | COMPLETE = 2 # capture-done has been called 39 | 40 | 41 | class EnumEncoder(json.JSONEncoder): 42 | """JSON encoder that stringifies enums""" 43 | def default(self, obj: Any) -> Any: 44 | if isinstance(obj, enum.Enum): 45 | return obj.name 46 | return json.JSONEncoder.default(self, obj) 47 | 48 | 49 | class FlagWriter(spead_write.SpeadWriter): 50 | """Glue between :class:`~.SpeadWriter` and :class:`FlagWriterServer`.""" 51 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream, 52 | server: 'FlagWriterServer') -> None: 53 | super().__init__(sensors, rx) 54 | self._server = server 55 | 56 | def first_heap(self) -> None: 57 | logger.info("First flag heap received...") 58 | self.sensors['status'].value = Status.CAPTURING 59 | 60 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> Optional[RechunkerGroup]: 61 | cbid = updated['capture_block_id'].value 62 | return self._server.rechunker_group(cbid) 63 | 64 | 65 | class FlagWriterServer(DeviceServer): 66 | """Top-level device server for flag writer service""" 67 | 68 | VERSION = "sdp-flag-writer-0.2" 69 | BUILD_STATE = "katsdpdatawriter-" + katsdpdatawriter.__version__ 70 | 71 | def __init__(self, host: str, port: int, loop: asyncio.AbstractEventLoop, 72 | endpoints: List[Endpoint], flag_interface: Optional[str], flags_ibv: bool, 73 | chunk_store: katdal.chunkstore.ChunkStore, 74 | chunk_params: spead_write.ChunkParams, 75 | telstate: katsdptelstate.TelescopeState, 76 | input_name: str, output_name: str, rename_src: Mapping[str, str], 77 | s3_endpoint_url: Optional[str], 78 | max_workers: int, buffer_dumps: int) -> None: 79 | super().__init__(host, port, loop=loop) 80 | 81 | self._chunk_store = chunk_store 82 | self._telstate = telstate 83 | # track the status of each capture block we have seen to date 84 | self._capture_block_state = {} # type: Dict[str, State] 85 | self._input_name = input_name 86 | self._output_name = output_name 87 | # rechunker group for each CBID 88 | self._flag_streams = {} # type: Dict[str, RechunkerGroup] 89 | self._executor = ThreadPoolExecutor(max_workers=max_workers) 90 | 91 | self.sensors.add(Sensor( 92 | Status, "status", "The current status of the flag writer process.")) 93 | self.sensors.add(Sensor( 94 | str, "capture-block-state", 95 | "JSON dict with the state of each capture block seen in this session.", 96 | default='{}', initial_status=Sensor.Status.NOMINAL)) 97 | for sensor in spead_write.io_sensors(): 98 | self.sensors.add(sensor) 99 | self.sensors.add(spead_write.device_status_sensor()) 100 | 101 | telstate_input = telstate.view(input_name) 102 | in_chunks = spead_write.chunks_from_telstate(telstate_input) 103 | DATA_LOST = 1 << FLAG_NAMES.index('data_lost') 104 | self._arrays = [ 105 | spead_write.make_array('flags', in_chunks, DATA_LOST, np.uint8, chunk_params) 106 | ] 107 | dump_size = sum(array.nbytes for array in self._arrays) 108 | self._executor_queue_space = QueueSpace(buffer_dumps * dump_size, loop=self.loop) 109 | spead_write.write_telstate(telstate, input_name, output_name, rename_src, s3_endpoint_url) 110 | 111 | rx = 
spead_write.make_receiver( 112 | endpoints, self._arrays, 113 | katsdpservices.get_interface_address(flag_interface), flags_ibv) 114 | self._writer = FlagWriter(self.sensors, rx, self) 115 | self._capture_task = loop.create_task(self._do_capture()) 116 | 117 | def _set_capture_block_state(self, capture_block_id: str, state: State) -> None: 118 | if state == State.COMPLETE: 119 | # Remove if present 120 | self._capture_block_state.pop(capture_block_id, None) 121 | else: 122 | self._capture_block_state[capture_block_id] = state 123 | dumped = json.dumps(self._capture_block_state, sort_keys=True, cls=EnumEncoder) 124 | self.sensors['capture-block-state'].value = dumped 125 | 126 | def _get_capture_block_state(self, capture_block_id: str) -> Optional[State]: 127 | return self._capture_block_state.get(capture_block_id, None) 128 | 129 | def _get_capture_stream_name(self, capture_block_id: str) -> str: 130 | """Get the capture-stream name of the output stream""" 131 | return "{}_{}".format(capture_block_id, self._output_name) 132 | 133 | def _get_prefix(self, capture_block_id: str) -> str: 134 | """Get the prefix (aka bucket name) to use with the chunk store""" 135 | # S3 doesn't allow underscores in bucket names 136 | return self._get_capture_stream_name(capture_block_id).replace('_', '-') 137 | 138 | def rechunker_group(self, cbid: str) -> Optional[RechunkerGroup]: 139 | extra = dict(capture_block_id=cbid) 140 | if not self._get_capture_block_state(cbid): 141 | logger.error("Received flags for CBID %s outside of init/done. " 142 | "These flags will be *discarded*.", cbid, extra=extra) 143 | return None 144 | 145 | if cbid not in self._flag_streams: 146 | self._flag_streams[cbid] = RechunkerGroup( 147 | self._executor, self._executor_queue_space, 148 | self._chunk_store, self._writer.sensors, self._get_prefix(cbid), self._arrays) 149 | return self._flag_streams[cbid] 150 | 151 | async def _do_capture(self) -> None: 152 | """Run the entire capture process. 153 | 154 | This runs for the lifetime of the server. 155 | """ 156 | try: 157 | spead_write.clear_io_sensors(self.sensors) 158 | self.sensors['status'].value = Status.WAIT_DATA 159 | logger.info("Waiting for data...") 160 | await self._writer.run() 161 | except Exception: 162 | logger.exception("Error in SPEAD receiver") 163 | self.sensors['device-status'].value = spead_write.DeviceStatus.FAIL 164 | finally: 165 | spead_write.clear_io_sensors(self.sensors) 166 | self.sensors['status'].value = Status.FINISHED 167 | self._executor.shutdown() 168 | 169 | async def request_capture_init(self, ctx, capture_block_id: str) -> None: 170 | """Start an observation""" 171 | if capture_block_id in self._capture_block_state: 172 | raise FailReply("Capture block ID {} is already active".format(capture_block_id)) 173 | self._set_capture_block_state(capture_block_id, State.CAPTURING) 174 | 175 | def _mark_cbid_complete(self, capture_block_id: str) -> None: 176 | """Inform other users of the on disk data that we are finished with a 177 | particular capture_block_id. 
178 | """ 179 | extra = dict(capture_block_id=capture_block_id) 180 | logger.info("Capture block %s flag capture complete.", capture_block_id, extra=extra) 181 | self._chunk_store.mark_complete(self._get_prefix(capture_block_id)) 182 | self._set_capture_block_state(capture_block_id, State.COMPLETE) 183 | 184 | async def _write_telstate_meta(self, capture_block_id: str) -> None: 185 | """Write out chunk information for the specified CBID to telstate.""" 186 | extra = dict(capture_block_id=capture_block_id) 187 | if capture_block_id not in self._flag_streams: 188 | logger.warning("No flag data received for cbid %s. Flag stream will not be usable.", 189 | capture_block_id, extra=extra) 190 | return 191 | rechunker_group = self._flag_streams[capture_block_id] 192 | chunk_info = await rechunker_group.get_chunk_info() 193 | capture_stream_name = self._get_capture_stream_name(capture_block_id) 194 | telstate_capture = self._telstate.view(capture_stream_name) 195 | telstate_capture['chunk_info'] = chunk_info 196 | logger.info("Written chunk information to telstate.", extra=extra) 197 | 198 | async def request_capture_done(self, ctx, capture_block_id: str) -> None: 199 | """Mark specified capture_block_id as complete. 200 | 201 | It flushes the flag cache and writes chunk info into telstate. 202 | """ 203 | if capture_block_id not in self._capture_block_state: 204 | raise FailReply("Specified capture block ID {} is unknown.".format(capture_block_id)) 205 | # Allow some time for stragglers to appear 206 | await asyncio.sleep(5, loop=self.loop) 207 | await self._write_telstate_meta(capture_block_id) 208 | self._mark_cbid_complete(capture_block_id) 209 | 210 | async def stop(self, cancel: bool = True) -> None: 211 | self._writer.stop() 212 | await self._capture_task 213 | await super().stop(cancel) 214 | -------------------------------------------------------------------------------- /katsdpdatawriter/vis_writer.py: -------------------------------------------------------------------------------- 1 | """Capture L0 visibilities from a SPEAD stream and write to a local chunk store. 2 | 3 | This process lives across multiple capture blocks. It writes weights and flags 4 | as well. 5 | 6 | The status sensor has the following states (with typical transition events): 7 | 8 | - `idle`: ready to start capture 9 | -> ?capture-init -> 10 | - `wait-data`: waiting for first heap of L0 visibilities from SPEAD stream 11 | -> first SPEAD heap arrives -> 12 | - `capturing`: SPEAD data is being captured 13 | -> capture stops -> 14 | - `finalising`: metadata is being written to telstate 15 | - `complete`: both data and metadata capture completed 16 | - `error`: capture failed 17 | -> ?capture-done -> 18 | - `idle`: ready to start capture again 19 | 20 | Objects are stored in chunks split over time and frequency but not baseline. 21 | The chunking is chosen to produce objects with sizes on the order of 10 MB. 22 | Objects have the following naming scheme: 23 | 24 | //[_[_]] 25 | 26 | - : "file name"/bucket in store i.e. _ 27 | - : unique ID from capture_init (Unix timestamp at init) 28 | - : name of specific data product (associated with L0 SPEAD stream) 29 | - : 'correlator_data' / 'weights' / 'flags' / etc. 
30 | - : chunk start index along N'th dimension 31 | 32 | The following useful object parameters are stored in telstate: 33 | 34 | - _s3_endpoint_url: endpoint URL of S3 gateway to Ceph 35 | - _chunk_info: {prefix, dtype, shape, chunks} dict per array 36 | """ 37 | 38 | import asyncio 39 | import logging 40 | import enum 41 | from concurrent.futures import ThreadPoolExecutor 42 | from typing import List, Tuple, Dict, Any, Optional, Mapping # noqa: F401 43 | 44 | import numpy as np 45 | import aiokatcp 46 | from aiokatcp import DeviceServer, Sensor, SensorSet, FailReply 47 | from katdal.visdatav4 import FLAG_NAMES 48 | import katdal.chunkstore 49 | import katsdptelstate 50 | from katsdptelstate.endpoint import Endpoint 51 | import katsdpservices 52 | import spead2.recv.asyncio 53 | 54 | import katsdpdatawriter 55 | from . import spead_write 56 | from .queue_space import QueueSpace 57 | 58 | 59 | logger = logging.getLogger(__name__) 60 | 61 | 62 | class Status(enum.Enum): 63 | IDLE = 1 64 | WAIT_DATA = 2 65 | CAPTURING = 3 66 | FINALISING = 4 67 | COMPLETE = 5 68 | ERROR = 6 69 | 70 | 71 | def _status_status(value: Status) -> aiokatcp.Sensor.Status: 72 | if value == Status.ERROR: 73 | return Sensor.Status.ERROR 74 | else: 75 | return Sensor.Status.NOMINAL 76 | 77 | 78 | class VisibilityWriter(spead_write.SpeadWriter): 79 | """Glue between :class:`~.SpeadWriter` and :class:`VisibilityWriterServer`.""" 80 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream, 81 | rechunker_group: spead_write.RechunkerGroup) -> None: 82 | super().__init__(sensors, rx) 83 | self._rechunker_group = rechunker_group 84 | 85 | def first_heap(self) -> None: 86 | self.sensors['status'].value = Status.CAPTURING 87 | 88 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> spead_write.RechunkerGroup: 89 | return self._rechunker_group 90 | 91 | 92 | class VisibilityWriterServer(DeviceServer): 93 | VERSION = "sdp-vis-writer-0.2" 94 | BUILD_STATE = "katsdpdatawriter-" + katsdpdatawriter.__version__ 95 | 96 | def __init__(self, host: str, port: int, loop: asyncio.AbstractEventLoop, 97 | endpoints: List[Endpoint], interface: Optional[str], ibv: bool, 98 | chunk_store: katdal.chunkstore.ChunkStore, 99 | chunk_params: spead_write.ChunkParams, 100 | telstate: katsdptelstate.TelescopeState, 101 | input_name: str, output_name: str, rename_src: Mapping[str, str], 102 | s3_endpoint_url: Optional[str], 103 | max_workers: int, buffer_dumps: int) -> None: 104 | super().__init__(host, port, loop=loop) 105 | self._endpoints = endpoints 106 | self._interface_address = katsdpservices.get_interface_address(interface) 107 | self._ibv = ibv 108 | self._chunk_store = chunk_store 109 | self._input_name = input_name 110 | self._output_name = output_name 111 | self._telstate = telstate 112 | self._rx = None # type: Optional[spead2.recv.asyncio.Stream] 113 | self._max_workers = max_workers 114 | 115 | telstate_input = telstate.view(input_name) 116 | in_chunks = spead_write.chunks_from_telstate(telstate_input) 117 | DATA_LOST = 1 << FLAG_NAMES.index('data_lost') 118 | self._arrays = [ 119 | spead_write.make_array('correlator_data', in_chunks, 0, np.complex64, chunk_params), 120 | spead_write.make_array('flags', in_chunks, DATA_LOST, np.uint8, chunk_params), 121 | spead_write.make_array('weights', in_chunks, 0, np.uint8, chunk_params), 122 | spead_write.make_array('weights_channel', in_chunks[:2], 0, np.float32, chunk_params) 123 | ] 124 | dump_size = sum(array.nbytes for array in self._arrays) 125 | self._buffer_size = 
buffer_dumps * dump_size 126 | spead_write.write_telstate(telstate, input_name, output_name, rename_src, s3_endpoint_url) 127 | 128 | self._capture_task = None # type: Optional[asyncio.Task] 129 | self._n_substreams = len(in_chunks[1]) 130 | 131 | self.sensors.add(Sensor( 132 | Status, 'status', 'The current status of the capture process', 133 | default=Status.IDLE, initial_status=Sensor.Status.NOMINAL, 134 | status_func=_status_status)) 135 | for sensor in spead_write.io_sensors(): 136 | self.sensors.add(sensor) 137 | self.sensors.add(spead_write.device_status_sensor()) 138 | 139 | async def _do_capture(self, capture_stream_name: str, rx: spead2.recv.asyncio.Stream) -> None: 140 | """Capture data for a single capture block""" 141 | writer = None 142 | rechunker_group = None 143 | executor = ThreadPoolExecutor(self._max_workers) 144 | executor_queue_space = QueueSpace(self._buffer_size, loop=self.loop) 145 | try: 146 | spead_write.clear_io_sensors(self.sensors) 147 | prefix = capture_stream_name.replace('_', '-') # S3 doesn't allow underscores 148 | rechunker_group = spead_write.RechunkerGroup( 149 | executor, executor_queue_space, 150 | self._chunk_store, self.sensors, prefix, self._arrays) 151 | writer = VisibilityWriter(self.sensors, rx, rechunker_group) 152 | self.sensors['status'].value = Status.WAIT_DATA 153 | 154 | await writer.run(stops=self._n_substreams) 155 | 156 | self.sensors['status'].value = Status.FINALISING 157 | view = self._telstate.view(capture_stream_name) 158 | view['chunk_info'] = await rechunker_group.get_chunk_info() 159 | rechunker_group = None # Tells except block not to clean up 160 | self._chunk_store.mark_complete(prefix) 161 | self.sensors['status'].value = Status.COMPLETE 162 | except Exception: 163 | logger.exception('Exception in capture task') 164 | self.sensors['status'].value = Status.ERROR 165 | self.sensors['device-status'].value = spead_write.DeviceStatus.FAIL 166 | finally: 167 | spead_write.clear_io_sensors(self.sensors) 168 | if rechunker_group is not None: 169 | # Has the side effect of doing cleanup 170 | await rechunker_group.get_chunk_info() 171 | # Shouldn't be any pending tasks, because get_chunk_info should wait 172 | executor.shutdown() 173 | 174 | async def request_capture_init(self, ctx, capture_block_id: str) -> None: 175 | """Start listening for L0 data""" 176 | if self._capture_task is not None: 177 | logger.info("Ignoring capture_init: already capturing") 178 | raise FailReply('Already capturing') 179 | self.sensors['status'].value = Status.WAIT_DATA 180 | self.sensors['device-status'].value = spead_write.DeviceStatus.OK 181 | capture_stream_name = self._telstate.join(capture_block_id, self._output_name) 182 | self._rx = spead_write.make_receiver( 183 | self._endpoints, self._arrays, self._interface_address, self._ibv) 184 | self._capture_task = self.loop.create_task(self._do_capture(capture_stream_name, self._rx)) 185 | logger.info('Starting capture to %s', capture_stream_name) 186 | 187 | async def capture_done(self) -> None: 188 | """Implementation of :meth:`request_capture_done`. 189 | 190 | This is split out to allow it to be called on ``SIGINT``. 
191 | """ 192 | if self._capture_task is None: 193 | return 194 | capture_task = self._capture_task 195 | # Give it a chance to stop on its own from stop packets 196 | try: 197 | logger.info('Waiting for capture task (5s timeout)') 198 | await asyncio.wait_for(asyncio.shield(capture_task), timeout=5) 199 | except asyncio.TimeoutError: 200 | if self._capture_task is not capture_task: 201 | return # Someone else beat us to the cleanup 202 | logger.info('Stopping receiver and waiting for capture task') 203 | if self._rx: 204 | self._rx.stop() 205 | await capture_task 206 | 207 | if self._capture_task is not capture_task: 208 | return # Someone else beat us to the cleanup 209 | if self._rx: 210 | self._rx.stop() 211 | self._capture_task = None 212 | self.sensors['status'].value = Status.IDLE 213 | logger.info('Capture complete') 214 | 215 | async def request_capture_done(self, ctx) -> None: 216 | """Stop capturing, which cleans up the capturing task.""" 217 | if self._capture_task is None: 218 | logger.info("Ignoring capture_done: already explicitly stopped") 219 | raise FailReply('Not capturing') 220 | await self.capture_done() 221 | 222 | async def stop(self, cancel=True) -> None: 223 | await self.capture_done() 224 | await super().stop(cancel) 225 | -------------------------------------------------------------------------------- /katsdpdatawriter/rechunk.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import itertools 3 | from typing import Tuple, Dict, Any, Optional 4 | 5 | import numpy as np 6 | import numpy.lib.format as _np_lib_format # noqa: F401 # Make mypy happy with np.lib.format 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | Offset = Tuple[int, ...] 13 | Shape = Tuple[int, ...] 14 | Chunks1D = Tuple[int, ...] 15 | Chunks = Tuple[Chunks1D, ...] 16 | Slices = Tuple[slice, ...] 17 | 18 | 19 | def _offset_to_size_1d(chunks: Chunks1D) -> Dict[int, int]: 20 | """Maps offset of start of each chunk to the size of that chunk 21 | 22 | Parameters 23 | ---------- 24 | chunks : tuple of int 25 | Chunk sizes 26 | 27 | Returns 28 | ------- 29 | dict 30 | """ 31 | out = {} 32 | cur = 0 33 | for c in chunks: 34 | if c <= 0: 35 | raise ValueError('Chunk sizes must be positive') 36 | out[cur] = c 37 | cur += c 38 | return out 39 | 40 | 41 | def _offset_to_size(chunks: Chunks) -> Tuple[Dict[int, int], ...]: 42 | """Multi-dimensional version of :func:`_offset_to_size_1d`.""" 43 | return tuple(_offset_to_size_1d(c) for c in chunks) 44 | 45 | 46 | def _split_chunks_1d(in_chunks: Chunks1D, out_chunks: Chunks1D) -> Dict[int, Slices]: 47 | """ 48 | Divide output chunks into groups that align to the input chunks. 49 | 50 | For each input chunk, a tuple of slices is generated to index within that 51 | input chunk. The result is a dictionary indexed by offset. 
52 | 53 | >>> _split_chunks_1d((4, 6, 2), (1, 3, 2, 2, 2, 2)) 54 | { 55 | 0: (slice(0, 1), slice(1, 4)), 56 | 4: (slice(0, 2), slice(2, 4), slice(4, 6)), 57 | 10: (slice(0, 2),) 58 | } 59 | 60 | Raises 61 | ------ 62 | ValueError 63 | if an output chunk spans multiple input chunks 64 | ValueError 65 | if ``sum(in_chunks) != sum(out_chunks)`` 66 | """ 67 | out = {} 68 | pos = 0 69 | if sum(in_chunks) != sum(out_chunks): 70 | raise ValueError('chunks imply different shapes') 71 | offset = 0 72 | for c in in_chunks: 73 | slices = [] 74 | cur = 0 75 | while cur < c: 76 | oc = out_chunks[pos] 77 | pos += 1 78 | slices.append(slice(cur, cur + oc)) 79 | cur += oc 80 | if cur > c: 81 | raise ValueError('input and output chunks do not align') 82 | out[offset] = tuple(slices) 83 | offset += c 84 | return out 85 | 86 | 87 | def _split_chunks(in_chunks: Chunks, out_chunks: Chunks) -> Tuple[Dict[int, Slices], ...]: 88 | """Multi-dimensional version of :meth:`_split_chunks_1d`.""" 89 | if len(in_chunks) != len(out_chunks): 90 | raise ValueError('in_chunks and out_chunks have different length') 91 | return tuple(_split_chunks_1d(*item) for item in zip(in_chunks, out_chunks)) 92 | 93 | 94 | class Rechunker: 95 | """ 96 | Takes a stream of chunks and generates output with same data but new 97 | chunking scheme. 98 | 99 | This is similar in concept to dask's rechunk, but in a streaming fashion 100 | (with the assumption that time is on the first axis). It is more limited 101 | though: non-time axes can only be split, not re-combined. The time axis 102 | must be size-1 chunks on input, but can be larger on output (accumulation 103 | in time). 104 | 105 | Incoming chunks whose coordinates differ only in the time axis must be 106 | received in order (out-of-order chunks will be discarded). Chunks with 107 | different non-time coordinates are handled completely independently. This 108 | does not apply when no time accumulation is being done, in which case 109 | chunks can arrive in any order. 110 | 111 | Memory usage depends on whether accumulation-in-time is being done. If 112 | so, it stores data internally (enough for one complete output dump). If 113 | not, there is no internal data storage, and memory usage only scales with 114 | the metadata (number of chunks etc). 115 | 116 | Do not instantiate this class directly. Instead, subclass it and implement 117 | :meth:`output`. 118 | 119 | .. warning:: 120 | 121 | While this class has asynchronous methods, it is not safe to use it 122 | from more than one task at once i.e. wait for each async call to 123 | complete before making another one. 124 | 125 | Parameters 126 | ---------- 127 | name : str 128 | Name of this array (purely for logging) 129 | in_chunks : tuple of tuple of int 130 | Chunking scheme of the input. The first element must be ``(1,)`` 131 | out_chunks : tuple of tuple of int 132 | Chunking scheme of the output. The first element must be a 1-tuple, 133 | with the value indicating the size of each chunk (except possibly 134 | the last) in time. 135 | fill_value 136 | Value to store where no input is received for some of the input 137 | chunks that combine to form an output chunk. 138 | dtype : numpy dtype 139 | Data type of the array 140 | 141 | Raises 142 | ------ 143 | ValueError 144 | if the restrictions on the input and output chunks are not met 145 | """ 146 | 147 | class _Item: 148 | """Intermediate chunk under construction. 
149 | 150 | An intermediate chunk has the output chunk size in the time axis and 151 | the input chunk size in other axes. 152 | """ 153 | def __init__(self, offset: Offset, initial_value: np.ndarray, present: bool) -> None: 154 | self.offset = offset 155 | self.value: Optional[np.ndarray] = initial_value 156 | self.present = np.full(initial_value.shape[:1], present, np.bool_) 157 | 158 | def add(self, offset: Offset, value: np.ndarray) -> None: 159 | """Add a new input chunk.""" 160 | assert self.value is not None 161 | assert offset[1:] == self.offset[1:] 162 | if value.shape[1:] != self.value.shape[1:] or value.shape[0] != 1: 163 | raise ValueError('value has wrong shape') 164 | rel = offset[0] - self.offset[0] 165 | self.value[rel:rel+1] = value 166 | self.present[rel] = True 167 | 168 | def truncate(self, times: int) -> None: 169 | assert self.value is not None 170 | if times < self.value.shape[0]: 171 | self.value = self.value[:times] 172 | self.present = self.present[:times] 173 | 174 | def __init__(self, name: str, 175 | in_chunks: Chunks, 176 | out_chunks: Chunks, 177 | fill_value: Any, dtype: Any) -> None: 178 | if in_chunks[0] != (1,): 179 | raise ValueError('in_chunks does not start with (1,)') 180 | if len(out_chunks[0]) != 1: 181 | raise ValueError('out_chunks does not start with a singleton') 182 | 183 | self.name = name 184 | self.in_chunks = in_chunks 185 | self.out_chunks = out_chunks 186 | self.fill_value = fill_value 187 | self.dtype = np.dtype(dtype) 188 | self._items = {} # type: Dict[Tuple[int, ...], Rechunker._Item] # Indexed by offset[1:] 189 | self._sizes = _offset_to_size(in_chunks[1:]) 190 | self._split_chunks = _split_chunks(in_chunks[1:], out_chunks[1:]) 191 | self._time_accum = out_chunks[0][0] 192 | self._n_dumps = 0 193 | 194 | def out_of_order(self, received: int, seen: int) -> None: 195 | """Report a chunk received from the past. 196 | 197 | This can be overridden to change the reporting channel. 198 | """ 199 | logger.warning( 200 | "Received old chunk for array %s (%d < %d)", 201 | self.name, received, seen) # pragma: nocover 202 | 203 | def _item_shape(self, offset: Offset) -> Shape: 204 | """Expected shape for the :class:`Item` holding the input chunk starting at `offset`.""" 205 | sizes = tuple(s[ofs] for ofs, s in zip(offset[1:], self._sizes)) 206 | return (self._time_accum,) + sizes 207 | 208 | async def _flush(self, item: _Item) -> None: 209 | """Send `item` to :meth:`output`.""" 210 | slices = tuple(s[ofs] for ofs, s in zip(item.offset[1:], self._split_chunks)) 211 | for idx in itertools.product(*slices): 212 | assert item.value is not None 213 | full_idx = np.index_exp[0:len(item.value)] + idx 214 | offset = tuple(s.start + offset for s, offset in zip(full_idx, item.offset)) 215 | await self.output(offset, item.value[full_idx], item.present) 216 | item.value = None # Allow GC to reclaim memory now 217 | 218 | async def _get_item(self, offset: Offset) -> Optional['_Item']: 219 | """Get the item that should hold the input chunk starting at `offset`. 220 | 221 | It returns ``None`` if the offset is too far in the past to be captured. 
222 | """ 223 | key = offset[1:] 224 | # Round down to the start of the accumulation 225 | item_offset = (offset[0] // self._time_accum * self._time_accum,) + key 226 | item = self._items.get(key) 227 | if item is None or item.offset[0] < item_offset[0]: 228 | if item is not None: 229 | await self._flush(item) 230 | shape = self._item_shape(offset) 231 | initial_value = np.full(shape, self.fill_value, self.dtype) 232 | item = self._Item(item_offset, initial_value, False) 233 | self._items[key] = item 234 | elif item.offset[0] > item_offset[0]: 235 | self.out_of_order(offset[0], item.offset[0]) 236 | item = None 237 | return item 238 | 239 | async def add(self, offset: Offset, value: np.ndarray) -> None: 240 | """Add a new incoming chunk. 241 | 242 | The `value` is guaranteed to be copied, so it is safe for the caller 243 | to update it after the call returns (which means the coroutine 244 | *completing*, not just yielding). 245 | 246 | Parameters 247 | ---------- 248 | offset : tuple of int 249 | Start coordinates of the chunk. It must be aligned to the 250 | configured chunking scheme. 251 | values : array-like 252 | Values of the chunk. 253 | 254 | Raises 255 | ------ 256 | ValueError 257 | if `offset` has the wrong number of dimensions 258 | ValueError 259 | if `value` has the wrong shape for `offset` 260 | KeyError 261 | if `offset` does not match the input chunking scheme 262 | """ 263 | if len(offset) != len(self.in_chunks): 264 | raise ValueError('wrong number of dimensions') 265 | if self._time_accum > 1: 266 | item = await self._get_item(offset) 267 | if item is not None: 268 | item.add(offset, value) 269 | else: 270 | shape = self._item_shape(offset) 271 | # Ensure the dtype and force a copy at the same time 272 | value = np.asarray(value).astype(self.dtype, copy=True) 273 | if value.shape != shape: 274 | raise ValueError('value has wrong shape') 275 | item = self._Item(offset, value, True) 276 | await self._flush(item) 277 | self._n_dumps = max(self._n_dumps, offset[0] + 1) 278 | 279 | async def close(self) -> None: 280 | """Flush out any partially buffered items""" 281 | for item in self._items.values(): 282 | # Truncate to last seen dump 283 | times = self._n_dumps - item.offset[0] 284 | item.truncate(times) 285 | await self._flush(item) 286 | self._items.clear() 287 | 288 | def _get_shape(self) -> Shape: 289 | return (self._n_dumps,) + tuple(sum(c) for c in self.out_chunks[1:]) 290 | 291 | def _get_chunks(self) -> Chunks: 292 | c = self.out_chunks[0][0] 293 | full = self._n_dumps // c 294 | last = self._n_dumps % c 295 | if last > 0: 296 | time_chunks = (c,) * full + (last,) 297 | else: 298 | time_chunks = (c,) * full 299 | return (time_chunks,) + self.out_chunks[1:] 300 | 301 | def get_chunk_info(self, prefix: str) -> Dict[str, Any]: 302 | """Get chunk info to be placed into telstate to describe the output. 303 | 304 | Parameters 305 | ---------- 306 | prefix : str 307 | The array name prefix to retrieve the chunks from the chunk store 308 | """ 309 | return { 310 | 'prefix': prefix, 311 | 'dtype': np.lib.format.dtype_to_descr(self.dtype), 312 | 'shape': self._get_shape(), 313 | 'chunks': self._get_chunks() 314 | } 315 | 316 | async def output(self, offset: Offset, value: np.ndarray, present: np.ndarray) -> None: 317 | """Called with each output chunk. 318 | 319 | It is safe for the callee to save a reference to `value`: it is 320 | guaranteed that this class will not reuse the memory. 
321 | 322 | Parameters 323 | ---------- 324 | offset 325 | Position of the start of this chunk 326 | value 327 | Chunk data 328 | present 329 | 1D boolean array indexed by time, indicating which of the input 330 | chunks that were accumulated into the output chunks are actually 331 | present (rather than replaced by the fill value). 332 | """ 333 | raise NotImplementedError # pragma: nocover 334 | -------------------------------------------------------------------------------- /katsdpdatawriter/spead_write.py: -------------------------------------------------------------------------------- 1 | """ 2 | Receive heaps from a SPEAD stream and write corresponding data to a chunk store. 3 | """ 4 | 5 | import copy 6 | import argparse 7 | import os.path 8 | import time 9 | import enum 10 | import logging 11 | import concurrent.futures 12 | import asyncio 13 | import socket 14 | import functools 15 | from collections import Counter 16 | from typing import (Optional, Any, Sequence, Iterable, # noqa: F401 17 | Mapping, MutableMapping, Set, Dict, Tuple) 18 | 19 | import numpy as np 20 | import attr 21 | from aiokatcp import Sensor, SensorSet, SensorSampler 22 | import spead2 23 | import spead2.recv.asyncio 24 | import katdal.chunkstore 25 | import katdal.chunkstore_npy 26 | import katdal.chunkstore_s3 27 | import katsdpservices 28 | import katsdptelstate 29 | from katsdptelstate.endpoint import Endpoint 30 | 31 | from . import rechunk 32 | from .rechunk import Chunks, Offset 33 | from .queue_space import QueueSpace 34 | 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | 39 | @attr.s 40 | class ChunkParams: 41 | max_size = attr.ib() # type: int # maximum size in bytes 42 | max_channels = attr.ib(default=None) # type: Optional[int] 43 | max_dumps = attr.ib(default=None) # type: Optional[int] 44 | 45 | @staticmethod 46 | def from_args(args: argparse.Namespace): 47 | """Create from command-line arguments (see :func:`add_common_args`)""" 48 | if args.buffer_dumps < args.obj_max_dumps: 49 | logger.warning('Decreasing --obj-max-dumps to match --buffer-dumps (%d)', 50 | args.buffer_dumps) 51 | max_dumps = min(args.obj_max_dumps, args.buffer_dumps) 52 | return ChunkParams(args.obj_size_mb * 1e6, args.obj_max_channels, max_dumps) 53 | 54 | 55 | # TODO: move this into aiokatcp 56 | class DeviceStatus(enum.Enum): 57 | """Standard katcp device status""" 58 | OK = 0 59 | DEGRADED = 1 60 | FAIL = 2 61 | 62 | 63 | def _device_status_status(value: DeviceStatus) -> Sensor.Status: 64 | """Sets katcp status for device-status sensor from value""" 65 | if value == DeviceStatus.OK: 66 | return Sensor.Status.NOMINAL 67 | elif value == DeviceStatus.DEGRADED: 68 | return Sensor.Status.WARN 69 | else: 70 | return Sensor.Status.ERROR 71 | 72 | 73 | def _warn_if_positive(value: float) -> Sensor.Status: 74 | return Sensor.Status.WARN if value > 0 else Sensor.Status.NOMINAL 75 | 76 | 77 | # Just to work around https://github.com/python/mypy/issues/4729 78 | def _dtype_converter(dtype: Any) -> np.dtype: 79 | return np.dtype(dtype) 80 | 81 | 82 | def io_sensors() -> Sequence[Sensor]: 83 | """Create input and output counter sensors.""" 84 | def make_sensor(*args, **kwargs) -> Sensor: 85 | kwargs['auto_strategy'] = SensorSampler.Strategy.EVENT_RATE 86 | kwargs['auto_strategy_parameters'] = (0.05, 10.0) 87 | return Sensor(*args, **kwargs) 88 | 89 | return [ 90 | make_sensor( 91 | int, "input-incomplete-heaps-total", 92 | "Number of heaps dropped due to being incomplete. 
(prometheus: counter)", 93 | status_func=_warn_if_positive), 94 | make_sensor( 95 | int, "input-too-old-heaps-total", 96 | "Number of heaps dropped because they are too late. (prometheus: counter)", 97 | status_func=_warn_if_positive), 98 | make_sensor( 99 | int, "input-missing-heaps-total", 100 | "Number of gaps in the heaps seen. (prometheus: counter)", 101 | status_func=_warn_if_positive), 102 | make_sensor( 103 | int, "input-bytes-total", 104 | "Number of payload bytes received in this session. (prometheus: counter)", 105 | "B"), 106 | make_sensor( 107 | int, "input-heaps-total", 108 | "Number of input heaps captured in this session. (prometheus: counter)"), 109 | make_sensor( 110 | int, "input-dumps-total", 111 | "Number of complete input dumps captured in this session. (prometheus: counter)"), 112 | make_sensor( 113 | int, "output-bytes-total", 114 | "Number of payload bytes written to chunk store in this session. (prometheus: counter)", 115 | "B"), 116 | make_sensor( 117 | int, "output-chunks-total", 118 | "Number of chunks written to chunk store in this session. (prometheus: counter)"), 119 | make_sensor( 120 | float, "output-seconds-total", 121 | "Accumulated time spent writing chunks. (prometheus: counter)", 122 | "s"), 123 | make_sensor( 124 | float, "output-seconds", 125 | "Time spent on the last chunk write.", 126 | "s"), 127 | make_sensor( 128 | int, "active-chunks", 129 | "Number of chunks currently being written. (prometheus: gauge)"), 130 | make_sensor( 131 | int, "queued-bytes", 132 | "Number of bytes that have been received but not yet written. (prometheus: gauge)"), 133 | make_sensor( 134 | int, "max-queued-bytes", 135 | "Maximum value of queued-bytes sensor for this capture block. (prometheus: gauge)") 136 | ] 137 | 138 | 139 | def device_status_sensor() -> Sensor: 140 | """Create a sensor to track device status""" 141 | return Sensor(DeviceStatus, 'device-status', 'Health sensor', 142 | default=DeviceStatus.OK, initial_status=Sensor.Status.NOMINAL, 143 | status_func=_device_status_status) 144 | 145 | 146 | def clear_io_sensors(sensors: SensorSet) -> None: 147 | """Zero the input and output counters in a sensor set""" 148 | now = time.time() 149 | for name in ['input-incomplete-heaps-total', 150 | 'input-too-old-heaps-total', 151 | 'input-missing-heaps-total', 152 | 'input-bytes-total', 153 | 'input-heaps-total', 154 | 'input-dumps-total', 155 | 'output-bytes-total', 156 | 'output-chunks-total', 157 | 'output-seconds-total', 158 | 'active-chunks', 159 | 'queued-bytes', 160 | 'max-queued-bytes']: 161 | sensor = sensors[name] 162 | sensor.set_value(sensor.stype(0), timestamp=now) 163 | 164 | 165 | @attr.s(frozen=True) 166 | class Array: 167 | """A single array being received over SPEAD. See :class:`.Rechunker` for details.""" 168 | 169 | name = attr.ib() # type: str # Excludes the prefix 170 | in_chunks = attr.ib() # type: Tuple[Tuple[int, ...], ...] 171 | out_chunks = attr.ib() # type: Tuple[Tuple[int, ...], ...] 
172 | fill_value = attr.ib() # type: Any 173 | dtype = attr.ib(converter=_dtype_converter) # type: np.dtype 174 | 175 | @property 176 | def substreams(self): 177 | return int(np.product([len(c) for c in self.in_chunks])) 178 | 179 | @property 180 | def shape(self): 181 | """Shape of a single input dump""" 182 | return tuple(sum(c) for c in self.in_chunks) 183 | 184 | @property 185 | def nbytes(self): 186 | """Number of bytes in a single input dump""" 187 | return int(np.product(self.shape)) * self.dtype.itemsize 188 | 189 | 190 | def make_array(name, in_chunks: Tuple[Tuple[int, ...], ...], 191 | fill_value: Any, dtype: Any, chunk_params: ChunkParams) -> Array: 192 | """Create an :class:`Array` with computed output chunk scheme. 193 | 194 | The output chunks are determined by splitting the input chunks along axes 0 195 | and 1 (time and frequency in typical use) to produce chunks subject to the 196 | constraints of `chunk_params`. 197 | """ 198 | # Shape of a single input chunk 199 | assert in_chunks[0] == (1,) # Only one chunk in time, with one dump 200 | shape = tuple(c[0] for c in in_chunks) 201 | if chunk_params.max_channels is not None: 202 | max_dim_elements = {1: chunk_params.max_channels} 203 | else: 204 | max_dim_elements = {} 205 | # Compute the decomposition of each input chunk 206 | chunks = katdal.chunkstore.generate_chunks( 207 | shape, dtype, chunk_params.max_size, 208 | dims_to_split=(0, 1), power_of_two=True, 209 | max_dim_elements=max_dim_elements) # type: Tuple[Tuple[int, ...], ...] 210 | # Accumulate in time to make up the chunk size 211 | chunk_size = np.dtype(dtype).itemsize * np.prod([c[0] for c in chunks]) 212 | n_time = 1 213 | while (chunk_size * 2 <= chunk_params.max_size 214 | and (chunk_params.max_dumps is None or n_time * 2 <= chunk_params.max_dumps)): 215 | n_time *= 2 216 | chunk_size *= 2 217 | # the ignore is to suppress see https://github.com/python/mypy/issues/6337 218 | chunks = ((n_time,),) + chunks[1:] # type: ignore 219 | logger.info('Using chunks of shape %s (%.3fMB) for %s', 220 | tuple(c[0] for c in chunks), chunk_size / 1e6, name) 221 | # Repeat for each input chunk 222 | out_chunks = tuple(outc * len(inc) for inc, outc in zip(in_chunks, chunks)) 223 | return Array(name, in_chunks, out_chunks, fill_value, dtype) 224 | 225 | 226 | class ChunkStoreRechunker(rechunk.Rechunker): 227 | """Rechunker that outputs data to a chunk store via an executor. 228 | 229 | The name is used as the array name in the chunk store. 230 | 231 | .. note:: 232 | 233 | The :meth:`output` coroutine will return as soon as it has posted the 234 | chunk to the executor. It only blocks to acquire from the 235 | `executor_queue_space`. 236 | """ 237 | def __init__( 238 | self, 239 | executor: concurrent.futures.Executor, 240 | executor_queue_space: QueueSpace, 241 | chunk_store: katdal.chunkstore.ChunkStore, 242 | sensors: SensorSet, name: str, 243 | in_chunks: Chunks, out_chunks: Chunks, 244 | fill_value: Any, dtype: Any) -> None: 245 | super().__init__(name, in_chunks, out_chunks, fill_value, dtype) 246 | self.executor = executor 247 | self.executor_queue_space = executor_queue_space 248 | self.chunk_store = chunk_store 249 | self.chunk_store.create_array(self.name) 250 | self.sensors = sensors 251 | self._futures = set() # type: Set[asyncio.Future[float]] 252 | self._loop = asyncio.get_event_loop() 253 | 254 | def _put_chunk(self, slices: Tuple[slice, ...], value: np.ndarray) -> float: 255 | """Put a chunk into the chunk store and return statistics. 
256 | 257 | This is run in a separate thread, using an executor. 258 | """ 259 | def increment_active_chunks(): 260 | self.sensors['active-chunks'].value += 1 261 | 262 | def decrement_active_chunks(): 263 | self.sensors['active-chunks'].value -= 1 264 | 265 | start = time.monotonic() 266 | self._loop.call_soon_threadsafe(increment_active_chunks) 267 | try: 268 | self.chunk_store.put_chunk(self.name, slices, value) 269 | finally: 270 | self._loop.call_soon_threadsafe(decrement_active_chunks) 271 | end = time.monotonic() 272 | return end - start 273 | 274 | def _update_stats(self, nbytes: int, nbytes_present: int, 275 | future: 'asyncio.Future[float]') -> None: 276 | """Done callback for a future running :meth:`_put_chunk`. 277 | 278 | This is run on the event loop, so can safely update sensors. It also 279 | logs any errors. 280 | """ 281 | self._futures.remove(future) 282 | self.executor_queue_space.release(nbytes) 283 | self.sensors['queued-bytes'].value -= nbytes_present 284 | try: 285 | elapsed = future.result() 286 | except asyncio.CancelledError: 287 | pass 288 | except Exception: 289 | logger.exception('Failed to write a chunk to %s', self.name) 290 | self.sensors['device-status'].value = DeviceStatus.FAIL 291 | else: 292 | self.sensors['output-chunks-total'].value += 1 293 | self.sensors['output-bytes-total'].value += nbytes_present 294 | self.sensors['output-seconds-total'].value += elapsed 295 | self.sensors['output-seconds'].value = elapsed 296 | 297 | async def output(self, offset: Offset, value: np.ndarray, present: np.ndarray) -> None: 298 | slices = tuple(slice(ofs, ofs + size) for ofs, size in zip(offset, value.shape)) 299 | await self.executor_queue_space.acquire(value.nbytes) 300 | future = asyncio.ensure_future( 301 | self._loop.run_in_executor(self.executor, self._put_chunk, slices, value)) 302 | self._futures.add(future) 303 | nbytes_present = value.nbytes * np.sum(present) // len(present) 304 | callback = functools.partial(self._update_stats, value.nbytes, nbytes_present) 305 | future.add_done_callback(callback) 306 | 307 | def out_of_order(self, received: int, seen: int) -> None: 308 | self.sensors['input-too-old-heaps-total'].value += 1 309 | 310 | async def close(self) -> None: 311 | """Close and wait for all asynchronous writes to complete.""" 312 | await super().close() 313 | # asyncio.wait is implemented by adding a done callback to each 314 | # future. Done callbacks are run in order of addition, so when 315 | # wait returns, we are guaranteed that the done callbacks have 316 | # run. 317 | if self._futures: 318 | await asyncio.wait(self._futures) 319 | 320 | 321 | class RechunkerGroup: 322 | """Collects a number of rechunkers with common input chunk scheme. 323 | 324 | The arrays need not all have the same shape. However, there must be a 325 | prefix of the axes on which they all have the same chunking scheme, and 326 | on the remaining axes there can only be a single chunk. For example, the 327 | following chunking schemes could co-exist in a group. 328 | - ((2, 2), (3, 3, 3)) 329 | - ((2, 2), (3, 3, 3), (4,), (3,)) 330 | - ((2, 2), (3, 3, 3), (6,)) 331 | 332 | Parameters 333 | ---------- 334 | executor 335 | Executor used for asynchronous writes to the chunk store. 336 | executor_queue_space 337 | :class:`QueueSpace` bounding the number of bytes that can be in flight 338 | within `executor`. 339 | chunk_store 340 | Chunk-store into which output chunks are written. 
341 | sensors 342 | Sensor set containing an ``input-dumps-total`` sensor, which will 343 | be updated to reflect the highest dump index seen. 344 | prefix 345 | Prefix for naming arrays in the chunk store. It is prepended to the 346 | names given in `arrays` when storing the chunks. 347 | arrays 348 | Descriptions of the incoming arrays. 349 | """ 350 | def __init__(self, 351 | executor: concurrent.futures.Executor, 352 | executor_queue_space: QueueSpace, 353 | chunk_store: katdal.chunkstore.ChunkStore, 354 | sensors: SensorSet, prefix: str, 355 | arrays: Sequence[Array]) -> None: 356 | self.prefix = prefix 357 | self.arrays = list(arrays) 358 | self.sensors = sensors 359 | self._expected = Counter() # type: MutableMapping[Offset, int] 360 | self._seen = Counter() # type: MutableMapping[Offset, int] 361 | self._rechunkers = [ 362 | ChunkStoreRechunker(executor, executor_queue_space, 363 | chunk_store, sensors, 364 | chunk_store.join(prefix, a.name), 365 | a.in_chunks, a.out_chunks, 366 | a.fill_value, a.dtype) for a in arrays] 367 | 368 | async def add(self, offset_prefix: Offset, values: Iterable[np.ndarray]) -> None: 369 | """Add a value per array for rechunking. 370 | 371 | For each array passed to the constructor, there must be corresponding 372 | element in `values`. Each such value has an offset given by 373 | `offset_prefix` plus enough 0's to match the dimensionality. 374 | """ 375 | dump_index = offset_prefix[0] 376 | if dump_index >= self.sensors['input-dumps-total'].value: 377 | self.sensors['input-dumps-total'].value = dump_index + 1 378 | 379 | nbytes = sum(value.nbytes for value in values) 380 | self.sensors['input-heaps-total'].value += 1 381 | self.sensors['input-bytes-total'].value += nbytes 382 | queued_bytes = self.sensors['queued-bytes'].value + nbytes 383 | self.sensors['queued-bytes'].value = queued_bytes 384 | if queued_bytes > self.sensors['max-queued-bytes'].value: 385 | self.sensors['max-queued-bytes'].value = queued_bytes 386 | 387 | # Update our idea of how many heaps we've missed out on, assuming heaps 388 | # for each substream arrive in order. 389 | substream = offset_prefix[1:] 390 | old_missing = self._expected[substream] - self._seen[substream] 391 | if dump_index >= self._expected[substream]: 392 | self._expected[substream] = dump_index + 1 393 | self._seen[substream] += 1 394 | new_missing = self._expected[substream] - self._seen[substream] 395 | self.sensors['input-missing-heaps-total'].value += new_missing - old_missing 396 | 397 | for rechunker, value in zip(self._rechunkers, values): 398 | offset = offset_prefix + (0,) * (value.ndim - len(offset_prefix)) 399 | await rechunker.add(offset, value) 400 | 401 | async def get_chunk_info(self) -> Dict[str, Dict[str, Any]]: 402 | """Get the chunk information to place into telstate to describe the arrays. 403 | 404 | This closes the rechunkers (flushing partial output chunks), so no 405 | further calls to :meth:`add` should be made. 406 | """ 407 | for rechunker in self._rechunkers: 408 | await rechunker.close() 409 | return {array.name: rechunker.get_chunk_info(self.prefix) 410 | for array, rechunker in zip(self.arrays, self._rechunkers)} 411 | 412 | 413 | class SpeadWriter: 414 | """Base class to receive data over SPEAD and write it to a chunk store. 415 | 416 | It supports multiplexing between instances of :class:`RechunkerGroup` based 417 | on contents of the SPEAD heaps. This is implemented by subclassing and 418 | overriding :meth:`rechunker_group`. 
419 | 420 | Parameters 421 | ---------- 422 | sensors 423 | Server sensors including all those returned by :meth:`io_sensors`. 424 | These are updated as heaps are received. 425 | rx 426 | SPEAD receiver. It should be set up with :attr:`stop_on_stop_item` set 427 | to false. :meth:`make_receiver` returns a suitable receiver with 428 | optimised memory pool allocations. 429 | """ 430 | def __init__(self, sensors: SensorSet, rx: spead2.recv.asyncio.Stream) -> None: 431 | self.sensors = sensors 432 | self.rx = rx 433 | 434 | async def run(self, stops: int = None) -> None: 435 | """Run the receiver. 436 | 437 | Parameters 438 | ---------- 439 | stops 440 | If specified, this method will stop once it has seen `stops` stop 441 | items. Otherwise, it will run until cancelled or :meth:`stop` is 442 | called. 443 | """ 444 | first = True 445 | n_stop = 0 446 | ig = spead2.ItemGroup() 447 | async for heap in self.rx: 448 | if first: 449 | self.first_heap() 450 | first = False 451 | updated = {} # type: Dict[str, spead2.Item] 452 | if heap.is_end_of_stream(): 453 | n_stop += 1 454 | if stops is not None and n_stop == stops: 455 | self.rx.stop() 456 | break 457 | else: 458 | updated = {} 459 | elif isinstance(heap, spead2.recv.IncompleteHeap): 460 | self.sensors['input-incomplete-heaps-total'].value += 1 461 | else: 462 | try: 463 | updated = ig.update(heap) 464 | except Exception: 465 | logger.exception('Invalid heap') 466 | 467 | if 'timestamp' in updated: 468 | channel0 = int(updated['frequency'].value) 469 | dump_index = int(updated['dump_index'].value) 470 | group = self.rechunker_group(updated) 471 | # Check if subclass decided the heap was good 472 | if group is not None: 473 | # Get values and add time dimension 474 | values = [ig[array.name].value[np.newaxis, ...] for array in group.arrays] 475 | await group.add((dump_index, channel0), values) 476 | 477 | def stop(self) -> None: 478 | """Gracefully stop :meth:`run`.""" 479 | self.rx.stop() 480 | 481 | def first_heap(self): 482 | """Callback to notify about the first heap being received. 483 | 484 | The default does nothing, but may be overridden 485 | """ 486 | pass # pragma: no cover 487 | 488 | def rechunker_group(self, updated: Dict[str, spead2.Item]) -> Optional[RechunkerGroup]: 489 | """Obtain the rechunker group associated with a particular heap. 490 | 491 | This must be implemented in derived classes. 492 | """ 493 | raise NotImplementedError # pragma: no cover 494 | 495 | 496 | def chunks_from_telstate(telstate): 497 | """Determine input chunking scheme for visibility data from telescope state. 498 | 499 | The provided `telstate` must be a view of the appropriate stream. 500 | 501 | Raises 502 | ------ 503 | KeyError 504 | if any of the necessary telescope state keys are missing. 
505 | """ 506 | try: 507 | n_chans = telstate['n_chans'] 508 | n_bls = telstate['n_bls'] 509 | n_chans_per_substream = telstate['n_chans_per_substream'] 510 | except KeyError: 511 | logger.error("Unable to find sizing params (n_bls, n_chans, " 512 | "or n_chans_per_substream) in telstate.") 513 | raise 514 | 515 | n_substreams = n_chans // n_chans_per_substream 516 | return ((1,), (n_chans_per_substream,) * n_substreams, (n_bls,)) 517 | 518 | 519 | def write_telstate(telstate: katsdptelstate.TelescopeState, 520 | input_name: str, output_name: str, rename_src: Mapping[str, str], 521 | s3_endpoint_url: Optional[str]) -> None: 522 | """Write telstate information about output stream.""" 523 | telstate_out = telstate.view(output_name) 524 | if output_name != input_name: 525 | telstate_out['inherit'] = input_name 526 | if rename_src: 527 | telstate_in = telstate.view(input_name) 528 | src_streams_in = telstate_in['src_streams'] 529 | src_streams_out = [rename_src.get(stream, stream) for stream in src_streams_in] 530 | telstate_out['src_streams'] = src_streams_out 531 | if s3_endpoint_url is not None: 532 | telstate_out['s3_endpoint_url'] = s3_endpoint_url 533 | 534 | 535 | def make_receiver(endpoints: Sequence[Endpoint], 536 | arrays: Sequence[Array], 537 | interface_address: Optional[str], 538 | ibv: bool, 539 | max_heaps_per_substream: int = 2, 540 | ring_heaps_per_substream: int = 2) -> spead2.recv.asyncio.Stream: 541 | """Generate a SPEAD receiver suitable for :class:`SpeadWriter`. 542 | 543 | Parameters 544 | ---------- 545 | endpoints 546 | Multicast UDP endpoints to subscribe to 547 | arrays 548 | Arrays that will arrive in each heap 549 | interface_address 550 | If given, IP address of a local interface to bind to 551 | ibv 552 | If true, use ibverbs acceleration (see SPEAD documentation) 553 | max_heaps_per_substream 554 | Number of simultaneously incomplete SPEAD heaps allowed per substream 555 | ring_heaps_per_substream 556 | Number of complete heaps allowed in the SPEAD ringbuffer, per substream 557 | """ 558 | n_substreams = arrays[0].substreams 559 | 560 | max_heaps = max_heaps_per_substream * n_substreams 561 | ring_heaps = ring_heaps_per_substream * n_substreams 562 | n_memory_buffers = max_heaps + ring_heaps + 2 563 | heap_size = sum(a.nbytes // a.substreams for a in arrays) 564 | memory_pool = spead2.MemoryPool(heap_size, heap_size + 4096, 565 | n_memory_buffers, n_memory_buffers) 566 | rx = spead2.recv.asyncio.Stream( 567 | spead2.ThreadPool(), 568 | spead2.recv.StreamConfig( 569 | max_heaps=max_heaps, 570 | memory_allocator=memory_pool, 571 | memcpy=spead2.MEMCPY_NONTEMPORAL, 572 | stop_on_stop_item=False 573 | ), 574 | spead2.recv.RingStreamConfig( 575 | heaps=ring_heaps, 576 | contiguous_only=False 577 | ) 578 | ) 579 | if ibv: 580 | # The main scripts check this; the assert keeps mypy happy 581 | assert interface_address is not None, "Interface address is required when using ibverbs" 582 | endpoint_tuples = [(endpoint.host, endpoint.port) for endpoint in endpoints] 583 | rx.add_udp_ibv_reader( 584 | spead2.recv.UdpIbvConfig( 585 | endpoints=endpoint_tuples, 586 | interface_address=interface_address, 587 | buffer_size=64 * 1024**2 588 | ) 589 | ) 590 | else: 591 | for endpoint in endpoints: 592 | if interface_address is not None: 593 | rx.add_udp_reader(endpoint.host, endpoint.port, 594 | buffer_size=heap_size + 4096, 595 | interface_address=interface_address) 596 | else: 597 | rx.add_udp_reader(endpoint.port, bind_hostname=endpoint.host, 598 | buffer_size=heap_size + 4096) 
599 |     return rx
600 | 
601 | 
602 | class _DictAction(argparse.Action):
603 |     """Argparse action that takes an argument of the form KEY:VALUE and updates a dict with it.
604 | 
605 |     The input value is expected to be a 2-element sequence (for example, the
606 |     output of :func:`_split_colon`), so the type must produce such a sequence.
607 |     """
608 |     def __init__(self, option_strings, dest, nargs=None, const=None, default=None,
609 |                  type=None, choices=None, required=False, help=None, metavar=None):
610 |         # This code is somewhat cargo-culted from _AppendAction in the argparse
611 |         # source.
612 |         if nargs == 0:
613 |             raise ValueError('nargs for dict action must be > 0')
614 |         if const is not None:
615 |             raise ValueError('const is not supported for dict action')
616 |         super().__init__(
617 |             option_strings=option_strings,
618 |             dest=dest,
619 |             nargs=nargs,
620 |             const=const,
621 |             default=default,
622 |             type=type,
623 |             choices=choices,
624 |             required=required,
625 |             help=help,
626 |             metavar=metavar)
627 | 
628 |     def __call__(self, parser, namespace, values, option_string=None):
629 |         d = getattr(namespace, self.dest, None)
630 |         if d is None:
631 |             d = {}
632 |         else:
633 |             d = copy.copy(d)
634 |         d.update([values])
635 |         setattr(namespace, self.dest, d)
636 | 
637 | 
638 | def _split_colon(value):
639 |     """Split a KEY:VALUE string into its two parts."""
640 |     parts = value.split(':')
641 |     if len(parts) != 2:
642 |         raise argparse.ArgumentTypeError('Expected exactly one colon in {!r}'.format(value))
643 |     return parts
644 | 
645 | 
646 | def add_common_args(parser: katsdpservices.ArgumentParser) -> None:
647 |     """Inject command-line arguments that are common to the writers."""
648 |     group = parser.add_argument_group('Chunk store options')
649 |     group.add_argument('--npy-path', metavar='PATH',
650 |                        help='Write NPY files to this directory instead of '
651 |                             'directly to object store')
652 |     group.add_argument('--s3-endpoint-url', metavar='URL',
653 |                        help='URL of S3 endpoint')
654 |     group.add_argument('--s3-access-key', metavar='KEY',
655 |                        help='Access key for S3')
656 |     group.add_argument('--s3-secret-key', metavar='KEY',
657 |                        help='Secret key for S3')
658 |     group.add_argument('--s3-expiry-days', type=int, metavar='DAYS',
659 |                        help='Days after which to expire the data')
660 |     group.add_argument('--s3-write-url', metavar='URL',
661 |                        help='URL of S3 endpoint used for writing, overriding --s3-endpoint-url')
662 |     group.add_argument('--direct-write', action='store_true',
663 |                        help='Use O_DIRECT for writing to .npy files')
664 | 
665 |     group = parser.add_argument_group('Instrumentation options')
666 |     group.add_argument('--dashboard-port', type=int, metavar='PORT',
667 |                        help='Port for dashboard [disabled]')
668 |     group.add_argument('--external-hostname', default=socket.getfqdn(), metavar='HOSTNAME',
669 |                        help='Hostname through which the dashboard will be accessed [%(default)s]')
670 |     group.add_argument('--dashboard-allow-websocket-origin', action='append', metavar='ORIGIN',
671 |                        help='Origin at which the dashboard may be accessed'
672 |                             ' (may be repeated) [auto]')
673 | 
674 |     parser.add_argument('--new-name', metavar='NAME',
675 |                         help='Name for the output stream')
676 |     parser.add_argument('--rename-src', metavar='OLD-NAME:NEW-NAME',
677 |                         type=_split_colon, action=_DictAction,
678 |                         help='Rewrite src_streams for new name (repeat for each rename)')
679 |     parser.add_argument('--obj-size-mb', type=float, default=20., metavar='MB',
680 |                         help='Target object size in MB [%(default)s]')
681 |     parser.add_argument('--obj-max-channels', type=int, metavar='CHANNELS',
682 |                         help='Maximum number of channels per object [no limit]')
683 |     parser.add_argument('--obj-max-dumps', type=int, metavar='DUMPS', default=16,
684 |                         help='Maximum number of dumps per object [%(default)s]')
685 |     parser.add_argument('--workers', type=int, default=50,
686 |                         help='Threads to use for writing chunks [%(default)s]')
687 |     parser.add_argument('--buffer-dumps', type=int, default=20, metavar='DUMPS',
688 |                         help='Number of full dumps to buffer in write queue')
689 |     parser.add_aiomonitor_arguments()
690 |     parser.add_argument('-p', '--port', type=int, metavar='N',
691 |                         help='KATCP host port [%(default)s]')
692 |     parser.add_argument('-a', '--host', default="", metavar='HOST',
693 |                         help='KATCP host address [all hosts]')
694 | 
695 | 
696 | def chunk_store_from_args(parser: argparse.ArgumentParser,
697 |                           args: argparse.Namespace) -> katdal.chunkstore.ChunkStore:
698 |     """Create a chunk store from user-provided arguments.
699 | 
700 |     This checks that a consistent set of the arguments created by
701 |     :func:`add_common_args` was given by the user. If not, it calls
702 |     ``parser.error`` (which terminates the process). Otherwise, it returns a
703 |     new chunk store (any exceptions from the chunk store constructor are
704 |     passed through).
705 |     """
706 |     if not args.npy_path:
707 |         for arg_name in ['s3_endpoint_url', 's3_access_key', 's3_secret_key']:
708 |             if not getattr(args, arg_name):
709 |                 parser.error('--{} is required if --npy-path is not given'
710 |                              .format(arg_name.replace('_', '-')))
711 |                 # The real parser.error kills the process, but the unit tests mock
712 |                 # it, so we want to ensure that we don't carry on.
713 |     else:
714 |         if args.s3_write_url:
715 |             parser.error("--s3-write-url and --npy-path cannot be used together")
716 |         if not os.path.isdir(args.npy_path):
717 |             parser.error("Specified --npy-path ({}) does not exist.".format(args.npy_path))
718 | 
719 |     if args.npy_path:
720 |         chunk_store = katdal.chunkstore_npy.NpyFileChunkStore(
721 |             args.npy_path, direct_write=args.direct_write)
722 |     else:
723 |         chunk_store = katdal.chunkstore_s3.S3ChunkStore(
724 |             args.s3_write_url or args.s3_endpoint_url,
725 |             credentials=(args.s3_access_key, args.s3_secret_key),
726 |             expiry_days=args.s3_expiry_days or 0)
727 |     return chunk_store
728 | 
--------------------------------------------------------------------------------
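Illustrative usage (not a file in this repository): a minimal sketch of how the helpers above could be wired into a writer entry point. The functions and the module path katsdpdatawriter.spead_write come from the file above; the script itself, its main() flow and the print() call are assumptions for illustration only. The real entry points are scripts/vis_writer.py and scripts/flag_writer.py.

#!/usr/bin/env python3
# Hypothetical sketch -- not part of the repository.
import katsdpservices

from katsdpdatawriter import spead_write


def main() -> None:
    # add_common_args is annotated with katsdpservices.ArgumentParser and calls
    # parser.add_aiomonitor_arguments(), so a plain argparse parser would not do.
    parser = katsdpservices.ArgumentParser()
    spead_write.add_common_args(parser)
    args = parser.parse_args()
    # chunk_store_from_args validates the --npy-path / --s3-* combination
    # (calling parser.error on inconsistency) before constructing the store.
    chunk_store = spead_write.chunk_store_from_args(parser, args)
    print(chunk_store)


if __name__ == '__main__':
    main()

With such a parser, repeated --rename-src OLD:NEW options accumulate into a single dict via _DictAction and _split_colon (for example, --rename-src a:b --rename-src c:d gives {'a': 'b', 'c': 'd'}), which matches the rename_src mapping that write_telstate expects.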