├── .github
│   └── workflows
│       └── main.yml
├── .gitignore
├── .readthedocs.yaml
├── COPYING-LGPL
├── README.md
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements.txt
│   └── source
│       ├── _templates
│       │   ├── custom-class-template.rst
│       │   └── custom-module-template.rst
│       ├── advanced.rst
│       ├── api.rst
│       ├── conf.py
│       ├── developers.rst
│       ├── faq.rst
│       ├── index.rst
│       └── quickstart.rst
├── pyproject.toml
├── sigmf
│   ├── __init__.py
│   ├── apps
│   │   ├── __init__.py
│   │   └── convert_wav.py
│   ├── archive.py
│   ├── archivereader.py
│   ├── error.py
│   ├── schema-collection.json
│   ├── schema-meta.json
│   ├── schema.py
│   ├── sigmf_hash.py
│   ├── sigmffile.py
│   ├── utils.py
│   └── validate.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── test_archive.py
    ├── test_archivereader.py
    ├── test_collection.py
    ├── test_ncd.py
    ├── test_sigmffile.py
    ├── test_utils.py
    ├── test_validation.py
    └── testdata.py

/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Python package
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |     types: [opened, synchronize]
 7 |   schedule:
 8 |     - cron: "5 5 * * 5" # test every friday @ 0505
 9 | 
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-22.04
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.7", "3.9", "3.11", "3.13"]
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v4
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip
25 |         pip install .[test,apps]
26 |     - name: Test with pytest
27 |       run: |
28 |         coverage run
29 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # temp files
 2 | __pycache__/
 3 | *.swp
 4 | *.py[cod]
 5 | .cache
 6 | 
 7 | # packaging related
 8 | dist/
 9 | build/
10 | eggs/
11 | .eggs/
12 | SigMF.egg-info/*
13 | 
14 | # test related
15 | .coverage
16 | .hypothesis/
17 | .tox/
18 | coverage.xml
19 | pytest.xml
20 | htmlcov/*
21 | 
22 | # docs related
23 | docs/_build/
24 | docs/source/_autosummary/
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version, and other tools you might need
 8 | build:
 9 |   os: ubuntu-24.04
10 |   tools:
11 |     python: "3.13"
12 | 
13 | # declare the Python requirements required to build your documentation
14 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
15 | python:
16 |   install:
17 |     - method: pip
18 |       path: .
19 |       extra_requirements:
20 |         - test
21 |         - apps
22 |     - requirements: docs/requirements.txt
23 | 
24 | # Build documentation in the "docs/" directory with Sphinx
25 | sphinx:
26 |   configuration: docs/source/conf.py
--------------------------------------------------------------------------------
/COPYING-LGPL:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![Rendered SigMF Logo](https://raw.githubusercontent.com/sigmf/SigMF/refs/heads/main/logo/sigmf_logo.png)
 2 | 
 3 | [![PyPI Version Shield](https://img.shields.io/pypi/v/sigmf)](https://pypi.org/project/SigMF/)
 4 | [![Build Status Shield](https://img.shields.io/github/actions/workflow/status/sigmf/sigmf-python/main.yml)](https://github.com/sigmf/sigmf-python/actions?query=branch%3Amain)
 5 | [![License Shield](https://img.shields.io/pypi/l/sigmf)](https://en.wikipedia.org/wiki/GNU_Lesser_General_Public_License)
 6 | [![Documentation Shield](https://img.shields.io/readthedocs/sigmf)](https://sigmf.readthedocs.io/en/latest/)
 7 | [![PyPI Downloads Shield](https://img.shields.io/pypi/dm/sigmf)](https://pypi.org/project/SigMF/)
 8 | 
 9 | The `sigmf` library makes it easy to interact with Signal Metadata Format
10 | (SigMF) recordings. This library is compatible with Python 3.7-3.13 and is distributed
11 | freely under the terms of the GNU Lesser GPL v3 license.
12 | 
13 | This module follows the SigMF specification [html](https://sigmf.org/)/[pdf](https://sigmf.github.io/SigMF/sigmf-spec.pdf) from the [spec repository](https://github.com/sigmf/SigMF).
14 | 
15 | To install the latest PyPI release, install from pip:
16 | 
17 | ```bash
18 | pip install sigmf
19 | ```
20 | 
21 | **[Please visit the documentation for examples & more info.](https://sigmf.readthedocs.io/en/latest/)**
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
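   | # For example, `make html` renders the docs into build/html/; this assumes
   | # the pinned Sphinx packages from requirements.txt are installed.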
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # pinned 2025-01-15 2 | sphinx==8.1.3 3 | sphinx-rtd-theme==3.0.2 4 | -------------------------------------------------------------------------------- /docs/source/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | :members: 7 | :show-inheritance: 8 | :inherited-members: 9 | :special-members: __call__, __add__, __mul__ 10 | 11 | {% block methods %} 12 | {% if methods %} 13 | .. rubric:: {{ _('Methods') }} 14 | 15 | .. autosummary:: 16 | :nosignatures: 17 | {% for item in methods %} 18 | {%- if not item.startswith('_') %} 19 | ~{{ name }}.{{ item }} 20 | {%- endif -%} 21 | {%- endfor %} 22 | {% endif %} 23 | {% endblock %} 24 | 25 | {% block attributes %} 26 | {% if attributes %} 27 | .. rubric:: {{ _('Attributes') }} 28 | 29 | .. autosummary:: 30 | {% for item in attributes %} 31 | ~{{ name }}.{{ item }} 32 | {%- endfor %} 33 | {% endif %} 34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /docs/source/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. automodule:: {{ fullname }} 4 | 5 | {% block attributes %} 6 | {% if attributes %} 7 | .. rubric:: Module attributes 8 | 9 | .. autosummary:: 10 | :toctree: 11 | {% for item in attributes %} 12 | {{ item }} 13 | {%- endfor %} 14 | {% endif %} 15 | {% endblock %} 16 | 17 | {% block functions %} 18 | {% if functions %} 19 | .. rubric:: {{ _('Functions') }} 20 | 21 | .. autosummary:: 22 | :toctree: 23 | :nosignatures: 24 | {% for item in functions %} 25 | {{ item }} 26 | {%- endfor %} 27 | {% endif %} 28 | {% endblock %} 29 | 30 | {% block classes %} 31 | {% if classes %} 32 | .. rubric:: {{ _('Classes') }} 33 | 34 | .. 
autosummary::
35 |    :toctree:
36 |    :template: custom-class-template.rst
37 |    :nosignatures:
38 | {% for item in classes %}
39 |    {{ item }}
40 | {%- endfor %}
41 | {% endif %}
42 | {% endblock %}
43 | 
44 | {% block exceptions %}
45 | {% if exceptions %}
46 | .. rubric:: {{ _('Exceptions') }}
47 | 
48 | .. autosummary::
49 |    :toctree:
50 | {% for item in exceptions %}
51 |    {{ item }}
52 | {%- endfor %}
53 | {% endif %}
54 | {% endblock %}
55 | 
56 | {% block modules %}
57 | {% if modules %}
58 | .. autosummary::
59 |    :toctree:
60 |    :template: custom-module-template.rst
61 |    :recursive:
62 | {% for item in modules %}
63 |    {{ item }}
64 | {%- endfor %}
65 | {% endif %}
66 | {% endblock %}
67 | 
--------------------------------------------------------------------------------
/docs/source/advanced.rst:
--------------------------------------------------------------------------------
  1 | ========
  2 | Advanced
  3 | ========
  4 | 
  5 | Here we discuss more advanced techniques for working with **collections** and
  6 | **archives**.
  7 | 
  8 | ------------------------------
  9 | Iterate over SigMF Annotations
 10 | ------------------------------
 11 | 
 12 | Here we will load a SigMF dataset and iterate over the annotations. You can get
 13 | the recording of the SigMF logo used in this example `from the specification
 14 | <https://github.com/sigmf/SigMF/tree/main/logo>`_.
 15 | 
 16 | .. code-block:: python
 17 | 
 18 |     from sigmf import SigMFFile, sigmffile
 19 | 
 20 |     # Load a dataset
 21 |     path = 'logo/sigmf_logo' # extension is optional
 22 |     signal = sigmffile.fromfile(path)
 23 | 
 24 |     # Get some metadata and all annotations
 25 |     sample_rate = signal.get_global_field(SigMFFile.SAMPLE_RATE_KEY)
 26 |     sample_count = signal.sample_count
 27 |     signal_duration = sample_count / sample_rate
 28 |     annotations = signal.get_annotations()
 29 | 
 30 |     # Iterate over annotations
 31 |     for adx, annotation in enumerate(annotations):
 32 |         annotation_start_idx = annotation[SigMFFile.START_INDEX_KEY]
 33 |         annotation_length = annotation[SigMFFile.LENGTH_INDEX_KEY]
 34 |         annotation_comment = annotation.get(SigMFFile.COMMENT_KEY, "[annotation {}]".format(adx))
 35 | 
 36 |         # Get capture info associated with the start of annotation
 37 |         capture = signal.get_capture_info(annotation_start_idx)
 38 |         freq_center = capture.get(SigMFFile.FREQUENCY_KEY, 0)
 39 |         freq_min = freq_center - 0.5*sample_rate
 40 |         freq_max = freq_center + 0.5*sample_rate
 41 | 
 42 |         # Get frequency edges of annotation (default to edges of capture)
 43 |         freq_start = annotation.get(SigMFFile.FLO_KEY)
 44 |         freq_stop = annotation.get(SigMFFile.FHI_KEY)
 45 | 
 46 |         # Get the samples corresponding to annotation
 47 |         samples = signal.read_samples(annotation_start_idx, annotation_length)
 48 | 
 49 |         # Do something with the samples & metadata for each annotation here
 50 | 
 51 | -------------------------------------
 52 | Save a Collection of SigMF Recordings
 53 | -------------------------------------
 54 | 
 55 | First, create a single SigMF Recording and save it to disk:
 56 | 
 57 | .. code-block:: python
 58 | 
 59 |     import datetime as dt
 60 |     import numpy as np
 61 |     import sigmf
 62 |     from sigmf import SigMFFile
 63 |     from sigmf.utils import get_data_type_str, get_sigmf_iso8601_datetime_now
 64 | 
 65 |     # suppose we have a complex timeseries signal
 66 |     data = np.zeros(1024, dtype=np.complex64)
 67 | 
 68 |     # write those samples to file in cf32_le
 69 |     data.tofile('example_cf32.sigmf-data')
 70 | 
 71 |     # create the metadata
 72 |     meta = SigMFFile(
 73 |         data_file='example_cf32.sigmf-data', # extension is optional
 74 |         global_info = {
 75 |             SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le'
 76 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
 77 |             SigMFFile.AUTHOR_KEY: 'jane.doe@domain.org',
 78 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex float32 example file.',
 79 |         }
 80 |     )
 81 | 
 82 |     # create a capture key at time index 0
 83 |     meta.add_capture(0, metadata={
 84 |         SigMFFile.FREQUENCY_KEY: 915000000,
 85 |         SigMFFile.DATETIME_KEY: get_sigmf_iso8601_datetime_now(),
 86 |     })
 87 | 
 88 |     # add an annotation at sample 100 with length 200 & 10 kHz width
 89 |     meta.add_annotation(100, 200, metadata = {
 90 |         SigMFFile.FLO_KEY: 914995000.0,
 91 |         SigMFFile.FHI_KEY: 915005000.0,
 92 |         SigMFFile.COMMENT_KEY: 'example annotation',
 93 |     })
 94 | 
 95 |     # check for mistakes & write to disk
 96 |     meta.tofile('example_cf32.sigmf-meta') # extension is optional
 97 | 
 98 | Now let's add another SigMF Recording and associate them with a SigMF Collection:
 99 | 
100 | .. code-block:: python
101 | 
102 |     from sigmf import SigMFCollection
103 | 
104 |     data_ci16 = np.zeros(1024, dtype=np.complex64)
105 | 
106 |     # rescale and save as a complex int16 file:
107 |     data_ci16 *= pow(2, 15)
108 |     data_ci16.view(np.float32).astype(np.int16).tofile('example_ci16.sigmf-data')
109 | 
110 |     # create the metadata for the second file
111 |     meta_ci16 = SigMFFile(
112 |         data_file='example_ci16.sigmf-data', # extension is optional
113 |         global_info = {
114 |             SigMFFile.DATATYPE_KEY: 'ci16_le', # get_data_type_str() is only valid for numpy types
115 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
116 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex int16 file.',
117 |         }
118 |     )
119 |     meta_ci16.add_capture(0, metadata=meta.get_capture_info(0))
120 |     meta_ci16.tofile('example_ci16.sigmf-meta')
121 | 
122 |     collection = SigMFCollection(['example_cf32.sigmf-meta', 'example_ci16.sigmf-meta'],
123 |         metadata = {'collection': {
124 |             SigMFCollection.AUTHOR_KEY: 'sigmf@sigmf.org',
125 |             SigMFCollection.DESCRIPTION_KEY: 'Collection of two all zero files.',
126 |             }
127 |         }
128 |     )
129 |     streams = collection.get_stream_names()
130 |     sigmf = [collection.get_SigMFFile(stream) for stream in streams]
131 |     collection.tofile('example_zeros.sigmf-collection')
132 | 
133 | The SigMF Collection and its associated Recordings can now be loaded like this:
134 | 
135 | .. code-block:: python
136 | 
137 |     from sigmf import sigmffile
138 |     collection = sigmffile.fromfile('example_zeros')
139 |     ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16')
140 |     cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32')
141 | 
142 | -----------------------------------------------
143 | Load a SigMF Archive and slice without untaring
144 | -----------------------------------------------
145 | 
146 | Since an *archive* is merely a tarball (uncompressed), and since there are many
147 | excellent tools for manipulating tar files, it's fairly straightforward to
148 | access the *data* part of a SigMF archive without un-taring it. This is a
149 | compelling feature because **1** archives make it harder for the ``-data`` and
150 | the ``-meta`` to get separated, and **2** some datasets are so large that it
151 | can be impractical (due to available disk space, or slow network speeds if the
152 | archive file resides on a network file share) or simply obnoxious to untar it
153 | first.
154 | 
155 | ::
156 | 
157 |     >>> import sigmf
158 |     >>> arc = sigmf.SigMFArchiveReader('/src/LTE.sigmf')
159 |     >>> arc.shape
160 |     (15379532,)
161 |     >>> arc.ndim
162 |     1
163 |     >>> arc[:10]
164 |     array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j,   0.-75.j,  22.-58.j,
165 |             48.-44.j,  49.-60.j,  31.-56.j,  23.-47.j], dtype=complex64)
166 | 
167 | The preceding example exhibits another feature of this approach; the archive
168 | ``LTE.sigmf`` is actually ``complex-int16``'s on disk, for which there is no
169 | corresponding type in ``numpy``. However, the ``.sigmffile`` member keeps track of
170 | this, and converts the data to ``numpy.complex64`` *after* slicing it, that is,
171 | after reading it from disk.
172 | 
173 | ::
174 | 
175 |     >>> arc.sigmffile.get_global_field(sigmf.SigMFFile.DATATYPE_KEY)
176 |     'ci16_le'
177 | 
178 |     >>> arc.sigmffile._memmap.dtype
179 |     dtype('int16')
180 | 
181 |     >>> arc.sigmffile._return_type
182 |     '<c8'
183 | 
184 | Another supported mode is the case where you might have an archive that
185 | is *not* on disk but instead is simply ``bytes`` in a python variable.
186 | Instead of needing to write this out to a temporary file, the archive
187 | reader can consume it directly through a python ``io.BytesIO`` buffer
188 | via the ``archive_buffer`` argument:
189 | 
190 | ::
191 | 
192 |     >>> import sigmf, io
193 |     >>> sigmf_bytes = io.BytesIO(open('/src/LTE.sigmf', 'rb').read())
194 |     >>> arc = sigmf.SigMFArchiveReader(archive_buffer=sigmf_bytes)
195 |     >>> arc[:10]
196 |     array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j,   0.-75.j,  22.-58.j,
197 |             48.-44.j,  49.-60.j,  31.-56.j,  23.-47.j], dtype=complex64)
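198 | 
199 | Because the archive is an ordinary uncompressed tarball, you can also inspect
200 | it with nothing but the standard library. The following sketch (assuming only
201 | that ``LTE.sigmf`` exists and using Python's built-in ``tarfile`` module)
202 | lists the members of an archive without extracting anything:
203 | 
204 | .. code-block:: python
205 | 
206 |     import tarfile
207 | 
208 |     # a .sigmf archive is a plain (uncompressed) tar file
209 |     with tarfile.open('/src/LTE.sigmf', mode='r:') as tar:
210 |         for member in tar.getmembers():
211 |             # expect <name>/<name>.sigmf-meta and <name>/<name>.sigmf-data
212 |             print(member.name, member.size)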
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
 1 | =========
 2 | SigMF API
 3 | =========
 4 | 
 5 | .. autosummary::
 6 |    :toctree: _autosummary
 7 |    :template: custom-module-template.rst
 8 |    :recursive:
 9 | 
10 |    sigmf.apps.convert_wav
11 |    sigmf.archive
12 |    sigmf.archivereader
13 |    sigmf.error
14 |    sigmf.schema
15 |    sigmf.sigmf_hash
16 |    sigmf.sigmffile
17 |    sigmf.utils
18 |    sigmf.validate
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Copyright: Multiple Authors
 2 | #
 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
 4 | #
 5 | # SPDX-License-Identifier: LGPL-3.0-or-later
 6 | """Configuration file for the Sphinx documentation builder."""
 7 | 
 8 | import datetime
 9 | import re
10 | import sys
11 | from pathlib import Path
12 | 
13 | # parse info from project files
14 | 
15 | root = Path(__file__).parent.parent.parent
16 | with open(root / "sigmf" / "__init__.py", "r") as handle:
17 |     init = handle.read()
18 | toolversion = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', init).group(1)
19 | specversion = re.search(r'__specification__\s*=\s*[\'"]([^\'"]*)[\'"]', init).group(1)
20 | 
21 | # autodoc needs special pathing
22 | sys.path.append(str(root))
23 | 
24 | # -- Project information
25 | 
26 | project = "sigmf"
27 | author = "Multiple Authors"
28 | copyright = f"2017-{datetime.date.today().year}, {author}"
29 | 
30 | release = toolversion
31 | version = toolversion
32 | 
33 | # -- General configuration
34 | 
35 | extensions = [
36 |     "sphinx.ext.autodoc",
37 |     "sphinx.ext.autosummary",
38 |     "sphinx.ext.doctest",
39 |     "sphinx.ext.duration",
40 |     "sphinx.ext.intersphinx",
41 |     "sphinx.ext.napoleon", # allows numpy-style docstrings
42 | ]
43 | 
44 | intersphinx_mapping = {
45 |     "python": ("https://docs.python.org/3/", None),
46 |     "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
47 | }
48 | intersphinx_disabled_domains = ["std"]
49 | 
50 | templates_path = ["_templates"]
51 | 
52 | # -- Options for HTML output
53 | 
54 | html_theme = "sphinx_rtd_theme"
55 | html_favicon = "https://raw.githubusercontent.com/wiki/sigmf/SigMF/logo/logo-icon-32-folder.png"
56 | html_logo = "https://raw.githubusercontent.com/sigmf/SigMF/refs/heads/main/logo/sigmf_logo.svg"
57 | 
58 | # -- Options for EPUB output
59 | 
60 | epub_show_urls = "footnote"
61 | 
62 | # Method to use variables within rst files
63 | # https://stackoverflow.com/a/69211912/760099
64 | 
65 | variables_to_export = [
66 |     "toolversion",
67 |     "specversion",
68 | ]
69 | frozen_locals = dict(locals())
70 | rst_epilog = '\n'.join(map(lambda x: f".. |{x}| replace:: {frozen_locals[x]}", variables_to_export))
71 | del frozen_locals
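72 | 
73 | # With the values above, the generated epilog is equivalent to this
74 | # illustrative sketch (actual values are parsed from sigmf/__init__.py):
75 | #
76 | #   .. |toolversion| replace:: 1.2.10
77 | #   .. |specversion| replace:: 1.2.5
78 | #
79 | # so any .rst file, e.g. index.rst, can write |toolversion| inline.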
--------------------------------------------------------------------------------
/docs/source/developers.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | Developers
 3 | ==========
 4 | 
 5 | This page is for developers of the ``sigmf-python`` module.
 6 | 
 7 | -------
 8 | Install
 9 | -------
10 | 
11 | To install the latest git release, build from source:
12 | 
13 | .. code-block:: console
14 | 
15 |     $ git clone https://github.com/sigmf/sigmf-python.git
16 |     $ cd sigmf-python
17 |     $ pip install .
18 | 
19 | -------
20 | Testing
21 | -------
22 | 
23 | This library contains many tests in the ``tests/`` folder. These can all be run locally:
24 | 
25 | .. code-block:: console
26 | 
27 |     $ coverage run
28 | 
29 | Or tests can be run within a temporary environment on all supported python versions:
30 | 
31 | .. code-block:: console
32 | 
33 |     $ tox run
34 | 
35 | To run a single (perhaps newly written) test with verbose output:
36 | 
37 | .. code-block:: console
38 | 
39 |     $ pytest -rA tests/test_archive.py
40 | 
41 | To lint the entire project and get suggested changes:
42 | 
43 | .. code-block:: console
44 | 
45 |     $ pylint sigmf tests
46 | 
47 | To autoformat the entire project according to our coding standard:
48 | 
49 | .. code-block:: console
50 | 
51 |     $ black sigmf tests # autoformat entire project
52 |     $ isort sigmf tests # format imports for entire project
53 | 
54 | ----
55 | Docs
56 | ----
57 | 
58 | To build the docs and host locally:
59 | 
60 | .. code-block:: console
61 | 
62 |     $ cd docs
63 |     $ make html
64 |     $ cd build/html/
65 |     $ python3 -m http.server
66 | 
67 | --------------
68 | Find an Issue?
69 | --------------
70 | 
71 | Issues can be addressed by opening an `issue
72 | <https://github.com/sigmf/sigmf-python/issues>`_ or by forking the project and
73 | submitting a `pull request <https://github.com/sigmf/sigmf-python/pulls>`_.
74 | 
--------------------------------------------------------------------------------
/docs/source/faq.rst:
--------------------------------------------------------------------------------
 1 | ==========================
 2 | Frequently Asked Questions
 3 | ==========================
 4 | 
 5 | .. contents::
 6 |     :local:
 7 | 
 8 | ..
 9 |     Frequently asked questions should be questions that actually got asked.
10 |     Formulate them as a question and an answer.
11 |     Consider that the answer is best as a reference to another place in the documentation.
12 | 
13 | ---------------------------
14 | Is this a GNU Radio effort?
15 | ---------------------------
16 | 
17 | *No*, this is not a GNU Radio-specific effort.
18 | This effort first emerged from a group of GNU Radio core
19 | developers, but the goal of the project is to provide a standard that will be
20 | useful to anyone and everyone, regardless of tool or workflow.
21 | 
22 | --------------------------------------------
23 | Is this specific to wireless communications?
24 | --------------------------------------------
25 | 
26 | *No*, similar to the response above, the goal is to create something that is
27 | generally applicable to *signal processing*, regardless of whether or not the
28 | application is communications related.
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | =================
 2 | Welcome to SigMF!
 3 | =================
 4 | 
 5 | **SigMF** is a Python library for working with radio recordings in
 6 | ``.sigmf`` format according to the `SigMF standard <https://sigmf.org/>`_.
 7 | It offers a *simple* and *intuitive* API for Python developers.
 8 | 
 9 | ..
10 |     Note: The toolversion & specversion below are replaced dynamically during build.
11 | 
12 | This documentation is for version |toolversion| of the library, which is
13 | compatible with version |specversion| of the SigMF specification.
14 | 
15 | To get started, see the :doc:`quickstart` section or learn how to :ref:`install` the library.
16 | 
17 | -----
18 | 
19 | .. toctree::
20 |     :maxdepth: 1
21 |     :caption: Getting Started
22 |     :hidden:
23 | 
24 |     quickstart
25 |     advanced
26 |     developers
27 | 
28 | .. toctree::
29 |     :maxdepth: 1
30 |     :caption: Community
31 |     :hidden:
32 | 
33 |     faq
34 | 
35 | .. toctree::
36 |     :maxdepth: 1
37 |     :caption: API Reference
38 |     :hidden:
39 | 
40 |     api
--------------------------------------------------------------------------------
/docs/source/quickstart.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | Quickstart
 3 | ==========
 4 | 
 5 | Here we discuss how to do all basic operations with SigMF.
 6 | 
 7 | .. _install:
 8 | 
 9 | -------
10 | Install
11 | -------
12 | 
13 | To install the latest PyPI release, install from pip:
14 | 
15 | .. code-block:: console
16 | 
17 |     $ pip install sigmf
18 | 
19 | ----------------------
20 | Read a SigMF Recording
21 | ----------------------
22 | 
23 | .. code-block:: python
24 | 
25 |     import sigmf
26 |     handle = sigmf.sigmffile.fromfile("example.sigmf")
27 |     handle.read_samples() # returns all timeseries data
28 |     handle.get_global_info() # returns 'global' dictionary
29 |     handle.get_captures() # returns list of 'captures' dictionaries
30 |     handle.get_annotations() # returns list of all annotations
31 |     handle[10:50] # return memory slice of samples 10 through 50
32 | 
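   | If you only need part of a recording, ``read_samples`` also accepts a start
   | index and a sample count. A minimal sketch, reusing ``example.sigmf`` from
   | above:
   | 
   | .. code-block:: python
   | 
   |     import sigmf
   |     from sigmf import SigMFFile
   | 
   |     handle = sigmf.sigmffile.fromfile("example.sigmf")
   |     rate = handle.get_global_field(SigMFFile.SAMPLE_RATE_KEY)
   |     chunk = handle.read_samples(start_index=100, count=200) # 200 samples from index 100
   |     print(len(chunk) / rate, "seconds of data")
   | 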
33 | -----------------------------------
34 | Verify SigMF Integrity & Compliance
35 | -----------------------------------
36 | 
37 | .. code-block:: console
38 | 
39 |     $ sigmf_validate example.sigmf
40 | 
41 | ---------------------------------------
42 | Save a Numpy array as a SigMF Recording
43 | ---------------------------------------
44 | 
45 | .. code-block:: python
46 | 
47 |     import numpy as np
48 |     from sigmf import SigMFFile
49 |     from sigmf.utils import get_data_type_str, get_sigmf_iso8601_datetime_now
50 | 
51 |     # suppose we have a complex timeseries signal
52 |     data = np.zeros(1024, dtype=np.complex64)
53 | 
54 |     # write those samples to file in cf32_le
55 |     data.tofile('example_cf32.sigmf-data')
56 | 
57 |     # create the metadata
58 |     meta = SigMFFile(
59 |         data_file='example_cf32.sigmf-data', # extension is optional
60 |         global_info = {
61 |             SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le'
62 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
63 |             SigMFFile.AUTHOR_KEY: 'jane.doe@domain.org',
64 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex float32 example file.',
65 |         }
66 |     )
67 | 
68 |     # create a capture key at time index 0
69 |     meta.add_capture(0, metadata={
70 |         SigMFFile.FREQUENCY_KEY: 915000000,
71 |         SigMFFile.DATETIME_KEY: get_sigmf_iso8601_datetime_now(),
72 |     })
73 | 
74 |     # add an annotation at sample 100 with length 200 & 10 kHz width
75 |     meta.add_annotation(100, 200, metadata = {
76 |         SigMFFile.FLO_KEY: 914995000.0,
77 |         SigMFFile.FHI_KEY: 915005000.0,
78 |         SigMFFile.COMMENT_KEY: 'example annotation',
79 |     })
80 | 
81 |     # check for mistakes & write to disk
82 |     meta.tofile('example_cf32.sigmf-meta') # extension is optional
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "SigMF"
  3 | description = "Easily interact with Signal Metadata Format (SigMF) recordings." 
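    | # extras `test` and `apps` are defined under [project.optional-dependencies]
    | # below; a full development install is `pip install .[test,apps]`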
4 | keywords = ["gnuradio", "radio"] 5 | classifiers = [ 6 | "Development Status :: 5 - Production/Stable", 7 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", 8 | "Operating System :: OS Independent", 9 | "Programming Language :: Python :: 3", 10 | "Programming Language :: Python :: 3.7", 11 | "Programming Language :: Python :: 3.8", 12 | "Programming Language :: Python :: 3.9", 13 | "Programming Language :: Python :: 3.10", 14 | "Programming Language :: Python :: 3.11", 15 | "Programming Language :: Python :: 3.12", 16 | "Programming Language :: Python :: 3.13", 17 | "Topic :: Scientific/Engineering", 18 | "Topic :: Communications :: Ham Radio", 19 | ] 20 | dynamic = ["version", "readme"] 21 | requires-python = ">=3.7" 22 | dependencies = [ 23 | "numpy", # for vector math 24 | "jsonschema", # for spec validation 25 | ] 26 | [project.urls] 27 | repository = "https://github.com/sigmf/sigmf-python" 28 | documentation = "https://sigmf.readthedocs.io/en/latest/" 29 | issues = "https://github.com/sigmf/sigmf-python/issues" 30 | "Specification (HTML)" = "https://sigmf.org/" 31 | "Specification (PDF)" = "https://sigmf.github.io/SigMF/sigmf-spec.pdf" 32 | "Specification (Repo)" = "https://github.com/sigmf/SigMF" 33 | 34 | [project.scripts] 35 | sigmf_validate = "sigmf.validate:main" 36 | sigmf_convert_wav = "sigmf.apps.convert_wav:main [apps]" 37 | [project.optional-dependencies] 38 | test = [ 39 | "pylint", 40 | "pytest", 41 | "pytest-cov", 42 | "hypothesis", # next-gen testing framework 43 | ] 44 | apps = [ 45 | "scipy", # for wav i/o 46 | ] 47 | 48 | [tool.setuptools] 49 | packages = ["sigmf"] 50 | [tool.setuptools.dynamic] 51 | version = {attr = "sigmf.__version__"} 52 | readme = {file = ["README.md"], content-type = "text/markdown"} 53 | [tool.setuptools.package-data] 54 | sigmf = ["*.json"] 55 | 56 | [build-system] 57 | requires = ["setuptools>=65.0", "setuptools-scm"] 58 | build-backend = "setuptools.build_meta" 59 | 60 | [tool.coverage.run] 61 | branch = true 62 | source = ["sigmf", "tests"] 63 | # -rA captures stdout from all tests and places it after the pytest summary 64 | command_line = "-m pytest -rA --doctest-modules --junitxml=pytest.xml" 65 | 66 | [tool.pytest.ini_options] 67 | addopts = "--doctest-modules" 68 | 69 | [tool.pylint] 70 | [tool.pylint.main] 71 | load-plugins = [ 72 | "pylint.extensions.typing", 73 | "pylint.extensions.docparams", 74 | ] 75 | exit-zero = true 76 | [tool.pylint.messages_control] 77 | disable = [ 78 | "logging-not-lazy", 79 | "missing-module-docstring", 80 | "import-error", 81 | "unspecified-encoding", 82 | ] 83 | max-line-length = 120 84 | [tool.pylint.REPORTS] 85 | # omit from the similarity reports 86 | ignore-comments = 'yes' 87 | ignore-docstrings = 'yes' 88 | ignore-imports = 'yes' 89 | ignore-signatures = 'yes' 90 | min-similarity-lines = 4 91 | 92 | [tool.pytype] 93 | inputs = ['sigmf', 'tests'] 94 | 95 | [tool.black] 96 | line-length = 120 97 | 98 | [tool.isort] 99 | profile = "black" 100 | 101 | [tool.tox] 102 | legacy_tox_ini = ''' 103 | [tox] 104 | skip_missing_interpreters = True 105 | envlist = py{37,38,39,310,311,312,313} 106 | 107 | [testenv] 108 | usedevelop = True 109 | deps = .[test,apps] 110 | commands = coverage run 111 | ''' 112 | -------------------------------------------------------------------------------- /sigmf/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | # version of this python module 8 | __version__ = "1.2.10" 9 | # matching version of the SigMF specification 10 | __specification__ = "1.2.5" 11 | 12 | from . import archive, archivereader, error, schema, sigmffile, utils, validate 13 | from .archive import SigMFArchive 14 | from .archivereader import SigMFArchiveReader 15 | from .sigmffile import SigMFCollection, SigMFFile 16 | -------------------------------------------------------------------------------- /sigmf/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sigmf/sigmf-python/2ae107f0e34ae4d3cf8a4b23d39803e85839a628/sigmf/apps/__init__.py -------------------------------------------------------------------------------- /sigmf/apps/convert_wav.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """converter for wav containers""" 8 | 9 | import argparse 10 | import getpass 11 | import logging 12 | import tempfile 13 | from datetime import datetime, timezone 14 | from os import PathLike 15 | from pathlib import Path 16 | from typing import Optional 17 | 18 | from scipy.io import wavfile 19 | 20 | from .. import SigMFFile 21 | from .. import __version__ as toolversion 22 | from ..sigmffile import get_sigmf_filenames 23 | from ..utils import SIGMF_DATETIME_ISO8601_FMT, get_data_type_str 24 | 25 | log = logging.getLogger() 26 | 27 | 28 | def convert_wav( 29 | wav_path: str, 30 | out_path: Optional[str] = None, 31 | author: Optional[str] = None, 32 | ) -> PathLike: 33 | """ 34 | Read a wav and write a sigmf archive. 
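    | 
    |     Parameters
    |     ----------
    |     wav_path : str
    |         Path of the wav recording to convert.
    |     out_path : str, optional
    |         Output path of the archive; defaults to the stem of `wav_path`.
    |     author : str, optional
    |         Value for the author metadata field; defaults to the current user.
    | 
    |     Returns
    |     -------
    |     PathLike
    |         Path of the `.sigmf` archive that was written.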
35 | """ 36 | wav_path = Path(wav_path) 37 | wav_stem = wav_path.stem 38 | samp_rate, wav_data = wavfile.read(wav_path) 39 | 40 | global_info = { 41 | SigMFFile.AUTHOR_KEY: getpass.getuser() if author is None else author, 42 | SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), 43 | SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", 44 | SigMFFile.NUM_CHANNELS_KEY: 1 if len(wav_data.shape) < 2 else wav_data.shape[1], 45 | SigMFFile.RECORDER_KEY: "Official SigMF wav converter", 46 | SigMFFile.SAMPLE_RATE_KEY: samp_rate, 47 | } 48 | 49 | modify_time = wav_path.lstat().st_mtime 50 | wav_datetime = datetime.fromtimestamp(modify_time, tz=timezone.utc) 51 | 52 | capture_info = { 53 | SigMFFile.START_INDEX_KEY: 0, 54 | SigMFFile.DATETIME_KEY: wav_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), 55 | } 56 | 57 | temp_dir = Path(tempfile.mkdtemp()) 58 | if out_path is None: 59 | # extension will be changed 60 | out_path = Path(wav_stem) 61 | else: 62 | out_path = Path(out_path) 63 | filenames = get_sigmf_filenames(out_path) 64 | 65 | data_path = temp_dir / filenames["data_fn"] 66 | wav_data.tofile(data_path) 67 | 68 | meta = SigMFFile(data_file=data_path, global_info=global_info) 69 | meta.add_capture(0, metadata=capture_info) 70 | log.debug("created %r", meta) 71 | 72 | arc_path = filenames["archive_fn"] 73 | meta.tofile(arc_path, toarchive=True) 74 | log.info("wrote %s", arc_path) 75 | return arc_path 76 | 77 | 78 | def main() -> None: 79 | """ 80 | entry-point for sigmf_convert_wav 81 | """ 82 | parser = argparse.ArgumentParser(description="Convert wav to sigmf archive.") 83 | parser.add_argument("input", type=str, help="wav path") 84 | parser.add_argument("--author", type=str, default=None, help=f"set {SigMFFile.AUTHOR_KEY} metadata") 85 | parser.add_argument("-v", "--verbose", action="count", default=0) 86 | parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") 87 | args = parser.parse_args() 88 | 89 | level_lut = { 90 | 0: logging.WARNING, 91 | 1: logging.INFO, 92 | 2: logging.DEBUG, 93 | } 94 | logging.basicConfig(level=level_lut[min(args.verbose, 2)]) 95 | 96 | _ = convert_wav( 97 | wav_path=args.input, 98 | author=args.author, 99 | ) 100 | 101 | 102 | if __name__ == "__main__": 103 | main() 104 | -------------------------------------------------------------------------------- /sigmf/archive.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Create and extract SigMF archives.""" 8 | 9 | import io 10 | import shutil 11 | import tarfile 12 | import tempfile 13 | from pathlib import Path 14 | 15 | from .error import SigMFFileError 16 | 17 | SIGMF_ARCHIVE_EXT = ".sigmf" 18 | SIGMF_METADATA_EXT = ".sigmf-meta" 19 | SIGMF_DATASET_EXT = ".sigmf-data" 20 | SIGMF_COLLECTION_EXT = ".sigmf-collection" 21 | 22 | 23 | class SigMFArchive: 24 | """ 25 | Archive a SigMFFile 26 | 27 | A `.sigmf` file must include both valid metadata and data. 28 | If `self.data_file` is not set or the requested output file 29 | is not writable, raises `SigMFFileError`. 30 | 31 | Parameters 32 | ---------- 33 | 34 | sigmffile : SigMFFile 35 | A SigMFFile object with valid metadata and data_file. 36 | 37 | name : PathLike | str | bytes 38 | Path to archive file to create. If file exists, overwrite. 39 | If `name` doesn't end in .sigmf, it will be appended. 
 40 |         For example: if `name` == "/tmp/archive1", then the
 41 |         following archive will be created:
 42 |             /tmp/archive1.sigmf
 43 |             - archive1/
 44 |                 - archive1.sigmf-meta
 45 |                 - archive1.sigmf-data
 46 | 
 47 |     fileobj : BufferedWriter
 48 |         If `fileobj` is specified, it is used as an alternative to
 49 |         a file object opened in binary mode for `name`. It is
 50 |         supposed to be at position 0. `name` is not required, but
 51 |         if specified will be used to determine the directory and
 52 |         file names within the archive. `fileobj` won't be closed.
 53 |         For example: if `name` == "archive1" and fileobj is given,
 54 |         a tar archive will be written to fileobj with the
 55 |         following structure:
 56 |             - archive1/
 57 |                 - archive1.sigmf-meta
 58 |                 - archive1.sigmf-data
 59 |     """
 60 | 
 61 |     def __init__(self, sigmffile, name=None, fileobj=None):
 62 |         is_buffer = fileobj is not None
 63 |         self.sigmffile = sigmffile
 64 |         self.path, arcname, fileobj = self._resolve(name, fileobj)
 65 | 
 66 |         self._ensure_data_file_set()
 67 |         self._validate()
 68 | 
 69 |         tar = tarfile.TarFile(mode="w", fileobj=fileobj, format=tarfile.PAX_FORMAT)
 70 |         tmpdir = Path(tempfile.mkdtemp())
 71 |         meta_path = tmpdir / (arcname + SIGMF_METADATA_EXT)
 72 |         data_path = tmpdir / (arcname + SIGMF_DATASET_EXT)
 73 | 
 74 |         # write files
 75 |         with open(meta_path, "w") as handle:
 76 |             self.sigmffile.dump(handle)
 77 |         if isinstance(self.sigmffile.data_buffer, io.BytesIO):
 78 |             # write data buffer to archive
 79 |             self.sigmffile.data_file = data_path
 80 |             with open(data_path, "wb") as handle:
 81 |                 handle.write(self.sigmffile.data_buffer.getbuffer())
 82 |         else:
 83 |             # copy data to archive
 84 |             shutil.copy(self.sigmffile.data_file, data_path)
 85 |         tar.add(tmpdir, arcname=arcname, filter=self.chmod)
 86 |         # close files & remove tmpdir
 87 |         tar.close()
 88 |         if not is_buffer:
 89 |             # only close fileobj if we aren't working w/a buffer
 90 |             fileobj.close()
 91 |         shutil.rmtree(tmpdir)
 92 | 
 93 |     @staticmethod
 94 |     def chmod(tarinfo: tarfile.TarInfo):
 95 |         """permission filter for writing tar files"""
 96 |         if tarinfo.isdir():
 97 |             tarinfo.mode = 0o755 # drwxr-xr-x
 98 |         else:
 99 |             tarinfo.mode = 0o644 # -rw-r--r--
100 |         return tarinfo
101 | 
102 |     def _ensure_data_file_set(self):
103 |         if not self.sigmffile.data_file and not isinstance(self.sigmffile.data_buffer, io.BytesIO):
104 |             raise SigMFFileError("No data file in SigMFFile; use `set_data_file` before archiving.")
105 | 
106 |     def _validate(self):
107 |         self.sigmffile.validate()
108 | 
109 |     def _resolve(self, name, fileobj):
110 |         """
111 |         Resolve both (name, fileobj) into (path, arcname, fileobj) given either or both.
112 | 
113 |         Returns
114 |         -------
115 |         path : PathLike
116 |             Path of the archive file.
117 |         arcname : str
118 |             Name of the sigmf object within the archive.
119 |         fileobj : BufferedWriter
120 |             Open file handle object.
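    | 
    |         Raises
    |         ------
    |         SigMFFileError
    |             If `fileobj` is not byte-writable or lacks a usable name, if
    |             `name` has the wrong extension or cannot be opened for writing,
    |             or if neither `name` nor `fileobj` was provided.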
121 | """ 122 | if fileobj: 123 | try: 124 | # exception if not byte-writable 125 | fileobj.write(bytes()) 126 | # exception if no name property of handle 127 | path = Path(fileobj.name) 128 | if not name: 129 | arcname = path.stem 130 | else: 131 | arcname = name 132 | except io.UnsupportedOperation as exc: 133 | raise SigMFFileError(f"fileobj {fileobj} is not byte-writable.") from exc 134 | except AttributeError as exc: 135 | raise SigMFFileError(f"fileobj {fileobj} is invalid.") from exc 136 | elif name: 137 | path = Path(name) 138 | # ensure name has correct suffix if it exists 139 | if path.suffix == "": 140 | # add extension if none was given 141 | path = path.with_suffix(SIGMF_ARCHIVE_EXT) 142 | elif path.suffix != SIGMF_ARCHIVE_EXT: 143 | # ensure suffix is correct 144 | raise SigMFFileError(f"Invalid extension ({path.suffix} != {SIGMF_ARCHIVE_EXT}).") 145 | arcname = path.stem 146 | 147 | try: 148 | fileobj = open(path, "wb") 149 | except (OSError, IOError) as exc: 150 | raise SigMFFileError(f"Can't open {name} for writing.") from exc 151 | else: 152 | raise SigMFFileError("Either `name` or `fileobj` needs to be defined.") 153 | 154 | return path, arcname, fileobj 155 | -------------------------------------------------------------------------------- /sigmf/archivereader.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/SigMF 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Access SigMF archives without extracting them.""" 8 | 9 | import io 10 | import tarfile 11 | from pathlib import Path 12 | 13 | from . import __version__ 14 | from .archive import SIGMF_ARCHIVE_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT 15 | from .error import SigMFFileError 16 | from .sigmffile import SigMFFile 17 | 18 | 19 | class SigMFArchiveReader: 20 | """ 21 | Access data within SigMF archive tarball in-place without extracting. 22 | 23 | Parameters 24 | ---------- 25 | name : str | bytes | PathLike, optional 26 | Optional path to archive file to access. 27 | skip_checksum : bool, optional 28 | Skip dataset checksum calculation. 29 | map_readonly : bool, optional 30 | Indicate whether assignments on the numpy.memmap are allowed. 31 | archive_buffer : buffer, optional 32 | 33 | 34 | Raises 35 | ------ 36 | SigMFError 37 | Archive file does not exist or is improperly formatted. 38 | ValueError 39 | If invalid arguments. 40 | ValidationError 41 | If metadata is invalid. 
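    | 
    |     Examples
    |     --------
    |     A minimal sketch, assuming ``example.sigmf`` exists on disk::
    | 
    |         reader = SigMFArchiveReader("example.sigmf")
    |         samples = reader[:100]  # slice decoded samples without extracting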
42 | """ 43 | 44 | def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): 45 | if name is not None: 46 | path = Path(name) 47 | if path.suffix != SIGMF_ARCHIVE_EXT: 48 | err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) 49 | raise SigMFFileError(err) 50 | 51 | tar_obj = tarfile.open(path) 52 | 53 | elif archive_buffer is not None: 54 | tar_obj = tarfile.open(fileobj=archive_buffer, mode="r:") 55 | 56 | else: 57 | raise ValueError("Either `name` or `archive_buffer` must be not None.") 58 | 59 | json_contents = None 60 | data_offset = None 61 | data_size_bytes = None 62 | 63 | for memb in tar_obj.getmembers(): 64 | if memb.isdir(): # memb.type == tarfile.DIRTYPE: 65 | # the directory structure will be reflected in the member name 66 | continue 67 | 68 | elif memb.isfile(): # memb.type == tarfile.REGTYPE: 69 | if memb.name.endswith(SIGMF_METADATA_EXT): 70 | json_contents = memb.name 71 | if data_offset is None: 72 | # consider a warnings.warn() here; the datafile should be earlier in the 73 | # archive than the metadata, so that updating it (like, adding an annotation) 74 | # is fast. 75 | pass 76 | with tar_obj.extractfile(memb) as memb_fid: 77 | json_contents = memb_fid.read() 78 | 79 | elif memb.name.endswith(SIGMF_DATASET_EXT): 80 | data_offset = memb.offset_data 81 | data_size_bytes = memb.size 82 | with tar_obj.extractfile(memb) as memb_fid: 83 | data_buffer = io.BytesIO(memb_fid.read()) 84 | 85 | else: 86 | print(f"A regular file {memb.name} was found but ignored in the archive") 87 | else: 88 | print(f"A member of type {memb.type} and name {memb.name} was found but not handled, just FYI.") 89 | 90 | if data_offset is None: 91 | raise SigMFFileError("No .sigmf-data file found in archive!") 92 | 93 | self.sigmffile = SigMFFile(metadata=json_contents) 94 | self.sigmffile.validate() 95 | 96 | self.sigmffile.set_data_file( 97 | data_buffer=data_buffer, 98 | skip_checksum=skip_checksum, 99 | size_bytes=data_size_bytes, 100 | map_readonly=map_readonly, 101 | ) 102 | 103 | self.ndim = self.sigmffile.ndim 104 | self.shape = self.sigmffile.shape 105 | 106 | tar_obj.close() 107 | 108 | def __len__(self): 109 | return self.sigmffile.__len__() 110 | 111 | def __iter__(self): 112 | return self.sigmffile.__iter__() 113 | 114 | def __getitem__(self, sli): 115 | return self.sigmffile.__getitem__(sli) 116 | -------------------------------------------------------------------------------- /sigmf/error.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Defines SigMF exception classes.""" 8 | 9 | 10 | class SigMFError(Exception): 11 | """SigMF base exception.""" 12 | 13 | 14 | class SigMFValidationError(SigMFError): 15 | """Exceptions related to validating SigMF metadata.""" 16 | 17 | 18 | class SigMFAccessError(SigMFError): 19 | """Exceptions related to accessing the contents of SigMF metadata, notably 20 | when expected fields are missing or accessing out of bounds captures.""" 21 | 22 | 23 | class SigMFFileError(SigMFError): 24 | """Exceptions related to reading or writing SigMF files or archives.""" 25 | -------------------------------------------------------------------------------- /sigmf/schema-collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "$id": "https://github.com/sigmf/SigMF/spec/1.2.0/collection-schema", 3 | "$schema": "https://json-schema.org/draft/2020-12/schema", 4 | "default": {}, 5 | "required": ["collection"], 6 | "type": "object", 7 | "properties": { 8 | "collection": { 9 | "default": {}, 10 | "description": "The `sigmf-collection` file contains metadata in a single top-level Object called a `collection`. The Collection Object contains key/value pairs that describe relationships between SigMF Recordings.\\nn The Collection Object associates SigMF Recordings together by specifying `SigMF Recording Objects` in the `core:streams` JSON array. Each Object describes a specific associated SigMF Recording.\\nn The following rules apply to SigMF Collections:\n\n 1. The Collection Object MUST be the only top-level Object in the file.\n\n 2. Keys in the Collection Object SHOULD use SigMF Recording Objects when referencing SigMF Recordings.\n\n 3. SigMF Recording Objects MUST contain both a `name` field, which is the base-name of a SigMF Recording, and a `hash` which is the SHA512 hash of the Recording Metadata file `[base-name].sigmf-meta`.\n\n 4. 
SigMF Recording Objects MUST appear in a JSON array.\\nn Example `top-level.sigmf-collection` file:\\begin{verbatim}{\n\"collection\": {\n \"core:version\": \"1.2.0\",\n \"core:extensions\" : [\n {\n \"name\": \"antenna\",\n \"version\": \"1.0.0\",\n \"optional\": true\n }\n ],\n \"antenna:hagl\": 120,\n \"antenna:azimuth_angle\": 98,\n \"core:streams\": [\n {\n \"name\": \"example-channel-0-basename\",\n \"hash\": \"b4071db26f5c7b0c70f5066eb9bc3a8b506df0f5af09991ba481f63f97f7f48e9396584bc1c296650cd3d47bc4ad2c5b72d2561078fb6eb16151d2898c9f84c4\"\n },\n {\n \"name\": \"example-channel-1-basename\",\n \"hash\": \"7132aa240e4d8505471cded716073141ae190f763bfca3c27edd8484348d6693d0e8d3427d0bf1990e687a6a40242d514e5d1995642bc39384e9a37a211655d7\"\n }\n ]\n }\n}\\end{verbatim}", 11 | "required": ["core:version"], 12 | "type": "object", 13 | "properties": { 14 | "core:version": { 15 | "description": "The version of the SigMF specification used to create the Collection file.", 16 | "examples": ["1.2.0"], 17 | "type": "string" 18 | }, 19 | "core:description": { 20 | "default": "", 21 | "description": "A text description of the SigMF Collection.", 22 | "type": "string" 23 | }, 24 | "core:author": { 25 | "default": "", 26 | "description": "A text identifier for the author potentially including name, handle, email, and/or other ID like Amateur Call Sign.", 27 | "examples": ["Bruce Wayne bruce@waynetech.com", "Bruce (K3X)"], 28 | "type": "string" 29 | }, 30 | "core:collection_doi": { 31 | "default": "", 32 | "description": "The registered DOI (ISO 26324) for a Collection.", 33 | "type": "string" 34 | }, 35 | "core:license": { 36 | "default": "", 37 | "description": "A URL for the license document under which this Collection metadata is offered.", 38 | "examples": ["https://creativecommons.org/licenses/by-sa/4.0/"], 39 | "type": "string" 40 | }, 41 | "core:extensions": { 42 | "default": [], 43 | "description": "The `core:extensions` field in the Global Object is an array of extension objects that describe SigMF extensions. 
Extension Objects MUST contain the three key/value pairs defined in Table (FIX REF), and MUST NOT contain any other fields.", 44 | "type": "array", 45 | "additionalItems": true, 46 | "items": { 47 | "anyOf": [ 48 | { 49 | "type": "object", 50 | "title": "The first anyOf schema", 51 | "description": "An explanation about the purpose of this instance.", 52 | "default": {}, 53 | "examples": [ 54 | { 55 | "name": "capture_details", 56 | "version": "1.0.0", 57 | "optional": false 58 | } 59 | ], 60 | "required": ["name", "version", "optional"], 61 | "properties": { 62 | "name": { 63 | "default": "", 64 | "description": "The name of the SigMF extension namespace.", 65 | "type": "string" 66 | }, 67 | "version": { 68 | "default": "", 69 | "description": "The version of the extension namespace specification used.", 70 | "type": "string" 71 | }, 72 | "optional": { 73 | "default": false, 74 | "description": "If this field is `true`, the extension is REQUIRED to parse this Recording.", 75 | "type": "boolean" 76 | } 77 | }, 78 | "additionalProperties": true 79 | } 80 | ] 81 | } 82 | }, 83 | "core:streams": { 84 | "default": [], 85 | "description": "An ordered array of SigMF Recording Tuples, indicating multiple recorded streams of data (e.g., channels from a phased array).", 86 | "type": "array", 87 | "additionalItems": true, 88 | "items": { 89 | "anyOf": [ 90 | { 91 | "default": [], 92 | "examples": [["example-channel-0-basename", "hash"]], 93 | "type": "array", 94 | "additionalItems": true, 95 | "items": { 96 | "anyOf": [ 97 | { 98 | "default": "", 99 | "type": "string" 100 | } 101 | ] 102 | } 103 | }, 104 | { 105 | "default": [], 106 | "examples": [["example-channel-1-basename", "hash"]], 107 | "type": "array", 108 | "additionalItems": true, 109 | "items": { 110 | "anyOf": [ 111 | { 112 | "default": "", 113 | "type": "string" 114 | } 115 | ] 116 | } 117 | } 118 | ] 119 | } 120 | } 121 | }, 122 | "additionalProperties": true 123 | } 124 | }, 125 | "additionalProperties": true 126 | } 127 | -------------------------------------------------------------------------------- /sigmf/schema-meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "$id": "https://raw.githubusercontent.com/sigmf/SigMF/v1.2.5/sigmf-schema.json", 3 | "$schema": "https://json-schema.org/draft/2020-12/schema", 4 | "title": "Schema for SigMF Meta Files", 5 | "description": "SigMF specifies a way to describe sets of recorded digital signal samples with metadata written in JSON. SigMF can be used to describe general information about a collection of samples, the characteristics of the system that generated the samples, features of signals themselves, and the relationship between different recordings.", 6 | "type": "object", 7 | "required": [ 8 | "global", 9 | "captures", 10 | "annotations" 11 | ], 12 | "properties": { 13 | "global": { 14 | "description": "The `global` object consists of key/value pairs that provide information applicable to the entire Dataset. 
It contains the information that is minimally necessary to open and parse the Dataset file, as well as general information about the Recording itself.", 15 | "required": [ 16 | "core:datatype", 17 | "core:version" 18 | ], 19 | "type": "object", 20 | "properties": { 21 | "core:datatype": { 22 | "description": "The SigMF Dataset format of the stored samples in the Dataset file.", 23 | "examples": [ 24 | "cf32_le", 25 | "ri16_le" 26 | ], 27 | "default": "cf32_le", 28 | "pattern": "^(c|r)(f32|f64|i32|i16|u32|u16|i8|u8)(_le|_be)?", 29 | "type": "string" 30 | }, 31 | "core:sample_rate": { 32 | "description": "The sample rate of the signal in samples per second.", 33 | "minimum": 1, 34 | "maximum": 1000000000000, 35 | "type": "number" 36 | }, 37 | "core:author": { 38 | "description": "A text identifier for the author potentially including name, handle, email, and/or other ID like Amateur Call Sign", 39 | "examples": [ 40 | "Bruce Wayne bruce@waynetech.com", 41 | "Bruce (K3X)" 42 | ], 43 | "type": "string" 44 | }, 45 | "core:collection": { 46 | "description": "The base filename of a `collection` with which this Recording is associated. This field is used to indicate that this Recording is part of a SigMF Collection (described later in this document). It is strongly RECOMMENDED that if you are building a Collection, that each Recording referenced by that Collection use this field to associate up to the relevant `sigmf-collection` file.", 47 | "type": "string" 48 | }, 49 | "core:dataset": { 50 | "description": "The full filename of the Dataset file this Metadata file describes, used ONLY with Non-Conforming Datasets. If provided, this string MUST be the complete filename of the Dataset file, including the extension. The Dataset file must be in the same directory as the .sigmf-meta file; note that this string only includes the filename, not directory. If a Recording does not have this field, it MUST have a compliant SigMF Dataset (NOT a Non-Conforming Dataset) which MUST use the same base filename as the Metadata file and use the `.sigmf-data` extension. If a SigMF Recording or Archive is renamed this field MUST also be updated, because of this it is RECOMMENDED that Compliant SigMF Recordings avoid use of this field. This field SHOULD NOT be used in conjunction the `core:metadata_only` field. If both fields exist and the file specified by `core:dataset` exists, then `core:metadata_only` SHOULD be ignored by the application.", 51 | "type": "string", 52 | "pattern": "^[^\\/\\\\:*?\"<>|]+(\\.[^\\/\\\\:*?\"<>|]+)*" 53 | }, 54 | "core:data_doi": { 55 | "description": "The registered DOI (ISO 26324) for a Recording's Dataset file.", 56 | "type": "string" 57 | }, 58 | "core:description": { 59 | "description": "A text description of the SigMF Recording.", 60 | "type": "string" 61 | }, 62 | "core:hw": { 63 | "description": "A text description of the hardware used to make the Recording.", 64 | "type": "string" 65 | }, 66 | "core:license": { 67 | "description": "A URL for the license document under which the Recording is offered. (RFC 3986)", 68 | "examples": [ 69 | "https://creativecommons.org/licenses/by-sa/4.0/" 70 | ], 71 | "format": "uri", 72 | "type": "string" 73 | }, 74 | "core:metadata_only": { 75 | "description": "Indicates the Metadata file is intentionally distributed without the Dataset. This field should be defined and set to `true` to indicate that the Metadata file is being distributed without a corresponding `.sigmf-data` file. 
This may be done when the Dataset will be generated dynamically from information in the schema, or because just the schema is sufficient for the intended application. A metadata only distribution is not a SigMF Recording. If a Compliant SigMF Recording uses this field, it MAY indicate that the Dataset was dynamically generated from the metadata. This field MAY NOT be used in conjunction with Non-Conforming Datasets or the `core:dataset` field. ", 76 | "type": "boolean" 77 | }, 78 | "core:meta_doi": { 79 | "description": "The registered DOI (ISO 26324) for a Recording's Metadata file.", 80 | "type": "string" 81 | }, 82 | "core:num_channels": { 83 | "description": "Number of interleaved channels in the Dataset file, if omitted this is implied to be 1, for multiple channels of IQ data, it is RECOMMENDED to use SigMF Collections instead of num_channels for widest application support.", 84 | "default": 1, 85 | "minimum": 1, 86 | "maximum": 9223372036854775807, 87 | "type": "integer" 88 | }, 89 | "core:offset": { 90 | "description": "The index number of the first sample in the Dataset. If not provided, this value defaults to zero. Typically used when a Recording is split over multiple files. All sample indices in SigMF are absolute, and so all other indices referenced in metadata for this recording SHOULD be greater than or equal to this value.", 91 | "default": 0, 92 | "minimum": 0, 93 | "!comment": "The maximum value for this property is equal to 2^63 - 1, making it easy to fit into a signed 64-bit integer.", 94 | "maximum": 9223372036854775807, 95 | "type": "integer" 96 | }, 97 | "core:recorder": { 98 | "description": "The name of the software used to make this SigMF Recording.", 99 | "type": "string" 100 | }, 101 | "core:sha512": { 102 | "description": "The SHA512 hash of the Dataset file associated with the SigMF file.", 103 | "type": "string", 104 | "pattern": "^[0-9a-fA-F]{128}" 105 | }, 106 | "core:trailing_bytes": { 107 | "description": "The number of bytes to ignore at the end of a Dataset, used ONLY with Non-Conforming Datasets. This field is used with Non-Conforming Datasets to indicate some number of bytes that trail the sample data in the NCD file that should be ignored for processing. This can be used to ignore footer data in non-SigMF filetypes. ", 108 | "type": "integer", 109 | "minimum": 0, 110 | "maximum": 9223372036854775807 111 | }, 112 | "core:version": { 113 | "description": "The version of the SigMF specification used to create the Metadata file, in the format X.Y.Z.", 114 | "pattern": "^\\d+\\.\\d+\\.\\d", 115 | "type": "string" 116 | }, 117 | "core:geolocation": { 118 | "description": "The location of the Recording system (note, using the Captures scope `geolocation` field is preferred). See the `geolocation` field within the Captures metadata for details. 
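For example (an illustrative sketch reusing the coordinates from the Captures example): \\begin{verbatim}\"core:geolocation\": {\n \"type\": \"Point\",\n \"coordinates\": [-107.6183682, 34.0787916]\n}\\end{verbatim} 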
While using the Captures scope `geolocation` is preferred, fixed recording systems may still provide position information within the Global object so it is RECOMMENDED that applications check and use this field if the Captures `geolocation` field is not present.", 119 | "type": "object", 120 | "required": [ 121 | "type", 122 | "coordinates" 123 | ], 124 | "properties": { 125 | "type": { 126 | "type": "string", 127 | "enum": [ 128 | "Point" 129 | ] 130 | }, 131 | "coordinates": { 132 | "type": "array", 133 | "minItems": 2, 134 | "maxItems": 3, 135 | "items": { 136 | "type": "number" 137 | } 138 | }, 139 | "bbox": { 140 | "type": "array", 141 | "minItems": 4, 142 | "items": { 143 | "type": "number" 144 | } 145 | } 146 | } 147 | }, 148 | "core:extensions": { 149 | "description": "The `core:extensions` field in the Global Object is an array of extension objects that describe SigMF extensions. Extension Objects MUST contain the three key/value pairs defined below, and MUST NOT contain any other fields. \\rowcolors{1}{}{lightblue}\\begin{center}\\begin{tabular}{lllp{3.8in}} \\toprule \\textbf{Name} & \\textbf{Required} & \\textbf{Type} & \\textbf{Description} \\\\ \\midrule name & true & string & The name of the SigMF extension namespace. \\\\ version & true & string & The version of the extension namespace specification used. \\\\ optional & true & boolean & If this field is `false`, then the application MUST support this extension in order to parse the Recording; if the application does not support this extension, it SHOULD report an error. \\\\ \\bottomrule \\end{tabular} \\end{center} \\\\ In the example below, `extension-01` is optional, so the application may ignore it if it does not support `extension-01`. But `extension-02` is not optional, so the application must support `extension-02` in order to parse the Recording. \\begin{verbatim}\"global\": {\n ...\n \"core:extensions\" : [\n {\n \"name\": \"extension-01\",\n \"version\": \"0.0.5\",\n \"optional\": true\n },\n {\n \"name\": \"extension-02\",\n \"version\": \"1.2.3\",\n \"optional\": false\n }\n ]\n ...\n }\\end{verbatim}", 150 | "type": "array", 151 | "default": [], 152 | "additionalItems": false, 153 | "items": { 154 | "type": "object", 155 | "required": [ 156 | "name", 157 | "version", 158 | "optional" 159 | ], 160 | "properties": { 161 | "name": { 162 | "description": "The name of the SigMF extension namespace.", 163 | "type": "string" 164 | }, 165 | "version": { 166 | "description": "The version of the extension namespace specification used.", 167 | "examples": [ 168 | "1.2.0" 169 | ], 170 | "type": "string" 171 | }, 172 | "optional": { 173 | "description": "If this field is `false`, the extension is REQUIRED to parse this Recording.", 174 | "type": "boolean" 175 | } 176 | }, 177 | "additionalProperties": false 178 | } 179 | } 180 | }, 181 | "additionalProperties": true 182 | }, 183 | "captures": { 184 | "description": "The `captures` Object is an array of capture segment objects that describe the parameters of the signal capture. It MUST be sorted by the value of each capture segment's `core:sample_start` key, ascending. Capture Segment Objects are composed of key/value pairs, and each Segment describes a chunk of samples that can be mapped into memory for processing. Each Segment MUST contain a `core:sample_start` key/value pair, which indicates the sample index relative to the Dataset where this Segment's metadata applies. 
The fields that are described within a Capture Segment are scoped to that Segment only and need to be explicitly declared again if they are valid in subsequent Segments. While it is recommended there be at least one segment defined, if there are no items in the captures array it is implied that a single capture exists with `core:sample_start` equal to zero (no other metadata is implied), i.e., `\"captures\": []` implies `\"captures\": [{\"core:sample_start\": 0}]`.", 185 | "default": [], 186 | "type": "array", 187 | "additionalItems": false, 188 | "items": { 189 | "type": "object", 190 | "required": [ 191 | "core:sample_start" 192 | ], 193 | "properties": { 194 | "core:sample_start": { 195 | "default": 0, 196 | "description": "Index of first sample of this chunk. This field specifies the sample index where this Segment takes effect relative to the recorded Dataset file. If the Dataset is a SigMF Dataset file, this field can be immediately mapped to physical disk location since conforming Datasets only contain sample data. ", 197 | "minimum": 0, 198 | "maximum": 9223372036854775807, 199 | "type": "integer" 200 | }, 201 | "core:datetime": { 202 | "description": "An ISO-8601 string indicating the timestamp of the sample index specified by sample_start. This key/value pair MUST be an ISO-8601 string, as defined by [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt), where the only allowed `time-offset` is `Z`, indicating the UTC/Zulu timezone. The ABNF description is: \\begin{verbatim} date-fullyear = 4DIGIT \n date-month = 2DIGIT ; 01-12 \n date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year \n\n time-hour = 2DIGIT ; 00-23 \n time-minute = 2DIGIT ; 00-59 \n time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules \n\n time-secfrac = \".\" 1*DIGIT \n time-offset = \"Z\" \n\n partial-time = time-hour \":\" time-minute \":\" time-second [time-secfrac] \n full-date = date-fullyear \"-\" date-month \"-\" date-mday \n full-time = partial-time time-offset \n\n date-time = full-date \"T\" full-time \\end{verbatim} Thus, timestamps take the form of `YYYY-MM-DDTHH:MM:SS.SSSZ`, where any number of digits for fractional seconds is permitted. ", 203 | "examples": [ 204 | "1955-11-05T14:00:00.000Z" 205 | ], 206 | "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?", 207 | "type": "string" 208 | }, 209 | "core:frequency": { 210 | "description": "The center frequency of the signal in Hz.", 211 | "type": "number", 212 | "minimum": -1000000000000, 213 | "maximum": 1000000000000, 214 | "examples": [ 215 | 915000000, 216 | 2400000000 217 | ] 218 | }, 219 | "core:global_index": { 220 | "description": "The index of the sample referenced by `sample_start` relative to an original sample stream. The entirety of which may not have been captured in a recorded Dataset. If omitted, this value SHOULD be treated as equal to `sample_start`. For example, some hardware devices are capable of 'counting' samples at the point of data conversion. This sample count is commonly used to indicate a discontinuity in the datastream between the hardware device and processing. For example, in the below Captures array, there are two Segments describing samples in a SigMF Dataset file. The first Segment begins at the start of the Dataset file. 
The second segment begins at sample index 500 relative to the recorded samples (and since this is a conforming SigMF Dataset, is physically located on-disk at location `sample_start * sizeof(sample)`), but the `global_index` reports this was actually sample number 1000 in the original datastream, indicating that 500 samples were lost before they could be recorded. \\begin{verbatim} ...\n \"captures\": [ \n { \n \"core:sample_start\": 0, \n \"core:global_index\": 0 \n }, \n { \n \"core:sample_start\": 500, \n \"core:global_index\": 1000 \n }\n ],\n ... \\end{verbatim} ", 221 | "type": "integer", 222 | "minimum": 0, 223 | "maximum": 9223372036854775807 224 | }, 225 | "core:header_bytes": { 226 | "description": "The number of bytes preceding a chunk of samples that are not sample data, used for NCDs. This field specifies a number of bytes that are not valid sample data that are physically located at the start of where the chunk of samples referenced by this Segment would otherwise begin. If omitted, this value SHOULD be treated as equal to zero. If included, the Dataset is by definition a Non-Conforming Dataset. For example, the below Metadata for a Non-Conforming Dataset contains two segments describing chunks of 8-bit complex samples (2 bytes per sample) recorded to disk with 4-byte headers that are not valid for processing. Thus, to map these two chunks of samples into memory, a reader application would map the `500 samples` (equal to `1000 bytes`) in the first Segment, starting at a file offset of `4 bytes`, and then the remainder of the file through EOF starting at a file offset of `1008 bytes` (equal to the size of the previous Segment of samples plus two headers). \\begin{samepage}\\begin{verbatim} { \n \"global\": { \n \"core:datatype\": \"cu8\", \n \"core:version\": \"1.2.0\", \n \"core:dataset\": \"non-conforming-dataset-01.dat\" \n }, \n \"captures\": [ \n { \n \"core:sample_start\": 0, \n \"core:header_bytes\": 4 \n }, \n { \n \"core:sample_start\": 500, \n \"core:header_bytes\": 4 \n }\n ],\n \"annotations\": []\n } \\end{verbatim}\\end{samepage}", 227 | "type": "integer", 228 | "minimum": 0, 229 | "maximum": 9223372036854775807 230 | }, 231 | "core:geolocation": { 232 | "description": "The location of the recording system at the start of this Captures segment, as a single RFC 7946 GeoJSON `point` Object. For moving emitters, this provides a rudimentary means to manage location through different captures segments. While `core:geolocation` is also allowed in the Global object for backwards compatibility reasons, adding it to Captures is preferred. Per the GeoJSON specification, the point coordinates use the WGS84 coordinate reference system and are `longitude`, `latitude` (REQUIRED, in decimal degrees), and `altitude` (OPTIONAL, in meters above the WGS84 ellipsoid) - in that order. An example including the altitude field is shown below: \\begin{verbatim} \"captures\": {\n ...\n \"core:geolocation\": {\n \"type\": \"Point\",\n \"coordinates\": [-107.6183682, 34.0787916, 2120.0]\n }\n ...\n } \\end{verbatim} GeoJSON permits the use of *Foreign Members* in GeoJSON documents per RFC 7946 Section 6.1. Because the SigMF requirement for the `geolocation` field is to be a valid GeoJSON `point` Object, users MAY include *Foreign Member* fields here for user-defined purposes (position valid indication, GNSS SV counts, dilution of precision, accuracy, etc.). It is strongly RECOMMENDED that all fields be documented in a SigMF Extension document. 
*Note:* Objects named `geometry` or `properties` are prohibited Foreign Members as specified in RFC 7946 Section 7.1.", 233 | "type": "object", 234 | "required": [ 235 | "type", 236 | "coordinates" 237 | ], 238 | "properties": { 239 | "type": { 240 | "type": "string", 241 | "enum": [ 242 | "Point" 243 | ] 244 | }, 245 | "coordinates": { 246 | "type": "array", 247 | "minItems": 2, 248 | "maxItems": 3, 249 | "items": { 250 | "type": "number" 251 | } 252 | }, 253 | "bbox": { 254 | "type": "array", 255 | "minItems": 4, 256 | "items": { 257 | "type": "number" 258 | } 259 | } 260 | } 261 | } 262 | }, 263 | "additionalProperties": true 264 | } 265 | }, 266 | "annotations": { 267 | "default": [], 268 | "description": "The `annotations` Object is an array of annotation segment objects that describe anything regarding the signal data not part of the Captures and Global objects. It MUST be sorted by the value of each Annotation Segment's `core:sample_start` key, ascending. Annotation segment Objects contain key/value pairs and MUST contain a `core:sample_start` key/value pair, which indicates the first index at which the rest of the Segment's key/value pairs apply. There is no limit to the number of annotations that can apply to the same group of samples. If two annotations have the same `sample_start`, there is no defined ordering between them. If `sample_count` is not provided, it SHOULD be assumed that the annotation applies from `sample_start` through the end of the corresponding capture, in all other cases `sample_count` MUST be provided. ", 269 | "type": "array", 270 | "additionalItems": true, 271 | "items": { 272 | "type": "object", 273 | "title": "Annotation", 274 | "required": [ 275 | "core:sample_start" 276 | ], 277 | "properties": { 278 | "core:sample_start": { 279 | "default": 0, 280 | "description": "The sample index at which this Segment takes effect.", 281 | "minimum": 0, 282 | "maximum": 9223372036854775807, 283 | "type": "integer" 284 | }, 285 | "core:sample_count": { 286 | "description": "The number of samples that this Segment applies to.", 287 | "type": "integer", 288 | "minimum": 0, 289 | "maximum": 9223372036854775807 290 | }, 291 | "core:freq_lower_edge": { 292 | "description": "The frequency (Hz) of the lower edge of the feature described by this annotation. The `freq_lower_edge` and `freq_upper_edge` fields SHOULD be at RF if the feature is at a known RF frequency. If there is no known center frequency (as defined by the `frequency` field in the relevant Capture Segment Object), or the center frequency is at baseband, the `freq_lower_edge` and `freq_upper_edge` fields SHOULD be relative to baseband. It is REQUIRED that both `freq_lower_edge` and `freq_upper_edge` be provided, or neither; the use of just one field is not allowed. ", 293 | "type": "number", 294 | "minimum": -1000000000000, 295 | "maximum": 1000000000000 296 | }, 297 | "core:freq_upper_edge": { 298 | "description": "The frequency (Hz) of the upper edge of the feature described by this annotation.", 299 | "type": "number", 300 | "minimum": -1000000000000, 301 | "maximum": 1000000000000 302 | }, 303 | "core:label": { 304 | "description": "A short form human/machine-readable label for the annotation. The `label` field MAY be used for any purpose, but it is RECOMMENDED that it be limited to no more than 20 characters as a common use is a short form GUI indicator. Similarly, it is RECOMMENDED that any user interface making use of this field be capable of displaying up to 20 characters. 
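For example, an annotation using a short label (illustrative values): \\begin{verbatim}{\n \"core:sample_start\": 1000,\n \"core:sample_count\": 4096,\n \"core:label\": \"LTE uplink\"\n}\\end{verbatim} 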
", 305 | "type": "string" 306 | }, 307 | "core:comment": { 308 | "description": "A human-readable comment, intended to be used for longer comments (it is recommended to use `label` for shorter text).", 309 | "type": "string" 310 | }, 311 | "core:generator": { 312 | "description": "Human-readable name of the entity that created this annotation.", 313 | "type": "string" 314 | }, 315 | "core:uuid": { 316 | "description": "RFC-4122 unique identifier.", 317 | "format": "uuid", 318 | "type": "string" 319 | } 320 | }, 321 | "additionalProperties": true 322 | } 323 | } 324 | }, 325 | "additionalProperties": false 326 | } 327 | -------------------------------------------------------------------------------- /sigmf/schema.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Schema IO""" 8 | 9 | import json 10 | from pathlib import Path 11 | 12 | from . import __version__ as toolversion 13 | 14 | SCHEMA_META = "schema-meta.json" 15 | SCHEMA_COLLECTION = "schema-collection.json" 16 | 17 | 18 | def get_schema(version=toolversion, schema_file=SCHEMA_META): 19 | """ 20 | Load JSON Schema to for either a `sigmf-meta` or `sigmf-collection`. 21 | 22 | TODO: In the future load specific schema versions. 23 | """ 24 | schema_dir = Path(__file__).parent 25 | with open(schema_dir / schema_file, "rb") as handle: 26 | schema = json.load(handle) 27 | return schema 28 | -------------------------------------------------------------------------------- /sigmf/sigmf_hash.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Hashing Functions""" 8 | 9 | import hashlib 10 | from pathlib import Path 11 | 12 | 13 | def calculate_sha512(filename=None, fileobj=None, offset=None, size=None): 14 | """ 15 | Return sha512 of file or fileobj. 16 | """ 17 | the_hash = hashlib.sha512() 18 | bytes_to_hash = size 19 | bytes_read = 0 20 | 21 | if filename is not None: 22 | fileobj = open(filename, "rb") 23 | if size is None: 24 | bytes_to_hash = Path(filename).stat().st_size 25 | else: 26 | fileobj.seek(offset) 27 | 28 | while bytes_read < bytes_to_hash: 29 | buff = fileobj.read(min(4096, (bytes_to_hash - bytes_read))) 30 | the_hash.update(buff) 31 | bytes_read += len(buff) 32 | 33 | if filename is not None: 34 | fileobj.close() 35 | 36 | return the_hash.hexdigest() 37 | -------------------------------------------------------------------------------- /sigmf/sigmffile.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """SigMFFile Object""" 8 | 9 | import codecs 10 | import io 11 | import json 12 | import warnings 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | from . 
import __specification__, __version__, schema, sigmf_hash, validate 19 | from .archive import ( 20 | SIGMF_ARCHIVE_EXT, 21 | SIGMF_COLLECTION_EXT, 22 | SIGMF_DATASET_EXT, 23 | SIGMF_METADATA_EXT, 24 | SigMFArchive, 25 | ) 26 | from .error import SigMFAccessError, SigMFError, SigMFFileError 27 | from .utils import dict_merge 28 | 29 | 30 | class SigMFMetafile: 31 | VALID_KEYS = {} 32 | 33 | def __init__(self): 34 | self.version = None 35 | self.schema = None 36 | self._metadata = None 37 | self.shape = None 38 | 39 | def __str__(self): 40 | return self.dumps() 41 | 42 | def __repr__(self): 43 | return f"SigMFFile({self})" 44 | 45 | def __iter__(self): 46 | """special method to iterate through samples""" 47 | self.iter_position = 0 48 | return self 49 | 50 | def ordered_metadata(self): 51 | """ 52 | Get a nicer representation of _metadata. Will sort keys, but put the 53 | top-level fields 'global', 'captures', 'annotations' in front. 54 | 55 | Returns 56 | ------- 57 | ordered_meta : OrderedDict 58 | Cleaner representation of _metadata with top-level keys correctly 59 | ordered and the rest of the keys sorted. 60 | """ 61 | ordered_meta = OrderedDict() 62 | for top_key in self.VALID_KEYS.keys(): 63 | assert top_key in self._metadata 64 | ordered_meta[top_key] = json.loads(json.dumps(self._metadata[top_key], sort_keys=True)) 65 | # If there are other top-level keys, they go later 66 | # TODO: sort potential `other` top-level keys 67 | for oth_key, oth_val in self._metadata.items(): 68 | if oth_key not in self.VALID_KEYS.keys(): 69 | ordered_meta[oth_key] = json.loads(json.dumps(oth_val, sort_keys=True)) 70 | return ordered_meta 71 | 72 | def dump(self, filep, pretty=True): 73 | """ 74 | Write metadata to a file. 75 | 76 | Parameters 77 | ---------- 78 | filep : object 79 | File pointer or something that json.dump() can handle. 80 | pretty : bool, default True 81 | When True will write more human-readable output, otherwise will be flat JSON. 82 | """ 83 | json.dump( 84 | self.ordered_metadata(), 85 | filep, 86 | indent=4 if pretty else None, 87 | separators=(",", ": ") if pretty else None, 88 | ) 89 | 90 | def dumps(self, pretty=True): 91 | """ 92 | Get a string representation of the metadata. 93 | 94 | Parameters 95 | ---------- 96 | pretty : bool, default True 97 | When True will write more human-readable output, otherwise will be flat JSON. 98 | 99 | Returns 100 | ------- 101 | string 102 | String representation of the metadata using json formatter. 
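Examples
--------
A short sketch; assumes `meta` is an already-initialized SigMFFile:

>>> flat = meta.dumps(pretty=False)  # single-line JSON
>>> human = meta.dumps()             # indented for readability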
103 | """ 104 | return json.dumps( 105 | self.ordered_metadata(), 106 | indent=4 if pretty else None, 107 | separators=(",", ": ") if pretty else None, 108 | ) 109 | 110 | 111 | class SigMFFile(SigMFMetafile): 112 | START_INDEX_KEY = "core:sample_start" 113 | LENGTH_INDEX_KEY = "core:sample_count" 114 | GLOBAL_INDEX_KEY = "core:global_index" 115 | START_OFFSET_KEY = "core:offset" 116 | NUM_CHANNELS_KEY = "core:num_channels" 117 | HASH_KEY = "core:sha512" 118 | VERSION_KEY = "core:version" 119 | DATATYPE_KEY = "core:datatype" 120 | FREQUENCY_KEY = "core:frequency" 121 | HEADER_BYTES_KEY = "core:header_bytes" 122 | FLO_KEY = "core:freq_lower_edge" 123 | FHI_KEY = "core:freq_upper_edge" 124 | SAMPLE_RATE_KEY = "core:sample_rate" 125 | COMMENT_KEY = "core:comment" 126 | DESCRIPTION_KEY = "core:description" 127 | AUTHOR_KEY = "core:author" 128 | META_DOI_KEY = "core:meta-doi" 129 | DATA_DOI_KEY = "core:data-doi" 130 | GENERATOR_KEY = "core:generator" 131 | LABEL_KEY = "core:label" 132 | RECORDER_KEY = "core:recorder" 133 | LICENSE_KEY = "core:license" 134 | HW_KEY = "core:hw" 135 | DATASET_KEY = "core:dataset" 136 | TRAILING_BYTES_KEY = "core:trailing_bytes" 137 | METADATA_ONLY_KEY = "core:metadata_only" 138 | EXTENSIONS_KEY = "core:extensions" 139 | DATETIME_KEY = "core:datetime" 140 | LAT_KEY = "core:latitude" 141 | LON_KEY = "core:longitude" 142 | UUID_KEY = "core:uuid" 143 | GEOLOCATION_KEY = "core:geolocation" 144 | COLLECTION_KEY = "core:collection" 145 | GLOBAL_KEY = "global" 146 | CAPTURE_KEY = "captures" 147 | ANNOTATION_KEY = "annotations" 148 | VALID_GLOBAL_KEYS = [ 149 | AUTHOR_KEY, COLLECTION_KEY, DATASET_KEY, DATATYPE_KEY, DATA_DOI_KEY, DESCRIPTION_KEY, EXTENSIONS_KEY, 150 | GEOLOCATION_KEY, HASH_KEY, HW_KEY, LICENSE_KEY, META_DOI_KEY, METADATA_ONLY_KEY, NUM_CHANNELS_KEY, RECORDER_KEY, 151 | SAMPLE_RATE_KEY, START_OFFSET_KEY, TRAILING_BYTES_KEY, VERSION_KEY 152 | ] 153 | VALID_CAPTURE_KEYS = [DATETIME_KEY, FREQUENCY_KEY, HEADER_BYTES_KEY, GLOBAL_INDEX_KEY, START_INDEX_KEY] 154 | VALID_ANNOTATION_KEYS = [ 155 | COMMENT_KEY, FHI_KEY, FLO_KEY, GENERATOR_KEY, LABEL_KEY, LAT_KEY, LENGTH_INDEX_KEY, LON_KEY, START_INDEX_KEY, 156 | UUID_KEY 157 | ] 158 | VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} 159 | 160 | def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): 161 | """ 162 | API for SigMF I/O 163 | 164 | Parameters 165 | ---------- 166 | metadata: str or dict, optional 167 | Metadata for associated dataset. 168 | data_file: str, optional 169 | Path to associated dataset. 170 | global_info: dict, optional 171 | Set global field shortcut if creating new object. 172 | skip_checksum: bool, default False 173 | When True will skip calculating hash on data_file (if present) to check against metadata. 174 | map_readonly: bool, default True 175 | Indicates whether assignments on the numpy.memmap are allowed. 
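Examples
--------
A minimal sketch; the data filename here is hypothetical:

>>> meta = SigMFFile(
...     global_info={"core:datatype": "cf32_le", "core:sample_rate": 1e6},
...     data_file="example.sigmf-data",
...     skip_checksum=True,
... )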
176 | """ 177 | super().__init__() 178 | self.data_file = None 179 | self.sample_count = 0 180 | self._memmap = None 181 | self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case 182 | 183 | self.set_metadata(metadata) 184 | if global_info is not None: 185 | self.set_global_info(global_info) 186 | if data_file is not None: 187 | self.set_data_file(data_file, skip_checksum=skip_checksum, map_readonly=map_readonly) 188 | 189 | def __len__(self): 190 | return self._memmap.shape[0] 191 | 192 | def __eq__(self, other): 193 | """ 194 | Define equality between two `SigMFFile`s. 195 | 196 | Rely on the checksum value in the metadata to decide whether `data_file` is the same since the path of the 197 | dataset is immaterial to equivalency. 198 | """ 199 | if isinstance(other, SigMFFile): 200 | return self._metadata == other._metadata 201 | return False 202 | 203 | def __next__(self): 204 | """get next batch of samples""" 205 | if self.iter_position < len(self): 206 | # normal batch 207 | value = self.read_samples(start_index=self.iter_position, count=1) 208 | self.iter_position += 1 209 | return value 210 | 211 | else: 212 | # no more data 213 | raise StopIteration 214 | 215 | def __getitem__(self, sli): 216 | mem = self._memmap[sli] # matches behavior of numpy.ndarray.__getitem__() 217 | 218 | if self._return_type is None: 219 | return mem 220 | 221 | # is_fixed_point and is_complex 222 | if self._memmap.ndim == 2: 223 | # num_channels == 1 224 | ray = mem[:, 0].astype(self._return_type) + 1.0j * mem[:, 1].astype(self._return_type) 225 | elif self._memmap.ndim == 3: 226 | # num_channels > 1 227 | ray = mem[:, :, 0].astype(self._return_type) + 1.0j * mem[:, :, 1].astype(self._return_type) 228 | else: 229 | raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") 230 | return ray[0] if isinstance(sli, int) else ray # return element instead of 1-element array 231 | 232 | def _get_start_offset(self): 233 | """ 234 | Return the offset of the first sample. 235 | """ 236 | return self.get_global_field(self.START_OFFSET_KEY, 0) 237 | 238 | def get_num_channels(self): 239 | """Returns integer number of channels if present, otherwise 1""" 240 | return self.get_global_field(self.NUM_CHANNELS_KEY, 1) 241 | 242 | def _is_conforming_dataset(self): 243 | """ 244 | The dataset is non-conforming if the datafile contains non-sample bytes; 245 | to be conforming, the global trailing_bytes field must be zero or not set 246 | and all captures `header_bytes` fields must be zero or not set. Because we 247 | do not necessarily know the filename there is no means of verifying that the 248 | meta/data filename roots match, but this will also check that a data file exists. 
249 | 250 | Returns 251 | ------- 252 | `True` if the dataset is conforming to SigMF, `False` otherwise 253 | """ 254 | if self.get_global_field(self.TRAILING_BYTES_KEY, 0): 255 | return False 256 | for capture in self.get_captures(): 257 | # check for any non-zero `header_bytes` fields in captures segments 258 | if capture.get(self.HEADER_BYTES_KEY, 0): 259 | return False 260 | if self.data_file is not None and not self.data_file.is_file(): 261 | return False 262 | # if we get here, the file exists and is conforming 263 | return True 264 | 265 | def get_schema(self): 266 | """ 267 | Return a schema object valid for the current metadata 268 | """ 269 | current_metadata_version = self.get_global_info().get(self.VERSION_KEY) 270 | if self.version != current_metadata_version or self.schema is None: 271 | self.version = current_metadata_version 272 | self.schema = schema.get_schema(self.version) 273 | assert isinstance(self.schema, dict) 274 | return self.schema 275 | 276 | def set_metadata(self, metadata): 277 | """ 278 | Read provided metadata as either None (empty), string, bytes, or dictionary. 279 | """ 280 | if metadata is None: 281 | # Create empty 282 | self._metadata = {self.GLOBAL_KEY: {}, self.CAPTURE_KEY: [], self.ANNOTATION_KEY: []} 283 | elif isinstance(metadata, dict): 284 | self._metadata = metadata 285 | elif isinstance(metadata, (str, bytes)): 286 | self._metadata = json.loads(metadata) 287 | else: 288 | raise SigMFError("Unable to interpret provided metadata.") 289 | 290 | # if num_channels missing, default to 1 291 | if self.get_global_field(self.NUM_CHANNELS_KEY) is None: 292 | self.set_global_field(self.NUM_CHANNELS_KEY, 1) 293 | 294 | # set version to current implementation 295 | self.set_global_field(self.VERSION_KEY, __specification__) 296 | 297 | def set_global_info(self, new_global): 298 | """ 299 | Recursively override existing global metadata with new global metadata. 300 | """ 301 | self._metadata[self.GLOBAL_KEY] = dict_merge(self._metadata[self.GLOBAL_KEY], new_global) 302 | 303 | def get_global_info(self): 304 | """ 305 | Returns a dictionary with all the global info. 306 | """ 307 | try: 308 | return self._metadata.get(self.GLOBAL_KEY, {}) 309 | except AttributeError: 310 | return {} 311 | 312 | def set_global_field(self, key, value): 313 | """ 314 | Inserts a value into the global field. 315 | """ 316 | self._metadata[self.GLOBAL_KEY][key] = value 317 | 318 | def get_global_field(self, key, default=None): 319 | """ 320 | Return a field from the global info, or default if the field is not set. 321 | """ 322 | return self._metadata[self.GLOBAL_KEY].get(key, default) 323 | 324 | def add_capture(self, start_index, metadata=None): 325 | """ 326 | Insert capture info for sample starting at start_index. 327 | If there is already capture info for this index, metadata will be merged 328 | with the existing metadata, overwriting keys if they were previously set. 
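Examples
--------
Sketch: record a retune to 915 MHz at the first sample:

>>> meta.add_capture(0, metadata={"core:frequency": 915e6})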
329 | """ 330 | assert start_index >= self._get_start_offset() 331 | capture_list = self._metadata[self.CAPTURE_KEY] 332 | new_capture = metadata or {} 333 | new_capture[self.START_INDEX_KEY] = start_index 334 | # merge if capture exists 335 | merged = False 336 | for idx, existing_capture in enumerate(capture_list): 337 | if existing_capture[self.START_INDEX_KEY] == start_index: 338 | capture_list[idx] = dict_merge(existing_capture, new_capture) # store merged segment back into the list 339 | merged = True 340 | if not merged: 341 | capture_list += [new_capture] 342 | # sort captures by start_index 343 | self._metadata[self.CAPTURE_KEY] = sorted( 344 | capture_list, 345 | key=lambda item: item[self.START_INDEX_KEY], 346 | ) 347 | 348 | def get_captures(self): 349 | """ 350 | Returns a list of dictionaries representing all captures. 351 | """ 352 | return self._metadata.get(self.CAPTURE_KEY, []) 353 | 354 | def get_capture_info(self, index): 355 | """ 356 | Returns a dictionary containing all the capture information at sample 357 | 'index'. 358 | """ 359 | assert index >= self._get_start_offset() 360 | captures = self._metadata.get(self.CAPTURE_KEY, []) 361 | assert len(captures) > 0 362 | cap_info = captures[0] 363 | for capture in captures: 364 | if capture[self.START_INDEX_KEY] > index: 365 | break 366 | cap_info = capture 367 | return cap_info 368 | 369 | def get_capture_start(self, index): 370 | """ 371 | Returns the start sample index of a given capture, will raise 372 | SigMFAccessError if this field is missing. 373 | """ 374 | start = self.get_captures()[index].get(self.START_INDEX_KEY) 375 | if start is None: 376 | raise SigMFAccessError("Capture {} does not have required {} key".format(index, self.START_INDEX_KEY)) 377 | return start 378 | 379 | def get_capture_byte_boundarys(self, index): 380 | """ 381 | Returns a tuple of the file byte range in a dataset of a given SigMF 382 | capture of the form [start, stop). This function works on either 383 | compliant or noncompliant SigMF Recordings. 384 | """ 385 | if index >= len(self.get_captures()): 386 | raise SigMFAccessError( 387 | "Invalid captures index {} (only {} captures in Recording)".format(index, len(self.get_captures())) 388 | ) 389 | 390 | start_byte = 0 391 | prev_start_sample = 0 392 | for ii, capture in enumerate(self.get_captures()): 393 | start_byte += capture.get(self.HEADER_BYTES_KEY, 0) 394 | start_byte += ( 395 | (self.get_capture_start(ii) - prev_start_sample) * self.get_sample_size() * self.get_num_channels() 396 | ) 397 | prev_start_sample = self.get_capture_start(ii) 398 | if ii >= index: 399 | break 400 | 401 | end_byte = start_byte 402 | if index == len(self.get_captures()) - 1: # last captures...data is the rest of the file 403 | end_byte = self.data_file.stat().st_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) 404 | else: 405 | end_byte += ( 406 | (self.get_capture_start(index + 1) - self.get_capture_start(index)) 407 | * self.get_sample_size() 408 | * self.get_num_channels() 409 | ) 410 | return (start_byte, end_byte) 411 | 412 | def add_annotation(self, start_index, length=None, metadata=None): 413 | """ 414 | Insert annotation at start_index with length (if != None). 
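Examples
--------
Sketch: label 1024 samples starting at index 5000 (illustrative values):

>>> meta.add_annotation(5000, length=1024, metadata={"core:label": "chirp"})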
415 | """ 416 | assert start_index >= self._get_start_offset() 417 | 418 | new_annot = metadata or {} 419 | new_annot[self.START_INDEX_KEY] = start_index 420 | if length is not None: 421 | assert length >= 1 422 | new_annot[self.LENGTH_INDEX_KEY] = length 423 | 424 | self._metadata[self.ANNOTATION_KEY] += [new_annot] 425 | # sort annotations by start_index 426 | self._metadata[self.ANNOTATION_KEY] = sorted( 427 | self._metadata[self.ANNOTATION_KEY], 428 | key=lambda item: item[self.START_INDEX_KEY], 429 | ) 430 | 431 | def get_annotations(self, index=None): 432 | """ 433 | Get relevant annotations from metadata. 434 | 435 | Parameters 436 | ---------- 437 | index : int, default None 438 | If provided returns all annotations that include this sample index. 439 | When omitted returns all annotations. 440 | 441 | Returns 442 | ------- 443 | list of dict 444 | Each dictionary contains one annotation for the sample at `index`. 445 | """ 446 | annotations = self._metadata.get(self.ANNOTATION_KEY, []) 447 | if index is None: 448 | return annotations 449 | 450 | annotations_including_index = [] 451 | for annotation in annotations: 452 | if index < annotation[self.START_INDEX_KEY]: 453 | # index is before annotation starts -> skip 454 | continue 455 | if self.LENGTH_INDEX_KEY in annotation: 456 | # Annotation includes sample_count -> check end index 457 | if index >= annotation[self.START_INDEX_KEY] + annotation[self.LENGTH_INDEX_KEY]: 458 | # index is after annotation end -> skip 459 | continue 460 | 461 | annotations_including_index.append(annotation) 462 | return annotations_including_index 463 | 464 | def get_sample_size(self): 465 | """ 466 | Determines the size of a sample, in bytes, from the datatype of this set. 467 | For complex data, a 'sample' includes both the real and imaginary part. 468 | """ 469 | return dtype_info(self.get_global_field(self.DATATYPE_KEY))["sample_size"] 470 | 471 | def _count_samples(self): 472 | """ 473 | Count, set, and return the total number of samples in the data file. 474 | If there is no data file but there are annotations, use the sample_count 475 | from the annotation with the highest end index. If there are no annotations, 476 | use 0. 477 | For complex data, a 'sample' includes both the real and imaginary part. 478 | """ 479 | if self.data_file is None: 480 | sample_count = self._get_sample_count_from_annotations() 481 | else: 482 | header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()]) 483 | file_size = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes 484 | file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes # bytes 485 | sample_size = self.get_sample_size() # size of a sample in bytes 486 | num_channels = self.get_num_channels() 487 | sample_count = file_data_size // sample_size // num_channels 488 | if file_data_size % (sample_size * num_channels) != 0: 489 | warnings.warn( 490 | f"File `{self.data_file}` does not contain an integer number of samples across channels. " 491 | "It may be invalid data." 492 | ) 493 | if self._get_sample_count_from_annotations() > sample_count: 494 | warnings.warn( 495 | f"File `{self.data_file}` ends before the final annotation in the corresponding SigMF metadata." 496 | ) 497 | self.sample_count = sample_count 498 | return sample_count 499 | 500 | def _get_sample_count_from_annotations(self): 501 | """ 502 | Returns the number of samples based on annotation with highest end index. 
503 | NOTE: Annotations are ordered by START_INDEX_KEY and not end index, so we 504 | need to go through all annotations 505 | """ 506 | annon_sample_count = [] 507 | for annon in self.get_annotations(): 508 | if self.LENGTH_INDEX_KEY in annon: 509 | # Annotation with sample_count 510 | annon_sample_count.append(annon[self.START_INDEX_KEY] + annon[self.LENGTH_INDEX_KEY]) 511 | else: 512 | # Annotation without sample_count - sample count must be at least sample_start 513 | annon_sample_count.append(annon[self.START_INDEX_KEY]) 514 | 515 | if annon_sample_count: 516 | return max(annon_sample_count) 517 | else: 518 | return 0 519 | 520 | def calculate_hash(self): 521 | """ 522 | Calculates the hash of the data file and adds it to the global section. 523 | Also returns a string representation of the hash. 524 | """ 525 | old_hash = self.get_global_field(self.HASH_KEY) 526 | if self.data_file is not None: 527 | new_hash = sigmf_hash.calculate_sha512( 528 | filename=self.data_file, 529 | offset=self.data_offset, 530 | size=self.data_size_bytes, 531 | ) 532 | else: 533 | new_hash = sigmf_hash.calculate_sha512( 534 | fileobj=self.data_buffer, 535 | offset=self.data_offset, 536 | size=self.data_size_bytes, 537 | ) 538 | if old_hash is not None: 539 | if old_hash != new_hash: 540 | raise SigMFFileError("Calculated file hash does not match associated metadata.") 541 | 542 | self.set_global_field(self.HASH_KEY, new_hash) 543 | return new_hash 544 | 545 | def set_data_file( 546 | self, data_file=None, data_buffer=None, skip_checksum=False, offset=0, size_bytes=None, map_readonly=True 547 | ): 548 | """ 549 | Set the datafile path, then recalculate sample count. If not skipped, 550 | update the hash and return the hash string. 551 | """ 552 | if self.get_global_field(self.DATATYPE_KEY) is None: 553 | raise SigMFFileError("Error setting data file, the DATATYPE_KEY must be set in the global metadata first.") 554 | 555 | self.data_file = Path(data_file) if data_file else None 556 | self.data_buffer = data_buffer 557 | self.data_offset = offset 558 | self.data_size_bytes = size_bytes 559 | self._count_samples() 560 | 561 | dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY)) 562 | self.is_complex_data = dtype["is_complex"] 563 | num_channels = self.get_num_channels() 564 | self.ndim = 1 if (num_channels < 2) else 2 565 | 566 | complex_int_separates = dtype["is_complex"] and dtype["is_fixedpoint"] 567 | mapped_dtype_size = dtype["component_size"] if complex_int_separates else dtype["sample_size"] 568 | mapped_length = None if size_bytes is None else size_bytes // mapped_dtype_size 569 | mapped_reshape = (-1,) # we can't use -1 in mapped_length ... 
570 | if num_channels > 1: 571 | mapped_reshape = mapped_reshape + (num_channels,) 572 | if complex_int_separates: 573 | # There is no corresponding numpy type, so we'll have to add another axis, with length of 2 574 | mapped_reshape = mapped_reshape + (2,) 575 | self._return_type = dtype["memmap_convert_type"] 576 | common_args = {"dtype": dtype["memmap_map_type"], "offset": offset} 577 | try: 578 | if self.data_file is not None: 579 | open_mode = "r" if map_readonly else "r+" 580 | memmap_shape = None if mapped_length is None else (mapped_length,) 581 | raveled = np.memmap(self.data_file, mode=open_mode, shape=memmap_shape, **common_args) 582 | elif self.data_buffer is not None: 583 | buffer_count = -1 if mapped_length is None else mapped_length 584 | raveled = np.frombuffer(self.data_buffer.getbuffer(), count=buffer_count, **common_args) 585 | else: 586 | raise SigMFFileError("In sigmffile.set_data_file(), either data_file or data_buffer must be not None") 587 | except SigMFFileError: # TODO include likely exceptions here 588 | warnings.warn("Failed to create data array from memory-map-file or buffer!") 589 | else: 590 | self._memmap = raveled.reshape(mapped_reshape) 591 | self.shape = self._memmap.shape if (self._return_type is None) else self._memmap.shape[:-1] 592 | 593 | if self.data_file is not None: 594 | file_name = self.data_file.name 595 | ext = self.data_file.suffix 596 | if ext.lower() != SIGMF_DATASET_EXT: 597 | self.set_global_field(SigMFFile.DATASET_KEY, file_name) 598 | 599 | if skip_checksum: 600 | return None 601 | return self.calculate_hash() 602 | 603 | def validate(self): 604 | """ 605 | Check schema and throw error if issue. 606 | """ 607 | version = self.get_global_field(self.VERSION_KEY) 608 | validate.validate(self._metadata, self.get_schema()) 609 | 610 | def archive(self, name=None, fileobj=None): 611 | """Dump contents to SigMF archive format. 612 | 613 | `name` and `fileobj` are passed to SigMFArchive and are defined there. 614 | 615 | """ 616 | archive = SigMFArchive(self, name, fileobj) 617 | return archive.path 618 | 619 | def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): 620 | """ 621 | Write metadata file or full archive containing metadata & dataset. 622 | 623 | Parameters 624 | ---------- 625 | file_path : string 626 | Location to save. 627 | pretty : bool, default True 628 | When True will write more human-readable output, otherwise will be flat JSON. 629 | toarchive : bool, default False 630 | If True will write both dataset & metadata into SigMF archive format as a single `tar` file. 631 | If False will only write metadata to `sigmf-meta`. 632 | """ 633 | if not skip_validate: 634 | self.validate() 635 | fns = get_sigmf_filenames(file_path) 636 | if toarchive: 637 | self.archive(fns["archive_fn"]) 638 | else: 639 | with open(fns["meta_fn"], "w") as fp: 640 | self.dump(fp, pretty=pretty) 641 | fp.write("\n") # text files should end in carriage return 642 | 643 | def read_samples_in_capture(self, index=0, autoscale=True): 644 | """ 645 | Reads samples from the specified captures segment in its entirety. 646 | 647 | Parameters 648 | ---------- 649 | index : int, default 0 650 | Captures segment to read samples from. 651 | autoscale : bool, default True 652 | If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0) 653 | 654 | Returns 655 | ------- 656 | data : ndarray 657 | Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY. 
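Examples
--------
Sketch: read every sample in the first capture, autoscaled to +/- 1.0:

>>> samples = meta.read_samples_in_capture(0)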
658 | """ 659 | cb = self.get_capture_byte_boundarys(index) 660 | if (cb[1] - cb[0]) % (self.get_sample_size() * self.get_num_channels()): 661 | warnings.warn( 662 | f"Capture `{index}` in `{self.data_file}` does not contain " 663 | "an integer number of samples across channels. It may be invalid." 664 | ) 665 | 666 | return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size(), autoscale, False) 667 | 668 | def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False): 669 | """ 670 | Reads the specified number of samples starting at the specified index from the associated data file. 671 | 672 | Parameters 673 | ---------- 674 | start_index : int, default 0 675 | Starting sample index from which to read. 676 | count : int, default -1 677 | Number of samples to read. -1 will read whole file. 678 | autoscale : bool, default True 679 | If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0) 680 | raw_components : bool, default False 681 | If True read and return the sample components (individual I & Q for complex, samples for real) 682 | with no conversions or interleaved channels. 683 | 684 | Returns 685 | ------- 686 | data : ndarray 687 | Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY. 688 | """ 689 | if count == 0: 690 | raise IOError("Number of samples must be greater than zero, or -1 for all samples.") 691 | elif start_index + count > self.sample_count: 692 | raise IOError("Cannot read beyond EOF.") 693 | if self.data_file is None and not isinstance(self.data_buffer, io.BytesIO): 694 | if self.get_global_field(self.METADATA_ONLY_KEY, False): 695 | # only if data_file is `None` allows access to dynamically generated datsets 696 | raise SigMFFileError("Cannot read samples from a metadata only distribution.") 697 | else: 698 | raise SigMFFileError("No signal data file has been associated with the metadata.") 699 | first_byte = start_index * self.get_sample_size() * self.get_num_channels() 700 | 701 | if not self._is_conforming_dataset(): 702 | warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous") 703 | return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False) 704 | 705 | def _read_datafile(self, first_byte, nitems, autoscale, raw_components): 706 | """ 707 | internal function for reading samples from datafile 708 | """ 709 | dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY)) 710 | self.is_complex_data = dtype["is_complex"] 711 | is_fixedpoint_data = dtype["is_fixedpoint"] 712 | is_unsigned_data = dtype["is_unsigned"] 713 | data_type_in = dtype["sample_dtype"] 714 | component_type_in = dtype["component_dtype"] 715 | component_size = dtype["component_size"] 716 | 717 | data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4") 718 | num_channels = self.get_num_channels() 719 | 720 | if self.data_file is not None: 721 | fp = open(self.data_file, "rb") 722 | fp.seek(first_byte, 0) 723 | data = np.fromfile(fp, dtype=data_type_in, count=nitems) 724 | elif self.data_buffer is not None: 725 | data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems) 726 | else: 727 | data = self._memmap 728 | 729 | if num_channels != 1: 730 | # return reshaped view for num_channels 731 | # first dimension will be double size if `is_complex_data` 732 | data = data.reshape(data.shape[0] // num_channels, num_channels) 733 | if not raw_components: 734 | data 
= data.astype(data_type_out) 735 | if autoscale and is_fixedpoint_data: 736 | data = data.view(np.dtype("f4")) 737 | if is_unsigned_data: 738 | data -= 2 ** (component_size * 8 - 1) 739 | data *= 2 ** -(component_size * 8 - 1) 740 | data = data.view(data_type_out) 741 | if self.is_complex_data: 742 | data = data.view(np.complex64) 743 | else: 744 | data = data.view(component_type_in) 745 | 746 | if self.data_file is not None: 747 | fp.close() 748 | 749 | return data 750 | 751 | 752 | class SigMFCollection(SigMFMetafile): 753 | VERSION_KEY = "core:version" 754 | DESCRIPTION_KEY = "core:description" 755 | AUTHOR_KEY = "core:author" 756 | COLLECTION_DOI_KEY = "core:collection_doi" 757 | LICENSE_KEY = "core:license" 758 | EXTENSIONS_KEY = "core:extensions" 759 | STREAMS_KEY = "core:streams" 760 | COLLECTION_KEY = "collection" 761 | VALID_COLLECTION_KEYS = [ 762 | AUTHOR_KEY, 763 | COLLECTION_DOI_KEY, 764 | DESCRIPTION_KEY, 765 | EXTENSIONS_KEY, 766 | LICENSE_KEY, 767 | STREAMS_KEY, 768 | VERSION_KEY, 769 | ] 770 | VALID_KEYS = {COLLECTION_KEY: VALID_COLLECTION_KEYS} 771 | 772 | def __init__(self, metafiles: list = None, metadata: dict = None, base_path=None, skip_checksums: bool = False) -> None: 773 | """ 774 | Create a SigMF Collection object. 775 | 776 | Parameters 777 | ---------- 778 | metafiles: list, optional 779 | A list of SigMF metadata filenames objects comprising the Collection. 780 | There should be at least one file. 781 | metadata: dict, optional 782 | Collection metadata to use, if not provided this will populate a minimal set of default metadata. 783 | The `core:streams` field will be regenerated automatically. 784 | base_path : str | bytes | PathLike, optional 785 | Base path of the collection recordings. 786 | skip_checksums : bool, optional 787 | If true will skip calculating checksum on datasets. 788 | 789 | Raises 790 | ------ 791 | SigMFError 792 | If metadata files do not exist. 793 | """ 794 | super().__init__() 795 | self.skip_checksums = skip_checksums 796 | 797 | if base_path is None: 798 | self.base_path = Path("") 799 | else: 800 | self.base_path = Path(base_path) 801 | 802 | if metadata is None: 803 | self._metadata = {self.COLLECTION_KEY: {}} 804 | self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = [] 805 | else: 806 | self._metadata = metadata 807 | 808 | if metafiles is None: 809 | self.metafiles = [] 810 | else: 811 | self.set_streams(metafiles) 812 | 813 | # set version to current implementation 814 | self.set_collection_field(self.VERSION_KEY, __specification__) 815 | 816 | if not self.skip_checksums: 817 | self.verify_stream_hashes() 818 | 819 | def __len__(self) -> int: 820 | """ 821 | The length of a collection is the number of streams. 822 | """ 823 | return len(self.get_stream_names()) 824 | 825 | def verify_stream_hashes(self) -> None: 826 | """ 827 | Compares the stream hashes in the collection metadata to the metadata files. 828 | 829 | Raises 830 | ------ 831 | SigMFFileError 832 | If any dataset checksums do not match saved metadata. 833 | """ 834 | streams = self.get_collection_field(self.STREAMS_KEY, []) 835 | for stream in streams: 836 | old_hash = stream.get("hash") 837 | metafile_name = get_sigmf_filenames(stream.get("name"))["meta_fn"] 838 | metafile_path = self.base_path / metafile_name 839 | if Path.is_file(metafile_path): 840 | new_hash = sigmf_hash.calculate_sha512(filename=metafile_path) 841 | if old_hash != new_hash: 842 | raise SigMFFileError( 843 | f"Calculated file hash for {metafile_path} does not match collection metadata." 
844 | ) 845 | 846 | def set_streams(self, metafiles) -> None: 847 | """ 848 | Configures the collection `core:streams` field from the specified list of metafiles. 849 | """ 850 | self.metafiles = metafiles 851 | streams = [] 852 | for metafile in self.metafiles: 853 | metafile_path = self.base_path / metafile 854 | if metafile.endswith(".sigmf-meta") and Path.is_file(metafile_path): 855 | stream = { 856 | # name must be string here to be serializable later 857 | "name": str(get_sigmf_filenames(metafile)["base_fn"]), 858 | "hash": sigmf_hash.calculate_sha512(filename=metafile_path), 859 | } 860 | streams.append(stream) 861 | else: 862 | raise SigMFFileError(f"Specifed stream file {metafile_path} is not a valid SigMF Metadata file") 863 | self.set_collection_field(self.STREAMS_KEY, streams) 864 | 865 | def get_stream_names(self) -> list: 866 | """ 867 | Returns a list of `name` object(s) from the `collection` level `core:streams` metadata. 868 | """ 869 | return [s.get("name") for s in self.get_collection_field(self.STREAMS_KEY, [])] 870 | 871 | def set_collection_info(self, new_collection: dict) -> None: 872 | """ 873 | Overwrite the collection info with a new dictionary. 874 | """ 875 | self._metadata[self.COLLECTION_KEY] = new_collection.copy() 876 | 877 | def get_collection_info(self) -> dict: 878 | """ 879 | Returns a dictionary with all the collection info. 880 | """ 881 | try: 882 | return self._metadata.get(self.COLLECTION_KEY, {}) 883 | except AttributeError: 884 | return {} 885 | 886 | def set_collection_field(self, key: str, value) -> None: 887 | """ 888 | Inserts a value into the collection field. 889 | """ 890 | self._metadata[self.COLLECTION_KEY][key] = value 891 | 892 | def get_collection_field(self, key: str, default=None): 893 | """ 894 | Return a field from the collection info, or default if the field is not set. 895 | """ 896 | return self._metadata[self.COLLECTION_KEY].get(key, default) 897 | 898 | def tofile(self, file_path, pretty: bool = True) -> None: 899 | """ 900 | Write metadata file 901 | 902 | Parameters 903 | ---------- 904 | file_path : string 905 | Location to save. 906 | pretty : bool, default True 907 | When True will write more human-readable output, otherwise will be flat JSON. 908 | """ 909 | filenames = get_sigmf_filenames(file_path) 910 | with open(filenames["collection_fn"], "w") as handle: 911 | self.dump(handle, pretty=pretty) 912 | handle.write("\n") # text files should end in carriage return 913 | 914 | def get_SigMFFile(self, stream_name=None, stream_index=None): 915 | """ 916 | Returns the SigMFFile instance of the specified stream if it exists 917 | """ 918 | if stream_name is not None and stream_name not in self.get_stream_names(): 919 | # invalid stream name 920 | return 921 | if stream_index is not None and stream_index < len(self): 922 | stream_name = self.get_stream_names()[stream_index] 923 | if stream_name is not None: 924 | metafile = get_sigmf_filenames(stream_name)["meta_fn"] 925 | metafile_path = self.base_path / metafile 926 | return fromfile(metafile_path, skip_checksum=self.skip_checksums) 927 | 928 | 929 | def dtype_info(datatype): 930 | """ 931 | Parses a datatype string conforming to the SigMF spec and returns a dict 932 | of values describing the format. 
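For example (sketch), dtype_info("cf32_le") reports sample_size 8, component_size 4, is_complex True, and is_fixedpoint False.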
933 | 
934 |     Keyword arguments:
935 |     datatype -- a SigMF-compliant datatype string
936 |     """
937 |     if datatype is None:
938 |         raise SigMFFileError("Invalid datatype 'None'.")
939 |     output_info = {}
940 |     dtype = datatype.lower()
941 | 
942 |     is_unsigned_data = "u" in datatype
943 |     is_complex_data = "c" in datatype
944 |     is_fixedpoint_data = "f" not in datatype
945 | 
946 |     dtype = datatype.lower().split("_")
947 | 
948 |     byte_order = ""
949 |     if len(dtype) == 2:
950 |         if dtype[1][0] == "l":
951 |             byte_order = "<"
952 |         elif dtype[1][0] == "b":
953 |             byte_order = ">"
954 |         else:
955 |             raise SigMFFileError("Unrecognized endianness specifier: '{}'".format(dtype[1]))
956 |     dtype = dtype[0]
957 |     if "64" in dtype:
958 |         sample_size = 8
959 |     elif "32" in dtype:
960 |         sample_size = 4
961 |     elif "16" in dtype:
962 |         sample_size = 2
963 |     elif "8" in dtype:
964 |         sample_size = 1
965 |     else:
966 |         raise SigMFFileError("Unrecognized datatype: '{}'".format(dtype))
967 |     component_size = sample_size
968 |     if is_complex_data:
969 |         sample_size *= 2
970 |     sample_size = int(sample_size)
971 | 
972 |     data_type_str = byte_order
973 |     data_type_str += "f" if not is_fixedpoint_data else "u" if is_unsigned_data else "i"
974 |     data_type_str += str(component_size)
975 | 
976 |     memmap_convert_type = None
977 |     if is_complex_data:
978 |         data_type_str = ",".join((data_type_str, data_type_str))
979 |         memmap_map_type = byte_order
980 |         if is_fixedpoint_data:
981 |             memmap_map_type += ("u" if is_unsigned_data else "i") + str(component_size)
982 |             memmap_convert_type = byte_order + "c8"
983 |         else:
984 |             memmap_map_type += "c" + str(sample_size)
985 |     else:
986 |         memmap_map_type = data_type_str
987 | 
988 |     data_type_in = np.dtype(data_type_str)
989 |     output_info["sample_dtype"] = data_type_in
990 |     output_info["component_dtype"] = data_type_in["f0"] if is_complex_data else data_type_in
991 |     output_info["sample_size"] = sample_size
992 |     output_info["component_size"] = component_size
993 |     output_info["is_complex"] = is_complex_data
994 |     output_info["is_unsigned"] = is_unsigned_data
995 |     output_info["is_fixedpoint"] = is_fixedpoint_data
996 |     output_info["memmap_map_type"] = memmap_map_type
997 |     output_info["memmap_convert_type"] = memmap_convert_type
998 |     return output_info
999 | 
1000 | 
1001 | def get_dataset_filename_from_metadata(meta_fn, metadata=None):
1002 |     """
1003 |     Parse provided metadata and return the expected data filename.
1004 | 
1005 |     In the case of a metadata-only distribution, or if the file does not exist,
1006 |     this will return ``None``.
1007 | 
1008 |     Priority for conflicting datasets:
1009 | 
1010 |     1. Use the compliant dataset file (the metadata filename with the ``SIGMF_DATASET_EXT`` extension) if it exists.
1011 |     2. Use the file in the ``DATASET_KEY`` field (non-compliant dataset) if it exists.
1012 |     3. Return ``None`` (may be a metadata-only distribution).
1013 | """ 1014 | compliant_filename = get_sigmf_filenames(meta_fn)["data_fn"] 1015 | noncompliant_filename = metadata["global"].get(SigMFFile.DATASET_KEY, None) 1016 | 1017 | if Path.is_file(compliant_filename): 1018 | if noncompliant_filename: 1019 | warnings.warn( 1020 | f"Compliant Dataset `{compliant_filename}` exists but " 1021 | f"{SigMFFile.DATASET_KEY} is also defined; using `{compliant_filename}`" 1022 | ) 1023 | return compliant_filename 1024 | 1025 | elif noncompliant_filename: 1026 | dir_path = Path(meta_fn).parent 1027 | noncompliant_data_file_path = Path.joinpath(dir_path, noncompliant_filename) 1028 | if Path.is_file(noncompliant_data_file_path): 1029 | if metadata["global"].get(SigMFFile.METADATA_ONLY_KEY, False): 1030 | raise SigMFFileError( 1031 | f"Schema defines {SigMFFile.DATASET_KEY} " 1032 | f"but {SigMFFile.METADATA_ONLY_KEY} also exists; using `{noncompliant_filename}`" 1033 | ) 1034 | return noncompliant_data_file_path 1035 | else: 1036 | raise SigMFFileError( 1037 | f"Non-Compliant Dataset `{noncompliant_filename}` is specified in {SigMFFile.DATASET_KEY} " 1038 | "but does not exist!" 1039 | ) 1040 | return None 1041 | 1042 | 1043 | def fromarchive(archive_path, dir=None, skip_checksum=False): 1044 | """Extract an archive and return a SigMFFile. 1045 | 1046 | The `dir` parameter is no longer used as this function has been changed to 1047 | access SigMF archives without extracting them. 1048 | """ 1049 | from .archivereader import SigMFArchiveReader 1050 | return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile 1051 | 1052 | 1053 | def fromfile(filename, skip_checksum=False): 1054 | """ 1055 | Creates and returns a SigMFFile or SigMFCollection instance with metadata 1056 | loaded from the specified file. The filename may be that of either a 1057 | sigmf-meta file, a sigmf-data file, a sigmf-collection file, or a sigmf 1058 | archive. 1059 | 1060 | Parameters 1061 | ---------- 1062 | filename: str | bytes | PathLike 1063 | Path for SigMF Metadata, Dataset, Archive or Collection (with or without extension). 1064 | skip_checksum: bool, default False 1065 | When True will not read entire dataset to calculate hash. 1066 | 1067 | Returns 1068 | ------- 1069 | object 1070 | SigMFFile with dataset & metadata or a SigMFCollection depending on file type. 1071 | """ 1072 | fns = get_sigmf_filenames(filename) 1073 | meta_fn = fns["meta_fn"] 1074 | archive_fn = fns["archive_fn"] 1075 | collection_fn = fns["collection_fn"] 1076 | 1077 | # extract the extension to check whether we are dealing with an archive, collection, etc. 
1078 |     file_path = Path(filename)
1079 |     ext = file_path.suffix
1080 | 
1081 |     if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn):
1082 |         return fromarchive(archive_fn, skip_checksum=skip_checksum)
1083 | 
1084 |     if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn):
1085 |         collection_fp = open(collection_fn, "rb")
1086 |         bytestream_reader = codecs.getreader("utf-8")
1087 |         mdfile_reader = bytestream_reader(collection_fp)
1088 |         metadata = json.load(mdfile_reader)
1089 |         collection_fp.close()
1090 | 
1091 |         dir_path = meta_fn.parent
1092 |         return SigMFCollection(metadata=metadata, base_path=dir_path, skip_checksums=skip_checksum)
1093 | 
1094 |     else:
1095 |         meta_fp = open(meta_fn, "rb")
1096 |         bytestream_reader = codecs.getreader("utf-8")
1097 |         mdfile_reader = bytestream_reader(meta_fp)
1098 |         metadata = json.load(mdfile_reader)
1099 |         meta_fp.close()
1100 | 
1101 |         data_fn = get_dataset_filename_from_metadata(meta_fn, metadata)
1102 |         return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum)
1103 | 
1104 | 
1105 | def get_sigmf_filenames(filename):
1106 |     """
1107 |     Safely returns a set of SigMF file paths given an input filename.
1108 | 
1109 |     Parameters
1110 |     ----------
1111 |     filename : str | bytes | PathLike
1112 |         The SigMF filename with any extension.
1113 | 
1114 |     Returns
1115 |     -------
1116 |     dict with 'base_fn', 'data_fn', 'meta_fn', 'archive_fn', and 'collection_fn' as keys.
1117 |     """
1118 |     stem_path = Path(filename)
1119 |     # If the path has a sigmf suffix, remove it. Otherwise do not remove the
1120 |     # suffix, because the filename might contain '.' characters which are part
1121 |     # of the filename rather than an extension.
1122 |     sigmf_suffixes = [
1123 |         SIGMF_DATASET_EXT, SIGMF_METADATA_EXT,
1124 |         SIGMF_ARCHIVE_EXT, SIGMF_COLLECTION_EXT,
1125 |     ]
1126 |     if stem_path.suffix in sigmf_suffixes:
1127 |         with_suffix_path = stem_path
1128 |         stem_path = stem_path.with_suffix("")
1129 |     else:
1130 |         # Add a dummy suffix to prevent the .with_suffix() calls below from
1131 |         # overriding part of the filename which is interpreted as a suffix
1132 |         with_suffix_path = stem_path.with_name(f"{stem_path.name}{SIGMF_DATASET_EXT}")
1133 | 
1134 |     return {
1135 |         "base_fn": stem_path,
1136 |         "data_fn": with_suffix_path.with_suffix(SIGMF_DATASET_EXT),
1137 |         "meta_fn": with_suffix_path.with_suffix(SIGMF_METADATA_EXT),
1138 |         "archive_fn": with_suffix_path.with_suffix(SIGMF_ARCHIVE_EXT),
1139 |         "collection_fn": with_suffix_path.with_suffix(SIGMF_COLLECTION_EXT),
1140 |     }
1141 | 
--------------------------------------------------------------------------------
/sigmf/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """Utilities"""
8 | 
9 | import re
10 | import sys
11 | from copy import deepcopy
12 | from datetime import datetime, timezone
13 | 
14 | import numpy as np
15 | 
16 | from .error import SigMFError
17 | 
18 | SIGMF_DATETIME_ISO8601_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"
19 | 
20 | 
21 | def get_sigmf_iso8601_datetime_now() -> str:
22 |     """Get current UTC time as iso8601 string."""
23 |     return datetime.now(timezone.utc).strftime(SIGMF_DATETIME_ISO8601_FMT)
24 | 
25 | 
26 | def parse_iso8601_datetime(string: str) -> datetime:
27 |     """
28 |     Parse an iso8601 string as a datetime struct.
29 |     Input string (indicated by final Z) is in UTC tz.
30 | 
31 |     Example
32 |     -------
33 |     >>> parse_iso8601_datetime("1955-11-05T06:15:00Z")
34 |     datetime.datetime(1955, 11, 5, 6, 15, tzinfo=datetime.timezone.utc)
35 |     """
36 |     match = re.match(r"^(?P<dt>
.*)(?P<frac>\.[0-9]{7,})Z$", string)
37 |     if match:
38 |         # string exceeds max precision allowed by strptime -> truncate to µs
39 |         groups = match.groupdict()
40 |         length = min(7, len(groups["frac"]))
41 |         string = "".join([groups["dt"], groups["frac"][:length], "Z"])
42 | 
43 |     if "." in string:
44 |         # parse float seconds
45 |         format_str = SIGMF_DATETIME_ISO8601_FMT
46 |     else:
47 |         # parse whole seconds
48 |         format_str = SIGMF_DATETIME_ISO8601_FMT.replace(".%f", "")
49 |     return datetime.strptime(string, format_str).replace(tzinfo=timezone.utc)
50 | 
51 | 
52 | def dict_merge(a_dict: dict, b_dict: dict) -> dict:
53 |     """
54 |     Recursively merge `b_dict` into `a_dict`.
55 |     `b_dict[key]` will overwrite `a_dict[key]` if it exists.
56 | 
57 |     Example
58 |     -------
59 |     >>> a, b = {0:0, 1:2}, {1:3, 2:4}
60 |     >>> dict_merge(a, b)
61 |     {0: 0, 1: 3, 2: 4}
62 |     """
63 |     if not isinstance(b_dict, dict):
64 |         return b_dict
65 |     result = deepcopy(a_dict)
66 |     for key, value in b_dict.items():
67 |         if key in result and isinstance(result[key], dict):
68 |             result[key] = dict_merge(result[key], value)
69 |         else:
70 |             result[key] = deepcopy(value)
71 |     return result
72 | 
73 | 
74 | def get_endian_str(ray: np.ndarray) -> str:
75 |     """Return SigMF compatible endianness string for a numpy array"""
76 |     if not isinstance(ray, np.ndarray):
77 |         raise SigMFError("Argument must be a numpy array")
78 |     atype = ray.dtype
79 | 
80 |     if atype.byteorder == "<":
81 |         return "_le"
82 |     if atype.byteorder == ">":
83 |         return "_be"
84 |     # endianness is then either '=' (native) or '|' (doesn't matter)
85 |     return "_le" if sys.byteorder == "little" else "_be"
86 | 
87 | 
88 | def get_data_type_str(ray: np.ndarray) -> str:
89 |     """
90 |     Return the SigMF datatype string for the datatype of numpy array `ray`.
91 | 
92 |     NOTE: this function only supports native numpy types so interleaved complex
93 |     integer types are not supported.
94 |     """
95 |     if not isinstance(ray, np.ndarray):
96 |         raise SigMFError("Argument must be a numpy array")
97 |     atype = ray.dtype
98 |     if atype.kind not in ("u", "i", "f", "c"):
99 |         raise SigMFError("Unsupported data type:", atype)
100 |     data_type_str = ""
101 |     if atype.kind == "c":
102 |         data_type_str += "cf"
103 |         # units are component bits, numpy complex types len(I)+len(Q)
104 |         data_type_str += str(atype.itemsize * 8 // 2)
105 |     elif atype.kind == "f":
106 |         data_type_str += "rf"
107 |         data_type_str += str(atype.itemsize * 8)  # itemsize in bits
108 |     elif atype.kind in ("u", "i"):
109 |         data_type_str += "r" + atype.kind
110 |         data_type_str += str(atype.itemsize * 8)  # itemsize in bits
111 |     if atype.itemsize > 1:
112 |         # only append endianness for types over 8 bits
113 |         data_type_str += get_endian_str(ray)
114 |     return data_type_str
115 | 
--------------------------------------------------------------------------------
/sigmf/validate.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """SigMF Validator"""
8 | import argparse
9 | import glob
10 | import json
11 | import logging
12 | import os
13 | import sys
14 | 
15 | # multi-threading library - should work well as I/O will be the primary
16 | # cost for small SigMF files. Swap to ProcessPool if files are large.
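# A sketch of that swap, assuming hashing of large datasets makes the work
# CPU-bound rather than I/O-bound; ProcessPoolExecutor exposes the same
# executor interface, so the submit/as_completed flow below is unchanged:
#
#     from concurrent.futures import ProcessPoolExecutor
#     with ProcessPoolExecutor() as executor:
#         futures = [executor.submit(_validate_single_file, path, False, log) for path in paths]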
17 | from concurrent.futures import ThreadPoolExecutor, as_completed 18 | 19 | # required for Python 3.7 20 | from typing import Optional, Tuple 21 | 22 | import jsonschema 23 | 24 | from . import __version__ as toolversion 25 | from . import error, schema, sigmffile 26 | 27 | 28 | def validate(metadata, ref_schema=schema.get_schema()) -> None: 29 | """ 30 | Check that the provided `metadata` dict is valid according to the `ref_schema` dict. 31 | Walk entire schema and check all keys. 32 | 33 | Parameters 34 | ---------- 35 | metadata : dict 36 | The SigMF metadata to be validated. 37 | ref_schema : dict, optional 38 | The schema that holds the SigMF metadata definition. 39 | Since the schema evolves over time, we may want to be able to check 40 | against different versions in the *future*. 41 | 42 | Raises 43 | ------ 44 | ValidationError 45 | If metadata is invalid. 46 | """ 47 | jsonschema.validators.validate(instance=metadata, schema=ref_schema) 48 | 49 | # ensure captures and annotations have monotonically increasing sample_start 50 | for key in ["captures", "annotations"]: 51 | count = -1 52 | for item in metadata[key]: 53 | new_count = item["core:sample_start"] 54 | if new_count < count: 55 | raise jsonschema.exceptions.ValidationError(f"{key} has incorrect sample start ordering.") 56 | count = new_count 57 | 58 | 59 | def _validate_single_file(filename, skip_checksum: bool, logger: logging.Logger) -> int: 60 | """Validates a single SigMF file. 61 | 62 | To be called as part of a multithreading / multiprocess application. 63 | 64 | Parameters 65 | ---------- 66 | filename : str 67 | Path and name to sigmf.data or sigmf.meta file. 68 | skip_checksum : bool 69 | Whether to perform checksum computation. 70 | logger : logging.Logger 71 | Logging object to log errors to. 72 | 73 | Returns 74 | ------- 75 | rc : int 76 | 0 if OK, 1 if err 77 | """ 78 | try: 79 | # load signal 80 | signal = sigmffile.fromfile(filename, skip_checksum=skip_checksum) 81 | # validate 82 | signal.validate() 83 | 84 | # handle any of 4 exceptions at once... 85 | except (jsonschema.exceptions.ValidationError, error.SigMFFileError, json.decoder.JSONDecodeError, IOError) as err: 86 | # catch the error, log, and continue 87 | logger.error(f"file `{filename}`: {err}") 88 | return 1 89 | else: 90 | return 0 91 | 92 | 93 | def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None: 94 | """entry-point for command-line validator""" 95 | parser = argparse.ArgumentParser( 96 | description="Validate SigMF Archive or file pair against JSON schema.", prog="sigmf_validate" 97 | ) 98 | parser.add_argument("path", nargs="*", help="SigMF path(s). Accepts * wildcards and extensions are optional.") 99 | parser.add_argument("--skip-checksum", action="store_true", help="Skip reading dataset to validate checksum.") 100 | parser.add_argument("-v", "--verbose", action="count", default=0) 101 | parser.add_argument("--version", action="version", version=f"%(prog)s {toolversion}") 102 | 103 | # allow pass-in arg_tuple for testing purposes 104 | args = parser.parse_args(arg_tuple) 105 | 106 | level_lut = { 107 | 0: logging.WARNING, 108 | 1: logging.INFO, 109 | 2: logging.DEBUG, 110 | } 111 | log = logging.getLogger() 112 | logging.basicConfig(level=level_lut[min(args.verbose, 2)]) 113 | 114 | paths = [] 115 | # resolve possible wildcards 116 | for path in args.path: 117 | paths += glob.glob(path) 118 | 119 | # multi-processing / threading pathway. 
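# Note: the pathway below resolves a worker count, submits one
# _validate_single_file() job per resolved path, and then tallies zero
# return codes as successes via as_completed().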
120 | n_completed = 0 121 | n_total = len(paths) 122 | # estimate number of CPU cores 123 | # https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python 124 | est_num_workers = len(os.sched_getaffinity(0)) if os.name == 'posix' else os.cpu_count() 125 | # create a thread pool 126 | # https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor 127 | with ThreadPoolExecutor(max_workers=est_num_workers) as executor: 128 | # submit jobs 129 | future_validations = {executor.submit(_validate_single_file, path, args.skip_checksum, log) for path in paths} 130 | # load and await jobs to complete... no return 131 | for future in as_completed(future_validations): 132 | if future.result() == 0: 133 | n_completed += 1 134 | 135 | if n_total == 0: 136 | log.error("No paths to validate.") 137 | sys.exit(1) 138 | elif n_completed != n_total: 139 | log.info("Validated %d of %d files OK", n_completed, n_total) 140 | sys.exit(1) 141 | else: 142 | log.info("Validated all %d files OK!", n_total) 143 | 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sigmf/sigmf-python/2ae107f0e34ae4d3cf8a4b23d39803e85839a628/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Provides pytest fixtures for other tests.""" 8 | 9 | import tempfile 10 | 11 | import pytest 12 | 13 | from sigmf import __specification__ 14 | from sigmf.archive import SIGMF_DATASET_EXT 15 | from sigmf.sigmffile import SigMFFile 16 | 17 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 18 | 19 | 20 | @pytest.fixture 21 | def test_data_file(): 22 | """when called, yields temporary dataset""" 23 | with tempfile.NamedTemporaryFile(suffix=f".{SIGMF_DATASET_EXT}") as temp: 24 | TEST_FLOAT32_DATA.tofile(temp.name) 25 | yield temp 26 | 27 | 28 | @pytest.fixture 29 | def test_sigmffile(test_data_file): 30 | """If pytest uses this signature, will return valid SigMF file.""" 31 | meta = SigMFFile() 32 | meta.set_global_field("core:datatype", "rf32_le") 33 | meta.set_global_field("core:version", __specification__) 34 | meta.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA)) 35 | meta.add_capture(start_index=0) 36 | meta.set_data_file(test_data_file.name) 37 | assert meta._metadata == TEST_METADATA 38 | return meta 39 | -------------------------------------------------------------------------------- /tests/test_archive.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFArchive""" 8 | 9 | import codecs 10 | import json 11 | import tarfile 12 | import tempfile 13 | from pathlib import Path 14 | 15 | import jsonschema 16 | import numpy as np 17 | import pytest 18 | 19 | from sigmf import error 20 | from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT 21 | 22 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 23 | 24 | 25 | def create_test_archive(test_sigmffile, tmpfile): 26 | sigmf_archive = test_sigmffile.archive(fileobj=tmpfile) 27 | sigmf_tarfile = tarfile.open(sigmf_archive, mode="r", format=tarfile.PAX_FORMAT) 28 | return sigmf_tarfile 29 | 30 | 31 | def test_without_data_file_throws_fileerror(test_sigmffile): 32 | test_sigmffile.data_file = None 33 | with tempfile.NamedTemporaryFile() as temp: 34 | with pytest.raises(error.SigMFFileError): 35 | test_sigmffile.archive(name=temp.name) 36 | 37 | 38 | def test_invalid_md_throws_validationerror(test_sigmffile): 39 | del test_sigmffile._metadata["global"]["core:datatype"] # required field 40 | with tempfile.NamedTemporaryFile() as temp: 41 | with pytest.raises(jsonschema.exceptions.ValidationError): 42 | test_sigmffile.archive(name=temp.name) 43 | 44 | 45 | def test_name_wrong_extension_throws_fileerror(test_sigmffile): 46 | with tempfile.NamedTemporaryFile() as temp: 47 | with pytest.raises(error.SigMFFileError): 48 | test_sigmffile.archive(name=temp.name + ".zip") 49 | 50 | 51 | def test_fileobj_extension_ignored(test_sigmffile): 52 | with tempfile.NamedTemporaryFile(suffix=".tar") as temp: 53 | test_sigmffile.archive(fileobj=temp) 54 | 55 | 56 | def test_name_used_in_fileobj(test_sigmffile): 57 | with tempfile.NamedTemporaryFile() as temp: 58 | sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp) 59 | sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") 60 | basedir, file1, file2 = sigmf_tarfile.getmembers() 61 | assert basedir.name == "testarchive" 62 | 63 | def filename(tarinfo): 64 | return Path(tarinfo.name).stem 65 | 66 | assert filename(file1) == "testarchive" 67 | assert filename(file2) == "testarchive" 68 | 69 | 70 | def test_fileobj_not_closed(test_sigmffile): 71 | with tempfile.NamedTemporaryFile() as temp: 72 | test_sigmffile.archive(fileobj=temp) 73 | assert not temp.file.closed 74 | 75 | 76 | def test_unwritable_fileobj_throws_fileerror(test_sigmffile): 77 | with tempfile.NamedTemporaryFile(mode="rb") as temp: 78 | with pytest.raises(error.SigMFFileError): 79 | test_sigmffile.archive(fileobj=temp) 80 | 81 | 82 | def test_unwritable_name_throws_fileerror(test_sigmffile): 83 | # Cannot assume /root/ is unwritable (e.g. 
Docker environment)
84 |     # so use invalid filename
85 |     unwritable_file = "/bad_name/"
86 |     with pytest.raises(error.SigMFFileError):
87 |         test_sigmffile.archive(name=unwritable_file)
88 | 
89 | 
90 | def test_tarfile_layout(test_sigmffile):
91 |     with tempfile.NamedTemporaryFile() as temp:
92 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
93 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
94 |         assert tarfile.TarInfo.isdir(basedir)
95 |         assert tarfile.TarInfo.isfile(file1)
96 |         assert tarfile.TarInfo.isfile(file2)
97 | 
98 | 
99 | def test_tarfile_names_and_extensions(test_sigmffile):
100 |     with tempfile.NamedTemporaryFile() as temp:
101 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
102 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
103 |         archive_name = basedir.name
104 |         assert archive_name == Path(temp.name).name
105 |         file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT}
106 | 
107 |         file1_name, file1_ext = Path(file1.name).stem, Path(file1.name).suffix
108 |         assert file1_name == archive_name
109 |         assert file1_ext in file_extensions
110 | 
111 |         file_extensions.remove(file1_ext)
112 | 
113 |         file2_name, file2_ext = Path(file2.name).stem, Path(file2.name).suffix
114 |         assert file2_name == archive_name
115 |         assert file2_ext in file_extensions
116 | 
117 | 
118 | def test_tarfile_permissions(test_sigmffile):
119 |     with tempfile.NamedTemporaryFile() as temp:
120 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
121 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
122 |         assert basedir.mode == 0o755
123 |         assert file1.mode == 0o644
124 |         assert file2.mode == 0o644
125 | 
126 | 
127 | def test_contents(test_sigmffile):
128 |     with tempfile.NamedTemporaryFile() as temp:
129 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
130 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
131 |         if file1.name.endswith(SIGMF_METADATA_EXT):
132 |             mdfile = file1
133 |             datfile = file2
134 |         else:
135 |             mdfile = file2
136 |             datfile = file1
137 | 
138 |         bytestream_reader = codecs.getreader("utf-8")  # bytes -> str
139 |         mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile))
140 |         assert json.load(mdfile_reader) == TEST_METADATA
141 | 
142 |         datfile_reader = sigmf_tarfile.extractfile(datfile)
143 |         # calling `fileno` on `tarfile.ExFileObject` throws error (?), but
144 |         # np.fromfile requires it, so we need this extra step
145 |         data = np.frombuffer(datfile_reader.read(), dtype=np.float32)
146 | 
147 |         assert np.array_equal(data, TEST_FLOAT32_DATA)
148 | 
149 | 
150 | def test_tarfile_type(test_sigmffile):
151 |     with tempfile.NamedTemporaryFile() as temp:
152 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
153 |         assert sigmf_tarfile.format == tarfile.PAX_FORMAT
154 | 
--------------------------------------------------------------------------------
/tests/test_archivereader.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python.
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFArchiveReader""" 8 | 9 | import unittest 10 | from tempfile import NamedTemporaryFile 11 | 12 | import numpy as np 13 | 14 | import sigmf 15 | from sigmf import SigMFArchiveReader, SigMFFile, __specification__ 16 | 17 | 18 | class TestArchiveReader(unittest.TestCase): 19 | def setUp(self): 20 | # in order to check shapes we need some positive number of samples to work with 21 | # number of samples should be lowest common factor of num_channels 22 | self.raw_count = 16 23 | self.lut = { 24 | "i8": np.int8, 25 | "u8": np.uint8, 26 | "i16": np.int16, 27 | "u16": np.uint16, 28 | "u32": np.uint32, 29 | "i32": np.int32, 30 | "f32": np.float32, 31 | "f64": np.float64, 32 | } 33 | 34 | def test_access_data_without_untar(self): 35 | """iterate through datatypes and verify IO is correct""" 36 | temp_data = NamedTemporaryFile() 37 | temp_archive = NamedTemporaryFile(suffix=".sigmf") 38 | 39 | for key, dtype in self.lut.items(): 40 | # for each type of storage 41 | temp_samples = np.arange(self.raw_count, dtype=dtype) 42 | temp_samples.tofile(temp_data.name) 43 | for num_channels in [1, 4, 8]: 44 | # for single or 8 channel 45 | for complex_prefix in ["r", "c"]: 46 | # for real or complex 47 | target_count = self.raw_count 48 | temp_meta = SigMFFile( 49 | data_file=temp_data.name, 50 | global_info={ 51 | SigMFFile.DATATYPE_KEY: f"{complex_prefix}{key}_le", 52 | SigMFFile.NUM_CHANNELS_KEY: num_channels, 53 | }, 54 | ) 55 | temp_meta.tofile(temp_archive.name, toarchive=True) 56 | 57 | readback = SigMFArchiveReader(temp_archive.name) 58 | readback_samples = readback[:] 59 | 60 | if complex_prefix == "c": 61 | # complex data will be half as long 62 | target_count //= 2 63 | self.assertTrue(np.all(np.iscomplex(readback_samples))) 64 | if num_channels != 1: 65 | # check expected # of channels 66 | self.assertEqual( 67 | readback_samples.ndim, 68 | 2, 69 | "Mismatch in shape of readback samples.", 70 | ) 71 | target_count //= num_channels 72 | 73 | self.assertEqual( 74 | target_count, 75 | temp_meta._count_samples(), 76 | "Mismatch in expected metadata length.", 77 | ) 78 | self.assertEqual( 79 | target_count, 80 | len(readback), 81 | "Mismatch in expected readback length", 82 | ) 83 | 84 | 85 | def test_archiveread_data_file_unchanged(test_sigmffile): 86 | with NamedTemporaryFile(suffix=".sigmf") as temp_file: 87 | input_samples = test_sigmffile.read_samples() 88 | test_sigmffile.archive(temp_file.name) 89 | arc = sigmf.sigmffile.fromfile(temp_file.name) 90 | output_samples = arc.read_samples() 91 | 92 | assert np.array_equal(input_samples, output_samples) 93 | -------------------------------------------------------------------------------- /tests/test_collection.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """Tests for collections"""
8 | 
9 | import copy
10 | import os
11 | import shutil
12 | import tempfile
13 | import unittest
14 | from pathlib import Path
15 | 
16 | import numpy as np
17 | from hypothesis import given
18 | from hypothesis import strategies as st
19 | 
20 | from sigmf.archive import SIGMF_COLLECTION_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT
21 | from sigmf.sigmffile import SigMFCollection, SigMFFile, fromfile
22 | 
23 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA
24 | 
25 | 
26 | class TestCollection(unittest.TestCase):
27 |     """unit tests for collections"""
28 | 
29 |     def setUp(self):
30 |         """create temporary path"""
31 |         self.temp_dir = Path(tempfile.mkdtemp())
32 | 
33 |     def tearDown(self):
34 |         """remove temporary path"""
35 |         shutil.rmtree(self.temp_dir)
36 | 
37 |     @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"]))
38 |     def test_load_collection(self, subdir: str) -> None:
39 |         """test path handling for collections"""
40 |         data_name1 = "dat1" + SIGMF_DATASET_EXT
41 |         data_name2 = "dat2" + SIGMF_DATASET_EXT
42 |         meta_name1 = "dat1" + SIGMF_METADATA_EXT
43 |         meta_name2 = "dat2" + SIGMF_METADATA_EXT
44 |         collection_name = "collection" + SIGMF_COLLECTION_EXT
45 |         data_path1 = self.temp_dir / subdir / data_name1
46 |         data_path2 = self.temp_dir / subdir / data_name2
47 |         meta_path1 = self.temp_dir / subdir / meta_name1
48 |         meta_path2 = self.temp_dir / subdir / meta_name2
49 |         collection_path = self.temp_dir / subdir / collection_name
50 |         os.makedirs(collection_path.parent, exist_ok=True)
51 | 
52 |         # create data files
53 |         TEST_FLOAT32_DATA.tofile(data_path1)
54 |         TEST_FLOAT32_DATA.tofile(data_path2)
55 | 
56 |         # create metadata files
57 |         metadata = copy.deepcopy(TEST_METADATA)
58 |         meta1 = SigMFFile(metadata=metadata, data_file=data_path1)
59 |         meta2 = SigMFFile(metadata=metadata, data_file=data_path2)
60 |         meta1.tofile(meta_path1)
61 |         meta2.tofile(meta_path2)
62 | 
63 |         # create collection
64 |         collection = SigMFCollection(
65 |             metafiles=[meta_name1, meta_name2],
66 |             base_path=str(self.temp_dir / subdir),
67 |         )
68 |         collection.tofile(collection_path)
69 | 
70 |         # load collection
71 |         collection_loopback = fromfile(collection_path)
72 |         meta1_loopback = collection_loopback.get_SigMFFile(stream_index=0)
73 |         meta2_loopback = collection_loopback.get_SigMFFile(stream_index=1)
74 | 
75 |         self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta1_loopback.read_samples()))
76 |         self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta2_loopback[:]))
77 | 
--------------------------------------------------------------------------------
/tests/test_ncd.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python.
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Non-Conforming Datasets""" 8 | 9 | import copy 10 | import shutil 11 | import tempfile 12 | import unittest 13 | from pathlib import Path 14 | 15 | import numpy as np 16 | from hypothesis import given 17 | from hypothesis import strategies as st 18 | 19 | from sigmf.error import SigMFFileError 20 | from sigmf.sigmffile import SigMFFile, fromfile 21 | 22 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 23 | 24 | 25 | class TestNonConformingDataset(unittest.TestCase): 26 | """unit tests for NCD""" 27 | 28 | def setUp(self): 29 | """create temporary path""" 30 | self.temp_dir = Path(tempfile.mkdtemp()) 31 | 32 | def tearDown(self): 33 | """remove temporary path""" 34 | shutil.rmtree(self.temp_dir) 35 | 36 | @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"])) 37 | def test_load_ncd(self, subdir: str) -> None: 38 | """test loading non-conforming dataset""" 39 | data_path = self.temp_dir / subdir / "dat.bin" 40 | meta_path = self.temp_dir / subdir / "dat.sigmf-meta" 41 | Path.mkdir(data_path.parent, parents=True, exist_ok=True) 42 | 43 | # create data file 44 | TEST_FLOAT32_DATA.tofile(data_path) 45 | 46 | # create metadata file 47 | ncd_metadata = copy.deepcopy(TEST_METADATA) 48 | meta = SigMFFile(metadata=ncd_metadata, data_file=data_path) 49 | meta.tofile(meta_path) 50 | 51 | # load dataset & validate we can read all the data 52 | meta_loopback = fromfile(meta_path) 53 | self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback.read_samples())) 54 | self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback[:])) 55 | 56 | # delete the non-conforming dataset and ensure error is raised due to missing dataset; 57 | # in Windows the SigMFFile instances need to be garbage collected first, 58 | # otherwise the np.memmap instances (stored in self._memmap) block the deletion 59 | meta = None 60 | meta_loopback = None 61 | Path.unlink(data_path) 62 | with self.assertRaises(SigMFFileError): 63 | _ = fromfile(meta_path) 64 | -------------------------------------------------------------------------------- /tests/test_sigmffile.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFFile Object""" 8 | 9 | import copy 10 | import json 11 | import shutil 12 | import tempfile 13 | import unittest 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | from sigmf import error, sigmffile, utils 19 | from sigmf.sigmffile import SigMFFile 20 | 21 | from .testdata import * 22 | 23 | 24 | class TestClassMethods(unittest.TestCase): 25 | def setUp(self): 26 | """ensure tests have a valid SigMF object to work with""" 27 | self.temp_dir = Path(tempfile.mkdtemp()) 28 | self.temp_path_data = self.temp_dir / "trash.sigmf-data" 29 | self.temp_path_meta = self.temp_dir / "trash.sigmf-meta" 30 | TEST_FLOAT32_DATA.tofile(self.temp_path_data) 31 | self.sigmf_object = SigMFFile(TEST_METADATA, data_file=self.temp_path_data) 32 | self.sigmf_object.tofile(self.temp_path_meta) 33 | 34 | def tearDown(self): 35 | """remove temporary dir""" 36 | shutil.rmtree(self.temp_dir) 37 | 38 | def test_pathlib_handle(self): 39 | """ensure file can be a string or a pathlib object""" 40 | self.assertTrue(self.temp_path_data.exists()) 41 | obj_str = sigmffile.fromfile(str(self.temp_path_data)) 42 | obj_str.validate() 43 | obj_pth = sigmffile.fromfile(self.temp_path_data) 44 | obj_pth.validate() 45 | 46 | def test_filenames_with_dots(self): 47 | """test that filenames with non-extension . characters are handled correctly""" 48 | filenames = ["a", "b.c", "d.e.f"] 49 | for filename in filenames: 50 | temp_path_data = self.temp_dir / f"{filename}.sigmf-data" 51 | temp_path_meta = self.temp_dir / f"{filename}.sigmf-meta" 52 | TEST_FLOAT32_DATA.tofile(temp_path_data) 53 | self.sigmf_object = SigMFFile(TEST_METADATA, data_file=temp_path_data) 54 | self.sigmf_object.tofile(temp_path_meta) 55 | files = [str(temp_path_data), temp_path_data, str(temp_path_meta), temp_path_meta] 56 | for filename in files: 57 | obj = sigmffile.fromfile(filename) 58 | obj.validate() 59 | 60 | def test_iterator_basic(self): 61 | """make sure default batch_size works""" 62 | count = 0 63 | for _ in self.sigmf_object: 64 | count += 1 65 | self.assertEqual(count, len(self.sigmf_object)) 66 | 67 | def test_checksum(self): 68 | """Ensure checksum fails when incorrect or empty string.""" 69 | for new_checksum in ("", "a", 0): 70 | bad_checksum_metadata = copy.deepcopy(TEST_METADATA) 71 | bad_checksum_metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = new_checksum 72 | with self.assertRaises(error.SigMFFileError): 73 | _ = SigMFFile(bad_checksum_metadata, self.temp_path_data) 74 | 75 | def test_equality(self): 76 | """Ensure __eq__ working as expected""" 77 | other = SigMFFile(copy.deepcopy(TEST_METADATA)) 78 | self.assertEqual(self.sigmf_object, other) 79 | # different after changing any part of metadata 80 | other.add_annotation(start_index=0, metadata={"a": 0}) 81 | self.assertNotEqual(self.sigmf_object, other) 82 | 83 | 84 | class TestAnnotationHandling(unittest.TestCase): 85 | def test_get_annotations_with_index(self): 86 | """Test that only annotations containing index are returned from get_annotations()""" 87 | smf = SigMFFile(copy.deepcopy(TEST_METADATA)) 88 | smf.add_annotation(start_index=1) 89 | smf.add_annotation(start_index=4, length=4) 90 | annotations_idx10 = smf.get_annotations(index=10) 91 | self.assertListEqual( 92 | annotations_idx10, 93 | [ 94 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 16}, 95 | {SigMFFile.START_INDEX_KEY: 1}, 96 | ], 97 | ) 98 | 99 | def 
test__count_samples_from_annotation(self):
100 |         """Make sure sample count from annotations uses the correct end index"""
101 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
102 |         smf.add_annotation(start_index=0, length=32)
103 |         smf.add_annotation(start_index=4, length=4)
104 |         sample_count = smf._count_samples()
105 |         self.assertEqual(sample_count, 32)
106 | 
107 |     def test_set_data_file_without_annotations(self):
108 |         """
109 |         Make sure setting data_file with no annotations registered does not
110 |         raise any errors
111 |         """
112 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
113 |         smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
114 |         with tempfile.TemporaryDirectory() as tmpdir:
115 |             temp_path_data = Path(tmpdir) / "datafile"
116 |             TEST_FLOAT32_DATA.tofile(temp_path_data)
117 |             smf.set_data_file(temp_path_data)
118 |             samples = smf.read_samples()
119 |             self.assertTrue(len(samples) == 16)
120 | 
121 |     def test_set_data_file_with_annotations(self):
122 |         """
123 |         Make sure setting data_file with annotations registered uses the sample
124 |         count from data_file and issues a warning if annotations have end
125 |         indices bigger than the file end index
126 |         """
127 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
128 |         smf.add_annotation(start_index=0, length=32)
129 |         with tempfile.TemporaryDirectory() as tmpdir:
130 |             temp_path_data = Path(tmpdir) / "datafile"
131 |             TEST_FLOAT32_DATA.tofile(temp_path_data)
132 |             with self.assertWarns(Warning):
133 |                 # Issues warning since file ends before the final annotation
134 |                 smf.set_data_file(temp_path_data)
135 |             samples = smf.read_samples()
136 |             self.assertTrue(len(samples) == 16)
137 | 
138 | 
139 | class TestMultichannel(unittest.TestCase):
140 |     def setUp(self):
141 |         # in order to check shapes we need some positive number of samples to work with
142 |         # number of samples should be lowest common factor of num_channels
143 |         self.raw_count = 16
144 |         self.lut = {
145 |             "i8": np.int8,
146 |             "u8": np.uint8,
147 |             "i16": np.int16,
148 |             "u16": np.uint16,
149 |             "u32": np.uint32,
150 |             "i32": np.int32,
151 |             "f32": np.float32,
152 |             "f64": np.float64,
153 |         }
154 |         self.temp_file = tempfile.NamedTemporaryFile()
155 |         self.temp_path = Path(self.temp_file.name)
156 | 
157 |     def tearDown(self):
158 |         """clean-up temporary files"""
159 |         self.temp_file.close()
160 | 
161 |     def test_multichannel_types(self):
162 |         """check that real & complex for all types is reading multiple channels correctly"""
163 |         for key, dtype in self.lut.items():
164 |             # for each type of storage
165 |             np.arange(self.raw_count, dtype=dtype).tofile(self.temp_path)
166 |             for num_channels in [1, 4, 8]:
167 |                 # for single or 8 channel
168 |                 for complex_prefix in ["r", "c"]:
169 |                     # for real or complex
170 |                     check_count = self.raw_count
171 |                     temp_signal = SigMFFile(
172 |                         data_file=self.temp_path,
173 |                         global_info={
174 |                             SigMFFile.DATATYPE_KEY: f"{complex_prefix}{key}_le",
175 |                             SigMFFile.NUM_CHANNELS_KEY: num_channels,
176 |                         },
177 |                     )
178 |                     temp_samples = temp_signal.read_samples()
179 | 
180 |                     if complex_prefix == "c":
181 |                         # complex data will be half as long
182 |                         check_count //= 2
183 |                         self.assertTrue(np.all(np.iscomplex(temp_samples)))
184 |                     if num_channels != 1:
185 |                         self.assertEqual(temp_samples.ndim, 2)
186 |                         check_count //= num_channels
187 | 
188 |                     self.assertEqual(check_count, temp_signal._count_samples())
189 | 
190 |     def test_multichannel_seek(self):
191 |         """ensure that seeking is working correctly with multichannel files"""
192 |         # write some dummy data and read back
193 | 
np.arange(18, dtype=np.uint16).tofile(self.temp_path) 194 | temp_signal = SigMFFile( 195 | data_file=self.temp_path, 196 | global_info={ 197 | SigMFFile.DATATYPE_KEY: "cu16_le", 198 | SigMFFile.NUM_CHANNELS_KEY: 3, 199 | }, 200 | ) 201 | # read after the first sample 202 | temp_samples = temp_signal.read_samples(start_index=1, autoscale=False) 203 | # ensure samples are in the order we expect 204 | self.assertTrue(np.all(temp_samples[:, 0] == np.array([6 + 7j, 12 + 13j]))) 205 | 206 | 207 | def test_key_validity(): 208 | """ensure the keys in test metadata are valid""" 209 | for top_key, top_val in TEST_METADATA.items(): 210 | if isinstance(top_val, dict): 211 | for core_key in top_val.keys(): 212 | assert core_key in vars(SigMFFile)[f"VALID_{top_key.upper()}_KEYS"] 213 | elif isinstance(top_val, list): 214 | # annotations are in a list 215 | for annot in top_val: 216 | for core_key in annot.keys(): 217 | assert core_key in SigMFFile.VALID_ANNOTATION_KEYS 218 | else: 219 | raise ValueError("expected list or dict") 220 | 221 | 222 | def test_ordered_metadata(): 223 | """check to make sure the metadata is sorted as expected""" 224 | sigf = SigMFFile() 225 | top_sort_order = ["global", "captures", "annotations"] 226 | for kdx, key in enumerate(sigf.ordered_metadata()): 227 | assert kdx == top_sort_order.index(key) 228 | 229 | 230 | class TestCaptures(unittest.TestCase): 231 | """ensure capture access tools work properly""" 232 | 233 | def setUp(self) -> None: 234 | """ensure tests have a valid SigMF object to work with""" 235 | self.temp_dir = Path(tempfile.mkdtemp()) 236 | self.temp_path_data = self.temp_dir / "trash.sigmf-data" 237 | self.temp_path_meta = self.temp_dir / "trash.sigmf-meta" 238 | 239 | def tearDown(self) -> None: 240 | """remove temporary dir""" 241 | shutil.rmtree(self.temp_dir) 242 | 243 | def prepare(self, data: list, meta: dict, dtype: type) -> SigMFFile: 244 | """write some data and metadata to temporary paths""" 245 | np.array(data, dtype=dtype).tofile(self.temp_path_data) 246 | with open(self.temp_path_meta, "w") as handle: 247 | json.dump(meta, handle) 248 | meta = sigmffile.fromfile(self.temp_path_meta, skip_checksum=True) 249 | return meta 250 | 251 | def test_000(self) -> None: 252 | """compliant two-capture recording""" 253 | meta = self.prepare(TEST_U8_DATA0, TEST_U8_META0, np.uint8) 254 | self.assertEqual(256, meta._count_samples()) 255 | self.assertTrue(meta._is_conforming_dataset()) 256 | self.assertTrue((0, 0), meta.get_capture_byte_boundarys(0)) 257 | self.assertTrue((0, 256), meta.get_capture_byte_boundarys(1)) 258 | self.assertTrue(np.array_equal(TEST_U8_DATA0, meta.read_samples(autoscale=False))) 259 | self.assertTrue(np.array_equal(np.array([]), meta.read_samples_in_capture(0))) 260 | self.assertTrue(np.array_equal(TEST_U8_DATA0, meta.read_samples_in_capture(1, autoscale=False))) 261 | 262 | def test_001(self) -> None: 263 | """two capture recording with header_bytes and trailing_bytes set""" 264 | meta = self.prepare(TEST_U8_DATA1, TEST_U8_META1, np.uint8) 265 | self.assertEqual(192, meta._count_samples()) 266 | self.assertFalse(meta._is_conforming_dataset()) 267 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 268 | self.assertTrue((160, 224), meta.get_capture_byte_boundarys(1)) 269 | self.assertTrue(np.array_equal(np.arange(128), meta.read_samples_in_capture(0, autoscale=False))) 270 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(1, autoscale=False))) 271 | 272 | def test_002(self) -> None: 273 | 
"""two capture recording with multiple header_bytes set""" 274 | meta = self.prepare(TEST_U8_DATA2, TEST_U8_META2, np.uint8) 275 | self.assertEqual(192, meta._count_samples()) 276 | self.assertFalse(meta._is_conforming_dataset()) 277 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 278 | self.assertTrue((176, 240), meta.get_capture_byte_boundarys(1)) 279 | self.assertTrue(np.array_equal(np.arange(128), meta.read_samples_in_capture(0, autoscale=False))) 280 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(1, autoscale=False))) 281 | 282 | def test_003(self) -> None: 283 | """three capture recording with multiple header_bytes set""" 284 | meta = self.prepare(TEST_U8_DATA3, TEST_U8_META3, np.uint8) 285 | self.assertEqual(192, meta._count_samples()) 286 | self.assertFalse(meta._is_conforming_dataset()) 287 | self.assertTrue((32, 64), meta.get_capture_byte_boundarys(0)) 288 | self.assertTrue((64, 160), meta.get_capture_byte_boundarys(1)) 289 | self.assertTrue((192, 256), meta.get_capture_byte_boundarys(2)) 290 | self.assertTrue(np.array_equal(np.arange(32), meta.read_samples_in_capture(0, autoscale=False))) 291 | self.assertTrue(np.array_equal(np.arange(32, 128), meta.read_samples_in_capture(1, autoscale=False))) 292 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(2, autoscale=False))) 293 | 294 | def test_004(self) -> None: 295 | """two channel version of 000""" 296 | meta = self.prepare(TEST_U8_DATA4, TEST_U8_META4, np.uint8) 297 | self.assertEqual(96, meta._count_samples()) 298 | self.assertFalse(meta._is_conforming_dataset()) 299 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 300 | self.assertTrue((160, 224), meta.get_capture_byte_boundarys(1)) 301 | self.assertTrue( 302 | np.array_equal(np.arange(64).repeat(2).reshape(-1, 2), meta.read_samples_in_capture(0, autoscale=False)) 303 | ) 304 | self.assertTrue( 305 | np.array_equal(np.arange(64, 96).repeat(2).reshape(-1, 2), meta.read_samples_in_capture(1, autoscale=False)) 306 | ) 307 | 308 | def test_slicing_ru8(self) -> None: 309 | """slice real uint8""" 310 | meta = self.prepare(TEST_U8_DATA0, TEST_U8_META0, np.uint8) 311 | self.assertTrue(np.array_equal(meta[:], TEST_U8_DATA0)) 312 | self.assertTrue(np.array_equal(meta[6], TEST_U8_DATA0[6])) 313 | self.assertTrue(np.array_equal(meta[1:-1], TEST_U8_DATA0[1:-1])) 314 | 315 | def test_slicing_rf32(self) -> None: 316 | """slice real float32""" 317 | meta = self.prepare(TEST_FLOAT32_DATA, TEST_METADATA, np.float32) 318 | self.assertTrue(np.array_equal(meta[:], TEST_FLOAT32_DATA)) 319 | self.assertTrue(np.array_equal(meta[9], TEST_FLOAT32_DATA[9])) 320 | 321 | def test_slicing_multiple_channels(self) -> None: 322 | """slice multiple channels""" 323 | meta = self.prepare(TEST_U8_DATA4, TEST_U8_META4, np.uint8) 324 | channelized = np.array(TEST_U8_DATA4).reshape((-1, 2)) 325 | self.assertTrue(np.array_equal(meta[:][:], channelized)) 326 | self.assertTrue(np.array_equal(meta[10:20, 0], meta.read_samples(autoscale=False)[10:20, 0])) 327 | self.assertTrue(np.array_equal(meta[0], channelized[0])) 328 | self.assertTrue(np.array_equal(meta[1, :], channelized[1])) 329 | 330 | 331 | def simulate_capture(sigmf_md, n, capture_len): 332 | start_index = capture_len * n 333 | 334 | capture_md = {"core:datetime": utils.get_sigmf_iso8601_datetime_now()} 335 | 336 | sigmf_md.add_capture(start_index=start_index, metadata=capture_md) 337 | 338 | annotation_md = { 339 | "core:latitude": 40.0 + 0.0001 * n, 340 | 
"core:longitude": -105.0 + 0.0001 * n, 341 | } 342 | 343 | sigmf_md.add_annotation(start_index=start_index, length=capture_len, metadata=annotation_md) 344 | 345 | 346 | def test_default_constructor(): 347 | SigMFFile() 348 | 349 | 350 | def test_set_non_required_global_field(): 351 | sigf = SigMFFile() 352 | sigf.set_global_field("this_is:not_in_the_schema", None) 353 | 354 | 355 | def test_add_capture(): 356 | sigf = SigMFFile() 357 | sigf.add_capture(start_index=0, metadata={}) 358 | 359 | 360 | def test_add_annotation(): 361 | sigf = SigMFFile() 362 | sigf.add_capture(start_index=0) 363 | meta = {"latitude": 40.0, "longitude": -105.0} 364 | sigf.add_annotation(start_index=0, length=128, metadata=meta) 365 | 366 | 367 | def test_fromarchive(test_sigmffile): 368 | with tempfile.NamedTemporaryFile(suffix=".sigmf") as temp_file: 369 | archive_path = test_sigmffile.archive(name=temp_file.name) 370 | result = sigmffile.fromarchive(archive_path=archive_path) 371 | assert result._metadata == test_sigmffile._metadata == TEST_METADATA 372 | 373 | 374 | def test_add_multiple_captures_and_annotations(): 375 | sigf = SigMFFile() 376 | for idx in range(3): 377 | simulate_capture(sigf, idx, 1024) 378 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Utilities""" 8 | 9 | from datetime import datetime, timezone 10 | 11 | import pytest 12 | 13 | from sigmf import utils 14 | 15 | 16 | # fmt: off 17 | @pytest.mark.parametrize("time_str, expected", [ 18 | ("1955-07-04T05:15:00Z", datetime(year=1955, month=7, day=4, hour=5, minute=15, second=00, microsecond=0, tzinfo=timezone.utc)), 19 | ("2956-08-05T06:15:12Z", datetime(year=2956, month=8, day=5, hour=6, minute=15, second=12, microsecond=0, tzinfo=timezone.utc)), 20 | ("3957-09-06T07:15:12.345Z", datetime(year=3957, month=9, day=6, hour=7, minute=15, second=12, microsecond=345000, tzinfo=timezone.utc)), 21 | ("4958-10-07T08:15:12.0345Z", datetime(year=4958, month=10, day=7, hour=8, minute=15, second=12, microsecond=34500, tzinfo=timezone.utc)), 22 | ("5959-11-08T09:15:12.000000Z", datetime(year=5959, month=11, day=8, hour=9, minute=15, second=12, microsecond=0, tzinfo=timezone.utc)), 23 | ("6960-12-09T10:15:12.123456789123Z", datetime(year=6960, month=12, day=9, hour=10, minute=15, second=12, microsecond=123456, tzinfo=timezone.utc)), 24 | ]) 25 | # fmt: on 26 | def test_parse_simple_iso8601(time_str: str, expected: datetime) -> None: 27 | """Ensure various times are represented as expected""" 28 | date_struct = utils.parse_iso8601_datetime(time_str) 29 | assert date_struct == expected 30 | 31 | 32 | def test_roundtrip_datetime() -> None: 33 | """New string -> struct -> string is ok""" 34 | now_str = utils.get_sigmf_iso8601_datetime_now() 35 | now_struct = utils.parse_iso8601_datetime(now_str) 36 | assert now_str == now_struct.strftime(utils.SIGMF_DATETIME_ISO8601_FMT) 37 | -------------------------------------------------------------------------------- /tests/test_validation.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Validator""" 8 | 9 | import tempfile 10 | import unittest 11 | from pathlib import Path 12 | 13 | from jsonschema.exceptions import ValidationError 14 | 15 | import sigmf 16 | from sigmf import SigMFFile 17 | 18 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 19 | 20 | 21 | def test_valid_data(): 22 | """ensure the default metadata is OK""" 23 | SigMFFile(TEST_METADATA).validate() 24 | 25 | 26 | class CommandLineValidator(unittest.TestCase): 27 | """Check behavior of command-line parser""" 28 | 29 | def setUp(self): 30 | """Create a directory with some valid files""" 31 | self.tmp_dir = tempfile.TemporaryDirectory() 32 | self.tmp_path = tmp_path = Path(self.tmp_dir.name) 33 | junk_path = tmp_path / "junk" 34 | TEST_FLOAT32_DATA.tofile(junk_path) 35 | some_meta = SigMFFile(TEST_METADATA, data_file=junk_path) 36 | some_meta.tofile(tmp_path / "a") 37 | some_meta.tofile(tmp_path / "b") 38 | some_meta.tofile(tmp_path / "c", toarchive=True) 39 | 40 | def tearDown(self): 41 | """cleanup""" 42 | self.tmp_dir.cleanup() 43 | 44 | def test_normal(self): 45 | """able to parse archives and non-archives""" 46 | args = (str(self.tmp_path / "*.sigmf*"),) 47 | sigmf.validate.main(args) 48 | 49 | def test_normal_skip(self): 50 | """able to skip checksum""" 51 | args = (str(self.tmp_path / "*.sigmf*"), "--skip-checksum") 52 | sigmf.validate.main(args) 53 | 54 | def test_partial(self): 55 | """checks some but not all files""" 56 | args = (str(self.tmp_path / "*"),) 57 | with self.assertRaises(SystemExit): 58 | sigmf.validate.main(args) 59 | 60 | def test_missing(self): 61 | """exit with rc=1 when run on empty""" 62 | with self.assertRaises(SystemExit) as cm: 63 | sigmf.validate.main(tuple()) 64 | self.assertEqual((1,), cm.exception.args) 65 | 66 | def test_version(self): 67 | """exit with rc=0 after printing version""" 68 | args = ("--version",) 69 | with self.assertRaises(SystemExit) as cm: 70 | sigmf.validate.main(args) 71 | self.assertEqual((0,), cm.exception.args) 72 | 73 | 74 | class FailingCases(unittest.TestCase): 75 | """Cases where the validator should raise an exception.""" 76 | 77 | def setUp(self): 78 | self.metadata = dict(TEST_METADATA) 79 | 80 | def test_no_version(self): 81 | """core:version must be present""" 82 | del self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY] 83 | with self.assertRaises(ValidationError): 84 | SigMFFile(self.metadata).validate() 85 | 86 | def test_extra_top_level_key(self): 87 | """no extra keys allowed on the top level""" 88 | self.metadata["extra"] = 0 89 | with self.assertRaises(ValidationError): 90 | SigMFFile(self.metadata).validate() 91 | 92 | def test_invalid_type(self): 93 | """license key must be string""" 94 | self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.LICENSE_KEY] = 1 95 | with self.assertRaises(ValidationError): 96 | SigMFFile(self.metadata).validate() 97 | 98 | def test_invalid_capture_order(self): 99 | """metadata must have captures in order""" 100 | self.metadata[SigMFFile.CAPTURE_KEY] = [{SigMFFile.START_INDEX_KEY: 10}, {SigMFFile.START_INDEX_KEY: 9}] 101 | with self.assertRaises(ValidationError): 102 | SigMFFile(self.metadata).validate() 103 | 104 | def test_invalid_annotation_order(self): 105 | """metadata must have annotations in order""" 106 | self.metadata[SigMFFile.ANNOTATION_KEY] = [ 107 | { 108 | SigMFFile.START_INDEX_KEY: 2, 109 | SigMFFile.LENGTH_INDEX_KEY: 120000, 110 | }, 111 | { 112 | 
SigMFFile.START_INDEX_KEY: 1, 113 | SigMFFile.LENGTH_INDEX_KEY: 120000, 114 | }, 115 | ] 116 | with self.assertRaises(ValidationError): 117 | SigMFFile(self.metadata).validate() 118 | 119 | def test_annotation_without_sample_count(self): 120 | """annotation without length should be accepted""" 121 | self.metadata[SigMFFile.ANNOTATION_KEY] = [{SigMFFile.START_INDEX_KEY: 2}] 122 | SigMFFile(self.metadata).validate() 123 | 124 | def test_invalid_hash(self): 125 | """wrong hash raises error on creation""" 126 | with tempfile.NamedTemporaryFile() as temp_file: 127 | TEST_FLOAT32_DATA.tofile(temp_file.name) 128 | self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = "derp" 129 | with self.assertRaises(sigmf.error.SigMFFileError): 130 | SigMFFile(metadata=self.metadata, data_file=temp_file.name) 131 | -------------------------------------------------------------------------------- /tests/testdata.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Shared test data for tests.""" 8 | 9 | import numpy as np 10 | 11 | from sigmf import SigMFFile, __specification__, __version__ 12 | 13 | TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32) 14 | 15 | TEST_METADATA = { 16 | SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], 17 | SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], 18 | SigMFFile.GLOBAL_KEY: { 19 | SigMFFile.DATATYPE_KEY: "rf32_le", 20 | SigMFFile.HASH_KEY: "f4984219b318894fa7144519185d1ae81ea721c6113243a52b51e444512a39d74cf41a4cec3c5d000bd7277cc71232c04d7a946717497e18619bdbe94bfeadd6", 21 | SigMFFile.NUM_CHANNELS_KEY: 1, 22 | SigMFFile.VERSION_KEY: __specification__, 23 | }, 24 | } 25 | 26 | # Data0 is a test of a compliant two capture recording 27 | TEST_U8_DATA0 = list(range(256)) 28 | TEST_U8_META0 = { 29 | SigMFFile.ANNOTATION_KEY: [], 30 | SigMFFile.CAPTURE_KEY: [ 31 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 0}, 32 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 0}, 33 | ], # very strange..but technically legal? 
34 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 0},
35 | }
36 | # Data1 is a test of a two capture recording with header_bytes and trailing_bytes set
37 | TEST_U8_DATA1 = [0xFE] * 32 + list(range(192)) + [0xFF] * 32
38 | TEST_U8_META1 = {
39 |     SigMFFile.ANNOTATION_KEY: [],
40 |     SigMFFile.CAPTURE_KEY: [
41 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
42 |         {SigMFFile.START_INDEX_KEY: 128},
43 |     ],
44 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 32},
45 | }
46 | # Data2 is a test of a two capture recording with multiple header_bytes set
47 | TEST_U8_DATA2 = [0xFE] * 32 + list(range(128)) + [0xFE] * 16 + list(range(128, 192)) + [0xFF] * 16
48 | TEST_U8_META2 = {
49 |     SigMFFile.ANNOTATION_KEY: [],
50 |     SigMFFile.CAPTURE_KEY: [
51 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
52 |         {SigMFFile.START_INDEX_KEY: 128, SigMFFile.HEADER_BYTES_KEY: 16},
53 |     ],
54 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 16},
55 | }
56 | # Data3 is a test of a three capture recording with multiple header_bytes set
57 | TEST_U8_DATA3 = [0xFE] * 32 + list(range(128)) + [0xFE] * 32 + list(range(128, 192))
58 | TEST_U8_META3 = {
59 |     SigMFFile.ANNOTATION_KEY: [],
60 |     SigMFFile.CAPTURE_KEY: [
61 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
62 |         {SigMFFile.START_INDEX_KEY: 32},
63 |         {SigMFFile.START_INDEX_KEY: 128, SigMFFile.HEADER_BYTES_KEY: 32},
64 |     ],
65 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8"},
66 | }
67 | # Data4 is a two channel version of Data1
68 | TEST_U8_DATA4 = [0xFE] * 32 + [y for y in list(range(96)) for i in [0, 1]] + [0xFF] * 32
69 | TEST_U8_META4 = {
70 |     SigMFFile.ANNOTATION_KEY: [],
71 |     SigMFFile.CAPTURE_KEY: [
72 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
73 |         {SigMFFile.START_INDEX_KEY: 64},
74 |     ],
75 |     SigMFFile.GLOBAL_KEY: {
76 |         SigMFFile.DATATYPE_KEY: "ru8",
77 |         SigMFFile.TRAILING_BYTES_KEY: 32,
78 |         SigMFFile.NUM_CHANNELS_KEY: 2,
79 |     },
80 | }
81 | 
--------------------------------------------------------------------------------
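A minimal end-to-end sketch of the loop these fixtures exercise (write a dataset, attach metadata, read it back), using only APIs shown in the files above; the `example` filenames are illustrative:

import tempfile
from pathlib import Path

import numpy as np

from sigmf import SigMFFile, __specification__
from sigmf.sigmffile import fromfile

with tempfile.TemporaryDirectory() as tmpdir:
    # write 16 real float32 samples as the dataset
    data_path = Path(tmpdir) / "example.sigmf-data"
    np.arange(16, dtype=np.float32).tofile(data_path)

    # build metadata the same way conftest.py does
    meta = SigMFFile()
    meta.set_global_field("core:datatype", "rf32_le")  # real float32, little-endian
    meta.set_global_field("core:version", __specification__)
    meta.add_annotation(start_index=0, length=16)
    meta.add_capture(start_index=0)
    meta.set_data_file(data_path)  # also records the dataset hash
    meta.tofile(Path(tmpdir) / "example.sigmf-meta")

    # fromfile() pairs the -meta and -data files by their shared stem
    loopback = fromfile(Path(tmpdir) / "example.sigmf-meta")
    assert np.array_equal(loopback.read_samples(), np.arange(16, dtype=np.float32))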