├── .github
│   └── workflows
│       └── main.yml
├── .gitignore
├── .readthedocs.yaml
├── COPYING-LGPL
├── README.md
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements.txt
│   └── source
│       ├── _templates
│       │   ├── custom-class-template.rst
│       │   └── custom-module-template.rst
│       ├── advanced.rst
│       ├── api.rst
│       ├── conf.py
│       ├── developers.rst
│       ├── faq.rst
│       ├── index.rst
│       └── quickstart.rst
├── pyproject.toml
├── sigmf
│   ├── __init__.py
│   ├── apps
│   │   ├── __init__.py
│   │   └── convert_wav.py
│   ├── archive.py
│   ├── archivereader.py
│   ├── error.py
│   ├── schema-collection.json
│   ├── schema-meta.json
│   ├── schema.py
│   ├── sigmf_hash.py
│   ├── sigmffile.py
│   ├── utils.py
│   └── validate.py
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── test_archive.py
    ├── test_archivereader.py
    ├── test_collection.py
    ├── test_ncd.py
    ├── test_sigmffile.py
    ├── test_utils.py
    ├── test_validation.py
    └── testdata.py

/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Python package
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |     types: [opened, synchronize]
 7 |   schedule:
 8 |     - cron: "5 5 * * 5" # test every friday @ 0505
 9 | 
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-22.04
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.7", "3.9", "3.11", "3.13"]
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v4
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip
25 |         pip install .[test,apps]
26 |     - name: Test with pytest
27 |       run: |
28 |         coverage run
29 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # temp files
 2 | __pycache__/
 3 | *.swp
 4 | *.py[cod]
 5 | .cache
 6 | 
 7 | # packaging related
 8 | dist/
 9 | build/
10 | eggs/
11 | .eggs/
12 | SigMF.egg-info/*
13 | 
14 | # test related
15 | .coverage
16 | .hypothesis/
17 | .tox/
18 | coverage.xml
19 | pytest.xml
20 | htmlcov/*
21 | 
22 | # docs related
23 | docs/_build/
24 | docs/source/_autosummary/
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version, and other tools you might need
 8 | build:
 9 |   os: ubuntu-24.04
10 |   tools:
11 |     python: "3.13"
12 | 
13 | # declare the Python requirements required to build your documentation
14 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
15 | python:
16 |   install:
17 |     - method: pip
18 |       path: .
19 |       extra_requirements:
20 |         - test
21 |         - apps
22 |     - requirements: docs/requirements.txt
23 | 
24 | # Build documentation in the "docs/" directory with Sphinx
25 | sphinx:
26 |   configuration: docs/source/conf.py
--------------------------------------------------------------------------------
/COPYING-LGPL:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![Rendered SigMF Logo](https://raw.githubusercontent.com/sigmf/SigMF/refs/heads/main/logo/sigmf_logo.png)
 2 | 
 3 | [![PyPI Version Shield](https://img.shields.io/pypi/v/sigmf)](https://pypi.org/project/SigMF/)
 4 | [![Build Status Shield](https://img.shields.io/github/actions/workflow/status/sigmf/sigmf-python/main.yml)](https://github.com/sigmf/sigmf-python/actions?query=branch%3Amain)
 5 | [![License Shield](https://img.shields.io/pypi/l/sigmf)](https://en.wikipedia.org/wiki/GNU_Lesser_General_Public_License)
 6 | [![Documentation Shield](https://img.shields.io/readthedocs/sigmf)](https://sigmf.readthedocs.io/en/latest/)
 7 | [![PyPI Downloads Shield](https://img.shields.io/pypi/dm/sigmf)](https://pypi.org/project/SigMF/)
 8 | 
 9 | The `sigmf` library makes it easy to interact with Signal Metadata Format
10 | (SigMF) recordings. This library is compatible with Python 3.7-3.13 and is distributed
11 | freely under the terms of the GNU Lesser GPL v3 license.
12 | 
13 | This module follows the SigMF specification [html](https://sigmf.org/)/[pdf](https://sigmf.github.io/SigMF/sigmf-spec.pdf) from the [spec repository](https://github.com/sigmf/SigMF).
14 | 
15 | To install the latest PyPI release, install from pip:
16 | 
17 | ```bash
18 | pip install sigmf
19 | ```
20 | 
21 | **[Please visit the documentation for examples & more info.](https://sigmf.readthedocs.io/en/latest/)**
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
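   | # For example, `make html` renders the docs into build/html/; this assumes
   | # the pinned Sphinx packages from requirements.txt are installed.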
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # pinned 2025-01-15 2 | sphinx==8.1.3 3 | sphinx-rtd-theme==3.0.2 4 | -------------------------------------------------------------------------------- /docs/source/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | :members: 7 | :show-inheritance: 8 | :inherited-members: 9 | :special-members: __call__, __add__, __mul__ 10 | 11 | {% block methods %} 12 | {% if methods %} 13 | .. rubric:: {{ _('Methods') }} 14 | 15 | .. autosummary:: 16 | :nosignatures: 17 | {% for item in methods %} 18 | {%- if not item.startswith('_') %} 19 | ~{{ name }}.{{ item }} 20 | {%- endif -%} 21 | {%- endfor %} 22 | {% endif %} 23 | {% endblock %} 24 | 25 | {% block attributes %} 26 | {% if attributes %} 27 | .. rubric:: {{ _('Attributes') }} 28 | 29 | .. autosummary:: 30 | {% for item in attributes %} 31 | ~{{ name }}.{{ item }} 32 | {%- endfor %} 33 | {% endif %} 34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /docs/source/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. automodule:: {{ fullname }} 4 | 5 | {% block attributes %} 6 | {% if attributes %} 7 | .. rubric:: Module attributes 8 | 9 | .. autosummary:: 10 | :toctree: 11 | {% for item in attributes %} 12 | {{ item }} 13 | {%- endfor %} 14 | {% endif %} 15 | {% endblock %} 16 | 17 | {% block functions %} 18 | {% if functions %} 19 | .. rubric:: {{ _('Functions') }} 20 | 21 | .. autosummary:: 22 | :toctree: 23 | :nosignatures: 24 | {% for item in functions %} 25 | {{ item }} 26 | {%- endfor %} 27 | {% endif %} 28 | {% endblock %} 29 | 30 | {% block classes %} 31 | {% if classes %} 32 | .. rubric:: {{ _('Classes') }} 33 | 34 | .. 
autosummary::
35 |    :toctree:
36 |    :template: custom-class-template.rst
37 |    :nosignatures:
38 | {% for item in classes %}
39 |    {{ item }}
40 | {%- endfor %}
41 | {% endif %}
42 | {% endblock %}
43 | 
44 | {% block exceptions %}
45 | {% if exceptions %}
46 | .. rubric:: {{ _('Exceptions') }}
47 | 
48 | .. autosummary::
49 |    :toctree:
50 | {% for item in exceptions %}
51 |    {{ item }}
52 | {%- endfor %}
53 | {% endif %}
54 | {% endblock %}
55 | 
56 | {% block modules %}
57 | {% if modules %}
58 | .. autosummary::
59 |    :toctree:
60 |    :template: custom-module-template.rst
61 |    :recursive:
62 | {% for item in modules %}
63 |    {{ item }}
64 | {%- endfor %}
65 | {% endif %}
66 | {% endblock %}
67 | 
--------------------------------------------------------------------------------
/docs/source/advanced.rst:
--------------------------------------------------------------------------------
  1 | ========
  2 | Advanced
  3 | ========
  4 | 
  5 | Here we discuss more advanced techniques for working with **collections** and
  6 | **archives**.
  7 | 
  8 | ------------------------------
  9 | Iterate over SigMF Annotations
 10 | ------------------------------
 11 | 
 12 | Here we will load a SigMF dataset and iterate over the annotations. You can get
 13 | the recording of the SigMF logo used in this example `from the specification
 14 | <https://github.com/sigmf/SigMF/tree/main/logo>`_.
 15 | 
 16 | .. code-block:: python
 17 | 
 18 |     from sigmf import SigMFFile, sigmffile
 19 | 
 20 |     # Load a dataset
 21 |     path = 'logo/sigmf_logo' # extension is optional
 22 |     signal = sigmffile.fromfile(path)
 23 | 
 24 |     # Get some metadata and all annotations
 25 |     sample_rate = signal.get_global_field(SigMFFile.SAMPLE_RATE_KEY)
 26 |     sample_count = signal.sample_count
 27 |     signal_duration = sample_count / sample_rate
 28 |     annotations = signal.get_annotations()
 29 | 
 30 |     # Iterate over annotations
 31 |     for adx, annotation in enumerate(annotations):
 32 |         annotation_start_idx = annotation[SigMFFile.START_INDEX_KEY]
 33 |         annotation_length = annotation[SigMFFile.LENGTH_INDEX_KEY]
 34 |         annotation_comment = annotation.get(SigMFFile.COMMENT_KEY, "[annotation {}]".format(adx))
 35 | 
 36 |         # Get capture info associated with the start of annotation
 37 |         capture = signal.get_capture_info(annotation_start_idx)
 38 |         freq_center = capture.get(SigMFFile.FREQUENCY_KEY, 0)
 39 |         freq_min = freq_center - 0.5*sample_rate
 40 |         freq_max = freq_center + 0.5*sample_rate
 41 | 
 42 |         # Get frequency edges of annotation (default to edges of capture)
 43 |         freq_start = annotation.get(SigMFFile.FLO_KEY)
 44 |         freq_stop = annotation.get(SigMFFile.FHI_KEY)
 45 | 
 46 |         # Get the samples corresponding to annotation
 47 |         samples = signal.read_samples(annotation_start_idx, annotation_length)
 48 | 
 49 |         # Do something with the samples & metadata for each annotation here
 50 | 
 51 | -------------------------------------
 52 | Save a Collection of SigMF Recordings
 53 | -------------------------------------
 54 | 
 55 | First, create a single SigMF Recording and save it to disk:
 56 | 
 57 | .. code-block:: python
 58 | 
 59 |     import datetime as dt
 60 |     import numpy as np
 61 |     import sigmf
 62 |     from sigmf import SigMFFile
 63 |     from sigmf.utils import get_data_type_str, get_sigmf_iso8601_datetime_now
 64 | 
 65 |     # suppose we have a complex timeseries signal
 66 |     data = np.zeros(1024, dtype=np.complex64)
 67 | 
 68 |     # write those samples to file in cf32_le
 69 |     data.tofile('example_cf32.sigmf-data')
 70 | 
 71 |     # create the metadata
 72 |     meta = SigMFFile(
 73 |         data_file='example_cf32.sigmf-data', # extension is optional
 74 |         global_info = {
 75 |             SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le'
 76 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
 77 |             SigMFFile.AUTHOR_KEY: 'jane.doe@domain.org',
 78 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex float32 example file.',
 79 |         }
 80 |     )
 81 | 
 82 |     # create a capture key at time index 0
 83 |     meta.add_capture(0, metadata={
 84 |         SigMFFile.FREQUENCY_KEY: 915000000,
 85 |         SigMFFile.DATETIME_KEY: get_sigmf_iso8601_datetime_now(),
 86 |     })
 87 | 
 88 |     # add an annotation at sample 100 with length 200 & 10 kHz width
 89 |     meta.add_annotation(100, 200, metadata = {
 90 |         SigMFFile.FLO_KEY: 914995000.0,
 91 |         SigMFFile.FHI_KEY: 915005000.0,
 92 |         SigMFFile.COMMENT_KEY: 'example annotation',
 93 |     })
 94 | 
 95 |     # check for mistakes & write to disk
 96 |     meta.tofile('example_cf32.sigmf-meta') # extension is optional
 97 | 
 98 | Now let's add another SigMF Recording and associate them with a SigMF Collection:
 99 | 
100 | .. code-block:: python
101 | 
102 |     from sigmf import SigMFCollection
103 | 
104 |     data_ci16 = np.zeros(1024, dtype=np.complex64)
105 | 
106 |     # rescale and save as a complex int16 file:
107 |     data_ci16 *= pow(2, 15)
108 |     data_ci16.view(np.float32).astype(np.int16).tofile('example_ci16.sigmf-data')
109 | 
110 |     # create the metadata for the second file
111 |     meta_ci16 = SigMFFile(
112 |         data_file='example_ci16.sigmf-data', # extension is optional
113 |         global_info = {
114 |             SigMFFile.DATATYPE_KEY: 'ci16_le', # get_data_type_str() is only valid for numpy types
115 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
116 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex int16 file.',
117 |         }
118 |     )
119 |     meta_ci16.add_capture(0, metadata=meta.get_capture_info(0))
120 |     meta_ci16.tofile('example_ci16.sigmf-meta')
121 | 
122 |     collection = SigMFCollection(['example_cf32.sigmf-meta', 'example_ci16.sigmf-meta'],
123 |         metadata = {'collection': {
124 |             SigMFCollection.AUTHOR_KEY: 'sigmf@sigmf.org',
125 |             SigMFCollection.DESCRIPTION_KEY: 'Collection of two all zero files.',
126 |             }
127 |         }
128 |     )
129 |     streams = collection.get_stream_names()
130 |     sigmf = [collection.get_SigMFFile(stream) for stream in streams]
131 |     collection.tofile('example_zeros.sigmf-collection')
132 | 
133 | The SigMF Collection and its associated Recordings can now be loaded like this:
134 | 
135 | .. code-block:: python
136 | 
137 |     from sigmf import sigmffile
138 |     collection = sigmffile.fromfile('example_zeros')
139 |     ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16')
140 |     cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32')
141 | 
142 | -----------------------------------------------
143 | Load a SigMF Archive and slice without untaring
144 | -----------------------------------------------
145 | 
146 | Since an *archive* is merely a tarball (uncompressed), and since there are many
147 | excellent tools for manipulating tar files, it's fairly straightforward to
148 | access the *data* part of a SigMF archive without un-taring it. This is a
149 | compelling feature because **1** archives make it harder for the ``-data`` and
150 | the ``-meta`` to get separated, and **2** some datasets are so large that it
151 | can be impractical (due to available disk space, or slow network speeds if the
152 | archive file resides on a network file share) or simply obnoxious to untar it
153 | first.
154 | 
155 | ::
156 | 
157 |     >>> import sigmf
158 |     >>> arc = sigmf.SigMFArchiveReader('/src/LTE.sigmf')
159 |     >>> arc.shape
160 |     (15379532,)
161 |     >>> arc.ndim
162 |     1
163 |     >>> arc[:10]
164 |     array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j,   0.-75.j,  22.-58.j,
165 |             48.-44.j,  49.-60.j,  31.-56.j,  23.-47.j], dtype=complex64)
166 | 
167 | The preceding example exhibits another feature of this approach; the archive
168 | ``LTE.sigmf`` is actually ``complex-int16``'s on disk, for which there is no
169 | corresponding type in ``numpy``. However, the ``.sigmffile`` member keeps track of
170 | this, and converts the data to ``numpy.complex64`` *after* slicing it, that is,
171 | after reading it from disk.
172 | 
173 | ::
174 | 
175 |     >>> arc.sigmffile.get_global_field(sigmf.SigMFFile.DATATYPE_KEY)
176 |     'ci16_le'
177 | 
178 |     >>> arc.sigmffile._memmap.dtype
179 |     dtype('int16')
180 | 
181 |     >>> arc.sigmffile._return_type
182 |     '<c8'
183 | 
184 | Another supported mode is the case where you might have an archive that
185 | is *not* on disk but instead is simply ``bytes`` in a python variable.
186 | Instead of needing to write this out to a temporary file, the archive
187 | reader can consume it directly through a python ``io.BytesIO`` buffer
188 | via the ``archive_buffer`` argument:
189 | 
190 | ::
191 | 
192 |     >>> import sigmf, io
193 |     >>> sigmf_bytes = io.BytesIO(open('/src/LTE.sigmf', 'rb').read())
194 |     >>> arc = sigmf.SigMFArchiveReader(archive_buffer=sigmf_bytes)
195 |     >>> arc[:10]
196 |     array([-20.+11.j, -21. -6.j, -17.-20.j, -13.-52.j,   0.-75.j,  22.-58.j,
197 |             48.-44.j,  49.-60.j,  31.-56.j,  23.-47.j], dtype=complex64)
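198 | 
199 | Because the archive is an ordinary uncompressed tarball, you can also inspect
200 | it with nothing but the standard library. The following sketch (assuming only
201 | that ``LTE.sigmf`` exists and using Python's built-in ``tarfile`` module)
202 | lists the members of an archive without extracting anything:
203 | 
204 | .. code-block:: python
205 | 
206 |     import tarfile
207 | 
208 |     # a .sigmf archive is a plain (uncompressed) tar file
209 |     with tarfile.open('/src/LTE.sigmf', mode='r:') as tar:
210 |         for member in tar.getmembers():
211 |             # expect <name>/<name>.sigmf-meta and <name>/<name>.sigmf-data
212 |             print(member.name, member.size)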
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
 1 | =========
 2 | SigMF API
 3 | =========
 4 | 
 5 | .. autosummary::
 6 |    :toctree: _autosummary
 7 |    :template: custom-module-template.rst
 8 |    :recursive:
 9 | 
10 |    sigmf.apps.convert_wav
11 |    sigmf.archive
12 |    sigmf.archivereader
13 |    sigmf.error
14 |    sigmf.schema
15 |    sigmf.sigmf_hash
16 |    sigmf.sigmffile
17 |    sigmf.utils
18 |    sigmf.validate
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Copyright: Multiple Authors
 2 | #
 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
 4 | #
 5 | # SPDX-License-Identifier: LGPL-3.0-or-later
 6 | """Configuration file for the Sphinx documentation builder."""
 7 | 
 8 | import datetime
 9 | import re
10 | import sys
11 | from pathlib import Path
12 | 
13 | # parse info from project files
14 | 
15 | root = Path(__file__).parent.parent.parent
16 | with open(root / "sigmf" / "__init__.py", "r") as handle:
17 |     init = handle.read()
18 | toolversion = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', init).group(1)
19 | specversion = re.search(r'__specification__\s*=\s*[\'"]([^\'"]*)[\'"]', init).group(1)
20 | 
21 | # autodoc needs special pathing
22 | sys.path.append(str(root))
23 | 
24 | # -- Project information
25 | 
26 | project = "sigmf"
27 | author = "Multiple Authors"
28 | copyright = f"2017-{datetime.date.today().year}, {author}"
29 | 
30 | release = toolversion
31 | version = toolversion
32 | 
33 | # -- General configuration
34 | 
35 | extensions = [
36 |     "sphinx.ext.autodoc",
37 |     "sphinx.ext.autosummary",
38 |     "sphinx.ext.doctest",
39 |     "sphinx.ext.duration",
40 |     "sphinx.ext.intersphinx",
41 |     "sphinx.ext.napoleon", # allows numpy-style docstrings
42 | ]
43 | 
44 | intersphinx_mapping = {
45 |     "python": ("https://docs.python.org/3/", None),
46 |     "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
47 | }
48 | intersphinx_disabled_domains = ["std"]
49 | 
50 | templates_path = ["_templates"]
51 | 
52 | # -- Options for HTML output
53 | 
54 | html_theme = "sphinx_rtd_theme"
55 | html_favicon = "https://raw.githubusercontent.com/wiki/sigmf/SigMF/logo/logo-icon-32-folder.png"
56 | html_logo = "https://raw.githubusercontent.com/sigmf/SigMF/refs/heads/main/logo/sigmf_logo.svg"
57 | 
58 | # -- Options for EPUB output
59 | 
60 | epub_show_urls = "footnote"
61 | 
62 | # Method to use variables within rst files
63 | # https://stackoverflow.com/a/69211912/760099
64 | 
65 | variables_to_export = [
66 |     "toolversion",
67 |     "specversion",
68 | ]
69 | frozen_locals = dict(locals())
70 | rst_epilog = '\n'.join(map(lambda x: f".. |{x}| replace:: {frozen_locals[x]}", variables_to_export))
71 | del frozen_locals
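72 | 
73 | # With the values above, the generated epilog is equivalent to this
74 | # illustrative sketch (actual values are parsed from sigmf/__init__.py):
75 | #
76 | #   .. |toolversion| replace:: 1.2.10
77 | #   .. |specversion| replace:: 1.2.5
78 | #
79 | # so any .rst file, e.g. index.rst, can write |toolversion| inline.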
--------------------------------------------------------------------------------
/docs/source/developers.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | Developers
 3 | ==========
 4 | 
 5 | This page is for developers of the ``sigmf-python`` module.
 6 | 
 7 | -------
 8 | Install
 9 | -------
10 | 
11 | To install the latest git release, build from source:
12 | 
13 | .. code-block:: console
14 | 
15 |     $ git clone https://github.com/sigmf/sigmf-python.git
16 |     $ cd sigmf-python
17 |     $ pip install .
18 | 
19 | -------
20 | Testing
21 | -------
22 | 
23 | This library contains many tests in the ``tests/`` folder. These can all be run locally:
24 | 
25 | .. code-block:: console
26 | 
27 |     $ coverage run
28 | 
29 | Or tests can be run within a temporary environment on all supported python versions:
30 | 
31 | .. code-block:: console
32 | 
33 |     $ tox run
34 | 
35 | To run a single (perhaps newly written) test with verbose output:
36 | 
37 | .. code-block:: console
38 | 
39 |     $ pytest -rA tests/test_archive.py
40 | 
41 | To lint the entire project and get suggested changes:
42 | 
43 | .. code-block:: console
44 | 
45 |     $ pylint sigmf tests
46 | 
47 | To autoformat the entire project according to our coding standard:
48 | 
49 | .. code-block:: console
50 | 
51 |     $ black sigmf tests # autoformat entire project
52 |     $ isort sigmf tests # format imports for entire project
53 | 
54 | ----
55 | Docs
56 | ----
57 | 
58 | To build the docs and host locally:
59 | 
60 | .. code-block:: console
61 | 
62 |     $ cd docs
63 |     $ make html
64 |     $ cd build/html/
65 |     $ python3 -m http.server
66 | 
67 | --------------
68 | Find an Issue?
69 | --------------
70 | 
71 | Issues can be addressed by opening an `issue
72 | <https://github.com/sigmf/sigmf-python/issues>`_ or by forking the project and
73 | submitting a `pull request <https://github.com/sigmf/sigmf-python/pulls>`_.
74 | 
--------------------------------------------------------------------------------
/docs/source/faq.rst:
--------------------------------------------------------------------------------
 1 | ==========================
 2 | Frequently Asked Questions
 3 | ==========================
 4 | 
 5 | .. contents::
 6 |     :local:
 7 | 
 8 | ..
 9 |     Frequently asked questions should be questions that actually got asked.
10 |     Formulate them as a question and an answer.
11 |     Consider that the answer is best as a reference to another place in the documentation.
12 | 
13 | ---------------------------
14 | Is this a GNU Radio effort?
15 | ---------------------------
16 | 
17 | *No*, this is not a GNU Radio-specific effort.
18 | This effort first emerged from a group of GNU Radio core
19 | developers, but the goal of the project is to provide a standard that will be
20 | useful to anyone and everyone, regardless of tool or workflow.
21 | 
22 | --------------------------------------------
23 | Is this specific to wireless communications?
24 | --------------------------------------------
25 | 
26 | *No*, similar to the response above, the goal is to create something that is
27 | generally applicable to *signal processing*, regardless of whether or not the
28 | application is communications related.
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | =================
 2 | Welcome to SigMF!
 3 | =================
 4 | 
 5 | **SigMF** is a Python library for working with radio recordings in
 6 | ``.sigmf`` format according to the `SigMF standard <https://sigmf.org/>`_.
 7 | It offers a *simple* and *intuitive* API for Python developers.
 8 | 
 9 | ..
10 |     Note: The toolversion & specversion below are replaced dynamically during build.
11 | 
12 | This documentation is for version |toolversion| of the library, which is
13 | compatible with version |specversion| of the SigMF specification.
14 | 
15 | To get started, see the :doc:`quickstart` section or learn how to :ref:`install` the library.
16 | 
17 | -----
18 | 
19 | .. toctree::
20 |     :maxdepth: 1
21 |     :caption: Getting Started
22 |     :hidden:
23 | 
24 |     quickstart
25 |     advanced
26 |     developers
27 | 
28 | .. toctree::
29 |     :maxdepth: 1
30 |     :caption: Community
31 |     :hidden:
32 | 
33 |     faq
34 | 
35 | .. toctree::
36 |     :maxdepth: 1
37 |     :caption: API Reference
38 |     :hidden:
39 | 
40 |     api
--------------------------------------------------------------------------------
/docs/source/quickstart.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | Quickstart
 3 | ==========
 4 | 
 5 | Here we discuss how to do all basic operations with SigMF.
 6 | 
 7 | .. _install:
 8 | 
 9 | -------
10 | Install
11 | -------
12 | 
13 | To install the latest PyPI release, install from pip:
14 | 
15 | .. code-block:: console
16 | 
17 |     $ pip install sigmf
18 | 
19 | ----------------------
20 | Read a SigMF Recording
21 | ----------------------
22 | 
23 | .. code-block:: python
24 | 
25 |     import sigmf
26 |     handle = sigmf.sigmffile.fromfile("example.sigmf")
27 |     handle.read_samples() # returns all timeseries data
28 |     handle.get_global_info() # returns 'global' dictionary
29 |     handle.get_captures() # returns list of 'captures' dictionaries
30 |     handle.get_annotations() # returns list of all annotations
31 |     handle[10:50] # return memory slice of samples 10 through 50
32 | 
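   | If you only need part of a recording, ``read_samples`` also accepts a start
   | index and a sample count. A minimal sketch, reusing ``example.sigmf`` from
   | above:
   | 
   | .. code-block:: python
   | 
   |     import sigmf
   |     from sigmf import SigMFFile
   | 
   |     handle = sigmf.sigmffile.fromfile("example.sigmf")
   |     rate = handle.get_global_field(SigMFFile.SAMPLE_RATE_KEY)
   |     chunk = handle.read_samples(start_index=100, count=200) # 200 samples from index 100
   |     print(len(chunk) / rate, "seconds of data")
   | 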
33 | -----------------------------------
34 | Verify SigMF Integrity & Compliance
35 | -----------------------------------
36 | 
37 | .. code-block:: console
38 | 
39 |     $ sigmf_validate example.sigmf
40 | 
41 | ---------------------------------------
42 | Save a Numpy array as a SigMF Recording
43 | ---------------------------------------
44 | 
45 | .. code-block:: python
46 | 
47 |     import numpy as np
48 |     from sigmf import SigMFFile
49 |     from sigmf.utils import get_data_type_str, get_sigmf_iso8601_datetime_now
50 | 
51 |     # suppose we have a complex timeseries signal
52 |     data = np.zeros(1024, dtype=np.complex64)
53 | 
54 |     # write those samples to file in cf32_le
55 |     data.tofile('example_cf32.sigmf-data')
56 | 
57 |     # create the metadata
58 |     meta = SigMFFile(
59 |         data_file='example_cf32.sigmf-data', # extension is optional
60 |         global_info = {
61 |             SigMFFile.DATATYPE_KEY: get_data_type_str(data), # in this case, 'cf32_le'
62 |             SigMFFile.SAMPLE_RATE_KEY: 48000,
63 |             SigMFFile.AUTHOR_KEY: 'jane.doe@domain.org',
64 |             SigMFFile.DESCRIPTION_KEY: 'All zero complex float32 example file.',
65 |         }
66 |     )
67 | 
68 |     # create a capture key at time index 0
69 |     meta.add_capture(0, metadata={
70 |         SigMFFile.FREQUENCY_KEY: 915000000,
71 |         SigMFFile.DATETIME_KEY: get_sigmf_iso8601_datetime_now(),
72 |     })
73 | 
74 |     # add an annotation at sample 100 with length 200 & 10 kHz width
75 |     meta.add_annotation(100, 200, metadata = {
76 |         SigMFFile.FLO_KEY: 914995000.0,
77 |         SigMFFile.FHI_KEY: 915005000.0,
78 |         SigMFFile.COMMENT_KEY: 'example annotation',
79 |     })
80 | 
81 |     # check for mistakes & write to disk
82 |     meta.tofile('example_cf32.sigmf-meta') # extension is optional
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "SigMF"
  3 | description = "Easily interact with Signal Metadata Format (SigMF) recordings." 
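    | # extras `test` and `apps` are defined under [project.optional-dependencies]
    | # below; a full development install is `pip install .[test,apps]`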
4 | keywords = ["gnuradio", "radio"] 5 | classifiers = [ 6 | "Development Status :: 5 - Production/Stable", 7 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", 8 | "Operating System :: OS Independent", 9 | "Programming Language :: Python :: 3", 10 | "Programming Language :: Python :: 3.7", 11 | "Programming Language :: Python :: 3.8", 12 | "Programming Language :: Python :: 3.9", 13 | "Programming Language :: Python :: 3.10", 14 | "Programming Language :: Python :: 3.11", 15 | "Programming Language :: Python :: 3.12", 16 | "Programming Language :: Python :: 3.13", 17 | "Topic :: Scientific/Engineering", 18 | "Topic :: Communications :: Ham Radio", 19 | ] 20 | dynamic = ["version", "readme"] 21 | requires-python = ">=3.7" 22 | dependencies = [ 23 | "numpy", # for vector math 24 | "jsonschema", # for spec validation 25 | ] 26 | [project.urls] 27 | repository = "https://github.com/sigmf/sigmf-python" 28 | documentation = "https://sigmf.readthedocs.io/en/latest/" 29 | issues = "https://github.com/sigmf/sigmf-python/issues" 30 | "Specification (HTML)" = "https://sigmf.org/" 31 | "Specification (PDF)" = "https://sigmf.github.io/SigMF/sigmf-spec.pdf" 32 | "Specification (Repo)" = "https://github.com/sigmf/SigMF" 33 | 34 | [project.scripts] 35 | sigmf_validate = "sigmf.validate:main" 36 | sigmf_convert_wav = "sigmf.apps.convert_wav:main [apps]" 37 | [project.optional-dependencies] 38 | test = [ 39 | "pylint", 40 | "pytest", 41 | "pytest-cov", 42 | "hypothesis", # next-gen testing framework 43 | ] 44 | apps = [ 45 | "scipy", # for wav i/o 46 | ] 47 | 48 | [tool.setuptools] 49 | packages = ["sigmf"] 50 | [tool.setuptools.dynamic] 51 | version = {attr = "sigmf.__version__"} 52 | readme = {file = ["README.md"], content-type = "text/markdown"} 53 | [tool.setuptools.package-data] 54 | sigmf = ["*.json"] 55 | 56 | [build-system] 57 | requires = ["setuptools>=65.0", "setuptools-scm"] 58 | build-backend = "setuptools.build_meta" 59 | 60 | [tool.coverage.run] 61 | branch = true 62 | source = ["sigmf", "tests"] 63 | # -rA captures stdout from all tests and places it after the pytest summary 64 | command_line = "-m pytest -rA --doctest-modules --junitxml=pytest.xml" 65 | 66 | [tool.pytest.ini_options] 67 | addopts = "--doctest-modules" 68 | 69 | [tool.pylint] 70 | [tool.pylint.main] 71 | load-plugins = [ 72 | "pylint.extensions.typing", 73 | "pylint.extensions.docparams", 74 | ] 75 | exit-zero = true 76 | [tool.pylint.messages_control] 77 | disable = [ 78 | "logging-not-lazy", 79 | "missing-module-docstring", 80 | "import-error", 81 | "unspecified-encoding", 82 | ] 83 | max-line-length = 120 84 | [tool.pylint.REPORTS] 85 | # omit from the similarity reports 86 | ignore-comments = 'yes' 87 | ignore-docstrings = 'yes' 88 | ignore-imports = 'yes' 89 | ignore-signatures = 'yes' 90 | min-similarity-lines = 4 91 | 92 | [tool.pytype] 93 | inputs = ['sigmf', 'tests'] 94 | 95 | [tool.black] 96 | line-length = 120 97 | 98 | [tool.isort] 99 | profile = "black" 100 | 101 | [tool.tox] 102 | legacy_tox_ini = ''' 103 | [tox] 104 | skip_missing_interpreters = True 105 | envlist = py{37,38,39,310,311,312,313} 106 | 107 | [testenv] 108 | usedevelop = True 109 | deps = .[test,apps] 110 | commands = coverage run 111 | ''' 112 | -------------------------------------------------------------------------------- /sigmf/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | # version of this python module 8 | __version__ = "1.2.10" 9 | # matching version of the SigMF specification 10 | __specification__ = "1.2.5" 11 | 12 | from . import archive, archivereader, error, schema, sigmffile, utils, validate 13 | from .archive import SigMFArchive 14 | from .archivereader import SigMFArchiveReader 15 | from .sigmffile import SigMFCollection, SigMFFile 16 | -------------------------------------------------------------------------------- /sigmf/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sigmf/sigmf-python/2ae107f0e34ae4d3cf8a4b23d39803e85839a628/sigmf/apps/__init__.py -------------------------------------------------------------------------------- /sigmf/apps/convert_wav.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """converter for wav containers""" 8 | 9 | import argparse 10 | import getpass 11 | import logging 12 | import tempfile 13 | from datetime import datetime, timezone 14 | from os import PathLike 15 | from pathlib import Path 16 | from typing import Optional 17 | 18 | from scipy.io import wavfile 19 | 20 | from .. import SigMFFile 21 | from .. import __version__ as toolversion 22 | from ..sigmffile import get_sigmf_filenames 23 | from ..utils import SIGMF_DATETIME_ISO8601_FMT, get_data_type_str 24 | 25 | log = logging.getLogger() 26 | 27 | 28 | def convert_wav( 29 | wav_path: str, 30 | out_path: Optional[str] = None, 31 | author: Optional[str] = None, 32 | ) -> PathLike: 33 | """ 34 | Read a wav and write a sigmf archive. 
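    | 
    |     Parameters
    |     ----------
    |     wav_path : str
    |         Path of the wav recording to convert.
    |     out_path : str, optional
    |         Output path of the archive; defaults to the stem of `wav_path`.
    |     author : str, optional
    |         Value for the author metadata field; defaults to the current user.
    | 
    |     Returns
    |     -------
    |     PathLike
    |         Path of the `.sigmf` archive that was written.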
35 | """ 36 | wav_path = Path(wav_path) 37 | wav_stem = wav_path.stem 38 | samp_rate, wav_data = wavfile.read(wav_path) 39 | 40 | global_info = { 41 | SigMFFile.AUTHOR_KEY: getpass.getuser() if author is None else author, 42 | SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), 43 | SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", 44 | SigMFFile.NUM_CHANNELS_KEY: 1 if len(wav_data.shape) < 2 else wav_data.shape[1], 45 | SigMFFile.RECORDER_KEY: "Official SigMF wav converter", 46 | SigMFFile.SAMPLE_RATE_KEY: samp_rate, 47 | } 48 | 49 | modify_time = wav_path.lstat().st_mtime 50 | wav_datetime = datetime.fromtimestamp(modify_time, tz=timezone.utc) 51 | 52 | capture_info = { 53 | SigMFFile.START_INDEX_KEY: 0, 54 | SigMFFile.DATETIME_KEY: wav_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), 55 | } 56 | 57 | temp_dir = Path(tempfile.mkdtemp()) 58 | if out_path is None: 59 | # extension will be changed 60 | out_path = Path(wav_stem) 61 | else: 62 | out_path = Path(out_path) 63 | filenames = get_sigmf_filenames(out_path) 64 | 65 | data_path = temp_dir / filenames["data_fn"] 66 | wav_data.tofile(data_path) 67 | 68 | meta = SigMFFile(data_file=data_path, global_info=global_info) 69 | meta.add_capture(0, metadata=capture_info) 70 | log.debug("created %r", meta) 71 | 72 | arc_path = filenames["archive_fn"] 73 | meta.tofile(arc_path, toarchive=True) 74 | log.info("wrote %s", arc_path) 75 | return arc_path 76 | 77 | 78 | def main() -> None: 79 | """ 80 | entry-point for sigmf_convert_wav 81 | """ 82 | parser = argparse.ArgumentParser(description="Convert wav to sigmf archive.") 83 | parser.add_argument("input", type=str, help="wav path") 84 | parser.add_argument("--author", type=str, default=None, help=f"set {SigMFFile.AUTHOR_KEY} metadata") 85 | parser.add_argument("-v", "--verbose", action="count", default=0) 86 | parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") 87 | args = parser.parse_args() 88 | 89 | level_lut = { 90 | 0: logging.WARNING, 91 | 1: logging.INFO, 92 | 2: logging.DEBUG, 93 | } 94 | logging.basicConfig(level=level_lut[min(args.verbose, 2)]) 95 | 96 | _ = convert_wav( 97 | wav_path=args.input, 98 | author=args.author, 99 | ) 100 | 101 | 102 | if __name__ == "__main__": 103 | main() 104 | -------------------------------------------------------------------------------- /sigmf/archive.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Create and extract SigMF archives.""" 8 | 9 | import io 10 | import shutil 11 | import tarfile 12 | import tempfile 13 | from pathlib import Path 14 | 15 | from .error import SigMFFileError 16 | 17 | SIGMF_ARCHIVE_EXT = ".sigmf" 18 | SIGMF_METADATA_EXT = ".sigmf-meta" 19 | SIGMF_DATASET_EXT = ".sigmf-data" 20 | SIGMF_COLLECTION_EXT = ".sigmf-collection" 21 | 22 | 23 | class SigMFArchive: 24 | """ 25 | Archive a SigMFFile 26 | 27 | A `.sigmf` file must include both valid metadata and data. 28 | If `self.data_file` is not set or the requested output file 29 | is not writable, raises `SigMFFileError`. 30 | 31 | Parameters 32 | ---------- 33 | 34 | sigmffile : SigMFFile 35 | A SigMFFile object with valid metadata and data_file. 36 | 37 | name : PathLike | str | bytes 38 | Path to archive file to create. If file exists, overwrite. 39 | If `name` doesn't end in .sigmf, it will be appended. 
 40 |         For example: if `name` == "/tmp/archive1", then the
 41 |         following archive will be created:
 42 |             /tmp/archive1.sigmf
 43 |             - archive1/
 44 |                 - archive1.sigmf-meta
 45 |                 - archive1.sigmf-data
 46 | 
 47 |     fileobj : BufferedWriter
 48 |         If `fileobj` is specified, it is used as an alternative to
 49 |         a file object opened in binary mode for `name`. It is
 50 |         supposed to be at position 0. `name` is not required, but
 51 |         if specified will be used to determine the directory and
 52 |         file names within the archive. `fileobj` won't be closed.
 53 |         For example: if `name` == "archive1" and fileobj is given,
 54 |         a tar archive will be written to fileobj with the
 55 |         following structure:
 56 |             - archive1/
 57 |                 - archive1.sigmf-meta
 58 |                 - archive1.sigmf-data
 59 |     """
 60 | 
 61 |     def __init__(self, sigmffile, name=None, fileobj=None):
 62 |         is_buffer = fileobj is not None
 63 |         self.sigmffile = sigmffile
 64 |         self.path, arcname, fileobj = self._resolve(name, fileobj)
 65 | 
 66 |         self._ensure_data_file_set()
 67 |         self._validate()
 68 | 
 69 |         tar = tarfile.TarFile(mode="w", fileobj=fileobj, format=tarfile.PAX_FORMAT)
 70 |         tmpdir = Path(tempfile.mkdtemp())
 71 |         meta_path = tmpdir / (arcname + SIGMF_METADATA_EXT)
 72 |         data_path = tmpdir / (arcname + SIGMF_DATASET_EXT)
 73 | 
 74 |         # write files
 75 |         with open(meta_path, "w") as handle:
 76 |             self.sigmffile.dump(handle)
 77 |         if isinstance(self.sigmffile.data_buffer, io.BytesIO):
 78 |             # write data buffer to archive
 79 |             self.sigmffile.data_file = data_path
 80 |             with open(data_path, "wb") as handle:
 81 |                 handle.write(self.sigmffile.data_buffer.getbuffer())
 82 |         else:
 83 |             # copy data to archive
 84 |             shutil.copy(self.sigmffile.data_file, data_path)
 85 |         tar.add(tmpdir, arcname=arcname, filter=self.chmod)
 86 |         # close files & remove tmpdir
 87 |         tar.close()
 88 |         if not is_buffer:
 89 |             # only close fileobj if we aren't working w/a buffer
 90 |             fileobj.close()
 91 |         shutil.rmtree(tmpdir)
 92 | 
 93 |     @staticmethod
 94 |     def chmod(tarinfo: tarfile.TarInfo):
 95 |         """permission filter for writing tar files"""
 96 |         if tarinfo.isdir():
 97 |             tarinfo.mode = 0o755 # drwxr-xr-x
 98 |         else:
 99 |             tarinfo.mode = 0o644 # -rw-r--r--
100 |         return tarinfo
101 | 
102 |     def _ensure_data_file_set(self):
103 |         if not self.sigmffile.data_file and not isinstance(self.sigmffile.data_buffer, io.BytesIO):
104 |             raise SigMFFileError("No data file in SigMFFile; use `set_data_file` before archiving.")
105 | 
106 |     def _validate(self):
107 |         self.sigmffile.validate()
108 | 
109 |     def _resolve(self, name, fileobj):
110 |         """
111 |         Resolve both (name, fileobj) into (path, arcname, fileobj) given either or both.
112 | 
113 |         Returns
114 |         -------
115 |         path : PathLike
116 |             Path of the archive file.
117 |         arcname : str
118 |             Name of the sigmf object within the archive.
119 |         fileobj : BufferedWriter
120 |             Open file handle object.
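    | 
    |         Raises
    |         ------
    |         SigMFFileError
    |             If `fileobj` is not byte-writable or lacks a usable name, if
    |             `name` has the wrong extension or cannot be opened for writing,
    |             or if neither `name` nor `fileobj` was provided.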
121 | """ 122 | if fileobj: 123 | try: 124 | # exception if not byte-writable 125 | fileobj.write(bytes()) 126 | # exception if no name property of handle 127 | path = Path(fileobj.name) 128 | if not name: 129 | arcname = path.stem 130 | else: 131 | arcname = name 132 | except io.UnsupportedOperation as exc: 133 | raise SigMFFileError(f"fileobj {fileobj} is not byte-writable.") from exc 134 | except AttributeError as exc: 135 | raise SigMFFileError(f"fileobj {fileobj} is invalid.") from exc 136 | elif name: 137 | path = Path(name) 138 | # ensure name has correct suffix if it exists 139 | if path.suffix == "": 140 | # add extension if none was given 141 | path = path.with_suffix(SIGMF_ARCHIVE_EXT) 142 | elif path.suffix != SIGMF_ARCHIVE_EXT: 143 | # ensure suffix is correct 144 | raise SigMFFileError(f"Invalid extension ({path.suffix} != {SIGMF_ARCHIVE_EXT}).") 145 | arcname = path.stem 146 | 147 | try: 148 | fileobj = open(path, "wb") 149 | except (OSError, IOError) as exc: 150 | raise SigMFFileError(f"Can't open {name} for writing.") from exc 151 | else: 152 | raise SigMFFileError("Either `name` or `fileobj` needs to be defined.") 153 | 154 | return path, arcname, fileobj 155 | -------------------------------------------------------------------------------- /sigmf/archivereader.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/SigMF 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Access SigMF archives without extracting them.""" 8 | 9 | import io 10 | import tarfile 11 | from pathlib import Path 12 | 13 | from . import __version__ 14 | from .archive import SIGMF_ARCHIVE_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT 15 | from .error import SigMFFileError 16 | from .sigmffile import SigMFFile 17 | 18 | 19 | class SigMFArchiveReader: 20 | """ 21 | Access data within SigMF archive tarball in-place without extracting. 22 | 23 | Parameters 24 | ---------- 25 | name : str | bytes | PathLike, optional 26 | Optional path to archive file to access. 27 | skip_checksum : bool, optional 28 | Skip dataset checksum calculation. 29 | map_readonly : bool, optional 30 | Indicate whether assignments on the numpy.memmap are allowed. 31 | archive_buffer : buffer, optional 32 | 33 | 34 | Raises 35 | ------ 36 | SigMFError 37 | Archive file does not exist or is improperly formatted. 38 | ValueError 39 | If invalid arguments. 40 | ValidationError 41 | If metadata is invalid. 
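    | 
    |     Examples
    |     --------
    |     A minimal sketch, assuming ``example.sigmf`` exists on disk::
    | 
    |         reader = SigMFArchiveReader("example.sigmf")
    |         samples = reader[:100]  # slice decoded samples without extracting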
42 | """ 43 | 44 | def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None): 45 | if name is not None: 46 | path = Path(name) 47 | if path.suffix != SIGMF_ARCHIVE_EXT: 48 | err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT) 49 | raise SigMFFileError(err) 50 | 51 | tar_obj = tarfile.open(path) 52 | 53 | elif archive_buffer is not None: 54 | tar_obj = tarfile.open(fileobj=archive_buffer, mode="r:") 55 | 56 | else: 57 | raise ValueError("Either `name` or `archive_buffer` must be not None.") 58 | 59 | json_contents = None 60 | data_offset = None 61 | data_size_bytes = None 62 | 63 | for memb in tar_obj.getmembers(): 64 | if memb.isdir(): # memb.type == tarfile.DIRTYPE: 65 | # the directory structure will be reflected in the member name 66 | continue 67 | 68 | elif memb.isfile(): # memb.type == tarfile.REGTYPE: 69 | if memb.name.endswith(SIGMF_METADATA_EXT): 70 | json_contents = memb.name 71 | if data_offset is None: 72 | # consider a warnings.warn() here; the datafile should be earlier in the 73 | # archive than the metadata, so that updating it (like, adding an annotation) 74 | # is fast. 75 | pass 76 | with tar_obj.extractfile(memb) as memb_fid: 77 | json_contents = memb_fid.read() 78 | 79 | elif memb.name.endswith(SIGMF_DATASET_EXT): 80 | data_offset = memb.offset_data 81 | data_size_bytes = memb.size 82 | with tar_obj.extractfile(memb) as memb_fid: 83 | data_buffer = io.BytesIO(memb_fid.read()) 84 | 85 | else: 86 | print(f"A regular file {memb.name} was found but ignored in the archive") 87 | else: 88 | print(f"A member of type {memb.type} and name {memb.name} was found but not handled, just FYI.") 89 | 90 | if data_offset is None: 91 | raise SigMFFileError("No .sigmf-data file found in archive!") 92 | 93 | self.sigmffile = SigMFFile(metadata=json_contents) 94 | self.sigmffile.validate() 95 | 96 | self.sigmffile.set_data_file( 97 | data_buffer=data_buffer, 98 | skip_checksum=skip_checksum, 99 | size_bytes=data_size_bytes, 100 | map_readonly=map_readonly, 101 | ) 102 | 103 | self.ndim = self.sigmffile.ndim 104 | self.shape = self.sigmffile.shape 105 | 106 | tar_obj.close() 107 | 108 | def __len__(self): 109 | return self.sigmffile.__len__() 110 | 111 | def __iter__(self): 112 | return self.sigmffile.__iter__() 113 | 114 | def __getitem__(self, sli): 115 | return self.sigmffile.__getitem__(sli) 116 | -------------------------------------------------------------------------------- /sigmf/error.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Defines SigMF exception classes.""" 8 | 9 | 10 | class SigMFError(Exception): 11 | """SigMF base exception.""" 12 | 13 | 14 | class SigMFValidationError(SigMFError): 15 | """Exceptions related to validating SigMF metadata.""" 16 | 17 | 18 | class SigMFAccessError(SigMFError): 19 | """Exceptions related to accessing the contents of SigMF metadata, notably 20 | when expected fields are missing or accessing out of bounds captures.""" 21 | 22 | 23 | class SigMFFileError(SigMFError): 24 | """Exceptions related to reading or writing SigMF files or archives.""" 25 | -------------------------------------------------------------------------------- /sigmf/schema-collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "$id": "https://github.com/sigmf/SigMF/spec/1.2.0/collection-schema", 3 | "$schema": "https://json-schema.org/draft/2020-12/schema", 4 | "default": {}, 5 | "required": ["collection"], 6 | "type": "object", 7 | "properties": { 8 | "collection": { 9 | "default": {}, 10 | "description": "The `sigmf-collection` file contains metadata in a single top-level Object called a `collection`. The Collection Object contains key/value pairs that describe relationships between SigMF Recordings.\\nn The Collection Object associates SigMF Recordings together by specifying `SigMF Recording Objects` in the `core:streams` JSON array. Each Object describes a specific associated SigMF Recording.\\nn The following rules apply to SigMF Collections:\n\n 1. The Collection Object MUST be the only top-level Object in the file.\n\n 2. Keys in the Collection Object SHOULD use SigMF Recording Objects when referencing SigMF Recordings.\n\n 3. SigMF Recording Objects MUST contain both a `name` field, which is the base-name of a SigMF Recording, and a `hash` which is the SHA512 hash of the Recording Metadata file `[base-name].sigmf-meta`.\n\n 4. 
SigMF Recording Objects MUST appear in a JSON array.\\nn Example `top-level.sigmf-collection` file:\\begin{verbatim}{\n\"collection\": {\n \"core:version\": \"1.2.0\",\n \"core:extensions\" : [\n {\n \"name\": \"antenna\",\n \"version\": \"1.0.0\",\n \"optional\": true\n }\n ],\n \"antenna:hagl\": 120,\n \"antenna:azimuth_angle\": 98,\n \"core:streams\": [\n {\n \"name\": \"example-channel-0-basename\",\n \"hash\": \"b4071db26f5c7b0c70f5066eb9bc3a8b506df0f5af09991ba481f63f97f7f48e9396584bc1c296650cd3d47bc4ad2c5b72d2561078fb6eb16151d2898c9f84c4\"\n },\n {\n \"name\": \"example-channel-1-basename\",\n \"hash\": \"7132aa240e4d8505471cded716073141ae190f763bfca3c27edd8484348d6693d0e8d3427d0bf1990e687a6a40242d514e5d1995642bc39384e9a37a211655d7\"\n }\n ]\n }\n}\\end{verbatim}", 11 | "required": ["core:version"], 12 | "type": "object", 13 | "properties": { 14 | "core:version": { 15 | "description": "The version of the SigMF specification used to create the Collection file.", 16 | "examples": ["1.2.0"], 17 | "type": "string" 18 | }, 19 | "core:description": { 20 | "default": "", 21 | "description": "A text description of the SigMF Collection.", 22 | "type": "string" 23 | }, 24 | "core:author": { 25 | "default": "", 26 | "description": "A text identifier for the author potentially including name, handle, email, and/or other ID like Amateur Call Sign.", 27 | "examples": ["Bruce Wayne bruce@waynetech.com", "Bruce (K3X)"], 28 | "type": "string" 29 | }, 30 | "core:collection_doi": { 31 | "default": "", 32 | "description": "The registered DOI (ISO 26324) for a Collection.", 33 | "type": "string" 34 | }, 35 | "core:license": { 36 | "default": "", 37 | "description": "A URL for the license document under which this Collection metadata is offered.", 38 | "examples": ["https://creativecommons.org/licenses/by-sa/4.0/"], 39 | "type": "string" 40 | }, 41 | "core:extensions": { 42 | "default": [], 43 | "description": "The `core:extensions` field in the Global Object is an array of extension objects that describe SigMF extensions. 
Extension Objects MUST contain the three key/value pairs defined in Table (FIX REF), and MUST NOT contain any other fields.", 44 | "type": "array", 45 | "additionalItems": true, 46 | "items": { 47 | "anyOf": [ 48 | { 49 | "type": "object", 50 | "title": "The first anyOf schema", 51 | "description": "An explanation about the purpose of this instance.", 52 | "default": {}, 53 | "examples": [ 54 | { 55 | "name": "capture_details", 56 | "version": "1.0.0", 57 | "optional": false 58 | } 59 | ], 60 | "required": ["name", "version", "optional"], 61 | "properties": { 62 | "name": { 63 | "default": "", 64 | "description": "The name of the SigMF extension namespace.", 65 | "type": "string" 66 | }, 67 | "version": { 68 | "default": "", 69 | "description": "The version of the extension namespace specification used.", 70 | "type": "string" 71 | }, 72 | "optional": { 73 | "default": false, 74 | "description": "If this field is `true`, the extension is REQUIRED to parse this Recording.", 75 | "type": "boolean" 76 | } 77 | }, 78 | "additionalProperties": true 79 | } 80 | ] 81 | } 82 | }, 83 | "core:streams": { 84 | "default": [], 85 | "description": "An ordered array of SigMF Recording Tuples, indicating multiple recorded streams of data (e.g., channels from a phased array).", 86 | "type": "array", 87 | "additionalItems": true, 88 | "items": { 89 | "anyOf": [ 90 | { 91 | "default": [], 92 | "examples": [["example-channel-0-basename", "hash"]], 93 | "type": "array", 94 | "additionalItems": true, 95 | "items": { 96 | "anyOf": [ 97 | { 98 | "default": "", 99 | "type": "string" 100 | } 101 | ] 102 | } 103 | }, 104 | { 105 | "default": [], 106 | "examples": [["example-channel-1-basename", "hash"]], 107 | "type": "array", 108 | "additionalItems": true, 109 | "items": { 110 | "anyOf": [ 111 | { 112 | "default": "", 113 | "type": "string" 114 | } 115 | ] 116 | } 117 | } 118 | ] 119 | } 120 | } 121 | }, 122 | "additionalProperties": true 123 | } 124 | }, 125 | "additionalProperties": true 126 | } 127 | -------------------------------------------------------------------------------- /sigmf/schema-meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "$id": "https://raw.githubusercontent.com/sigmf/SigMF/v1.2.5/sigmf-schema.json", 3 | "$schema": "https://json-schema.org/draft/2020-12/schema", 4 | "title": "Schema for SigMF Meta Files", 5 | "description": "SigMF specifies a way to describe sets of recorded digital signal samples with metadata written in JSON. SigMF can be used to describe general information about a collection of samples, the characteristics of the system that generated the samples, features of signals themselves, and the relationship between different recordings.", 6 | "type": "object", 7 | "required": [ 8 | "global", 9 | "captures", 10 | "annotations" 11 | ], 12 | "properties": { 13 | "global": { 14 | "description": "The `global` object consists of key/value pairs that provide information applicable to the entire Dataset. 
It contains the information that is minimally necessary to open and parse the Dataset file, as well as general information about the Recording itself.", 15 | "required": [ 16 | "core:datatype", 17 | "core:version" 18 | ], 19 | "type": "object", 20 | "properties": { 21 | "core:datatype": { 22 | "description": "The SigMF Dataset format of the stored samples in the Dataset file.", 23 | "examples": [ 24 | "cf32_le", 25 | "ri16_le" 26 | ], 27 | "default": "cf32_le", 28 | "pattern": "^(c|r)(f32|f64|i32|i16|u32|u16|i8|u8)(_le|_be)?", 29 | "type": "string" 30 | }, 31 | "core:sample_rate": { 32 | "description": "The sample rate of the signal in samples per second.", 33 | "minimum": 1, 34 | "maximum": 1000000000000, 35 | "type": "number" 36 | }, 37 | "core:author": { 38 | "description": "A text identifier for the author potentially including name, handle, email, and/or other ID like Amateur Call Sign", 39 | "examples": [ 40 | "Bruce Wayne bruce@waynetech.com", 41 | "Bruce (K3X)" 42 | ], 43 | "type": "string" 44 | }, 45 | "core:collection": { 46 | "description": "The base filename of a `collection` with which this Recording is associated. This field is used to indicate that this Recording is part of a SigMF Collection (described later in this document). It is strongly RECOMMENDED that if you are building a Collection, that each Recording referenced by that Collection use this field to associate up to the relevant `sigmf-collection` file.", 47 | "type": "string" 48 | }, 49 | "core:dataset": { 50 | "description": "The full filename of the Dataset file this Metadata file describes, used ONLY with Non-Conforming Datasets. If provided, this string MUST be the complete filename of the Dataset file, including the extension. The Dataset file must be in the same directory as the .sigmf-meta file; note that this string only includes the filename, not directory. If a Recording does not have this field, it MUST have a compliant SigMF Dataset (NOT a Non-Conforming Dataset) which MUST use the same base filename as the Metadata file and use the `.sigmf-data` extension. If a SigMF Recording or Archive is renamed this field MUST also be updated, because of this it is RECOMMENDED that Compliant SigMF Recordings avoid use of this field. This field SHOULD NOT be used in conjunction the `core:metadata_only` field. If both fields exist and the file specified by `core:dataset` exists, then `core:metadata_only` SHOULD be ignored by the application.", 51 | "type": "string", 52 | "pattern": "^[^\\/\\\\:*?\"<>|]+(\\.[^\\/\\\\:*?\"<>|]+)*" 53 | }, 54 | "core:data_doi": { 55 | "description": "The registered DOI (ISO 26324) for a Recording's Dataset file.", 56 | "type": "string" 57 | }, 58 | "core:description": { 59 | "description": "A text description of the SigMF Recording.", 60 | "type": "string" 61 | }, 62 | "core:hw": { 63 | "description": "A text description of the hardware used to make the Recording.", 64 | "type": "string" 65 | }, 66 | "core:license": { 67 | "description": "A URL for the license document under which the Recording is offered. (RFC 3986)", 68 | "examples": [ 69 | "https://creativecommons.org/licenses/by-sa/4.0/" 70 | ], 71 | "format": "uri", 72 | "type": "string" 73 | }, 74 | "core:metadata_only": { 75 | "description": "Indicates the Metadata file is intentionally distributed without the Dataset. This field should be defined and set to `true` to indicate that the Metadata file is being distributed without a corresponding `.sigmf-data` file. 
This may be done when the Dataset will be generated dynamically from information in the schema, or because just the schema is sufficient for the intended application. A metadata only distribution is not a SigMF Recording. If a Compliant SigMF Recording uses this field, it MAY indicate that the Dataset was dynamically generated from the metadata. This field MAY NOT be used in conjunction with Non-Conforming Datasets or the `core:dataset` field. ", 76 | "type": "boolean" 77 | }, 78 | "core:meta_doi": { 79 | "description": "The registered DOI (ISO 26324) for a Recording's Metadata file.", 80 | "type": "string" 81 | }, 82 | "core:num_channels": { 83 | "description": "Number of interleaved channels in the Dataset file, if omitted this is implied to be 1, for multiple channels of IQ data, it is RECOMMENDED to use SigMF Collections instead of num_channels for widest application support.", 84 | "default": 1, 85 | "minimum": 1, 86 | "maximum": 9223372036854775807, 87 | "type": "integer" 88 | }, 89 | "core:offset": { 90 | "description": "The index number of the first sample in the Dataset. If not provided, this value defaults to zero. Typically used when a Recording is split over multiple files. All sample indices in SigMF are absolute, and so all other indices referenced in metadata for this recording SHOULD be greater than or equal to this value.", 91 | "default": 0, 92 | "minimum": 0, 93 | "!comment": "The maximum value for this property is equal to 2^63 - 1, making it easy to fit into a signed 64-bit integer.", 94 | "maximum": 9223372036854775807, 95 | "type": "integer" 96 | }, 97 | "core:recorder": { 98 | "description": "The name of the software used to make this SigMF Recording.", 99 | "type": "string" 100 | }, 101 | "core:sha512": { 102 | "description": "The SHA512 hash of the Dataset file associated with the SigMF file.", 103 | "type": "string", 104 | "pattern": "^[0-9a-fA-F]{128}" 105 | }, 106 | "core:trailing_bytes": { 107 | "description": "The number of bytes to ignore at the end of a Dataset, used ONLY with Non-Conforming Datasets. This field is used with Non-Conforming Datasets to indicate some number of bytes that trail the sample data in the NCD file that should be ignored for processing. This can be used to ignore footer data in non-SigMF filetypes. ", 108 | "type": "integer", 109 | "minimum": 0, 110 | "maximum": 9223372036854775807 111 | }, 112 | "core:version": { 113 | "description": "The version of the SigMF specification used to create the Metadata file, in the format X.Y.Z.", 114 | "pattern": "^\\d+\\.\\d+\\.\\d", 115 | "type": "string" 116 | }, 117 | "core:geolocation": { 118 | "description": "The location of the Recording system (note, using the Captures scope `geolocation` field is preferred). See the `geolocation` field within the Captures metadata for details. 
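For example (an illustrative sketch reusing the coordinates from the Captures example): \\begin{verbatim}\"core:geolocation\": {\n \"type\": \"Point\",\n \"coordinates\": [-107.6183682, 34.0787916]\n}\\end{verbatim} 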
While using the Captures scope `geolocation` is preferred, fixed recording systems may still provide position information within the Global object so it is RECOMMENDED that applications check and use this field if the Captures `geolocation` field is not present.", 119 | "type": "object", 120 | "required": [ 121 | "type", 122 | "coordinates" 123 | ], 124 | "properties": { 125 | "type": { 126 | "type": "string", 127 | "enum": [ 128 | "Point" 129 | ] 130 | }, 131 | "coordinates": { 132 | "type": "array", 133 | "minItems": 2, 134 | "maxItems": 3, 135 | "items": { 136 | "type": "number" 137 | } 138 | }, 139 | "bbox": { 140 | "type": "array", 141 | "minItems": 4, 142 | "items": { 143 | "type": "number" 144 | } 145 | } 146 | } 147 | }, 148 | "core:extensions": { 149 | "description": "The `core:extensions` field in the Global Object is an array of extension objects that describe SigMF extensions. Extension Objects MUST contain the three key/value pairs defined below, and MUST NOT contain any other fields. \\rowcolors{1}{}{lightblue}\\begin{center}\\begin{tabular}{lllp{3.8in}} \\toprule \\textbf{Name} & \\textbf{Required} & \\textbf{Type} & \\textbf{Description} \\\\ \\midrule name & true & string & The name of the SigMF extension namespace. \\\\ version & true & string & The version of the extension namespace specification used. \\\\ optional & true & boolean & If this field is `false`, then the application MUST support this extension in order to parse the Recording; if the application does not support this extension, it SHOULD report an error. \\\\ \\bottomrule \\end{tabular} \\end{center} \\\\ In the example below, `extension-01` is optional, so the application may ignore it if it does not support `extension-01`. But `extension-02` is not optional, so the application must support `extension-02` in order to parse the Recording. \\begin{verbatim}\"global\": {\n ...\n \"core:extensions\" : [\n {\n \"name\": \"extension-01\",\n \"version\": \"0.0.5\",\n \"optional\": true\n },\n {\n \"name\": \"extension-02\",\n \"version\": \"1.2.3\",\n \"optional\": false\n }\n ]\n ...\n }\\end{verbatim}", 150 | "type": "array", 151 | "default": [], 152 | "additionalItems": false, 153 | "items": { 154 | "type": "object", 155 | "required": [ 156 | "name", 157 | "version", 158 | "optional" 159 | ], 160 | "properties": { 161 | "name": { 162 | "description": "The name of the SigMF extension namespace.", 163 | "type": "string" 164 | }, 165 | "version": { 166 | "description": "The version of the extension namespace specification used.", 167 | "examples": [ 168 | "1.2.0" 169 | ], 170 | "type": "string" 171 | }, 172 | "optional": { 173 | "description": "If this field is `false`, the extension is REQUIRED to parse this Recording.", 174 | "type": "boolean" 175 | } 176 | }, 177 | "additionalProperties": false 178 | } 179 | } 180 | }, 181 | "additionalProperties": true 182 | }, 183 | "captures": { 184 | "description": "The `captures` Object is an array of capture segment objects that describe the parameters of the signal capture. It MUST be sorted by the value of each capture segment's `core:sample_start` key, ascending. Capture Segment Objects are composed of key/value pairs, and each Segment describes a chunk of samples that can be mapped into memory for processing. Each Segment MUST contain a `core:sample_start` key/value pair, which indicates the sample index relative to the Dataset where this Segment's metadata applies. 
The fields that are described within a Capture Segment are scoped to that Segment only and need to be explicitly declared again if they are valid in subsequent Segments. While it is recommended there be at least one segment defined, if there are no items in the captures array it is implied that a single capture exists with `core:sample_start` equal to zero (no other metadata is implied), i.e., `\"captures\": []` implies `\"captures\": [{\"core:sample_start\": 0}]`.", 185 | "default": [], 186 | "type": "array", 187 | "additionalItems": false, 188 | "items": { 189 | "type": "object", 190 | "required": [ 191 | "core:sample_start" 192 | ], 193 | "properties": { 194 | "core:sample_start": { 195 | "default": 0, 196 | "description": "Index of first sample of this chunk. This field specifies the sample index where this Segment takes effect relative to the recorded Dataset file. If the Dataset is a SigMF Dataset file, this field can be immediately mapped to physical disk location since conforming Datasets only contain sample data. ", 197 | "minimum": 0, 198 | "maximum": 9223372036854775807, 199 | "type": "integer" 200 | }, 201 | "core:datetime": { 202 | "description": "An ISO-8601 string indicating the timestamp of the sample index specified by sample_start. This key/value pair MUST be an ISO-8601 string, as defined by [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt), where the only allowed `time-offset` is `Z`, indicating the UTC/Zulu timezone. The ABNF description is: \\begin{verbatim} date-fullyear = 4DIGIT \n date-month = 2DIGIT ; 01-12 \n date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year \n\n time-hour = 2DIGIT ; 00-23 \n time-minute = 2DIGIT ; 00-59 \n time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules \n\n time-secfrac = \".\" 1*DIGIT \n time-offset = \"Z\" \n\n partial-time = time-hour \":\" time-minute \":\" time-second [time-secfrac] \n full-date = date-fullyear \"-\" date-month \"-\" date-mday \n full-time = partial-time time-offset \n\n date-time = full-date \"T\" full-time \\end{verbatim} Thus, timestamps take the form of `YYYY-MM-DDTHH:MM:SS.SSSZ`, where any number of digits for fractional seconds is permitted. ", 203 | "examples": [ 204 | "1955-11-05T14:00:00.000Z" 205 | ], 206 | "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?", 207 | "type": "string" 208 | }, 209 | "core:frequency": { 210 | "description": "The center frequency of the signal in Hz.", 211 | "type": "number", 212 | "minimum": -1000000000000, 213 | "maximum": 1000000000000, 214 | "examples": [ 215 | 915000000, 216 | 2400000000 217 | ] 218 | }, 219 | "core:global_index": { 220 | "description": "The index of the sample referenced by `sample_start` relative to an original sample stream. The entirety of which may not have been captured in a recorded Dataset. If omitted, this value SHOULD be treated as equal to `sample_start`. For example, some hardware devices are capable of 'counting' samples at the point of data conversion. This sample count is commonly used to indicate a discontinuity in the datastream between the hardware device and processing. For example, in the below Captures array, there are two Segments describing samples in a SigMF Dataset file. The first Segment begins at the start of the Dataset file. 
The second segment begins at sample index 500 relative to the recorded samples (and since this is a conforming SigMF Dataset, is physically located on-disk at location `sample_start * sizeof(sample)`), but the `global_index` reports this was actually sample number 1000 in the original datastream, indicating that 500 samples were lost before they could be recorded. \\begin{verbatim} ...\n \"captures\": [ \n { \n \"core:sample_start\": 0, \n \"core:global_index\": 0 \n }, \n { \n \"core:sample_start\": 500, \n \"core:global_index\": 1000 \n }\n ],\n ... \\end{verbatim} ", 221 | "type": "integer", 222 | "minimum": 0, 223 | "maximum": 9223372036854775807 224 | }, 225 | "core:header_bytes": { 226 | "description": "The number of bytes preceding a chunk of samples that are not sample data, used for NCDs. This field specifies a number of bytes that are not valid sample data that are physically located at the start of where the chunk of samples referenced by this Segment would otherwise begin. If omitted, this value SHOULD be treated as equal to zero. If included, the Dataset is by definition a Non-Conforming Dataset. For example, the below Metadata for a Non-Conforming Dataset contains two segments describing chunks of 8-bit complex samples (2 bytes per sample) recorded to disk with 4-byte headers that are not valid for processing. Thus, to map these two chunks of samples into memory, a reader application would map the `500 samples` (equal to `1000 bytes`) in the first Segment, starting at a file offset of `4 bytes`, and then the remainder of the file through EOF starting at a file offset of `1008 bytes` (equal to the size of the previous Segment of samples plus two headers). \\begin{samepage}\\begin{verbatim} { \n \"global\": { \n \"core:datatype\": \"cu8\", \n \"core:version\": \"1.2.0\", \n \"core:dataset\": \"non-conforming-dataset-01.dat\" \n }, \n \"captures\": [ \n { \n \"core:sample_start\": 0, \n \"core:header_bytes\": 4 \n }, \n { \n \"core:sample_start\": 500, \n \"core:header_bytes\": 4 \n }\n ],\n \"annotations\": []\n } \\end{verbatim}\\end{samepage}", 227 | "type": "integer", 228 | "minimum": 0, 229 | "maximum": 9223372036854775807 230 | }, 231 | "core:geolocation": { 232 | "description": "The location of the recording system at the start of this Captures segment, as a single RFC 7946 GeoJSON `point` Object. For moving emitters, this provides a rudimentary means to manage location through different captures segments. While `core:geolocation` is also allowed in the Global object for backwards compatibility reasons, adding it to Captures is preferred. Per the GeoJSON specification, the point coordinates use the WGS84 coordinate reference system and are `longitude`, `latitude` (REQUIRED, in decimal degrees), and `altitude` (OPTIONAL, in meters above the WGS84 ellipsoid) - in that order. An example including the altitude field is shown below: \\begin{verbatim} \"captures\": {\n ...\n \"core:geolocation\": {\n \"type\": \"Point\",\n \"coordinates\": [-107.6183682, 34.0787916, 2120.0]\n }\n ...\n } \\end{verbatim} GeoJSON permits the use of *Foreign Members* in GeoJSON documents per RFC 7946 Section 6.1. Because the SigMF requirement for the `geolocation` field is to be a valid GeoJSON `point` Object, users MAY include *Foreign Member* fields here for user-defined purposes (position valid indication, GNSS SV counts, dilution of precision, accuracy, etc.). It is strongly RECOMMENDED that all fields be documented in a SigMF Extension document. 
*Note:* Objects named `geometry` or `properties` are prohibited Foreign Members as specified in RFC 7946 Section 7.1.", 233 | "type": "object", 234 | "required": [ 235 | "type", 236 | "coordinates" 237 | ], 238 | "properties": { 239 | "type": { 240 | "type": "string", 241 | "enum": [ 242 | "Point" 243 | ] 244 | }, 245 | "coordinates": { 246 | "type": "array", 247 | "minItems": 2, 248 | "maxItems": 3, 249 | "items": { 250 | "type": "number" 251 | } 252 | }, 253 | "bbox": { 254 | "type": "array", 255 | "minItems": 4, 256 | "items": { 257 | "type": "number" 258 | } 259 | } 260 | } 261 | } 262 | }, 263 | "additionalProperties": true 264 | } 265 | }, 266 | "annotations": { 267 | "default": [], 268 | "description": "The `annotations` Object is an array of annotation segment objects that describe anything regarding the signal data not part of the Captures and Global objects. It MUST be sorted by the value of each Annotation Segment's `core:sample_start` key, ascending. Annotation segment Objects contain key/value pairs and MUST contain a `core:sample_start` key/value pair, which indicates the first index at which the rest of the Segment's key/value pairs apply. There is no limit to the number of annotations that can apply to the same group of samples. If two annotations have the same `sample_start`, there is no defined ordering between them. If `sample_count` is not provided, it SHOULD be assumed that the annotation applies from `sample_start` through the end of the corresponding capture, in all other cases `sample_count` MUST be provided. ", 269 | "type": "array", 270 | "additionalItems": true, 271 | "items": { 272 | "type": "object", 273 | "title": "Annotation", 274 | "required": [ 275 | "core:sample_start" 276 | ], 277 | "properties": { 278 | "core:sample_start": { 279 | "default": 0, 280 | "description": "The sample index at which this Segment takes effect.", 281 | "minimum": 0, 282 | "maximum": 9223372036854775807, 283 | "type": "integer" 284 | }, 285 | "core:sample_count": { 286 | "description": "The number of samples that this Segment applies to.", 287 | "type": "integer", 288 | "minimum": 0, 289 | "maximum": 9223372036854775807 290 | }, 291 | "core:freq_lower_edge": { 292 | "description": "The frequency (Hz) of the lower edge of the feature described by this annotation. The `freq_lower_edge` and `freq_upper_edge` fields SHOULD be at RF if the feature is at a known RF frequency. If there is no known center frequency (as defined by the `frequency` field in the relevant Capture Segment Object), or the center frequency is at baseband, the `freq_lower_edge` and `freq_upper_edge` fields SHOULD be relative to baseband. It is REQUIRED that both `freq_lower_edge` and `freq_upper_edge` be provided, or neither; the use of just one field is not allowed. ", 293 | "type": "number", 294 | "minimum": -1000000000000, 295 | "maximum": 1000000000000 296 | }, 297 | "core:freq_upper_edge": { 298 | "description": "The frequency (Hz) of the upper edge of the feature described by this annotation.", 299 | "type": "number", 300 | "minimum": -1000000000000, 301 | "maximum": 1000000000000 302 | }, 303 | "core:label": { 304 | "description": "A short form human/machine-readable label for the annotation. The `label` field MAY be used for any purpose, but it is RECOMMENDED that it be limited to no more than 20 characters as a common use is a short form GUI indicator. Similarly, it is RECOMMENDED that any user interface making use of this field be capable of displaying up to 20 characters. 
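For example, an annotation using a short label (illustrative values): \\begin{verbatim}{\n \"core:sample_start\": 1000,\n \"core:sample_count\": 4096,\n \"core:label\": \"LTE uplink\"\n}\\end{verbatim} 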
", 305 | "type": "string" 306 | }, 307 | "core:comment": { 308 | "description": "A human-readable comment, intended to be used for longer comments (it is recommended to use `label` for shorter text).", 309 | "type": "string" 310 | }, 311 | "core:generator": { 312 | "description": "Human-readable name of the entity that created this annotation.", 313 | "type": "string" 314 | }, 315 | "core:uuid": { 316 | "description": "RFC-4122 unique identifier.", 317 | "format": "uuid", 318 | "type": "string" 319 | } 320 | }, 321 | "additionalProperties": true 322 | } 323 | } 324 | }, 325 | "additionalProperties": false 326 | } 327 | -------------------------------------------------------------------------------- /sigmf/schema.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Schema IO""" 8 | 9 | import json 10 | from pathlib import Path 11 | 12 | from . import __version__ as toolversion 13 | 14 | SCHEMA_META = "schema-meta.json" 15 | SCHEMA_COLLECTION = "schema-collection.json" 16 | 17 | 18 | def get_schema(version=toolversion, schema_file=SCHEMA_META): 19 | """ 20 | Load JSON Schema to for either a `sigmf-meta` or `sigmf-collection`. 21 | 22 | TODO: In the future load specific schema versions. 23 | """ 24 | schema_dir = Path(__file__).parent 25 | with open(schema_dir / schema_file, "rb") as handle: 26 | schema = json.load(handle) 27 | return schema 28 | -------------------------------------------------------------------------------- /sigmf/sigmf_hash.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Hashing Functions""" 8 | 9 | import hashlib 10 | from pathlib import Path 11 | 12 | 13 | def calculate_sha512(filename=None, fileobj=None, offset=None, size=None): 14 | """ 15 | Return sha512 of file or fileobj. 16 | """ 17 | the_hash = hashlib.sha512() 18 | bytes_to_hash = size 19 | bytes_read = 0 20 | 21 | if filename is not None: 22 | fileobj = open(filename, "rb") 23 | if size is None: 24 | bytes_to_hash = Path(filename).stat().st_size 25 | else: 26 | fileobj.seek(offset) 27 | 28 | while bytes_read < bytes_to_hash: 29 | buff = fileobj.read(min(4096, (bytes_to_hash - bytes_read))) 30 | the_hash.update(buff) 31 | bytes_read += len(buff) 32 | 33 | if filename is not None: 34 | fileobj.close() 35 | 36 | return the_hash.hexdigest() 37 | -------------------------------------------------------------------------------- /sigmf/sigmffile.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """SigMFFile Object""" 8 | 9 | import codecs 10 | import io 11 | import json 12 | import warnings 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | from . 
import __specification__, __version__, schema, sigmf_hash, validate 19 | from .archive import ( 20 | SIGMF_ARCHIVE_EXT, 21 | SIGMF_COLLECTION_EXT, 22 | SIGMF_DATASET_EXT, 23 | SIGMF_METADATA_EXT, 24 | SigMFArchive, 25 | ) 26 | from .error import SigMFAccessError, SigMFError, SigMFFileError 27 | from .utils import dict_merge 28 | 29 | 30 | class SigMFMetafile: 31 | VALID_KEYS = {} 32 | 33 | def __init__(self): 34 | self.version = None 35 | self.schema = None 36 | self._metadata = None 37 | self.shape = None 38 | 39 | def __str__(self): 40 | return self.dumps() 41 | 42 | def __repr__(self): 43 | return f"SigMFFile({self})" 44 | 45 | def __iter__(self): 46 | """special method to iterate through samples""" 47 | self.iter_position = 0 48 | return self 49 | 50 | def ordered_metadata(self): 51 | """ 52 | Get a nicer representation of _metadata. Will sort keys, but put the 53 | top-level fields 'global', 'captures', 'annotations' in front. 54 | 55 | Returns 56 | ------- 57 | ordered_meta : OrderedDict 58 | Cleaner representation of _metadata with top-level keys correctly 59 | ordered and the rest of the keys sorted. 60 | """ 61 | ordered_meta = OrderedDict() 62 | for top_key in self.VALID_KEYS.keys(): 63 | assert top_key in self._metadata 64 | ordered_meta[top_key] = json.loads(json.dumps(self._metadata[top_key], sort_keys=True)) 65 | # If there are other top-level keys, they go later 66 | # TODO: sort potential `other` top-level keys 67 | for oth_key, oth_val in self._metadata.items(): 68 | if oth_key not in self.VALID_KEYS.keys(): 69 | ordered_meta[oth_key] = json.loads(json.dumps(oth_val, sort_keys=True)) 70 | return ordered_meta 71 | 72 | def dump(self, filep, pretty=True): 73 | """ 74 | Write metadata to a file. 75 | 76 | Parameters 77 | ---------- 78 | filep : object 79 | File pointer or something that json.dump() can handle. 80 | pretty : bool, default True 81 | When True will write more human-readable output, otherwise will be flat JSON. 82 | """ 83 | json.dump( 84 | self.ordered_metadata(), 85 | filep, 86 | indent=4 if pretty else None, 87 | separators=(",", ": ") if pretty else None, 88 | ) 89 | 90 | def dumps(self, pretty=True): 91 | """ 92 | Get a string representation of the metadata. 93 | 94 | Parameters 95 | ---------- 96 | pretty : bool, default True 97 | When True will write more human-readable output, otherwise will be flat JSON. 98 | 99 | Returns 100 | ------- 101 | string 102 | String representation of the metadata using json formatter. 
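Examples
--------
A short sketch; assumes `meta` is an already-initialized SigMFFile:

>>> flat = meta.dumps(pretty=False)  # single-line JSON
>>> human = meta.dumps()             # indented for readability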
103 | """ 104 | return json.dumps( 105 | self.ordered_metadata(), 106 | indent=4 if pretty else None, 107 | separators=(",", ": ") if pretty else None, 108 | ) 109 | 110 | 111 | class SigMFFile(SigMFMetafile): 112 | START_INDEX_KEY = "core:sample_start" 113 | LENGTH_INDEX_KEY = "core:sample_count" 114 | GLOBAL_INDEX_KEY = "core:global_index" 115 | START_OFFSET_KEY = "core:offset" 116 | NUM_CHANNELS_KEY = "core:num_channels" 117 | HASH_KEY = "core:sha512" 118 | VERSION_KEY = "core:version" 119 | DATATYPE_KEY = "core:datatype" 120 | FREQUENCY_KEY = "core:frequency" 121 | HEADER_BYTES_KEY = "core:header_bytes" 122 | FLO_KEY = "core:freq_lower_edge" 123 | FHI_KEY = "core:freq_upper_edge" 124 | SAMPLE_RATE_KEY = "core:sample_rate" 125 | COMMENT_KEY = "core:comment" 126 | DESCRIPTION_KEY = "core:description" 127 | AUTHOR_KEY = "core:author" 128 | META_DOI_KEY = "core:meta-doi" 129 | DATA_DOI_KEY = "core:data-doi" 130 | GENERATOR_KEY = "core:generator" 131 | LABEL_KEY = "core:label" 132 | RECORDER_KEY = "core:recorder" 133 | LICENSE_KEY = "core:license" 134 | HW_KEY = "core:hw" 135 | DATASET_KEY = "core:dataset" 136 | TRAILING_BYTES_KEY = "core:trailing_bytes" 137 | METADATA_ONLY_KEY = "core:metadata_only" 138 | EXTENSIONS_KEY = "core:extensions" 139 | DATETIME_KEY = "core:datetime" 140 | LAT_KEY = "core:latitude" 141 | LON_KEY = "core:longitude" 142 | UUID_KEY = "core:uuid" 143 | GEOLOCATION_KEY = "core:geolocation" 144 | COLLECTION_KEY = "core:collection" 145 | GLOBAL_KEY = "global" 146 | CAPTURE_KEY = "captures" 147 | ANNOTATION_KEY = "annotations" 148 | VALID_GLOBAL_KEYS = [ 149 | AUTHOR_KEY, COLLECTION_KEY, DATASET_KEY, DATATYPE_KEY, DATA_DOI_KEY, DESCRIPTION_KEY, EXTENSIONS_KEY, 150 | GEOLOCATION_KEY, HASH_KEY, HW_KEY, LICENSE_KEY, META_DOI_KEY, METADATA_ONLY_KEY, NUM_CHANNELS_KEY, RECORDER_KEY, 151 | SAMPLE_RATE_KEY, START_OFFSET_KEY, TRAILING_BYTES_KEY, VERSION_KEY 152 | ] 153 | VALID_CAPTURE_KEYS = [DATETIME_KEY, FREQUENCY_KEY, HEADER_BYTES_KEY, GLOBAL_INDEX_KEY, START_INDEX_KEY] 154 | VALID_ANNOTATION_KEYS = [ 155 | COMMENT_KEY, FHI_KEY, FLO_KEY, GENERATOR_KEY, LABEL_KEY, LAT_KEY, LENGTH_INDEX_KEY, LON_KEY, START_INDEX_KEY, 156 | UUID_KEY 157 | ] 158 | VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS} 159 | 160 | def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True): 161 | """ 162 | API for SigMF I/O 163 | 164 | Parameters 165 | ---------- 166 | metadata: str or dict, optional 167 | Metadata for associated dataset. 168 | data_file: str, optional 169 | Path to associated dataset. 170 | global_info: dict, optional 171 | Set global field shortcut if creating new object. 172 | skip_checksum: bool, default False 173 | When True will skip calculating hash on data_file (if present) to check against metadata. 174 | map_readonly: bool, default True 175 | Indicates whether assignments on the numpy.memmap are allowed. 
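Examples
--------
A minimal sketch; the data filename here is hypothetical:

>>> meta = SigMFFile(
...     global_info={"core:datatype": "cf32_le", "core:sample_rate": 1e6},
...     data_file="example.sigmf-data",
...     skip_checksum=True,
... )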
176 | """ 177 | super().__init__() 178 | self.data_file = None 179 | self.sample_count = 0 180 | self._memmap = None 181 | self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case 182 | 183 | self.set_metadata(metadata) 184 | if global_info is not None: 185 | self.set_global_info(global_info) 186 | if data_file is not None: 187 | self.set_data_file(data_file, skip_checksum=skip_checksum, map_readonly=map_readonly) 188 | 189 | def __len__(self): 190 | return self._memmap.shape[0] 191 | 192 | def __eq__(self, other): 193 | """ 194 | Define equality between two `SigMFFile`s. 195 | 196 | Rely on the checksum value in the metadata to decide whether `data_file` is the same since the path of the 197 | dataset is immaterial to equivalency. 198 | """ 199 | if isinstance(other, SigMFFile): 200 | return self._metadata == other._metadata 201 | return False 202 | 203 | def __next__(self): 204 | """get next batch of samples""" 205 | if self.iter_position < len(self): 206 | # normal batch 207 | value = self.read_samples(start_index=self.iter_position, count=1) 208 | self.iter_position += 1 209 | return value 210 | 211 | else: 212 | # no more data 213 | raise StopIteration 214 | 215 | def __getitem__(self, sli): 216 | mem = self._memmap[sli] # matches behavior of numpy.ndarray.__getitem__() 217 | 218 | if self._return_type is None: 219 | return mem 220 | 221 | # is_fixed_point and is_complex 222 | if self._memmap.ndim == 2: 223 | # num_channels == 1 224 | ray = mem[:, 0].astype(self._return_type) + 1.0j * mem[:, 1].astype(self._return_type) 225 | elif self._memmap.ndim == 3: 226 | # num_channels > 1 227 | ray = mem[:, :, 0].astype(self._return_type) + 1.0j * mem[:, :, 1].astype(self._return_type) 228 | else: 229 | raise ValueError("unhandled ndim in SigMFFile.__getitem__(); this shouldn't happen") 230 | return ray[0] if isinstance(sli, int) else ray # return element instead of 1-element array 231 | 232 | def _get_start_offset(self): 233 | """ 234 | Return the offset of the first sample. 235 | """ 236 | return self.get_global_field(self.START_OFFSET_KEY, 0) 237 | 238 | def get_num_channels(self): 239 | """Returns integer number of channels if present, otherwise 1""" 240 | return self.get_global_field(self.NUM_CHANNELS_KEY, 1) 241 | 242 | def _is_conforming_dataset(self): 243 | """ 244 | The dataset is non-conforming if the datafile contains non-sample bytes; 245 | to be conforming, the global trailing_bytes field must be zero or not set 246 | and all captures `header_bytes` fields must be zero or not set. Because we 247 | do not necessarily know the filename there is no means of verifying that the 248 | meta/data filename roots match, but this will also check that a data file exists. 
249 | 250 | Returns 251 | ------- 252 | `True` if the dataset is conforming to SigMF, `False` otherwise 253 | """ 254 | if self.get_global_field(self.TRAILING_BYTES_KEY, 0): 255 | return False 256 | for capture in self.get_captures(): 257 | # check for any non-zero `header_bytes` fields in captures segments 258 | if capture.get(self.HEADER_BYTES_KEY, 0): 259 | return False 260 | if self.data_file is not None and not self.data_file.is_file(): 261 | return False 262 | # if we get here, the file exists and is conforming 263 | return True 264 | 265 | def get_schema(self): 266 | """ 267 | Return a schema object valid for the current metadata 268 | """ 269 | current_metadata_version = self.get_global_info().get(self.VERSION_KEY) 270 | if self.version != current_metadata_version or self.schema is None: 271 | self.version = current_metadata_version 272 | self.schema = schema.get_schema(self.version) 273 | assert isinstance(self.schema, dict) 274 | return self.schema 275 | 276 | def set_metadata(self, metadata): 277 | """ 278 | Read provided metadata as either None (empty), string, bytes, or dictionary. 279 | """ 280 | if metadata is None: 281 | # Create empty 282 | self._metadata = {self.GLOBAL_KEY: {}, self.CAPTURE_KEY: [], self.ANNOTATION_KEY: []} 283 | elif isinstance(metadata, dict): 284 | self._metadata = metadata 285 | elif isinstance(metadata, (str, bytes)): 286 | self._metadata = json.loads(metadata) 287 | else: 288 | raise SigMFError("Unable to interpret provided metadata.") 289 | 290 | # if num_channels missing, default to 1 291 | if self.get_global_field(self.NUM_CHANNELS_KEY) is None: 292 | self.set_global_field(self.NUM_CHANNELS_KEY, 1) 293 | 294 | # set version to current implementation 295 | self.set_global_field(self.VERSION_KEY, __specification__) 296 | 297 | def set_global_info(self, new_global): 298 | """ 299 | Recursively override existing global metadata with new global metadata. 300 | """ 301 | self._metadata[self.GLOBAL_KEY] = dict_merge(self._metadata[self.GLOBAL_KEY], new_global) 302 | 303 | def get_global_info(self): 304 | """ 305 | Returns a dictionary with all the global info. 306 | """ 307 | try: 308 | return self._metadata.get(self.GLOBAL_KEY, {}) 309 | except AttributeError: 310 | return {} 311 | 312 | def set_global_field(self, key, value): 313 | """ 314 | Inserts a value into the global field. 315 | """ 316 | self._metadata[self.GLOBAL_KEY][key] = value 317 | 318 | def get_global_field(self, key, default=None): 319 | """ 320 | Return a field from the global info, or default if the field is not set. 321 | """ 322 | return self._metadata[self.GLOBAL_KEY].get(key, default) 323 | 324 | def add_capture(self, start_index, metadata=None): 325 | """ 326 | Insert capture info for sample starting at start_index. 327 | If there is already capture info for this index, metadata will be merged 328 | with the existing metadata, overwriting keys if they were previously set. 
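Examples
--------
Sketch: record a retune to 915 MHz at the first sample:

>>> meta.add_capture(0, metadata={"core:frequency": 915e6})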
329 | """ 330 | assert start_index >= self._get_start_offset() 331 | capture_list = self._metadata[self.CAPTURE_KEY] 332 | new_capture = metadata or {} 333 | new_capture[self.START_INDEX_KEY] = start_index 334 | # merge if capture exists 335 | merged = False 336 | for idx, existing_capture in enumerate(capture_list): 337 | if existing_capture[self.START_INDEX_KEY] == start_index: 338 | capture_list[idx] = dict_merge(existing_capture, new_capture) # store merged segment back into the list 339 | merged = True 340 | if not merged: 341 | capture_list += [new_capture] 342 | # sort captures by start_index 343 | self._metadata[self.CAPTURE_KEY] = sorted( 344 | capture_list, 345 | key=lambda item: item[self.START_INDEX_KEY], 346 | ) 347 | 348 | def get_captures(self): 349 | """ 350 | Returns a list of dictionaries representing all captures. 351 | """ 352 | return self._metadata.get(self.CAPTURE_KEY, []) 353 | 354 | def get_capture_info(self, index): 355 | """ 356 | Returns a dictionary containing all the capture information at sample 357 | 'index'. 358 | """ 359 | assert index >= self._get_start_offset() 360 | captures = self._metadata.get(self.CAPTURE_KEY, []) 361 | assert len(captures) > 0 362 | cap_info = captures[0] 363 | for capture in captures: 364 | if capture[self.START_INDEX_KEY] > index: 365 | break 366 | cap_info = capture 367 | return cap_info 368 | 369 | def get_capture_start(self, index): 370 | """ 371 | Returns the start sample index of a given capture, will raise 372 | SigMFAccessError if this field is missing. 373 | """ 374 | start = self.get_captures()[index].get(self.START_INDEX_KEY) 375 | if start is None: 376 | raise SigMFAccessError("Capture {} does not have required {} key".format(index, self.START_INDEX_KEY)) 377 | return start 378 | 379 | def get_capture_byte_boundarys(self, index): 380 | """ 381 | Returns a tuple of the file byte range in a dataset of a given SigMF 382 | capture of the form [start, stop). This function works on either 383 | compliant or noncompliant SigMF Recordings. 384 | """ 385 | if index >= len(self.get_captures()): 386 | raise SigMFAccessError( 387 | "Invalid captures index {} (only {} captures in Recording)".format(index, len(self.get_captures())) 388 | ) 389 | 390 | start_byte = 0 391 | prev_start_sample = 0 392 | for ii, capture in enumerate(self.get_captures()): 393 | start_byte += capture.get(self.HEADER_BYTES_KEY, 0) 394 | start_byte += ( 395 | (self.get_capture_start(ii) - prev_start_sample) * self.get_sample_size() * self.get_num_channels() 396 | ) 397 | prev_start_sample = self.get_capture_start(ii) 398 | if ii >= index: 399 | break 400 | 401 | end_byte = start_byte 402 | if index == len(self.get_captures()) - 1: # last captures...data is the rest of the file 403 | end_byte = self.data_file.stat().st_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) 404 | else: 405 | end_byte += ( 406 | (self.get_capture_start(index + 1) - self.get_capture_start(index)) 407 | * self.get_sample_size() 408 | * self.get_num_channels() 409 | ) 410 | return (start_byte, end_byte) 411 | 412 | def add_annotation(self, start_index, length=None, metadata=None): 413 | """ 414 | Insert annotation at start_index with length (if != None). 
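Examples
--------
Sketch: label 1024 samples starting at index 5000 (illustrative values):

>>> meta.add_annotation(5000, length=1024, metadata={"core:label": "chirp"})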
415 | """ 416 | assert start_index >= self._get_start_offset() 417 | 418 | new_annot = metadata or {} 419 | new_annot[self.START_INDEX_KEY] = start_index 420 | if length is not None: 421 | assert length >= 1 422 | new_annot[self.LENGTH_INDEX_KEY] = length 423 | 424 | self._metadata[self.ANNOTATION_KEY] += [new_annot] 425 | # sort annotations by start_index 426 | self._metadata[self.ANNOTATION_KEY] = sorted( 427 | self._metadata[self.ANNOTATION_KEY], 428 | key=lambda item: item[self.START_INDEX_KEY], 429 | ) 430 | 431 | def get_annotations(self, index=None): 432 | """ 433 | Get relevant annotations from metadata. 434 | 435 | Parameters 436 | ---------- 437 | index : int, default None 438 | If provided returns all annotations that include this sample index. 439 | When omitted returns all annotations. 440 | 441 | Returns 442 | ------- 443 | list of dict 444 | Each dictionary contains one annotation for the sample at `index`. 445 | """ 446 | annotations = self._metadata.get(self.ANNOTATION_KEY, []) 447 | if index is None: 448 | return annotations 449 | 450 | annotations_including_index = [] 451 | for annotation in annotations: 452 | if index < annotation[self.START_INDEX_KEY]: 453 | # index is before annotation starts -> skip 454 | continue 455 | if self.LENGTH_INDEX_KEY in annotation: 456 | # Annotation includes sample_count -> check end index 457 | if index >= annotation[self.START_INDEX_KEY] + annotation[self.LENGTH_INDEX_KEY]: 458 | # index is after annotation end -> skip 459 | continue 460 | 461 | annotations_including_index.append(annotation) 462 | return annotations_including_index 463 | 464 | def get_sample_size(self): 465 | """ 466 | Determines the size of a sample, in bytes, from the datatype of this set. 467 | For complex data, a 'sample' includes both the real and imaginary part. 468 | """ 469 | return dtype_info(self.get_global_field(self.DATATYPE_KEY))["sample_size"] 470 | 471 | def _count_samples(self): 472 | """ 473 | Count, set, and return the total number of samples in the data file. 474 | If there is no data file but there are annotations, use the sample_count 475 | from the annotation with the highest end index. If there are no annotations, 476 | use 0. 477 | For complex data, a 'sample' includes both the real and imaginary part. 478 | """ 479 | if self.data_file is None: 480 | sample_count = self._get_sample_count_from_annotations() 481 | else: 482 | header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()]) 483 | file_size = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes 484 | file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes # bytes 485 | sample_size = self.get_sample_size() # size of a sample in bytes 486 | num_channels = self.get_num_channels() 487 | sample_count = file_data_size // sample_size // num_channels 488 | if file_data_size % (sample_size * num_channels) != 0: 489 | warnings.warn( 490 | f"File `{self.data_file}` does not contain an integer number of samples across channels. " 491 | "It may be invalid data." 492 | ) 493 | if self._get_sample_count_from_annotations() > sample_count: 494 | warnings.warn( 495 | f"File `{self.data_file}` ends before the final annotation in the corresponding SigMF metadata." 496 | ) 497 | self.sample_count = sample_count 498 | return sample_count 499 | 500 | def _get_sample_count_from_annotations(self): 501 | """ 502 | Returns the number of samples based on annotation with highest end index. 
503 | NOTE: Annotations are ordered by START_INDEX_KEY and not end index, so we 504 | need to go through all annotations 505 | """ 506 | annon_sample_count = [] 507 | for annon in self.get_annotations(): 508 | if self.LENGTH_INDEX_KEY in annon: 509 | # Annotation with sample_count 510 | annon_sample_count.append(annon[self.START_INDEX_KEY] + annon[self.LENGTH_INDEX_KEY]) 511 | else: 512 | # Annotation without sample_count - sample count must be at least sample_start 513 | annon_sample_count.append(annon[self.START_INDEX_KEY]) 514 | 515 | if annon_sample_count: 516 | return max(annon_sample_count) 517 | else: 518 | return 0 519 | 520 | def calculate_hash(self): 521 | """ 522 | Calculates the hash of the data file and adds it to the global section. 523 | Also returns a string representation of the hash. 524 | """ 525 | old_hash = self.get_global_field(self.HASH_KEY) 526 | if self.data_file is not None: 527 | new_hash = sigmf_hash.calculate_sha512( 528 | filename=self.data_file, 529 | offset=self.data_offset, 530 | size=self.data_size_bytes, 531 | ) 532 | else: 533 | new_hash = sigmf_hash.calculate_sha512( 534 | fileobj=self.data_buffer, 535 | offset=self.data_offset, 536 | size=self.data_size_bytes, 537 | ) 538 | if old_hash is not None: 539 | if old_hash != new_hash: 540 | raise SigMFFileError("Calculated file hash does not match associated metadata.") 541 | 542 | self.set_global_field(self.HASH_KEY, new_hash) 543 | return new_hash 544 | 545 | def set_data_file( 546 | self, data_file=None, data_buffer=None, skip_checksum=False, offset=0, size_bytes=None, map_readonly=True 547 | ): 548 | """ 549 | Set the datafile path, then recalculate sample count. If not skipped, 550 | update the hash and return the hash string. 551 | """ 552 | if self.get_global_field(self.DATATYPE_KEY) is None: 553 | raise SigMFFileError("Error setting data file, the DATATYPE_KEY must be set in the global metadata first.") 554 | 555 | self.data_file = Path(data_file) if data_file else None 556 | self.data_buffer = data_buffer 557 | self.data_offset = offset 558 | self.data_size_bytes = size_bytes 559 | self._count_samples() 560 | 561 | dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY)) 562 | self.is_complex_data = dtype["is_complex"] 563 | num_channels = self.get_num_channels() 564 | self.ndim = 1 if (num_channels < 2) else 2 565 | 566 | complex_int_separates = dtype["is_complex"] and dtype["is_fixedpoint"] 567 | mapped_dtype_size = dtype["component_size"] if complex_int_separates else dtype["sample_size"] 568 | mapped_length = None if size_bytes is None else size_bytes // mapped_dtype_size 569 | mapped_reshape = (-1,) # we can't use -1 in mapped_length ... 
570 | if num_channels > 1: 571 | mapped_reshape = mapped_reshape + (num_channels,) 572 | if complex_int_separates: 573 | # There is no corresponding numpy type, so we'll have to add another axis, with length of 2 574 | mapped_reshape = mapped_reshape + (2,) 575 | self._return_type = dtype["memmap_convert_type"] 576 | common_args = {"dtype": dtype["memmap_map_type"], "offset": offset} 577 | try: 578 | if self.data_file is not None: 579 | open_mode = "r" if map_readonly else "r+" 580 | memmap_shape = None if mapped_length is None else (mapped_length,) 581 | raveled = np.memmap(self.data_file, mode=open_mode, shape=memmap_shape, **common_args) 582 | elif self.data_buffer is not None: 583 | buffer_count = -1 if mapped_length is None else mapped_length 584 | raveled = np.frombuffer(self.data_buffer.getbuffer(), count=buffer_count, **common_args) 585 | else: 586 | raise SigMFFileError("In sigmffile.set_data_file(), either data_file or data_buffer must be not None") 587 | except SigMFFileError: # TODO include likely exceptions here 588 | warnings.warn("Failed to create data array from memory-map-file or buffer!") 589 | else: 590 | self._memmap = raveled.reshape(mapped_reshape) 591 | self.shape = self._memmap.shape if (self._return_type is None) else self._memmap.shape[:-1] 592 | 593 | if self.data_file is not None: 594 | file_name = self.data_file.name 595 | ext = self.data_file.suffix 596 | if ext.lower() != SIGMF_DATASET_EXT: 597 | self.set_global_field(SigMFFile.DATASET_KEY, file_name) 598 | 599 | if skip_checksum: 600 | return None 601 | return self.calculate_hash() 602 | 603 | def validate(self): 604 | """ 605 | Check schema and throw error if issue. 606 | """ 607 | version = self.get_global_field(self.VERSION_KEY) 608 | validate.validate(self._metadata, self.get_schema()) 609 | 610 | def archive(self, name=None, fileobj=None): 611 | """Dump contents to SigMF archive format. 612 | 613 | `name` and `fileobj` are passed to SigMFArchive and are defined there. 614 | 615 | """ 616 | archive = SigMFArchive(self, name, fileobj) 617 | return archive.path 618 | 619 | def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False): 620 | """ 621 | Write metadata file or full archive containing metadata & dataset. 622 | 623 | Parameters 624 | ---------- 625 | file_path : string 626 | Location to save. 627 | pretty : bool, default True 628 | When True will write more human-readable output, otherwise will be flat JSON. 629 | toarchive : bool, default False 630 | If True will write both dataset & metadata into SigMF archive format as a single `tar` file. 631 | If False will only write metadata to `sigmf-meta`. 632 | """ 633 | if not skip_validate: 634 | self.validate() 635 | fns = get_sigmf_filenames(file_path) 636 | if toarchive: 637 | self.archive(fns["archive_fn"]) 638 | else: 639 | with open(fns["meta_fn"], "w") as fp: 640 | self.dump(fp, pretty=pretty) 641 | fp.write("\n") # text files should end in carriage return 642 | 643 | def read_samples_in_capture(self, index=0, autoscale=True): 644 | """ 645 | Reads samples from the specified captures segment in its entirety. 646 | 647 | Parameters 648 | ---------- 649 | index : int, default 0 650 | Captures segment to read samples from. 651 | autoscale : bool, default True 652 | If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0) 653 | 654 | Returns 655 | ------- 656 | data : ndarray 657 | Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY. 
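Examples
--------
Sketch: read every sample in the first capture, autoscaled to +/- 1.0:

>>> samples = meta.read_samples_in_capture(0)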
658 | """ 659 | cb = self.get_capture_byte_boundarys(index) 660 | if (cb[1] - cb[0]) % (self.get_sample_size() * self.get_num_channels()): 661 | warnings.warn( 662 | f"Capture `{index}` in `{self.data_file}` does not contain " 663 | "an integer number of samples across channels. It may be invalid." 664 | ) 665 | 666 | return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size(), autoscale, False) 667 | 668 | def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False): 669 | """ 670 | Reads the specified number of samples starting at the specified index from the associated data file. 671 | 672 | Parameters 673 | ---------- 674 | start_index : int, default 0 675 | Starting sample index from which to read. 676 | count : int, default -1 677 | Number of samples to read. -1 will read whole file. 678 | autoscale : bool, default True 679 | If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0) 680 | raw_components : bool, default False 681 | If True read and return the sample components (individual I & Q for complex, samples for real) 682 | with no conversions or interleaved channels. 683 | 684 | Returns 685 | ------- 686 | data : ndarray 687 | Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY. 688 | """ 689 | if count == 0: 690 | raise IOError("Number of samples must be greater than zero, or -1 for all samples.") 691 | elif start_index + count > self.sample_count: 692 | raise IOError("Cannot read beyond EOF.") 693 | if self.data_file is None and not isinstance(self.data_buffer, io.BytesIO): 694 | if self.get_global_field(self.METADATA_ONLY_KEY, False): 695 | # only if data_file is `None` allows access to dynamically generated datsets 696 | raise SigMFFileError("Cannot read samples from a metadata only distribution.") 697 | else: 698 | raise SigMFFileError("No signal data file has been associated with the metadata.") 699 | first_byte = start_index * self.get_sample_size() * self.get_num_channels() 700 | 701 | if not self._is_conforming_dataset(): 702 | warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous") 703 | return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False) 704 | 705 | def _read_datafile(self, first_byte, nitems, autoscale, raw_components): 706 | """ 707 | internal function for reading samples from datafile 708 | """ 709 | dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY)) 710 | self.is_complex_data = dtype["is_complex"] 711 | is_fixedpoint_data = dtype["is_fixedpoint"] 712 | is_unsigned_data = dtype["is_unsigned"] 713 | data_type_in = dtype["sample_dtype"] 714 | component_type_in = dtype["component_dtype"] 715 | component_size = dtype["component_size"] 716 | 717 | data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4") 718 | num_channels = self.get_num_channels() 719 | 720 | if self.data_file is not None: 721 | fp = open(self.data_file, "rb") 722 | fp.seek(first_byte, 0) 723 | data = np.fromfile(fp, dtype=data_type_in, count=nitems) 724 | elif self.data_buffer is not None: 725 | data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems) 726 | else: 727 | data = self._memmap 728 | 729 | if num_channels != 1: 730 | # return reshaped view for num_channels 731 | # first dimension will be double size if `is_complex_data` 732 | data = data.reshape(data.shape[0] // num_channels, num_channels) 733 | if not raw_components: 734 | data 
= data.astype(data_type_out) 735 | if autoscale and is_fixedpoint_data: 736 | data = data.view(np.dtype("f4")) 737 | if is_unsigned_data: 738 | data -= 2 ** (component_size * 8 - 1) 739 | data *= 2 ** -(component_size * 8 - 1) 740 | data = data.view(data_type_out) 741 | if self.is_complex_data: 742 | data = data.view(np.complex64) 743 | else: 744 | data = data.view(component_type_in) 745 | 746 | if self.data_file is not None: 747 | fp.close() 748 | 749 | return data 750 | 751 | 752 | class SigMFCollection(SigMFMetafile): 753 | VERSION_KEY = "core:version" 754 | DESCRIPTION_KEY = "core:description" 755 | AUTHOR_KEY = "core:author" 756 | COLLECTION_DOI_KEY = "core:collection_doi" 757 | LICENSE_KEY = "core:license" 758 | EXTENSIONS_KEY = "core:extensions" 759 | STREAMS_KEY = "core:streams" 760 | COLLECTION_KEY = "collection" 761 | VALID_COLLECTION_KEYS = [ 762 | AUTHOR_KEY, 763 | COLLECTION_DOI_KEY, 764 | DESCRIPTION_KEY, 765 | EXTENSIONS_KEY, 766 | LICENSE_KEY, 767 | STREAMS_KEY, 768 | VERSION_KEY, 769 | ] 770 | VALID_KEYS = {COLLECTION_KEY: VALID_COLLECTION_KEYS} 771 | 772 | def __init__(self, metafiles: list = None, metadata: dict = None, base_path=None, skip_checksums: bool = False) -> None: 773 | """ 774 | Create a SigMF Collection object. 775 | 776 | Parameters 777 | ---------- 778 | metafiles: list, optional 779 | A list of SigMF metadata filenames objects comprising the Collection. 780 | There should be at least one file. 781 | metadata: dict, optional 782 | Collection metadata to use, if not provided this will populate a minimal set of default metadata. 783 | The `core:streams` field will be regenerated automatically. 784 | base_path : str | bytes | PathLike, optional 785 | Base path of the collection recordings. 786 | skip_checksums : bool, optional 787 | If true will skip calculating checksum on datasets. 788 | 789 | Raises 790 | ------ 791 | SigMFError 792 | If metadata files do not exist. 793 | """ 794 | super().__init__() 795 | self.skip_checksums = skip_checksums 796 | 797 | if base_path is None: 798 | self.base_path = Path("") 799 | else: 800 | self.base_path = Path(base_path) 801 | 802 | if metadata is None: 803 | self._metadata = {self.COLLECTION_KEY: {}} 804 | self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = [] 805 | else: 806 | self._metadata = metadata 807 | 808 | if metafiles is None: 809 | self.metafiles = [] 810 | else: 811 | self.set_streams(metafiles) 812 | 813 | # set version to current implementation 814 | self.set_collection_field(self.VERSION_KEY, __specification__) 815 | 816 | if not self.skip_checksums: 817 | self.verify_stream_hashes() 818 | 819 | def __len__(self) -> int: 820 | """ 821 | The length of a collection is the number of streams. 822 | """ 823 | return len(self.get_stream_names()) 824 | 825 | def verify_stream_hashes(self) -> None: 826 | """ 827 | Compares the stream hashes in the collection metadata to the metadata files. 828 | 829 | Raises 830 | ------ 831 | SigMFFileError 832 | If any dataset checksums do not match saved metadata. 833 | """ 834 | streams = self.get_collection_field(self.STREAMS_KEY, []) 835 | for stream in streams: 836 | old_hash = stream.get("hash") 837 | metafile_name = get_sigmf_filenames(stream.get("name"))["meta_fn"] 838 | metafile_path = self.base_path / metafile_name 839 | if Path.is_file(metafile_path): 840 | new_hash = sigmf_hash.calculate_sha512(filename=metafile_path) 841 | if old_hash != new_hash: 842 | raise SigMFFileError( 843 | f"Calculated file hash for {metafile_path} does not match collection metadata." 
844 | ) 845 | 846 | def set_streams(self, metafiles) -> None: 847 | """ 848 | Configures the collection `core:streams` field from the specified list of metafiles. 849 | """ 850 | self.metafiles = metafiles 851 | streams = [] 852 | for metafile in self.metafiles: 853 | metafile_path = self.base_path / metafile 854 | if metafile.endswith(".sigmf-meta") and Path.is_file(metafile_path): 855 | stream = { 856 | # name must be string here to be serializable later 857 | "name": str(get_sigmf_filenames(metafile)["base_fn"]), 858 | "hash": sigmf_hash.calculate_sha512(filename=metafile_path), 859 | } 860 | streams.append(stream) 861 | else: 862 | raise SigMFFileError(f"Specifed stream file {metafile_path} is not a valid SigMF Metadata file") 863 | self.set_collection_field(self.STREAMS_KEY, streams) 864 | 865 | def get_stream_names(self) -> list: 866 | """ 867 | Returns a list of `name` object(s) from the `collection` level `core:streams` metadata. 868 | """ 869 | return [s.get("name") for s in self.get_collection_field(self.STREAMS_KEY, [])] 870 | 871 | def set_collection_info(self, new_collection: dict) -> None: 872 | """ 873 | Overwrite the collection info with a new dictionary. 874 | """ 875 | self._metadata[self.COLLECTION_KEY] = new_collection.copy() 876 | 877 | def get_collection_info(self) -> dict: 878 | """ 879 | Returns a dictionary with all the collection info. 880 | """ 881 | try: 882 | return self._metadata.get(self.COLLECTION_KEY, {}) 883 | except AttributeError: 884 | return {} 885 | 886 | def set_collection_field(self, key: str, value) -> None: 887 | """ 888 | Inserts a value into the collection field. 889 | """ 890 | self._metadata[self.COLLECTION_KEY][key] = value 891 | 892 | def get_collection_field(self, key: str, default=None): 893 | """ 894 | Return a field from the collection info, or default if the field is not set. 895 | """ 896 | return self._metadata[self.COLLECTION_KEY].get(key, default) 897 | 898 | def tofile(self, file_path, pretty: bool = True) -> None: 899 | """ 900 | Write metadata file 901 | 902 | Parameters 903 | ---------- 904 | file_path : string 905 | Location to save. 906 | pretty : bool, default True 907 | When True will write more human-readable output, otherwise will be flat JSON. 908 | """ 909 | filenames = get_sigmf_filenames(file_path) 910 | with open(filenames["collection_fn"], "w") as handle: 911 | self.dump(handle, pretty=pretty) 912 | handle.write("\n") # text files should end in carriage return 913 | 914 | def get_SigMFFile(self, stream_name=None, stream_index=None): 915 | """ 916 | Returns the SigMFFile instance of the specified stream if it exists 917 | """ 918 | if stream_name is not None and stream_name not in self.get_stream_names(): 919 | # invalid stream name 920 | return 921 | if stream_index is not None and stream_index < len(self): 922 | stream_name = self.get_stream_names()[stream_index] 923 | if stream_name is not None: 924 | metafile = get_sigmf_filenames(stream_name)["meta_fn"] 925 | metafile_path = self.base_path / metafile 926 | return fromfile(metafile_path, skip_checksum=self.skip_checksums) 927 | 928 | 929 | def dtype_info(datatype): 930 | """ 931 | Parses a datatype string conforming to the SigMF spec and returns a dict 932 | of values describing the format. 
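For example (sketch), dtype_info("cf32_le") reports sample_size 8, component_size 4, is_complex True, and is_fixedpoint False.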
933 | 
934 |     Keyword arguments:
935 |     datatype -- a SigMF-compliant datatype string
936 |     """
937 |     if datatype is None:
938 |         raise SigMFFileError("Invalid datatype 'None'.")
939 |     output_info = {}
940 |     dtype = datatype.lower()
941 | 
942 |     is_unsigned_data = "u" in datatype
943 |     is_complex_data = "c" in datatype
944 |     is_fixedpoint_data = "f" not in datatype
945 | 
946 |     dtype = datatype.lower().split("_")
947 | 
948 |     byte_order = ""
949 |     if len(dtype) == 2:
950 |         if dtype[1][0] == "l":
951 |             byte_order = "<"
952 |         elif dtype[1][0] == "b":
953 |             byte_order = ">"
954 |         else:
955 |             raise SigMFFileError("Unrecognized endianness specifier: '{}'".format(dtype[1]))
956 |     dtype = dtype[0]
957 |     if "64" in dtype:
958 |         sample_size = 8
959 |     elif "32" in dtype:
960 |         sample_size = 4
961 |     elif "16" in dtype:
962 |         sample_size = 2
963 |     elif "8" in dtype:
964 |         sample_size = 1
965 |     else:
966 |         raise SigMFFileError("Unrecognized datatype: '{}'".format(dtype))
967 |     component_size = sample_size
968 |     if is_complex_data:
969 |         sample_size *= 2
970 |     sample_size = int(sample_size)
971 | 
972 |     data_type_str = byte_order
973 |     data_type_str += "f" if not is_fixedpoint_data else "u" if is_unsigned_data else "i"
974 |     data_type_str += str(component_size)
975 | 
976 |     memmap_convert_type = None
977 |     if is_complex_data:
978 |         data_type_str = ",".join((data_type_str, data_type_str))
979 |         memmap_map_type = byte_order
980 |         if is_fixedpoint_data:
981 |             memmap_map_type += ("u" if is_unsigned_data else "i") + str(component_size)
982 |             memmap_convert_type = byte_order + "c8"
983 |         else:
984 |             memmap_map_type += "c" + str(sample_size)
985 |     else:
986 |         memmap_map_type = data_type_str
987 | 
988 |     data_type_in = np.dtype(data_type_str)
989 |     output_info["sample_dtype"] = data_type_in
990 |     output_info["component_dtype"] = data_type_in["f0"] if is_complex_data else data_type_in
991 |     output_info["sample_size"] = sample_size
992 |     output_info["component_size"] = component_size
993 |     output_info["is_complex"] = is_complex_data
994 |     output_info["is_unsigned"] = is_unsigned_data
995 |     output_info["is_fixedpoint"] = is_fixedpoint_data
996 |     output_info["memmap_map_type"] = memmap_map_type
997 |     output_info["memmap_convert_type"] = memmap_convert_type
998 |     return output_info
999 | 
1000 | 
1001 | def get_dataset_filename_from_metadata(meta_fn, metadata=None):
1002 |     """
1003 |     Parse provided metadata and return the expected data filename.
1004 | 
1005 |     In the case of a metadata-only distribution, or if the file does not exist,
1006 |     this will return ``None``.
1007 | 
1008 |     Priority for conflicting datasets:
1009 | 
1010 |     1. Use the compliant dataset file (the metadata filename with the ``SIGMF_DATASET_EXT`` extension) if it exists.
1011 |     2. Use the file in the ``DATASET_KEY`` field (non-compliant dataset) if it exists.
1012 |     3. Return ``None`` (may be a metadata-only distribution).
1013 | """ 1014 | compliant_filename = get_sigmf_filenames(meta_fn)["data_fn"] 1015 | noncompliant_filename = metadata["global"].get(SigMFFile.DATASET_KEY, None) 1016 | 1017 | if Path.is_file(compliant_filename): 1018 | if noncompliant_filename: 1019 | warnings.warn( 1020 | f"Compliant Dataset `{compliant_filename}` exists but " 1021 | f"{SigMFFile.DATASET_KEY} is also defined; using `{compliant_filename}`" 1022 | ) 1023 | return compliant_filename 1024 | 1025 | elif noncompliant_filename: 1026 | dir_path = Path(meta_fn).parent 1027 | noncompliant_data_file_path = Path.joinpath(dir_path, noncompliant_filename) 1028 | if Path.is_file(noncompliant_data_file_path): 1029 | if metadata["global"].get(SigMFFile.METADATA_ONLY_KEY, False): 1030 | raise SigMFFileError( 1031 | f"Schema defines {SigMFFile.DATASET_KEY} " 1032 | f"but {SigMFFile.METADATA_ONLY_KEY} also exists; using `{noncompliant_filename}`" 1033 | ) 1034 | return noncompliant_data_file_path 1035 | else: 1036 | raise SigMFFileError( 1037 | f"Non-Compliant Dataset `{noncompliant_filename}` is specified in {SigMFFile.DATASET_KEY} " 1038 | "but does not exist!" 1039 | ) 1040 | return None 1041 | 1042 | 1043 | def fromarchive(archive_path, dir=None, skip_checksum=False): 1044 | """Extract an archive and return a SigMFFile. 1045 | 1046 | The `dir` parameter is no longer used as this function has been changed to 1047 | access SigMF archives without extracting them. 1048 | """ 1049 | from .archivereader import SigMFArchiveReader 1050 | return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile 1051 | 1052 | 1053 | def fromfile(filename, skip_checksum=False): 1054 | """ 1055 | Creates and returns a SigMFFile or SigMFCollection instance with metadata 1056 | loaded from the specified file. The filename may be that of either a 1057 | sigmf-meta file, a sigmf-data file, a sigmf-collection file, or a sigmf 1058 | archive. 1059 | 1060 | Parameters 1061 | ---------- 1062 | filename: str | bytes | PathLike 1063 | Path for SigMF Metadata, Dataset, Archive or Collection (with or without extension). 1064 | skip_checksum: bool, default False 1065 | When True will not read entire dataset to calculate hash. 1066 | 1067 | Returns 1068 | ------- 1069 | object 1070 | SigMFFile with dataset & metadata or a SigMFCollection depending on file type. 1071 | """ 1072 | fns = get_sigmf_filenames(filename) 1073 | meta_fn = fns["meta_fn"] 1074 | archive_fn = fns["archive_fn"] 1075 | collection_fn = fns["collection_fn"] 1076 | 1077 | # extract the extension to check whether we are dealing with an archive, collection, etc. 
1078 |     file_path = Path(filename)
1079 |     ext = file_path.suffix
1080 | 
1081 |     if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn):
1082 |         return fromarchive(archive_fn, skip_checksum=skip_checksum)
1083 | 
1084 |     if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn):
1085 |         collection_fp = open(collection_fn, "rb")
1086 |         bytestream_reader = codecs.getreader("utf-8")
1087 |         mdfile_reader = bytestream_reader(collection_fp)
1088 |         metadata = json.load(mdfile_reader)
1089 |         collection_fp.close()
1090 | 
1091 |         dir_path = meta_fn.parent
1092 |         return SigMFCollection(metadata=metadata, base_path=dir_path, skip_checksums=skip_checksum)
1093 | 
1094 |     else:
1095 |         meta_fp = open(meta_fn, "rb")
1096 |         bytestream_reader = codecs.getreader("utf-8")
1097 |         mdfile_reader = bytestream_reader(meta_fp)
1098 |         metadata = json.load(mdfile_reader)
1099 |         meta_fp.close()
1100 | 
1101 |         data_fn = get_dataset_filename_from_metadata(meta_fn, metadata)
1102 |         return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum)
1103 | 
1104 | 
1105 | def get_sigmf_filenames(filename):
1106 |     """
1107 |     Safely returns a set of SigMF file paths given an input filename.
1108 | 
1109 |     Parameters
1110 |     ----------
1111 |     filename : str | bytes | PathLike
1112 |         The SigMF filename with any extension.
1113 | 
1114 |     Returns
1115 |     -------
1116 |     dict with 'base_fn', 'data_fn', 'meta_fn', 'archive_fn', and 'collection_fn' as keys.
1117 |     """
1118 |     stem_path = Path(filename)
1119 |     # If the path has a sigmf suffix, remove it. Otherwise do not remove the
1120 |     # suffix, because the filename might contain '.' characters which are part
1121 |     # of the filename rather than an extension.
1122 |     sigmf_suffixes = [
1123 |         SIGMF_DATASET_EXT, SIGMF_METADATA_EXT,
1124 |         SIGMF_ARCHIVE_EXT, SIGMF_COLLECTION_EXT,
1125 |     ]
1126 |     if stem_path.suffix in sigmf_suffixes:
1127 |         with_suffix_path = stem_path
1128 |         stem_path = stem_path.with_suffix("")
1129 |     else:
1130 |         # Add a dummy suffix to prevent the .with_suffix() calls below from
1131 |         # overriding part of the filename which is interpreted as a suffix
1132 |         with_suffix_path = stem_path.with_name(f"{stem_path.name}{SIGMF_DATASET_EXT}")
1133 | 
1134 |     return {
1135 |         "base_fn": stem_path,
1136 |         "data_fn": with_suffix_path.with_suffix(SIGMF_DATASET_EXT),
1137 |         "meta_fn": with_suffix_path.with_suffix(SIGMF_METADATA_EXT),
1138 |         "archive_fn": with_suffix_path.with_suffix(SIGMF_ARCHIVE_EXT),
1139 |         "collection_fn": with_suffix_path.with_suffix(SIGMF_COLLECTION_EXT),
1140 |     }
1141 | 
--------------------------------------------------------------------------------
/sigmf/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """Utilities"""
8 | 
9 | import re
10 | import sys
11 | from copy import deepcopy
12 | from datetime import datetime, timezone
13 | 
14 | import numpy as np
15 | 
16 | from .error import SigMFError
17 | 
18 | SIGMF_DATETIME_ISO8601_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"
19 | 
20 | 
21 | def get_sigmf_iso8601_datetime_now() -> str:
22 |     """Get current UTC time as iso8601 string."""
23 |     return datetime.now(timezone.utc).strftime(SIGMF_DATETIME_ISO8601_FMT)
24 | 
25 | 
26 | def parse_iso8601_datetime(string: str) -> datetime:
27 |     """
28 |     Parse an iso8601 string as a datetime struct.
29 |     Input string (indicated by final Z) is in UTC tz.
30 | 
31 |     Example
32 |     -------
33 |     >>> parse_iso8601_datetime("1955-11-05T06:15:00Z")
34 |     datetime.datetime(1955, 11, 5, 6, 15, tzinfo=datetime.timezone.utc)
35 |     """
36 |     match = re.match(r"^(?P<dt>
.*)(?P<frac>\.[0-9]{7,})Z$", string)
37 |     if match:
38 |         # string exceeds max precision allowed by strptime -> truncate to µs
39 |         groups = match.groupdict()
40 |         length = min(7, len(groups["frac"]))
41 |         string = "".join([groups["dt"], groups["frac"][:length], "Z"])
42 | 
43 |     if "." in string:
44 |         # parse float seconds
45 |         format_str = SIGMF_DATETIME_ISO8601_FMT
46 |     else:
47 |         # parse whole seconds
48 |         format_str = SIGMF_DATETIME_ISO8601_FMT.replace(".%f", "")
49 |     return datetime.strptime(string, format_str).replace(tzinfo=timezone.utc)
50 | 
51 | 
52 | def dict_merge(a_dict: dict, b_dict: dict) -> dict:
53 |     """
54 |     Recursively merge `b_dict` into `a_dict`.
55 |     `b_dict[key]` will overwrite `a_dict[key]` if it exists.
56 | 
57 |     Example
58 |     -------
59 |     >>> a, b = {0:0, 1:2}, {1:3, 2:4}
60 |     >>> dict_merge(a, b)
61 |     {0: 0, 1: 3, 2: 4}
62 |     """
63 |     if not isinstance(b_dict, dict):
64 |         return b_dict
65 |     result = deepcopy(a_dict)
66 |     for key, value in b_dict.items():
67 |         if key in result and isinstance(result[key], dict):
68 |             result[key] = dict_merge(result[key], value)
69 |         else:
70 |             result[key] = deepcopy(value)
71 |     return result
72 | 
73 | 
74 | def get_endian_str(ray: np.ndarray) -> str:
75 |     """Return SigMF compatible endianness string for a numpy array"""
76 |     if not isinstance(ray, np.ndarray):
77 |         raise SigMFError("Argument must be a numpy array")
78 |     atype = ray.dtype
79 | 
80 |     if atype.byteorder == "<":
81 |         return "_le"
82 |     if atype.byteorder == ">":
83 |         return "_be"
84 |     # endianness is then either '=' (native) or '|' (doesn't matter)
85 |     return "_le" if sys.byteorder == "little" else "_be"
86 | 
87 | 
88 | def get_data_type_str(ray: np.ndarray) -> str:
89 |     """
90 |     Return the SigMF datatype string for the datatype of numpy array `ray`.
91 | 
92 |     NOTE: this function only supports native numpy types so interleaved complex
93 |     integer types are not supported.
94 |     """
95 |     if not isinstance(ray, np.ndarray):
96 |         raise SigMFError("Argument must be a numpy array")
97 |     atype = ray.dtype
98 |     if atype.kind not in ("u", "i", "f", "c"):
99 |         raise SigMFError("Unsupported data type:", atype)
100 |     data_type_str = ""
101 |     if atype.kind == "c":
102 |         data_type_str += "cf"
103 |         # units are component bits, numpy complex types len(I)+len(Q)
104 |         data_type_str += str(atype.itemsize * 8 // 2)
105 |     elif atype.kind == "f":
106 |         data_type_str += "rf"
107 |         data_type_str += str(atype.itemsize * 8)  # itemsize in bits
108 |     elif atype.kind in ("u", "i"):
109 |         data_type_str += "r" + atype.kind
110 |         data_type_str += str(atype.itemsize * 8)  # itemsize in bits
111 |     if atype.itemsize > 1:
112 |         # only append endianness for types over 8 bits
113 |         data_type_str += get_endian_str(ray)
114 |     return data_type_str
115 | 
--------------------------------------------------------------------------------
/sigmf/validate.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """SigMF Validator"""
8 | import argparse
9 | import glob
10 | import json
11 | import logging
12 | import os
13 | import sys
14 | 
15 | # multi-threading library - should work well as I/O will be the primary
16 | # cost for small SigMF files. Swap to ProcessPool if files are large.
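# A sketch of that swap, assuming hashing of large datasets makes the work
# CPU-bound rather than I/O-bound; ProcessPoolExecutor exposes the same
# executor interface, so the submit/as_completed flow below is unchanged:
#
#     from concurrent.futures import ProcessPoolExecutor
#     with ProcessPoolExecutor() as executor:
#         futures = [executor.submit(_validate_single_file, path, False, log) for path in paths]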
17 | from concurrent.futures import ThreadPoolExecutor, as_completed 18 | 19 | # required for Python 3.7 20 | from typing import Optional, Tuple 21 | 22 | import jsonschema 23 | 24 | from . import __version__ as toolversion 25 | from . import error, schema, sigmffile 26 | 27 | 28 | def validate(metadata, ref_schema=schema.get_schema()) -> None: 29 | """ 30 | Check that the provided `metadata` dict is valid according to the `ref_schema` dict. 31 | Walk entire schema and check all keys. 32 | 33 | Parameters 34 | ---------- 35 | metadata : dict 36 | The SigMF metadata to be validated. 37 | ref_schema : dict, optional 38 | The schema that holds the SigMF metadata definition. 39 | Since the schema evolves over time, we may want to be able to check 40 | against different versions in the *future*. 41 | 42 | Raises 43 | ------ 44 | ValidationError 45 | If metadata is invalid. 46 | """ 47 | jsonschema.validators.validate(instance=metadata, schema=ref_schema) 48 | 49 | # ensure captures and annotations have monotonically increasing sample_start 50 | for key in ["captures", "annotations"]: 51 | count = -1 52 | for item in metadata[key]: 53 | new_count = item["core:sample_start"] 54 | if new_count < count: 55 | raise jsonschema.exceptions.ValidationError(f"{key} has incorrect sample start ordering.") 56 | count = new_count 57 | 58 | 59 | def _validate_single_file(filename, skip_checksum: bool, logger: logging.Logger) -> int: 60 | """Validates a single SigMF file. 61 | 62 | To be called as part of a multithreading / multiprocess application. 63 | 64 | Parameters 65 | ---------- 66 | filename : str 67 | Path and name to sigmf.data or sigmf.meta file. 68 | skip_checksum : bool 69 | Whether to perform checksum computation. 70 | logger : logging.Logger 71 | Logging object to log errors to. 72 | 73 | Returns 74 | ------- 75 | rc : int 76 | 0 if OK, 1 if err 77 | """ 78 | try: 79 | # load signal 80 | signal = sigmffile.fromfile(filename, skip_checksum=skip_checksum) 81 | # validate 82 | signal.validate() 83 | 84 | # handle any of 4 exceptions at once... 85 | except (jsonschema.exceptions.ValidationError, error.SigMFFileError, json.decoder.JSONDecodeError, IOError) as err: 86 | # catch the error, log, and continue 87 | logger.error(f"file `{filename}`: {err}") 88 | return 1 89 | else: 90 | return 0 91 | 92 | 93 | def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None: 94 | """entry-point for command-line validator""" 95 | parser = argparse.ArgumentParser( 96 | description="Validate SigMF Archive or file pair against JSON schema.", prog="sigmf_validate" 97 | ) 98 | parser.add_argument("path", nargs="*", help="SigMF path(s). Accepts * wildcards and extensions are optional.") 99 | parser.add_argument("--skip-checksum", action="store_true", help="Skip reading dataset to validate checksum.") 100 | parser.add_argument("-v", "--verbose", action="count", default=0) 101 | parser.add_argument("--version", action="version", version=f"%(prog)s {toolversion}") 102 | 103 | # allow pass-in arg_tuple for testing purposes 104 | args = parser.parse_args(arg_tuple) 105 | 106 | level_lut = { 107 | 0: logging.WARNING, 108 | 1: logging.INFO, 109 | 2: logging.DEBUG, 110 | } 111 | log = logging.getLogger() 112 | logging.basicConfig(level=level_lut[min(args.verbose, 2)]) 113 | 114 | paths = [] 115 | # resolve possible wildcards 116 | for path in args.path: 117 | paths += glob.glob(path) 118 | 119 | # multi-processing / threading pathway. 
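# Note: the pathway below resolves a worker count, submits one
# _validate_single_file() job per resolved path, and then tallies zero
# return codes as successes via as_completed().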
120 | n_completed = 0 121 | n_total = len(paths) 122 | # estimate number of CPU cores 123 | # https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python 124 | est_num_workers = len(os.sched_getaffinity(0)) if os.name == 'posix' else os.cpu_count() 125 | # create a thread pool 126 | # https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor 127 | with ThreadPoolExecutor(max_workers=est_num_workers) as executor: 128 | # submit jobs 129 | future_validations = {executor.submit(_validate_single_file, path, args.skip_checksum, log) for path in paths} 130 | # load and await jobs to complete... no return 131 | for future in as_completed(future_validations): 132 | if future.result() == 0: 133 | n_completed += 1 134 | 135 | if n_total == 0: 136 | log.error("No paths to validate.") 137 | sys.exit(1) 138 | elif n_completed != n_total: 139 | log.info("Validated %d of %d files OK", n_completed, n_total) 140 | sys.exit(1) 141 | else: 142 | log.info("Validated all %d files OK!", n_total) 143 | 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sigmf/sigmf-python/2ae107f0e34ae4d3cf8a4b23d39803e85839a628/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Provides pytest fixtures for other tests.""" 8 | 9 | import tempfile 10 | 11 | import pytest 12 | 13 | from sigmf import __specification__ 14 | from sigmf.archive import SIGMF_DATASET_EXT 15 | from sigmf.sigmffile import SigMFFile 16 | 17 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 18 | 19 | 20 | @pytest.fixture 21 | def test_data_file(): 22 | """when called, yields temporary dataset""" 23 | with tempfile.NamedTemporaryFile(suffix=f".{SIGMF_DATASET_EXT}") as temp: 24 | TEST_FLOAT32_DATA.tofile(temp.name) 25 | yield temp 26 | 27 | 28 | @pytest.fixture 29 | def test_sigmffile(test_data_file): 30 | """If pytest uses this signature, will return valid SigMF file.""" 31 | meta = SigMFFile() 32 | meta.set_global_field("core:datatype", "rf32_le") 33 | meta.set_global_field("core:version", __specification__) 34 | meta.add_annotation(start_index=0, length=len(TEST_FLOAT32_DATA)) 35 | meta.add_capture(start_index=0) 36 | meta.set_data_file(test_data_file.name) 37 | assert meta._metadata == TEST_METADATA 38 | return meta 39 | -------------------------------------------------------------------------------- /tests/test_archive.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFArchive""" 8 | 9 | import codecs 10 | import json 11 | import tarfile 12 | import tempfile 13 | from pathlib import Path 14 | 15 | import jsonschema 16 | import numpy as np 17 | import pytest 18 | 19 | from sigmf import error 20 | from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT 21 | 22 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 23 | 24 | 25 | def create_test_archive(test_sigmffile, tmpfile): 26 | sigmf_archive = test_sigmffile.archive(fileobj=tmpfile) 27 | sigmf_tarfile = tarfile.open(sigmf_archive, mode="r", format=tarfile.PAX_FORMAT) 28 | return sigmf_tarfile 29 | 30 | 31 | def test_without_data_file_throws_fileerror(test_sigmffile): 32 | test_sigmffile.data_file = None 33 | with tempfile.NamedTemporaryFile() as temp: 34 | with pytest.raises(error.SigMFFileError): 35 | test_sigmffile.archive(name=temp.name) 36 | 37 | 38 | def test_invalid_md_throws_validationerror(test_sigmffile): 39 | del test_sigmffile._metadata["global"]["core:datatype"] # required field 40 | with tempfile.NamedTemporaryFile() as temp: 41 | with pytest.raises(jsonschema.exceptions.ValidationError): 42 | test_sigmffile.archive(name=temp.name) 43 | 44 | 45 | def test_name_wrong_extension_throws_fileerror(test_sigmffile): 46 | with tempfile.NamedTemporaryFile() as temp: 47 | with pytest.raises(error.SigMFFileError): 48 | test_sigmffile.archive(name=temp.name + ".zip") 49 | 50 | 51 | def test_fileobj_extension_ignored(test_sigmffile): 52 | with tempfile.NamedTemporaryFile(suffix=".tar") as temp: 53 | test_sigmffile.archive(fileobj=temp) 54 | 55 | 56 | def test_name_used_in_fileobj(test_sigmffile): 57 | with tempfile.NamedTemporaryFile() as temp: 58 | sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp) 59 | sigmf_tarfile = tarfile.open(sigmf_archive, mode="r") 60 | basedir, file1, file2 = sigmf_tarfile.getmembers() 61 | assert basedir.name == "testarchive" 62 | 63 | def filename(tarinfo): 64 | return Path(tarinfo.name).stem 65 | 66 | assert filename(file1) == "testarchive" 67 | assert filename(file2) == "testarchive" 68 | 69 | 70 | def test_fileobj_not_closed(test_sigmffile): 71 | with tempfile.NamedTemporaryFile() as temp: 72 | test_sigmffile.archive(fileobj=temp) 73 | assert not temp.file.closed 74 | 75 | 76 | def test_unwritable_fileobj_throws_fileerror(test_sigmffile): 77 | with tempfile.NamedTemporaryFile(mode="rb") as temp: 78 | with pytest.raises(error.SigMFFileError): 79 | test_sigmffile.archive(fileobj=temp) 80 | 81 | 82 | def test_unwritable_name_throws_fileerror(test_sigmffile): 83 | # Cannot assume /root/ is unwritable (e.g. 
Docker environment)
84 |     # so use invalid filename
85 |     unwritable_file = "/bad_name/"
86 |     with pytest.raises(error.SigMFFileError):
87 |         test_sigmffile.archive(name=unwritable_file)
88 | 
89 | 
90 | def test_tarfile_layout(test_sigmffile):
91 |     with tempfile.NamedTemporaryFile() as temp:
92 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
93 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
94 |         assert tarfile.TarInfo.isdir(basedir)
95 |         assert tarfile.TarInfo.isfile(file1)
96 |         assert tarfile.TarInfo.isfile(file2)
97 | 
98 | 
99 | def test_tarfile_names_and_extensions(test_sigmffile):
100 |     with tempfile.NamedTemporaryFile() as temp:
101 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
102 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
103 |         archive_name = basedir.name
104 |         assert archive_name == Path(temp.name).name
105 |         file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT}
106 | 
107 |         file1_name, file1_ext = Path(file1.name).stem, Path(file1.name).suffix
108 |         assert file1_name == archive_name
109 |         assert file1_ext in file_extensions
110 | 
111 |         file_extensions.remove(file1_ext)
112 | 
113 |         file2_name, file2_ext = Path(file2.name).stem, Path(file2.name).suffix
114 |         assert file2_name == archive_name
115 |         assert file2_ext in file_extensions
116 | 
117 | 
118 | def test_tarfile_permissions(test_sigmffile):
119 |     with tempfile.NamedTemporaryFile() as temp:
120 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
121 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
122 |         assert basedir.mode == 0o755
123 |         assert file1.mode == 0o644
124 |         assert file2.mode == 0o644
125 | 
126 | 
127 | def test_contents(test_sigmffile):
128 |     with tempfile.NamedTemporaryFile() as temp:
129 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
130 |         basedir, file1, file2 = sigmf_tarfile.getmembers()
131 |         if file1.name.endswith(SIGMF_METADATA_EXT):
132 |             mdfile = file1
133 |             datfile = file2
134 |         else:
135 |             mdfile = file2
136 |             datfile = file1
137 | 
138 |         bytestream_reader = codecs.getreader("utf-8")  # bytes -> str
139 |         mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile))
140 |         assert json.load(mdfile_reader) == TEST_METADATA
141 | 
142 |         datfile_reader = sigmf_tarfile.extractfile(datfile)
143 |         # calling `fileno` on `tarfile.ExFileObject` throws error (?), but
144 |         # np.fromfile requires it, so we need this extra step
145 |         data = np.frombuffer(datfile_reader.read(), dtype=np.float32)
146 | 
147 |         assert np.array_equal(data, TEST_FLOAT32_DATA)
148 | 
149 | 
150 | def test_tarfile_type(test_sigmffile):
151 |     with tempfile.NamedTemporaryFile() as temp:
152 |         sigmf_tarfile = create_test_archive(test_sigmffile, temp)
153 |         assert sigmf_tarfile.format == tarfile.PAX_FORMAT
154 | 
--------------------------------------------------------------------------------
/tests/test_archivereader.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python.
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFArchiveReader""" 8 | 9 | import unittest 10 | from tempfile import NamedTemporaryFile 11 | 12 | import numpy as np 13 | 14 | import sigmf 15 | from sigmf import SigMFArchiveReader, SigMFFile, __specification__ 16 | 17 | 18 | class TestArchiveReader(unittest.TestCase): 19 | def setUp(self): 20 | # in order to check shapes we need some positive number of samples to work with 21 | # number of samples should be lowest common factor of num_channels 22 | self.raw_count = 16 23 | self.lut = { 24 | "i8": np.int8, 25 | "u8": np.uint8, 26 | "i16": np.int16, 27 | "u16": np.uint16, 28 | "u32": np.uint32, 29 | "i32": np.int32, 30 | "f32": np.float32, 31 | "f64": np.float64, 32 | } 33 | 34 | def test_access_data_without_untar(self): 35 | """iterate through datatypes and verify IO is correct""" 36 | temp_data = NamedTemporaryFile() 37 | temp_archive = NamedTemporaryFile(suffix=".sigmf") 38 | 39 | for key, dtype in self.lut.items(): 40 | # for each type of storage 41 | temp_samples = np.arange(self.raw_count, dtype=dtype) 42 | temp_samples.tofile(temp_data.name) 43 | for num_channels in [1, 4, 8]: 44 | # for single or 8 channel 45 | for complex_prefix in ["r", "c"]: 46 | # for real or complex 47 | target_count = self.raw_count 48 | temp_meta = SigMFFile( 49 | data_file=temp_data.name, 50 | global_info={ 51 | SigMFFile.DATATYPE_KEY: f"{complex_prefix}{key}_le", 52 | SigMFFile.NUM_CHANNELS_KEY: num_channels, 53 | }, 54 | ) 55 | temp_meta.tofile(temp_archive.name, toarchive=True) 56 | 57 | readback = SigMFArchiveReader(temp_archive.name) 58 | readback_samples = readback[:] 59 | 60 | if complex_prefix == "c": 61 | # complex data will be half as long 62 | target_count //= 2 63 | self.assertTrue(np.all(np.iscomplex(readback_samples))) 64 | if num_channels != 1: 65 | # check expected # of channels 66 | self.assertEqual( 67 | readback_samples.ndim, 68 | 2, 69 | "Mismatch in shape of readback samples.", 70 | ) 71 | target_count //= num_channels 72 | 73 | self.assertEqual( 74 | target_count, 75 | temp_meta._count_samples(), 76 | "Mismatch in expected metadata length.", 77 | ) 78 | self.assertEqual( 79 | target_count, 80 | len(readback), 81 | "Mismatch in expected readback length", 82 | ) 83 | 84 | 85 | def test_archiveread_data_file_unchanged(test_sigmffile): 86 | with NamedTemporaryFile(suffix=".sigmf") as temp_file: 87 | input_samples = test_sigmffile.read_samples() 88 | test_sigmffile.archive(temp_file.name) 89 | arc = sigmf.sigmffile.fromfile(temp_file.name) 90 | output_samples = arc.read_samples() 91 | 92 | assert np.array_equal(input_samples, output_samples) 93 | -------------------------------------------------------------------------------- /tests/test_collection.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python
4 | #
5 | # SPDX-License-Identifier: LGPL-3.0-or-later
6 | 
7 | """Tests for collections"""
8 | 
9 | import copy
10 | import os
11 | import shutil
12 | import tempfile
13 | import unittest
14 | from pathlib import Path
15 | 
16 | import numpy as np
17 | from hypothesis import given
18 | from hypothesis import strategies as st
19 | 
20 | from sigmf.archive import SIGMF_COLLECTION_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT
21 | from sigmf.sigmffile import SigMFCollection, SigMFFile, fromfile
22 | 
23 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA
24 | 
25 | 
26 | class TestCollection(unittest.TestCase):
27 |     """unit tests for collections"""
28 | 
29 |     def setUp(self):
30 |         """create temporary path"""
31 |         self.temp_dir = Path(tempfile.mkdtemp())
32 | 
33 |     def tearDown(self):
34 |         """remove temporary path"""
35 |         shutil.rmtree(self.temp_dir)
36 | 
37 |     @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"]))
38 |     def test_load_collection(self, subdir: str) -> None:
39 |         """test path handling for collections"""
40 |         data_name1 = "dat1" + SIGMF_DATASET_EXT
41 |         data_name2 = "dat2" + SIGMF_DATASET_EXT
42 |         meta_name1 = "dat1" + SIGMF_METADATA_EXT
43 |         meta_name2 = "dat2" + SIGMF_METADATA_EXT
44 |         collection_name = "collection" + SIGMF_COLLECTION_EXT
45 |         data_path1 = self.temp_dir / subdir / data_name1
46 |         data_path2 = self.temp_dir / subdir / data_name2
47 |         meta_path1 = self.temp_dir / subdir / meta_name1
48 |         meta_path2 = self.temp_dir / subdir / meta_name2
49 |         collection_path = self.temp_dir / subdir / collection_name
50 |         os.makedirs(collection_path.parent, exist_ok=True)
51 | 
52 |         # create data files
53 |         TEST_FLOAT32_DATA.tofile(data_path1)
54 |         TEST_FLOAT32_DATA.tofile(data_path2)
55 | 
56 |         # create metadata files
57 |         metadata = copy.deepcopy(TEST_METADATA)
58 |         meta1 = SigMFFile(metadata=metadata, data_file=data_path1)
59 |         meta2 = SigMFFile(metadata=metadata, data_file=data_path2)
60 |         meta1.tofile(meta_path1)
61 |         meta2.tofile(meta_path2)
62 | 
63 |         # create collection
64 |         collection = SigMFCollection(
65 |             metafiles=[meta_name1, meta_name2],
66 |             base_path=str(self.temp_dir / subdir),
67 |         )
68 |         collection.tofile(collection_path)
69 | 
70 |         # load collection
71 |         collection_loopback = fromfile(collection_path)
72 |         meta1_loopback = collection_loopback.get_SigMFFile(stream_index=0)
73 |         meta2_loopback = collection_loopback.get_SigMFFile(stream_index=1)
74 | 
75 |         self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta1_loopback.read_samples()))
76 |         self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta2_loopback[:]))
77 | 
--------------------------------------------------------------------------------
/tests/test_ncd.py:
--------------------------------------------------------------------------------
1 | # Copyright: Multiple Authors
2 | #
3 | # This file is part of sigmf-python.
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Non-Conforming Datasets""" 8 | 9 | import copy 10 | import shutil 11 | import tempfile 12 | import unittest 13 | from pathlib import Path 14 | 15 | import numpy as np 16 | from hypothesis import given 17 | from hypothesis import strategies as st 18 | 19 | from sigmf.error import SigMFFileError 20 | from sigmf.sigmffile import SigMFFile, fromfile 21 | 22 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 23 | 24 | 25 | class TestNonConformingDataset(unittest.TestCase): 26 | """unit tests for NCD""" 27 | 28 | def setUp(self): 29 | """create temporary path""" 30 | self.temp_dir = Path(tempfile.mkdtemp()) 31 | 32 | def tearDown(self): 33 | """remove temporary path""" 34 | shutil.rmtree(self.temp_dir) 35 | 36 | @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"])) 37 | def test_load_ncd(self, subdir: str) -> None: 38 | """test loading non-conforming dataset""" 39 | data_path = self.temp_dir / subdir / "dat.bin" 40 | meta_path = self.temp_dir / subdir / "dat.sigmf-meta" 41 | Path.mkdir(data_path.parent, parents=True, exist_ok=True) 42 | 43 | # create data file 44 | TEST_FLOAT32_DATA.tofile(data_path) 45 | 46 | # create metadata file 47 | ncd_metadata = copy.deepcopy(TEST_METADATA) 48 | meta = SigMFFile(metadata=ncd_metadata, data_file=data_path) 49 | meta.tofile(meta_path) 50 | 51 | # load dataset & validate we can read all the data 52 | meta_loopback = fromfile(meta_path) 53 | self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback.read_samples())) 54 | self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback[:])) 55 | 56 | # delete the non-conforming dataset and ensure error is raised due to missing dataset; 57 | # in Windows the SigMFFile instances need to be garbage collected first, 58 | # otherwise the np.memmap instances (stored in self._memmap) block the deletion 59 | meta = None 60 | meta_loopback = None 61 | Path.unlink(data_path) 62 | with self.assertRaises(SigMFFileError): 63 | _ = fromfile(meta_path) 64 | -------------------------------------------------------------------------------- /tests/test_sigmffile.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for SigMFFile Object""" 8 | 9 | import copy 10 | import json 11 | import shutil 12 | import tempfile 13 | import unittest 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | from sigmf import error, sigmffile, utils 19 | from sigmf.sigmffile import SigMFFile 20 | 21 | from .testdata import * 22 | 23 | 24 | class TestClassMethods(unittest.TestCase): 25 | def setUp(self): 26 | """ensure tests have a valid SigMF object to work with""" 27 | self.temp_dir = Path(tempfile.mkdtemp()) 28 | self.temp_path_data = self.temp_dir / "trash.sigmf-data" 29 | self.temp_path_meta = self.temp_dir / "trash.sigmf-meta" 30 | TEST_FLOAT32_DATA.tofile(self.temp_path_data) 31 | self.sigmf_object = SigMFFile(TEST_METADATA, data_file=self.temp_path_data) 32 | self.sigmf_object.tofile(self.temp_path_meta) 33 | 34 | def tearDown(self): 35 | """remove temporary dir""" 36 | shutil.rmtree(self.temp_dir) 37 | 38 | def test_pathlib_handle(self): 39 | """ensure file can be a string or a pathlib object""" 40 | self.assertTrue(self.temp_path_data.exists()) 41 | obj_str = sigmffile.fromfile(str(self.temp_path_data)) 42 | obj_str.validate() 43 | obj_pth = sigmffile.fromfile(self.temp_path_data) 44 | obj_pth.validate() 45 | 46 | def test_filenames_with_dots(self): 47 | """test that filenames with non-extension . characters are handled correctly""" 48 | filenames = ["a", "b.c", "d.e.f"] 49 | for filename in filenames: 50 | temp_path_data = self.temp_dir / f"{filename}.sigmf-data" 51 | temp_path_meta = self.temp_dir / f"{filename}.sigmf-meta" 52 | TEST_FLOAT32_DATA.tofile(temp_path_data) 53 | self.sigmf_object = SigMFFile(TEST_METADATA, data_file=temp_path_data) 54 | self.sigmf_object.tofile(temp_path_meta) 55 | files = [str(temp_path_data), temp_path_data, str(temp_path_meta), temp_path_meta] 56 | for filename in files: 57 | obj = sigmffile.fromfile(filename) 58 | obj.validate() 59 | 60 | def test_iterator_basic(self): 61 | """make sure default batch_size works""" 62 | count = 0 63 | for _ in self.sigmf_object: 64 | count += 1 65 | self.assertEqual(count, len(self.sigmf_object)) 66 | 67 | def test_checksum(self): 68 | """Ensure checksum fails when incorrect or empty string.""" 69 | for new_checksum in ("", "a", 0): 70 | bad_checksum_metadata = copy.deepcopy(TEST_METADATA) 71 | bad_checksum_metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = new_checksum 72 | with self.assertRaises(error.SigMFFileError): 73 | _ = SigMFFile(bad_checksum_metadata, self.temp_path_data) 74 | 75 | def test_equality(self): 76 | """Ensure __eq__ working as expected""" 77 | other = SigMFFile(copy.deepcopy(TEST_METADATA)) 78 | self.assertEqual(self.sigmf_object, other) 79 | # different after changing any part of metadata 80 | other.add_annotation(start_index=0, metadata={"a": 0}) 81 | self.assertNotEqual(self.sigmf_object, other) 82 | 83 | 84 | class TestAnnotationHandling(unittest.TestCase): 85 | def test_get_annotations_with_index(self): 86 | """Test that only annotations containing index are returned from get_annotations()""" 87 | smf = SigMFFile(copy.deepcopy(TEST_METADATA)) 88 | smf.add_annotation(start_index=1) 89 | smf.add_annotation(start_index=4, length=4) 90 | annotations_idx10 = smf.get_annotations(index=10) 91 | self.assertListEqual( 92 | annotations_idx10, 93 | [ 94 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 16}, 95 | {SigMFFile.START_INDEX_KEY: 1}, 96 | ], 97 | ) 98 | 99 | def 
test__count_samples_from_annotation(self):
100 |         """Make sure sample count from annotations uses the correct end index"""
101 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
102 |         smf.add_annotation(start_index=0, length=32)
103 |         smf.add_annotation(start_index=4, length=4)
104 |         sample_count = smf._count_samples()
105 |         self.assertEqual(sample_count, 32)
106 | 
107 |     def test_set_data_file_without_annotations(self):
108 |         """
109 |         Make sure setting data_file with no annotations registered does not
110 |         raise any errors
111 |         """
112 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
113 |         smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
114 |         with tempfile.TemporaryDirectory() as tmpdir:
115 |             temp_path_data = Path(tmpdir) / "datafile"
116 |             TEST_FLOAT32_DATA.tofile(temp_path_data)
117 |             smf.set_data_file(temp_path_data)
118 |             samples = smf.read_samples()
119 |             self.assertTrue(len(samples) == 16)
120 | 
121 |     def test_set_data_file_with_annotations(self):
122 |         """
123 |         Make sure setting data_file with annotations registered uses the sample
124 |         count from data_file and issues a warning if annotations have end
125 |         indices bigger than the file end index
126 |         """
127 |         smf = SigMFFile(copy.deepcopy(TEST_METADATA))
128 |         smf.add_annotation(start_index=0, length=32)
129 |         with tempfile.TemporaryDirectory() as tmpdir:
130 |             temp_path_data = Path(tmpdir) / "datafile"
131 |             TEST_FLOAT32_DATA.tofile(temp_path_data)
132 |             with self.assertWarns(Warning):
133 |                 # Issues warning since file ends before the final annotation
134 |                 smf.set_data_file(temp_path_data)
135 |             samples = smf.read_samples()
136 |             self.assertTrue(len(samples) == 16)
137 | 
138 | 
139 | class TestMultichannel(unittest.TestCase):
140 |     def setUp(self):
141 |         # in order to check shapes we need some positive number of samples to work with
142 |         # number of samples should be lowest common factor of num_channels
143 |         self.raw_count = 16
144 |         self.lut = {
145 |             "i8": np.int8,
146 |             "u8": np.uint8,
147 |             "i16": np.int16,
148 |             "u16": np.uint16,
149 |             "u32": np.uint32,
150 |             "i32": np.int32,
151 |             "f32": np.float32,
152 |             "f64": np.float64,
153 |         }
154 |         self.temp_file = tempfile.NamedTemporaryFile()
155 |         self.temp_path = Path(self.temp_file.name)
156 | 
157 |     def tearDown(self):
158 |         """clean-up temporary files"""
159 |         self.temp_file.close()
160 | 
161 |     def test_multichannel_types(self):
162 |         """check that real & complex for all types is reading multiple channels correctly"""
163 |         for key, dtype in self.lut.items():
164 |             # for each type of storage
165 |             np.arange(self.raw_count, dtype=dtype).tofile(self.temp_path)
166 |             for num_channels in [1, 4, 8]:
167 |                 # for single or 8 channel
168 |                 for complex_prefix in ["r", "c"]:
169 |                     # for real or complex
170 |                     check_count = self.raw_count
171 |                     temp_signal = SigMFFile(
172 |                         data_file=self.temp_path,
173 |                         global_info={
174 |                             SigMFFile.DATATYPE_KEY: f"{complex_prefix}{key}_le",
175 |                             SigMFFile.NUM_CHANNELS_KEY: num_channels,
176 |                         },
177 |                     )
178 |                     temp_samples = temp_signal.read_samples()
179 | 
180 |                     if complex_prefix == "c":
181 |                         # complex data will be half as long
182 |                         check_count //= 2
183 |                         self.assertTrue(np.all(np.iscomplex(temp_samples)))
184 |                     if num_channels != 1:
185 |                         self.assertEqual(temp_samples.ndim, 2)
186 |                         check_count //= num_channels
187 | 
188 |                     self.assertEqual(check_count, temp_signal._count_samples())
189 | 
190 |     def test_multichannel_seek(self):
191 |         """ensure that seeking is working correctly with multichannel files"""
192 |         # write some dummy data and read back
193 | 
np.arange(18, dtype=np.uint16).tofile(self.temp_path) 194 | temp_signal = SigMFFile( 195 | data_file=self.temp_path, 196 | global_info={ 197 | SigMFFile.DATATYPE_KEY: "cu16_le", 198 | SigMFFile.NUM_CHANNELS_KEY: 3, 199 | }, 200 | ) 201 | # read after the first sample 202 | temp_samples = temp_signal.read_samples(start_index=1, autoscale=False) 203 | # ensure samples are in the order we expect 204 | self.assertTrue(np.all(temp_samples[:, 0] == np.array([6 + 7j, 12 + 13j]))) 205 | 206 | 207 | def test_key_validity(): 208 | """ensure the keys in test metadata are valid""" 209 | for top_key, top_val in TEST_METADATA.items(): 210 | if isinstance(top_val, dict): 211 | for core_key in top_val.keys(): 212 | assert core_key in vars(SigMFFile)[f"VALID_{top_key.upper()}_KEYS"] 213 | elif isinstance(top_val, list): 214 | # annotations are in a list 215 | for annot in top_val: 216 | for core_key in annot.keys(): 217 | assert core_key in SigMFFile.VALID_ANNOTATION_KEYS 218 | else: 219 | raise ValueError("expected list or dict") 220 | 221 | 222 | def test_ordered_metadata(): 223 | """check to make sure the metadata is sorted as expected""" 224 | sigf = SigMFFile() 225 | top_sort_order = ["global", "captures", "annotations"] 226 | for kdx, key in enumerate(sigf.ordered_metadata()): 227 | assert kdx == top_sort_order.index(key) 228 | 229 | 230 | class TestCaptures(unittest.TestCase): 231 | """ensure capture access tools work properly""" 232 | 233 | def setUp(self) -> None: 234 | """ensure tests have a valid SigMF object to work with""" 235 | self.temp_dir = Path(tempfile.mkdtemp()) 236 | self.temp_path_data = self.temp_dir / "trash.sigmf-data" 237 | self.temp_path_meta = self.temp_dir / "trash.sigmf-meta" 238 | 239 | def tearDown(self) -> None: 240 | """remove temporary dir""" 241 | shutil.rmtree(self.temp_dir) 242 | 243 | def prepare(self, data: list, meta: dict, dtype: type) -> SigMFFile: 244 | """write some data and metadata to temporary paths""" 245 | np.array(data, dtype=dtype).tofile(self.temp_path_data) 246 | with open(self.temp_path_meta, "w") as handle: 247 | json.dump(meta, handle) 248 | meta = sigmffile.fromfile(self.temp_path_meta, skip_checksum=True) 249 | return meta 250 | 251 | def test_000(self) -> None: 252 | """compliant two-capture recording""" 253 | meta = self.prepare(TEST_U8_DATA0, TEST_U8_META0, np.uint8) 254 | self.assertEqual(256, meta._count_samples()) 255 | self.assertTrue(meta._is_conforming_dataset()) 256 | self.assertTrue((0, 0), meta.get_capture_byte_boundarys(0)) 257 | self.assertTrue((0, 256), meta.get_capture_byte_boundarys(1)) 258 | self.assertTrue(np.array_equal(TEST_U8_DATA0, meta.read_samples(autoscale=False))) 259 | self.assertTrue(np.array_equal(np.array([]), meta.read_samples_in_capture(0))) 260 | self.assertTrue(np.array_equal(TEST_U8_DATA0, meta.read_samples_in_capture(1, autoscale=False))) 261 | 262 | def test_001(self) -> None: 263 | """two capture recording with header_bytes and trailing_bytes set""" 264 | meta = self.prepare(TEST_U8_DATA1, TEST_U8_META1, np.uint8) 265 | self.assertEqual(192, meta._count_samples()) 266 | self.assertFalse(meta._is_conforming_dataset()) 267 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 268 | self.assertTrue((160, 224), meta.get_capture_byte_boundarys(1)) 269 | self.assertTrue(np.array_equal(np.arange(128), meta.read_samples_in_capture(0, autoscale=False))) 270 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(1, autoscale=False))) 271 | 272 | def test_002(self) -> None: 273 | 
"""two capture recording with multiple header_bytes set""" 274 | meta = self.prepare(TEST_U8_DATA2, TEST_U8_META2, np.uint8) 275 | self.assertEqual(192, meta._count_samples()) 276 | self.assertFalse(meta._is_conforming_dataset()) 277 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 278 | self.assertTrue((176, 240), meta.get_capture_byte_boundarys(1)) 279 | self.assertTrue(np.array_equal(np.arange(128), meta.read_samples_in_capture(0, autoscale=False))) 280 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(1, autoscale=False))) 281 | 282 | def test_003(self) -> None: 283 | """three capture recording with multiple header_bytes set""" 284 | meta = self.prepare(TEST_U8_DATA3, TEST_U8_META3, np.uint8) 285 | self.assertEqual(192, meta._count_samples()) 286 | self.assertFalse(meta._is_conforming_dataset()) 287 | self.assertTrue((32, 64), meta.get_capture_byte_boundarys(0)) 288 | self.assertTrue((64, 160), meta.get_capture_byte_boundarys(1)) 289 | self.assertTrue((192, 256), meta.get_capture_byte_boundarys(2)) 290 | self.assertTrue(np.array_equal(np.arange(32), meta.read_samples_in_capture(0, autoscale=False))) 291 | self.assertTrue(np.array_equal(np.arange(32, 128), meta.read_samples_in_capture(1, autoscale=False))) 292 | self.assertTrue(np.array_equal(np.arange(128, 192), meta.read_samples_in_capture(2, autoscale=False))) 293 | 294 | def test_004(self) -> None: 295 | """two channel version of 000""" 296 | meta = self.prepare(TEST_U8_DATA4, TEST_U8_META4, np.uint8) 297 | self.assertEqual(96, meta._count_samples()) 298 | self.assertFalse(meta._is_conforming_dataset()) 299 | self.assertTrue((32, 160), meta.get_capture_byte_boundarys(0)) 300 | self.assertTrue((160, 224), meta.get_capture_byte_boundarys(1)) 301 | self.assertTrue( 302 | np.array_equal(np.arange(64).repeat(2).reshape(-1, 2), meta.read_samples_in_capture(0, autoscale=False)) 303 | ) 304 | self.assertTrue( 305 | np.array_equal(np.arange(64, 96).repeat(2).reshape(-1, 2), meta.read_samples_in_capture(1, autoscale=False)) 306 | ) 307 | 308 | def test_slicing_ru8(self) -> None: 309 | """slice real uint8""" 310 | meta = self.prepare(TEST_U8_DATA0, TEST_U8_META0, np.uint8) 311 | self.assertTrue(np.array_equal(meta[:], TEST_U8_DATA0)) 312 | self.assertTrue(np.array_equal(meta[6], TEST_U8_DATA0[6])) 313 | self.assertTrue(np.array_equal(meta[1:-1], TEST_U8_DATA0[1:-1])) 314 | 315 | def test_slicing_rf32(self) -> None: 316 | """slice real float32""" 317 | meta = self.prepare(TEST_FLOAT32_DATA, TEST_METADATA, np.float32) 318 | self.assertTrue(np.array_equal(meta[:], TEST_FLOAT32_DATA)) 319 | self.assertTrue(np.array_equal(meta[9], TEST_FLOAT32_DATA[9])) 320 | 321 | def test_slicing_multiple_channels(self) -> None: 322 | """slice multiple channels""" 323 | meta = self.prepare(TEST_U8_DATA4, TEST_U8_META4, np.uint8) 324 | channelized = np.array(TEST_U8_DATA4).reshape((-1, 2)) 325 | self.assertTrue(np.array_equal(meta[:][:], channelized)) 326 | self.assertTrue(np.array_equal(meta[10:20, 0], meta.read_samples(autoscale=False)[10:20, 0])) 327 | self.assertTrue(np.array_equal(meta[0], channelized[0])) 328 | self.assertTrue(np.array_equal(meta[1, :], channelized[1])) 329 | 330 | 331 | def simulate_capture(sigmf_md, n, capture_len): 332 | start_index = capture_len * n 333 | 334 | capture_md = {"core:datetime": utils.get_sigmf_iso8601_datetime_now()} 335 | 336 | sigmf_md.add_capture(start_index=start_index, metadata=capture_md) 337 | 338 | annotation_md = { 339 | "core:latitude": 40.0 + 0.0001 * n, 340 | 
"core:longitude": -105.0 + 0.0001 * n, 341 | } 342 | 343 | sigmf_md.add_annotation(start_index=start_index, length=capture_len, metadata=annotation_md) 344 | 345 | 346 | def test_default_constructor(): 347 | SigMFFile() 348 | 349 | 350 | def test_set_non_required_global_field(): 351 | sigf = SigMFFile() 352 | sigf.set_global_field("this_is:not_in_the_schema", None) 353 | 354 | 355 | def test_add_capture(): 356 | sigf = SigMFFile() 357 | sigf.add_capture(start_index=0, metadata={}) 358 | 359 | 360 | def test_add_annotation(): 361 | sigf = SigMFFile() 362 | sigf.add_capture(start_index=0) 363 | meta = {"latitude": 40.0, "longitude": -105.0} 364 | sigf.add_annotation(start_index=0, length=128, metadata=meta) 365 | 366 | 367 | def test_fromarchive(test_sigmffile): 368 | with tempfile.NamedTemporaryFile(suffix=".sigmf") as temp_file: 369 | archive_path = test_sigmffile.archive(name=temp_file.name) 370 | result = sigmffile.fromarchive(archive_path=archive_path) 371 | assert result._metadata == test_sigmffile._metadata == TEST_METADATA 372 | 373 | 374 | def test_add_multiple_captures_and_annotations(): 375 | sigf = SigMFFile() 376 | for idx in range(3): 377 | simulate_capture(sigf, idx, 1024) 378 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Utilities""" 8 | 9 | from datetime import datetime, timezone 10 | 11 | import pytest 12 | 13 | from sigmf import utils 14 | 15 | 16 | # fmt: off 17 | @pytest.mark.parametrize("time_str, expected", [ 18 | ("1955-07-04T05:15:00Z", datetime(year=1955, month=7, day=4, hour=5, minute=15, second=00, microsecond=0, tzinfo=timezone.utc)), 19 | ("2956-08-05T06:15:12Z", datetime(year=2956, month=8, day=5, hour=6, minute=15, second=12, microsecond=0, tzinfo=timezone.utc)), 20 | ("3957-09-06T07:15:12.345Z", datetime(year=3957, month=9, day=6, hour=7, minute=15, second=12, microsecond=345000, tzinfo=timezone.utc)), 21 | ("4958-10-07T08:15:12.0345Z", datetime(year=4958, month=10, day=7, hour=8, minute=15, second=12, microsecond=34500, tzinfo=timezone.utc)), 22 | ("5959-11-08T09:15:12.000000Z", datetime(year=5959, month=11, day=8, hour=9, minute=15, second=12, microsecond=0, tzinfo=timezone.utc)), 23 | ("6960-12-09T10:15:12.123456789123Z", datetime(year=6960, month=12, day=9, hour=10, minute=15, second=12, microsecond=123456, tzinfo=timezone.utc)), 24 | ]) 25 | # fmt: on 26 | def test_parse_simple_iso8601(time_str: str, expected: datetime) -> None: 27 | """Ensure various times are represented as expected""" 28 | date_struct = utils.parse_iso8601_datetime(time_str) 29 | assert date_struct == expected 30 | 31 | 32 | def test_roundtrip_datetime() -> None: 33 | """New string -> struct -> string is ok""" 34 | now_str = utils.get_sigmf_iso8601_datetime_now() 35 | now_struct = utils.parse_iso8601_datetime(now_str) 36 | assert now_str == now_struct.strftime(utils.SIGMF_DATETIME_ISO8601_FMT) 37 | -------------------------------------------------------------------------------- /tests/test_validation.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Tests for Validator""" 8 | 9 | import tempfile 10 | import unittest 11 | from pathlib import Path 12 | 13 | from jsonschema.exceptions import ValidationError 14 | 15 | import sigmf 16 | from sigmf import SigMFFile 17 | 18 | from .testdata import TEST_FLOAT32_DATA, TEST_METADATA 19 | 20 | 21 | def test_valid_data(): 22 | """ensure the default metadata is OK""" 23 | SigMFFile(TEST_METADATA).validate() 24 | 25 | 26 | class CommandLineValidator(unittest.TestCase): 27 | """Check behavior of command-line parser""" 28 | 29 | def setUp(self): 30 | """Create a directory with some valid files""" 31 | self.tmp_dir = tempfile.TemporaryDirectory() 32 | self.tmp_path = tmp_path = Path(self.tmp_dir.name) 33 | junk_path = tmp_path / "junk" 34 | TEST_FLOAT32_DATA.tofile(junk_path) 35 | some_meta = SigMFFile(TEST_METADATA, data_file=junk_path) 36 | some_meta.tofile(tmp_path / "a") 37 | some_meta.tofile(tmp_path / "b") 38 | some_meta.tofile(tmp_path / "c", toarchive=True) 39 | 40 | def tearDown(self): 41 | """cleanup""" 42 | self.tmp_dir.cleanup() 43 | 44 | def test_normal(self): 45 | """able to parse archives and non-archives""" 46 | args = (str(self.tmp_path / "*.sigmf*"),) 47 | sigmf.validate.main(args) 48 | 49 | def test_normal_skip(self): 50 | """able to skip checksum""" 51 | args = (str(self.tmp_path / "*.sigmf*"), "--skip-checksum") 52 | sigmf.validate.main(args) 53 | 54 | def test_partial(self): 55 | """checks some but not all files""" 56 | args = (str(self.tmp_path / "*"),) 57 | with self.assertRaises(SystemExit): 58 | sigmf.validate.main(args) 59 | 60 | def test_missing(self): 61 | """exit with rc=1 when run on empty""" 62 | with self.assertRaises(SystemExit) as cm: 63 | sigmf.validate.main(tuple()) 64 | self.assertEqual((1,), cm.exception.args) 65 | 66 | def test_version(self): 67 | """exit with rc=0 after printing version""" 68 | args = ("--version",) 69 | with self.assertRaises(SystemExit) as cm: 70 | sigmf.validate.main(args) 71 | self.assertEqual((0,), cm.exception.args) 72 | 73 | 74 | class FailingCases(unittest.TestCase): 75 | """Cases where the validator should raise an exception.""" 76 | 77 | def setUp(self): 78 | self.metadata = dict(TEST_METADATA) 79 | 80 | def test_no_version(self): 81 | """core:version must be present""" 82 | del self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY] 83 | with self.assertRaises(ValidationError): 84 | SigMFFile(self.metadata).validate() 85 | 86 | def test_extra_top_level_key(self): 87 | """no extra keys allowed on the top level""" 88 | self.metadata["extra"] = 0 89 | with self.assertRaises(ValidationError): 90 | SigMFFile(self.metadata).validate() 91 | 92 | def test_invalid_type(self): 93 | """license key must be string""" 94 | self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.LICENSE_KEY] = 1 95 | with self.assertRaises(ValidationError): 96 | SigMFFile(self.metadata).validate() 97 | 98 | def test_invalid_capture_order(self): 99 | """metadata must have captures in order""" 100 | self.metadata[SigMFFile.CAPTURE_KEY] = [{SigMFFile.START_INDEX_KEY: 10}, {SigMFFile.START_INDEX_KEY: 9}] 101 | with self.assertRaises(ValidationError): 102 | SigMFFile(self.metadata).validate() 103 | 104 | def test_invalid_annotation_order(self): 105 | """metadata must have annotations in order""" 106 | self.metadata[SigMFFile.ANNOTATION_KEY] = [ 107 | { 108 | SigMFFile.START_INDEX_KEY: 2, 109 | SigMFFile.LENGTH_INDEX_KEY: 120000, 110 | }, 111 | { 112 | 
SigMFFile.START_INDEX_KEY: 1, 113 | SigMFFile.LENGTH_INDEX_KEY: 120000, 114 | }, 115 | ] 116 | with self.assertRaises(ValidationError): 117 | SigMFFile(self.metadata).validate() 118 | 119 | def test_annotation_without_sample_count(self): 120 | """annotation without length should be accepted""" 121 | self.metadata[SigMFFile.ANNOTATION_KEY] = [{SigMFFile.START_INDEX_KEY: 2}] 122 | SigMFFile(self.metadata).validate() 123 | 124 | def test_invalid_hash(self): 125 | """wrong hash raises error on creation""" 126 | with tempfile.NamedTemporaryFile() as temp_file: 127 | TEST_FLOAT32_DATA.tofile(temp_file.name) 128 | self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = "derp" 129 | with self.assertRaises(sigmf.error.SigMFFileError): 130 | SigMFFile(metadata=self.metadata, data_file=temp_file.name) 131 | -------------------------------------------------------------------------------- /tests/testdata.py: -------------------------------------------------------------------------------- 1 | # Copyright: Multiple Authors 2 | # 3 | # This file is part of sigmf-python. https://github.com/sigmf/sigmf-python 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | 7 | """Shared test data for tests.""" 8 | 9 | import numpy as np 10 | 11 | from sigmf import SigMFFile, __specification__, __version__ 12 | 13 | TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32) 14 | 15 | TEST_METADATA = { 16 | SigMFFile.ANNOTATION_KEY: [{SigMFFile.LENGTH_INDEX_KEY: 16, SigMFFile.START_INDEX_KEY: 0}], 17 | SigMFFile.CAPTURE_KEY: [{SigMFFile.START_INDEX_KEY: 0}], 18 | SigMFFile.GLOBAL_KEY: { 19 | SigMFFile.DATATYPE_KEY: "rf32_le", 20 | SigMFFile.HASH_KEY: "f4984219b318894fa7144519185d1ae81ea721c6113243a52b51e444512a39d74cf41a4cec3c5d000bd7277cc71232c04d7a946717497e18619bdbe94bfeadd6", 21 | SigMFFile.NUM_CHANNELS_KEY: 1, 22 | SigMFFile.VERSION_KEY: __specification__, 23 | }, 24 | } 25 | 26 | # Data0 is a test of a compliant two capture recording 27 | TEST_U8_DATA0 = list(range(256)) 28 | TEST_U8_META0 = { 29 | SigMFFile.ANNOTATION_KEY: [], 30 | SigMFFile.CAPTURE_KEY: [ 31 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 0}, 32 | {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 0}, 33 | ], # very strange..but technically legal? 
34 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 0},
35 | }
36 | # Data1 is a test of a two capture recording with header_bytes and trailing_bytes set
37 | TEST_U8_DATA1 = [0xFE] * 32 + list(range(192)) + [0xFF] * 32
38 | TEST_U8_META1 = {
39 |     SigMFFile.ANNOTATION_KEY: [],
40 |     SigMFFile.CAPTURE_KEY: [
41 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
42 |         {SigMFFile.START_INDEX_KEY: 128},
43 |     ],
44 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 32},
45 | }
46 | # Data2 is a test of a two capture recording with multiple header_bytes set
47 | TEST_U8_DATA2 = [0xFE] * 32 + list(range(128)) + [0xFE] * 16 + list(range(128, 192)) + [0xFF] * 16
48 | TEST_U8_META2 = {
49 |     SigMFFile.ANNOTATION_KEY: [],
50 |     SigMFFile.CAPTURE_KEY: [
51 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
52 |         {SigMFFile.START_INDEX_KEY: 128, SigMFFile.HEADER_BYTES_KEY: 16},
53 |     ],
54 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8", SigMFFile.TRAILING_BYTES_KEY: 16},
55 | }
56 | # Data3 is a test of a three capture recording with multiple header_bytes set
57 | TEST_U8_DATA3 = [0xFE] * 32 + list(range(128)) + [0xFE] * 32 + list(range(128, 192))
58 | TEST_U8_META3 = {
59 |     SigMFFile.ANNOTATION_KEY: [],
60 |     SigMFFile.CAPTURE_KEY: [
61 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
62 |         {SigMFFile.START_INDEX_KEY: 32},
63 |         {SigMFFile.START_INDEX_KEY: 128, SigMFFile.HEADER_BYTES_KEY: 32},
64 |     ],
65 |     SigMFFile.GLOBAL_KEY: {SigMFFile.DATATYPE_KEY: "ru8"},
66 | }
67 | # Data4 is a two channel version of Data1
68 | TEST_U8_DATA4 = [0xFE] * 32 + [y for y in list(range(96)) for i in [0, 1]] + [0xFF] * 32
69 | TEST_U8_META4 = {
70 |     SigMFFile.ANNOTATION_KEY: [],
71 |     SigMFFile.CAPTURE_KEY: [
72 |         {SigMFFile.START_INDEX_KEY: 0, SigMFFile.HEADER_BYTES_KEY: 32},
73 |         {SigMFFile.START_INDEX_KEY: 64},
74 |     ],
75 |     SigMFFile.GLOBAL_KEY: {
76 |         SigMFFile.DATATYPE_KEY: "ru8",
77 |         SigMFFile.TRAILING_BYTES_KEY: 32,
78 |         SigMFFile.NUM_CHANNELS_KEY: 2,
79 |     },
80 | }
81 | 
--------------------------------------------------------------------------------
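A minimal end-to-end sketch of the loop these fixtures exercise (write a dataset, attach metadata, read it back), using only APIs shown in the files above; the `example` filenames are illustrative:

import tempfile
from pathlib import Path

import numpy as np

from sigmf import SigMFFile, __specification__
from sigmf.sigmffile import fromfile

with tempfile.TemporaryDirectory() as tmpdir:
    # write 16 real float32 samples as the dataset
    data_path = Path(tmpdir) / "example.sigmf-data"
    np.arange(16, dtype=np.float32).tofile(data_path)

    # build metadata the same way conftest.py does
    meta = SigMFFile()
    meta.set_global_field("core:datatype", "rf32_le")  # real float32, little-endian
    meta.set_global_field("core:version", __specification__)
    meta.add_annotation(start_index=0, length=16)
    meta.add_capture(start_index=0)
    meta.set_data_file(data_path)  # also records the dataset hash
    meta.tofile(Path(tmpdir) / "example.sigmf-meta")

    # fromfile() pairs the -meta and -data files by their shared stem
    loopback = fromfile(Path(tmpdir) / "example.sigmf-meta")
    assert np.array_equal(loopback.read_samples(), np.arange(16, dtype=np.float32))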