├── .github └── workflows │ └── python-tests.yml ├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── bin ├── batlogger2guano.py ├── d500x2guano.py ├── disperse.py ├── guano_dump.py ├── guano_edit.py ├── sb2guano.py └── wamd2guano.py ├── docs ├── Makefile ├── api.rst ├── changelog.rst ├── conf.py ├── examples.rst ├── index.rst ├── install.rst ├── license.rst └── utils.rst ├── guano.py ├── requirements_dev.txt ├── setup.py └── tests ├── test_guano.py └── test_utils.py /.github/workflows/python-tests.yml: -------------------------------------------------------------------------------- 1 | name: Python Tests 2 | 3 | on: 4 | push: 5 | branches: [ master, dev ] 6 | pull_request: 7 | branches: [ master, dev ] 8 | 9 | jobs: 10 | test: 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | # Older Python versions 16 | - python-version: "3.6" 17 | os: ubuntu-20.04 18 | - python-version: "3.7" 19 | os: ubuntu-22.04 20 | 21 | # Newer Python versions on latest Ubuntu 22 | - python-version: "3.8" 23 | os: ubuntu-24.04 24 | - python-version: "3.9" 25 | os: ubuntu-24.04 26 | - python-version: "3.10" 27 | os: ubuntu-24.04 28 | - python-version: "3.11" 29 | os: ubuntu-24.04 30 | - python-version: "3.12" 31 | os: ubuntu-24.04 32 | - python-version: "pypy-3.9" 33 | os: ubuntu-24.04 34 | 35 | runs-on: ${{ matrix.os }} 36 | 37 | steps: 38 | - uses: actions/checkout@v3 39 | 40 | - name: Set up Python ${{ matrix.python-version }} 41 | uses: actions/setup-python@v4 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | 45 | - name: Install dependencies 46 | run: | 47 | python -m pip install --upgrade pip 48 | python -m pip install . 49 | 50 | - name: Test with unittest 51 | run: | 52 | python -m unittest discover -s tests 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyo 3 | *.pyc 4 | build 5 | dist 6 | GUANO_BACKUP 7 | docs/_build 8 | test_guano.wav 9 | .venv* 10 | .env* 11 | .idea 12 | .vscode 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c)2015-2017 Myotisoft LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON=python 2 | 3 | .PHONY: help clean test docs dist upload 4 | 5 | help: 6 | @echo 7 | @echo make targets for guano-py 8 | @echo 9 | @echo help ..... Print this helpful documentation 10 | @echo clean .... Clean up build artifacts 11 | @echo test ..... Run all project unit tests 12 | @echo docs ..... Build documentation 13 | @echo dist ..... Build distributable package 14 | @echo upload ... Build and upload distributable package to PyPI 15 | @echo 16 | @echo specify an explicit Python version like this: 17 | @echo " $$> make test PYTHON=python3" 18 | @echo 19 | 20 | clean: 21 | rm -rf *.pyo *.pyc *.egg-info bin/*.pyc dist __pycache__ 22 | 23 | test: 24 | $(PYTHON) -m unittest discover -s tests 25 | 26 | docs: 27 | cd docs && make html 28 | 29 | dist: 30 | $(PYTHON) setup.py sdist bdist_wheel --universal 31 | 32 | upload: dist 33 | twine upload dist/*.* 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | guano-py 2 | ======== 3 | 4 | This is the `Python`_ reference implementation for `GUANO`_, the “Grand 5 | Unified Acoustic Notation Ontology”, a metadata format for bat acoustics 6 | recordings. It includes a production-ready Python module with full 7 | support for reading and writing GUANO metadata, as well as several 8 | helpful commandline utilities. 9 | 10 | For more information about GUANO metadata itself, including the format 11 | specification, see the GUANO project homepage: http://guano-md.org 12 | 13 | Documentation for guano-py can be found at: http://guano-py.readthedocs.io 14 | 15 | 16 | Requirements 17 | ============ 18 | 19 | - Python 3.6+ 20 | 21 | 22 | Installation 23 | ============ 24 | 25 | Download and install magically from the Python Package Index:: 26 | 27 | $> pip install -U guano 28 | 29 | In addition to having the ``guano`` Python module available for use in 30 | your own software, you’ll also have a small collection of `useful 31 | scripts`_ to use. 32 | 33 | Alternately, you can check out the project from GitHub and install 34 | locally in developer mode to hack on it yourself:: 35 | 36 | $> git clone https://github.com/riggsd/guano-py.git 37 | $> cd guano-py 38 | $> python setup.py develop 39 | 40 | Master:|Master Tests| Dev:|Dev Tests| |Documentation Status| 41 | 42 | 43 | API Usage 44 | ========= 45 | 46 | .. code:: python 47 | 48 | from guano import GuanoFile 49 | 50 | # load a .WAV file with (or without) GUANO metadata 51 | g = GuanoFile('test.wav') 52 | 53 | # get and set metadata values like a Python dict 54 | print g['GUANO|Version'] 55 | >>> 1.0 56 | 57 | print g['Make'], g['Model'] 58 | >>> 'Pettersson', 'D500X' 59 | 60 | g['Species Manual ID'] = 'Myso' 61 | 62 | g['Note'] = 'I love GUANO!' 63 | 64 | # namespaced fields can be specified separately or pipe-delimited 65 | print g['PET', 'Gain'], g['PET|Gain'] 66 | >>> 80, 80 67 | 68 | g['SB|Consensus'] = 'Epfu' 69 | g['SB', 'Consensus'] = 'Epfu' 70 | 71 | # print all the metadata values 72 | for key, value in g.items(): 73 | print '%s: %s' % (key, value) 74 | 75 | # write the updated .WAV file back to disk 76 | g.write() 77 | 78 | # have some GUANO metadata from some other source? load it from a string 79 | g = GuanoFile.from_string('GUANO|Version:1.0\nTags:voucher,hand-release') 80 | 81 | # write GUANO metadata somewhere else, say an Anabat file or text file 82 | with open('sidecar_file.guano', 'wb') as outfile: 83 | outfile.write( g.serialize() ) 84 | 85 | # teach the parser to recognize custom metadata fields 86 | GuanoFile.register('Anabat', ['Humidity', 'Temperature'], float) 87 | GuanoFile.register('SB', 'Thumbnail Image', guano.base64decode) 88 | 89 | 90 | .. _Python: http://python.org 91 | .. _GUANO: http://guano-md.org 92 | .. _useful scripts: bin/ 93 | 94 | .. |Master Tests| image:: https://github.com/riggsd/guano-py/actions/workflows/python-tests.yml/badge.svg?branch=master 95 | :target: https://github.com/riggsd/guano-py/actions/workflows/python-tests.yml?query=branch%3Amaster 96 | .. |Dev Tests| image:: https://github.com/riggsd/guano-py/actions/workflows/python-tests.yml/badge.svg?branch=dev 97 | :target: https://github.com/riggsd/guano-py/actions/workflows/python-tests.yml?query=branch%3Adev 98 | .. |Documentation Status| image:: https://readthedocs.org/projects/guano-py/badge/?version=latest 99 | :target: http://guano-py.readthedocs.io/en/latest/?badge=latest 100 | -------------------------------------------------------------------------------- /bin/batlogger2guano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert files from the Elekon BatLogger to use GUANO metadata instead. 4 | 5 | usage:: 6 | 7 | $> batlogger2guano.py WAVFILE... 8 | """ 9 | 10 | from __future__ import print_function 11 | 12 | import sys, os, os.path 13 | from datetime import datetime 14 | from xml.etree import ElementTree 15 | 16 | from guano import GuanoFile 17 | 18 | 19 | def get(xml, path, coerce=None, default=None): 20 | """Extract a value from an ElementTree node""" 21 | node = xml.find(path) 22 | if node is None: 23 | return default 24 | if coerce is None: 25 | return node.text 26 | else: 27 | return coerce(node.text) 28 | 29 | 30 | def batlogger2guano(fname): 31 | """Convert an Elekon BatLogger .WAV with sidecar .XML to GUANO metadata""" 32 | xmlfname = os.path.splitext(fname)[0] + '.xml' 33 | if not os.path.exists(xmlfname): 34 | raise ValueError('Unable to find XML metadata file for %s' % fname) 35 | g = GuanoFile(fname) 36 | with open(xmlfname, 'rt') as f: 37 | xml = ElementTree.parse(f) 38 | 39 | g['Timestamp'] = get(xml, 'DateTime', lambda x: datetime.strptime(x, '%d.%m.%Y %H:%M:%S')) 40 | g['Firmware Version'] = get(xml, 'Firmware') 41 | g['Make'] = 'Elekon' 42 | g['Model'] = 'BatLogger' 43 | g['Serial'] = get(xml, 'SN') 44 | g['Samplerate'] = get(xml, 'Samplerate', lambda x: int(x.split()[0])) 45 | g['Length'] = get(xml, 'Duration', lambda x: float(x.split()[0])) 46 | g['Original Filename'] = get(xml, 'Filename') 47 | g['Temperature Ext'] = get(xml, 'Temperature', lambda x: float(x.split()[0])) 48 | g['Loc Position'] = get(xml, 'GPS/Position', lambda x: tuple(map(float, x.split()))) 49 | g['Loc Elevation'] = get(xml, 'GPS/Altitude', lambda x: float(x.split()[0])) 50 | 51 | g['Elekon|BattVoltage'] = get(xml, 'BattVoltage') 52 | for node in xml.find('Trigger'): 53 | g['Elekon|Trigger|%s' % node.tag] = node.text 54 | for node in xml.find('GPS'): 55 | g['Elekon|GPS|%s' % node.tag] = node.text 56 | 57 | # for k, v in g.items(): 58 | # print('%s:\t%s' % (k, v)) 59 | 60 | print(g.to_string()) 61 | g.write() 62 | os.remove(xmlfname) 63 | 64 | return g 65 | 66 | 67 | if __name__ == '__main__': 68 | from glob import glob 69 | 70 | if len(sys.argv) < 2: 71 | print('usage: %s FILE...' % os.path.basename(sys.argv[0]), file=sys.stderr) 72 | sys.exit(2) 73 | 74 | if os.name == 'nt' and '*' in sys.argv[1]: 75 | fnames = glob(sys.argv[1]) 76 | else: 77 | fnames = sys.argv[1:] 78 | 79 | for fname in fnames: 80 | print(fname, '...') 81 | batlogger2guano(fname) 82 | print() 83 | -------------------------------------------------------------------------------- /bin/d500x2guano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert files with raw D500X metadata to use GUANO metadata instead. 4 | 5 | usage:: 6 | 7 | $> d500x2guano.py WAVFILE... 8 | """ 9 | 10 | from __future__ import print_function 11 | 12 | import sys 13 | import os 14 | import os.path 15 | import mmap 16 | import wave 17 | import stat 18 | import struct 19 | from contextlib import closing 20 | from datetime import datetime 21 | from pprint import pprint 22 | 23 | from guano import GuanoFile 24 | 25 | 26 | D500X_DATA_SKIP_BYTES = 0x3D4 27 | 28 | 29 | def dms2decimal(dms_str): 30 | """Convert D500X Degrees-Minuts-Seconds to Decimal Degrees""" 31 | d, m, s, direction = dms_str.split() 32 | sign = -1 if direction in ('S', 'W') else 1 33 | return sign * (int(d) + float(m) / 60 + float(s) / 3600) 34 | 35 | 36 | def unlock(fname): 37 | """Enable filesystem modification of a locked file""" 38 | if os.name == 'nt': 39 | os.chmod(fname, stat.S_IWRITE) 40 | elif os.name == 'posix' and hasattr(os, 'chflags'): 41 | os.chflags(fname, os.stat(fname).st_flags & ~stat.UF_IMMUTABLE) 42 | 43 | 44 | def extract_d500x_metadata(fname): 45 | """Extract raw D500X metadata as a dict, or None if file has none""" 46 | md = {} 47 | with open(fname, 'rb') as infile: 48 | with closing(mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ)) as mmfile: 49 | if mmfile[0xF0:0xF0+5] != b'D500X': 50 | print('No D500X metadata found in file: ' + fname, file=sys.stderr) 51 | return None 52 | 53 | md['Samplerate'] = struct.unpack_from('< i', mmfile, 0x18)[0] 54 | md['File Name'] = mmfile[0xD0:0xD0+10].decode('latin-1') 55 | md['File Time'] = mmfile[0xE0:0xE0+15].decode('latin-1') 56 | md['FW Version'] = mmfile[0xF0:0xF0+32].strip(b'\0 ').decode('latin-1') 57 | profile_settings_1 = mmfile[0x120:0x120+20].strip(b'\0 ').decode('latin-1') 58 | profile_settings_2 = mmfile[0x138:0x138+16].strip(b'\0 ').decode('latin-1') 59 | for tok in (profile_settings_1 + ' ' + profile_settings_2).split(): 60 | k, v = tok.split('=', 1) 61 | md['Profile ' + k] = v 62 | # TODO: 0x150 - 0x157 ? 63 | md['Profile Name'] = mmfile[0x158:0x158+8].strip(b'\0\xFF ').decode('latin-1') 64 | 65 | # block from 0x200 - 0x400 is a big '\r\n' delimited string. 2.0+ firmware only 66 | extra_md_block = mmfile[0x200:0x400].strip().decode('latin-1') 67 | if extra_md_block: 68 | for line in extra_md_block.splitlines(): 69 | if not line.strip('\0 '): 70 | continue 71 | k, v = line.split(':', 1) 72 | md[k] = v.strip('\0 ') 73 | 74 | md['File Time'] = datetime.strptime(md['File Time'], '%y%m%d %H:%M:%S') 75 | 76 | with closing(wave.open(fname)) as wavfile: 77 | frame_count = wavfile.getnframes() - (D500X_DATA_SKIP_BYTES / wavfile.getsampwidth()) 78 | duration_s = frame_count / float(wavfile.getframerate()) 79 | md['Length'] = round(duration_s, 2) 80 | 81 | return md 82 | 83 | 84 | def d500x2guano(fname): 85 | """Convert a file with raw D500X metadata to use GUANO metadata instead""" 86 | print('\n', fname) 87 | md = extract_d500x_metadata(fname) 88 | if not md: 89 | print('Skipping non-D500X file: ' + fname, file=sys.stderr) 90 | return False 91 | #pprint(md) 92 | 93 | gfile = GuanoFile(fname) 94 | gfile['GUANO|Version'] = 1.0 95 | 96 | gfile['Make'] = 'Pettersson' 97 | gfile['Model'] = 'D500X' 98 | gfile['Timestamp'] = md.pop('File Time') 99 | gfile['Original Filename'] = md.pop('File Name') 100 | gfile['Samplerate'] = md.pop('Samplerate') 101 | gfile['Length'] = md.pop('Length') 102 | 103 | if md.get('Profile HP', None) == 'Y': 104 | gfile['Filter HP'] = 20 105 | 106 | lat, lon = md.pop('LAT', None), md.pop('LON', None) 107 | if lat and lon: 108 | gfile['Loc Position'] = dms2decimal(lat), dms2decimal(lon) 109 | 110 | for k, v in md.items(): 111 | gfile['PET', k] = v 112 | 113 | print(gfile.to_string()) 114 | 115 | # throw out the Pettersson metadata bytes from 'data' chunk 116 | gfile.wav_data = gfile.wav_data[D500X_DATA_SKIP_BYTES:] 117 | 118 | unlock(fname) # D500X "locks" files as unwriteable, we must unlock before we can modify 119 | gfile.write() 120 | 121 | 122 | if __name__ == '__main__': 123 | from glob import glob 124 | 125 | if len(sys.argv) < 2: 126 | print('usage: %s FILE...' % os.path.basename(sys.argv[0]), file=sys.stderr) 127 | sys.exit(2) 128 | 129 | if os.name == 'nt' and '*' in sys.argv[1]: 130 | fnames = glob(sys.argv[1]) 131 | else: 132 | fnames = sys.argv[1:] 133 | 134 | for fname in fnames: 135 | d500x2guano(fname) 136 | -------------------------------------------------------------------------------- /bin/disperse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | "Disperse" files by moving them into folders according to their species label. 4 | 5 | The `Species Manual ID` field will be preferred over `Species Auto ID`. 6 | 7 | usage:: 8 | 9 | $> disperse.py [--copy] ROOTDIR 10 | """ 11 | 12 | # TODO: distinguish between Manual / Auto ID; un-disperse; recursive 13 | 14 | from __future__ import print_function 15 | 16 | import os 17 | import os.path 18 | from glob import glob 19 | import shutil 20 | 21 | import guano 22 | 23 | 24 | def get_species(fname): 25 | """Get the species label from a GUANO file, or `None`. Prefer `Manual ID` over 'Auto ID'.""" 26 | try: 27 | f = guano.GuanoFile(fname) 28 | except ValueError: 29 | return None 30 | 31 | species = f.get('Species Manual ID', None) 32 | return species if species else f.get('Species Auto ID', None) 33 | 34 | 35 | def disperse(rootdir, copy=False, destination_root=None): 36 | """ 37 | Disperse GUANO .wav files into folders by their species label. 38 | 39 | :param str rootdir: the root directory where we search for GUANO files 40 | :param bool copy: whether we should *copy* or *move* (default) files 41 | :param str destination_root: optional destination directory where files are output 42 | """ 43 | for fname in glob(os.path.join(rootdir, '*.wav')): 44 | species = get_species(fname) 45 | if not species: 46 | print('Skipping file without species %s .' % fname) 47 | continue 48 | destination = os.path.join(destination_root or rootdir, species) 49 | if not os.path.isdir(destination): 50 | print('Creating directory %s ...' % destination) 51 | os.mkdir(destination) 52 | new_fname = os.path.join(destination, os.path.basename(fname)) 53 | print('%sing %s -> %s ...' % ('copy' if copy else 'mov', fname, new_fname)) 54 | if copy: 55 | shutil.copy2(fname, new_fname) 56 | else: 57 | os.rename(fname, new_fname) 58 | 59 | 60 | def main(): 61 | """Commandline interface""" 62 | import argparse 63 | parser = argparse.ArgumentParser(description='Disperse files to folders by their species field') 64 | parser.add_argument('-c', '--copy', action='store_true', help='Copy files rather than moving them') 65 | parser.add_argument('rootdir') 66 | args = parser.parse_args() 67 | disperse(args.rootdir, copy=args.copy) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /bin/guano_dump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Print the GUANO metadata found in a file or files. 4 | 5 | usage:: 6 | 7 | $> guano_dump.py [--strict] WAVFILE... 8 | """ 9 | 10 | from __future__ import print_function 11 | 12 | import sys 13 | import os 14 | import os.path 15 | 16 | from guano import GuanoFile 17 | 18 | 19 | def dump(fname, strict=False): 20 | print() 21 | print(fname) 22 | gfile = GuanoFile(fname, strict=strict) 23 | print(gfile.to_string()) 24 | 25 | 26 | if __name__ == '__main__': 27 | from glob import glob 28 | import logging 29 | logging.basicConfig(level=logging.DEBUG, format='%(asctime)s\t%(levelname)s\t%(message)s') 30 | 31 | if len(sys.argv) < 2: 32 | print('usage: %s [--strict] FILE...' % os.path.basename(sys.argv[0]), file=sys.stderr) 33 | sys.exit(2) 34 | 35 | if os.name == 'nt' and '*' in sys.argv[1]: 36 | fnames = glob(sys.argv[1]) 37 | else: 38 | fnames = sys.argv[1:] 39 | 40 | strict = False 41 | if '--strict' in fnames: 42 | fnames.remove('--strict') 43 | strict = True 44 | 45 | for fname in fnames: 46 | if os.path.isdir(fname): 47 | for subfname in glob(os.path.join(fname, '*.[Ww][Aa][Vv]')): 48 | dump(subfname, strict=strict) 49 | else: 50 | dump(fname, strict=strict) 51 | -------------------------------------------------------------------------------- /bin/guano_edit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | guano_edit.py - Manipulate GUANO metadata of individual files or in bulk. 4 | 5 | Specify GUANO fields and values, followed by a list of files which the 6 | changes should be applied to. The values of existing fields may be 7 | used by specifying the field as `${Fieldname}`. 8 | 9 | Be careful to properly escape values for your shell commandline, especially 10 | if using value templates! Study the examples below. 11 | 12 | Add the `--dry-run` argument and no changes will be saved, but you'll be 13 | able to review the proposed metadata changes on stdout. 14 | 15 | 16 | Examples:: 17 | 18 | # Add NABat grid cell to all recordings under the `foo` directory 19 | $> guano_edit.py "NABat|Grid Cell ID: 45678" ~/bat_calls/foo/ 20 | 21 | # Append additional text to the end of the existing Note text 22 | $> guano_edit.py 'Note: ${Note} Recorded by Dave.' EPFU_refcall.wav 23 | 24 | 25 | TODO:: 26 | * Ensure that we persist all RIFF chunks 27 | * Add support for adding GUANO metadata to "new" files 28 | * Support Anabat files 29 | """ 30 | 31 | from __future__ import print_function 32 | 33 | import sys, os, os.path 34 | from string import Template 35 | 36 | import guano 37 | 38 | 39 | MAKE_BACKUPS = True 40 | 41 | 42 | class GuanoTemplate(Template): 43 | """ 44 | String template with support for valid GUANO namespaced fields. 45 | """ 46 | idpattern = r'[_a-z][_a-z0-9| ]*' # added support for spaces and pipe char 47 | 48 | 49 | def locate_files(rootdir): 50 | """Find files with GUANO metadata""" 51 | if os.path.isdir(rootdir): 52 | for root, dirnames, filenames in os.walk(rootdir): 53 | for filename in filenames: 54 | if filename.endswith('.wav') or filename.endswith('.WAV'): 55 | try: 56 | yield guano.GuanoFile(os.path.join(root, filename)) 57 | except ValueError as e: 58 | pass # no guano metadata 59 | elif os.path.isfile(rootdir): 60 | filename = rootdir 61 | try: 62 | yield guano.GuanoFile(filename) 63 | except ValueError as e: 64 | pass 65 | else: 66 | raise RuntimeError(rootdir) 67 | 68 | 69 | def update(gfile, md, dry_run=False): 70 | """Update the GUANO metadata in a specified file""" 71 | print() 72 | print(gfile.filename) 73 | 74 | for key, value in md.items(): 75 | value = GuanoTemplate(value).substitute(gfile) 76 | value = gfile._coerce(key, value) 77 | gfile[key] = value 78 | 79 | print(gfile.to_string()) 80 | if not dry_run: 81 | gfile.write(make_backup=MAKE_BACKUPS) 82 | 83 | 84 | def main(): 85 | """Commandline processing script""" 86 | md = {} # new metadata values 87 | inputs = [] # files and folders we're operating on 88 | dry_run = False 89 | 90 | for arg in sys.argv[1:]: 91 | if arg == '--dry-run': 92 | dry_run = True 93 | elif ':' in arg: 94 | k, v = (x.strip() for x in arg.split(':', 1)) 95 | md[k] = v 96 | else: 97 | inputs.append(arg) 98 | 99 | print(md) 100 | 101 | for input in inputs: 102 | for gfile in locate_files(input): 103 | update(gfile, md, dry_run=dry_run) 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /bin/sb2guano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert files with SonoBat-format metadata to use GUANO metadata instead. 4 | 5 | usage:: 6 | 7 | $> sb2guano.py WAVFILE... 8 | """ 9 | 10 | from __future__ import print_function 11 | 12 | import sys 13 | import os 14 | import os.path 15 | import mmap 16 | import re 17 | import wave 18 | from contextlib import closing 19 | from datetime import datetime 20 | from pprint import pprint 21 | 22 | from guano import GuanoFile 23 | 24 | 25 | # regex for parsing Sonobat metadata 26 | SB_MD_REGEX = re.compile(b'MMMMMMMMM(?P[\w\W]+)MMMMMMMMM') 27 | SB_FREQ_REGEX = re.compile(r'\(#([\d]+)#\)') 28 | SB_TE_REGEX = re.compile(r'<&([\d]*)&>') 29 | SB_DFREQ_REGEX = re.compile(r'\[!([\w]+)!\]') 30 | 31 | D500X_ATTRIBUTE_REGEX = re.compile(r'(?PD500X V.+S/N=\d+)') 32 | AR125_ATTRIBUTE_REGEX = re.compile(r'(?PDEV=.+CMT=<.+>)') 33 | 34 | # old SonoBat format e.g. TransectTestRun1-24Mar11-16,27,56-Myoluc.wav 35 | SONOBAT_FILENAME1_REGEX = re.compile(r'(?P[ 0123][0-9][A-Z][a-z][a-z][0-9][0-9]-[012][0-9],[0-6][0-9],[0-6][0-9])(-(?P[A-Za-z]+))?') 36 | SONOBAT_FILENAME1_TIMESTAMP_FMT = '%d%b%y-%H,%M,%S' 37 | 38 | # new SonoBat format 4-digit year e.g. TransectTestRun1-20110324_162756-Myoluc.wav 39 | SONOBAT_FILENAME2_REGEX = re.compile(r'(?P\d{8}_\d{6})(-(?P[A-Za-z]+))?') 40 | SONOBAT_FILENAME2_TIMESTAMP_FMT = '%Y%m%d_%H%M%S' 41 | 42 | # new new SonoBat format 2-digit year e.g. TransectTestRun1-20110324_162756-Myoluc.wav 43 | SONOBAT_FILENAME3_REGEX = re.compile(r'(?P\d{6}_\d{6})(-(?P[A-Za-z]+))?') 44 | SONOBAT_FILENAME3_TIMESTAMP_FMT = '%y%m%d_%H%M%S' 45 | 46 | # AR125 raw 47 | AR125_FILENAME_REGEX = re.compile(r'_(?PD\d{8}T\d{6})m\d{3}(-(?P[A-Za-z]+))?') 48 | AR125_FILENAME_TIMESTAMP_FMT = 'D%Y%m%dT%H%M%S' 49 | 50 | SB_FILENAME_FORMATS = [ 51 | (SONOBAT_FILENAME1_REGEX, SONOBAT_FILENAME1_TIMESTAMP_FMT), 52 | (SONOBAT_FILENAME2_REGEX, SONOBAT_FILENAME2_TIMESTAMP_FMT), 53 | (SONOBAT_FILENAME3_REGEX, SONOBAT_FILENAME3_TIMESTAMP_FMT), 54 | (AR125_FILENAME_REGEX, AR125_FILENAME_TIMESTAMP_FMT) 55 | ] 56 | 57 | 58 | def _decode_text(text): 59 | """ 60 | SonoBat uses the system locale for encoding text, so we have to guess what it might have been. 61 | Try Mac_Roman first if we're running on OS X, otherwise default to Windows 1252. Yuck. 62 | """ 63 | encodings = ['windows-1252', 'latin-1'] 64 | encodings.insert(0 if sys.platform == 'darwin' else 1, 'mac_roman') 65 | for encoding in encodings: 66 | try: 67 | return text.decode(encoding) 68 | except: 69 | pass 70 | raise ValueError('Unable to decode native SonoBat text!') 71 | 72 | 73 | def _parse_sonobat_metadata(md): 74 | """Parse Sonobat-format metadata string as a dict""" 75 | sb_md = dict() 76 | sb_md['samplerate'] = int(re.search(SB_FREQ_REGEX, md).groups()[0]) 77 | sb_md['te'] = int(re.search(SB_TE_REGEX, md).groups()[0]) 78 | sb_md['dfreq'] = re.search(SB_DFREQ_REGEX, md).groups()[0] 79 | note = md.split('!]', 1)[1] 80 | 81 | # If this file was created with Sonobat D500X Attributer, parse out D500X metadata 82 | match = re.search(D500X_ATTRIBUTE_REGEX, note) 83 | if match and match.group('d500x').count(',') == 8: 84 | fw, f, pre, len, hp, a, ts, timestamp, sn = match.group('d500x').split(',') 85 | f, pre, len, hp, a, ts, sn = tuple(s.split('=',1)[1].strip() for s in (f, pre, len, hp, a, ts, sn)) 86 | sb_md['d500x'] = dict(Firmware=fw, F=f, PRE=pre, LEN=len, HP=hp, A=a, TS=ts, Timestamp=timestamp, Serial=sn) 87 | 88 | # Binary Acoustic AR125 stuffs metadata into Sonobat note 89 | match = re.search(AR125_ATTRIBUTE_REGEX, note) 90 | if match: 91 | dev, dc, utc, ltb, cmt = match.group('ar125').split(',', 4) 92 | dev, dc, utc, ltb, cmt = tuple(s.split('=',1)[1].strip() for s in (dev, dc, utc, ltb, cmt)) 93 | cmt = cmt.strip('<>') 94 | sb_md['ar125'] = dict(DEV=dev, DC=dc, UTC=utc, LTB=ltb, CMT=cmt) 95 | 96 | sb_md['note'] = note 97 | 98 | return sb_md 99 | 100 | 101 | def extract_sonobat_metadata(fname): 102 | """Extract Sonobat-format metadata as a dict""" 103 | 104 | # parse the Sonobat metadata itself from file 105 | with open(fname, 'rb') as infile: 106 | with closing(mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ)) as mmfile: 107 | md_match = re.search(SB_MD_REGEX, mmfile) 108 | if not md_match: 109 | print('No Sonobat metadata found in file: ' + fname, file=sys.stderr) 110 | return None 111 | md = md_match.groups()[0] 112 | md = _decode_text(md) 113 | sb_md = _parse_sonobat_metadata(md) 114 | 115 | with closing(wave.open(fname)) as wavfile: 116 | duration_s = wavfile.getnframes() / float(wavfile.getframerate()) 117 | sb_md['length'] = round(duration_s / sb_md['te'], 2) 118 | 119 | # try to extract info from the filename 120 | for regex, timestamp_fmt in SB_FILENAME_FORMATS: 121 | match = regex.search(fname) 122 | if match: 123 | sb_md['timestamp'] = datetime.strptime(match.group('date'), timestamp_fmt) 124 | sb_md['species'] = match.group('species') 125 | 126 | return sb_md 127 | 128 | 129 | def sonobat2guano(fname): 130 | """Convert a file with Sonobat metadata to GUANO metadata""" 131 | print('\n', fname) 132 | sb_md = extract_sonobat_metadata(fname) 133 | if not sb_md: 134 | print('Skipping non-Sonobat file: ' + fname, file=sys.stderr) 135 | return False 136 | pprint(sb_md) 137 | 138 | gfile = GuanoFile(fname) 139 | gfile['GUANO|Version'] = 1.0 140 | if 'timestamp' in sb_md: 141 | gfile['Timestamp'] = sb_md['timestamp'] 142 | if sb_md.get('te', 1) != 1: 143 | gfile['TE'] = sb_md['te'] 144 | gfile['Length'] = sb_md['length'] 145 | gfile['Note'] = sb_md['note'].strip().replace('\r\n', '\\n').replace('\n', '\\n') 146 | if sb_md.get('species', None): 147 | gfile['Species Auto ID'] = sb_md['species'] 148 | 149 | if 'd500x' in sb_md: 150 | for k, v in sb_md['d500x'].items(): 151 | gfile['PET', k] = v 152 | 153 | if 'ar125' in sb_md: 154 | for k, v in sb_md['ar125'].items(): 155 | gfile['BAT', k] = v 156 | 157 | print(gfile.to_string()) 158 | 159 | gfile.write() 160 | 161 | 162 | if __name__ == '__main__': 163 | from glob import glob 164 | 165 | if len(sys.argv) < 2: 166 | print('usage: %s FILE...' % os.path.basename(sys.argv[0]), file=sys.stderr) 167 | sys.exit(2) 168 | 169 | if os.name == 'nt' and '*' in sys.argv[1]: 170 | fnames = glob(sys.argv[1]) 171 | else: 172 | fnames = sys.argv[1:] 173 | 174 | for fname in fnames: 175 | sonobat2guano(fname) 176 | -------------------------------------------------------------------------------- /bin/wamd2guano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert Wildlife Acoustics WAMD metadata files to use GUANO metadata instead. 4 | 5 | usage:: 6 | 7 | $> wamd2guano.py WAVFILE... 8 | """ 9 | 10 | from __future__ import print_function 11 | 12 | import os 13 | import os.path 14 | import sys 15 | import struct 16 | from datetime import datetime 17 | from pprint import pprint 18 | 19 | from guano import GuanoFile, tzoffset 20 | 21 | 22 | # binary WAMD field identifiers 23 | WAMD_IDS = { 24 | 0x00: 'version', 25 | 0x01: 'model', 26 | 0x02: 'serial', 27 | 0x03: 'firmware', 28 | 0x04: 'prefix', 29 | 0x05: 'timestamp', 30 | 0x06: 'gpsfirst', 31 | 0x07: 'gpstrack', 32 | 0x08: 'software', 33 | 0x09: 'license', 34 | 0x0A: 'notes', 35 | 0x0B: 'auto_id', 36 | 0x0C: 'manual_id', 37 | 0x0D: 'voicenotes', 38 | 0x0E: 'auto_id_stats', 39 | 0x0F: 'time_expansion', 40 | 0x10: 'program', 41 | 0x11: 'runstate', 42 | 0x12: 'microphone', 43 | 0x13: 'sensitivity', 44 | } 45 | 46 | # fields that we exclude from our in-memory representation 47 | WAMD_DROP_IDS = ( 48 | 0x0D, # voice note embedded .WAV 49 | 0x10, # program binary 50 | 0x11, # runstate giant binary blob 51 | 0xFFFF, # used for 16-bit alignment 52 | ) 53 | 54 | # rules to coerce values from binary string to native types (default is `str`) 55 | WAMD_COERCE = { 56 | 'version': lambda x: struct.unpack(' 4 else None 111 | else: 112 | # EMTouch format 113 | lat, lon = float(vals[0]), float(vals[1]) 114 | alt = int(round(float(vals[2]))) if len(vals) > 2 else None 115 | return lat, lon, alt 116 | 117 | 118 | class RiffChunk: 119 | """A replacement for chunk.Chunk to handle RIFF chunks.""" 120 | 121 | def __init__(self, file_or_chunk, bigendian=False): 122 | self.bigendian = bigendian 123 | self.format = '>I' if bigendian else '>> 1.0 14 | 15 | print g['Make'], g['Model'] 16 | >>> 'Pettersson', 'D500X' 17 | 18 | g['Species Manual ID'] = 'Myso' 19 | 20 | g['Note'] = 'I love GUANO!' 21 | 22 | # namespaced fields can be specified separately or pipe-delimited 23 | print g['PET', 'Gain'], g['PET|Gain'] 24 | >>> 80, 80 25 | 26 | g['SB|Consensus'] = 'Epfu' 27 | g['SB', 'Consensus'] = 'Epfu' 28 | 29 | # print all the metadata values 30 | for key, value in g.items(): 31 | print '%s: %s' % (key, value) 32 | 33 | # write the updated .WAV file back to disk 34 | g.write() 35 | 36 | # have some GUANO metadata from some other source? load it from a string 37 | g = GuanoFile.from_string('GUANO|Version:1.0\nTags:voucher,hand-release') 38 | 39 | # write GUANO metadata somewhere else, say an Anabat file or text file 40 | with open('sidecar_file.guano', 'wb') as outfile: 41 | outfile.write( g.serialize() ) 42 | 43 | # teach the parser to recognize custom metadata fields 44 | GuanoFile.register('Anabat', ['Humidity', 'Temperature'], float) 45 | GuanoFile.register('SB', 'Thumbnail Image', guano.base64decode) 46 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | guano-py documentation 3 | ====================== 4 | 5 | This is the `Python`_ reference implementation for `GUANO`_, the “Grand 6 | Unified Acoustic Notation Ontology”, a universal metadata format for bat acoustic 7 | recordings. It includes a production-ready Python module with full 8 | support for reading and writing GUANO metadata, as well as several 9 | helpful commandline utilities. This is :doc:`Free Software ` which 10 | may be used for *any* purpose. 11 | 12 | For more information about GUANO metadata itself, including the format 13 | specification, see the GUANO project homepage: http://guano-md.org 14 | 15 | Documentation for guano-py can be found at: http://guano-py.readthedocs.io 16 | 17 | 18 | Table of Contents 19 | ================= 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | install 25 | api 26 | examples 27 | utils 28 | license 29 | changelog 30 | 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | 39 | 40 | .. _Python: http://python.org 41 | .. _GUANO: http://guano-md.org 42 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | - Python 2.7 or Python 3.3+ 8 | 9 | 10 | Installation 11 | ------------ 12 | 13 | Download and install magically from the Python Package Index:: 14 | 15 | $> pip install -U guano 16 | 17 | In addition to having the :doc:`guano Python module ` available for use in 18 | your own software, you’ll also have a small collection of :doc:`useful scripts ` to use. 19 | 20 | Alternately, you can clone the `guano-py GitHub project`_ and install 21 | locally in developer mode to hack on it yourself:: 22 | 23 | $> git clone https://github.com/riggsd/guano-py.git 24 | $> cd guano-py 25 | $> python setup.py develop 26 | 27 | |Build Status| |Documentation Status| 28 | 29 | 30 | .. _guano-py GitHub project: https://github.com/riggsd/guano-py 31 | .. |Build Status| image:: https://travis-ci.org/riggsd/guano-py.svg?branch=master 32 | :target: https://travis-ci.org/riggsd/guano-py 33 | .. |Documentation Status| image:: https://readthedocs.org/projects/guano-py/badge/?version=latest 34 | :target: http://guano-py.readthedocs.io/en/latest/?badge=latest 35 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | .. include:: 5 | 6 | Copyright |copy| 2015-2017 `Myotisoft LLC`_ 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | 26 | 27 | .. _Myotisoft LLC: http://myotisoft.com 28 | -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | 2 | Utilities 3 | ========= 4 | 5 | The guano-py Python module includes several helpful commandline utilities for working with 6 | GUANO metadata. When you install guano-py with `pip install guano`, the following scripts 7 | will then be callable from the commandline. 8 | 9 | 10 | guano_dump.py 11 | ------------- 12 | 13 | .. automodule:: guano_dump 14 | 15 | 16 | guano_edit.py 17 | ------------- 18 | 19 | .. automodule:: guano_edit 20 | 21 | 22 | d500x2guano.py 23 | -------------- 24 | 25 | .. automodule:: d500x2guano 26 | 27 | 28 | sb2guano.py 29 | ----------- 30 | 31 | .. automodule:: sb2guano 32 | 33 | 34 | wamd2guano.py 35 | ------------- 36 | 37 | .. automodule:: wamd2guano 38 | 39 | 40 | batlogger2guano.py 41 | ------------------ 42 | 43 | .. automodule:: batlogger2guano 44 | 45 | 46 | disperse.py 47 | ----------- 48 | 49 | .. automodule:: disperse 50 | -------------------------------------------------------------------------------- /guano.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the Python reference implementation for reading and writing GUANO metadata. 3 | 4 | GUANO is the "Grand Unified Acoustic Notation Ontology", an extensible metadata format 5 | for representing bat acoustics data. 6 | 7 | Import this Python module as:: 8 | 9 | import guano 10 | 11 | This module utilizes the Python :mod:`logging` framework for issuing warnings and debug messages. 12 | Application code may wish to enable logging with the :func:`logging.basicConfig` function. 13 | 14 | """ 15 | 16 | import os 17 | import wave 18 | import struct 19 | import os.path 20 | import shutil 21 | from datetime import datetime, tzinfo, timedelta 22 | from contextlib import closing 23 | from tempfile import NamedTemporaryFile 24 | from collections import OrderedDict, namedtuple 25 | from base64 import standard_b64encode as base64encode 26 | from base64 import standard_b64decode as base64decode 27 | from typing import Any, BinaryIO, Callable, Iterable, Tuple, Union 28 | 29 | import logging 30 | log = logging.Logger(__name__) 31 | 32 | 33 | __version__ = '1.0.16' 34 | 35 | __all__ = 'GuanoFile', 36 | 37 | 38 | WHITESPACE = ' \t\n\x0b\x0c\r\0' 39 | 40 | wavparams = namedtuple('wavparams', 'nchannels, sampwidth, framerate, nframes, comptype, compname') 41 | 42 | 43 | _ZERO = timedelta(0) 44 | 45 | class tzutc(tzinfo): 46 | """UTC timezone""" 47 | 48 | def utcoffset(self, dt): 49 | return _ZERO 50 | 51 | def tzname(self, dt): 52 | return 'UTC' 53 | 54 | def dst(self, dt): 55 | return _ZERO 56 | 57 | def __repr__(self): 58 | return 'UTC' 59 | 60 | utc = tzutc() 61 | 62 | class tzoffset(tzinfo): 63 | """ 64 | Fixed-offset concrete timezone implementation. 65 | `offset` should be numeric hours or ISO format string like '-07:00'. 66 | """ 67 | 68 | def __init__(self, offset): 69 | if isinstance(offset, str): 70 | # offset as ISO string '-07:00', '-0700', or '-07' format 71 | if len(offset) < 4: 72 | vals = offset, '00' # eg '-07' 73 | elif ':' in offset: 74 | vals = offset.split(':') # '-07:00' 75 | else: 76 | vals = offset[:-2], offset[-2:] # '-0700' 77 | if vals[0].startswith('-'): 78 | offset = int(vals[0]) - int(vals[1])/60.0 79 | else: 80 | offset = int(vals[0]) + int(vals[1])/60.0 81 | self._offset_hours = offset 82 | self._offset = timedelta(hours=offset) 83 | 84 | def utcoffset(self, dt): 85 | return self._offset 86 | 87 | def dst(self, dt): 88 | return _ZERO 89 | 90 | def tzname(self, dt): 91 | return 'UTC'+str(self._offset_hours) 92 | 93 | def __repr__(self): 94 | return self.tzname(None) 95 | 96 | 97 | def parse_timestamp(s) -> datetime: 98 | """ 99 | Parse a string in supported subset of ISO 8601 / RFC 3331 format to :class:`datetime.datetime`. 100 | The timestamp will be timezone-aware of a TZ is specified, or timezone-naive if in "local" fmt. 101 | 102 | :rtype: datetime or None 103 | """ 104 | # Python's standard library does an awful job of parsing ISO timestamps, so we do it manually 105 | 106 | if s is None or not s.strip(): 107 | return None 108 | 109 | timestamp, tz = None, None 110 | 111 | s = s.replace(' ', 'T', 1) # support using space rather than 'T' as date/time delimiter 112 | 113 | if s[-1] == 'Z': # UTC "zulu" time 114 | tz = utc 115 | s = s[:-1] 116 | elif '+' in s or s.count('-') == 3: # UTC offset provided 117 | i = s.index('+') if '+' in s else s.rfind('-') 118 | s, offset = s[:i], s[i:] 119 | tz = tzoffset(offset) 120 | 121 | if len(s) > 22: # milliseconds included 122 | timestamp = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%f') 123 | else: 124 | timestamp = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S') 125 | 126 | return timestamp.replace(tzinfo=tz) if tz else timestamp 127 | 128 | 129 | _chunkid = struct.Struct('> 4s') 130 | _chunksz = struct.Struct('< L') 131 | 132 | 133 | class GuanoFile(object): 134 | """ 135 | An abstraction of a .WAV file with GUANO metadata. 136 | 137 | A `GuanoFile` object behaves like a normal Python :class:`dict`, where keys can either be 138 | well-known metadata keys, namespaced keys, or a tuple of (namespace, key). 139 | 140 | Well-known keys will have their values coerced into the correct data type. The parser may be 141 | configured to coerce new namespaced keys with the :func:`register()` function. 142 | 143 | Example usage:: 144 | 145 | gfile = GuanoFile('myfile.wav') 146 | print gfile['GUANO|Version'] 147 | >>> '1.0' 148 | gfile['Species Manual ID'] = 'Mylu' 149 | gfile['Note'] = 'I love GUANO!' 150 | gfile.write() 151 | 152 | Though reading, writing, and editing .WAV files is the target usage, this class may also be 153 | used independent from the .WAV file format. GUANO metadata can be written into an 154 | Anabat-format file or to a sidecar file, for example, by populating a `GuanoFile` object and 155 | then using the :func:`serialize()` method to produce correctly formatted UTF-8 encoded metadata. 156 | 157 | :ivar str filename: path to the file which this object represents, or `None` if a "new" file 158 | :ivar bool strict_mode: whether the GUANO parser is configured for strict or lenient parsing 159 | :ivar bytes wav_data: the `data` subchunk of a .WAV file consisting of its actual audio data, 160 | lazily-loaded and cached for performance 161 | :ivar wavparams wav_params: namedtuple of .WAV parameters (nchannels, sampwidth, framerate, nframes, comptype, compname) 162 | """ 163 | 164 | _coersion_rules = { 165 | 'Filter HP': float, 166 | 'Length': float, 167 | 'Loc Accuracy': float, 168 | 'Loc Elevation': float, 169 | 'Loc Position': lambda value: tuple(float(v) for v in value.split()), 170 | 'Note': lambda value: value.replace('\\n', '\n'), 171 | 'Samplerate': int, 172 | 'TE': lambda value: int(value) if value else 1, 173 | 'Timestamp': parse_timestamp, 174 | } 175 | _serialization_rules = { 176 | 'Length': lambda value: '%.2f' % value, 177 | 'Loc Position': lambda value: '%f %f' % value, 178 | 'Note': lambda value: value.replace('\n', '\\n'), 179 | 'Timestamp': lambda value: value.isoformat() if value else '', 180 | } 181 | 182 | def __init__(self, file: Union[str, BinaryIO] = None, strict=False): 183 | """ 184 | Create a GuanoFile instance which represents a single file's GUANO metadata. 185 | If the file already contains GUANO metadata, it will be parsed immediately. If not, then 186 | this object will be initialized as "new" metadata. 187 | 188 | :param file: an existing .WAV file with GUANO metadata; if the path does not 189 | exist or is `None` then this instance represents a "new" file 190 | :type file: str or file-like object (implements methods seek, read, tell) or None 191 | :param bool strict: whether the parser should be strict and raise exceptions when 192 | encountering bad metadata values, or whether it should be as lenient 193 | as possible (default: False, lenient); if in lenient mode, bad values 194 | will remain in their UTF-8 string form as found persisted in the file 195 | :raises ValueError: if the specified file doesn't represent a valid .WAV or if its 196 | existing GUANO metadata is broken 197 | """ 198 | if isinstance(file, str): 199 | self.filename = file 200 | self._file = None 201 | else: 202 | self.filename = file.name if hasattr(file, 'name') else None 203 | self._file: BinaryIO = file # a file-like object 204 | 205 | self.strict_mode = strict 206 | 207 | self.wav_params = None 208 | self._md = OrderedDict() # metadata storage - map of maps: namespace->key->val 209 | 210 | self._wav_data = None # lazily-loaded and cached 211 | self._wav_data_offset = 0 212 | self._wav_data_size = 0 213 | 214 | if self._file or (self.filename and os.path.isfile(self.filename)): 215 | self._load() 216 | 217 | def _coerce(self, key: str, value: str) -> Any: 218 | """Coerce a value from its Unicode representation to a specific data type""" 219 | if key in self._coersion_rules: 220 | try: 221 | return self._coersion_rules[key](value) 222 | except (ValueError, TypeError) as e: 223 | if self.strict_mode: 224 | raise 225 | else: 226 | log.warning('Failed coercing "%s": %s', key, e) 227 | return value # default should already be a Unicode string 228 | 229 | def _serialize(self, key: str, value: Any) -> str: 230 | """Serialize a value from its real representation to GUANO Unicode representation""" 231 | serialize = self._serialization_rules.get(key, str) 232 | try: 233 | return serialize(value) 234 | except (ValueError, TypeError) as e: 235 | if self.strict_mode: 236 | raise 237 | else: 238 | log.warning('Failed serializing "%s": %s', key, e) 239 | 240 | def _load(self): 241 | """Load the contents of our underlying .WAV file""" 242 | opener = open(self.filename, 'rb') if self._file is None else nullcontext(self._file) 243 | with opener as f: 244 | # check filesize: seek to end of file and tell its byte offset 245 | f.seek(0, 2) 246 | fsize = f.tell() 247 | if fsize < 8: 248 | raise ValueError('File too small to contain valid RIFF "WAVE" header (size %d bytes)' % fsize) 249 | 250 | f.seek(0x08) 251 | chunk = _chunkid.unpack(f.read(4))[0] 252 | if chunk != b'WAVE': 253 | raise ValueError('Expected RIFF chunk "WAVE" at 0x08, but found "%s"' % repr(chunk)) 254 | 255 | try: 256 | f.seek(0) 257 | self.wav_params = wavparams(*wave.open(f).getparams()) 258 | except RuntimeError as e: 259 | return ValueError(e) # Python's chunk.py throws this inappropriate exception 260 | 261 | # iterate through the file until we find our 'guan' subchunk 262 | metadata_buf = None 263 | f.seek(0x0c) 264 | while f.tell() < fsize - 1: 265 | try: 266 | chunkid = _chunkid.unpack(f.read(4))[0] 267 | size = _chunksz.unpack(f.read(4))[0] 268 | except struct.error as e: 269 | raise ValueError(e) 270 | 271 | if chunkid == b'guan': 272 | metadata_buf = f.read(size) 273 | elif chunkid == b'data': 274 | self._wav_data_offset = f.tell() 275 | self._wav_data_size = size 276 | f.seek(size, 1) # skip over wav data 277 | else: 278 | f.seek(size, 1) # skip over wav data 279 | 280 | if size % 2: 281 | f.read(1) # align to 16-bit boundary 282 | 283 | if not self._wav_data_offset: 284 | raise ValueError('No DATA sub-chunk found in .WAV file') 285 | 286 | if metadata_buf: 287 | self._parse(metadata_buf) 288 | 289 | def _parse(self, metadata_str): 290 | """Parse metadata and populate our internal mappings""" 291 | if not isinstance(metadata_str, str): 292 | try: 293 | metadata_str = metadata_str.decode('utf-8') 294 | except UnicodeDecodeError as e: 295 | log.warning('GUANO metadata is not UTF-8 encoded! Attempting to coerce. %s', repr(self)) 296 | metadata_str = metadata_str.decode('latin-1') 297 | 298 | for line in metadata_str.split('\n'): 299 | line = line.strip(WHITESPACE) 300 | if not line: 301 | continue 302 | full_key, val = line.split(':', 1) 303 | namespace, key = full_key.split('|', 1) if '|' in full_key else ('', full_key) 304 | namespace, key, full_key, val = namespace.strip(), key.strip(), full_key.strip(), val.strip() 305 | if not key or not val: 306 | continue 307 | if namespace not in self._md: 308 | self._md[namespace] = OrderedDict() 309 | self._md[namespace][key] = self._coerce(full_key, val) 310 | return self 311 | 312 | @classmethod 313 | def from_string(cls, metadata_str, *args, **kwargs) -> 'GuanoFile': 314 | """ 315 | Create a :class:`GuanoFile` instance from a GUANO metadata string 316 | 317 | :param metadata_str: a string (or string-like buffer) of GUANO metadata 318 | :param bool strict: whether the parser should be strict and raise exceptions when 319 | encountering bad metadata values, or whether it should be as lenient 320 | as possible (default: True); if in lenient mode, bad values will 321 | remain in their UTF-8 string form as found persisted in the file 322 | :rtype: GuanoFile 323 | """ 324 | return GuanoFile(*args, **kwargs)._parse(metadata_str) 325 | 326 | @classmethod 327 | def register(cls, namespace: str, keys: Union[str, Iterable[str]], coerce_function: Callable, serialize_function: Callable = str): 328 | """ 329 | Configure the GUANO parser to recognize new namespaced keys. 330 | 331 | :param namespace: vendor namespace which the keys belong to 332 | :param keys: a key or sequence of keys under the specified vendor namespace 333 | :param coerce_function: a function for coercing the UTF-8 value to any desired data type 334 | :type coerce_function: callable 335 | :param serialize_function: an optional function for serializing the value to UTF-8 string 336 | :type serialize_function: callable 337 | """ 338 | if isinstance(keys, str): 339 | keys = [keys] 340 | for k in keys: 341 | full_key = namespace+'|'+k if namespace else k 342 | cls._coersion_rules[full_key] = coerce_function 343 | cls._serialization_rules[full_key] = serialize_function 344 | 345 | def _split_key(self, item) -> Tuple[str, str]: 346 | if isinstance(item, tuple): 347 | namespace, key = item[0], item[1] 348 | elif '|' in item: 349 | namespace, key = item.split('|', 1) 350 | else: 351 | namespace, key = '', item 352 | return namespace, key 353 | 354 | def __getitem__(self, item) -> Any: 355 | namespace, key = self._split_key(item) 356 | return self._md[namespace][key] 357 | 358 | def get(self, item, default=None) -> Any: 359 | try: 360 | return self[item] 361 | except KeyError: 362 | return default 363 | 364 | def __setitem__(self, key, value): 365 | if not self._md: 366 | self._md['GUANO'] = {} 367 | self._md['GUANO']['Version'] = '1.0' 368 | 369 | namespace, key = self._split_key(key) 370 | if namespace not in self._md: 371 | self._md[namespace] = {} 372 | self._md[namespace][key] = value 373 | 374 | def __contains__(self, item) -> bool: 375 | namespace, key = self._split_key(item) 376 | return namespace in self._md and key in self._md[namespace] 377 | 378 | def __delitem__(self, key): 379 | namespace, key = self._split_key(key) 380 | del self._md[namespace][key] 381 | if not self._md[namespace]: 382 | del self._md[namespace] 383 | 384 | def __bool__(self) -> bool: 385 | return bool(self._md) 386 | 387 | def __repr__(self) -> str: 388 | return '%s(%s)' % (self.__class__.__name__, self.filename or self._file) 389 | 390 | def get_namespaces(self) -> list: 391 | """ 392 | Get list of all namespaces represented by this metadata. 393 | This includes the 'GUANO' namespace, and the '' (empty string) namespace for well-known fields. 394 | """ 395 | return list(self._md.keys()) 396 | 397 | def items(self, namespace: str = None) -> Iterable[Tuple[str, Any]]: 398 | """Iterate over (key, value) for entire metadata or for specified namespace of fields""" 399 | if namespace is not None: 400 | for k, v in self._md[namespace].items(): 401 | yield k, v 402 | else: 403 | for namespace, data in self._md.items(): 404 | for k, v in data.items(): 405 | k = '%s|%s' % (namespace, k) if namespace else k 406 | yield k, v 407 | 408 | def items_namespaced(self) -> Iterable[Tuple[str, str, Any]]: 409 | """Iterate over (namespace, key, value) for entire metadata""" 410 | for namespace, data in self._md.items(): 411 | for k, v in data.items(): 412 | yield namespace, k, v 413 | 414 | def well_known_items(self) -> Iterable[Tuple[str, Any]]: 415 | """Iterate over (key, value) for all the well-known (defined) fields""" 416 | return self.items('') 417 | 418 | def to_string(self) -> str: 419 | """Represent the GUANO metadata as a Unicode string""" 420 | lines = [] 421 | for namespace, data in self._md.items(): 422 | for k, v in data.items(): 423 | k = u'%s|%s' % (namespace, k) if namespace else k 424 | v = self._serialize(k, v) 425 | lines.append(u'%s: %s' % (k, v)) 426 | return u'\n'.join(lines) 427 | 428 | def serialize(self, pad='\n') -> bytes: 429 | """Serialize the GUANO metadata as UTF-8 encoded bytes""" 430 | md_bytes = bytearray(self.to_string(), 'utf-8') 431 | if pad is not None and len(md_bytes) % 2: 432 | # pad for alignment on even word boundary 433 | md_bytes.append(ord(pad)) 434 | return md_bytes 435 | 436 | @property 437 | def wav_data(self) -> bytes: 438 | """Actual audio data from the wav `data` chunk. Lazily loaded and cached.""" 439 | if not self._wav_data_size: 440 | raise ValueError() 441 | if not self._wav_data: 442 | opener = open(self.filename, 'rb') if self._file is None else nullcontext(self._file) 443 | with opener as f: 444 | f.seek(self._wav_data_offset) 445 | self._wav_data = f.read(self._wav_data_size) 446 | 447 | return self._wav_data 448 | 449 | @wav_data.setter 450 | def wav_data(self, data: bytes): 451 | self._wav_data_size = len(data) 452 | self._wav_data = data 453 | 454 | def write(self, make_backup=True): 455 | """ 456 | Write the GUANO .WAV file to disk. 457 | 458 | :param bool make_backup: create a backup file copy before writing changes or not (default: True); 459 | backups will be saved to a folder named `GUANO_BACKUP` 460 | :raises ValueError: if this `GuanoFile` doesn't represent a valid .WAV by having 461 | appropriate values for `self.wav_params` (see :meth:`wave.Wave_write.setparams()`) 462 | and `self.wav_data` (see :meth:`wave.Wave_write.writeframes()`) 463 | """ 464 | # FIXME: optionally write other unknown subchunks for redundant metadata formats 465 | 466 | if not self.filename: 467 | raise ValueError('Cannot write .WAV file without a self.filename!') 468 | if not self.wav_params: 469 | raise ValueError('Cannot write .WAV file without appropriate self.wav_params (see `wavfile.setparams()`)') 470 | if not self.wav_data: 471 | raise ValueError('Cannot write .WAV file without appropriate self.wav_data (see `wavfile.writeframes()`)') 472 | 473 | # prepare our metadata for a byte-wise representation 474 | md_bytes = self.serialize() 475 | 476 | # create tempfile and write our vanilla .WAV ('data' sub-chunk only) 477 | tempfile = NamedTemporaryFile(mode='w+b', prefix='guano_temp-', suffix='.wav', delete=False) 478 | if os.path.isfile(self.filename): 479 | shutil.copystat(self.filename, tempfile.name) 480 | 481 | with closing(wave.Wave_write(tempfile)) as wavfile: 482 | wavfile.setparams(self.wav_params) 483 | wavfile.writeframes(self.wav_data) 484 | 485 | # add the 'guan' sub-chunk after the 'data' sub-chunk 486 | tempfile.write(_chunkid.pack(b'guan')) 487 | tempfile.write(_chunksz.pack(len(md_bytes))) 488 | tempfile.write(md_bytes) 489 | 490 | # fix the RIFF file length 491 | total_size = tempfile.tell() 492 | tempfile.seek(0x04) 493 | tempfile.write(_chunksz.pack(total_size - 8)) 494 | tempfile.close() 495 | 496 | # verify it by re-parsing the new version 497 | GuanoFile(tempfile.name) 498 | 499 | # finally overwrite the original with our new version (and optionally back up first) 500 | if make_backup and os.path.exists(self.filename): 501 | backup_dir = os.path.join(os.path.dirname(self.filename), 'GUANO_BACKUP') 502 | backup_file = os.path.join(backup_dir, os.path.basename(self.filename)) 503 | if not os.path.isdir(backup_dir): 504 | log.debug('Creating backup dir: %s', backup_dir) 505 | os.mkdir(backup_dir) 506 | if os.path.exists(backup_file): 507 | os.remove(backup_file) 508 | shutil.move(self.filename, backup_file) 509 | shutil.move(tempfile.name, self.filename) 510 | 511 | 512 | class nullcontext(): 513 | """Fake ContextManager for Python < 3.7 compatibility""" 514 | 515 | def __init__(self, enter_result=None): 516 | self.enter_result = enter_result 517 | 518 | def __enter__(self): 519 | return self.enter_result 520 | 521 | def __exit__(self, *excinfo): 522 | pass 523 | 524 | 525 | # prevents a warning if application-level code doesn't configure logging 526 | log.addHandler(logging.NullHandler()) 527 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Requirements for developing `guano-py` itself, not necessary for *using* guano-py! 3 | # 4 | # Install with: 5 | # $> pip install -r requirements_dev.txt 6 | # 7 | 8 | wheel 9 | twine 10 | sphinx 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | from setuptools import setup 3 | from glob import glob 4 | 5 | from guano import __version__ 6 | 7 | 8 | setup( 9 | name='guano', 10 | version=__version__, 11 | description='GUANO, the "Grand Unified" bat acoustics metadata format', 12 | long_description=io.open('README.rst', encoding='utf-8').read(), 13 | url='https://github.com/riggsd/guano-py', 14 | license='MIT', 15 | author='David A. Riggs', 16 | author_email='driggs@myotisoft.com', 17 | classifiers=[ 18 | 'Development Status :: 5 - Production/Stable', 19 | 'Intended Audience :: Developers', 20 | 'Intended Audience :: Science/Research', 21 | 'License :: OSI Approved :: MIT License', 22 | 'Programming Language :: Python :: 2.7', 23 | 'Programming Language :: Python :: 3', 24 | ], 25 | keywords='bats acoustics metadata guano', 26 | py_modules=['guano'], 27 | scripts=glob('bin/*.py'), 28 | ) 29 | -------------------------------------------------------------------------------- /tests/test_guano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import unittest 5 | 6 | from guano import GuanoFile, wavparams, parse_timestamp, tzoffset 7 | 8 | 9 | class UnicodeTest(unittest.TestCase): 10 | 11 | NOTE = u'¡GUANO is the 💩 !' 12 | MD = u"""GUANO|Version: 1.0\nNote: %s""" % NOTE 13 | 14 | def setUp(self): 15 | pass 16 | 17 | def test_from_string(self): 18 | """Parse a GUANO metadata block containing Unicode data""" 19 | g = GuanoFile.from_string(self.MD) 20 | self.assertEqual(self.NOTE, g['Note']) 21 | 22 | def test_file_roundtrip(self): 23 | """Write a GUANO .WAV file containing Unicode data, re-read it and confirm value is identical""" 24 | fname = 'test_guano.wav' 25 | 26 | # write a fake .WAV file 27 | g = GuanoFile.from_string(self.MD) 28 | g.filename = fname 29 | g.wav_params = wavparams(1, 2, 500000, 2, 'NONE', None) 30 | g._wav_data = b'\01\02' # faking it, don't try this at home! 31 | g._wav_data_size = 2 32 | g.write() 33 | 34 | # read it back in 35 | g2 = GuanoFile(fname) 36 | 37 | self.assertEqual(self.NOTE, g2['Note']) 38 | 39 | def test_filelike_roundtrip(self): 40 | """Same as test_file_roundtrip, but reading a file-like object, not filename.""" 41 | fname = 'test_guano.wav' 42 | 43 | # write a fake .WAV file 44 | g = GuanoFile.from_string(self.MD) 45 | g.filename = fname 46 | g.wav_params = wavparams(1, 2, 500000, 2, 'NONE', None) 47 | g._wav_data = b'\01\02' # faking it, don't try this at home! 48 | g._wav_data_size = 2 49 | g.write() 50 | 51 | # read it back in 52 | with open(fname, 'rb') as f: 53 | g2 = GuanoFile(f) 54 | 55 | self.assertEqual(self.NOTE, g2['Note']) 56 | 57 | 58 | class GeneralTest(unittest.TestCase): 59 | 60 | MD = r'''GUANO|Version: 1.0 61 | Timestamp: 2017-04-20T01:23:45-07:00 62 | Note: This is a \nmultiline text note\nfor testing. 63 | User|Haiku: five\nseven\nfive 64 | User|Answer: 42 65 | MSFT|Transect|Version: 1.0.16 66 | ''' 67 | 68 | def setUp(self): 69 | GuanoFile.register('User', 'Answer', int) 70 | self.md = GuanoFile.from_string(self.MD) 71 | 72 | def test_get_namespaces(self): 73 | """Test that we can extract namespaces""" 74 | expected = {'GUANO', '', 'User', 'MSFT'} 75 | namespaces = set(self.md.get_namespaces()) 76 | self.assertSetEqual(expected, namespaces) 77 | 78 | def test_get_types(self): 79 | """Test multiple ways of requesting a namespaced value""" 80 | self.assertEqual(42, self.md['User|Answer']) 81 | self.assertEqual(42, self.md['User', 'Answer']) 82 | self.assertEqual(42, self.md.get('User|Answer')) 83 | 84 | def test_multiline(self): 85 | """Ensure multiline string `Note` is parsed as `\n` containing string""" 86 | self.assertEqual(3, len(self.md['Note'].splitlines())) 87 | 88 | def test_parse_timestamps(self): 89 | """Verify that we can at least parse all timestamp formats""" 90 | fmts = [ 91 | '2016-12-10T01:02:03', 92 | '2016-12-10T01:02:03.123', 93 | '2016-12-10T01:02:03.123456', 94 | 95 | '2016-12-10T01:02:03Z', 96 | '2016-12-10T01:02:03.123Z', 97 | '2016-12-10T01:02:03.123456Z', 98 | 99 | '2016-12-10T01:02:03-07:00', 100 | '2016-12-10T01:02:03.123-07:00', 101 | '2016-12-10T01:02:03.123456-07:00', 102 | 103 | '2016-12-10 01:02:03', # bonus 104 | ] 105 | 106 | for fmt in fmts: 107 | try: 108 | ts = parse_timestamp(fmt) 109 | ts.isoformat() 110 | except Exception as e: 111 | self.fail('Failed parsing: %s %s' % (fmt, e)) 112 | 113 | def test_tzoffset(self): 114 | """Verify our UTC offset timezone support""" 115 | fmts = [ 116 | 7, 117 | -7, 118 | 7.0, 119 | -7.0, 120 | 121 | '07:00', 122 | '+07:00', 123 | '-07:00', 124 | 125 | '07', 126 | '+07', 127 | '-07', 128 | 129 | '0700', 130 | '+0700', 131 | '-0700', 132 | ] 133 | for fmt in fmts: 134 | tz = tzoffset(fmt) 135 | if abs(tz.utcoffset(None).total_seconds()/60/60) > 8: 136 | self.fail('Failed parsing UTC offset: %s %s' % (fmt, tz)) 137 | 138 | def test_tzoffset_nst(self): 139 | """Verify fractional tzoffset like Newfoundland NST""" 140 | offset = tzoffset('-02:30') # Newfoundland NST 141 | offset_hours = offset.utcoffset(None).total_seconds() / 60.0 / 60.0 142 | self.assertEqual(offset_hours, -2.5) 143 | 144 | def test_new_empty(self): 145 | """Verify that "new" GUANO file metadata is "falsey" but populated metadata is "truthy".""" 146 | g = GuanoFile('nonexistent_file.wav') 147 | self.assertFalse(g) 148 | self.assertFalse('GUANO|Version' in g) 149 | 150 | g['Foo'] = 'bar' 151 | self.assertTrue(g) 152 | self.assertTrue('GUANO|Version' in g) 153 | 154 | def test_delete_simple(self): 155 | """Verify that we can delete fields""" 156 | g = GuanoFile() 157 | g['Foo'] = 'xyz' 158 | self.assertTrue('Foo' in g) 159 | 160 | del g['Foo'] 161 | self.assertFalse('Foo' in g) 162 | 163 | try: 164 | del g['Foo'] 165 | self.fail('Deleting a deleted key should throw KeyError') 166 | except KeyError: 167 | pass 168 | 169 | def test_delete_namespaced(self): 170 | """Verify that we can delete namespaced fields""" 171 | g = GuanoFile() 172 | g['Foo|Bar'] = 'xyz' 173 | self.assertTrue('Foo|Bar' in g) 174 | self.assertTrue('Foo' in g.get_namespaces()) 175 | 176 | del g['Foo|Bar'] 177 | self.assertFalse('Foo|Bar' in g) 178 | self.assertFalse('Foo' in g.get_namespaces()) 179 | 180 | try: 181 | del g['Foo|Bar'] 182 | self.fail('Deleting a deleted key should throw KeyError') 183 | except KeyError: 184 | pass 185 | 186 | g['Foo|Bar1'] = 'xyz' 187 | g['Foo|Bar2'] = 'abc' 188 | del g['Foo|Bar1'] 189 | self.assertFalse('Foo|Bar1' in g) 190 | self.assertTrue('Foo|Bar2' in g) 191 | self.assertTrue('Foo' in g.get_namespaces()) 192 | 193 | 194 | class BadDataTest(unittest.TestCase): 195 | """ 196 | These are hacks that may go against the specification, done in the name of permissive reading. 197 | John Postel: "Be conservative in what you do, be liberal in what you accept from others." 198 | """ 199 | 200 | def test_sb41_bad_te(self): 201 | """SonoBat 4.1 "optional" TE value""" 202 | md = '''GUANO|Version: 1.0 203 | TE: 204 | ''' 205 | GuanoFile.from_string(md) 206 | 207 | def test_sb41_bad_key(self): 208 | """SonoBat 4.1 disembodied colon""" 209 | md = '''GUANO|Version: 1.0 210 | : 211 | ''' 212 | self.assertEqual(1, len(list(GuanoFile.from_string(md).items()))) 213 | 214 | def test_sb42_bad_timestamp(self): 215 | """SonoBat 4.2 blank timestamp""" 216 | md = '''GUANO|Version: 1.0 217 | Timestamp: 218 | ''' 219 | GuanoFile.from_string(md) 220 | 221 | def test_sb42_bad_encoding(self): 222 | """SonoBat 4.2 doesn't actually encode as UTF-8. At least try not to blow up when reading.""" 223 | # SonoBat *probably* uses mac-roman on OS X and windows-1252 on Windows... in the US at least. 224 | md = b'GUANO|Version: 1.0\nNote: Mobile transect with mic 4\xd5 above roof.\n\x00\x00' 225 | GuanoFile.from_string(md) 226 | 227 | def test_sb42_bad_guano_version(self): 228 | """Some version of SonoBat 4.2 writes a GUANO|Version of "1.0:" by accident.""" 229 | md = b'GUANO|Version: 1.0:\n1.0:\n' 230 | GuanoFile.from_string(md) 231 | 232 | def test_empty_values(self): 233 | """EMTouchPro (and probably others) writes field keys with empty values""" 234 | md = md = b'GUANO|Version: 1.0\nLoc Elevation:\n' 235 | GuanoFile.from_string(md) 236 | 237 | 238 | class StrictParsingTest(unittest.TestCase): 239 | """ 240 | Test our strict/lenient parsing modes. 241 | Note that we are always lenient for some types of "bad data", as in :class:BadDataTest above. 242 | """ 243 | 244 | def test_strict_mode(self): 245 | md = '''GUANO|Version: 1.0 246 | TE: no 247 | Loc Position: 10N 567288E 4584472N 248 | ''' 249 | try: 250 | GuanoFile.from_string(md, strict=True) 251 | self.fail('Expected to fail with strict=True') 252 | except ValueError as e: 253 | pass 254 | g = GuanoFile.from_string(md, strict=False) 255 | self.assertEqual(g.get('TE', None), 'no') 256 | self.assertEqual(g.get('Loc Position', None), '10N 567288E 4584472N') 257 | 258 | 259 | if __name__ == '__main__': 260 | unittest.main() 261 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Unit tests for our extra utility scripts 4 | """ 5 | 6 | from __future__ import print_function 7 | 8 | import sys 9 | import os 10 | import os.path 11 | import unittest 12 | from itertools import chain 13 | 14 | from guano import GuanoFile 15 | 16 | bin_path = os.path.normpath(os.path.join(os.path.abspath(__file__), '..', '..', 'bin')) 17 | sys.path.insert(0, bin_path) 18 | import sb2guano 19 | import wamd2guano 20 | from guano_edit import GuanoTemplate 21 | 22 | 23 | 24 | class WamdTest(unittest.TestCase): 25 | 26 | def test_timestamps(self): 27 | for val in [ 28 | b'2014-04-02 22:59:14-05:00', 29 | b'2014-04-02 22:59:14.000', 30 | b'2014-04-02 22:59:14', 31 | ]: 32 | ts = wamd2guano._parse_wamd_timestamp(val) 33 | 34 | def test_gps(self): 35 | for val in [ 36 | b'WGS84, 41.713889, N, 121.508333, W', 37 | b'WGS84, 41.713889, N, 121.508333, W , 4200', 38 | b'WGS84, 41.713889, -21.508333', 39 | b'WGS84, 41.713889, -21.508333, 4200', 40 | ]: 41 | lat, lon, alt = wamd2guano._parse_wamd_gps(val) 42 | 43 | 44 | class SonoBatTest(unittest.TestCase): 45 | 46 | def test_ar125(self): 47 | md = 'MMMMMMMMM(#25000#)<&10&>[!250!]DEV=AR125RevA,DC=Off,UTC=2011:04:17::04:25:49.089,LTB=420,CMT=MMMMMMMMM' 48 | md = sb2guano._parse_sonobat_metadata(md) 49 | #print(md) 50 | # TODO: parse the AR125 specific fields out separately 51 | 52 | 53 | class GuanoEditTest(unittest.TestCase): 54 | 55 | def setUp(self): 56 | self.g = g = GuanoFile() 57 | g['A'] = 'A value' 58 | g['Foo Bar'] = 'Foo Bar value' 59 | g['NS|C'] = 'C value' 60 | g['NS|Foo Bar'] = 'Namespaced Foo Bar value' 61 | 62 | def test_template_1(self): 63 | s = GuanoTemplate('${A}').substitute(self.g) 64 | self.assertEqual(s, 'A value') 65 | 66 | def test_template_2(self): 67 | s = GuanoTemplate('${Foo Bar}').substitute(self.g) 68 | self.assertEqual(s, 'Foo Bar value') 69 | 70 | def test_template_3(self): 71 | s = GuanoTemplate('${NS|C}').substitute(self.g) 72 | self.assertEqual(s, 'C value') 73 | 74 | def test_template_4(self): 75 | s = GuanoTemplate('${NS|Foo Bar}').substitute(self.g) 76 | self.assertEqual(s, 'Namespaced Foo Bar value') 77 | 78 | def test_template_fail(self): 79 | try: 80 | GuanoTemplate('${DOES NOT EXIST}').substitute(self.g) 81 | self.fail('Expected failure with KeyError for nonexistent template substitution key') 82 | except KeyError: 83 | pass 84 | 85 | def test_well_known(self): 86 | # pretend this is an exhaustive list of well-known fields! 87 | # FIXME: because these are class attributes, we "accumulate" fields within the unit testing process 88 | keys = set(chain(GuanoFile._coersion_rules.keys(), GuanoFile._serialization_rules.keys())) 89 | for key in keys: 90 | self.g[key] = key 91 | s = GuanoTemplate('${'+key+'}').substitute(self.g) 92 | self.assertEqual(s, key) 93 | 94 | 95 | if __name__ == '__main__': 96 | unittest.main() 97 | --------------------------------------------------------------------------------