├── .github │   ├── ISSUE_TEMPLATE │   │   └── bug_report.md │   └── workflows │       └── python-package.yml ├── .gitignore ├── LICENSE ├── MANIFEST ├── README.md ├── examples │   ├── README.txt │   ├── example1.py │   └── example2.py ├── pympi │   ├── Elan.py │   ├── Praat.py │   └── __init__.py ├── setup.cfg ├── setup.py ├── test │   ├── .gitignore │   ├── EAFv2.8.xsd │   ├── EAFv3.0.xsd │   ├── conftest.py │   ├── sample_2.7.eaf │   ├── sample_2.8.eaf │   ├── sample_3.0.eaf │   ├── test_elan.py │   └── test_praat.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Expected behaviour** 11 | What did you expect to happen? 12 | 13 | **Actual behaviour** 14 | What actually happened? Please provide the exact means of reproducing the bug. 15 | 16 | **System information** 17 | - python version: 18 | - os: 19 | - are you up to date with the latest master?: 20 | 21 | **Additional context** 22 | Add any other context about the problem here. 23 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.7, 3.8, 3.9] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install .[test] 27 | - name: Test with pytest 28 | run: | 29 | pytest 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution files 2 | bin 3 | build 4 | dist 5 | 6 | # Virtual env files 7 | include 8 | lib 9 | lib64 10 | pyvenv.cfg 11 | 12 | # Test files or temporary data files 13 | *.pfsx 14 | *.pyc 15 | 16 | # Vim files 17 | *~ 18 | *.swp 19 | 20 | # Pyenv 21 | .python-version 22 | 23 | 24 | # Python egg 25 | *.egg-info 26 | 27 | .coverage 28 | .tox/ 29 | .idea/ 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Mart Lubbers 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | pympi/Elan.py 5 | pympi/Praat.py 6 | pympi/__init__.py 7 | test/test_elan.py 8 | test/test_praat.py 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pympi version 1.70.2 2 | ==================== 3 | ### Introduction 4 | Pympi is a package that allows you to interact with [Elan][1] files and [TextGrid][2] (regular, short and binary) files. 5 | You can create, edit and convert both formats into each other. 6 | Besides all the basic functions, it also includes functions for: 7 | - Calculating gaps and overlaps between speakers conforming to [Heldner and Edlund's method][3] (could be used to calculate floor transfers). 8 | - Shifting annotations in both directions (could be used when, due to an error, all annotations are misaligned). 9 | - Importing from CLAN's chat files. 10 | - Merging and/or filtering tiers (could be used to combine hands in gesture coding). 11 | - Moving tiers between elan files. 12 | - Etc. 13 | 14 | ### Requirements 15 | None 16 | 17 | ### Optional requirements 18 | - [lxml][4] is used for testing. 19 | 20 | ### Documentation and downloads 21 | Full API documentation of the current and old versions can be found [here][5]. 22 | 23 | The PyPI repository can be found [here][6]. 24 | 25 | ### Installation 26 | #### Automatic 27 | - From a shell run with administrator rights: 28 | ```Shell 29 | pip install pympi-ling 30 | ``` 31 | - Or alternatively run with administrator rights: 32 | ```Shell 33 | easy_install pympi-ling 34 | ``` 35 | 36 | *NOTE: on Windows the executable might not be in $PATH.* 37 | 38 | #### Manual 39 | 1. Download the latest version from [PyPI][6] 40 | 2. Untar the file 41 | 3. 
From the extracted directory, run with administrator rights: 42 | ```Shell 43 | python setup.py install 44 | ``` 45 | 
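### Quick usage
A minimal sketch of the API (the file names here are hypothetical):
```Python
import pympi

# Read an ELAN file and list its tiers
eaf = pympi.Elan.Eaf('recording.eaf')
for tier_name in eaf.get_tier_names():
    print(tier_name)

# Read a TextGrid and convert it to an Eaf object
tg = pympi.Praat.TextGrid('recording.TextGrid')
eaf2 = tg.to_eaf()
```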
46 | ### How to cite 47 | ```tex 48 | @misc{pympi-1.70, 49 | author={Lubbers, Mart and Torreira, Francisco}, 50 | title={pympi-ling: a {Python} module for processing {ELAN}s {EAF} and {Praat}s {TextGrid} annotation files.}, 51 | howpublished={\url{https://pypi.python.org/pypi/pympi-ling}}, 52 | year={2013-2021}, 53 | note={Version 1.70} 54 | } 55 | ``` 56 | 57 | ### Authors 58 | Mart Lubbers (mart at martlubbers.net) 59 | and 60 | Francisco Torreira (francisco.torreira at mpi.nl) 61 | 62 | and with contributions from: 63 | sarpu, hadware, thomaskisler, mome, mimrock and xrotwang 64 | 65 | [1]: https://tla.mpi.nl/tools/tla-tools/elan/ 66 | [2]: http://www.fon.hum.uva.nl/praat/ 67 | [3]: http://www.sciencedirect.com/science/article/pii/S0095447010000628 68 | [4]: http://lxml.de/ 69 | [5]: http://dopefishh.github.io/pympi/ 70 | [6]: https://pypi.python.org/pypi/pympi-ling/ 71 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains some example scripts that can be freely adapted to 2 | create your own scripts or serve as inspiration. 3 | 4 | Example 1. 5 | This script makes a word frequency list of all the orthography tiers present 6 | in the elan files in a corpus. 7 | 8 | Example 2. 9 | This script calculates the gaps and the overlaps between two signers that are 10 | transcribed with different hands as different tiers. 11 | -------------------------------------------------------------------------------- /examples/example1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import glob # Import glob to easily loop over files 5 | import pympi # Import pympi to work with elan files 6 | import string # Import string to get the punctuation data 7 | 8 | # Define some variables for later use 9 | corpus_root = '/home/frobnicator/corpora/corpus_1' 10 | output_file = '{}/word_frequencies.txt'.format(corpus_root) 11 | ort_tier_names = ['spkA', 'spkB', 'spkC'] 12 | 13 | # Initialize the frequency dictionary 14 | frequency_dict = {} 15 | 16 | # Loop over all elan files in the corpus root subdirectory called eaf 17 | for file_path in glob.glob('{}/eaf/*.eaf'.format(corpus_root)): 18 | # Initialize the elan file 19 | eafob = pympi.Elan.Eaf(file_path) 20 | # Loop over all the defined tiers that contain orthography 21 | for ort_tier in ort_tier_names: 22 | # If the tier is not present in the elan file spew an error and 23 | # continue. This is done to avoid possible KeyErrors 24 | if ort_tier not in eafob.get_tier_names(): 25 | print('WARNING!!!') 26 | print('One of the orthography tiers is not present in the elan file,') 27 | print('namely: {}. Skipping this one...'.format(ort_tier)) 28 | # If the tier is present we can loop through the annotation data 29 | else: 30 | for annotation in eafob.get_annotation_data_for_tier(ort_tier): 31 | # We are only interested in the utterance 32 | utterance = annotation[2] 33 | # Split, by default, splits on whitespace thus separating words 34 | words = utterance.split() 35 | # For every word increment the frequency 36 | for word in words: 37 | # Remove the possible punctuation 38 | for char in string.punctuation: 39 | word = word.replace(char, '') 40 | # Convert to lowercase 41 | word = word.lower() 42 | # Increment the frequency, using the get method we can 43 | # avoid KeyErrors and make sure the word is added when it 44 | # wasn't present in the frequency dictionary 45 | frequency_dict[word] = frequency_dict.get(word, 0) + 1 46 | 47 | # Open an output file to write the data to 48 | with open(output_file, 'w') as out_f: 49 | # Loop through the words with their frequencies, we do this sorted because 50 | # the file will then be more easily searchable 51 | for word, frequency in sorted(frequency_dict.items()): 52 | # We write the output separated by tabs 53 | out_f.write('{}\t{}\n'.format(word, frequency)) 54 | -------------------------------------------------------------------------------- /examples/example2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pympi # Import pympi to work with elan files 5 | 6 | # Specify the file path 7 | elan_file_path = '/home/frobnicator/corpus/sign/file1.eaf' 8 | 9 | # Initialize the elan file 10 | eaf = pympi.Elan.Eaf(elan_file_path) 11 | # Merge both hands for speaker 1 12 | eaf.merge_tiers(['spk1L', 'spk1R'], 'spk1', 80) 13 | # Merge both hands for speaker 2 14 | eaf.merge_tiers(['spk2L', 'spk2R'], 'spk2', 80) 15 | # Create gaps and overlaps tier called ftos with a maximum length of 5000ms and 16 | # using the fast method 17 | eaf.create_gaps_and_overlaps_tier('spk1', 'spk2', 'ftos', 5000, True) 18 | # Write the results to file with the _fto suffix 19 | eaf.to_file(elan_file_path.replace('.eaf', '_fto.eaf')) 20 | -------------------------------------------------------------------------------- /pympi/Praat.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import re 3 | import struct 4 | 5 | VERSION = '1.70.2' 6 | 7 | 8 | class TextGrid: 9 | """Read, write and edit Praat's TextGrid files. 10 | 11 | .. note:: All times are in seconds and can have decimals 12 | 13 | :var float xmin: Minimum x value. 14 | :var float xmax: Maximum x value. 15 | :var int tier_num: Number of tiers. 16 | :var list tiers: Internal (unsorted) list of tiers. 17 | :var str codec: Codec of the input file. 18 | """ 19 | def __init__(self, file_path=None, xmin=0, xmax=None, codec='utf-8'): 20 | """Construct either a new TextGrid object or read one from a 21 | file/stream. When you create an empty TextGrid you must at least 22 | specify the xmax. When you want to load a TextGrid from file you need 23 | to specify at least the file_path and optionally the codec. Binary, 24 | short and normal TextGrids are supported. 25 | 26 | :param str file_path: Path to read from, - for stdin. If ``None`` an 27 | empty TextGrid will be created. 28 | :param int xmin: Xmin value, only needed when not loading from file. 29 | :param int xmax: Xmax value, needed when not loading from file. 30 | :param str codec: Text encoding for the input.
Note that this will be 31 | ignored for binary TextGrids. 32 | :raises Exception: If filepath is not specified but no xmax 33 | """ 34 | self.tiers = [] 35 | self.codec = codec 36 | if not file_path: 37 | if xmax is None: 38 | raise Exception('No xmax specified') 39 | self.tier_num = 0 40 | self.xmin = xmin 41 | self.xmax = xmax 42 | else: 43 | with open(file_path, 'rb') as f: 44 | self.from_file(f, codec) 45 | 46 | def from_file(self, ifile, codec='ascii'): 47 | """Read textgrid from stream. 48 | 49 | :param file ifile: Stream to read from. 50 | :param str codec: Text encoding for the input. Note that this will be 51 | ignored for binary TextGrids. 52 | """ 53 | if ifile.read(12) == b'ooBinaryFile': 54 | def bin2str(ifile): 55 | textlen = struct.unpack('>h', ifile.read(2))[0] 56 | # Single byte characters 57 | if textlen >= 0: 58 | return ifile.read(textlen).decode('ascii') 59 | # Multi byte characters have initial len -1 and then \xff bytes 60 | elif textlen == -1: 61 | textlen = struct.unpack('>h', ifile.read(2))[0] 62 | data = ifile.read(textlen*2) 63 | charlist = (data[i:i+2] for i in range(0, len(data), 2)) 64 | return ''.join( 65 | chr(struct.unpack('>h', i)[0]) for i in charlist) 66 | 67 | ifile.read(ord(ifile.read(1))) # skip oo type 68 | self.xmin = struct.unpack('>d', ifile.read(8))[0] 69 | self.xmax = struct.unpack('>d', ifile.read(8))[0] 70 | ifile.read(1) # skip 71 | self.tier_num = struct.unpack('>i', ifile.read(4))[0] 72 | for i in range(self.tier_num): 73 | tier_type = ifile.read(ord(ifile.read(1))).decode('ascii') 74 | name = bin2str(ifile) 75 | tier = Tier(0, 0, name=name, tier_type=tier_type) 76 | self.tiers.append(tier) 77 | tier.xmin = struct.unpack('>d', ifile.read(8))[0] 78 | tier.xmax = struct.unpack('>d', ifile.read(8))[0] 79 | nint = struct.unpack('>i', ifile.read(4))[0] 80 | for i in range(nint): 81 | x1 = struct.unpack('>d', ifile.read(8))[0] 82 | if tier.tier_type == 'IntervalTier': 83 | x2 = struct.unpack('>d', ifile.read(8))[0] 84 | text = bin2str(ifile) 85 | if tier.tier_type == 'IntervalTier': 86 | tier.intervals.append((x1, x2, text)) 87 | elif tier.tier_type == 'TextTier': 88 | tier.intervals.append((x1, text)) 89 | else: 90 | raise Exception('Tiertype does not exist.') 91 | else: 92 | 93 | ifile.seek(0) 94 | 95 | line_list = ifile.read().decode(codec).splitlines(keepends = True) 96 | line_index = 0 97 | 98 | def next_line(): 99 | 100 | nonlocal line_index 101 | 102 | if line_index >= len(line_list): 103 | raise StopIteration 104 | 105 | line = line_list[line_index] 106 | line_index += 1 107 | 108 | return line 109 | 110 | regfloat = re.compile(r'([\d.]+)\s*$', flags = re.UNICODE) 111 | regint = re.compile(r'([\d]+)\s*$', flags = re.UNICODE) 112 | regstr = re.compile(r'^[^"]*"((?:""|[^"])*)"\s*$', flags = re.UNICODE | re.DOTALL) 113 | 114 | def parse_float(): 115 | 116 | return float(regfloat.search(next_line()).group(1)) 117 | 118 | def parse_int(): 119 | 120 | return int(regint.search(next_line()).group(1)) 121 | 122 | def parse_str(): 123 | 124 | line_str = next_line() 125 | 126 | while True: 127 | 128 | try: 129 | return regstr.search(line_str).group(1).replace('""', '"') 130 | except AttributeError: 131 | pass 132 | 133 | line_str += next_line() 134 | 135 | # Skip the Headers and empty line 136 | next_line(), next_line(), next_line() 137 | self.xmin = parse_float() 138 | self.xmax = parse_float() 139 | # Skip 140 | line = next_line() 141 | short = line.strip() == '' 142 | self.tier_num = parse_int() 143 | not short and next_line() 144 | for i in 
range(self.tier_num): 145 | not short and next_line() # skip item[]: and item[\d]: 146 | tier_type = parse_str() 147 | name = parse_str() 148 | tier = Tier(0, 0, name=name, tier_type=tier_type) 149 | self.tiers.append(tier) 150 | tier.xmin = parse_float() 151 | tier.xmax = parse_float() 152 | for i in range(parse_int()): 153 | not short and next_line() # skip intervals [\d] 154 | x1 = parse_float() 155 | if tier.tier_type == 'IntervalTier': 156 | x2 = parse_float() 157 | t = parse_str() 158 | tier.intervals.append((x1, x2, t)) 159 | elif tier.tier_type == 'TextTier': 160 | t = parse_str() 161 | tier.intervals.append((x1, t)) 162 | 163 | def sort_tiers(self, key=lambda x: x.name): 164 | """Sort the tiers given the key. Example key functions: 165 | 166 | Sort according to the tiername in a list: 167 | 168 | ``lambda x: ['name1', 'name2' ... 'namen'].index(x.name)``. 169 | 170 | Sort according to the number of annotations: 171 | 172 | ``lambda x: len(list(x.get_intervals()))`` 173 | 174 | :param func key: A key function. Default sorts alphabetically. 175 | """ 176 | self.tiers.sort(key=key) 177 | 178 | def add_tier(self, name, tier_type='IntervalTier', number=None): 179 | """Add an IntervalTier or a TextTier on the specified location. 180 | 181 | :param str name: Name of the tier, duplicate names is allowed. 182 | :param str tier_type: Type of the tier. 183 | :param int number: Place to insert the tier, when ``None`` the number 184 | is generated and the tier will be placed on the bottom. 185 | :returns: The created tier. 186 | :raises ValueError: If the number is out of bounds. 187 | """ 188 | if number is None: 189 | number = 1 if not self.tiers else len(self.tiers)+1 190 | elif number < 1 or number > len(self.tiers): 191 | raise ValueError('Number not in [1..{}]'.format(len(self.tiers))) 192 | elif tier_type not in Tier.P_TIERS: 193 | raise ValueError('tier_type has to be in {}'.format(Tier.P_TIERS)) 194 | self.tiers.insert(number-1, 195 | Tier(self.xmin, self.xmax, name, tier_type)) 196 | return self.tiers[number-1] 197 | 198 | def remove_tier(self, name_num): 199 | """Remove a tier, when multiple tiers exist with that name only the 200 | first is removed. 201 | 202 | :param name_num: Name or number of the tier to remove. 203 | :type name_num: int or str 204 | :raises IndexError: If there is no tier with that number. 205 | """ 206 | if isinstance(name_num, int): 207 | del(self.tiers[name_num-1]) 208 | else: 209 | self.tiers = [i for i in self.tiers if i.name != name_num] 210 | 211 | def get_tier(self, name_num): 212 | """Gives a tier, when multiple tiers exist with that name only the 213 | first is returned. 214 | 215 | :param name_num: Name or number of the tier to return. 216 | :type name_num: int or str 217 | :returns: The tier. 218 | :raises IndexError: If the tier doesn't exist. 219 | """ 220 | return self.tiers[name_num - 1] if isinstance(name_num, int) else\ 221 | [i for i in self.tiers if i.name == name_num][0] 222 | 223 | def change_tier_name(self, name_num, name2): 224 | """Changes the name of the tier, when multiple tiers exist with that 225 | name only the first is renamed. 226 | 227 | :param name_num: Name or number of the tier to rename. 228 | :type name_num: int or str 229 | :param str name2: New name of the tier. 230 | :raises IndexError: If the tier doesn't exist. 231 | """ 232 | self.get_tier(name_num).name = name2 233 | 234 | def get_tiers(self): 235 | """Give all tiers. 
236 | 237 | :yields: All tiers 238 | """ 239 | for tier in self.tiers: 240 | yield tier 241 | 242 | def get_tier_name_num(self): 243 | """Give all tier names with their numbers. 244 | 245 | :yield: Enumerate of the form ``[(num1, name1), ... (numn, namen)]`` 246 | """ 247 | return enumerate((s.name for s in self.tiers), 1) 248 | 249 | def to_file(self, filepath, codec='utf-8', mode='normal'): 250 | """Write the object to a file. 251 | 252 | :param str filepath: Path of the file. 253 | :param str codec: Text encoding. 254 | :param str mode: Flag for the write mode, possible modes: 255 | 'n'/'normal', 's'/'short' and 'b'/'binary' 256 | """ 257 | self.tier_num = len(self.tiers) 258 | if mode in ['binary', 'b']: 259 | with open(filepath, 'wb') as f: 260 | def writebstr(s): 261 | try: 262 | bstr = s.encode('ascii') 263 | except UnicodeError: 264 | f.write(b'\xff\xff') 265 | bstr = b''.join(struct.pack('>h', ord(c)) for c in s) 266 | f.write(struct.pack('>h', len(s))) 267 | f.write(bstr) 268 | 269 | f.write(b'ooBinaryFile\x08TextGrid') 270 | f.write(struct.pack('>d', self.xmin)) 271 | f.write(struct.pack('>d', self.xmax)) 272 | f.write(b'\x01') 273 | f.write(struct.pack('>i', self.tier_num)) 274 | for tier in self.tiers: 275 | f.write(chr(len(tier.tier_type)).encode('ascii')) 276 | f.write(tier.tier_type.encode('ascii')) 277 | writebstr(tier.name) 278 | f.write(struct.pack('>d', tier.xmin)) 279 | f.write(struct.pack('>d', tier.xmax)) 280 | ints = tier.get_all_intervals() 281 | f.write(struct.pack('>i', len(ints))) 282 | itier = tier.tier_type == 'IntervalTier' 283 | for c in ints: 284 | f.write(struct.pack('>d', c[0])) 285 | itier and f.write(struct.pack('>d', c[1])) 286 | writebstr(c[2 if itier else 1]) 287 | elif mode in ['normal', 'n', 'short', 's']: 288 | # py3.5 compat: codecs.open does not support pathlib.Path objects in py3.5. 289 | with codecs.open(str(filepath), 'w', codec) as f: 290 | short = mode[0] == 's' 291 | 292 | def wrt(indent, prefix, value, ff=''): 293 | indent = 0 if short else indent 294 | prefix = '' if short else prefix 295 | if value is not None or not short: 296 | s = u'{{}}{{}}{}\n'.format(ff) 297 | f.write(s.format(' '*indent, prefix, value)) 298 | 299 | f.write(u'File type = "ooTextFile"\n' 300 | u'Object class = "TextGrid"\n\n') 301 | wrt(0, u'xmin = ', self.xmin, '{:f}') 302 | wrt(0, u'xmax = ', self.xmax, '{:f}') 303 | wrt(0, u'tiers? ', u'', '{}') 304 | wrt(0, u'size = ', self.tier_num, '{:d}') 305 | wrt(0, u'item []:', None) 306 | for tnum, tier in enumerate(self.tiers, 1): 307 | wrt(4, u'item [{:d}]:'.format(tnum), None) 308 | wrt(8, u'class = ', tier.tier_type, '"{}"') 309 | wrt(8, u'name = ', tier.name, '"{}"') 310 | wrt(8, u'xmin = ', tier.xmin, '{:f}') 311 | wrt(8, u'xmax = ', tier.xmax, '{:f}') 312 | if tier.tier_type == 'IntervalTier': 313 | ints = tier.get_all_intervals() 314 | wrt(8, u'intervals: size = ', len(ints), '{:d}') 315 | for i, c in enumerate(ints): 316 | wrt(8, 'intervals [{:d}]:'.format(i+1), None) 317 | wrt(12, 'xmin = ', c[0], '{:f}') 318 | wrt(12, 'xmax = ', c[1], '{:f}') 319 | wrt(12, 'text = ', c[2].replace('"', '""'), '"{}"') 320 | elif tier.tier_type == 'TextTier': 321 | wrt(8, u'points: size = ', len(tier.intervals), '{:d}') 322 | for i, c in enumerate(tier.get_intervals()): 323 | wrt(8, 'points [{:d}]:'.format(i+1), None) 324 | wrt(12, 'number = ', c[0], '{:f}') 325 | wrt(12, 'mark = ', c[1].replace('"', '""'), '"{}"') 326 | else: 327 | raise Exception('Unknown mode') 328 | 329 | def to_eaf(self, skipempty=True, pointlength=0.1): 330 | """Convert the object to a pympi.Elan.Eaf object 331 | 332 | :param float pointlength: Length of the interval generated from each point, in 333 | seconds 334 | :param bool skipempty: Skip the empty annotations 335 | :returns: :class:`pympi.Elan.Eaf` object 336 | :raises ImportError: If the Eaf module can't be loaded. 337 | :raises ValueError: If the pointlength is not strictly positive. 338 | """ 339 | from pympi.Elan import Eaf 340 | eaf_out = Eaf() 341 | if pointlength <= 0: 342 | raise ValueError('Pointlength should be strictly positive') 343 | for tier in self.get_tiers(): 344 | eaf_out.add_tier(tier.name) 345 | for ann in tier.get_intervals(True): 346 | if tier.tier_type == 'TextTier': 347 | ann = (ann[0], ann[0]+pointlength, ann[1]) 348 | if ann[2].strip() or not skipempty: 349 | eaf_out.add_annotation(tier.name, int(round(ann[0]*1000)), 350 | int(round(ann[1]*1000)), ann[2]) 351 | return eaf_out 352 | 353 | 
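# A minimal usage sketch for to_eaf(); the tier name and interval values
# below are illustrative only:
#
#     tg = TextGrid(xmax=5)
#     words = tg.add_tier('words')           # defaults to an IntervalTier
#     words.add_interval(0.0, 1.5, 'hello')
#     eaf = tg.to_eaf()                      # 0.0-1.5 s becomes a 0-1500 ms annotation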
354 | class Tier: 355 | """Class representing a TextGrid tier, either an IntervalTier or a TextTier 356 | 357 | :var str name: Name of the tier. 358 | :var list intervals: List of intervals where each interval is 359 | (start, [end,] value). 360 | :var str tier_type: Type of the tier ('IntervalTier' or 'TextTier'). 361 | :var int xmin: Minimum x value. 362 | :var int xmax: Maximum x value. 363 | """ 364 | P_TIERS = {'IntervalTier', 'TextTier'} 365 | 366 | def __init__(self, xmin, xmax, name=None, tier_type=None): 367 | """Creates a new, empty tier. 368 | 369 | :param str name: Name of the tier. 370 | :param str tier_type: Type of the tier ('IntervalTier' or 'TextTier'). 371 | :raises Exception: If the tier type is unknown. 372 | """ 373 | self.intervals = [] 374 | self.name = name 375 | self.tier_type = tier_type 376 | self.xmin, self.xmax = xmin, xmax 377 | if tier_type not in self.P_TIERS: 378 | raise Exception('Tiertype does not exist.') 379 | 380 | def add_point(self, point, value, check=True): 381 | """Add a point to the TextTier 382 | 383 | :param int point: Time of the point. 384 | :param str value: Text of the point. 385 | :param bool check: Flag to check for overlap. 386 | :raises Exception: If overlap or wrong tiertype. 387 | """ 388 | if self.tier_type != 'TextTier': 389 | raise Exception('Tiertype must be TextTier.') 390 | if check and any(i for i in self.intervals if i[0] == point): 391 | raise Exception('No overlap is allowed') 392 | self.intervals.append((point, value)) 393 | 
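# Points on a TextTier are stored as (time, value) pairs and intervals on an
# IntervalTier as (begin, end, value) triples. A short sketch with
# illustrative values:
#
#     clicks = Tier(0, 10, name='clicks', tier_type='TextTier')
#     clicks.add_point(2.5, 'click')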
394 | def add_interval(self, begin, end, value, check=True): 395 | """Add an interval to the IntervalTier. 396 | 397 | :param float begin: Start time of the interval. 398 | :param float end: End time of the interval. 399 | :param str value: Text of the interval. 400 | :param bool check: Flag to check for overlap. 401 | :raises Exception: If overlap, begin > end or wrong tiertype. 402 | """ 403 | if self.tier_type != 'IntervalTier': 404 | raise Exception('Tiertype must be IntervalTier') 405 | if check: 406 | if any(i for i in self.intervals if begin < i[1] and end > i[0]): 407 | raise Exception('No overlap is allowed') 408 | if begin > end: 409 | raise Exception('Begin must be smaller than end') 410 | self.intervals.append((begin, end, value)) 411 | 412 | def remove_interval(self, time): 413 | """Remove an interval, if no interval is found nothing happens. 414 | 415 | :param int time: Time of the interval. 416 | :raises Exception: If the tier is not an IntervalTier. 417 | """ 418 | if self.tier_type != 'IntervalTier': 419 | raise Exception('Tiertype must be IntervalTier.') 420 | self.intervals = [i for i in self.intervals 421 | if not(i[0] <= time and i[1] >= time)] 422 | 423 | def remove_point(self, time): 424 | """Remove a point, if no point is found nothing happens. 425 | 426 | :param int time: Time of the point. 427 | :raises Exception: If the tier is not a TextTier. 428 | """ 429 | if self.tier_type != 'TextTier': 430 | raise Exception('Tiertype must be TextTier.') 431 | self.intervals = [i for i in self.intervals if i[0] != time] 432 | 433 | def get_intervals(self, sort=False): 434 | """Give all the intervals or points. 435 | 436 | :param bool sort: Flag for yielding the intervals or points sorted.
437 | :yields: All the intervals 438 | """ 439 | for i in sorted(self.intervals) if sort else self.intervals: 440 | yield i 441 | 442 | def clear_intervals(self): 443 | """Removes all the intervals in the tier""" 444 | self.intervals = [] 445 | 446 | def get_all_intervals(self): 447 | """Returns the true list of intervals including the empty intervals.""" 448 | ints = sorted(self.get_intervals(True)) 449 | if self.tier_type == 'IntervalTier': 450 | if not ints: 451 | ints.append((self.xmin, self.xmax, '')) 452 | else: 453 | if ints[0][0] > self.xmin: 454 | ints.insert(0, (self.xmin, ints[0][0], '')) 455 | if ints[-1][1] < self.xmax: 456 | ints.append((ints[-1][1], self.xmax, '')) 457 | p = ints[-1] 458 | for index, i in reversed(list(enumerate(ints[:-1], 1))): 459 | if p[0] - i[1] != 0: 460 | ints.insert(index, (i[1], p[0], '')) 461 | p = i 462 | return ints 463 | -------------------------------------------------------------------------------- /pympi/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the packages 2 | from pympi.Praat import TextGrid 3 | from pympi.Elan import Eaf, eaf_from_chat 4 | 5 | __all__ = ['Praat', 'Elan', 'eaf_from_chat'] 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE 3 | 4 | [bdist_wheel] 5 | universal = 1 6 | 7 | [tool:pytest] 8 | minversion = 5 9 | testpaths = test 10 | addopts = --cov 11 | 12 | [easy_install] 13 | zip_ok = false 14 | 15 | [coverage:run] 16 | source = 17 | pympi 18 | test 19 | 20 | [coverage:report] 21 | show_missing = true 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from distutils.core import setup 5 | 6 | licence = 'MIT Licence' 7 | version = '1.70.2' 8 | 9 | setup(name='pympi-ling', 10 | version=version, 11 | description= 12 | 'A python module for processing ELAN and Praat annotation files', 13 | author='Mart Lubbers', 14 | long_description=r""" 15 | pympi version 1.70.2 16 | ==================== 17 | 18 | Introduction 19 | ------------ 20 | 21 | Pympi is a package that allows you to interact with `Elan`_ files and 22 | `TextGrid`_ files. You can create, edit and convert both formats into 23 | each other. It includes besides all the basic functions also functions 24 | for: - Calculating gaps and overlaps between speakers conform `Heldner 25 | and Edlund’s method`_. (Could be used to calculate floor transfers) - 26 | Shift annotations in both directions (Could be used when due to an error 27 | all annotations are misaligned). - Import from CLAN’s chat files. - 28 | Merge and or filter tiers (Could be used to combine hands in gesture 29 | coding) - Move tiers between elan files. - Etc. 30 | 31 | Requirements 32 | ------------ 33 | 34 | None 35 | 36 | Optional requirements 37 | --------------------- 38 | 39 | - `lxml`_ is used for testing. 40 | 41 | Documentation and downloads 42 | --------------------------- 43 | 44 | Full api documentation of the current and old versions can be found on 45 | `here`_. 46 | 47 | Pypi repository location can be found 48 | `here `__. 49 | 50 | Installation 51 | ------------ 52 | 53 | Automatic 54 | ~~~~~~~~~ 55 | 56 | - From a shell run with administrator rights: 57 | 58 | .. 
code:: shell 59 | 60 | pip install pympi-ling 61 | 62 | - Or alternatively run with administrator rights: 63 | 64 | .. code:: shell 65 | 66 | easy_install pympi-ling 67 | 68 | *NOTE: on windows the executable might not be in $PATH.* 69 | 70 | Manual 71 | ~~~~~~ 72 | 73 | 1. Download the latest version from `pypi`_ 74 | 2. Untar the file 75 | 3. From that directory run with administrator rights 76 | 77 | .. code:: shell 78 | 79 | python setup.py install 80 | 81 | How to cite 82 | ----------- 83 | 84 | .. code:: tex 85 | 86 | @misc{pympi-1.70, 87 | author={Lubbers, Mart and Torreira, Francisco}, 88 | title={pympi-ling: a Python module for processing ELANs EAF and Praats TextGrid annotation files.}, 89 | howpublished={\url{https://pypi.python.org/pypi/pympi-ling}}, 90 | year={2013-2021}, 91 | note={Version 1.70} 92 | } 93 | 94 | Authors 95 | ------- 96 | 97 | Mart Lubbers (mart at martlubbers.net) 98 | and 99 | Francisco Toreirra (francisco.torreira at mpi.nl) 100 | 101 | and with contributions from: 102 | sarpu, hadware, thomaskisler, mome, mimrock and xrotwang 103 | 104 | .. _Elan: https://tla.mpi.nl/tools/tla-tools/elan/ 105 | .. _TextGrid: http://www.fon.hum.uva.nl/praat/ 106 | .. _Heldner and Edlund’s method: http://www.sciencedirect.com/science/article/pii/S0095447010000628 107 | .. _lxml: http://lxml.de/ 108 | .. _here: http://dopefishh.github.io/pympi/ 109 | .. _pypi: http://dopefishh.github.io/pympi/""", 110 | author_email='mart@martlubbers.net', 111 | url='https://github.com/dopefishh/pympi', 112 | classifiers=['Development Status :: 5 - Production/Stable', 113 | 'Environment :: Console', 114 | 'Natural Language :: English', 115 | 'Operating System :: OS Independent', 116 | 'Programming Language :: Python :: 3.5', 117 | 'Programming Language :: Python :: 3.6', 118 | 'Programming Language :: Python :: 3.7', 119 | 'Programming Language :: Python :: 3.8', 120 | 'Programming Language :: Python :: 3.9', 121 | 'Topic :: Text Processing :: Linguistic'], 122 | packages=['pympi'], 123 | extras_require={ 124 | 'test': [ 125 | 'pytest>=5', 126 | 'pytest-mock', 127 | 'pytest-cov', 128 | 'coverage>=4.2', 129 | 'lxml', 130 | ], 131 | }) 132 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | sample_2.8_out.eaf 2 | -------------------------------------------------------------------------------- /test/EAFv2.8.xsd: -------------------------------------------------------------------------------- 1 | 2 | 52 | 53 | 54 | 55 | ELAN Annotation Format 56 | version 2.8 57 | May 2014 58 | Schema by Alexander Klassmann 17/01/03 59 | Adapted by Hennie Brugman, Han Sloetjes, Micha Hulsbosch 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | The entry id should be unique within the collection of entry elements 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | The Tier name/id should be unique within the collection 99 | of Tier elements 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | A Tier can be associated with a parent Tier by referring to an existing Tier id. 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | The Linguistic Type name/id should be unique within the collection 119 | of Linguistic Type elements 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | A Tier must refer to an existing Linguistic Type id. 
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | The Controlled Vocabulary name/id should be unique within the 139 | collection of Controlled Vocabulary elements 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | A Linguistic Type can be associated with a Controlled Vocabulary by 149 | referring to an existing Controlled Vocabulary id. 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | The Lexicon Service name/id should be unique within the 160 | collection of Lexicon Service elements 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | A Linguistic Type can be associated with a Lexicon Service by 170 | referring to an existing Lexicon Service id. 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | A key and keyref pair to enforce that a previous annotation idref at least refers 183 | to an annotation id of a reference annotation. 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | Two key-keyref pairs to enforce that time slot references refer to the id of a time slot. 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | The ID of a language identifier, can be referred to by any element that 217 | needs a reference to a language identifier. 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | Reference from a value in a multilingual CV to a language identifier. 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | Reference from a description in a multilingual CV to a language identifier. 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | Reference from a tier to a language identifier, to indicate the (main) language recorded 245 | on that tier. 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | Reference from an individual alignable annotation to a language identifier. 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | Reference from an individual reference annotation to a language identifier. 264 | 265 | 266 | 267 | 268 | 269 | 282 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | This attribute is deprecated. Use MEDIA_DESCRIPTOR elements instead. 348 | 349 | Ignore 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | This is in fact a reference to the parent annotation. 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | A reference to an url of an external Controlled Vocabulary. 457 | Is intended to be mutually exclusive with a sequence of CV_ENTRY_ML elements. 458 | 459 | 460 | 461 | 462 | 463 | 473 | 474 | 475 | 476 | 477 | 478 | An entry in a multilingual controlled vocabulary, containing the values and the descriptions 479 | in multiple languages. 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | A controlled vocabulary entry value with a language attribute. 
494 | This allows multilingual controlled vocabularies. It adds a language reference attribute 495 | compared to the mono-lingual cv entry element. 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | A description element with a language reference attribute. 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | A reference to the id of an ISO Data Category (url including id). 537 | 538 | 539 | 540 | 541 | 542 | 543 | A reference to an external (closed) Controlled Vocabulary (url). 544 | 545 | 546 | 547 | 548 | 549 | 550 | A reference to the id of an Entry in an external Controlled Vocabulary (id). 551 | 552 | 553 | 554 | 555 | 556 | 557 | A reference to the id of an entry in a lexicon (url, url+id or id) 558 | 559 | 560 | 561 | 562 | 563 | 564 | A reference or hyperlink to any type document (url) 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | The Language element containing a reference to a language name or (if possible persistent) definition. 590 | 591 | 592 | 593 | 594 | 595 | 596 | ISO-639-3 still seems to be the best choice for language codes and closest to persistent language ID's 597 | seem to be the http://cdb.iso.org/lg/... identifiers also used by the iso-language-639-3 component in 598 | the CLARIN ComponentRegistry? 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | The license element can be used to include license information in the eaf file itself. 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | The entry id should be unique within the 635 | collection of entry elements 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 661 | 662 | 663 | 686 | 701 | 702 | 703 | -------------------------------------------------------------------------------- /test/EAFv3.0.xsd: -------------------------------------------------------------------------------- 1 | 2 | 57 | 58 | 59 | 60 | ELAN Annotation Format 61 | version 3.0 62 | December 2016 63 | Schema by Alexander Klassmann 17/01/03 64 | Adapted by Hennie Brugman, Han Sloetjes, Micha Hulsbosch 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | The entry id should be unique within the collection of entry elements 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | The Tier name/id should be unique within the collection 105 | of Tier elements 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | A Tier can be associated with a parent Tier by referring to an existing Tier id. 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | The Linguistic Type name/id should be unique within the collection 125 | of Linguistic Type elements 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | A Tier must refer to an existing Linguistic Type id. 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | The Controlled Vocabulary name/id should be unique within the 145 | collection of Controlled Vocabulary elements 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | A Linguistic Type can be associated with a Controlled Vocabulary by 155 | referring to an existing Controlled Vocabulary id. 
156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | The Lexicon Service name/id should be unique within the 166 | collection of Lexicon Service elements 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | A Linguistic Type can be associated with a Lexicon Service by 176 | referring to an existing Lexicon Service id. 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | A key and keyref pair to enforce that a previous annotation idref at least refers 189 | to an annotation id of a reference annotation. 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | Two key-keyref pairs to enforce that time slot references refer to the id of a time slot. 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | The ID of a language identifier, can be referred to by any element that 223 | needs a reference to a language identifier. 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | Reference from a value in a multilingual CV to a language identifier. 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | Reference from a description in a multilingual CV to a language identifier. 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | Reference from a tier to a language identifier, to indicate the (main) language recorded 251 | on that tier. 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | Reference from an individual alignable annotation to a language identifier. 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | Reference from an individual reference annotation to a language identifier. 270 | 271 | 272 | 273 | 274 | 275 | 288 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | This attribute is deprecated. Use MEDIA_DESCRIPTOR elements instead. 422 | 423 | Ignore 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | This is in fact a reference to the parent annotation. 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | A reference to an url of an external Controlled Vocabulary. 530 | Is intended to be mutually exclusive with a sequence of CV_ENTRY_ML elements. 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | An entry in a multilingual controlled vocabulary, containing the values and the descriptions 541 | in multiple languages. 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | A controlled vocabulary entry value with a language attribute. 556 | This allows multilingual controlled vocabularies. 
It adds a language reference attribute 557 | compared to the mono-lingual cv entry element. 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | A description element with a language reference attribute. 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | A reference to the id of an ISO Data Category (url including id). 599 | 600 | 601 | 602 | 603 | 604 | 605 | A reference to an external (closed) Controlled Vocabulary (url). 606 | 607 | 608 | 609 | 610 | 611 | 612 | A reference to the id of an Entry in an external Controlled Vocabulary (id). 613 | 614 | 615 | 616 | 617 | 618 | 619 | A reference to the id of an entry in a lexicon (url, url+id or id) 620 | 621 | 622 | 623 | 624 | 625 | 626 | A reference or hyperlink to any type document (url) 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | The Language element containing a reference to a language name or (if possible persistent) definition. 652 | 653 | 654 | 655 | 656 | 657 | 658 | ISO-639-3 still seems to be the best choice for language codes and closest to persistent language ID's 659 | seem to be the http://cdb.iso.org/lg/... identifiers also used by the iso-language-639-3 component in 660 | the CLARIN ComponentRegistry? 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | The license element can be used to include license information in the eaf file itself. 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | A set containing referential links. 684 | A set can contain both cross-references and grouping referential links. 685 | Apart from an ID the set can have a meaningful, "friendly" name. 686 | A set can have an external reference, a language and a CV reference. 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | A cross reference is a referential link between two existing elements (REF1 and REF2). 707 | Each of these elements can be either an annotation or a referential link. 708 | Optionally the direction of the link can be specified. 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | A referential element for grouping any number of existing elements (the REFS). 735 | Each element can be an annotation or a referential link. 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | Attributes common for both cross- and group references. 750 | Apart from an ID it is possible to associate a meaningful, "friendly" 751 | name to the link. Furthermore a link can have an external reference, a language and a 752 | CV entry reference and a type attribute. 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | An attribute that allows to specify the type of the cross- or group reference/link. 
764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | The entry id should be unique within the 788 | collection of entry elements 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 814 | 815 | 816 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def test_dir(): 8 | return pathlib.Path(__file__).parent 9 | -------------------------------------------------------------------------------- /test/test_elan.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import pytest 3 | 4 | from pympi import Eaf 5 | import unittest 6 | 7 | 8 | class Elan(unittest.TestCase): 9 | def setUp(self): 10 | self.eaf = Eaf() 11 | 12 | def test_add_annotation(self): 13 | self.eaf.add_tier('tier1') 14 | self.eaf.add_annotation('tier1', 0, 1) 15 | self.assertEqual( 16 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 17 | [(0, 1, '')]) 18 | self.eaf.add_annotation('tier1', 1, 2, 'abc') 19 | self.assertEqual( 20 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 21 | sorted([(0, 1, ''), (1, 2, 'abc')])) 22 | self.assertRaises(KeyError, self.eaf.add_annotation, 't1', 0, 0) 23 | self.assertRaises(ValueError, 24 | self.eaf.add_annotation, 'tier1', 1, 1) 25 | self.assertRaises(ValueError, 26 | self.eaf.add_annotation, 'tier1', 2, 1) 27 | self.assertRaises(ValueError, 28 | self.eaf.add_annotation, 'tier1', -1, 1) 29 | self.eaf.add_tier('tier2') 30 | self.eaf.add_ref_annotation('tier2', 'tier1', 0, 'r1') 31 | self.assertRaises(ValueError, 32 | self.eaf.add_annotation, 'tier2', 0, 1) 33 | 34 | def test_add_controlled_vocabulary(self): 35 | self.eaf.add_controlled_vocabulary('cv1') 36 | self.eaf.add_controlled_vocabulary('cv2') 37 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 38 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 39 | ['cv1', 'cv2', 'cv3']) 40 | 41 | def test_add_cv_entry(self): 42 | self.eaf.add_controlled_vocabulary('cv1') 43 | self.eaf.add_language('eng') 44 | self.eaf.add_language('nld') 45 | self.eaf.add_cv_entry( 46 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 47 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 48 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None)}) 49 | self.eaf.add_cv_entry( 50 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 51 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 52 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None), 53 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 54 | self.assertRaises(KeyError, self.eaf.add_cv_entry, 'cv2', 'cve1', []) 55 | self.assertRaises(ValueError, self.eaf.add_cv_entry, 'cv1', 'cve1', 56 | [('H', 'spa', None)]) 57 | 58 | def test_add_cv_description(self): 59 | self.eaf.add_controlled_vocabulary('cv1') 60 | self.eaf.add_language('eng') 61 | self.eaf.add_language('nld') 62 | self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 63 | self.eaf.add_cv_description('cv1', 'nld', None) 64 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [ 65 | ('eng', 'Gesture Phases'), ('nld', None)]) 66 | self.assertRaises(KeyError, self.eaf.add_cv_description, 'cv2', 'eng') 67 | self.assertRaises(ValueError, 68 | self.eaf.add_cv_description, 
'cv1', 'spa', None) 69 | 70 | def test_add_external_ref(self): 71 | self.eaf.add_external_ref('er1', 'ecv', 'location') 72 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 73 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 74 | ['er1', 'er2']) 75 | self.assertRaises(KeyError, self.eaf.add_external_ref, 'er1', 'a', '') 76 | 77 | def test_add_language(self): 78 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 79 | self.eaf.add_language('en') 80 | self.assertEqual( 81 | self.eaf.get_languages(), 82 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 83 | 84 | def test_add_lexicon_ref(self): 85 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 86 | 'lname1') 87 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 88 | 'lname1', 'dc1', 'dc1') 89 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 90 | ['id1', 'id2']) 91 | self.assertEqual(self.eaf.get_lexicon_ref('id1'), { 92 | 'DATCAT_ID': None, 'NAME': 'long name', 'DATCAT_NAME': None, 'URL': 93 | 'url1', 'LEX_REF_ID': 'id1', 'LEXICON_NAME': 'lname1', 'TYPE': 94 | 't1', 'LEXICON_ID': 'lid1'}) 95 | self.assertEqual(self.eaf.get_lexicon_ref('id2'), { 96 | 'DATCAT_ID': 'dc1', 'NAME': 'long name', 'DATCAT_NAME': 'dc1', 97 | 'URL': 'url1', 'LEX_REF_ID': 'id2', 'LEXICON_NAME': 'lname1', 98 | 'TYPE': 't2', 'LEXICON_ID': 'lid1'}) 99 | 100 | def test_add_license(self): 101 | self.eaf.add_license('k1', 'v1') 102 | self.eaf.add_license('k2', 'v2') 103 | self.assertEqual(self.eaf.get_licenses(), [ 104 | ('k1', 'v1'), ('k2', 'v2')]) 105 | 106 | def test_add_linguistic_type(self): 107 | self.eaf.add_linguistic_type('l1') 108 | self.eaf.add_linguistic_type('l2', 'Time_Subdivision', False, True) 109 | self.assertEqual( 110 | self.eaf.linguistic_types['l1'], { 111 | 'CONSTRAINTS': None, 'TIME_ALIGNABLE': 'true', 112 | 'LINGUISTIC_TYPE_ID': 'l1', 'GRAPHIC_REFERENCES': 'false'}) 113 | self.assertEqual( 114 | self.eaf.linguistic_types['l2'], { 115 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 116 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 117 | self.eaf.add_linguistic_type('l3', param_dict={ 118 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 119 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 120 | self.assertEqual(self.eaf.get_parameters_for_linguistic_type('l3'), { 121 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 122 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 123 | 124 | self.assertRaises(KeyError, self.eaf.add_linguistic_type, 'l2', 'a') 125 | 126 | def test_add_linked_file(self): 127 | self.eaf.add_linked_file('/some/file/path/test.wav') 128 | self.assertEqual(self.eaf.media_descriptors[0]['MIME_TYPE'], 129 | 'audio/x-wav') 130 | self.eaf.add_linked_file('/some/file/path/test.mpg', 131 | './test.mpg', time_origin=5, ex_from='ef') 132 | self.assertEqual(self.eaf.media_descriptors[1]['MIME_TYPE'], 133 | 'video/mpeg') 134 | self.assertEqual(self.eaf.media_descriptors[1]['RELATIVE_MEDIA_URL'], 135 | './test.mpg') 136 | self.assertEqual(self.eaf.media_descriptors[1]['TIME_ORIGIN'], 5) 137 | self.assertEqual(self.eaf.media_descriptors[1]['EXTRACTED_FROM'], 'ef') 138 | 139 | self.eaf.add_linked_file('/some/file/path/test.wierd', 140 | mimetype='none/wierd') 141 | self.assertEqual(self.eaf.media_descriptors[2]['MIME_TYPE'], 142 | 'none/wierd') 143 | 144 | self.assertRaises(KeyError, self.eaf.add_linked_file, '/test.wierd') 145 | 146 | def test_add_locale(self): 147 | 
self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 148 | self.eaf.add_locale('en') 149 | self.assertEqual( 150 | self.eaf.get_locales(), 151 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 152 | 153 | def test_add_property(self): 154 | self.eaf.add_property('k1', 'v1') 155 | self.eaf.add_property('k2', 'v2') 156 | self.assertEqual(self.eaf.get_properties(), [ 157 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2')]) 158 | 159 | def test_add_ref_annotation(self): 160 | self.eaf.add_tier('p1') 161 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 162 | self.eaf.add_tier('a1', 'c', 'p1') 163 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 164 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 165 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 166 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 167 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 168 | self.assertEqual( 169 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 170 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 171 | 172 | self.assertRaises(ValueError, 173 | self.eaf.add_ref_annotation, 'p1', 'a1', 0, 'r1') 174 | self.assertRaises(ValueError, self.eaf.add_ref_annotation, 'a1', 175 | 'p1', 2500, 'r') 176 | self.assertRaises(KeyError, 177 | self.eaf.add_ref_annotation, 'aa', 'bb', 0, 'r1') 178 | 179 | def test_add_secondary_linked_file(self): 180 | self.eaf.add_secondary_linked_file('/some/file/path/test.wav') 181 | self.assertEqual(self.eaf.linked_file_descriptors[0]['MIME_TYPE'], 182 | 'audio/x-wav') 183 | self.eaf.add_secondary_linked_file( 184 | '/some/file/path/test.mpg', './test.mpg', 185 | time_origin=5, assoc_with='ef') 186 | self.assertEqual(self.eaf.linked_file_descriptors[1]['MIME_TYPE'], 187 | 'video/mpeg') 188 | self.assertEqual( 189 | self.eaf.linked_file_descriptors[1]['RELATIVE_LINK_URL'], 190 | './test.mpg') 191 | self.assertEqual(self.eaf.linked_file_descriptors[1]['TIME_ORIGIN'], 5) 192 | self.assertEqual( 193 | self.eaf.linked_file_descriptors[1]['ASSOCIATED_WITH'], 'ef') 194 | 195 | self.eaf.add_secondary_linked_file('/some/file/path/test.wierd', 196 | mimetype='none/wierd') 197 | self.assertEqual(self.eaf.linked_file_descriptors[2]['MIME_TYPE'], 198 | 'none/wierd') 199 | 200 | self.assertRaises(KeyError, 201 | self.eaf.add_secondary_linked_file, '/test.wierd') 202 | 203 | def test_add_tier(self): 204 | self.eaf.add_locale('ru') 205 | self.eaf.add_language('RUS') 206 | self.assertEqual(len(self.eaf.get_tier_names()), 1) 207 | self.eaf.add_tier('tier1', 'default-lt', locale='ru', language='RUS') 208 | self.assertEqual(len(self.eaf.get_tier_names()), 2) 209 | self.assertEqual( 210 | self.eaf.get_parameters_for_tier('tier1')['LINGUISTIC_TYPE_REF'], 211 | 'default-lt') 212 | self.assertEqual( 213 | self.eaf.get_parameters_for_tier('tier1')['DEFAULT_LOCALE'], 214 | 'ru') 215 | self.assertEqual( 216 | self.eaf.get_parameters_for_tier('tier1')['LANG_REF'], 'RUS') 217 | 218 | self.eaf.add_tier('tier2', 'non-existing-linguistic-type') 219 | self.assertEqual(len(self.eaf.get_tier_names()), 3) 220 | self.assertEqual( 221 | self.eaf.get_parameters_for_tier('tier2')['LINGUISTIC_TYPE_REF'], 222 | 'default-lt') 223 | self.assertEqual(['default', 'tier1', 'tier2'], 224 | sorted(self.eaf.get_tier_names())) 225 | 226 | self.eaf.add_tier('tier3', None, 'tier1', 'en', 'person', 'person2') 227 | self.assertEqual(self.eaf.get_parameters_for_tier('tier3'), { 228 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': None, 229 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 
230 | 'LANG_REF': None, 'PARTICIPANT': 'person', 'TIER_ID': 'tier3'}) 231 | 232 | self.eaf.add_tier('tier4', tier_dict={ 233 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': 'en', 234 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 235 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier4', 'LANG_ID': 'RUS'}) 236 | self.assertEqual(self.eaf.get_parameters_for_tier('tier4'), { 237 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': 'en', 238 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 239 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier4', 'LANG_ID': 'RUS'}) 240 | 241 | for tier in ['tier1', 'tier2', 'tier3']: 242 | self.assertEqual(self.eaf.tiers[tier][0], {}) 243 | self.assertEqual(self.eaf.tiers[tier][1], {}) 244 | 245 | self.assertRaises(ValueError, self.eaf.add_tier, '') 246 | 247 | def test_clean_time_slots(self): 248 | self.eaf.add_tier('tier1') 249 | self.eaf.add_tier('tier2') 250 | self.eaf.add_annotation('tier1', 0, 1, 'a1') 251 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 252 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 253 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 254 | ts = [x for x in self.eaf.timeslots] 255 | self.eaf.remove_annotation('tier1', 1500, False) 256 | self.assertEqual(len(ts), len(self.eaf.timeslots)) 257 | self.eaf.clean_time_slots() 258 | self.assertEqual(len(ts)-2, len(self.eaf.timeslots)) 259 | 260 | def test_copy_tier(self): 261 | self.eaf.add_tier('test1') 262 | self.eaf.add_annotation('test1', 0, 100, 'a') 263 | self.eaf.add_annotation('test1', 100, 200, 'a') 264 | self.eaf.add_tier('test2') 265 | self.eaf.add_annotation('test2', 0, 100, 'a') 266 | self.eaf.add_annotation('test2', 100, 200, 'a') 267 | target = Eaf() 268 | self.eaf.copy_tier(target, 'test2') 269 | self.assertEqual(sorted(target.get_parameters_for_tier('test2')), 270 | sorted(self.eaf.get_parameters_for_tier('test2'))) 271 | self.assertEqual( 272 | sorted(target.get_annotation_data_for_tier('test2')), 273 | sorted(self.eaf.get_annotation_data_for_tier('test2'))) 274 | 275 | def test_create_gaps_and_overlaps_tier(self): 276 | self.eaf.add_tier('t1') 277 | self.eaf.add_tier('t2') 278 | # Pause 279 | self.eaf.add_annotation('t1', 0, 1000) 280 | self.eaf.add_annotation('t1', 1200, 2000) 281 | # Gap 282 | self.eaf.add_annotation('t2', 2200, 3000) 283 | # Overlap 284 | self.eaf.add_annotation('t1', 2800, 4000) 285 | # Exact fto 286 | self.eaf.add_annotation('t2', 4000, 5000) 287 | # Within overlap 288 | self.eaf.add_annotation('t1', 4200, 4800) 289 | # Long pause 290 | self.eaf.add_annotation('t2', 14800, 15000) 291 | # Long gap 292 | self.eaf.add_annotation('t1', 20000, 20500) 293 | self.eaf.create_gaps_and_overlaps_tier('t1', 't2') 294 | self.eaf.create_gaps_and_overlaps_tier('t1', 't2', 'tt', 3000) 295 | self.assertEqual( 296 | sorted(self.eaf.get_annotation_data_for_tier('t1_t2_ftos')), 297 | [(1001, 1199, 'P1_t1'), (2001, 2199, 'G12_t1_t2'), 298 | (2800, 3000, 'O21_t2_t1'), (4200, 4800, 'W21_t2_t1'), 299 | (5001, 14799, 'P2_t2'), (15001, 19999, 'G21_t2_t1')]) 300 | self.assertEqual( 301 | sorted(self.eaf.get_annotation_data_for_tier('tt')), 302 | [(1001, 1199, 'P1_t1'), (2001, 2199, 'G12_t1_t2'), 303 | (2800, 3000, 'O21_t2_t1'), (4200, 4800, 'W21_t2_t1')]) 304 | self.assertEqual( 305 | sorted(self.eaf.get_annotation_data_for_tier('t1_t2_ftos') + 306 | [(4000, 4000, 'O12_t1_t2')]), 307 | list(self.eaf.get_gaps_and_overlaps('t1', 't2'))) 308 | self.assertEqual( 309 | sorted(self.eaf.get_annotation_data_for_tier('tt') + 310 | [(4000, 4000, 'O12_t1_t2')]), 311 | 
list(self.eaf.get_gaps_and_overlaps('t1', 't2', 3000))) 312 | 313 | def test_extract(self): 314 | self.eaf.add_tier('tier1') 315 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 316 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 317 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 318 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 319 | self.eaf.add_tier('tier2') 320 | e1 = self.eaf.extract(1500, 2500) 321 | self.assertEqual(e1.adocument, self.eaf.adocument) 322 | self.assertEqual(e1.licenses, self.eaf.licenses) 323 | self.assertEqual(e1.header, self.eaf.header) 324 | self.assertEqual(e1.media_descriptors, self.eaf.media_descriptors) 325 | self.assertEqual(e1.linked_file_descriptors, 326 | self.eaf.linked_file_descriptors) 327 | self.assertEqual(e1.linguistic_types, self.eaf.linguistic_types) 328 | self.assertEqual(e1.locales, self.eaf.locales) 329 | self.assertEqual(e1.constraints, self.eaf.constraints) 330 | self.assertEqual(e1.controlled_vocabularies, 331 | self.eaf.controlled_vocabularies) 332 | self.assertEqual(e1.external_refs, self.eaf.external_refs) 333 | self.assertEqual(e1.lexicon_refs, self.eaf.lexicon_refs) 334 | self.assertEqual(e1.get_tier_names(), self.eaf.get_tier_names()) 335 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), 336 | [(1000, 2000, 'a2'), (2000, 3000, 'a3')]) 337 | e1 = self.eaf.extract(1000, 2000) 338 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), 339 | [(0, 1000, 'a1'), (1000, 2000, 'a2'), (2000, 3000, 'a3')]) 340 | e1 = self.eaf.extract(4001, 30000) 341 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), []) 342 | 343 | def test_filter_annotations(self): 344 | self.eaf.add_tier('tier1') 345 | self.eaf.add_annotation('tier1', 0, 1, '1') 346 | self.eaf.add_annotation('tier1', 1, 2, '2') 347 | self.eaf.add_annotation('tier1', 2, 3, '3') 348 | self.eaf.add_annotation('tier1', 3, 4, '4') 349 | self.eaf.add_annotation('tier1', 4, 5, 'a') 350 | self.eaf.add_annotation('tier1', 5, 6, 'b') 351 | self.eaf.add_annotation('tier1', 6, 7, 'c') 352 | self.eaf.add_annotation('tier1', 7, 8, 'd') 353 | 354 | # No in or exclude 355 | self.eaf.filter_annotations('tier1') 356 | self.assertEqual( 357 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 358 | sorted(self.eaf.get_annotation_data_for_tier('tier1'))) 359 | 360 | # Inclusion 361 | self.eaf.filter_annotations('tier1', filtin=['1', '2', '3']) 362 | self.assertEqual( 363 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 364 | sorted([(0, 1, '1'), (2, 3, '3'), (1, 2, '2')])) 365 | self.eaf.filter_annotations('tier1', filtin=['[123]'], regex=True) 366 | self.assertEqual( 367 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 368 | sorted([(0, 1, '1'), (2, 3, '3'), (1, 2, '2')])) 369 | 370 | # Exclusion 371 | self.eaf.filter_annotations('tier1', filtex=['1', '2', '3', '4']) 372 | self.assertEqual( 373 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 374 | sorted([(4, 5, 'a'), (6, 7, 'c'), (5, 6, 'b'), (7, 8, 'd')])) 375 | self.eaf.filter_annotations('tier1', filtex=['[1234]'], regex=True) 376 | self.assertEqual( 377 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 378 | sorted([(4, 5, 'a'), (6, 7, 'c'), (5, 6, 'b'), (7, 8, 'd')])) 379 | 380 | # Combination 381 | self.eaf.filter_annotations('tier1', filtin=['1', '2', '3', '4'], 382 | filtex=['1', '2']) 383 | self.assertEqual( 384 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 385 | sorted([(2, 3, '3'), (3, 4, 
'4')])) 386 | self.eaf.filter_annotations('tier1', tier_name='t', filtin=['[1234]'], 387 | filtex=['[12]'], regex=True) 388 | self.assertEqual( 389 | sorted(self.eaf.get_annotation_data_for_tier('t')), 390 | sorted([(2, 3, '3'), (3, 4, '4')])) 391 | 392 | self.assertRaises(KeyError, self.eaf.filter_annotations, 'a') 393 | 394 | def test_get_annotation_data_at_time(self): 395 | self.eaf.add_tier('tier1') 396 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 397 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 398 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 399 | self.assertEqual( 400 | sorted(self.eaf.get_annotation_data_at_time('tier1', 500)), 401 | [(0, 1000, 'a1')]) 402 | self.assertEqual( 403 | sorted(self.eaf.get_annotation_data_at_time('tier1', 1000)), 404 | sorted([(0, 1000, 'a1'), (1000, 2000, 'a2')])) 405 | self.assertEqual( 406 | sorted(self.eaf.get_annotation_data_at_time('tier1', 3001)), []) 407 | self.assertRaises(KeyError, 408 | self.eaf.get_annotation_data_at_time, 'tier2', 0) 409 | 410 | def test_get_annotation_data_after_time(self): 411 | self.eaf.add_tier('tier1') 412 | self.eaf.add_annotation('tier1', 500, 1000, 'a1') 413 | self.eaf.add_annotation('tier1', 2000, 3000, 'a2') 414 | self.eaf.add_annotation('tier1', 4000, 5000, 'a3') 415 | self.assertEqual( 416 | sorted(self.eaf.get_annotation_data_after_time('tier1', 3001)), 417 | [(4000, 5000, 'a3')]) 418 | self.assertEqual( 419 | sorted(self.eaf.get_annotation_data_after_time('tier1', 505)), 420 | [(500, 1000, 'a1')]) 421 | self.assertEqual( 422 | sorted(self.eaf.get_annotation_data_after_time('tier1', 5001)), 423 | []) 424 | self.assertRaises(KeyError, 425 | self.eaf.get_annotation_data_after_time, 'tier2', 0) 426 | 427 | def test_get_annotation_data_before_time(self): 428 | self.eaf.add_tier('tier1') 429 | self.eaf.add_annotation('tier1', 500, 1000, 'a1') 430 | self.eaf.add_annotation('tier1', 2000, 3000, 'a2') 431 | self.eaf.add_annotation('tier1', 4000, 5000, 'a3') 432 | self.assertEqual( 433 | sorted(self.eaf.get_annotation_data_before_time('tier1', 1001)), 434 | [(500, 1000, 'a1')]) 435 | self.assertEqual( 436 | sorted(self.eaf.get_annotation_data_before_time('tier1', 499)), 437 | []) 438 | self.assertEqual( 439 | sorted(self.eaf.get_annotation_data_before_time('tier1', 750)), 440 | [(500, 1000, 'a1')]) 441 | self.assertRaises(KeyError, 442 | self.eaf.get_annotation_data_before_time, 'tier2', 0) 443 | 444 | def test_get_annotation_data_between_times(self): 445 | self.eaf.add_tier('tier1') 446 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 447 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 448 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 449 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 450 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 451 | 'tier1', 1500, 2500)), [(1000, 2000, 'a2'), (2000, 3000, 'a3')]) 452 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 453 | 'tier1', 1000, 2000)), [(0, 1000, 'a1'), 454 | (1000, 2000, 'a2'), (2000, 3000, 'a3')]) 455 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 456 | 'tier1', 4001, 30000)), []) 457 | self.assertRaises( 458 | KeyError, self.eaf.get_annotation_data_between_times, 'ter1', 0, 1) 459 | 460 | def test_get_annotation_data_for_tier(self): 461 | self.eaf.add_tier('tier1') 462 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 463 | self.eaf.add_annotation('tier1', 1000, 2000, 'a1') 464 | self.eaf.add_annotation('tier1', 2000, 3000, 'a1') 465 | self.assertEqual( 466 | 
sorted(self.eaf.get_annotation_data_for_tier('tier1')), 467 | sorted([(0, 1000, 'a1'), (2000, 3000, 'a1'), (1000, 2000, 'a1')])) 468 | self.assertRaises(KeyError, 469 | self.eaf.get_annotation_data_for_tier, 'tier2') 470 | 471 | def test_get_child_tiers_for(self): 472 | self.eaf.add_tier('parent1') 473 | self.eaf.add_tier('parent2') 474 | self.eaf.add_tier('child11', parent='parent1') 475 | self.eaf.add_tier('child12', parent='parent1') 476 | self.eaf.add_tier('child13', parent='parent1') 477 | self.eaf.add_tier('orphan21') 478 | self.eaf.add_tier('orphan22') 479 | self.eaf.add_tier('orphan23') 480 | self.assertEqual(sorted(self.eaf.child_tiers_for('parent1')), 481 | ['child11', 'child12', 'child13']) 482 | self.assertEqual(sorted(self.eaf.child_tiers_for('parent2')), []) 483 | self.assertRaises(KeyError, self.eaf.child_tiers_for, 'parent3') 484 | 485 | def test_get_full_time_interval(self): 486 | self.assertEqual(self.eaf.get_full_time_interval(), (0, 0)) 487 | self.eaf.add_tier('tier1') 488 | self.eaf.add_annotation('tier1', 100, 500, 'a') 489 | self.eaf.add_annotation('tier1', 500, 1000, 'b') 490 | self.assertEqual(self.eaf.get_full_time_interval(), (100, 1000)) 491 | 492 | def test_get_gaps_and_overlaps2(self): 493 | self.eaf.add_tier('t1') 494 | self.eaf.add_tier('t2') 495 | # Pause 496 | self.eaf.add_annotation('t1', 0, 1000) 497 | self.eaf.add_annotation('t1', 1200, 2000) 498 | # Gap 499 | self.eaf.add_annotation('t2', 2200, 3000) 500 | # Overlap 501 | self.eaf.add_annotation('t1', 2800, 4000) 502 | # Exact fto 503 | self.eaf.add_annotation('t2', 4000, 5000) 504 | # Within overlap 505 | self.eaf.add_annotation('t1', 4200, 4800) 506 | # Long pause 507 | self.eaf.add_annotation('t2', 14800, 15000) 508 | # Long gap 509 | self.eaf.add_annotation('t1', 20000, 20500) 510 | g1 = self.eaf.get_gaps_and_overlaps2('t1', 't2') 511 | g2 = self.eaf.get_gaps_and_overlaps2('t1', 't2', 3000) 512 | self.assertEqual(sorted(g1), [ 513 | (1000, 1200, 'P1'), (2000, 2200, 'G12'), (2800, 3000, 'O21'), 514 | (4200, 4800, 'W21'), (5000, 14800, 'P2'), (15000, 20000, 'G21')]) 515 | self.assertEqual(sorted(g2), [ 516 | (1000, 1200, 'P1'), (2000, 2200, 'G12'), 517 | (2800, 3000, 'O21'), (4200, 4800, 'W21')]) 518 | self.assertRaises(KeyError, list, 519 | self.eaf.get_gaps_and_overlaps2('2', '3')) 520 | 521 | def test_get_controlled_vocabulary_names(self): 522 | self.eaf.add_controlled_vocabulary('cv1') 523 | self.eaf.add_controlled_vocabulary('cv2') 524 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 525 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 526 | ['cv1', 'cv2', 'cv3']) 527 | 528 | def test_get_cv_entry(self): 529 | self.eaf.add_controlled_vocabulary('cv1') 530 | self.eaf.add_language('eng') 531 | self.eaf.add_language('nld') 532 | self.eaf.add_cv_entry( 533 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 534 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 535 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None)}) 536 | self.eaf.add_cv_entry( 537 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 538 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 539 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None), 540 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 541 | self.assertRaises(KeyError, self.eaf.get_cv_entries, 'cv2') 542 | 543 | def test_get_cv_descriptions(self): 544 | self.eaf.add_controlled_vocabulary('cv1') 545 | self.eaf.add_language('eng') 546 | self.eaf.add_language('nld') 547 | 
self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 548 | self.eaf.add_cv_description('cv1', 'nld', None) 549 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [ 550 | ('eng', 'Gesture Phases'), ('nld', None)]) 551 | self.assertRaises(KeyError, self.eaf.get_cv_descriptions, 'cv2') 552 | 553 | def test_get_external_ref(self): 554 | self.eaf.add_external_ref('er1', 'ecv', 'location') 555 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 556 | self.assertEqual(self.eaf.get_external_ref('er1'), ('ecv', 'location')) 557 | self.assertRaises(KeyError, self.eaf.get_external_ref, 'er3') 558 | 559 | def test_get_external_ref_names(self): 560 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 561 | []) 562 | self.eaf.add_external_ref('er1', 'ecv', 'location') 563 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 564 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 565 | ['er1', 'er2']) 566 | 567 | def test_get_lexicon_ref(self): 568 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 569 | 'lname1') 570 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 571 | 'lname1', 'dc1', 'dc1') 572 | self.assertEqual(self.eaf.get_lexicon_ref('id1'), { 573 | 'DATCAT_ID': None, 'NAME': 'long name', 'DATCAT_NAME': None, 'URL': 574 | 'url1', 'LEX_REF_ID': 'id1', 'LEXICON_NAME': 'lname1', 'TYPE': 575 | 't1', 'LEXICON_ID': 'lid1'}) 576 | self.assertEqual(self.eaf.get_lexicon_ref('id2'), { 577 | 'DATCAT_ID': 'dc1', 'NAME': 'long name', 'DATCAT_NAME': 'dc1', 578 | 'URL': 'url1', 'LEX_REF_ID': 'id2', 'LEXICON_NAME': 'lname1', 579 | 'TYPE': 't2', 'LEXICON_ID': 'lid1'}) 580 | self.assertRaises(KeyError, self.eaf.get_lexicon_ref, 'id3') 581 | 582 | def test_get_lexicon_ref_names(self): 583 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), []) 584 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 585 | 'lname1') 586 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 587 | 'lname1', 'dc1', 'dc1') 588 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 589 | ['id1', 'id2']) 590 | 591 | def test_get_languages(self): 592 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 593 | self.eaf.add_language('en') 594 | self.assertEqual( 595 | self.eaf.get_languages(), 596 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 597 | 598 | def test_get_licenses(self): 599 | self.eaf.add_license('k1', 'v1') 600 | self.eaf.add_license('k2', 'v2') 601 | self.eaf.add_license('k3', 'v3') 602 | self.eaf.add_license('k4', 'v4') 603 | self.eaf.add_license('k4', 'v5') 604 | self.assertEqual(self.eaf.get_licenses(), [ 605 | ('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), 606 | ('k4', 'v5')]) 607 | 608 | def test_get_linguistic_types_names(self): 609 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 610 | ['default-lt']) 611 | self.eaf.add_linguistic_type('l1') 612 | self.eaf.add_linguistic_type('l2') 613 | self.eaf.add_linguistic_type('l3') 614 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 615 | ['default-lt', 'l1', 'l2', 'l3']) 616 | 617 | def test_get_linked_files(self): 618 | self.eaf.add_linked_file('/some/file/path/test.wav') 619 | self.eaf.add_linked_file('/some/file/path/test.mpg', './test.mpg', 620 | time_origin=5, ex_from='ef') 621 | self.assertEqual(self.eaf.get_linked_files(), 622 | self.eaf.media_descriptors) 623 | 624 | def test_get_locales(self): 625 | self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 626 | 
self.eaf.add_locale('en') 627 | self.assertEqual( 628 | self.eaf.get_locales(), 629 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 630 | 631 | def test_get_parameters_for_tier(self): 632 | self.eaf.add_tier('tier1', 'default-lt', 'tier1', None, 'person', 633 | 'person2') 634 | self.eaf.add_tier('tier2') 635 | self.assertEqual(self.eaf.get_parameters_for_tier('tier1'), { 636 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': None, 'LANG_REF': None, 637 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 638 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier1'}) 639 | self.assertEqual(self.eaf.get_parameters_for_tier('tier2'), { 640 | 'PARTICIPANT': None, 'DEFAULT_LOCALE': None, 641 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'ANNOTATOR': None, 642 | 'LANG_REF': None, 'PARENT_REF': None, 'TIER_ID': 'tier2'}) 643 | 644 | def test_get_parameters_for_linguistic_type(self): 645 | self.eaf.add_tier('tier2') 646 | self.eaf.add_linguistic_type('l2', 'Time_Subdivision', False, True) 647 | self.assertEqual(self.eaf.get_parameters_for_linguistic_type('l2'), { 648 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 649 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 650 | 651 | def test_get_properties(self): 652 | self.eaf.add_property('k1', 'v1') 653 | self.eaf.add_property('k2', 'v2') 654 | self.eaf.add_property('k3', 'v3') 655 | self.eaf.add_property('k4', 'v4') 656 | self.eaf.add_property('k4', 'v5') 657 | self.assertEqual(self.eaf.get_properties(), [ 658 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2'), 659 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 660 | 661 | def test_get_ref_annotation_at_time(self): 662 | self.eaf.add_tier('p1') 663 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 664 | self.eaf.add_tier('a1', 'c', 'p1') 665 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 666 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 667 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 668 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 669 | self.eaf.add_ref_annotation('a1', 'p1', 3000, 'ref2') 670 | self.assertEqual(self.eaf.get_ref_annotation_at_time('a1', 500), 671 | [(0, 1000, 'ref1', 'a1')]) 672 | self.assertEqual(self.eaf.get_ref_annotation_at_time('p1', 2500), []) 673 | self.assertRaises(KeyError, 674 | self.eaf.get_ref_annotation_at_time, 'eau', 0) 675 | 676 | def test_ref_get_annotation_data_after_time(self): 677 | pass 678 | 679 | def test_ref_get_annotation_data_before_time(self): 680 | pass 681 | 682 | def test_get_ref_annotation_data_between_times(self): 683 | self.eaf.add_tier('p1') 684 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 685 | self.eaf.add_tier('a1', 'c', 'p1') 686 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 687 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 688 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 689 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 690 | self.eaf.add_ref_annotation('a1', 'p1', 3000, 'ref2') 691 | self.assertEqual(sorted(self.eaf.get_ref_annotation_data_between_times( 692 | 'a1', 500, 3500)), sorted([ 693 | (0, 1000, 'ref1', 'a1'), (3000, 4000, 'ref2', 'a3')])) 694 | self.assertRaises(KeyError, 695 | self.eaf.get_ref_annotation_data_between_times, 696 | 'eau', 0, 1) 697 | 698 | def test_get_ref_annotation_data_for_tier(self): 699 | self.eaf.add_tier('p1') 700 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 701 | self.eaf.add_tier('a1', 'c', 'p1') 702 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 703 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 704 
| self.eaf.add_annotation('p1', 3000, 4000, 'a3') 705 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 706 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 707 | self.assertEqual( 708 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 709 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 710 | self.assertRaises(KeyError, 711 | self.eaf.get_ref_annotation_data_for_tier, 'aaa') 712 | self.assertEqual(self.eaf.get_ref_annotation_data_for_tier('p1'), []) 713 | 714 | def test_get_secondary_linked_files(self): 715 | self.eaf.add_secondary_linked_file('/some/file/path/test.wav') 716 | self.eaf.add_secondary_linked_file( 717 | '/some/file/path/test.mpg', './test.mpg', time_origin=5, 718 | assoc_with='ef') 719 | self.assertEqual(self.eaf.get_secondary_linked_files(), 720 | self.eaf.linked_file_descriptors) 721 | 722 | def test_get_tier_ids_for_linguistic_type(self): 723 | self.eaf.add_linguistic_type('l1') 724 | self.eaf.add_linguistic_type('l2') 725 | self.eaf.add_tier('t1', 'l1') 726 | self.eaf.add_tier('t2', 'l2') 727 | self.eaf.add_tier('t3', 'l2') 728 | self.eaf.add_tier('t4', parent='t1') 729 | self.eaf.add_tier('t5', 'l1', parent='t1') 730 | self.eaf.add_tier('t6') 731 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 732 | 'l1')), ['t1', 't5']) 733 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 734 | 'l2')), ['t2', 't3']) 735 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 736 | 'default-lt', 't1')), ['t4']) 737 | 738 | def test_get_tier_names(self): 739 | self.eaf.add_tier('tier1') 740 | self.eaf.add_tier('tier2') 741 | self.eaf.add_tier('tier3') 742 | self.eaf.add_tier('tier4') 743 | self.assertEqual(sorted(self.eaf.get_tier_names()), 744 | ['default', 'tier1', 'tier2', 'tier3', 'tier4']) 745 | 746 | def test_merge_tiers(self): 747 | self.eaf.add_tier('tier1') 748 | self.eaf.add_tier('tier2') 749 | self.eaf.add_tier('tier3') 750 | # Overlap 751 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 752 | self.eaf.add_annotation('tier2', 500, 1500, 'b1') 753 | 754 | # Gap 755 | self.eaf.add_annotation('tier1', 2000, 2500, 'a2') 756 | self.eaf.add_annotation('tier2', 3000, 4000, 'b2') 757 | 758 | # Within 759 | self.eaf.add_annotation('tier1', 5000, 6000, 'a3') 760 | self.eaf.add_annotation('tier2', 5100, 5900, 'b3') 761 | 762 | # Three 763 | self.eaf.add_annotation('tier1', 6050, 6250, 'c') 764 | self.eaf.add_annotation('tier1', 6250, 6500, 'c') 765 | self.eaf.add_annotation('tier1', 6500, 6750, 'c') 766 | self.eaf.add_annotation('tier3', 6100, 6800, 'd') 767 | 768 | # Gap of 5 ms 769 | self.eaf.add_annotation('tier1', 7000, 7995, 'a4') 770 | self.eaf.add_annotation('tier2', 8000, 9000, 'b4') 771 | 772 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_0') 773 | self.eaf.merge_tiers(['tier1'], 'm_a', 5) 774 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_5', 5) 775 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_6', 6) 776 | self.eaf.merge_tiers(['tier1', 'tier2', 'tier3'], 'mm') 777 | 778 | m0 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 779 | (5000, 6000, 'a3_b3'), (6050, 6250, 'c'), (6250, 6500, 'c'), 780 | (6500, 6750, 'c'), (7000, 7995, 'a4'), (8000, 9000, 'b4')] 781 | m5 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 782 | (5000, 6000, 'a3_b3'), (6050, 6750, 'c_c_c'), (7000, 7995, 'a4'), 783 | (8000, 9000, 'b4')] 784 | m6 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 785 | (5000, 6000, 'a3_b3'), (6050, 6750, 'c_c_c'), 786 | (7000, 9000, 'a4_b4')] 787 | mm = [(0, 
1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 788 | (5000, 6000, 'a3_b3'), (6050, 6800, 'c_d_c_c'), 789 | (7000, 7995, 'a4'), (8000, 9000, 'b4')] 790 | self.assertEqual( 791 | sorted(self.eaf.get_annotation_data_for_tier('m_0')), m0) 792 | self.assertEqual( 793 | sorted(self.eaf.get_annotation_data_for_tier('m_5')), m5) 794 | self.assertEqual( 795 | sorted(self.eaf.get_annotation_data_for_tier('m_6')), m6) 796 | self.assertEqual( 797 | sorted(self.eaf.get_annotation_data_for_tier('mm')), mm) 798 | self.assertRaises(KeyError, self.eaf.merge_tiers, ['a', 'b']) 799 | 800 | def test_remove_all_annotations_from_tier(self): 801 | self.eaf.add_tier('tier1') 802 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 803 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 804 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 805 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 806 | self.eaf.remove_all_annotations_from_tier('tier1') 807 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier1'), []) 808 | 809 | def test_remove_annotation(self): 810 | self.eaf.add_tier('tier1') 811 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 812 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 813 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 814 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 815 | self.assertEqual(self.eaf.remove_annotation('tier1', 500), 1) 816 | self.assertEqual( 817 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 818 | sorted([(1000, 2000, 'a2'), (2000, 3000, 'a3'), 819 | (3000, 4000, 'a4')])) 820 | 821 | self.assertEqual(self.eaf.remove_annotation('tier1', 2000), 2) 822 | self.assertEqual( 823 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 824 | sorted([(3000, 4000, 'a4')])) 825 | self.assertEqual(self.eaf.remove_annotation('tier1', 3500), 1) 826 | self.assertEqual( 827 | self.eaf.get_annotation_data_for_tier('tier1'), []) 828 | self.assertRaises(KeyError, self.eaf.remove_annotation, 'tier2', 0) 829 | 830 | def test_remove_controlled_vocabulary(self): 831 | self.eaf.add_controlled_vocabulary('cv1') 832 | self.eaf.add_controlled_vocabulary('cv2') 833 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 834 | self.eaf.remove_controlled_vocabulary('cv3') 835 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 836 | ['cv1', 'cv2']) 837 | self.eaf.remove_controlled_vocabulary('cv1') 838 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 839 | ['cv2']) 840 | self.assertRaises(KeyError, self.eaf.remove_controlled_vocabulary, 'c') 841 | 842 | def test_remove_cv_entry(self): 843 | self.eaf.add_controlled_vocabulary('cv1') 844 | self.eaf.add_language('eng') 845 | self.eaf.add_language('nld') 846 | self.eaf.add_cv_entry( 847 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 848 | self.eaf.add_cv_entry( 849 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 850 | self.eaf.remove_cv_entry('cv1', 'cve1') 851 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 852 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 853 | self.assertRaises(KeyError, self.eaf.remove_cv_entry, 'cv2', 'c') 854 | self.assertRaises(KeyError, self.eaf.remove_cv_entry, 'cv1', 'c') 855 | 856 | def test_remove_cv_description(self): 857 | self.eaf.add_controlled_vocabulary('cv1') 858 | self.eaf.add_language('eng') 859 | self.eaf.add_language('nld') 860 | self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 861 | self.eaf.add_cv_description('cv1', 'nld', None) 862 | self.eaf.remove_cv_description('cv1', 'nld')
863 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [('eng', 'Gesture Phases')]) 864 | self.assertRaises(KeyError, self.eaf.get_cv_descriptions, 'cv2') 865 | 866 | def test_remove_external_ref(self): 867 | self.eaf.add_external_ref('er1', 'ecv', 'location') 868 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 869 | self.eaf.remove_external_ref('er1') 870 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), ['er2']) 871 | 872 | def test_remove_language(self): 873 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 874 | self.eaf.add_language('en') 875 | self.eaf.remove_language('ru') 876 | self.assertEqual(self.eaf.get_languages(), {'en': (None, None)}) 877 | self.assertRaises(KeyError, self.eaf.remove_language, 'ru') 878 | 879 | def test_remove_lexicon_ref(self): 880 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 881 | 'lname1') 882 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 883 | 'lname1', 'dc1', 'dc1') 884 | self.eaf.remove_lexicon_ref('id1') 885 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 886 | ['id2']) 887 | self.assertRaises(KeyError, self.eaf.remove_lexicon_ref, 'i') 888 | 889 | def test_remove_license(self): 890 | self.eaf.add_license('k1', 'v1') 891 | self.eaf.add_license('k2', 'v2') 892 | self.eaf.add_license('k3', 'v3') 893 | self.eaf.add_license('k4', 'v4') 894 | self.eaf.add_license('k4', 'v5') 895 | self.eaf.remove_license('a1') 896 | self.assertEqual(self.eaf.get_licenses(), [ 897 | ('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), 898 | ('k4', 'v5')]) 899 | self.eaf.remove_license('k1') 900 | self.assertEqual(self.eaf.get_licenses(), [ 901 | ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 902 | self.eaf.remove_license(url='v2') 903 | self.assertEqual(self.eaf.get_licenses(), [ 904 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 905 | self.eaf.remove_license('k4') 906 | self.assertEqual(self.eaf.get_licenses(), [('k3', 'v3')]) 907 | self.eaf.remove_license() 908 | self.assertEqual(self.eaf.get_licenses(), []) 909 | 910 | def test_remove_linguistic_type(self): 911 | self.eaf.add_linguistic_type('l1') 912 | self.eaf.add_linguistic_type('l2') 913 | self.eaf.add_linguistic_type('l3') 914 | self.eaf.remove_linguistic_type('l2') 915 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 916 | ['default-lt', 'l1', 'l3']) 917 | self.assertRaises(KeyError, self.eaf.remove_linguistic_type, 'a') 918 | 919 | def test_remove_linked_files(self): 920 | self.eaf.add_linked_file('/some/file/path/test.wav', 921 | './test.wav', time_origin=5, ex_from='ef1') 922 | self.eaf.add_linked_file('/some/file/path/test2.wav', 923 | './test2.wav', time_origin=10, ex_from='ef2') 924 | self.eaf.add_linked_file('/some/file/path/test3.mpg', 925 | './test3.mpg', time_origin=15, ex_from='ef3') 926 | self.eaf.add_linked_file('/some/file/path/test4.mpg', 927 | './test4.mpg', time_origin=20, ex_from='ef3') 928 | self.eaf.remove_linked_files(mimetype='audio/x-wav') 929 | self.assertEqual(len(self.eaf.get_linked_files()), 2) 930 | self.eaf.remove_linked_files(ex_from='ef1') 931 | self.assertEqual(len(self.eaf.get_linked_files()), 2) 932 | self.eaf.remove_linked_files(file_path='/some/file/path/test4.mpg') 933 | self.assertEqual(len(self.eaf.get_linked_files()), 1) 934 | self.eaf.remove_linked_files(relpath='./test3.mpg') 935 | self.assertEqual(self.eaf.get_linked_files(), []) 936 | 937 | def test_remove_locale(self): 938 | self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 939 | self.eaf.add_locale('en') 940 |
self.eaf.remove_locale('ru') 941 | self.assertEqual(self.eaf.get_locales(), {'en': (None, None)}) 942 | self.assertRaises(KeyError, self.eaf.remove_locale, 'ru') 943 | 944 | def test_remove_property(self): 945 | self.eaf.add_property('k1', 'v1') 946 | self.eaf.add_property('k2', 'v2') 947 | self.eaf.add_property('k3', 'v3') 948 | self.eaf.add_property('k4', 'v4') 949 | self.eaf.add_property('k4', 'v5') 950 | self.eaf.remove_property('a1') 951 | self.assertEqual(self.eaf.get_properties(), [ 952 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2'), 953 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 954 | self.eaf.remove_property('k1') 955 | self.assertEqual(self.eaf.get_properties(), [ 956 | ('lastUsedAnnotation', 0), ('k2', 'v2'), ('k3', 'v3'), 957 | ('k4', 'v4'), ('k4', 'v5')]) 958 | self.eaf.remove_property(value='v2') 959 | self.assertEqual(self.eaf.get_properties(), [ 960 | ('lastUsedAnnotation', 0), ('k3', 'v3'), ('k4', 'v4'), 961 | ('k4', 'v5')]) 962 | self.eaf.remove_property('k4') 963 | self.assertEqual(self.eaf.get_properties(), [ 964 | ('lastUsedAnnotation', 0), ('k3', 'v3')]) 965 | self.eaf.remove_property() 966 | self.assertEqual(self.eaf.get_properties(), []) 967 | 968 | def test_remove_ref_annotation(self): 969 | self.eaf.add_tier('p1') 970 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 971 | self.eaf.add_tier('a1', 'c', 'p1') 972 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 973 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 974 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 975 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 976 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 977 | self.assertEqual( 978 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 979 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 980 | self.eaf.remove_ref_annotation('a1', 500) 981 | self.assertEqual( 982 | sorted([(3000, 4000, '', 'a3')]), 983 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 984 | self.assertRaises(KeyError, self.eaf.remove_ref_annotation, 'aa', 0) 985 | 986 | def test_remove_secondary_linked_files(self): 987 | self.eaf.add_secondary_linked_file( 988 | '/some/file/path/test.wav', './test.wav', time_origin=5, 989 | assoc_with='ef1') 990 | self.eaf.add_secondary_linked_file( 991 | '/some/file/path/test2.wav', './test2.wav', time_origin=10, 992 | assoc_with='ef2') 993 | self.eaf.add_secondary_linked_file( 994 | '/some/file/path/test3.mpg', './test3.mpg', time_origin=15, 995 | assoc_with='ef3') 996 | self.eaf.add_secondary_linked_file( 997 | '/some/file/path/test4.mpg', './test4.mpg', time_origin=20, 998 | assoc_with='ef3') 999 | self.eaf.remove_secondary_linked_files(mimetype='audio/x-wav') 1000 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 2) 1001 | self.eaf.remove_secondary_linked_files(assoc_with='ef1') 1002 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 2) 1003 | self.eaf.remove_secondary_linked_files( 1004 | file_path='/some/file/path/test4.mpg') 1005 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 1) 1006 | self.eaf.remove_secondary_linked_files(relpath='./test3.mpg') 1007 | self.assertEqual(self.eaf.get_secondary_linked_files(), []) 1008 | 1009 | def test_remove_tier(self): 1010 | self.eaf.add_tier('tier1') 1011 | self.eaf.add_tier('tier2') 1012 | self.eaf.add_tier('tier3') 1013 | self.eaf.add_tier('tier4') 1014 | self.eaf.remove_tier('tier1') 1015 | self.assertEqual(sorted(self.eaf.get_tier_names()), 1016 | ['default', 'tier2', 'tier3', 'tier4']) 1017 | 
self.assertRaises(KeyError, self.eaf.remove_tier, 'tier1') 1018 | 1019 | def test_remove_tiers(self): 1020 | self.eaf.add_tier('tier1') 1021 | self.eaf.add_tier('tier2') 1022 | self.eaf.add_tier('tier3') 1023 | self.eaf.add_tier('tier4') 1024 | self.eaf.remove_tiers(['default', 'tier4', 'tier1']) 1025 | self.assertEqual(sorted(self.eaf.get_tier_names()), ['tier2', 'tier3']) 1026 | self.assertRaises(KeyError, self.eaf.remove_tiers, ['tier1']) 1027 | self.eaf.remove_tiers(['tier2', 'tier3']) 1028 | self.assertEqual(sorted(self.eaf.get_tier_names()), []) 1029 | 1030 | def test_rename_tier(self): 1031 | self.eaf.add_tier('child', parent='default') 1032 | self.eaf.add_tier('test1') 1033 | self.eaf.add_tier('test2') 1034 | self.eaf.add_tier('test3') 1035 | self.eaf.add_tier('test4') 1036 | self.eaf.rename_tier('test1', 'test1a') 1037 | self.eaf.rename_tier('default', 'test5') 1038 | self.assertEqual(sorted(self.eaf.get_tier_names()), sorted([ 1039 | 'child', 'test1a', 'test2', 'test3', 'test4', 'test5'])) 1040 | self.assertEqual(sorted(self.eaf.child_tiers_for('test5')), 1041 | sorted(['child'])) 1042 | 1043 | def test_shift_annotations(self): 1044 | self.eaf.add_tier('tier1') 1045 | self.eaf.add_tier('tier2') 1046 | # Overlap 1047 | self.eaf.add_annotation('tier1', 0, 100, 'a1') 1048 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 1049 | self.eaf.add_annotation('tier2', 500, 1500, 'b1') 1050 | self.eaf.add_annotation('tier2', 0, 150, 'b1') 1051 | d1 = self.eaf.get_annotation_data_for_tier('tier1') 1052 | d2 = self.eaf.get_annotation_data_for_tier('tier2') 1053 | self.eaf.shift_annotations(0) 1054 | self.assertEqual(d1, self.eaf.get_annotation_data_for_tier('tier1')) 1055 | self.assertEqual(d2, self.eaf.get_annotation_data_for_tier('tier2')) 1056 | 1057 | self.eaf.shift_annotations(100) 1058 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier1'), 1059 | [(x+100, y+100, v) for x, y, v in d1]) 1060 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier2'), 1061 | [(x+100, y+100, v) for x, y, v in d2]) 1062 | self.assertEqual(self.eaf.shift_annotations(-200), 1063 | ([('tier2', 100, 250, 'b1')], 1064 | [('tier1', 100, 200, 'a1')])) 1065 | 1066 | def test_to_textgrid(self): 1067 | self.eaf.remove_tier('default') 1068 | tg = self.eaf.to_textgrid() 1069 | self.assertEqual(list(tg.get_tier_name_num()), []) 1070 | self.eaf.add_tier('t1') 1071 | self.eaf.add_annotation('t1', 0, 100, 'a11') 1072 | self.eaf.add_annotation('t1', 100, 200, 'a21') 1073 | self.eaf.add_annotation('t1', 200, 300, 'a31') 1074 | self.eaf.add_annotation('t1', 300, 400, 'a41') 1075 | self.eaf.add_tier('t2') 1076 | self.eaf.add_annotation('t2', 0, 100, 'a12') 1077 | self.eaf.add_annotation('t2', 100, 200, 'a22') 1078 | self.eaf.add_annotation('t2', 200, 300, 'a32') 1079 | self.eaf.add_annotation('t2', 300, 400, 'a42') 1080 | self.eaf.add_tier('t3') 1081 | self.eaf.add_annotation('t3', 0, 100, 'a13') 1082 | self.eaf.add_annotation('t3', 100, 200, 'a23') 1083 | self.eaf.add_annotation('t3', 200, 300, 'a33') 1084 | self.eaf.add_annotation('t3', 300, 400, 'a43') 1085 | self.eaf.add_tier('t4') 1086 | self.eaf.add_annotation('t4', 0, 100, 'a14') 1087 | self.eaf.add_annotation('t4', 100, 200, 'a24') 1088 | self.eaf.add_annotation('t4', 200, 300, 'a34') 1089 | self.eaf.add_annotation('t4', 300, 400, 'a44') 1090 | self.eaf.add_tier('t5') 1091 | self.eaf.add_annotation('t5', 0, 100, 'a15') 1092 | self.eaf.add_annotation('t5', 100, 200, 'a25') 1093 | self.eaf.add_annotation('t5', 200, 300, 'a35') 1094 | 
self.eaf.add_annotation('t5', 300, 400, 'a45') 1095 | self.eaf.add_tier('t6') 1096 | self.eaf.add_annotation('t6', 0, 100, 'a16') 1097 | self.eaf.add_annotation('t6', 100, 200, 'a26') 1098 | self.eaf.add_annotation('t6', 200, 300, 'a36') 1099 | self.eaf.add_annotation('t6', 300, 400, 'a46') 1100 | tg = self.eaf.to_textgrid() 1101 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1102 | ['t1', 't2', 't3', 't4', 't5', 't6']) 1103 | tg = self.eaf.to_textgrid(filtin=['t1', 't2', 't3']) 1104 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1105 | ['t1', 't2', 't3']) 1106 | tg = self.eaf.to_textgrid(filtex=['t1', 't2', 't3']) 1107 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1108 | ['t4', 't5', 't6']) 1109 | tg = self.eaf.to_textgrid(filtin=['t[123]'], regex=True) 1110 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1111 | ['t1', 't2', 't3']) 1112 | tg = self.eaf.to_textgrid(filtex=['t[123]'], regex=True) 1113 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1114 | ['t4', 't5', 't6']) 1115 | self.eaf.add_tier('t7') 1116 | tg = self.eaf.to_textgrid() 1117 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1118 | ['t1', 't2', 't3', 't4', 't5', 't6', 't7']) 1119 | self.assertEqual(list(tg.get_tier('t1').get_intervals(sort=True)), 1120 | [(0.0, 0.1, 'a11'), (0.1, 0.2, 'a21'), 1121 | (0.2, 0.3, 'a31'), (0.3, 0.4, 'a41')]) 1122 | self.assertEqual(list(tg.get_tier('t7').get_intervals()), []) 1123 | 1124 | def test_add_nested_reference_annotations(self): 1125 | self.eaf.add_linguistic_type('refT') 1126 | self.eaf.add_linguistic_type('orthT', 'Symbolic_Association') 1127 | self.eaf.add_linguistic_type('wordT', 'Symbolic_Subdivision') 1128 | 1129 | self.eaf.add_tier('ref', ling='refT') 1130 | self.eaf.add_tier('orth', ling='orthT', parent='ref') 1131 | self.eaf.add_tier('word', ling='wordT', parent='orth') 1132 | 1133 | self.eaf.add_annotation('ref', 0, 1, 'test.001') 1134 | self.eaf.add_ref_annotation('orth', 'ref', 0, 'Words here.') 1135 | self.eaf.add_ref_annotation('word', 'orth', 0, 'Words') 1136 | 1137 | def test_parse_eaf(self): 1138 | pass 1139 | 1140 | def test_eaf_from_chat(self): 1141 | pass 1142 | 1143 | 1144 | @pytest.mark.parametrize( 1145 | 'eaf,schema', 1146 | [ 1147 | ('sample_2.8.eaf', 'EAFv2.8.xsd'), 1148 | ('sample_2.7.eaf', 'EAFv2.8.xsd'), 1149 | ('sample_3.0.eaf', 'EAFv3.0.xsd'), 1150 | ] 1151 | ) 1152 | def test_to_file_to_eaf(eaf, schema, test_dir, tmp_path): 1153 | filepath = str(tmp_path / 'test.eaf') 1154 | eaf = Eaf(str(test_dir / eaf)) 1155 | eaf.to_file(filepath) 1156 | 1157 | schema = etree.XMLSchema(etree.XML(test_dir.joinpath(schema).read_text(encoding='utf8'))) 1158 | xmlparser = etree.XMLParser(schema=schema) 1159 | etree.parse(str(filepath), xmlparser) 1160 | 1161 | 1162 | def test_to_textgrid(test_dir): 1163 | _ = Eaf(str(test_dir / 'sample_2.7.eaf')).to_textgrid() 1164 | -------------------------------------------------------------------------------- /test/test_praat.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import unittest 3 | import pytest 4 | 5 | from pympi.Praat import TextGrid 6 | 7 | 8 | class PraatTest(unittest.TestCase): 9 | def setUp(self): 10 | self.tg = TextGrid(xmax=20) 11 | self.maxDiff = None 12 | 13 | # Test all the Praat.TextGrid functions 14 | def test_sort_tiers(self): 15 | self.tg.add_tier('t2') 16 | self.tg.add_tier('t1') 17 | self.tg.add_tier('t3') 18 | self.tg.add_tier('t6') 19 |
self.tg.add_tier('t4') 20 | self.tg.add_tier('t5') 21 | 22 | tiernames = ['t1', 't2', 't3', 't4', 't5', 't6'] 23 | self.tg.sort_tiers() 24 | self.assertEqual([a[1] for a in self.tg.get_tier_name_num()], 25 | tiernames) 26 | self.tg.sort_tiers(lambda x: list(reversed(tiernames)).index(x.name)) 27 | self.assertEqual([a[1] for a in self.tg.get_tier_name_num()], 28 | list(reversed(tiernames))) 29 | 30 | def test_add_tier(self): 31 | self.assertRaises(ValueError, self.tg.add_tier, 'a', number=-1) 32 | self.assertRaises(ValueError, self.tg.add_tier, 'a', number=10) 33 | 34 | self.tg.add_tier('tier1') 35 | self.assertEqual(len(self.tg.tiers), 1) 36 | self.assertEqual(self.tg.tiers[0].tier_type, 'IntervalTier') 37 | 38 | self.tg.add_tier('tier2', tier_type='TextTier') 39 | self.assertEqual(len(self.tg.tiers), 2) 40 | self.assertEqual(self.tg.tiers[1].tier_type, 'TextTier') 41 | 42 | self.tg.add_tier('tier3') 43 | self.assertEqual(len(self.tg.tiers), 3) 44 | 45 | self.assertEqual(['tier1', 'tier2', 'tier3'], 46 | [a.name for a in self.tg.tiers]) 47 | 48 | self.tg.add_tier('tier4', number=2) 49 | self.assertEqual(len(self.tg.tiers), 4) 50 | self.assertEqual('tier4', self.tg.tiers[1].name) 51 | 52 | def test_remove_tier(self): 53 | self.assertRaises(Exception, self.tg.remove_tier, -1) 54 | self.assertRaises(Exception, self.tg.remove_tier, 10) 55 | 56 | self.tg.add_tier('tier1') 57 | self.tg.add_tier('tier2') 58 | self.tg.add_tier('tier3') 59 | self.tg.add_tier('tier4', number=2) 60 | 61 | self.tg.remove_tier(3) 62 | self.assertEqual(len(self.tg.tiers), 3) 63 | self.assertEqual(['tier1', 'tier3', 'tier4'], 64 | sorted(a.name for a in self.tg.tiers)) 65 | 66 | self.tg.remove_tier('tier1') 67 | self.assertEqual(len(self.tg.tiers), 2) 68 | self.assertEqual(['tier3', 'tier4'], 69 | sorted(a.name for a in self.tg.tiers)) 70 | 71 | self.tg.remove_tier(2) 72 | self.assertEqual(len(self.tg.tiers), 1) 73 | self.assertEqual(['tier4'], [a.name for a in self.tg.tiers]) 74 | 75 | self.tg.remove_tier('tier4') 76 | self.assertTrue(not self.tg.tiers) 77 | 78 | def test_get_tier(self): 79 | self.assertRaises(Exception, self.tg.get_tier, -1) 80 | self.assertRaises(Exception, self.tg.get_tier, 'a') 81 | self.assertRaises(Exception, self.tg.get_tier, 10) 82 | 83 | tier1 = self.tg.add_tier('tier1') 84 | tier2 = self.tg.add_tier('tier2') 85 | tier3 = self.tg.add_tier('tier3') 86 | 87 | self.assertEqual(tier1, self.tg.get_tier(tier1.name)) 88 | self.assertEqual(tier3, self.tg.get_tier(tier3.name)) 89 | 90 | self.assertEqual(self.tg.tiers[1], self.tg.get_tier(tier2.name)) 91 | 92 | def test_change_tier_name(self): 93 | self.assertRaises(Exception, 94 | self.tg.change_tier_name, -1, 'b') 95 | self.assertRaises(Exception, 96 | self.tg.change_tier_name, 'a', 'b') 97 | self.assertRaises(Exception, 98 | self.tg.change_tier_name, 10, 'b') 99 | self.tg.add_tier('tier1') 100 | tier2 = self.tg.add_tier('tier2') 101 | self.tg.add_tier('tier3') 102 | 103 | self.tg.change_tier_name('tier1', 'tier1a') 104 | self.assertEqual(['tier1a', 'tier2', 'tier3'], 105 | [a.name for a in self.tg.tiers]) 106 | self.tg.change_tier_name(self.tg.tiers.index(tier2)+1, 'tier2a') 107 | self.assertEqual(['tier1a', 'tier2a', 'tier3'], 108 | [a.name for a in self.tg.tiers]) 109 | self.tg.change_tier_name('tier1a', 'tier1') 110 | self.assertEqual(['tier1', 'tier2a', 'tier3'], 111 | [a.name for a in self.tg.tiers]) 112 | 113 | def test_get_tiers(self): 114 | self.tg.add_tier('tier1') 115 | self.tg.add_tier('tier2') 116 | self.tg.add_tier('tier3') 117 |
self.assertEqual(self.tg.tiers, 118 | list(self.tg.get_tiers())) 119 | 120 | def test_get_tier_name_num(self): 121 | self.tg.add_tier('tier1') 122 | self.tg.add_tier('tier2') 123 | self.tg.add_tier('tier3', number=2) 124 | self.assertEqual([(1, 'tier1'), (2, 'tier3'), (3, 'tier2')], 125 | list(self.tg.get_tier_name_num())) 126 | 127 | def test_to_eaf(self): 128 | tier1 = self.tg.add_tier('tier1') 129 | tier2 = self.tg.add_tier('tier2', tier_type='TextTier') 130 | tier1.add_interval(0, 1, 'int1') 131 | tier1.add_interval(2, 3, 'int2') 132 | tier1.add_interval(5, 6, 'int3') 133 | tier2.add_point(1.5, 'point1') 134 | tier2.add_point(2.5, 'point2') 135 | tier2.add_point(3.5, 'point3') 136 | eaf = self.tg.to_eaf(True, 0.03) 137 | self.assertRaises(ValueError, self.tg.to_eaf, pointlength=-1) 138 | self.assertEqual(sorted(eaf.get_tier_names()), 139 | sorted(['default', 'tier1', 'tier2'])) 140 | self.assertEqual(sorted(eaf.get_annotation_data_for_tier('tier1')), 141 | sorted([(0, 1000, 'int1'), (5000, 6000, 'int3'), 142 | (2000, 3000, 'int2')])) 143 | self.assertEqual(sorted(eaf.get_annotation_data_for_tier('tier2')), 144 | sorted([(2500, 2530, 'point2'), 145 | (1500, 1530, 'point1'), 146 | (3500, 3530, 'point3')])) 147 | 148 | # Test all the Praat.Tier functions 149 | def setup_tier(self): 150 | self.tier1 = self.tg.add_tier('tier1') 151 | self.tier2 = self.tg.add_tier('tier2', tier_type='TextTier') 152 | 153 | def test_add_point(self): 154 | self.setup_tier() 155 | self.assertRaises(Exception, self.tier1.add_point, 5, 'a') 156 | self.tier2.add_point(5, 't') 157 | self.assertEqual([(5, 't')], self.tier2.intervals) 158 | self.assertRaises(Exception, self.tier2.add_point, 5, 'a') 159 | self.tier2.add_point(6, 'a') 160 | self.assertEqual([(5, 't'), (6, 'a')], self.tier2.intervals) 161 | self.tier2.add_point(5, 'a', False) 162 | 163 | def test_add_interval(self): 164 | self.setup_tier() 165 | self.assertRaises(Exception, 166 | self.tier2.add_interval, 5, 6, 'a') 167 | self.assertRaises(Exception, self.tier2.add_interval, 6, 5, 'a') 168 | 169 | self.tier1.add_interval(5, 6, 't') 170 | self.assertEqual([(5, 6, 't')], self.tier1.intervals) 171 | self.assertRaises(Exception, self.tier1.add_interval, 5.5, 6.5, 't') 172 | self.tier1.add_interval(6, 7, 'a') 173 | self.assertEqual([(5, 6, 't'), (6, 7, 'a')], self.tier1.intervals) 174 | 175 | self.tier1.add_interval(5.5, 6.5, 't', False) 176 | 177 | def test_remove_interval(self): 178 | self.setup_tier() 179 | self.assertRaises(Exception, self.tier2.remove_interval, 5) 180 | self.tier1.add_interval(5, 6, 'a') 181 | self.tier1.add_interval(6, 7, 'b') 182 | self.tier1.add_interval(7, 8, 'c') 183 | self.tier1.remove_interval(5.5) 184 | self.assertEqual([(6, 7, 'b'), (7, 8, 'c')], 185 | self.tier1.intervals) 186 | self.tier1.remove_interval(8) 187 | self.assertEqual([(6, 7, 'b')], 188 | self.tier1.intervals) 189 | self.tier1.remove_interval(8) 190 | self.assertEqual([(6, 7, 'b')], 191 | self.tier1.intervals) 192 | 193 | def test_remove_point(self): 194 | self.setup_tier() 195 | self.assertRaises(Exception, self.tier1.remove_point, 5) 196 | self.tier2.add_point(5, 'a') 197 | self.tier2.add_point(6, 'b') 198 | self.tier2.add_point(7, 'c') 199 | self.tier2.remove_point(5) 200 | self.assertEqual([(6, 'b'), (7, 'c')], 201 | self.tier2.intervals) 202 | self.tier2.remove_point(7) 203 | self.assertEqual([(6, 'b')], 204 | self.tier2.intervals) 205 | self.tier2.remove_point(7) 206 | self.assertEqual([(6, 'b')], 207 | self.tier2.intervals) 208 | 209 | def 
test_get_intervals(self): 210 | self.setup_tier() 211 | self.tier1.add_interval(5, 6, 'a') 212 | self.tier1.add_interval(7, 8, 'c') 213 | self.tier1.add_interval(6, 7, 'b') 214 | self.assertEqual([(5, 6, 'a'), (6, 7, 'b'), (7, 8, 'c')], 215 | sorted(self.tier1.get_intervals())) 216 | self.tier2.add_point(5, 'a') 217 | self.tier2.add_point(7, 'c') 218 | self.tier2.add_point(6, 'b') 219 | self.assertEqual([(5, 'a'), (6, 'b'), (7, 'c')], 220 | sorted(self.tier2.get_intervals())) 221 | 222 | def test_clear_intervals(self): 223 | self.setup_tier() 224 | self.tier1.add_interval(5, 6, 'a') 225 | self.tier1.add_interval(6, 7, 'b') 226 | self.tier1.add_interval(7, 8, 'c') 227 | self.tier1.clear_intervals() 228 | self.assertEqual([], self.tier1.intervals) 229 | 230 | self.tier2.add_point(5, 'a') 231 | self.tier2.add_point(6, 'b') 232 | self.tier2.add_point(7, 'c') 233 | self.tier2.clear_intervals() 234 | self.assertEqual([], self.tier2.intervals) 235 | 236 | 237 | @pytest.mark.parametrize('codec', ['utf-8', 'latin_1', 'mac_roman']) 238 | def test_to_file(codec, tmp_path): 239 | tg = TextGrid(xmax=20) 240 | tier1 = tg.add_tier('tier') 241 | tier1.add_interval(1, 2, 'i1') 242 | tier1.add_interval(2, 3, 'i2') 243 | tier1.add_interval(4, 5, 'i3') 244 | 245 | tier4 = tg.add_tier('tier') 246 | tier4.add_interval(1, 2, u'i1ü') 247 | tier4.add_interval(2.0, 3, 'i2') 248 | tier4.add_interval(4, 5.0, 'i3') 249 | 250 | tier2 = tg.add_tier('tier2', tier_type='TextTier') 251 | tier2.add_point(1, u'p1ü') 252 | tier2.add_point(2, 'p1') 253 | tier2.add_point(3, 'p1') 254 | 255 | tempf = str(tmp_path / 'test') 256 | 257 | # Normal mode 258 | tg.to_file(pathlib.Path(tempf), codec=codec) 259 | TextGrid(tempf, codec=codec) 260 | # Short mode 261 | tg.to_file(tempf, codec=codec, mode='s') 262 | TextGrid(tempf, codec=codec) 263 | # Binary mode 264 | tg.to_file(tempf, mode='b') 265 | TextGrid(tempf) 266 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{35,36,37,38} 3 | skip_missing_interpreters = true 4 | 5 | [testenv] 6 | extras = test 7 | commands = pytest {posargs} 8 | --------------------------------------------------------------------------------