├── .github │   ├── ISSUE_TEMPLATE │   │   └── bug_report.md │   └── workflows │       └── python-package.yml ├── .gitignore ├── LICENSE ├── MANIFEST ├── README.md ├── examples │   ├── README.txt │   ├── example1.py │   └── example2.py ├── pympi │   ├── Elan.py │   ├── Praat.py │   └── __init__.py ├── setup.cfg ├── setup.py ├── test │   ├── .gitignore │   ├── EAFv2.8.xsd │   ├── EAFv3.0.xsd │   ├── conftest.py │   ├── sample_2.7.eaf │   ├── sample_2.8.eaf │   ├── sample_3.0.eaf │   ├── test_elan.py │   └── test_praat.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Expected behaviour** 11 | What did you expect to happen? 12 | 13 | **Actual behaviour** 14 | What actually happened? Please provide the exact means of reproducing the bug. 15 | 16 | **System information** 17 | - python version: 18 | - os: 19 | - are you up to date with the latest master?: 20 | 21 | **Additional context** 22 | Add any other context about the problem here. 23 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.7, 3.8, 3.9] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install .[test] 27 | - name: Test with pytest 28 | run: | 29 | pytest 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution files 2 | bin 3 | build 4 | dist 5 | 6 | # Virtual env files 7 | include 8 | lib 9 | lib64 10 | pyvenv.cfg 11 | 12 | # Test files or temporary data files 13 | *.pfsx 14 | *.pyc 15 | 16 | # Vim files 17 | *~ 18 | *.swp 19 | 20 | # Pyenv 21 | .python-version 22 | 23 | 24 | # Python egg 25 | *.egg-info 26 | 27 | .coverage 28 | .tox/ 29 | .idea/ 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Mart Lubbers 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | pympi/Elan.py 5 | pympi/Praat.py 6 | pympi/__init__.py 7 | test/test_elan.py 8 | test/test_praat.py 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pympi version 1.70.2 2 | ==================== 3 | ### Introduction 4 | Pympi is a package that allows you to interact with [Elan][1] files and [TextGrid][2] (regular, short and binary) files. 5 | You can create, edit and convert both formats into each other. 6 | Besides all the basic functions, it also includes functions for: 7 | - Calculating gaps and overlaps between speakers conforming to [Heldner and Edlund's method][3] (could be used to calculate floor transfers). 8 | - Shifting annotations in both directions (could be used when, due to an error, all annotations are misaligned). 9 | - Importing from CLAN's chat files. 10 | - Merging and/or filtering tiers (could be used to combine hands in gesture coding). 11 | - Moving tiers between elan files. 12 | - Etc. 13 | 14 | ### Requirements 15 | None 16 | 17 | ### Optional requirements 18 | - [lxml][4] is used for testing. 19 | 20 | ### Documentation and downloads 21 | Full API documentation of the current and old versions can be found [here][5]. 22 | 23 | The PyPI repository can be found [here][6]. 24 | 25 | ### Installation 26 | #### Automatic 27 | - From a shell run with administrator rights: 28 | ```Shell 29 | pip install pympi-ling 30 | ``` 31 | - Or alternatively run with administrator rights: 32 | ```Shell 33 | easy_install pympi-ling 34 | ``` 35 | 36 | *NOTE: on Windows the executable might not be in $PATH.* 37 | 38 | #### Manual 39 | 1. Download the latest version from [PyPI][6] 40 | 2. Untar the file 41 | 3. 
From the extracted directory, run with administrator rights: 42 | ```Shell 43 | python setup.py install 44 | ``` 45 | 
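### Quick usage
A minimal sketch of the API (the file names here are hypothetical):
```Python
import pympi

# Read an ELAN file and list its tiers
eaf = pympi.Elan.Eaf('recording.eaf')
for tier_name in eaf.get_tier_names():
    print(tier_name)

# Read a TextGrid and convert it to an Eaf object
tg = pympi.Praat.TextGrid('recording.TextGrid')
eaf2 = tg.to_eaf()
```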
46 | ### How to cite 47 | ```tex 48 | @misc{pympi-1.70, 49 | author={Lubbers, Mart and Torreira, Francisco}, 50 | title={pympi-ling: a {Python} module for processing {ELAN}s {EAF} and {Praat}s {TextGrid} annotation files.}, 51 | howpublished={\url{https://pypi.python.org/pypi/pympi-ling}}, 52 | year={2013-2021}, 53 | note={Version 1.70} 54 | } 55 | ``` 56 | 57 | ### Authors 58 | Mart Lubbers (mart at martlubbers.net) 59 | and 60 | Francisco Torreira (francisco.torreira at mpi.nl) 61 | 62 | and with contributions from: 63 | sarpu, hadware, thomaskisler, mome, mimrock and xrotwang 64 | 65 | [1]: https://tla.mpi.nl/tools/tla-tools/elan/ 66 | [2]: http://www.fon.hum.uva.nl/praat/ 67 | [3]: http://www.sciencedirect.com/science/article/pii/S0095447010000628 68 | [4]: http://lxml.de/ 69 | [5]: http://dopefishh.github.io/pympi/ 70 | [6]: https://pypi.python.org/pypi/pympi-ling/ 71 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains some example scripts that can be freely adapted to 2 | create your own scripts or serve as inspiration. 3 | 4 | Example 1. 5 | This script makes a word frequency list of all the orthography tiers present 6 | in the elan files in a corpus. 7 | 8 | Example 2. 9 | This script calculates the gaps and the overlaps between two signers that are 10 | transcribed with different hands as different tiers. 11 | -------------------------------------------------------------------------------- /examples/example1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import glob # Import glob to easily loop over files 5 | import pympi # Import pympi to work with elan files 6 | import string # Import string to get the punctuation data 7 | 8 | # Define some variables for later use 9 | corpus_root = '/home/frobnicator/corpora/corpus_1' 10 | output_file = '{}/word_frequencies.txt'.format(corpus_root) 11 | ort_tier_names = ['spkA', 'spkB', 'spkC'] 12 | 13 | # Initialize the frequency dictionary 14 | frequency_dict = {} 15 | 16 | # Loop over all elan files in the corpus root subdirectory called eaf 17 | for file_path in glob.glob('{}/eaf/*.eaf'.format(corpus_root)): 18 | # Initialize the elan file 19 | eafob = pympi.Elan.Eaf(file_path) 20 | # Loop over all the defined tiers that contain orthography 21 | for ort_tier in ort_tier_names: 22 | # If the tier is not present in the elan file spew an error and 23 | # continue. This is done to avoid possible KeyErrors 24 | if ort_tier not in eafob.get_tier_names(): 25 | print('WARNING!!!') 26 | print('One of the orthography tiers is not present in the elan file,') 27 | print('namely: {}. Skipping this one...'.format(ort_tier)) 28 | # If the tier is present we can loop through the annotation data 29 | else: 30 | for annotation in eafob.get_annotation_data_for_tier(ort_tier): 31 | # We are only interested in the utterance 32 | utterance = annotation[2] 33 | # Split, by default, splits on whitespace thus separating words 34 | words = utterance.split() 35 | # For every word increment the frequency 36 | for word in words: 37 | # Remove the possible punctuation 38 | for char in string.punctuation: 39 | word = word.replace(char, '') 40 | # Convert to lowercase 41 | word = word.lower() 42 | # Increment the frequency, using the get method we can 43 | # avoid KeyErrors and make sure the word is added when it 44 | # wasn't present in the frequency dictionary 45 | frequency_dict[word] = frequency_dict.get(word, 0) + 1 46 | 47 | # Open an output file to write the data to 48 | with open(output_file, 'w') as out_f: 49 | # Loop through the words with their frequencies, we do this sorted because 50 | # the file will then be more easily searchable 51 | for word, frequency in sorted(frequency_dict.items()): 52 | # We write the output separated by tabs 53 | out_f.write('{}\t{}\n'.format(word, frequency)) 54 | -------------------------------------------------------------------------------- /examples/example2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pympi # Import pympi to work with elan files 5 | 6 | # Specify the file path 7 | elan_file_path = '/home/frobnicator/corpus/sign/file1.eaf' 8 | 9 | # Initialize the elan file 10 | eaf = pympi.Elan.Eaf(elan_file_path) 11 | # Merge both hands for speaker 1 12 | eaf.merge_tiers(['spk1L', 'spk1R'], 'spk1', 80) 13 | # Merge both hands for speaker 2 14 | eaf.merge_tiers(['spk2L', 'spk2R'], 'spk2', 80) 15 | # Create gaps and overlaps tier called ftos with a maximum length of 5000ms and 16 | # using the fast method 17 | eaf.create_gaps_and_overlaps_tier('spk1', 'spk2', 'ftos', 5000, True) 18 | # Write the results to file with the _fto suffix 19 | eaf.to_file(elan_file_path.replace('.eaf', '_fto.eaf')) 20 | -------------------------------------------------------------------------------- /pympi/Praat.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import re 3 | import struct 4 | 5 | VERSION = '1.70.2' 6 | 7 | 8 | class TextGrid: 9 | """Read, write and edit Praat's TextGrid files. 10 | 11 | .. note:: All times are in seconds and can have decimals 12 | 13 | :var float xmin: Minimum x value. 14 | :var float xmax: Maximum x value. 15 | :var int tier_num: Number of tiers. 16 | :var list tiers: Internal (unsorted) list of tiers. 17 | :var str codec: Codec of the input file. 18 | """ 19 | def __init__(self, file_path=None, xmin=0, xmax=None, codec='utf-8'): 20 | """Construct either a new TextGrid object or read one from a 21 | file/stream. When you create an empty TextGrid you must at least 22 | specify the xmax. When you want to load a TextGrid from file you need 23 | to specify at least the file_path and optionally the codec. Binary, 24 | short and normal TextGrids are supported. 25 | 26 | :param str file_path: Path to read from, - for stdin. If ``None`` an 27 | empty TextGrid will be created. 28 | :param int xmin: Xmin value, only needed when not loading from file. 29 | :param int xmax: Xmax value, needed when not loading from file. 30 | :param str codec: Text encoding for the input.
Note that this will be 31 | ignored for binary TextGrids. 32 | :raises Exception: If filepath is not specified but no xmax 33 | """ 34 | self.tiers = [] 35 | self.codec = codec 36 | if not file_path: 37 | if xmax is None: 38 | raise Exception('No xmax specified') 39 | self.tier_num = 0 40 | self.xmin = xmin 41 | self.xmax = xmax 42 | else: 43 | with open(file_path, 'rb') as f: 44 | self.from_file(f, codec) 45 | 46 | def from_file(self, ifile, codec='ascii'): 47 | """Read textgrid from stream. 48 | 49 | :param file ifile: Stream to read from. 50 | :param str codec: Text encoding for the input. Note that this will be 51 | ignored for binary TextGrids. 52 | """ 53 | if ifile.read(12) == b'ooBinaryFile': 54 | def bin2str(ifile): 55 | textlen = struct.unpack('>h', ifile.read(2))[0] 56 | # Single byte characters 57 | if textlen >= 0: 58 | return ifile.read(textlen).decode('ascii') 59 | # Multi byte characters have initial len -1 and then \xff bytes 60 | elif textlen == -1: 61 | textlen = struct.unpack('>h', ifile.read(2))[0] 62 | data = ifile.read(textlen*2) 63 | charlist = (data[i:i+2] for i in range(0, len(data), 2)) 64 | return ''.join( 65 | chr(struct.unpack('>h', i)[0]) for i in charlist) 66 | 67 | ifile.read(ord(ifile.read(1))) # skip oo type 68 | self.xmin = struct.unpack('>d', ifile.read(8))[0] 69 | self.xmax = struct.unpack('>d', ifile.read(8))[0] 70 | ifile.read(1) # skip 71 | self.tier_num = struct.unpack('>i', ifile.read(4))[0] 72 | for i in range(self.tier_num): 73 | tier_type = ifile.read(ord(ifile.read(1))).decode('ascii') 74 | name = bin2str(ifile) 75 | tier = Tier(0, 0, name=name, tier_type=tier_type) 76 | self.tiers.append(tier) 77 | tier.xmin = struct.unpack('>d', ifile.read(8))[0] 78 | tier.xmax = struct.unpack('>d', ifile.read(8))[0] 79 | nint = struct.unpack('>i', ifile.read(4))[0] 80 | for i in range(nint): 81 | x1 = struct.unpack('>d', ifile.read(8))[0] 82 | if tier.tier_type == 'IntervalTier': 83 | x2 = struct.unpack('>d', ifile.read(8))[0] 84 | text = bin2str(ifile) 85 | if tier.tier_type == 'IntervalTier': 86 | tier.intervals.append((x1, x2, text)) 87 | elif tier.tier_type == 'TextTier': 88 | tier.intervals.append((x1, text)) 89 | else: 90 | raise Exception('Tiertype does not exist.') 91 | else: 92 | 93 | ifile.seek(0) 94 | 95 | line_list = ifile.read().decode(codec).splitlines(keepends = True) 96 | line_index = 0 97 | 98 | def next_line(): 99 | 100 | nonlocal line_index 101 | 102 | if line_index >= len(line_list): 103 | raise StopIteration 104 | 105 | line = line_list[line_index] 106 | line_index += 1 107 | 108 | return line 109 | 110 | regfloat = re.compile(r'([\d.]+)\s*$', flags = re.UNICODE) 111 | regint = re.compile(r'([\d]+)\s*$', flags = re.UNICODE) 112 | regstr = re.compile(r'^[^"]*"((?:""|[^"])*)"\s*$', flags = re.UNICODE | re.DOTALL) 113 | 114 | def parse_float(): 115 | 116 | return float(regfloat.search(next_line()).group(1)) 117 | 118 | def parse_int(): 119 | 120 | return int(regint.search(next_line()).group(1)) 121 | 122 | def parse_str(): 123 | 124 | line_str = next_line() 125 | 126 | while True: 127 | 128 | try: 129 | return regstr.search(line_str).group(1).replace('""', '"') 130 | except AttributeError: 131 | pass 132 | 133 | line_str += next_line() 134 | 135 | # Skip the Headers and empty line 136 | next_line(), next_line(), next_line() 137 | self.xmin = parse_float() 138 | self.xmax = parse_float() 139 | # Skip 140 | line = next_line() 141 | short = line.strip() == '' 142 | self.tier_num = parse_int() 143 | not short and next_line() 144 | for i in 
range(self.tier_num): 145 | not short and next_line() # skip item[]: and item[\d]: 146 | tier_type = parse_str() 147 | name = parse_str() 148 | tier = Tier(0, 0, name=name, tier_type=tier_type) 149 | self.tiers.append(tier) 150 | tier.xmin = parse_float() 151 | tier.xmax = parse_float() 152 | for i in range(parse_int()): 153 | not short and next_line() # skip intervals [\d] 154 | x1 = parse_float() 155 | if tier.tier_type == 'IntervalTier': 156 | x2 = parse_float() 157 | t = parse_str() 158 | tier.intervals.append((x1, x2, t)) 159 | elif tier.tier_type == 'TextTier': 160 | t = parse_str() 161 | tier.intervals.append((x1, t)) 162 | 163 | def sort_tiers(self, key=lambda x: x.name): 164 | """Sort the tiers given the key. Example key functions: 165 | 166 | Sort according to the tiername in a list: 167 | 168 | ``lambda x: ['name1', 'name2' ... 'namen'].index(x.name)``. 169 | 170 | Sort according to the number of annotations: 171 | 172 | ``lambda x: len(list(x.get_intervals()))`` 173 | 174 | :param func key: A key function. Default sorts alphabetically. 175 | """ 176 | self.tiers.sort(key=key) 177 | 178 | def add_tier(self, name, tier_type='IntervalTier', number=None): 179 | """Add an IntervalTier or a TextTier on the specified location. 180 | 181 | :param str name: Name of the tier, duplicate names is allowed. 182 | :param str tier_type: Type of the tier. 183 | :param int number: Place to insert the tier, when ``None`` the number 184 | is generated and the tier will be placed on the bottom. 185 | :returns: The created tier. 186 | :raises ValueError: If the number is out of bounds. 187 | """ 188 | if number is None: 189 | number = 1 if not self.tiers else len(self.tiers)+1 190 | elif number < 1 or number > len(self.tiers): 191 | raise ValueError('Number not in [1..{}]'.format(len(self.tiers))) 192 | elif tier_type not in Tier.P_TIERS: 193 | raise ValueError('tier_type has to be in {}'.format(Tier.P_TIERS)) 194 | self.tiers.insert(number-1, 195 | Tier(self.xmin, self.xmax, name, tier_type)) 196 | return self.tiers[number-1] 197 | 198 | def remove_tier(self, name_num): 199 | """Remove a tier, when multiple tiers exist with that name only the 200 | first is removed. 201 | 202 | :param name_num: Name or number of the tier to remove. 203 | :type name_num: int or str 204 | :raises IndexError: If there is no tier with that number. 205 | """ 206 | if isinstance(name_num, int): 207 | del(self.tiers[name_num-1]) 208 | else: 209 | self.tiers = [i for i in self.tiers if i.name != name_num] 210 | 211 | def get_tier(self, name_num): 212 | """Gives a tier, when multiple tiers exist with that name only the 213 | first is returned. 214 | 215 | :param name_num: Name or number of the tier to return. 216 | :type name_num: int or str 217 | :returns: The tier. 218 | :raises IndexError: If the tier doesn't exist. 219 | """ 220 | return self.tiers[name_num - 1] if isinstance(name_num, int) else\ 221 | [i for i in self.tiers if i.name == name_num][0] 222 | 223 | def change_tier_name(self, name_num, name2): 224 | """Changes the name of the tier, when multiple tiers exist with that 225 | name only the first is renamed. 226 | 227 | :param name_num: Name or number of the tier to rename. 228 | :type name_num: int or str 229 | :param str name2: New name of the tier. 230 | :raises IndexError: If the tier doesn't exist. 231 | """ 232 | self.get_tier(name_num).name = name2 233 | 234 | def get_tiers(self): 235 | """Give all tiers. 
236 | 237 | :yields: All tiers 238 | """ 239 | for tier in self.tiers: 240 | yield tier 241 | 242 | def get_tier_name_num(self): 243 | """Give all tier names with their numbers. 244 | 245 | :yield: Enumerate of the form ``[(num1, name1), ... (numn, namen)]`` 246 | """ 247 | return enumerate((s.name for s in self.tiers), 1) 248 | 249 | def to_file(self, filepath, codec='utf-8', mode='normal'): 250 | """Write the object to a file. 251 | 252 | :param str filepath: Path of the file. 253 | :param str codec: Text encoding. 254 | :param str mode: Flag for the write mode, possible modes: 255 | 'n'/'normal', 's'/'short' and 'b'/'binary' 256 | """ 257 | self.tier_num = len(self.tiers) 258 | if mode in ['binary', 'b']: 259 | with open(filepath, 'wb') as f: 260 | def writebstr(s): 261 | try: 262 | bstr = s.encode('ascii') 263 | except UnicodeError: 264 | f.write(b'\xff\xff') 265 | bstr = b''.join(struct.pack('>h', ord(c)) for c in s) 266 | f.write(struct.pack('>h', len(s))) 267 | f.write(bstr) 268 | 269 | f.write(b'ooBinaryFile\x08TextGrid') 270 | f.write(struct.pack('>d', self.xmin)) 271 | f.write(struct.pack('>d', self.xmax)) 272 | f.write(b'\x01') 273 | f.write(struct.pack('>i', self.tier_num)) 274 | for tier in self.tiers: 275 | f.write(chr(len(tier.tier_type)).encode('ascii')) 276 | f.write(tier.tier_type.encode('ascii')) 277 | writebstr(tier.name) 278 | f.write(struct.pack('>d', tier.xmin)) 279 | f.write(struct.pack('>d', tier.xmax)) 280 | ints = tier.get_all_intervals() 281 | f.write(struct.pack('>i', len(ints))) 282 | itier = tier.tier_type == 'IntervalTier' 283 | for c in ints: 284 | f.write(struct.pack('>d', c[0])) 285 | itier and f.write(struct.pack('>d', c[1])) 286 | writebstr(c[2 if itier else 1]) 287 | elif mode in ['normal', 'n', 'short', 's']: 288 | # py3.5 compat: codecs.open does not support pathlib.Path objects in py3.5. 289 | with codecs.open(str(filepath), 'w', codec) as f: 290 | short = mode[0] == 's' 291 | 292 | def wrt(indent, prefix, value, ff=''): 293 | indent = 0 if short else indent 294 | prefix = '' if short else prefix 295 | if value is not None or not short: 296 | s = u'{{}}{{}}{}\n'.format(ff) 297 | f.write(s.format(' '*indent, prefix, value)) 298 | 299 | f.write(u'File type = "ooTextFile"\n' 300 | u'Object class = "TextGrid"\n\n') 301 | wrt(0, u'xmin = ', self.xmin, '{:f}') 302 | wrt(0, u'xmax = ', self.xmax, '{:f}') 303 | wrt(0, u'tiers? ', u'', '{}') 304 | wrt(0, u'size = ', self.tier_num, '{:d}') 305 | wrt(0, u'item []:', None) 306 | for tnum, tier in enumerate(self.tiers, 1): 307 | wrt(4, u'item [{:d}]:'.format(tnum), None) 308 | wrt(8, u'class = ', tier.tier_type, '"{}"') 309 | wrt(8, u'name = ', tier.name, '"{}"') 310 | wrt(8, u'xmin = ', tier.xmin, '{:f}') 311 | wrt(8, u'xmax = ', tier.xmax, '{:f}') 312 | if tier.tier_type == 'IntervalTier': 313 | ints = tier.get_all_intervals() 314 | wrt(8, u'intervals: size = ', len(ints), '{:d}') 315 | for i, c in enumerate(ints): 316 | wrt(8, 'intervals [{:d}]:'.format(i+1), None) 317 | wrt(12, 'xmin = ', c[0], '{:f}') 318 | wrt(12, 'xmax = ', c[1], '{:f}') 319 | wrt(12, 'text = ', c[2].replace('"', '""'), '"{}"') 320 | elif tier.tier_type == 'TextTier': 321 | wrt(8, u'points: size = ', len(tier.intervals), '{:d}') 322 | for i, c in enumerate(tier.get_intervals()): 323 | wrt(8, 'points [{:d}]:'.format(i+1), None) 324 | wrt(12, 'number = ', c[0], '{:f}') 325 | wrt(12, 'mark = ', c[1].replace('"', '""'), '"{}"') 326 | else: 327 | raise Exception('Unknown mode') 328 | 329 | def to_eaf(self, skipempty=True, pointlength=0.1): 330 | """Convert the object to a pympi.Elan.Eaf object 331 | 332 | :param float pointlength: Length of the interval generated from each point, in 333 | seconds 334 | :param bool skipempty: Skip the empty annotations 335 | :returns: :class:`pympi.Elan.Eaf` object 336 | :raises ImportError: If the Eaf module can't be loaded. 337 | :raises ValueError: If the pointlength is not strictly positive. 338 | """ 339 | from pympi.Elan import Eaf 340 | eaf_out = Eaf() 341 | if pointlength <= 0: 342 | raise ValueError('Pointlength should be strictly positive') 343 | for tier in self.get_tiers(): 344 | eaf_out.add_tier(tier.name) 345 | for ann in tier.get_intervals(True): 346 | if tier.tier_type == 'TextTier': 347 | ann = (ann[0], ann[0]+pointlength, ann[1]) 348 | if ann[2].strip() or not skipempty: 349 | eaf_out.add_annotation(tier.name, int(round(ann[0]*1000)), 350 | int(round(ann[1]*1000)), ann[2]) 351 | return eaf_out 352 | 353 | 
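# A minimal usage sketch for to_eaf(); the tier name and interval values
# below are illustrative only:
#
#     tg = TextGrid(xmax=5)
#     words = tg.add_tier('words')           # defaults to an IntervalTier
#     words.add_interval(0.0, 1.5, 'hello')
#     eaf = tg.to_eaf()                      # 0.0-1.5 s becomes a 0-1500 ms annotation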
354 | class Tier: 355 | """Class representing a TextGrid tier, either an IntervalTier or a TextTier 356 | 357 | :var str name: Name of the tier. 358 | :var list intervals: List of intervals where each interval is 359 | (start, [end,] value). 360 | :var str tier_type: Type of the tier ('IntervalTier' or 'TextTier'). 361 | :var int xmin: Minimum x value. 362 | :var int xmax: Maximum x value. 363 | """ 364 | P_TIERS = {'IntervalTier', 'TextTier'} 365 | 366 | def __init__(self, xmin, xmax, name=None, tier_type=None): 367 | """Creates a new, empty tier. 368 | 369 | :param str name: Name of the tier. 370 | :param str tier_type: Type of the tier ('IntervalTier' or 'TextTier'). 371 | :raises Exception: If the tier type is unknown. 372 | """ 373 | self.intervals = [] 374 | self.name = name 375 | self.tier_type = tier_type 376 | self.xmin, self.xmax = xmin, xmax 377 | if tier_type not in self.P_TIERS: 378 | raise Exception('Tiertype does not exist.') 379 | 380 | def add_point(self, point, value, check=True): 381 | """Add a point to the TextTier 382 | 383 | :param int point: Time of the point. 384 | :param str value: Text of the point. 385 | :param bool check: Flag to check for overlap. 386 | :raises Exception: If overlap or wrong tiertype. 387 | """ 388 | if self.tier_type != 'TextTier': 389 | raise Exception('Tiertype must be TextTier.') 390 | if check and any(i for i in self.intervals if i[0] == point): 391 | raise Exception('No overlap is allowed') 392 | self.intervals.append((point, value)) 393 | 
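# Points on a TextTier are stored as (time, value) pairs and intervals on an
# IntervalTier as (begin, end, value) triples. A short sketch with
# illustrative values:
#
#     clicks = Tier(0, 10, name='clicks', tier_type='TextTier')
#     clicks.add_point(2.5, 'click')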
394 | def add_interval(self, begin, end, value, check=True): 395 | """Add an interval to the IntervalTier. 396 | 397 | :param float begin: Start time of the interval. 398 | :param float end: End time of the interval. 399 | :param str value: Text of the interval. 400 | :param bool check: Flag to check for overlap. 401 | :raises Exception: If overlap, begin > end or wrong tiertype. 402 | """ 403 | if self.tier_type != 'IntervalTier': 404 | raise Exception('Tiertype must be IntervalTier') 405 | if check: 406 | if any(i for i in self.intervals if begin < i[1] and end > i[0]): 407 | raise Exception('No overlap is allowed') 408 | if begin > end: 409 | raise Exception('Begin must be smaller than end') 410 | self.intervals.append((begin, end, value)) 411 | 412 | def remove_interval(self, time): 413 | """Remove an interval, if no interval is found nothing happens. 414 | 415 | :param int time: Time of the interval. 416 | :raises Exception: If the tier is not an IntervalTier. 417 | """ 418 | if self.tier_type != 'IntervalTier': 419 | raise Exception('Tiertype must be IntervalTier.') 420 | self.intervals = [i for i in self.intervals 421 | if not(i[0] <= time and i[1] >= time)] 422 | 423 | def remove_point(self, time): 424 | """Remove a point, if no point is found nothing happens. 425 | 426 | :param int time: Time of the point. 427 | :raises Exception: If the tier is not a TextTier. 428 | """ 429 | if self.tier_type != 'TextTier': 430 | raise Exception('Tiertype must be TextTier.') 431 | self.intervals = [i for i in self.intervals if i[0] != time] 432 | 433 | def get_intervals(self, sort=False): 434 | """Give all the intervals or points. 435 | 436 | :param bool sort: Flag for yielding the intervals or points sorted.
437 | :yields: All the intervals 438 | """ 439 | for i in sorted(self.intervals) if sort else self.intervals: 440 | yield i 441 | 442 | def clear_intervals(self): 443 | """Removes all the intervals in the tier""" 444 | self.intervals = [] 445 | 446 | def get_all_intervals(self): 447 | """Returns the true list of intervals including the empty intervals.""" 448 | ints = sorted(self.get_intervals(True)) 449 | if self.tier_type == 'IntervalTier': 450 | if not ints: 451 | ints.append((self.xmin, self.xmax, '')) 452 | else: 453 | if ints[0][0] > self.xmin: 454 | ints.insert(0, (self.xmin, ints[0][0], '')) 455 | if ints[-1][1] < self.xmax: 456 | ints.append((ints[-1][1], self.xmax, '')) 457 | p = ints[-1] 458 | for index, i in reversed(list(enumerate(ints[:-1], 1))): 459 | if p[0] - i[1] != 0: 460 | ints.insert(index, (i[1], p[0], '')) 461 | p = i 462 | return ints 463 | -------------------------------------------------------------------------------- /pympi/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the packages 2 | from pympi.Praat import TextGrid 3 | from pympi.Elan import Eaf, eaf_from_chat 4 | 5 | __all__ = ['Praat', 'Elan', 'eaf_from_chat'] 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE 3 | 4 | [bdist_wheel] 5 | universal = 1 6 | 7 | [tool:pytest] 8 | minversion = 5 9 | testpaths = test 10 | addopts = --cov 11 | 12 | [easy_install] 13 | zip_ok = false 14 | 15 | [coverage:run] 16 | source = 17 | pympi 18 | test 19 | 20 | [coverage:report] 21 | show_missing = true 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from distutils.core import setup 5 | 6 | licence = 'MIT Licence' 7 | version = '1.70.2' 8 | 9 | setup(name='pympi-ling', 10 | version=version, 11 | description= 12 | 'A python module for processing ELAN and Praat annotation files', 13 | author='Mart Lubbers', 14 | long_description=r""" 15 | pympi version 1.70.2 16 | ==================== 17 | 18 | Introduction 19 | ------------ 20 | 21 | Pympi is a package that allows you to interact with `Elan`_ files and 22 | `TextGrid`_ files. You can create, edit and convert both formats into 23 | each other. It includes besides all the basic functions also functions 24 | for: - Calculating gaps and overlaps between speakers conform `Heldner 25 | and Edlund’s method`_. (Could be used to calculate floor transfers) - 26 | Shift annotations in both directions (Could be used when due to an error 27 | all annotations are misaligned). - Import from CLAN’s chat files. - 28 | Merge and or filter tiers (Could be used to combine hands in gesture 29 | coding) - Move tiers between elan files. - Etc. 30 | 31 | Requirements 32 | ------------ 33 | 34 | None 35 | 36 | Optional requirements 37 | --------------------- 38 | 39 | - `lxml`_ is used for testing. 40 | 41 | Documentation and downloads 42 | --------------------------- 43 | 44 | Full api documentation of the current and old versions can be found on 45 | `here`_. 46 | 47 | Pypi repository location can be found 48 | `here `__. 49 | 50 | Installation 51 | ------------ 52 | 53 | Automatic 54 | ~~~~~~~~~ 55 | 56 | - From a shell run with administrator rights: 57 | 58 | .. 
code:: shell 59 | 60 | pip install pympi-ling 61 | 62 | - Or alternatively run with administrator rights: 63 | 64 | .. code:: shell 65 | 66 | easy_install pympi-ling 67 | 68 | *NOTE: on windows the executable might not be in $PATH.* 69 | 70 | Manual 71 | ~~~~~~ 72 | 73 | 1. Download the latest version from `pypi`_ 74 | 2. Untar the file 75 | 3. From that directory run with administrator rights 76 | 77 | .. code:: shell 78 | 79 | python setup.py install 80 | 81 | How to cite 82 | ----------- 83 | 84 | .. code:: tex 85 | 86 | @misc{pympi-1.70, 87 | author={Lubbers, Mart and Torreira, Francisco}, 88 | title={pympi-ling: a Python module for processing ELANs EAF and Praats TextGrid annotation files.}, 89 | howpublished={\url{https://pypi.python.org/pypi/pympi-ling}}, 90 | year={2013-2021}, 91 | note={Version 1.70} 92 | } 93 | 94 | Authors 95 | ------- 96 | 97 | Mart Lubbers (mart at martlubbers.net) 98 | and 99 | Francisco Toreirra (francisco.torreira at mpi.nl) 100 | 101 | and with contributions from: 102 | sarpu, hadware, thomaskisler, mome, mimrock and xrotwang 103 | 104 | .. _Elan: https://tla.mpi.nl/tools/tla-tools/elan/ 105 | .. _TextGrid: http://www.fon.hum.uva.nl/praat/ 106 | .. _Heldner and Edlund’s method: http://www.sciencedirect.com/science/article/pii/S0095447010000628 107 | .. _lxml: http://lxml.de/ 108 | .. _here: http://dopefishh.github.io/pympi/ 109 | .. _pypi: http://dopefishh.github.io/pympi/""", 110 | author_email='mart@martlubbers.net', 111 | url='https://github.com/dopefishh/pympi', 112 | classifiers=['Development Status :: 5 - Production/Stable', 113 | 'Environment :: Console', 114 | 'Natural Language :: English', 115 | 'Operating System :: OS Independent', 116 | 'Programming Language :: Python :: 3.5', 117 | 'Programming Language :: Python :: 3.6', 118 | 'Programming Language :: Python :: 3.7', 119 | 'Programming Language :: Python :: 3.8', 120 | 'Programming Language :: Python :: 3.9', 121 | 'Topic :: Text Processing :: Linguistic'], 122 | packages=['pympi'], 123 | extras_require={ 124 | 'test': [ 125 | 'pytest>=5', 126 | 'pytest-mock', 127 | 'pytest-cov', 128 | 'coverage>=4.2', 129 | 'lxml', 130 | ], 131 | }) 132 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | sample_2.8_out.eaf 2 | -------------------------------------------------------------------------------- /test/EAFv2.8.xsd: -------------------------------------------------------------------------------- 1 | 2 | 52 | 53 | 54 | 55 | ELAN Annotation Format 56 | version 2.8 57 | May 2014 58 | Schema by Alexander Klassmann 17/01/03 59 | Adapted by Hennie Brugman, Han Sloetjes, Micha Hulsbosch 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | The entry id should be unique within the collection of entry elements 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | The Tier name/id should be unique within the collection 99 | of Tier elements 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | A Tier can be associated with a parent Tier by referring to an existing Tier id. 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | The Linguistic Type name/id should be unique within the collection 119 | of Linguistic Type elements 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | A Tier must refer to an existing Linguistic Type id. 
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | The Controlled Vocabulary name/id should be unique within the 139 | collection of Controlled Vocabulary elements 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | A Linguistic Type can be associated with a Controlled Vocabulary by 149 | referring to an existing Controlled Vocabulary id. 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | The Lexicon Service name/id should be unique within the 160 | collection of Lexicon Service elements 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | A Linguistic Type can be associated with a Lexicon Service by 170 | referring to an existing Lexicon Service id. 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | A key and keyref pair to enforce that a previous annotation idref at least refers 183 | to an annotation id of a reference annotation. 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | Two key-keyref pairs to enforce that time slot references refer to the id of a time slot. 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | The ID of a language identifier, can be referred to by any element that 217 | needs a reference to a language identifier. 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | Reference from a value in a multilingual CV to a language identifier. 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | Reference from a description in a multilingual CV to a language identifier. 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | Reference from a tier to a language identifier, to indicate the (main) language recorded 245 | on that tier. 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | Reference from an individual alignable annotation to a language identifier. 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | Reference from an individual reference annotation to a language identifier. 264 | 265 | 266 | 267 | 268 | 269 | 282 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | This attribute is deprecated. Use MEDIA_DESCRIPTOR elements instead. 348 | 349 | Ignore 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | This is in fact a reference to the parent annotation. 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | A reference to an url of an external Controlled Vocabulary. 457 | Is intended to be mutually exclusive with a sequence of CV_ENTRY_ML elements. 458 | 459 | 460 | 461 | 462 | 463 | 473 | 474 | 475 | 476 | 477 | 478 | An entry in a multilingual controlled vocabulary, containing the values and the descriptions 479 | in multiple languages. 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | A controlled vocabulary entry value with a language attribute. 
494 | This allows multilingual controlled vocabularies. It adds a language reference attribute 495 | compared to the mono-lingual cv entry element. 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | A description element with a language reference attribute. 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | A reference to the id of an ISO Data Category (url including id). 537 | 538 | 539 | 540 | 541 | 542 | 543 | A reference to an external (closed) Controlled Vocabulary (url). 544 | 545 | 546 | 547 | 548 | 549 | 550 | A reference to the id of an Entry in an external Controlled Vocabulary (id). 551 | 552 | 553 | 554 | 555 | 556 | 557 | A reference to the id of an entry in a lexicon (url, url+id or id) 558 | 559 | 560 | 561 | 562 | 563 | 564 | A reference or hyperlink to any type document (url) 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | The Language element containing a reference to a language name or (if possible persistent) definition. 590 | 591 | 592 | 593 | 594 | 595 | 596 | ISO-639-3 still seems to be the best choice for language codes and closest to persistent language ID's 597 | seem to be the http://cdb.iso.org/lg/... identifiers also used by the iso-language-639-3 component in 598 | the CLARIN ComponentRegistry? 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | The license element can be used to include license information in the eaf file itself. 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | The entry id should be unique within the 635 | collection of entry elements 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 661 | 662 | 663 | 686 | 701 | 702 | 703 | -------------------------------------------------------------------------------- /test/EAFv3.0.xsd: -------------------------------------------------------------------------------- 1 | 2 | 57 | 58 | 59 | 60 | ELAN Annotation Format 61 | version 3.0 62 | December 2016 63 | Schema by Alexander Klassmann 17/01/03 64 | Adapted by Hennie Brugman, Han Sloetjes, Micha Hulsbosch 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | The entry id should be unique within the collection of entry elements 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | The Tier name/id should be unique within the collection 105 | of Tier elements 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | A Tier can be associated with a parent Tier by referring to an existing Tier id. 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | The Linguistic Type name/id should be unique within the collection 125 | of Linguistic Type elements 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | A Tier must refer to an existing Linguistic Type id. 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | The Controlled Vocabulary name/id should be unique within the 145 | collection of Controlled Vocabulary elements 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | A Linguistic Type can be associated with a Controlled Vocabulary by 155 | referring to an existing Controlled Vocabulary id. 
156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | The Lexicon Service name/id should be unique within the 166 | collection of Lexicon Service elements 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | A Linguistic Type can be associated with a Lexicon Service by 176 | referring to an existing Lexicon Service id. 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | A key and keyref pair to enforce that a previous annotation idref at least refers 189 | to an annotation id of a reference annotation. 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | Two key-keyref pairs to enforce that time slot references refer to the id of a time slot. 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | The ID of a language identifier, can be referred to by any element that 223 | needs a reference to a language identifier. 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | Reference from a value in a multilingual CV to a language identifier. 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | Reference from a description in a multilingual CV to a language identifier. 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | Reference from a tier to a language identifier, to indicate the (main) language recorded 251 | on that tier. 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | Reference from an individual alignable annotation to a language identifier. 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | Reference from an individual reference annotation to a language identifier. 270 | 271 | 272 | 273 | 274 | 275 | 288 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | This attribute is deprecated. Use MEDIA_DESCRIPTOR elements instead. 422 | 423 | Ignore 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | This is in fact a reference to the parent annotation. 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | A reference to an url of an external Controlled Vocabulary. 530 | Is intended to be mutually exclusive with a sequence of CV_ENTRY_ML elements. 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | An entry in a multilingual controlled vocabulary, containing the values and the descriptions 541 | in multiple languages. 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | A controlled vocabulary entry value with a language attribute. 556 | This allows multilingual controlled vocabularies. 
It adds a language reference attribute 557 | compared to the mono-lingual cv entry element. 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | A description element with a language reference attribute. 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | A reference to the id of an ISO Data Category (url including id). 599 | 600 | 601 | 602 | 603 | 604 | 605 | A reference to an external (closed) Controlled Vocabulary (url). 606 | 607 | 608 | 609 | 610 | 611 | 612 | A reference to the id of an Entry in an external Controlled Vocabulary (id). 613 | 614 | 615 | 616 | 617 | 618 | 619 | A reference to the id of an entry in a lexicon (url, url+id or id) 620 | 621 | 622 | 623 | 624 | 625 | 626 | A reference or hyperlink to any type document (url) 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | The Language element containing a reference to a language name or (if possible persistent) definition. 652 | 653 | 654 | 655 | 656 | 657 | 658 | ISO-639-3 still seems to be the best choice for language codes and closest to persistent language ID's 659 | seem to be the http://cdb.iso.org/lg/... identifiers also used by the iso-language-639-3 component in 660 | the CLARIN ComponentRegistry? 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | The license element can be used to include license information in the eaf file itself. 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | A set containing referential links. 684 | A set can contain both cross-references and grouping referential links. 685 | Apart from an ID the set can have a meaningful, "friendly" name. 686 | A set can have an external reference, a language and a CV reference. 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | A cross reference is a referential link between two existing elements (REF1 and REF2). 707 | Each of these elements can be either an annotation or a referential link. 708 | Optionally the direction of the link can be specified. 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | A referential element for grouping any number of existing elements (the REFS). 735 | Each element can be an annotation or a referential link. 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | Attributes common for both cross- and group references. 750 | Apart from an ID it is possible to associate a meaningful, "friendly" 751 | name to the link. Furthermore a link can have an external reference, a language and a 752 | CV entry reference and a type attribute. 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | An attribute that allows to specify the type of the cross- or group reference/link. 
764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | The entry id should be unique within the 788 | collection of entry elements 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 814 | 815 | 816 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def test_dir(): 8 | return pathlib.Path(__file__).parent 9 | -------------------------------------------------------------------------------- /test/test_elan.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import pytest 3 | 4 | from pympi import Eaf 5 | import unittest 6 | 7 | 8 | class Elan(unittest.TestCase): 9 | def setUp(self): 10 | self.eaf = Eaf() 11 | 12 | def test_add_annotation(self): 13 | self.eaf.add_tier('tier1') 14 | self.eaf.add_annotation('tier1', 0, 1) 15 | self.assertEqual( 16 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 17 | [(0, 1, '')]) 18 | self.eaf.add_annotation('tier1', 1, 2, 'abc') 19 | self.assertEqual( 20 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 21 | sorted([(0, 1, ''), (1, 2, 'abc')])) 22 | self.assertRaises(KeyError, self.eaf.add_annotation, 't1', 0, 0) 23 | self.assertRaises(ValueError, 24 | self.eaf.add_annotation, 'tier1', 1, 1) 25 | self.assertRaises(ValueError, 26 | self.eaf.add_annotation, 'tier1', 2, 1) 27 | self.assertRaises(ValueError, 28 | self.eaf.add_annotation, 'tier1', -1, 1) 29 | self.eaf.add_tier('tier2') 30 | self.eaf.add_ref_annotation('tier2', 'tier1', 0, 'r1') 31 | self.assertRaises(ValueError, 32 | self.eaf.add_annotation, 'tier2', 0, 1) 33 | 34 | def test_add_controlled_vocabulary(self): 35 | self.eaf.add_controlled_vocabulary('cv1') 36 | self.eaf.add_controlled_vocabulary('cv2') 37 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 38 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 39 | ['cv1', 'cv2', 'cv3']) 40 | 41 | def test_add_cv_entry(self): 42 | self.eaf.add_controlled_vocabulary('cv1') 43 | self.eaf.add_language('eng') 44 | self.eaf.add_language('nld') 45 | self.eaf.add_cv_entry( 46 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 47 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 48 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None)}) 49 | self.eaf.add_cv_entry( 50 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 51 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 52 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None), 53 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 54 | self.assertRaises(KeyError, self.eaf.add_cv_entry, 'cv2', 'cve1', []) 55 | self.assertRaises(ValueError, self.eaf.add_cv_entry, 'cv1', 'cve1', 56 | [('H', 'spa', None)]) 57 | 58 | def test_add_cv_description(self): 59 | self.eaf.add_controlled_vocabulary('cv1') 60 | self.eaf.add_language('eng') 61 | self.eaf.add_language('nld') 62 | self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 63 | self.eaf.add_cv_description('cv1', 'nld', None) 64 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [ 65 | ('eng', 'Gesture Phases'), ('nld', None)]) 66 | self.assertRaises(KeyError, self.eaf.add_cv_description, 'cv2', 'eng') 67 | self.assertRaises(ValueError, 68 | self.eaf.add_cv_description, 
'cv1', 'spa', None) 69 | 70 | def test_add_external_ref(self): 71 | self.eaf.add_external_ref('er1', 'ecv', 'location') 72 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 73 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 74 | ['er1', 'er2']) 75 | self.assertRaises(KeyError, self.eaf.add_external_ref, 'er1', 'a', '') 76 | 77 | def test_add_language(self): 78 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 79 | self.eaf.add_language('en') 80 | self.assertEqual( 81 | self.eaf.get_languages(), 82 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 83 | 84 | def test_add_lexicon_ref(self): 85 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 86 | 'lname1') 87 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 88 | 'lname1', 'dc1', 'dc1') 89 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 90 | ['id1', 'id2']) 91 | self.assertEqual(self.eaf.get_lexicon_ref('id1'), { 92 | 'DATCAT_ID': None, 'NAME': 'long name', 'DATCAT_NAME': None, 'URL': 93 | 'url1', 'LEX_REF_ID': 'id1', 'LEXICON_NAME': 'lname1', 'TYPE': 94 | 't1', 'LEXICON_ID': 'lid1'}) 95 | self.assertEqual(self.eaf.get_lexicon_ref('id2'), { 96 | 'DATCAT_ID': 'dc1', 'NAME': 'long name', 'DATCAT_NAME': 'dc1', 97 | 'URL': 'url1', 'LEX_REF_ID': 'id2', 'LEXICON_NAME': 'lname1', 98 | 'TYPE': 't2', 'LEXICON_ID': 'lid1'}) 99 | 100 | def test_add_license(self): 101 | self.eaf.add_license('k1', 'v1') 102 | self.eaf.add_license('k2', 'v2') 103 | self.assertEqual(self.eaf.get_licenses(), [ 104 | ('k1', 'v1'), ('k2', 'v2')]) 105 | 106 | def test_add_linguistic_type(self): 107 | self.eaf.add_linguistic_type('l1') 108 | self.eaf.add_linguistic_type('l2', 'Time_Subdivision', False, True) 109 | self.assertEqual( 110 | self.eaf.linguistic_types['l1'], { 111 | 'CONSTRAINTS': None, 'TIME_ALIGNABLE': 'true', 112 | 'LINGUISTIC_TYPE_ID': 'l1', 'GRAPHIC_REFERENCES': 'false'}) 113 | self.assertEqual( 114 | self.eaf.linguistic_types['l2'], { 115 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 116 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 117 | self.eaf.add_linguistic_type('l3', param_dict={ 118 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 119 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 120 | self.assertEqual(self.eaf.get_parameters_for_linguistic_type('l3'), { 121 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 122 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 123 | 124 | self.assertRaises(KeyError, self.eaf.add_linguistic_type, 'l2', 'a') 125 | 126 | def test_add_linked_file(self): 127 | self.eaf.add_linked_file('/some/file/path/test.wav') 128 | self.assertEqual(self.eaf.media_descriptors[0]['MIME_TYPE'], 129 | 'audio/x-wav') 130 | self.eaf.add_linked_file('/some/file/path/test.mpg', 131 | './test.mpg', time_origin=5, ex_from='ef') 132 | self.assertEqual(self.eaf.media_descriptors[1]['MIME_TYPE'], 133 | 'video/mpeg') 134 | self.assertEqual(self.eaf.media_descriptors[1]['RELATIVE_MEDIA_URL'], 135 | './test.mpg') 136 | self.assertEqual(self.eaf.media_descriptors[1]['TIME_ORIGIN'], 5) 137 | self.assertEqual(self.eaf.media_descriptors[1]['EXTRACTED_FROM'], 'ef') 138 | 139 | self.eaf.add_linked_file('/some/file/path/test.wierd', 140 | mimetype='none/wierd') 141 | self.assertEqual(self.eaf.media_descriptors[2]['MIME_TYPE'], 142 | 'none/wierd') 143 | 144 | self.assertRaises(KeyError, self.eaf.add_linked_file, '/test.wierd') 145 | 146 | def test_add_locale(self): 147 | 
self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 148 | self.eaf.add_locale('en') 149 | self.assertEqual( 150 | self.eaf.get_locales(), 151 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 152 | 153 | def test_add_property(self): 154 | self.eaf.add_property('k1', 'v1') 155 | self.eaf.add_property('k2', 'v2') 156 | self.assertEqual(self.eaf.get_properties(), [ 157 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2')]) 158 | 159 | def test_add_ref_annotation(self): 160 | self.eaf.add_tier('p1') 161 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 162 | self.eaf.add_tier('a1', 'c', 'p1') 163 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 164 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 165 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 166 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 167 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 168 | self.assertEqual( 169 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 170 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 171 | 172 | self.assertRaises(ValueError, 173 | self.eaf.add_ref_annotation, 'p1', 'a1', 0, 'r1') 174 | self.assertRaises(ValueError, self.eaf.add_ref_annotation, 'a1', 175 | 'p1', 2500, 'r') 176 | self.assertRaises(KeyError, 177 | self.eaf.add_ref_annotation, 'aa', 'bb', 0, 'r1') 178 | 179 | def test_add_secondary_linked_file(self): 180 | self.eaf.add_secondary_linked_file('/some/file/path/test.wav') 181 | self.assertEqual(self.eaf.linked_file_descriptors[0]['MIME_TYPE'], 182 | 'audio/x-wav') 183 | self.eaf.add_secondary_linked_file( 184 | '/some/file/path/test.mpg', './test.mpg', 185 | time_origin=5, assoc_with='ef') 186 | self.assertEqual(self.eaf.linked_file_descriptors[1]['MIME_TYPE'], 187 | 'video/mpeg') 188 | self.assertEqual( 189 | self.eaf.linked_file_descriptors[1]['RELATIVE_LINK_URL'], 190 | './test.mpg') 191 | self.assertEqual(self.eaf.linked_file_descriptors[1]['TIME_ORIGIN'], 5) 192 | self.assertEqual( 193 | self.eaf.linked_file_descriptors[1]['ASSOCIATED_WITH'], 'ef') 194 | 195 | self.eaf.add_secondary_linked_file('/some/file/path/test.wierd', 196 | mimetype='none/wierd') 197 | self.assertEqual(self.eaf.linked_file_descriptors[2]['MIME_TYPE'], 198 | 'none/wierd') 199 | 200 | self.assertRaises(KeyError, 201 | self.eaf.add_secondary_linked_file, '/test.wierd') 202 | 203 | def test_add_tier(self): 204 | self.eaf.add_locale('ru') 205 | self.eaf.add_language('RUS') 206 | self.assertEqual(len(self.eaf.get_tier_names()), 1) 207 | self.eaf.add_tier('tier1', 'default-lt', locale='ru', language='RUS') 208 | self.assertEqual(len(self.eaf.get_tier_names()), 2) 209 | self.assertEqual( 210 | self.eaf.get_parameters_for_tier('tier1')['LINGUISTIC_TYPE_REF'], 211 | 'default-lt') 212 | self.assertEqual( 213 | self.eaf.get_parameters_for_tier('tier1')['DEFAULT_LOCALE'], 214 | 'ru') 215 | self.assertEqual( 216 | self.eaf.get_parameters_for_tier('tier1')['LANG_REF'], 'RUS') 217 | 218 | self.eaf.add_tier('tier2', 'non-existing-linguistic-type') 219 | self.assertEqual(len(self.eaf.get_tier_names()), 3) 220 | self.assertEqual( 221 | self.eaf.get_parameters_for_tier('tier2')['LINGUISTIC_TYPE_REF'], 222 | 'default-lt') 223 | self.assertEqual(['default', 'tier1', 'tier2'], 224 | sorted(self.eaf.get_tier_names())) 225 | 226 | self.eaf.add_tier('tier3', None, 'tier1', 'en', 'person', 'person2') 227 | self.assertEqual(self.eaf.get_parameters_for_tier('tier3'), { 228 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': None, 229 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 
230 | 'LANG_REF': None, 'PARTICIPANT': 'person', 'TIER_ID': 'tier3'}) 231 | 232 | self.eaf.add_tier('tier4', tier_dict={ 233 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': 'en', 234 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 235 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier4', 'LANG_ID': 'RUS'}) 236 | self.assertEqual(self.eaf.get_parameters_for_tier('tier4'), { 237 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': 'en', 238 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 239 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier4', 'LANG_ID': 'RUS'}) 240 | 241 | for tier in ['tier1', 'tier2', 'tier3']: 242 | self.assertEqual(self.eaf.tiers[tier][0], {}) 243 | self.assertEqual(self.eaf.tiers[tier][1], {}) 244 | 245 | self.assertRaises(ValueError, self.eaf.add_tier, '') 246 | 247 | def test_clean_time_slots(self): 248 | self.eaf.add_tier('tier1') 249 | self.eaf.add_tier('tier2') 250 | self.eaf.add_annotation('tier1', 0, 1, 'a1') 251 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 252 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 253 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 254 | ts = [x for x in self.eaf.timeslots] 255 | self.eaf.remove_annotation('tier1', 1500, False) 256 | self.assertEqual(len(ts), len(self.eaf.timeslots)) 257 | self.eaf.clean_time_slots() 258 | self.assertEqual(len(ts)-2, len(self.eaf.timeslots)) 259 | 260 | def test_copy_tier(self): 261 | self.eaf.add_tier('test1') 262 | self.eaf.add_annotation('test1', 0, 100, 'a') 263 | self.eaf.add_annotation('test1', 100, 200, 'a') 264 | self.eaf.add_tier('test2') 265 | self.eaf.add_annotation('test2', 0, 100, 'a') 266 | self.eaf.add_annotation('test2', 100, 200, 'a') 267 | target = Eaf() 268 | self.eaf.copy_tier(target, 'test2') 269 | self.assertEqual(sorted(target.get_parameters_for_tier('test2')), 270 | sorted(self.eaf.get_parameters_for_tier('test2'))) 271 | self.assertEqual( 272 | sorted(target.get_annotation_data_for_tier('test2')), 273 | sorted(self.eaf.get_annotation_data_for_tier('test2'))) 274 | 275 | def test_create_gaps_and_overlaps_tier(self): 276 | self.eaf.add_tier('t1') 277 | self.eaf.add_tier('t2') 278 | # Pause 279 | self.eaf.add_annotation('t1', 0, 1000) 280 | self.eaf.add_annotation('t1', 1200, 2000) 281 | # Gap 282 | self.eaf.add_annotation('t2', 2200, 3000) 283 | # Overlap 284 | self.eaf.add_annotation('t1', 2800, 4000) 285 | # Exact fto 286 | self.eaf.add_annotation('t2', 4000, 5000) 287 | # Within overlap 288 | self.eaf.add_annotation('t1', 4200, 4800) 289 | # Long pause 290 | self.eaf.add_annotation('t2', 14800, 15000) 291 | # Long gap 292 | self.eaf.add_annotation('t1', 20000, 20500) 293 | self.eaf.create_gaps_and_overlaps_tier('t1', 't2') 294 | self.eaf.create_gaps_and_overlaps_tier('t1', 't2', 'tt', 3000) 295 | self.assertEqual( 296 | sorted(self.eaf.get_annotation_data_for_tier('t1_t2_ftos')), 297 | [(1001, 1199, 'P1_t1'), (2001, 2199, 'G12_t1_t2'), 298 | (2800, 3000, 'O21_t2_t1'), (4200, 4800, 'W21_t2_t1'), 299 | (5001, 14799, 'P2_t2'), (15001, 19999, 'G21_t2_t1')]) 300 | self.assertEqual( 301 | sorted(self.eaf.get_annotation_data_for_tier('tt')), 302 | [(1001, 1199, 'P1_t1'), (2001, 2199, 'G12_t1_t2'), 303 | (2800, 3000, 'O21_t2_t1'), (4200, 4800, 'W21_t2_t1')]) 304 | self.assertEqual( 305 | sorted(self.eaf.get_annotation_data_for_tier('t1_t2_ftos') + 306 | [(4000, 4000, 'O12_t1_t2')]), 307 | list(self.eaf.get_gaps_and_overlaps('t1', 't2'))) 308 | self.assertEqual( 309 | sorted(self.eaf.get_annotation_data_for_tier('tt') + 310 | [(4000, 4000, 'O12_t1_t2')]), 311 | 
list(self.eaf.get_gaps_and_overlaps('t1', 't2', 3000))) 312 | 313 | def test_extract(self): 314 | self.eaf.add_tier('tier1') 315 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 316 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 317 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 318 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 319 | self.eaf.add_tier('tier2') 320 | e1 = self.eaf.extract(1500, 2500) 321 | self.assertEqual(e1.adocument, self.eaf.adocument) 322 | self.assertEqual(e1.licenses, self.eaf.licenses) 323 | self.assertEqual(e1.header, self.eaf.header) 324 | self.assertEqual(e1.media_descriptors, self.eaf.media_descriptors) 325 | self.assertEqual(e1.linked_file_descriptors, 326 | self.eaf.linked_file_descriptors) 327 | self.assertEqual(e1.linguistic_types, self.eaf.linguistic_types) 328 | self.assertEqual(e1.locales, self.eaf.locales) 329 | self.assertEqual(e1.constraints, self.eaf.constraints) 330 | self.assertEqual(e1.controlled_vocabularies, 331 | self.eaf.controlled_vocabularies) 332 | self.assertEqual(e1.external_refs, self.eaf.external_refs) 333 | self.assertEqual(e1.lexicon_refs, self.eaf.lexicon_refs) 334 | self.assertEqual(e1.get_tier_names(), self.eaf.get_tier_names()) 335 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), 336 | [(1000, 2000, 'a2'), (2000, 3000, 'a3')]) 337 | e1 = self.eaf.extract(1000, 2000) 338 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), 339 | [(0, 1000, 'a1'), (1000, 2000, 'a2'), (2000, 3000, 'a3')]) 340 | e1 = self.eaf.extract(4001, 30000) 341 | self.assertEqual(sorted(e1.get_annotation_data_for_tier('tier1')), []) 342 | 343 | def test_filter_annotations(self): 344 | self.eaf.add_tier('tier1') 345 | self.eaf.add_annotation('tier1', 0, 1, '1') 346 | self.eaf.add_annotation('tier1', 1, 2, '2') 347 | self.eaf.add_annotation('tier1', 2, 3, '3') 348 | self.eaf.add_annotation('tier1', 3, 4, '4') 349 | self.eaf.add_annotation('tier1', 4, 5, 'a') 350 | self.eaf.add_annotation('tier1', 5, 6, 'b') 351 | self.eaf.add_annotation('tier1', 6, 7, 'c') 352 | self.eaf.add_annotation('tier1', 7, 8, 'd') 353 | 354 | # No in or exclude 355 | self.eaf.filter_annotations('tier1') 356 | self.assertEqual( 357 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 358 | sorted(self.eaf.get_annotation_data_for_tier('tier1'))) 359 | 360 | # Inclusion 361 | self.eaf.filter_annotations('tier1', filtin=['1', '2', '3']) 362 | self.assertEqual( 363 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 364 | sorted([(0, 1, '1'), (2, 3, '3'), (1, 2, '2')])) 365 | self.eaf.filter_annotations('tier1', filtin=['[123]'], regex=True) 366 | self.assertEqual( 367 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 368 | sorted([(0, 1, '1'), (2, 3, '3'), (1, 2, '2')])) 369 | 370 | # Exclusion 371 | self.eaf.filter_annotations('tier1', filtex=['1', '2', '3', '4']) 372 | self.assertEqual( 373 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 374 | sorted([(4, 5, 'a'), (6, 7, 'c'), (5, 6, 'b'), (7, 8, 'd')])) 375 | self.eaf.filter_annotations('tier1', filtex=['[1234]'], regex=True) 376 | self.assertEqual( 377 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 378 | sorted([(4, 5, 'a'), (6, 7, 'c'), (5, 6, 'b'), (7, 8, 'd')])) 379 | 380 | # Combination 381 | self.eaf.filter_annotations('tier1', filtin=['1', '2', '3', '4'], 382 | filtex=['1', '2']) 383 | self.assertEqual( 384 | sorted(self.eaf.get_annotation_data_for_tier('tier1_filter')), 385 | sorted([(2, 3, '3'), (3, 4, 
'4')])) 386 | self.eaf.filter_annotations('tier1', tier_name='t', filtin=['[1234]'], 387 | filtex=['[12]'], regex=True) 388 | self.assertEqual( 389 | sorted(self.eaf.get_annotation_data_for_tier('t')), 390 | sorted([(2, 3, '3'), (3, 4, '4')])) 391 | 392 | self.assertRaises(KeyError, self.eaf.filter_annotations, 'a') 393 | 394 | def test_get_annotation_data_at_time(self): 395 | self.eaf.add_tier('tier1') 396 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 397 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 398 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 399 | self.assertEqual( 400 | sorted(self.eaf.get_annotation_data_at_time('tier1', 500)), 401 | [(0, 1000, 'a1')]) 402 | self.assertEqual( 403 | sorted(self.eaf.get_annotation_data_at_time('tier1', 1000)), 404 | sorted([(0, 1000, 'a1'), (1000, 2000, 'a2')])) 405 | self.assertEqual( 406 | sorted(self.eaf.get_annotation_data_at_time('tier1', 3001)), []) 407 | self.assertRaises(KeyError, 408 | self.eaf.get_annotation_data_at_time, 'tier2', 0) 409 | 410 | def test_get_annotation_data_after_time(self): 411 | self.eaf.add_tier('tier1') 412 | self.eaf.add_annotation('tier1', 500, 1000, 'a1') 413 | self.eaf.add_annotation('tier1', 2000, 3000, 'a2') 414 | self.eaf.add_annotation('tier1', 4000, 5000, 'a3') 415 | self.assertEqual( 416 | sorted(self.eaf.get_annotation_data_after_time('tier1', 3001)), 417 | [(4000, 5000, 'a3')]) 418 | self.assertEqual( 419 | sorted(self.eaf.get_annotation_data_after_time('tier1', 505)), 420 | [(500, 1000, 'a1')]) 421 | self.assertEqual( 422 | sorted(self.eaf.get_annotation_data_after_time('tier1', 5001)), 423 | []) 424 | self.assertRaises(KeyError, 425 | self.eaf.get_annotation_data_after_time, 'tier2', 0) 426 | 427 | def test_get_annotation_data_before_time(self): 428 | self.eaf.add_tier('tier1') 429 | self.eaf.add_annotation('tier1', 500, 1000, 'a1') 430 | self.eaf.add_annotation('tier1', 2000, 3000, 'a2') 431 | self.eaf.add_annotation('tier1', 4000, 5000, 'a3') 432 | self.assertEqual( 433 | sorted(self.eaf.get_annotation_data_before_time('tier1', 1001)), 434 | [(500, 1000, 'a1')]) 435 | self.assertEqual( 436 | sorted(self.eaf.get_annotation_data_before_time('tier1', 499)), 437 | []) 438 | self.assertEqual( 439 | sorted(self.eaf.get_annotation_data_before_time('tier1', 750)), 440 | [(500, 1000, 'a1')]) 441 | self.assertRaises(KeyError, 442 | self.eaf.get_annotation_data_before_time, 'tier2', 0) 443 | 444 | def test_get_annotation_data_between_times(self): 445 | self.eaf.add_tier('tier1') 446 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 447 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 448 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 449 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 450 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 451 | 'tier1', 1500, 2500)), [(1000, 2000, 'a2'), (2000, 3000, 'a3')]) 452 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 453 | 'tier1', 1000, 2000)), [(0, 1000, 'a1'), 454 | (1000, 2000, 'a2'), (2000, 3000, 'a3')]) 455 | self.assertEqual(sorted(self.eaf.get_annotation_data_between_times( 456 | 'tier1', 4001, 30000)), []) 457 | self.assertRaises( 458 | KeyError, self.eaf.get_annotation_data_between_times, 'ter1', 0, 1) 459 | 460 | def test_get_annotation_data_for_tier(self): 461 | self.eaf.add_tier('tier1') 462 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 463 | self.eaf.add_annotation('tier1', 1000, 2000, 'a1') 464 | self.eaf.add_annotation('tier1', 2000, 3000, 'a1') 465 | self.assertEqual( 466 | 
sorted(self.eaf.get_annotation_data_for_tier('tier1')), 467 | sorted([(0, 1000, 'a1'), (2000, 3000, 'a1'), (1000, 2000, 'a1')])) 468 | self.assertRaises(KeyError, 469 | self.eaf.get_annotation_data_for_tier, 'tier2') 470 | 471 | def test_get_child_tiers_for(self): 472 | self.eaf.add_tier('parent1') 473 | self.eaf.add_tier('parent2') 474 | self.eaf.add_tier('child11', parent='parent1') 475 | self.eaf.add_tier('child12', parent='parent1') 476 | self.eaf.add_tier('child13', parent='parent1') 477 | self.eaf.add_tier('orphan21') 478 | self.eaf.add_tier('orphan22') 479 | self.eaf.add_tier('orphan23') 480 | self.assertEqual(sorted(self.eaf.child_tiers_for('parent1')), 481 | ['child11', 'child12', 'child13']) 482 | self.assertEqual(sorted(self.eaf.child_tiers_for('parent2')), []) 483 | self.assertRaises(KeyError, self.eaf.child_tiers_for, 'parent3') 484 | 485 | def test_get_full_time_interval(self): 486 | self.assertEqual(self.eaf.get_full_time_interval(), (0, 0)) 487 | self.eaf.add_tier('tier1') 488 | self.eaf.add_annotation('tier1', 100, 500, 'a') 489 | self.eaf.add_annotation('tier1', 500, 1000, 'b') 490 | self.assertEqual(self.eaf.get_full_time_interval(), (100, 1000)) 491 | 492 | def test_get_gaps_and_overlaps2(self): 493 | self.eaf.add_tier('t1') 494 | self.eaf.add_tier('t2') 495 | # Pause 496 | self.eaf.add_annotation('t1', 0, 1000) 497 | self.eaf.add_annotation('t1', 1200, 2000) 498 | # Gap 499 | self.eaf.add_annotation('t2', 2200, 3000) 500 | # Overlap 501 | self.eaf.add_annotation('t1', 2800, 4000) 502 | # Exact fto 503 | self.eaf.add_annotation('t2', 4000, 5000) 504 | # Within overlap 505 | self.eaf.add_annotation('t1', 4200, 4800) 506 | # Long pause 507 | self.eaf.add_annotation('t2', 14800, 15000) 508 | # Long gap 509 | self.eaf.add_annotation('t1', 20000, 20500) 510 | g1 = self.eaf.get_gaps_and_overlaps2('t1', 't2') 511 | g2 = self.eaf.get_gaps_and_overlaps2('t1', 't2', 3000) 512 | self.assertEqual(sorted(g1), [ 513 | (1000, 1200, 'P1'), (2000, 2200, 'G12'), (2800, 3000, 'O21'), 514 | (4200, 4800, 'W21'), (5000, 14800, 'P2'), (15000, 20000, 'G21')]) 515 | self.assertEqual(sorted(g2), [ 516 | (1000, 1200, 'P1'), (2000, 2200, 'G12'), 517 | (2800, 3000, 'O21'), (4200, 4800, 'W21')]) 518 | self.assertRaises(KeyError, list, 519 | self.eaf.get_gaps_and_overlaps2('2', '3')) 520 | 521 | def test_get_controlled_vocabulary_names(self): 522 | self.eaf.add_controlled_vocabulary('cv1') 523 | self.eaf.add_controlled_vocabulary('cv2') 524 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 525 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 526 | ['cv1', 'cv2', 'cv3']) 527 | 528 | def test_get_cv_entry(self): 529 | self.eaf.add_controlled_vocabulary('cv1') 530 | self.eaf.add_language('eng') 531 | self.eaf.add_language('nld') 532 | self.eaf.add_cv_entry( 533 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 534 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 535 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None)}) 536 | self.eaf.add_cv_entry( 537 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 538 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 539 | 'cve1': ([('H', 'eng', 'hold'), ('H', 'nld', None)], None), 540 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 541 | self.assertRaises(KeyError, self.eaf.get_cv_entries, 'cv2') 542 | 543 | def test_get_cv_descriptions(self): 544 | self.eaf.add_controlled_vocabulary('cv1') 545 | self.eaf.add_language('eng') 546 | self.eaf.add_language('nld') 547 | 
self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 548 | self.eaf.add_cv_description('cv1', 'nld', None) 549 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [ 550 | ('eng', 'Gesture Phases'), ('nld', None)]) 551 | self.assertRaises(KeyError, self.eaf.get_cv_descriptions, 'cv2') 552 | 553 | def test_get_external_ref(self): 554 | self.eaf.add_external_ref('er1', 'ecv', 'location') 555 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 556 | self.assertEqual(self.eaf.get_external_ref('er1'), ('ecv', 'location')) 557 | self.assertRaises(KeyError, self.eaf.get_external_ref, 'er3') 558 | 559 | def test_get_external_ref_names(self): 560 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 561 | []) 562 | self.eaf.add_external_ref('er1', 'ecv', 'location') 563 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 564 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), 565 | ['er1', 'er2']) 566 | 567 | def test_get_lexicon_ref(self): 568 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 569 | 'lname1') 570 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 571 | 'lname1', 'dc1', 'dc1') 572 | self.assertEqual(self.eaf.get_lexicon_ref('id1'), { 573 | 'DATCAT_ID': None, 'NAME': 'long name', 'DATCAT_NAME': None, 'URL': 574 | 'url1', 'LEX_REF_ID': 'id1', 'LEXICON_NAME': 'lname1', 'TYPE': 575 | 't1', 'LEXICON_ID': 'lid1'}) 576 | self.assertEqual(self.eaf.get_lexicon_ref('id2'), { 577 | 'DATCAT_ID': 'dc1', 'NAME': 'long name', 'DATCAT_NAME': 'dc1', 578 | 'URL': 'url1', 'LEX_REF_ID': 'id2', 'LEXICON_NAME': 'lname1', 579 | 'TYPE': 't2', 'LEXICON_ID': 'lid1'}) 580 | self.assertRaises(KeyError, self.eaf.get_lexicon_ref, 'id3') 581 | 582 | def test_get_lexicon_ref_names(self): 583 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), []) 584 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 585 | 'lname1') 586 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 587 | 'lname1', 'dc1', 'dc1') 588 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 589 | ['id1', 'id2']) 590 | 591 | def test_get_languages(self): 592 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 593 | self.eaf.add_language('en') 594 | self.assertEqual( 595 | self.eaf.get_languages(), 596 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 597 | 598 | def test_get_licenses(self): 599 | self.eaf.add_license('k1', 'v1') 600 | self.eaf.add_license('k2', 'v2') 601 | self.eaf.add_license('k3', 'v3') 602 | self.eaf.add_license('k4', 'v4') 603 | self.eaf.add_license('k4', 'v5') 604 | self.assertEqual(self.eaf.get_licenses(), [ 605 | ('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), 606 | ('k4', 'v5')]) 607 | 608 | def test_get_linguistic_types_names(self): 609 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 610 | ['default-lt']) 611 | self.eaf.add_linguistic_type('l1') 612 | self.eaf.add_linguistic_type('l2') 613 | self.eaf.add_linguistic_type('l3') 614 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 615 | ['default-lt', 'l1', 'l2', 'l3']) 616 | 617 | def test_get_linked_files(self): 618 | self.eaf.add_linked_file('/some/file/path/test.wav') 619 | self.eaf.add_linked_file('/some/file/path/test.mpg', './test.mpg', 620 | time_origin=5, ex_from='ef') 621 | self.assertEqual(self.eaf.get_linked_files(), 622 | self.eaf.media_descriptors) 623 | 624 | def test_get_locales(self): 625 | self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 626 | 
self.eaf.add_locale('en') 627 | self.assertEqual( 628 | self.eaf.get_locales(), 629 | {'ru': ('RUS', 'YAWERTY (Phonetic)'), 'en': (None, None)}) 630 | 631 | def test_get_parameters_for_tier(self): 632 | self.eaf.add_tier('tier1', 'default-lt', 'tier1', None, 'person', 633 | 'person2') 634 | self.eaf.add_tier('tier2') 635 | self.assertEqual(self.eaf.get_parameters_for_tier('tier1'), { 636 | 'ANNOTATOR': 'person2', 'DEFAULT_LOCALE': None, 'LANG_REF': None, 637 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'PARENT_REF': 'tier1', 638 | 'PARTICIPANT': 'person', 'TIER_ID': 'tier1'}) 639 | self.assertEqual(self.eaf.get_parameters_for_tier('tier2'), { 640 | 'PARTICIPANT': None, 'DEFAULT_LOCALE': None, 641 | 'LINGUISTIC_TYPE_REF': 'default-lt', 'ANNOTATOR': None, 642 | 'LANG_REF': None, 'PARENT_REF': None, 'TIER_ID': 'tier2'}) 643 | 644 | def test_get_parameters_for_linguistic_type(self): 645 | self.eaf.add_tier('tier2') 646 | self.eaf.add_linguistic_type('l2', 'Time_Subdivision', False, True) 647 | self.assertEqual(self.eaf.get_parameters_for_linguistic_type('l2'), { 648 | 'CONSTRAINTS': 'Time_Subdivision', 'TIME_ALIGNABLE': 'false', 649 | 'LINGUISTIC_TYPE_ID': 'l2', 'GRAPHIC_REFERENCES': 'true'}) 650 | 651 | def test_get_properties(self): 652 | self.eaf.add_property('k1', 'v1') 653 | self.eaf.add_property('k2', 'v2') 654 | self.eaf.add_property('k3', 'v3') 655 | self.eaf.add_property('k4', 'v4') 656 | self.eaf.add_property('k4', 'v5') 657 | self.assertEqual(self.eaf.get_properties(), [ 658 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2'), 659 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 660 | 661 | def test_get_ref_annotation_at_time(self): 662 | self.eaf.add_tier('p1') 663 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 664 | self.eaf.add_tier('a1', 'c', 'p1') 665 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 666 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 667 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 668 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 669 | self.eaf.add_ref_annotation('a1', 'p1', 3000, 'ref2') 670 | self.assertEqual(self.eaf.get_ref_annotation_at_time('a1', 500), 671 | [(0, 1000, 'ref1', 'a1')]) 672 | self.assertEqual(self.eaf.get_ref_annotation_at_time('p1', 2500), []) 673 | self.assertRaises(KeyError, 674 | self.eaf.get_ref_annotation_at_time, 'eau', 0) 675 | 676 | def test_ref_get_annotation_data_after_time(self): 677 | pass 678 | 679 | def test_ref_get_annotation_data_before_time(self): 680 | pass 681 | 682 | def test_get_ref_annotation_data_between_times(self): 683 | self.eaf.add_tier('p1') 684 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 685 | self.eaf.add_tier('a1', 'c', 'p1') 686 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 687 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 688 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 689 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 690 | self.eaf.add_ref_annotation('a1', 'p1', 3000, 'ref2') 691 | self.assertEqual(sorted(self.eaf.get_ref_annotation_data_between_times( 692 | 'a1', 500, 3500)), sorted([ 693 | (0, 1000, 'ref1', 'a1'), (3000, 4000, 'ref2', 'a3')])) 694 | self.assertRaises(KeyError, 695 | self.eaf.get_ref_annotation_data_between_times, 696 | 'eau', 0, 1) 697 | 698 | def test_get_ref_annotation_data_for_tier(self): 699 | self.eaf.add_tier('p1') 700 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 701 | self.eaf.add_tier('a1', 'c', 'p1') 702 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 703 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 704 
| self.eaf.add_annotation('p1', 3000, 4000, 'a3') 705 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 706 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 707 | self.assertEqual( 708 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 709 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 710 | self.assertRaises(KeyError, 711 | self.eaf.get_ref_annotation_data_for_tier, 'aaa') 712 | self.assertEqual(self.eaf.get_ref_annotation_data_for_tier('p1'), []) 713 | 714 | def test_get_secondary_linked_files(self): 715 | self.eaf.add_secondary_linked_file('/some/file/path/test.wav') 716 | self.eaf.add_secondary_linked_file( 717 | '/some/file/path/test.mpg', './test.mpg', time_origin=5, 718 | assoc_with='ef') 719 | self.assertEqual(self.eaf.get_secondary_linked_files(), 720 | self.eaf.linked_file_descriptors) 721 | 722 | def test_get_tier_ids_for_linguistic_type(self): 723 | self.eaf.add_linguistic_type('l1') 724 | self.eaf.add_linguistic_type('l2') 725 | self.eaf.add_tier('t1', 'l1') 726 | self.eaf.add_tier('t2', 'l2') 727 | self.eaf.add_tier('t3', 'l2') 728 | self.eaf.add_tier('t4', parent='t1') 729 | self.eaf.add_tier('t5', 'l1', parent='t1') 730 | self.eaf.add_tier('t6') 731 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 732 | 'l1')), ['t1', 't5']) 733 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 734 | 'l2')), ['t2', 't3']) 735 | self.assertEqual(sorted(self.eaf.get_tier_ids_for_linguistic_type( 736 | 'default-lt', 't1')), ['t4']) 737 | 738 | def test_get_tier_names(self): 739 | self.eaf.add_tier('tier1') 740 | self.eaf.add_tier('tier2') 741 | self.eaf.add_tier('tier3') 742 | self.eaf.add_tier('tier4') 743 | self.assertEqual(sorted(self.eaf.get_tier_names()), 744 | ['default', 'tier1', 'tier2', 'tier3', 'tier4']) 745 | 746 | def test_merge_tiers(self): 747 | self.eaf.add_tier('tier1') 748 | self.eaf.add_tier('tier2') 749 | self.eaf.add_tier('tier3') 750 | # Overlap 751 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 752 | self.eaf.add_annotation('tier2', 500, 1500, 'b1') 753 | 754 | # Gap 755 | self.eaf.add_annotation('tier1', 2000, 2500, 'a2') 756 | self.eaf.add_annotation('tier2', 3000, 4000, 'b2') 757 | 758 | # Within 759 | self.eaf.add_annotation('tier1', 5000, 6000, 'a3') 760 | self.eaf.add_annotation('tier2', 5100, 5900, 'b3') 761 | 762 | # Three 763 | self.eaf.add_annotation('tier1', 6050, 6250, 'c') 764 | self.eaf.add_annotation('tier1', 6250, 6500, 'c') 765 | self.eaf.add_annotation('tier1', 6500, 6750, 'c') 766 | self.eaf.add_annotation('tier3', 6100, 6800, 'd') 767 | 768 | # Gap of 5 ms 769 | self.eaf.add_annotation('tier1', 7000, 7995, 'a4') 770 | self.eaf.add_annotation('tier2', 8000, 9000, 'b4') 771 | 772 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_0') 773 | self.eaf.merge_tiers(['tier1'], 'm_a', 5) 774 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_5', 5) 775 | self.eaf.merge_tiers(['tier1', 'tier2'], 'm_6', 6) 776 | self.eaf.merge_tiers(['tier1', 'tier2', 'tier3'], 'mm') 777 | 778 | m0 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 779 | (5000, 6000, 'a3_b3'), (6050, 6250, 'c'), (6250, 6500, 'c'), 780 | (6500, 6750, 'c'), (7000, 7995, 'a4'), (8000, 9000, 'b4')] 781 | m5 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 782 | (5000, 6000, 'a3_b3'), (6050, 6750, 'c_c_c'), (7000, 7995, 'a4'), 783 | (8000, 9000, 'b4')] 784 | m6 = [(0, 1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 785 | (5000, 6000, 'a3_b3'), (6050, 6750, 'c_c_c'), 786 | (7000, 9000, 'a4_b4')] 787 | mm = [(0, 
1500, 'a1_b1'), (2000, 2500, 'a2'), (3000, 4000, 'b2'), 788 | (5000, 6000, 'a3_b3'), (6050, 6800, 'c_d_c_c'), 789 | (7000, 7995, 'a4'), (8000, 9000, 'b4')] 790 | self.assertEqual( 791 | sorted(self.eaf.get_annotation_data_for_tier('m_0')), m0) 792 | self.assertEqual( 793 | sorted(self.eaf.get_annotation_data_for_tier('m_5')), m5) 794 | self.assertEqual( 795 | sorted(self.eaf.get_annotation_data_for_tier('m_6')), m6) 796 | self.assertEqual( 797 | sorted(self.eaf.get_annotation_data_for_tier('mm')), mm) 798 | self.assertRaises(KeyError, self.eaf.merge_tiers, ['a', 'b']) 799 | 800 | def test_remove_all_annotations_from_tier(self): 801 | self.eaf.add_tier('tier1') 802 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 803 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 804 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 805 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 806 | self.eaf.remove_all_annotations_from_tier('tier1') 807 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier1'), []) 808 | 809 | def test_remove_annotation(self): 810 | self.eaf.add_tier('tier1') 811 | self.eaf.add_annotation('tier1', 0, 1000, 'a1') 812 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 813 | self.eaf.add_annotation('tier1', 2000, 3000, 'a3') 814 | self.eaf.add_annotation('tier1', 3000, 4000, 'a4') 815 | self.assertEqual(self.eaf.remove_annotation('tier1', 500), 1) 816 | self.assertEqual( 817 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 818 | sorted([(1000, 2000, 'a2'), (2000, 3000, 'a3'), 819 | (3000, 4000, 'a4')])) 820 | 821 | self.assertEqual(self.eaf.remove_annotation('tier1', 2000), 2) 822 | self.assertEqual( 823 | sorted(self.eaf.get_annotation_data_for_tier('tier1')), 824 | sorted([(3000, 4000, 'a4')])) 825 | self.assertEqual(self.eaf.remove_annotation('tier1', 3500), 1) 826 | self.assertEqual( 827 | self.eaf.get_annotation_data_for_tier('tier1'), []) 828 | self.assertRaises(KeyError, self.eaf.remove_annotation, 'tier2', 0) 829 | 830 | def test_remove_controlled_vocabulary(self): 831 | self.eaf.add_controlled_vocabulary('cv1') 832 | self.eaf.add_controlled_vocabulary('cv2') 833 | self.eaf.add_controlled_vocabulary('cv3', 'er1') 834 | self.eaf.remove_controlled_vocabulary('cv3') 835 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 836 | ['cv1', 'cv2']) 837 | self.eaf.remove_controlled_vocabulary('cv1') 838 | self.assertEqual(sorted(self.eaf.get_controlled_vocabulary_names()), 839 | ['cv2']) 840 | self.assertRaises(KeyError, self.eaf.remove_controlled_vocabulary, 'c') 841 | 842 | def test_remove_cv_entry(self): 843 | self.eaf.add_controlled_vocabulary('cv1') 844 | self.eaf.add_language('eng') 845 | self.eaf.add_language('nld') 846 | self.eaf.add_cv_entry( 847 | 'cv1', 'cve1', [('H', 'eng', 'hold'), ('H', 'nld', None)]) 848 | self.eaf.add_cv_entry( 849 | 'cv1', 'cve2', [('S', 'eng', 'stroke'), ('S', 'nld', None)]) 850 | self.eaf.remove_cv_entry('cv1', 'cve1') 851 | self.assertEqual(self.eaf.get_cv_entries('cv1'), { 852 | 'cve2': ([('S', 'eng', 'stroke'), ('S', 'nld', None)], None)}) 853 | self.assertRaises(KeyError, self.eaf.remove_cv_entry, 'cv2', 'c') 854 | self.assertRaises(KeyError, self.eaf.remove_cv_entry, 'cv1', 'c') 855 | 856 | def test_remove_cv_description(self): 857 | self.eaf.add_controlled_vocabulary('cv1') 858 | self.eaf.add_language('eng') 859 | self.eaf.add_language('nld') 860 | self.eaf.add_cv_description('cv1', 'eng', 'Gesture Phases') 861 | self.eaf.add_cv_description('cv1', 'nld', None) 862 | self.eaf.remove_cv_description('cv1', 'nld')
863 | self.assertEqual(self.eaf.get_cv_descriptions('cv1'), [('eng', 'Gesture Phases')]) 864 | self.assertRaises(KeyError, self.eaf.get_cv_descriptions, 'cv2') 865 | 866 | def test_remove_external_ref(self): 867 | self.eaf.add_external_ref('er1', 'ecv', 'location') 868 | self.eaf.add_external_ref('er2', 'lexen_id', 'location2') 869 | self.eaf.remove_external_ref('er1') 870 | self.assertEqual(sorted(self.eaf.get_external_ref_names()), ['er2']) 871 | 872 | def test_remove_language(self): 873 | self.eaf.add_language('ru', 'RUS', 'YAWERTY (Phonetic)') 874 | self.eaf.add_language('en') 875 | self.eaf.remove_language('ru') 876 | self.assertEqual(self.eaf.get_languages(), {'en': (None, None)}) 877 | self.assertRaises(KeyError, self.eaf.remove_language, 'ru') 878 | 879 | def test_remove_lexicon_ref(self): 880 | self.eaf.add_lexicon_ref('id1', 'long name', 't1', 'url1', 'lid1', 881 | 'lname1') 882 | self.eaf.add_lexicon_ref('id2', 'long name', 't2', 'url1', 'lid1', 883 | 'lname1', 'dc1', 'dc1') 884 | self.eaf.remove_lexicon_ref('id1') 885 | self.assertEqual(sorted(self.eaf.get_lexicon_ref_names()), 886 | ['id2']) 887 | self.assertRaises(KeyError, self.eaf.remove_lexicon_ref, 'i') 888 | 889 | def test_remove_license(self): 890 | self.eaf.add_license('k1', 'v1') 891 | self.eaf.add_license('k2', 'v2') 892 | self.eaf.add_license('k3', 'v3') 893 | self.eaf.add_license('k4', 'v4') 894 | self.eaf.add_license('k4', 'v5') 895 | self.eaf.remove_license('a1') 896 | self.assertEqual(self.eaf.get_licenses(), [ 897 | ('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), 898 | ('k4', 'v5')]) 899 | self.eaf.remove_license('k1') 900 | self.assertEqual(self.eaf.get_licenses(), [ 901 | ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 902 | self.eaf.remove_license(url='v2') 903 | self.assertEqual(self.eaf.get_licenses(), [ 904 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 905 | self.eaf.remove_license('k4') 906 | self.assertEqual(self.eaf.get_licenses(), [('k3', 'v3')]) 907 | self.eaf.remove_license() 908 | self.assertEqual(self.eaf.get_licenses(), []) 909 | 910 | def test_remove_linguistic_type(self): 911 | self.eaf.add_linguistic_type('l1') 912 | self.eaf.add_linguistic_type('l2') 913 | self.eaf.add_linguistic_type('l3') 914 | self.eaf.remove_linguistic_type('l2') 915 | self.assertEqual(sorted(self.eaf.get_linguistic_type_names()), 916 | ['default-lt', 'l1', 'l3']) 917 | self.assertRaises(KeyError, self.eaf.remove_linguistic_type, 'a') 918 | 919 | def test_remove_linked_files(self): 920 | self.eaf.add_linked_file('/some/file/path/test.wav', 921 | './test.wav', time_origin=5, ex_from='ef1') 922 | self.eaf.add_linked_file('/some/file/path/test2.wav', 923 | './test2.wav', time_origin=10, ex_from='ef2') 924 | self.eaf.add_linked_file('/some/file/path/test3.mpg', 925 | './test3.mpg', time_origin=15, ex_from='ef3') 926 | self.eaf.add_linked_file('/some/file/path/test4.mpg', 927 | './test4.mpg', time_origin=20, ex_from='ef3') 928 | self.eaf.remove_linked_files(mimetype='audio/x-wav') 929 | self.assertEqual(len(self.eaf.get_linked_files()), 2) 930 | self.eaf.remove_linked_files(ex_from='ef1') 931 | self.assertEqual(len(self.eaf.get_linked_files()), 2) 932 | self.eaf.remove_linked_files(file_path='/some/file/path/test4.mpg') 933 | self.assertEqual(len(self.eaf.get_linked_files()), 1) 934 | self.eaf.remove_linked_files(relpath='./test3.mpg') 935 | self.assertEqual(self.eaf.get_linked_files(), []) 936 | 937 | def test_remove_locale(self): 938 | self.eaf.add_locale('ru', 'RUS', 'YAWERTY (Phonetic)') 939 | self.eaf.add_locale('en') 940 |
self.eaf.remove_locale('ru') 941 | self.assertEqual(self.eaf.get_locales(), {'en': (None, None)}) 942 | self.assertRaises(KeyError, self.eaf.remove_locale, 'ru') 943 | 944 | def test_remove_property(self): 945 | self.eaf.add_property('k1', 'v1') 946 | self.eaf.add_property('k2', 'v2') 947 | self.eaf.add_property('k3', 'v3') 948 | self.eaf.add_property('k4', 'v4') 949 | self.eaf.add_property('k4', 'v5') 950 | self.eaf.remove_property('a1') 951 | self.assertEqual(self.eaf.get_properties(), [ 952 | ('lastUsedAnnotation', 0), ('k1', 'v1'), ('k2', 'v2'), 953 | ('k3', 'v3'), ('k4', 'v4'), ('k4', 'v5')]) 954 | self.eaf.remove_property('k1') 955 | self.assertEqual(self.eaf.get_properties(), [ 956 | ('lastUsedAnnotation', 0), ('k2', 'v2'), ('k3', 'v3'), 957 | ('k4', 'v4'), ('k4', 'v5')]) 958 | self.eaf.remove_property(value='v2') 959 | self.assertEqual(self.eaf.get_properties(), [ 960 | ('lastUsedAnnotation', 0), ('k3', 'v3'), ('k4', 'v4'), 961 | ('k4', 'v5')]) 962 | self.eaf.remove_property('k4') 963 | self.assertEqual(self.eaf.get_properties(), [ 964 | ('lastUsedAnnotation', 0), ('k3', 'v3')]) 965 | self.eaf.remove_property() 966 | self.assertEqual(self.eaf.get_properties(), []) 967 | 968 | def test_remove_ref_annotation(self): 969 | self.eaf.add_tier('p1') 970 | self.eaf.add_linguistic_type('c', 'Symbolic_Association') 971 | self.eaf.add_tier('a1', 'c', 'p1') 972 | self.eaf.add_annotation('p1', 0, 1000, 'a1') 973 | self.eaf.add_annotation('p1', 1000, 2000, 'a2') 974 | self.eaf.add_annotation('p1', 3000, 4000, 'a3') 975 | self.eaf.add_ref_annotation('a1', 'p1', 500, 'ref1') 976 | self.eaf.add_ref_annotation('a1', 'p1', 3000) 977 | self.assertEqual( 978 | sorted([(3000, 4000, '', 'a3'), (0, 1000, 'ref1', 'a1')]), 979 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 980 | self.eaf.remove_ref_annotation('a1', 500) 981 | self.assertEqual( 982 | sorted([(3000, 4000, '', 'a3')]), 983 | sorted(self.eaf.get_ref_annotation_data_for_tier('a1'))) 984 | self.assertRaises(KeyError, self.eaf.remove_ref_annotation, 'aa', 0) 985 | 986 | def test_remove_secondary_linked_files(self): 987 | self.eaf.add_secondary_linked_file( 988 | '/some/file/path/test.wav', './test.wav', time_origin=5, 989 | assoc_with='ef1') 990 | self.eaf.add_secondary_linked_file( 991 | '/some/file/path/test2.wav', './test2.wav', time_origin=10, 992 | assoc_with='ef2') 993 | self.eaf.add_secondary_linked_file( 994 | '/some/file/path/test3.mpg', './test3.mpg', time_origin=15, 995 | assoc_with='ef3') 996 | self.eaf.add_secondary_linked_file( 997 | '/some/file/path/test4.mpg', './test4.mpg', time_origin=20, 998 | assoc_with='ef3') 999 | self.eaf.remove_secondary_linked_files(mimetype='audio/x-wav') 1000 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 2) 1001 | self.eaf.remove_secondary_linked_files(assoc_with='ef1') 1002 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 2) 1003 | self.eaf.remove_secondary_linked_files( 1004 | file_path='/some/file/path/test4.mpg') 1005 | self.assertEqual(len(self.eaf.get_secondary_linked_files()), 1) 1006 | self.eaf.remove_secondary_linked_files(relpath='./test3.mpg') 1007 | self.assertEqual(self.eaf.get_secondary_linked_files(), []) 1008 | 1009 | def test_remove_tier(self): 1010 | self.eaf.add_tier('tier1') 1011 | self.eaf.add_tier('tier2') 1012 | self.eaf.add_tier('tier3') 1013 | self.eaf.add_tier('tier4') 1014 | self.eaf.remove_tier('tier1') 1015 | self.assertEqual(sorted(self.eaf.get_tier_names()), 1016 | ['default', 'tier2', 'tier3', 'tier4']) 1017 | 
self.assertRaises(KeyError, self.eaf.remove_tier, 'tier1') 1018 | 1019 | def test_remove_tiers(self): 1020 | self.eaf.add_tier('tier1') 1021 | self.eaf.add_tier('tier2') 1022 | self.eaf.add_tier('tier3') 1023 | self.eaf.add_tier('tier4') 1024 | self.eaf.remove_tiers(['default', 'tier4', 'tier1']) 1025 | self.assertEqual(sorted(self.eaf.get_tier_names()), ['tier2', 'tier3']) 1026 | self.assertRaises(KeyError, self.eaf.remove_tiers, ['tier1']) 1027 | self.eaf.remove_tiers(['tier2', 'tier3']) 1028 | self.assertEqual(sorted(self.eaf.get_tier_names()), []) 1029 | 1030 | def test_rename_tier(self): 1031 | self.eaf.add_tier('child', parent='default') 1032 | self.eaf.add_tier('test1') 1033 | self.eaf.add_tier('test2') 1034 | self.eaf.add_tier('test3') 1035 | self.eaf.add_tier('test4') 1036 | self.eaf.rename_tier('test1', 'test1a') 1037 | self.eaf.rename_tier('default', 'test5') 1038 | self.assertEqual(sorted(self.eaf.get_tier_names()), sorted([ 1039 | 'child', 'test1a', 'test2', 'test3', 'test4', 'test5'])) 1040 | self.assertEqual(sorted(self.eaf.child_tiers_for('test5')), 1041 | sorted(['child'])) 1042 | 1043 | def test_shift_annotations(self): 1044 | self.eaf.add_tier('tier1') 1045 | self.eaf.add_tier('tier2') 1046 | # Overlap 1047 | self.eaf.add_annotation('tier1', 0, 100, 'a1') 1048 | self.eaf.add_annotation('tier1', 1000, 2000, 'a2') 1049 | self.eaf.add_annotation('tier2', 500, 1500, 'b1') 1050 | self.eaf.add_annotation('tier2', 0, 150, 'b1') 1051 | d1 = self.eaf.get_annotation_data_for_tier('tier1') 1052 | d2 = self.eaf.get_annotation_data_for_tier('tier2') 1053 | self.eaf.shift_annotations(0) 1054 | self.assertEqual(d1, self.eaf.get_annotation_data_for_tier('tier1')) 1055 | self.assertEqual(d2, self.eaf.get_annotation_data_for_tier('tier2')) 1056 | 1057 | self.eaf.shift_annotations(100) 1058 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier1'), 1059 | [(x+100, y+100, v) for x, y, v in d1]) 1060 | self.assertEqual(self.eaf.get_annotation_data_for_tier('tier2'), 1061 | [(x+100, y+100, v) for x, y, v in d2]) 1062 | self.assertEqual(self.eaf.shift_annotations(-200), 1063 | ([('tier2', 100, 250, 'b1')], 1064 | [('tier1', 100, 200, 'a1')])) 1065 | 1066 | def test_to_textgrid(self): 1067 | self.eaf.remove_tier('default') 1068 | tg = self.eaf.to_textgrid() 1069 | self.assertEqual(list(tg.get_tier_name_num()), []) 1070 | self.eaf.add_tier('t1') 1071 | self.eaf.add_annotation('t1', 0, 100, 'a11') 1072 | self.eaf.add_annotation('t1', 100, 200, 'a21') 1073 | self.eaf.add_annotation('t1', 200, 300, 'a31') 1074 | self.eaf.add_annotation('t1', 300, 400, 'a41') 1075 | self.eaf.add_tier('t2') 1076 | self.eaf.add_annotation('t2', 0, 100, 'a12') 1077 | self.eaf.add_annotation('t2', 100, 200, 'a22') 1078 | self.eaf.add_annotation('t2', 200, 300, 'a32') 1079 | self.eaf.add_annotation('t2', 300, 400, 'a42') 1080 | self.eaf.add_tier('t3') 1081 | self.eaf.add_annotation('t3', 0, 100, 'a13') 1082 | self.eaf.add_annotation('t3', 100, 200, 'a23') 1083 | self.eaf.add_annotation('t3', 200, 300, 'a33') 1084 | self.eaf.add_annotation('t3', 300, 400, 'a43') 1085 | self.eaf.add_tier('t4') 1086 | self.eaf.add_annotation('t4', 0, 100, 'a14') 1087 | self.eaf.add_annotation('t4', 100, 200, 'a24') 1088 | self.eaf.add_annotation('t4', 200, 300, 'a34') 1089 | self.eaf.add_annotation('t4', 300, 400, 'a44') 1090 | self.eaf.add_tier('t5') 1091 | self.eaf.add_annotation('t5', 0, 100, 'a15') 1092 | self.eaf.add_annotation('t5', 100, 200, 'a25') 1093 | self.eaf.add_annotation('t5', 200, 300, 'a35') 1094 | 
self.eaf.add_annotation('t5', 300, 400, 'a45') 1095 | self.eaf.add_tier('t6') 1096 | self.eaf.add_annotation('t6', 0, 100, 'a16') 1097 | self.eaf.add_annotation('t6', 100, 200, 'a26') 1098 | self.eaf.add_annotation('t6', 200, 300, 'a36') 1099 | self.eaf.add_annotation('t6', 300, 400, 'a46') 1100 | tg = self.eaf.to_textgrid() 1101 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1102 | ['t1', 't2', 't3', 't4', 't5', 't6']) 1103 | tg = self.eaf.to_textgrid(filtin=['t1', 't2', 't3']) 1104 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1105 | ['t1', 't2', 't3']) 1106 | tg = self.eaf.to_textgrid(filtex=['t1', 't2', 't3']) 1107 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1108 | ['t4', 't5', 't6']) 1109 | tg = self.eaf.to_textgrid(filtin=['t[123]'], regex=True) 1110 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1111 | ['t1', 't2', 't3']) 1112 | tg = self.eaf.to_textgrid(filtex=['t[123]'], regex=True) 1113 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1114 | ['t4', 't5', 't6']) 1115 | self.eaf.add_tier('t7') 1116 | tg = self.eaf.to_textgrid() 1117 | self.assertEqual(sorted(a[1] for a in tg.get_tier_name_num()), 1118 | ['t1', 't2', 't3', 't4', 't5', 't6', 't7']) 1119 | self.assertEqual(list(tg.get_tier('t1').get_intervals(sort=True)), 1120 | [(0.0, 0.1, 'a11'), (0.1, 0.2, 'a21'), 1121 | (0.2, 0.3, 'a31'), (0.3, 0.4, 'a41')]) 1122 | self.assertEqual(list(tg.get_tier('t7').get_intervals()), []) 1123 | 1124 | def test_add_nested_reference_annotations(self): 1125 | self.eaf.add_linguistic_type('refT') 1126 | self.eaf.add_linguistic_type('orthT', 'Symbolic_Association') 1127 | self.eaf.add_linguistic_type('wordT', 'Symbolic_Subdivision') 1128 | 1129 | self.eaf.add_tier('ref', ling='refT') 1130 | self.eaf.add_tier('orth', ling='orthT', parent='ref') 1131 | self.eaf.add_tier('word', ling='wordT', parent='orth') 1132 | 1133 | self.eaf.add_annotation('ref', 0, 1, 'test.001') 1134 | self.eaf.add_ref_annotation('orth', 'ref', 0, 'Words here.') 1135 | self.eaf.add_ref_annotation('word', 'orth', 0, 'Words') 1136 | 1137 | def test_parse_eaf(self): 1138 | pass 1139 | 1140 | def test_eaf_from_chat(self): 1141 | pass 1142 | 1143 | 1144 | @pytest.mark.parametrize( 1145 | 'eaf,schema', 1146 | [ 1147 | ('sample_2.8.eaf', 'EAFv2.8.xsd'), 1148 | ('sample_2.7.eaf', 'EAFv2.8.xsd'), 1149 | ('sample_3.0.eaf', 'EAFv3.0.xsd'), 1150 | ] 1151 | ) 1152 | def test_to_file_to_eaf(eaf, schema, test_dir, tmp_path): 1153 | filepath = str(tmp_path / 'test.eaf') 1154 | eaf = Eaf(str(test_dir / eaf)) 1155 | eaf.to_file(filepath) 1156 | 1157 | schema = etree.XMLSchema(etree.XML(test_dir.joinpath(schema).read_text(encoding='utf8'))) 1158 | xmlparser = etree.XMLParser(schema=schema) 1159 | etree.parse(str(filepath), xmlparser) 1160 | 1161 | 1162 | def test_to_textgrid(test_dir): 1163 | _ = Eaf(str(test_dir / 'sample_2.7.eaf')).to_textgrid() 1164 | -------------------------------------------------------------------------------- /test/test_praat.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import unittest 3 | import pytest 4 | 5 | from pympi.Praat import TextGrid 6 | 7 | 8 | class PraatTest(unittest.TestCase): 9 | def setUp(self): 10 | self.tg = TextGrid(xmax=20) 11 | self.maxDiff = None 12 | 13 | # Test all the Praat.TextGrid functions 14 | def test_sort_tiers(self): 15 | self.tg.add_tier('t2') 16 | self.tg.add_tier('t1') 17 | self.tg.add_tier('t3') 18 | self.tg.add_tier('t6') 19 |
self.tg.add_tier('t4') 20 | self.tg.add_tier('t5') 21 | 22 | tiernames = ['t1', 't2', 't3', 't4', 't5', 't6'] 23 | self.tg.sort_tiers() 24 | self.assertEqual([a[1] for a in self.tg.get_tier_name_num()], 25 | tiernames) 26 | self.tg.sort_tiers(lambda x: list(reversed(tiernames)).index(x.name)) 27 | self.assertEqual([a[1] for a in self.tg.get_tier_name_num()], 28 | list(reversed(tiernames))) 29 | 30 | def test_add_tier(self): 31 | self.assertRaises(ValueError, self.tg.add_tier, 'a', number=-1) 32 | self.assertRaises(ValueError, self.tg.add_tier, 'a', number=10) 33 | 34 | self.tg.add_tier('tier1') 35 | self.assertEqual(len(self.tg.tiers), 1) 36 | self.assertEqual(self.tg.tiers[0].tier_type, 'IntervalTier') 37 | 38 | self.tg.add_tier('tier2', tier_type='TextTier') 39 | self.assertEqual(len(self.tg.tiers), 2) 40 | self.assertEqual(self.tg.tiers[1].tier_type, 'TextTier') 41 | 42 | self.tg.add_tier('tier3') 43 | self.assertEqual(len(self.tg.tiers), 3) 44 | 45 | self.assertEqual(['tier1', 'tier2', 'tier3'], 46 | [a.name for a in self.tg.tiers]) 47 | 48 | self.tg.add_tier('tier4', number=2) 49 | self.assertEqual(len(self.tg.tiers), 4) 50 | self.assertEqual('tier4', self.tg.tiers[1].name) 51 | 52 | def test_remove_tier(self): 53 | self.assertRaises(Exception, self.tg.remove_tier, -1) 54 | self.assertRaises(Exception, self.tg.remove_tier, 10) 55 | 56 | self.tg.add_tier('tier1') 57 | self.tg.add_tier('tier2') 58 | self.tg.add_tier('tier3') 59 | self.tg.add_tier('tier4', number=2) 60 | 61 | self.tg.remove_tier(3) 62 | self.assertEqual(len(self.tg.tiers), 3) 63 | self.assertEqual(['tier1', 'tier3', 'tier4'], 64 | sorted(a.name for a in self.tg.tiers)) 65 | 66 | self.tg.remove_tier('tier1') 67 | self.assertEqual(len(self.tg.tiers), 2) 68 | self.assertEqual(['tier3', 'tier4'], 69 | sorted(a.name for a in self.tg.tiers)) 70 | 71 | self.tg.remove_tier(2) 72 | self.assertEqual(len(self.tg.tiers), 1) 73 | self.assertEqual(['tier4'], [a.name for a in self.tg.tiers]) 74 | 75 | self.tg.remove_tier('tier4') 76 | self.assertTrue(not self.tg.tiers) 77 | 78 | def test_get_tier(self): 79 | self.assertRaises(Exception, self.tg.get_tier, -1) 80 | self.assertRaises(Exception, self.tg.get_tier, 'a') 81 | self.assertRaises(Exception, self.tg.get_tier, 10) 82 | 83 | tier1 = self.tg.add_tier('tier1') 84 | tier2 = self.tg.add_tier('tier2') 85 | tier3 = self.tg.add_tier('tier3') 86 | 87 | self.assertEqual(tier1, self.tg.get_tier(tier1.name)) 88 | self.assertEqual(tier3, self.tg.get_tier(tier3.name)) 89 | 90 | self.assertEqual(self.tg.tiers[1], self.tg.get_tier(tier2.name)) 91 | 92 | def test_change_tier_name(self): 93 | self.assertRaises(Exception, 94 | self.tg.change_tier_name, -1, 'b') 95 | self.assertRaises(Exception, 96 | self.tg.change_tier_name, 'a', 'b') 97 | self.assertRaises(Exception, 98 | self.tg.change_tier_name, 10, 'b') 99 | self.tg.add_tier('tier1') 100 | tier2 = self.tg.add_tier('tier2') 101 | self.tg.add_tier('tier3') 102 | 103 | self.tg.change_tier_name('tier1', 'tier1a') 104 | self.assertEqual(['tier1a', 'tier2', 'tier3'], 105 | [a.name for a in self.tg.tiers]) 106 | self.tg.change_tier_name(self.tg.tiers.index(tier2)+1, 'tier2a') 107 | self.assertEqual(['tier1a', 'tier2a', 'tier3'], 108 | [a.name for a in self.tg.tiers]) 109 | self.tg.change_tier_name('tier1a', 'tier1') 110 | self.assertEqual(['tier1', 'tier2a', 'tier3'], 111 | [a.name for a in self.tg.tiers]) 112 | 113 | def test_get_tiers(self): 114 | self.tg.add_tier('tier1') 115 | self.tg.add_tier('tier2') 116 | self.tg.add_tier('tier3') 117 |
self.assertEqual(self.tg.tiers, 118 | list(self.tg.get_tiers())) 119 | 120 | def test_get_tier_name_num(self): 121 | self.tg.add_tier('tier1') 122 | self.tg.add_tier('tier2') 123 | self.tg.add_tier('tier3', number=2) 124 | self.assertEqual([(1, 'tier1'), (2, 'tier3'), (3, 'tier2')], 125 | list(self.tg.get_tier_name_num())) 126 | 127 | def test_to_eaf(self): 128 | tier1 = self.tg.add_tier('tier1') 129 | tier2 = self.tg.add_tier('tier2', tier_type='TextTier') 130 | tier1.add_interval(0, 1, 'int1') 131 | tier1.add_interval(2, 3, 'int2') 132 | tier1.add_interval(5, 6, 'int3') 133 | tier2.add_point(1.5, 'point1') 134 | tier2.add_point(2.5, 'point2') 135 | tier2.add_point(3.5, 'point3') 136 | eaf = self.tg.to_eaf(True, 0.03) 137 | self.assertRaises(ValueError, self.tg.to_eaf, pointlength=-1) 138 | self.assertEqual(sorted(eaf.get_tier_names()), 139 | sorted(['default', 'tier1', 'tier2'])) 140 | self.assertEqual(sorted(eaf.get_annotation_data_for_tier('tier1')), 141 | sorted([(0, 1000, 'int1'), (5000, 6000, 'int3'), 142 | (2000, 3000, 'int2')])) 143 | self.assertEqual(sorted(eaf.get_annotation_data_for_tier('tier2')), 144 | sorted([(2500, 2530, 'point2'), 145 | (1500, 1530, 'point1'), 146 | (3500, 3530, 'point3')])) 147 | 148 | # Test all the Praat.Tier functions 149 | def setup_tier(self): 150 | self.tier1 = self.tg.add_tier('tier1') 151 | self.tier2 = self.tg.add_tier('tier2', tier_type='TextTier') 152 | 153 | def test_add_point(self): 154 | self.setup_tier() 155 | self.assertRaises(Exception, self.tier1.add_point, 5, 'a') 156 | self.tier2.add_point(5, 't') 157 | self.assertEqual([(5, 't')], self.tier2.intervals) 158 | self.assertRaises(Exception, self.tier2.add_point, 5, 'a') 159 | self.tier2.add_point(6, 'a') 160 | self.assertEqual([(5, 't'), (6, 'a')], self.tier2.intervals) 161 | self.tier2.add_point(5, 'a', False) 162 | 163 | def test_add_interval(self): 164 | self.setup_tier() 165 | self.assertRaises(Exception, 166 | self.tier2.add_interval, 5, 6, 'a') 167 | self.assertRaises(Exception, self.tier2.add_interval, 6, 5, 'a') 168 | 169 | self.tier1.add_interval(5, 6, 't') 170 | self.assertEqual([(5, 6, 't')], self.tier1.intervals) 171 | self.assertRaises(Exception, self.tier1.add_interval, 5.5, 6.5, 't') 172 | self.tier1.add_interval(6, 7, 'a') 173 | self.assertEqual([(5, 6, 't'), (6, 7, 'a')], self.tier1.intervals) 174 | 175 | self.tier1.add_interval(5.5, 6.5, 't', False) 176 | 177 | def test_remove_interval(self): 178 | self.setup_tier() 179 | self.assertRaises(Exception, self.tier2.remove_interval, 5) 180 | self.tier1.add_interval(5, 6, 'a') 181 | self.tier1.add_interval(6, 7, 'b') 182 | self.tier1.add_interval(7, 8, 'c') 183 | self.tier1.remove_interval(5.5) 184 | self.assertEqual([(6, 7, 'b'), (7, 8, 'c')], 185 | self.tier1.intervals) 186 | self.tier1.remove_interval(8) 187 | self.assertEqual([(6, 7, 'b')], 188 | self.tier1.intervals) 189 | self.tier1.remove_interval(8) 190 | self.assertEqual([(6, 7, 'b')], 191 | self.tier1.intervals) 192 | 193 | def test_remove_point(self): 194 | self.setup_tier() 195 | self.assertRaises(Exception, self.tier1.remove_point, 5) 196 | self.tier2.add_point(5, 'a') 197 | self.tier2.add_point(6, 'b') 198 | self.tier2.add_point(7, 'c') 199 | self.tier2.remove_point(5) 200 | self.assertEqual([(6, 'b'), (7, 'c')], 201 | self.tier2.intervals) 202 | self.tier2.remove_point(7) 203 | self.assertEqual([(6, 'b')], 204 | self.tier2.intervals) 205 | self.tier2.remove_point(7) 206 | self.assertEqual([(6, 'b')], 207 | self.tier2.intervals) 208 | 209 | def 
test_get_intervals(self): 210 | self.setup_tier() 211 | self.tier1.add_interval(5, 6, 'a') 212 | self.tier1.add_interval(7, 8, 'c') 213 | self.tier1.add_interval(6, 7, 'b') 214 | self.assertEqual([(5, 6, 'a'), (6, 7, 'b'), (7, 8, 'c')], 215 | sorted(self.tier1.get_intervals())) 216 | self.tier2.add_point(5, 'a') 217 | self.tier2.add_point(7, 'c') 218 | self.tier2.add_point(6, 'b') 219 | self.assertEqual([(5, 'a'), (6, 'b'), (7, 'c')], 220 | sorted(self.tier2.get_intervals())) 221 | 222 | def test_clear_intervals(self): 223 | self.setup_tier() 224 | self.tier1.add_interval(5, 6, 'a') 225 | self.tier1.add_interval(6, 7, 'b') 226 | self.tier1.add_interval(7, 8, 'c') 227 | self.tier1.clear_intervals() 228 | self.assertEqual([], self.tier1.intervals) 229 | 230 | self.tier2.add_point(5, 'a') 231 | self.tier2.add_point(6, 'b') 232 | self.tier2.add_point(7, 'c') 233 | self.tier2.clear_intervals() 234 | self.assertEqual([], self.tier2.intervals) 235 | 236 | 237 | @pytest.mark.parametrize('codec', ['utf-8', 'latin_1', 'mac_roman']) 238 | def test_to_file(codec, tmp_path): 239 | tg = TextGrid(xmax=20) 240 | tier1 = tg.add_tier('tier') 241 | tier1.add_interval(1, 2, 'i1') 242 | tier1.add_interval(2, 3, 'i2') 243 | tier1.add_interval(4, 5, 'i3') 244 | 245 | tier4 = tg.add_tier('tier') 246 | tier4.add_interval(1, 2, u'i1ü') 247 | tier4.add_interval(2.0, 3, 'i2') 248 | tier4.add_interval(4, 5.0, 'i3') 249 | 250 | tier2 = tg.add_tier('tier2', tier_type='TextTier') 251 | tier2.add_point(1, u'p1ü') 252 | tier2.add_point(2, 'p1') 253 | tier2.add_point(3, 'p1') 254 | 255 | tempf = str(tmp_path / 'test') 256 | 257 | # Normal mode 258 | tg.to_file(pathlib.Path(tempf), codec=codec) 259 | TextGrid(tempf, codec=codec) 260 | # Short mode 261 | tg.to_file(tempf, codec=codec, mode='s') 262 | TextGrid(tempf, codec=codec) 263 | # Binary mode 264 | tg.to_file(tempf, mode='b') 265 | TextGrid(tempf) 266 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{35,36,37,38} 3 | skip_missing_interpreters = true 4 | 5 | [testenv] 6 | extras = test 7 | commands = pytest {posargs} 8 | --------------------------------------------------------------------------------