├── example
    ├── example.TextGrid
    └── example2.TextGrid
├── setup.py
├── LICENSE
├── .gitignore
├── README.md
├── textgrid.py
└── test
    └── test_textgrid.py


/example/example.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 4387.9766666666665 
 6 | tiers? <exists> 
 7 | size = 1
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 4387.9766666666665 
14 |         intervals: size = 1
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 5.537098932314087 
18 |             text = "z"
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(name='textgrid',
 4 |       version='0.2',
 5 |       description='tools for reading and converting textgrid files',
 6 |       url='http://github.com/kylerbrown/textgrid',
 7 |       author='Kyler Brown',
 8 |       author_email='kylerjbrown@gmail.com',
 9 |       license='MIT',
10 |       py_modules=["textgrid"],
11 |       entry_points= {
12 |           'console_scripts' : [
13 |               'textgrid2csv = textgrid:textgrid2csv',
14 |               ]
15 |           },
16 |       zip_safe=False)
17 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Kyler Brown
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/emacs,python
  3 | 
  4 | ### Emacs ###
  5 | # -*- mode: gitignore; -*-
  6 | *~
  7 | \#*\#
  8 | /.emacs.desktop
  9 | /.emacs.desktop.lock
 10 | *.elc
 11 | auto-save-list
 12 | tramp
 13 | .\#*
 14 | 
 15 | # Org-mode
 16 | .org-id-locations
 17 | *_archive
 18 | 
 19 | # flymake-mode
 20 | *_flymake.*
 21 | 
 22 | # eshell files
 23 | /eshell/history
 24 | /eshell/lastdir
 25 | 
 26 | # elpa packages
 27 | /elpa/
 28 | 
 29 | # reftex files
 30 | *.rel
 31 | 
 32 | # AUCTeX auto folder
 33 | /auto/
 34 | 
 35 | # cask packages
 36 | .cask/
 37 | 
 38 | 
 39 | ### Python ###
 40 | # Byte-compiled / optimized / DLL files
 41 | __pycache__/
 42 | *.py[cod]
 43 | *$py.class
 44 | 
 45 | # C extensions
 46 | *.so
 47 | 
 48 | # Distribution / packaging
 49 | .Python
 50 | env/
 51 | build/
 52 | develop-eggs/
 53 | dist/
 54 | downloads/
 55 | eggs/
 56 | .eggs/
 57 | lib/
 58 | lib64/
 59 | parts/
 60 | sdist/
 61 | var/
 62 | *.egg-info/
 63 | .installed.cfg
 64 | *.egg
 65 | 
 66 | # PyInstaller
 67 | #  Usually these files are written by a python script from a template
 68 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 69 | *.manifest
 70 | *.spec
 71 | 
 72 | # Installer logs
 73 | pip-log.txt
 74 | pip-delete-this-directory.txt
 75 | 
 76 | # Unit test / coverage reports
 77 | htmlcov/
 78 | .tox/
 79 | .coverage
 80 | .coverage.*
 81 | .cache
 82 | nosetests.xml
 83 | coverage.xml
 84 | *,cover
 85 | .hypothesis/
 86 | 
 87 | # Translations
 88 | *.mo
 89 | *.pot
 90 | 
 91 | # Django stuff:
 92 | *.log
 93 | 
 94 | # Sphinx documentation
 95 | docs/_build/
 96 | 
 97 | # PyBuilder
 98 | target/
 99 | 
100 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # textgrid
 2 | 
 3 | Version 0.2 
 4 | 
 5 | A minimal python TextGrid module and CSV converter.
 6 | 
 7 | Tier labels are saved as a column called "tier". Points are treated as intervals with identical start and stop values.
 8 | 
 9 | Example input:
10 | 
11 | ```
12 | File type = "ooTextFile"
13 | Object class = "TextGrid"
14 | 
15 | xmin = 0 
16 | xmax = 4387.9766666666665 
17 | tiers? <exists> 
18 | size = 1
19 | item []: 
20 |     item [1]:
21 |         class = "IntervalTier" 
22 |         name = "Mary" 
23 |         xmin = 0 
24 |         xmax = 4387.9766666666665 
25 |         intervals: size = 1
26 |         intervals [1]:
27 |             xmin = 0 
28 |             xmax = 5.537098932314087 
29 |             text = "z"
30 | ```
31 | 
 32 | Example output using `textgrid2csv example.TextGrid`:
33 | 
34 | ```
35 | start,stop,name,tier
36 | 0.0,5.537098932314087,z,Mary
37 | ```
38 | 
39 | ## installation
40 | 
41 |     git clone https://github.com/kylerbrown/textgrid.git
42 |     cd textgrid
43 |     pip install .
44 | 
45 |     # optional testing (requires pytest)
46 |     pytest -v
47 | 
48 | ## usage
49 | 
50 |     usage: textgrid2csv [-h] [-o OUTPUT] [--sep SEP] [--noheader] [--savegaps]
51 |                         TextGrid
52 | 
53 |     convert a TextGrid file to a CSV.
54 | 
55 |     positional arguments:
56 |       TextGrid              a TextGrid file to process
57 | 
58 |     optional arguments:
59 |       -h, --help            show this help message and exit
60 |       -o OUTPUT, --output OUTPUT
61 |                             (optional) outputfile
62 |       --sep SEP             separator to use in CSV output
63 |       --noheader            no header for the CSV
64 |       --savegaps            preserves intervals with no label
65 | 
66 | ## programmatic usage with Python and Pandas:
67 | ```
68 | >>> import pandas as pd
69 | >>> import textgrid
70 | >>> tgrid = textgrid.read_textgrid("example.TextGrid")
71 | >>> tgrid
72 | [Entry(start=0.0, stop=5.537098932314087, name='z', tier='Mary')]
73 | >>> pd.DataFrame(tgrid)
74 |    start      stop name    tier
75 | 0      0  5.537099     z    Mary
76 | >>> 
77 | ```
78 | 


--------------------------------------------------------------------------------
/example/example2.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 4387.9766666666665 
 6 | tiers? <exists> 
 7 | size = 3 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 4387.9766666666665 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 5.537098932314087 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 5.537098932314087 
21 |             xmax = 18.917761588532382 
22 |             text = "bar" 
23 |         intervals [3]:
24 |             xmin = 18.917761588532382 
25 |             xmax = 23.177071623244515 
26 |             text = "" 
27 |         intervals [4]:
28 |             xmin = 23.177071623244515 
29 |             xmax = 4387.9766666666665 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "John" 
34 |         xmin = 0 
35 |         xmax = 4387.9766666666665 
36 |         intervals: size = 5 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 1.4385175781571313 
40 |             text = "" 
41 |         intervals [2]:
42 |             xmin = 1.4385175781571313 
43 |             xmax = 3.9699942969011333 
44 |             text = "pip" 
45 |         intervals [3]:
46 |             xmin = 3.9699942969011333 
47 |             xmax = 5.537098932314087 
48 |             text = "" 
49 |         intervals [4]:
50 |             xmin = 5.537098932314087 
51 |             xmax = 8.711490373278787 
52 |             text = "foo" 
53 |         intervals [5]:
54 |             xmin = 8.711490373278787 
55 |             xmax = 4387.9766666666665 
56 |             text = "" 
57 |     item [3]:
58 |         class = "TextTier" 
59 |         name = "bell" 
60 |         xmin = 0 
61 |         xmax = 4387.9766666666665 
62 |         points: size = 2 
63 |         points [1]:
64 |             number = 15.140637595485778 
65 |             mark = "a" 
66 |         points [2]:
67 |             number = 21.248327456582416 
68 |             mark = "cool" 
69 | 


--------------------------------------------------------------------------------
/textgrid.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from collections import namedtuple
  4 | 
  5 | Entry = namedtuple("Entry", ["start",
  6 |                              "stop",
  7 |                              "name",
  8 |                              "tier"])
  9 | 
 10 | def read_textgrid(filename, fileEncoding="utf-8"):
 11 |     """
 12 |     Reads a TextGrid file into a dictionary object
 13 |     each dictionary has the following keys:
 14 |     "start"
 15 |     "stop"
 16 |     "name"
 17 |     "tier"
 18 | 
 19 |     Points and intervals use the same format, 
 20 |     but the value for "start" and "stop" are the same
 21 | 
 22 |     Optionally, supply fileEncoding as argument. This defaults to "utf-8", tested with 'utf-16-be'.
 23 |     """
 24 |     if isinstance(filename, str):
 25 |         with open(filename, "r", encoding=fileEncoding) as f:
 26 |             content = _read(f)
 27 |     elif hasattr(filename, "readlines"):
 28 |         content = _read(filename)
 29 |     else:
 30 |         raise TypeError("filename must be a string or a readable buffer")
 31 | 
 32 |     interval_lines = [i for i, line in enumerate(content)
 33 |                       if line.startswith("intervals [")
 34 |                       or line.startswith("points [")]
 35 | #    tier_lines, tiers =  [(i, line.split('"')[-2]) 
 36 | #            for i, line in enumerate(content)
 37 | #            if line.startswith("name =")]
 38 |     tier_lines = []
 39 |     tiers = []
 40 |     for i, line in enumerate(content):
 41 |         if line.startswith("name ="):
 42 |             tier_lines.append(i)
 43 |             tiers.append(line.split('"')[-2]) 
 44 | 
 45 |     interval_tiers =  _find_tiers(interval_lines, tier_lines, tiers)
 46 |     assert len(interval_lines) == len(interval_tiers)
 47 |     return [_build_entry(i, content, t) for i, t in zip(interval_lines, interval_tiers)]
 48 | 
 49 | 
 50 | def _find_tiers(interval_lines, tier_lines, tiers):
 51 |     tier_pairs = zip(tier_lines, tiers)
 52 |     cur_tline, cur_tier = next(tier_pairs) 
 53 |     next_tline, next_tier = next(tier_pairs, (None, None))
 54 |     tiers = []
 55 |     for il in interval_lines:
 56 |         if next_tline is not None and il > next_tline:
 57 |             cur_tline, cur_tier = next_tline, next_tier
 58 |             next_tline, next_tier = next(tier_pairs, (None, None))           
 59 |         tiers.append(cur_tier)
 60 |     return tiers 
 61 | 
 62 | 
 63 | def _read(f):
 64 |     return [x.strip() for x in f.readlines()]
 65 | 
 66 | def write_csv(textgrid_list, filename=None, sep=",", header=True, save_gaps=False, meta=True):
 67 |     """
 68 |     Writes a list of textgrid dictionaries to a csv file.
 69 |     If no filename is specified, csv is printed to standard out.
 70 |     """
 71 |     columns = list(Entry._fields)
 72 |     if filename:
 73 |         f = open(filename, "w")
 74 |     if header:
 75 |         hline = sep.join(columns)
 76 |         if filename:
 77 |             f.write(hline + "\n")
 78 |         else:
 79 |             print(hline)
 80 |     for entry in textgrid_list:
 81 |         if entry.name or save_gaps:  # skip unlabeled intervals
 82 |             row = sep.join(str(x) for x in list(entry))
 83 |             if filename:
 84 |                 f.write(row + "\n")
 85 |             else:
 86 |                 print(row)
 87 |     if filename:
 88 |         f.flush()
 89 |         f.close()
 90 |     if meta:
 91 |         with open(filename + ".meta", "w") as metaf:
 92 |             metaf.write("""---\nunits: s\ndatatype: 1002\n""")
 93 |         
 94 | def _build_entry(i, content, tier):
 95 |     """
 96 |     takes the ith line that begin an interval and returns
 97 |     a dictionary of values
 98 |     """
 99 |     start = _get_float_val(content[i + 1])  # addition is cheap typechecking
100 |     if content[i].startswith("intervals ["):
101 |         offset = 1
102 |     else:
103 |         offset = 0 # for "point" objects
104 |     stop = _get_float_val(content[i + 1 + offset])
105 |     label = _get_str_val(content[i + 2 + offset])
106 |     return Entry(start=start, stop=stop, name=label, tier=tier)
107 | 
108 | 
109 | def _get_float_val(string):
110 |     """
111 |     returns the last word in a string as a float
112 |     """
113 |     return float(string.split()[-1])
114 | 
115 | 
116 | def _get_str_val(string):
117 |     """
118 |     returns the last item in quotes from a string
119 |     """
120 |     return string.split('"')[-2]
121 | 
122 | 
def textgrid2csv():
    """
    Command line entry point: converts one TextGrid file to csv.

    Parses the arguments from the command line, reads the TextGrid file and
    writes the csv to the file given with -o/--output, or prints it to
    standard out when no output file is given.
    """
    import argparse
    parser = argparse.ArgumentParser(description="convert a TextGrid file to a CSV.")
    parser.add_argument("TextGrid",
                        help="a TextGrid file to process")
    parser.add_argument("-o", "--output", help="(optional) outputfile")
    parser.add_argument("--sep", help="separator to use in CSV output",
                        default=",")
    # store_false: the stored value is True unless --noheader is given, so it
    # is kept under the name "header" and passed on as such
    parser.add_argument("--noheader", help="no header for the CSV",
                        dest="header", action="store_false")
    parser.add_argument("--savegaps", help="preserves intervals with no label",
                        action="store_true")
    args = parser.parse_args()
    tgrid = read_textgrid(args.TextGrid)
    # The .meta file is named after the output file, so it can only be
    # requested when one was given; asking for it while printing to standard
    # out made the command fail after the csv had been printed.
    write_csv(tgrid, args.output, args.sep, args.header, args.savegaps,
              meta=bool(args.output))
138 | 
139 | 
140 | if __name__ == "__main__":
141 |     textgrid2csv()
142 | 
143 | 


--------------------------------------------------------------------------------
/test/test_textgrid.py:
--------------------------------------------------------------------------------
  1 | # py.test unit tests
  2 | 
  3 | from io import StringIO
  4 | import textgrid
  5 | 
  6 | example_file1 = StringIO("""File type = "ooTextFile"
  7 | Object class = "TextGrid"
  8 | 
  9 | xmin = 0 
 10 | xmax = 4387.9766666666665 
 11 | tiers? <exists> 
 12 | size = 3 
 13 | item []: 
 14 |     item [1]:
 15 |         class = "IntervalTier" 
 16 |         name = "Mary" 
 17 |         xmin = 0 
 18 |         xmax = 4387.9766666666665 
 19 |         intervals: size = 4 
 20 |         intervals [1]:
 21 |             xmin = 0 
 22 |             xmax = 5.537098932314087 
 23 |             text = "" 
 24 |         intervals [2]:
 25 |             xmin = 5.537098932314087 
 26 |             xmax = 18.917761588532382 
 27 |             text = "bar" 
 28 |         intervals [3]:
 29 |             xmin = 18.917761588532382 
 30 |             xmax = 23.177071623244515 
 31 |             text = "" 
 32 |         intervals [4]:
 33 |             xmin = 23.177071623244515 
 34 |             xmax = 4387.9766666666665 
 35 |             text = "" 
 36 |     item [2]:
 37 |         class = "IntervalTier" 
 38 |         name = "John" 
 39 |         xmin = 0 
 40 |         xmax = 4387.9766666666665 
 41 |         intervals: size = 5 
 42 |         intervals [1]:
 43 |             xmin = 0 
 44 |             xmax = 1.4385175781571313 
 45 |             text = "" 
 46 |         intervals [2]:
 47 |             xmin = 1.4385175781571313 
 48 |             xmax = 3.9699942969011333 
 49 |             text = "pip" 
 50 |         intervals [3]:
 51 |             xmin = 3.9699942969011333 
 52 |             xmax = 5.537098932314087 
 53 |             text = "" 
 54 |         intervals [4]:
 55 |             xmin = 5.537098932314087 
 56 |             xmax = 8.711490373278787 
 57 |             text = "foo" 
 58 |         intervals [5]:
 59 |             xmin = 8.711490373278787 
 60 |             xmax = 4387.9766666666665 
 61 |             text = "" 
 62 |     item [3]:
 63 |         class = "TextTier" 
 64 |         name = "bell" 
 65 |         xmin = 0 
 66 |         xmax = 4387.9766666666665 
 67 |         points: size = 2 
 68 |         points [1]:
 69 |             number = 15.140637595485778 
 70 |             mark = "a" 
 71 |         points [2]:
 72 |             number = 21.248327456582416 
 73 |             mark = "cool" 
 74 | """)
 75 | 
 76 | example_file2 = StringIO("""File type = "ooTextFile"
 77 | Object class = "TextGrid"
 78 | 
 79 | xmin = 0 
 80 | xmax = 4387.9766666666665 
 81 | tiers? <exists> 
 82 | size = 1
 83 | item []: 
 84 |     item [1]:
 85 |         class = "IntervalTier" 
 86 |         name = "Mary" 
 87 |         xmin = 0 
 88 |         xmax = 4387.9766666666665 
 89 |         intervals: size = 1
 90 |         intervals [1]:
 91 |             xmin = 0 
 92 |             xmax = 5.537098932314087 
 93 |             text = "z" 
 94 | """)
 95 | example_file3 = StringIO("""
 96 |     item [3]:
 97 |         class = "TextTier" 
 98 |         name = "bell" 
 99 |         xmin = 0 
100 |         xmax = 4387.9766666666665 
101 |         points: size = 2 
102 |         points [1]:
103 |             number = 15.140637595485778 
104 |             mark = "a" 
105 |         points [2]:
106 |             number = 21.248327456582416 
107 |             mark = "cool" 
108 |         """)
def test_get_float_val():
    """_get_float_val returns the last word of a line as a float."""
    expected_by_line = [
        ("foo 123", 123),
        ("number = 21.248327456582416", 21.248327456582416),
        ("xmax = 4387.9766666666665", 4387.9766666666665),
        ("xmin = 5.537098932314087 \n", 5.537098932314087),
    ]
    for line, expected in expected_by_line:
        assert textgrid._get_float_val(line) == expected
114 | 
115 | 
def test_get_str_val():
    """_get_str_val returns the quoted text, with or without trailing space."""
    expected_by_line = {
        'mark = "cool" ': "cool",
        'text = "pip"': "pip",
    }
    for line, expected in expected_by_line.items():
        assert textgrid._get_str_val(line) == expected
119 | 
120 | 
def test_build_entry_point():
    """A point becomes an Entry whose start and stop are the same time."""
    lines = [
        "points [1]:",
        "number = 15.140637595485778 ",
        'mark = "a"',
    ]
    point = textgrid._build_entry(0, lines, tier='testtier')
    assert (point.start, point.stop) == (15.140637595485778, 15.140637595485778)
    assert (point.name, point.tier) == ("a", "testtier")
130 | 
def test_build_entry_interval():
    """An interval becomes an Entry with distinct start/stop, label and tier."""
    content = ["intervals [4]:",
               "xmin = 5.537098932314087",
               "xmax = 8.711490373278787",
               'text = "foo"']
    entry = textgrid._build_entry(0, content, tier='testtier')
    assert entry.start == 5.537098932314087
    assert entry.stop == 8.711490373278787
    assert entry.name == "foo"
    # the tier must be passed through, exactly as checked for points
    assert entry.tier == "testtier"
140 | 
141 | 
def test_points():
    """A lone point tier is read as entries whose start equals their stop."""
    tgrid = textgrid.read_textgrid(example_file3)
    assert len(tgrid) == 2
    first, second = tgrid
    assert first.start == first.stop == 15.140637595485778
    assert second.start == second.stop == 21.248327456582416
    assert [entry.name for entry in tgrid] == ["a", "cool"]
    assert all(entry.tier == "bell" for entry in tgrid)
145 | 
def test_read_short():
    """The single-interval example yields exactly one fully populated Entry."""
    entries = textgrid.read_textgrid(example_file2)
    assert len(entries) == 1
    only = entries[0]
    assert (only.start, only.stop) == (0, 5.537098932314087)
    assert (only.name, only.tier) == ("z", "Mary")
154 | 
def test_read_long():
    """The three-tier example yields all 11 intervals and points in file order."""
    entries = textgrid.read_textgrid(example_file1)
    assert len(entries) == 11
    first, last = entries[0], entries[-1]
    assert (first.start, first.stop, first.name) == (0, 5.537098932314087, "")
    assert (last.name, last.tier) == ("cool", "bell")
165 | 


--------------------------------------------------------------------------------