├── example ├── example.TextGrid └── example2.TextGrid ├── setup.py ├── LICENSE ├── .gitignore ├── README.md ├── textgrid.py └── test └── test_textgrid.py /example/example.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 4387.9766666666665 6 | tiers? 7 | size = 1 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "Mary" 12 | xmin = 0 13 | xmax = 4387.9766666666665 14 | intervals: size = 1 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 5.537098932314087 18 | text = "z" 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='textgrid', 4 | version='0.2', 5 | description='tools for reading and converting textgrid files', 6 | url='http://github.com/kylerbrown/textgrid', 7 | author='Kyler Brown', 8 | author_email='kylerjbrown@gmail.com', 9 | license='MIT', 10 | py_modules=["textgrid"], 11 | entry_points= { 12 | 'console_scripts' : [ 13 | 'textgrid2csv = textgrid:textgrid2csv', 14 | ] 15 | }, 16 | zip_safe=False) 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Kyler Brown 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/emacs,python 3 | 4 | ### Emacs ### 5 | # -*- mode: gitignore; -*- 6 | *~ 7 | \#*\# 8 | /.emacs.desktop 9 | /.emacs.desktop.lock 10 | *.elc 11 | auto-save-list 12 | tramp 13 | .\#* 14 | 15 | # Org-mode 16 | .org-id-locations 17 | *_archive 18 | 19 | # flymake-mode 20 | *_flymake.* 21 | 22 | # eshell files 23 | /eshell/history 24 | /eshell/lastdir 25 | 26 | # elpa packages 27 | /elpa/ 28 | 29 | # reftex files 30 | *.rel 31 | 32 | # AUCTeX auto folder 33 | /auto/ 34 | 35 | # cask packages 36 | .cask/ 37 | 38 | 39 | ### Python ### 40 | # Byte-compiled / optimized / DLL files 41 | __pycache__/ 42 | *.py[cod] 43 | *$py.class 44 | 45 | # C extensions 46 | *.so 47 | 48 | # Distribution / packaging 49 | .Python 50 | env/ 51 | build/ 52 | develop-eggs/ 53 | dist/ 54 | downloads/ 55 | eggs/ 56 | .eggs/ 57 | lib/ 58 | lib64/ 59 | parts/ 60 | sdist/ 61 | var/ 62 | *.egg-info/ 63 | .installed.cfg 64 | *.egg 65 | 66 | # PyInstaller 67 | # Usually these files are written by a python script from a template 68 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 69 | *.manifest 70 | *.spec 71 | 72 | # Installer logs 73 | pip-log.txt 74 | pip-delete-this-directory.txt 75 | 76 | # Unit test / coverage reports 77 | htmlcov/ 78 | .tox/ 79 | .coverage 80 | .coverage.* 81 | .cache 82 | nosetests.xml 83 | coverage.xml 84 | *,cover 85 | .hypothesis/ 86 | 87 | # Translations 88 | *.mo 89 | *.pot 90 | 91 | # Django stuff: 92 | *.log 93 | 94 | # Sphinx documentation 95 | docs/_build/ 96 | 97 | # PyBuilder 98 | target/ 99 | 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # textgrid 2 | 3 | Version 0.2 4 | 5 | A minimal python TextGrid module and CSV converter. 6 | 7 | Tier labels are saved as a column called "tier". Points are treated as intervals with identical start and stop values. 8 | 9 | Example input: 10 | 11 | ``` 12 | File type = "ooTextFile" 13 | Object class = "TextGrid" 14 | 15 | xmin = 0 16 | xmax = 4387.9766666666665 17 | tiers? 18 | size = 1 19 | item []: 20 | item [1]: 21 | class = "IntervalTier" 22 | name = "Mary" 23 | xmin = 0 24 | xmax = 4387.9766666666665 25 | intervals: size = 1 26 | intervals [1]: 27 | xmin = 0 28 | xmax = 5.537098932314087 29 | text = "z" 30 | ``` 31 | 32 | Example ouput using `textgrid2csv example.TextGrid` 33 | 34 | ``` 35 | start,stop,name,tier 36 | 0.0,5.537098932314087,z,Mary 37 | ``` 38 | 39 | ## installation 40 | 41 | git clone https://github.com/kylerbrown/textgrid.git 42 | cd textgrid 43 | pip install . 44 | 45 | # optional testing (requires pytest) 46 | pytest -v 47 | 48 | ## usage 49 | 50 | usage: textgrid2csv [-h] [-o OUTPUT] [--sep SEP] [--noheader] [--savegaps] 51 | TextGrid 52 | 53 | convert a TextGrid file to a CSV. 54 | 55 | positional arguments: 56 | TextGrid a TextGrid file to process 57 | 58 | optional arguments: 59 | -h, --help show this help message and exit 60 | -o OUTPUT, --output OUTPUT 61 | (optional) outputfile 62 | --sep SEP separator to use in CSV output 63 | --noheader no header for the CSV 64 | --savegaps preserves intervals with no label 65 | 66 | ## programmatic usage with Python and Pandas: 67 | ``` 68 | >>> import pandas as pd 69 | >>> import textgrid 70 | >>> tgrid = textgrid.read_textgrid("example.TextGrid") 71 | >>> tgrid 72 | [Entry(start=0.0, stop=5.537098932314087, name='z', tier='Mary')] 73 | >>> pd.DataFrame(tgrid) 74 | start stop name tier 75 | 0 0 5.537099 z Mary 76 | >>> 77 | ``` 78 | -------------------------------------------------------------------------------- /example/example2.TextGrid: -------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 4387.9766666666665 6 | tiers? 7 | size = 3 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "Mary" 12 | xmin = 0 13 | xmax = 4387.9766666666665 14 | intervals: size = 4 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 5.537098932314087 18 | text = "" 19 | intervals [2]: 20 | xmin = 5.537098932314087 21 | xmax = 18.917761588532382 22 | text = "bar" 23 | intervals [3]: 24 | xmin = 18.917761588532382 25 | xmax = 23.177071623244515 26 | text = "" 27 | intervals [4]: 28 | xmin = 23.177071623244515 29 | xmax = 4387.9766666666665 30 | text = "" 31 | item [2]: 32 | class = "IntervalTier" 33 | name = "John" 34 | xmin = 0 35 | xmax = 4387.9766666666665 36 | intervals: size = 5 37 | intervals [1]: 38 | xmin = 0 39 | xmax = 1.4385175781571313 40 | text = "" 41 | intervals [2]: 42 | xmin = 1.4385175781571313 43 | xmax = 3.9699942969011333 44 | text = "pip" 45 | intervals [3]: 46 | xmin = 3.9699942969011333 47 | xmax = 5.537098932314087 48 | text = "" 49 | intervals [4]: 50 | xmin = 5.537098932314087 51 | xmax = 8.711490373278787 52 | text = "foo" 53 | intervals [5]: 54 | xmin = 8.711490373278787 55 | xmax = 4387.9766666666665 56 | text = "" 57 | item [3]: 58 | class = "TextTier" 59 | name = "bell" 60 | xmin = 0 61 | xmax = 4387.9766666666665 62 | points: size = 2 63 | points [1]: 64 | number = 15.140637595485778 65 | mark = "a" 66 | points [2]: 67 | number = 21.248327456582416 68 | mark = "cool" 69 | -------------------------------------------------------------------------------- /textgrid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from collections import namedtuple 4 | 5 | Entry = namedtuple("Entry", ["start", 6 | "stop", 7 | "name", 8 | "tier"]) 9 | 10 | def read_textgrid(filename, fileEncoding="utf-8"): 11 | """ 12 | Reads a TextGrid file into a dictionary object 13 | each dictionary has the following keys: 14 | "start" 15 | "stop" 16 | "name" 17 | "tier" 18 | 19 | Points and intervals use the same format, 20 | but the value for "start" and "stop" are the same 21 | 22 | Optionally, supply fileEncoding as argument. This defaults to "utf-8", tested with 'utf-16-be'. 23 | """ 24 | if isinstance(filename, str): 25 | with open(filename, "r", encoding=fileEncoding) as f: 26 | content = _read(f) 27 | elif hasattr(filename, "readlines"): 28 | content = _read(filename) 29 | else: 30 | raise TypeError("filename must be a string or a readable buffer") 31 | 32 | interval_lines = [i for i, line in enumerate(content) 33 | if line.startswith("intervals [") 34 | or line.startswith("points [")] 35 | # tier_lines, tiers = [(i, line.split('"')[-2]) 36 | # for i, line in enumerate(content) 37 | # if line.startswith("name =")] 38 | tier_lines = [] 39 | tiers = [] 40 | for i, line in enumerate(content): 41 | if line.startswith("name ="): 42 | tier_lines.append(i) 43 | tiers.append(line.split('"')[-2]) 44 | 45 | interval_tiers = _find_tiers(interval_lines, tier_lines, tiers) 46 | assert len(interval_lines) == len(interval_tiers) 47 | return [_build_entry(i, content, t) for i, t in zip(interval_lines, interval_tiers)] 48 | 49 | 50 | def _find_tiers(interval_lines, tier_lines, tiers): 51 | tier_pairs = zip(tier_lines, tiers) 52 | cur_tline, cur_tier = next(tier_pairs) 53 | next_tline, next_tier = next(tier_pairs, (None, None)) 54 | tiers = [] 55 | for il in interval_lines: 56 | if next_tline is not None and il > next_tline: 57 | cur_tline, cur_tier = next_tline, next_tier 58 | next_tline, next_tier = next(tier_pairs, (None, None)) 59 | tiers.append(cur_tier) 60 | return tiers 61 | 62 | 63 | def _read(f): 64 | return [x.strip() for x in f.readlines()] 65 | 66 | def write_csv(textgrid_list, filename=None, sep=",", header=True, save_gaps=False, meta=True): 67 | """ 68 | Writes a list of textgrid dictionaries to a csv file. 69 | If no filename is specified, csv is printed to standard out. 70 | """ 71 | columns = list(Entry._fields) 72 | if filename: 73 | f = open(filename, "w") 74 | if header: 75 | hline = sep.join(columns) 76 | if filename: 77 | f.write(hline + "\n") 78 | else: 79 | print(hline) 80 | for entry in textgrid_list: 81 | if entry.name or save_gaps: # skip unlabeled intervals 82 | row = sep.join(str(x) for x in list(entry)) 83 | if filename: 84 | f.write(row + "\n") 85 | else: 86 | print(row) 87 | if filename: 88 | f.flush() 89 | f.close() 90 | if meta: 91 | with open(filename + ".meta", "w") as metaf: 92 | metaf.write("""---\nunits: s\ndatatype: 1002\n""") 93 | 94 | def _build_entry(i, content, tier): 95 | """ 96 | takes the ith line that begin an interval and returns 97 | a dictionary of values 98 | """ 99 | start = _get_float_val(content[i + 1]) # addition is cheap typechecking 100 | if content[i].startswith("intervals ["): 101 | offset = 1 102 | else: 103 | offset = 0 # for "point" objects 104 | stop = _get_float_val(content[i + 1 + offset]) 105 | label = _get_str_val(content[i + 2 + offset]) 106 | return Entry(start=start, stop=stop, name=label, tier=tier) 107 | 108 | 109 | def _get_float_val(string): 110 | """ 111 | returns the last word in a string as a float 112 | """ 113 | return float(string.split()[-1]) 114 | 115 | 116 | def _get_str_val(string): 117 | """ 118 | returns the last item in quotes from a string 119 | """ 120 | return string.split('"')[-2] 121 | 122 | 123 | def textgrid2csv(): 124 | import argparse 125 | parser = argparse.ArgumentParser(description="convert a TextGrid file to a CSV.") 126 | parser.add_argument("TextGrid", 127 | help="a TextGrid file to process") 128 | parser.add_argument("-o", "--output", help="(optional) outputfile") 129 | parser.add_argument("--sep", help="separator to use in CSV output", 130 | default=",") 131 | parser.add_argument("--noheader", help="no header for the CSV", 132 | action="store_false") 133 | parser.add_argument("--savegaps", help="preserves intervals with no label", 134 | action="store_true") 135 | args = parser.parse_args() 136 | tgrid = read_textgrid(args.TextGrid) 137 | write_csv(tgrid, args.output, args.sep, args.noheader, args.savegaps) 138 | 139 | 140 | if __name__ == "__main__": 141 | textgrid2csv() 142 | 143 | -------------------------------------------------------------------------------- /test/test_textgrid.py: -------------------------------------------------------------------------------- 1 | # py.test unit tests 2 | 3 | from io import StringIO 4 | import textgrid 5 | 6 | example_file1 = StringIO("""File type = "ooTextFile" 7 | Object class = "TextGrid" 8 | 9 | xmin = 0 10 | xmax = 4387.9766666666665 11 | tiers? 12 | size = 3 13 | item []: 14 | item [1]: 15 | class = "IntervalTier" 16 | name = "Mary" 17 | xmin = 0 18 | xmax = 4387.9766666666665 19 | intervals: size = 4 20 | intervals [1]: 21 | xmin = 0 22 | xmax = 5.537098932314087 23 | text = "" 24 | intervals [2]: 25 | xmin = 5.537098932314087 26 | xmax = 18.917761588532382 27 | text = "bar" 28 | intervals [3]: 29 | xmin = 18.917761588532382 30 | xmax = 23.177071623244515 31 | text = "" 32 | intervals [4]: 33 | xmin = 23.177071623244515 34 | xmax = 4387.9766666666665 35 | text = "" 36 | item [2]: 37 | class = "IntervalTier" 38 | name = "John" 39 | xmin = 0 40 | xmax = 4387.9766666666665 41 | intervals: size = 5 42 | intervals [1]: 43 | xmin = 0 44 | xmax = 1.4385175781571313 45 | text = "" 46 | intervals [2]: 47 | xmin = 1.4385175781571313 48 | xmax = 3.9699942969011333 49 | text = "pip" 50 | intervals [3]: 51 | xmin = 3.9699942969011333 52 | xmax = 5.537098932314087 53 | text = "" 54 | intervals [4]: 55 | xmin = 5.537098932314087 56 | xmax = 8.711490373278787 57 | text = "foo" 58 | intervals [5]: 59 | xmin = 8.711490373278787 60 | xmax = 4387.9766666666665 61 | text = "" 62 | item [3]: 63 | class = "TextTier" 64 | name = "bell" 65 | xmin = 0 66 | xmax = 4387.9766666666665 67 | points: size = 2 68 | points [1]: 69 | number = 15.140637595485778 70 | mark = "a" 71 | points [2]: 72 | number = 21.248327456582416 73 | mark = "cool" 74 | """) 75 | 76 | example_file2 = StringIO("""File type = "ooTextFile" 77 | Object class = "TextGrid" 78 | 79 | xmin = 0 80 | xmax = 4387.9766666666665 81 | tiers? 82 | size = 1 83 | item []: 84 | item [1]: 85 | class = "IntervalTier" 86 | name = "Mary" 87 | xmin = 0 88 | xmax = 4387.9766666666665 89 | intervals: size = 1 90 | intervals [1]: 91 | xmin = 0 92 | xmax = 5.537098932314087 93 | text = "z" 94 | """) 95 | example_file3 = StringIO(""" 96 | item [3]: 97 | class = "TextTier" 98 | name = "bell" 99 | xmin = 0 100 | xmax = 4387.9766666666665 101 | points: size = 2 102 | points [1]: 103 | number = 15.140637595485778 104 | mark = "a" 105 | points [2]: 106 | number = 21.248327456582416 107 | mark = "cool" 108 | """) 109 | def test_get_float_val(): 110 | assert textgrid._get_float_val("foo 123") == 123 111 | assert textgrid._get_float_val("number = 21.248327456582416") == 21.248327456582416 112 | assert textgrid._get_float_val("xmax = 4387.9766666666665") == 4387.9766666666665 113 | assert textgrid._get_float_val("xmin = 5.537098932314087 \n") == 5.537098932314087 114 | 115 | 116 | def test_get_str_val(): 117 | assert textgrid._get_str_val('mark = "cool" ') == "cool" 118 | assert textgrid._get_str_val('text = "pip"') == "pip" 119 | 120 | 121 | def test_build_entry_point(): 122 | content = ["points [1]:", 123 | "number = 15.140637595485778 ", 124 | 'mark = "a"'] 125 | entry = textgrid._build_entry(0, content, tier='testtier') 126 | assert entry.start == 15.140637595485778 127 | assert entry.stop == 15.140637595485778 128 | assert entry.name == "a" 129 | assert entry.tier == "testtier" 130 | 131 | def test_build_entry_interval(): 132 | content = ["intervals [4]:", 133 | "xmin = 5.537098932314087", 134 | "xmax = 8.711490373278787", 135 | 'text = "foo"'] 136 | entry = textgrid._build_entry(0, content, tier='testtier') 137 | assert entry.start == 5.537098932314087 138 | assert entry.stop == 8.711490373278787 139 | assert entry.name == "foo" 140 | 141 | 142 | def test_points(): 143 | tgrid = textgrid.read_textgrid(example_file3) 144 | assert len(tgrid) == 2 145 | 146 | def test_read_short(): 147 | tgrid = textgrid.read_textgrid(example_file2) 148 | assert len(tgrid) == 1 149 | entry = tgrid[0] 150 | assert entry.start == 0 151 | assert entry.stop == 5.537098932314087 152 | assert entry.name == "z" 153 | assert entry.tier == "Mary" 154 | 155 | def test_read_long(): 156 | tgrid = textgrid.read_textgrid(example_file1) 157 | assert len(tgrid) == 11 158 | entry = tgrid[0] 159 | assert entry.start == 0 160 | assert entry.stop == 5.537098932314087 161 | assert entry.name == "" 162 | entry2 = tgrid[-1] 163 | assert entry2.name == "cool" 164 | assert entry2.tier == "bell" 165 | --------------------------------------------------------------------------------