├── .github
    ├── .gitignore
    ├── CODE_OF_CONDUCT.md
    └── workflows
    │   ├── pkgdown.yaml
    │   └── R-CMD-check.yaml
├── src
    ├── .gitignore
    ├── cpp11.cpp
    └── code.cpp
├── vignettes
    └── articles
    │   ├── .gitignore
    │   └── textgrid-specification.Rmd
├── .gitignore
├── man
    ├── figures
    │   ├── logo.png
    │   └── demo-textgrid.png
    ├── readtextgrid-package.Rd
    ├── example_textgrid.Rd
    ├── read_textgrid.Rd
    └── pivot_textgrid_tiers.Rd
├── inst
    ├── utf_16_be.TextGrid
    ├── Mary_John_bell.TextGrid
    ├── draw-tg-parts.praat
    ├── speaker-data
    │   ├── speaker001
    │   │   ├── s2T04.TextGrid
    │   │   ├── s2T01.TextGrid
    │   │   ├── s2T02.TextGrid
    │   │   ├── s2T03.TextGrid
    │   │   └── s2T05.TextGrid
    │   └── speaker002
    │   │   ├── s2T04.TextGrid
    │   │   ├── s2T01.TextGrid
    │   │   ├── s2T02.TextGrid
    │   │   ├── s2T03.TextGrid
    │   │   └── s2T05.TextGrid
    ├── make-logo.R
    ├── nested-intervals.TextGrid
    └── draw-tg-parts.Collection
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-data
    │       ├── short.TextGrid
    │       ├── praat-test
    │       │   ├── okay-digit-dot-space.TextGrid
    │       │   ├── fail-space-dot-digit.TextGrid
    │       │   ├── okay-plus-digit-or-minus-digit.TextGrid
    │       │   ├── fail-space-plus-dot-digit.TextGrid
    │       │   ├── okay-percents-fractions.TextGrid
    │       │   ├── okay-scientific-notation.TextGrid
    │       │   ├── fail-letters-digits.TextGrid
    │       │   ├── okay-real-with-trailing-characters.TextGrid
    │       │   ├── okay-hex-numbers.TextGrid
    │       │   ├── okay-percents.TextGrid
    │       │   └── okay-fractions.TextGrid
    │       ├── comment.TextGrid
    │       ├── points.TextGrid
    │       ├── Mary_John_bell.TextGrid
    │       ├── elan.TextGrid
    │       ├── quoted.TextGrid
    │       ├── hard-to-parse-normalized.TextGrid
    │       ├── hard-to-parse.TextGrid
    │       └── nested-intervals.TextGrid
    │   └── test-read-textgrid.R
├── _pkgdown.yml
├── cran-comments.md
├── R
    ├── readtextgrid-package.R
    ├── cpp11.R
    ├── pivot.R
    ├── pure-r-parser.R
    ├── legacy.R
    └── readtextgrid.R
├── NAMESPACE
├── .Rbuildignore
├── readtextgrid.Rproj
├── DESCRIPTION
├── NEWS.md
├── README.Rmd
├── README.md
└── LICENSE.md


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/vignettes/articles/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | docs
6 | Praat.exe
7 | 


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjmahr/readtextgrid/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/inst/utf_16_be.TextGrid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjmahr/readtextgrid/HEAD/inst/utf_16_be.TextGrid


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(readtextgrid)
3 | 
4 | test_check("readtextgrid")
5 | 


--------------------------------------------------------------------------------
/man/figures/demo-textgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjmahr/readtextgrid/HEAD/man/figures/demo-textgrid.png


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://www.tjmahr.com/readtextgrid/
2 | template:
3 |   bootstrap: 5
4 |   theme: arrow-light
5 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | Tested on local Windows, the five default GitHub Actions environments, and 
2 | R-devel on win-builder.
3 | 


--------------------------------------------------------------------------------
/R/readtextgrid-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | ## usethis namespace: start
5 | #' @useDynLib readtextgrid, .registration = TRUE
6 | ## usethis namespace: end
7 | NULL
8 | 


--------------------------------------------------------------------------------
/R/cpp11.R:
--------------------------------------------------------------------------------
 1 | # Generated by cpp11: do not edit by hand
 2 | 
 3 | cpp_tg_scan_tokens <- function(src) {
 4 |   .Call(`_readtextgrid_cpp_tg_scan_tokens`, src)
 5 | }
 6 | 
 7 | cpp_parse_praat_numbers <- function(x) {
 8 |   .Call(`_readtextgrid_cpp_parse_praat_numbers`, x)
 9 | }
10 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(example_textgrid)
 4 | export(legacy_read_textgrid)
 5 | export(legacy_read_textgrid_lines)
 6 | export(pivot_textgrid_tiers)
 7 | export(read_textgrid)
 8 | export(read_textgrid_lines)
 9 | useDynLib(readtextgrid, .registration = TRUE)
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/short.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 0
 4 | 2.3
 5 | <exists>
 6 | 3
 7 | "IntervalTier"
 8 | "Mary"
 9 | 0
10 | 2.3
11 | 1
12 | 0
13 | 2.3
14 | ""
15 | "IntervalTier"
16 | "John"
17 | 0
18 | 2.3
19 | 1
20 | 0
21 | 2.3
22 | ""
23 | "TextTier"
24 | "bell"
25 | 0
26 | 2.3
27 | 0
28 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^README\.Rmd$
 5 | ^\.travis\.yml$
 6 | ^Praat.exe$
 7 | ^inst/draw-tg-parts.Collection$
 8 | ^inst/draw-tg-parts.praat$
 9 | ^inst/make-logo.R$
10 | ^inst/tg-parts.png$
11 | ^\.github$
12 | ^cran-comments\.md$
13 | ^CRAN-RELEASE$
14 | ^CODE_OF_CONDUCT\.md$
15 | ^CRAN-SUBMISSION$
16 | ^_pkgdown\.yml$
17 | ^docs$
18 | ^pkgdown$
19 | ^vignettes/articles$
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-digit-dot-space.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 2 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 2
14 |         intervals: size = 1 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 1.
18 |             text = "" 
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/fail-space-dot-digit.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = .0 
 5 | xmax = 1 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 1. 
14 |         intervals: size = 1 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 1 
18 |             text = "" 
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-plus-digit-or-minus-digit.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0
 5 | xmax = 2
 6 | tiers? <exists>
 7 | size = +1
 8 | item []:
 9 |     item [1]:
10 |         class = "IntervalTier"
11 |         name = "Mary"
12 |         xmin = -0.3
13 |         xmax = +2
14 |         intervals: size = 1
15 |         intervals [1]:
16 |             xmin = 0
17 |             xmax = +1.0
18 |             text = ""
19 | 


--------------------------------------------------------------------------------
/readtextgrid.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: No
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/comment.TextGrid:
--------------------------------------------------------------------------------
 1 | "ooTextFile"
 2 | "TextGrid"
 3 | 0  2.3  ! time domain of TextGrid
 4 | <exists>
 5 | 3 tiers
 6 | "IntervalTier"  "Mary"  ! type and name of tier 1
 7 | 0  2.3  ! time domain of tier 1
 8 | 1 interval coming
 9 | 0  2.3  ""  ! interval 1 on tier 1
10 | "IntervalTier"  "John"  ! type and name of tier 2
11 | 0  2.3  ! time domain of tier 2
12 | 1 interval coming
13 | 0  2.3  ""  ! interval 1 on tier 2
14 | "TextTier"  "bell"  ! type and name of tier 3
15 | 0  2.3  ! time domain of tier 3
16 | 0 points coming
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/fail-space-plus-dot-digit.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = +.0 
 5 | xmax = 1 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 1.0 
14 |         intervals: size = 2 
15 |         intervals [1]:
16 |             xmin = 0.0  
17 |             xmax = 0.5 
18 |             text = "zz" 
19 |         intervals [2]:
20 |             xmin = 0.5  
21 |             xmax = 1 
22 |             text = "zz" 
23 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/points.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "TextTier" 
11 |         name = "test" 
12 |         xmin = 0 
13 |         xmax = 1 
14 |         points: size = 3 
15 |         points [1]:
16 |             number = 0.10000000000000007 
17 |             mark = "point 1" 
18 |         points [2]:
19 |             number = 0.5 
20 |             mark = "point 2" 
21 |         points [3]:
22 |             number = 0.8000000000000003 
23 |             mark = "point 3" 
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-percents-fractions.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 00000.0 
 5 | xmax = 3 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "TextTier" 
11 |         name = "points" 
12 |         xmin = 0 
13 |         xmax = 1 
14 |         points: size = 3 
15 |         points [1]:
16 |             number = 2/200% 
17 |             mark = "2/200% -> 1.0" 
18 |         points [2]:
19 |             number = 300%/2 
20 |             mark = "300%/2 -> 1.5" 
21 |         points [3]:
22 |             number = 400%/200% 
23 |             mark = "400%/200% -> 2.0" 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-scientific-notation.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0
 5 | xmax = 2e1
 6 | tiers? <exists>
 7 | size = 1
 8 | item []:
 9 |     item [1]:
10 |         class = "IntervalTier"
11 |         name = "Mary"
12 |         xmin = 0
13 |         xmax = 2E1
14 |         intervals: size = 3
15 |         intervals [1]:
16 |             xmin = 0
17 |             xmax = 5e-1
18 |             text = ""
19 |         intervals [2]:
20 |             xmin = 0.5
21 |             xmax = 1e+1
22 |             text = ""
23 |         intervals [3]:
24 |             xmin = 1.0e1
25 |             xmax = 20
26 |             text = ""
27 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/fail-letters-digits.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = nope0
 5 | xmax = 2e1 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0e 
13 |         xmax = 2E
14 |         intervals: size = 3E
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 5e-1
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.5 
21 |             xmax = 1e+1
22 |             text = "" 
23 |         intervals [3]:
24 |             xmin = 1.0e1
25 |             xmax = 20
26 |             text = "" 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-real-with-trailing-characters.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0ignored
 5 | xmax = 2e1ignored
 6 | tiers? <exists>
 7 | size = 1ignored
 8 | item []:
 9 |     item [1]:
10 |         class = "IntervalTier"
11 |         name = "Mary"
12 |         xmin = 0e
13 |         xmax = 2E1ignored
14 |         intervals: size = 3E
15 |         intervals [1]:
16 |             xmin = 0
17 |             xmax = 5e-1ignored
18 |             text = ""
19 |         intervals [2]:
20 |             xmin = 0.5ignored
21 |             xmax = 1e+1ignored
22 |             text = ""
23 |         intervals [3]:
24 |             xmin = 1.0e1ignored
25 |             xmax = 20
26 |             text = ""
27 | 


--------------------------------------------------------------------------------
/inst/Mary_John_bell.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1 
 6 | tiers? <exists> 
 7 | size = 3 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 1 
14 |         intervals: size = 1 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 1 
18 |             text = "" 
19 |     item [2]:
20 |         class = "IntervalTier" 
21 |         name = "John" 
22 |         xmin = 0 
23 |         xmax = 1 
24 |         intervals: size = 1 
25 |         intervals [1]:
26 |             xmin = 0 
27 |             xmax = 1 
28 |             text = "" 
29 |     item [3]:
30 |         class = "TextTier" 
31 |         name = "bell" 
32 |         xmin = 0 
33 |         xmax = 1 
34 |         points: size = 0 
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/Mary_John_bell.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1 
 6 | tiers? <exists> 
 7 | size = 3 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = 1 
14 |         intervals: size = 1 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 1 
18 |             text = "" 
19 |     item [2]:
20 |         class = "IntervalTier" 
21 |         name = "John" 
22 |         xmin = 0 
23 |         xmax = 1 
24 |         intervals: size = 1 
25 |         intervals [1]:
26 |             xmin = 0 
27 |             xmax = 1 
28 |             text = "" 
29 |     item [3]:
30 |         class = "TextTier" 
31 |         name = "bell" 
32 |         xmin = 0 
33 |         xmax = 1 
34 |         points: size = 0 
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-hex-numbers.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 00000.0 
 5 | xmax = 0x3 
 6 | tiers? <exists> 
 7 | size = 1 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "Mary" 
12 |         xmin = 0 
13 |         xmax = +0x3. 
14 |         intervals: size = 4
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0x0.8 
18 |             text = "0 to 0.5" 
19 |         intervals [2]:
20 |             xmin = 0x1P-1 
21 |             xmax = 0x1.8p+0  
22 |             text = "0.5 to 1.5" 
23 |         intervals [3]:
24 |             xmin = +0x1.8P0 
25 |             xmax = 0x1.4p+1  
26 |             text = "1.5 to 2.5" 
27 |         intervals [4]:
28 |             xmin = 0x1.4P1 
29 |             xmax = 0x3  
30 |             text = "2.5 to 3" 
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/elan.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0.0
 5 | xmax = 1.41
 6 | tiers? <exists>
 7 | size = 1
 8 | item []:
 9 |     item[1]:
10 |         class = "IntervalTier"
11 |         name = "default"
12 |         xmin = 0.0
13 |         xmax = 1.41
14 |         intervals: size = 5
15 |         intervals [1]
16 |             xmin = 0.0
17 |             xmax = 0.25
18 |             text = ""
19 |         intervals [2]
20 |             xmin = 0.25
21 |             xmax = 0.54
22 |             text = "bird"
23 |         intervals [3]
24 |             xmin = 0.54
25 |             xmax = 0.56
26 |             text = ""
27 |         intervals [4]
28 |             xmin = 0.56
29 |             xmax = 0.8
30 |             text = "house"
31 |         intervals [5]
32 |             xmin = 0.8
33 |             xmax = 1.41
34 |             text = ""
35 | 


--------------------------------------------------------------------------------
/inst/draw-tg-parts.praat:
--------------------------------------------------------------------------------
 1 | Read from file: "draw-tg-parts.Collection"
 2 | 
 3 | Erase all
 4 | Black
 5 | selectObject: "TextGrid left-marginal-text"
 6 | Select outer viewport: 1, 2, 0, 4.5
 7 | Draw: 0, 0, "no", "yes", "no"
 8 | White
 9 | selectObject: "TextGrid left-marginal-text-mask"
10 | Draw: 0, 0, "no", "yes", "no"
11 | Black
12 | 
13 | Select outer viewport: 5.25, 7, 0, 4.5
14 | selectObject: "TextGrid right-marginal-text"
15 | Draw: 0, 0, "no", "yes", "no"
16 | White
17 | selectObject: "TextGrid right-marginal-text-mask"
18 | Draw: 0, 0, "no", "yes", "no"
19 | Black
20 | 
21 | Select outer viewport: 1, 6.5, 0, 4.5
22 | selectObject: "TextGrid Mary_John_bell"
23 | Draw: 0, 0, "no", "yes", "no"
24 | Select outer viewport: 1, 6.5, 3.5, 5.5
25 | selectObject: "TextGrid blue-text"
26 | Blue
27 | Draw: 0, 0, "no", "yes", "no"
28 | selectObject: "TextGrid blue-text-mask"
29 | White
30 | Draw: 0, 0, "no", "yes", "no"
31 | selectObject: "TextGrid forehead-and-chin"
32 | Black
33 | Draw: 0, 0, "no", "yes", "no"
34 | 
35 | Select outer viewport: 1, 6.5, 0, 2
36 | selectObject: "TextGrid forehead-and-chin"
37 | Black
38 | Draw: 0, 0, "no", "yes", "no"
39 | 
40 | Select outer viewport: 1, 7, 0, 5.5
41 | Save as 300-dpi PNG file: "tg-parts.png"
42 | 
43 | 


--------------------------------------------------------------------------------
/src/cpp11.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by cpp11: do not edit by hand
 2 | // clang-format off
 3 | 
 4 | 
 5 | #include "cpp11/declarations.hpp"
 6 | #include <R_ext/Visibility.h>
 7 | 
 8 | // code.cpp
 9 | list cpp_tg_scan_tokens(std::string src);
10 | extern "C" SEXP _readtextgrid_cpp_tg_scan_tokens(SEXP src) {
11 |   BEGIN_CPP11
12 |     return cpp11::as_sexp(cpp_tg_scan_tokens(cpp11::as_cpp<cpp11::decay_t<std::string>>(src)));
13 |   END_CPP11
14 | }
15 | // code.cpp
16 | list cpp_parse_praat_numbers(strings x);
17 | extern "C" SEXP _readtextgrid_cpp_parse_praat_numbers(SEXP x) {
18 |   BEGIN_CPP11
19 |     return cpp11::as_sexp(cpp_parse_praat_numbers(cpp11::as_cpp<cpp11::decay_t<strings>>(x)));
20 |   END_CPP11
21 | }
22 | 
23 | extern "C" {
24 | static const R_CallMethodDef CallEntries[] = {
25 |     {"_readtextgrid_cpp_parse_praat_numbers", (DL_FUNC) &_readtextgrid_cpp_parse_praat_numbers, 1},
26 |     {"_readtextgrid_cpp_tg_scan_tokens",      (DL_FUNC) &_readtextgrid_cpp_tg_scan_tokens,      1},
27 |     {NULL, NULL, 0}
28 | };
29 | }
30 | 
31 | extern "C" attribute_visible void R_init_readtextgrid(DllInfo* dll){
32 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
33 |   R_useDynamicSymbols(dll, FALSE);
34 |   R_forceSymbols(dll, TRUE);
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-percents.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 00000.0 
 5 | xmax = 3 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "intervals" 
12 |         xmin = 0 
13 |         xmax = 3 
14 |         intervals: size = 2
15 |         intervals [1]:
16 |             xmin = 0% 
17 |             xmax = 100% 
18 |             text = "0% to 100%  (0 to 1)" 
19 |         intervals [2]:
20 |             xmin = 1.5e2% 
21 |             xmax = +300%  
22 |             text = "1.5e2% to +300%  (1.5 to 3.0)" 
23 |     item [2]:
24 |         class = "TextTier" 
25 |         name = "points" 
26 |         xmin = 0 
27 |         xmax = 3
28 |         points: size = 5
29 |         points [1]:
30 |             number = 10% 
31 |             mark = "10% -> 0.1" 
32 |         points [2]:
33 |             number = 5000e-2% 
34 |             mark = "500e-2% -> 0.5" 
35 |         points [3]:
36 |             number = 80%ms 
37 |             mark = "80%ms -> 0.8" 
38 |         points [4]:
39 |             number = 0.90ms% 
40 |             mark = "0.90ms% -> 0.9" 
41 |         points [5]:
42 |             number = 2E
43 |             mark = "1e% -> 0.9" 
44 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/quoted.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0
 5 | xmax = 2.3
 6 | tiers? <exists>
 7 | size = 3
 8 | item []:
 9 |     item [1]:
10 |         class = "IntervalTier"
11 |         name = "sentence"
12 |         xmin = 0
13 |         xmax = 2.3
14 |         intervals: size = 1
15 |         intervals [1]:
16 |             xmin = 0
17 |             xmax = 2.3
18 |             text = "říkej ""ahoj"" dvakrát"
19 |     item [2]:
20 |         class = "IntervalTier"
21 |         name = "phonemes"
22 |         xmin = 0
23 |         xmax = 2.3
24 |         intervals: size = 3
25 |         intervals [1]:
26 |             xmin = 0
27 |             xmax = 0.7
28 |             text = "r̝iːkɛj"
29 |         intervals [2]:
30 |             xmin = 0.7
31 |             xmax = 1.6
32 |             text = "ʔaɦɔj"
33 |         intervals [3]:
34 |             xmin = 1.6
35 |             xmax = 2.3
36 |             text = "dʋakraːt"
37 |     item [3]:
38 |         class = "TextTier"
39 |         name = "bell"
40 |         xmin = 0
41 |         xmax = 2.3
42 |         points: size = 2
43 |         points [1]:
44 |             number = 0.9
45 |             mark = "ding"
46 |         points [2]:
47 |             number = 1.3
48 |             mark = "dong"
49 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: readtextgrid
 2 | Type: Package
 3 | Title: Read in a 'Praat' 'TextGrid' File
 4 | Version: 0.2.0
 5 | Authors@R: c(
 6 |     person("Tristan", "Mahr", role = c("aut", "cre"), 
 7 |            email = "tristan.mahr@wisc.edu", 
 8 |            comment = c(ORCID = "0000-0002-8890-5116")),
 9 |     person("Dan", "Villarreal", role = "ctb"),
10 |     person("Jonathan", "Washington", role = "ctb"),
11 |     person("Josef", "Fruehwald", role = "aut"))
12 | Description: 'Praat' <https://www.fon.hum.uva.nl/praat/> is a widely 
13 |     used tool for manipulating, annotating and analyzing speech and 
14 |     acoustic data. It stores annotation data in a format called a 
15 |     'TextGrid'. This package provides a way to read these 
16 |     files into R.
17 | License: GPL-3
18 | Encoding: UTF-8
19 | Depends: R (>= 4.3.0)
20 | Suggests: 
21 |     testthat (>= 2.1.0)
22 | RoxygenNote: 7.3.3
23 | Imports: 
24 |     utils,
25 |     stats,
26 |     tibble,
27 |     purrr,
28 |     readr,
29 |     stringr,
30 |     dplyr,
31 |     rlang,
32 |     withr
33 | URL: https://github.com/tjmahr/readtextgrid, https://www.tjmahr.com/readtextgrid/
34 | BugReports: https://github.com/tjmahr/readtextgrid/issues
35 | Roxygen: list(markdown = TRUE)
36 | LinkingTo: 
37 |     cpp11
38 | Config/Needs/website: rmarkdown
39 | 


--------------------------------------------------------------------------------
/man/readtextgrid-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readtextgrid-package.R
 3 | \docType{package}
 4 | \name{readtextgrid-package}
 5 | \alias{readtextgrid}
 6 | \alias{readtextgrid-package}
 7 | \title{readtextgrid: Read in a 'Praat' 'TextGrid' File}
 8 | \description{
 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 | 
11 | 'Praat' \url{https://www.fon.hum.uva.nl/praat/} is a widely used tool for manipulating, annotating and analyzing speech and acoustic data. It stores annotation data in a format called a 'TextGrid'. This package provides a way to read these files into R.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 |   \item \url{https://github.com/tjmahr/readtextgrid}
17 |   \item \url{https://www.tjmahr.com/readtextgrid/}
18 |   \item Report bugs at \url{https://github.com/tjmahr/readtextgrid/issues}
19 | }
20 | 
21 | }
22 | \author{
23 | \strong{Maintainer}: Tristan Mahr \email{tristan.mahr@wisc.edu} (\href{https://orcid.org/0000-0002-8890-5116}{ORCID})
24 | 
25 | Authors:
26 | \itemize{
27 |   \item Josef Fruehwald
28 | }
29 | 
30 | Other contributors:
31 | \itemize{
32 |   \item Dan Villarreal [contributor]
33 |   \item Jonathan Washington [contributor]
34 | }
35 | 
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/praat-test/okay-fractions.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 00000.0 
 5 | xmax = 3 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "intervals" 
12 |         xmin = 0 
13 |         xmax = 3 
14 |         intervals: size = 4
15 |         intervals [1]:
16 |             xmin = 0/1 
17 |             xmax = 1/2 
18 |             text = "0/1 to 1/2  (0 to 0.5)" 
19 |         intervals [2]:
20 |             xmin = 1/2 
21 |             xmax = 6/4  
22 |             text = "1/2 to 6/4  (0.5 to 1.5)" 
23 |         intervals [3]:
24 |             xmin = +3/2 
25 |             xmax = 5.0/+2.0  
26 |             text = "+3/2 to 5.0/+2.0  (1.5 to 2.5)" 
27 |         intervals [4]:
28 |             xmin = -5/-2 
29 |             xmax = -3e0/-1e0  
30 |             text = "-5/-2 to -3e0/-1e0  (2.5 to 3)" 
31 |     item [2]:
32 |         class = "TextTier" 
33 |         name = "points" 
34 |         xmin = 0 
35 |         xmax = 1 
36 |         points: size = 3 
37 |         points [1]:
38 |             number = 1/10 
39 |             mark = "1/10 -> 0.1" 
40 |         points [2]:
41 |             number = -1/-2 
42 |             mark = "-1/-2 -> 0.5" 
43 |         points [3]:
44 |             number = 16e-1/2 
45 |             mark = "16e-1/2 -> 0.8" 
46 | 


--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all people who 
 4 | contribute through reporting issues, posting feature requests, updating documentation,
 5 | submitting pull requests or patches, and other activities.
 6 | 
 7 | We are committed to making participation in this project a harassment-free experience for
 8 | everyone, regardless of level of experience, gender, gender identity and expression,
 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 | 
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 | 
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18 | from the project team.
19 | 
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21 | opening an issue or contacting one or more of the project maintainers.
22 | 
23 | This Code of Conduct is adapted from the Contributor Covenant 
24 | (https://www.contributor-covenant.org), version 1.0.0, available at 
25 | https://contributor-covenant.org/version/1/0/0/.
26 | 


--------------------------------------------------------------------------------
/man/example_textgrid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readtextgrid.R
 3 | \name{example_textgrid}
 4 | \alias{example_textgrid}
 5 | \title{Locate the path of an example textgrid file}
 6 | \usage{
 7 | example_textgrid(which = 1)
 8 | }
 9 | \arguments{
10 | \item{which}{index of the textgrid to load}
11 | }
12 | \value{
13 | Path of \code{"Mary_John_bell.TextGrid"} bundled with the \code{readtextgrid}
14 | package.
15 | }
16 | \description{
17 | Locate the path of an example textgrid file
18 | }
19 | \details{
20 | This function is a wrapper over \code{\link[=system.file]{system.file()}}  to locate the
21 | paths to bundled textgrids. These files are used to test or demonstrate
22 | functionality of the package.
23 | 
24 | Three files are included:
25 | \enumerate{
26 | \item \code{"Mary_John_bell.TextGrid"} - the default TextGrid created by Praat's
27 | Create TextGrid command. This file is saved with UTF-8 encoding.
28 | \item \code{"utf_16_be.TextGrid"} - a TextGrid with some IPA characters entered using
29 | Praat's IPA character selector. This file is saved with UTF-16 encoding.
30 | \item \code{"nested-intervals.TextGrid"} - A textgrid containing an \code{"utterance"}
31 | tier, a \code{"words"} tier, and a \code{"phones"} tier. This file is typical of
32 | forced alignment textgrids where utterances contain words which contain
33 | speech segments. In this case, alignment was made by hand so that word
34 | and phone boundaries do not correspond exactly.
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |   release:
 8 |     types: [published]
 9 |   workflow_dispatch:
10 | 
11 | name: pkgdown.yaml
12 | 
13 | permissions: read-all
14 | 
15 | jobs:
16 |   pkgdown:
17 |     runs-on: ubuntu-latest
18 |     # Only restrict concurrency for non-PR jobs
19 |     concurrency:
20 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
21 |     env:
22 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
23 |     permissions:
24 |       contents: write
25 |     steps:
26 |       - uses: actions/checkout@v4
27 | 
28 |       - uses: r-lib/actions/setup-pandoc@v2
29 | 
30 |       - uses: r-lib/actions/setup-r@v2
31 |         with:
32 |           use-public-rspm: true
33 | 
34 |       - uses: r-lib/actions/setup-r-dependencies@v2
35 |         with:
36 |           extra-packages: any::pkgdown, local::.
37 |           needs: website
38 | 
39 |       - name: Build site
40 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
41 |         shell: Rscript {0}
42 | 
43 |       - name: Deploy to GitHub pages 🚀
44 |         if: github.event_name != 'pull_request'
45 |         uses: JamesIves/github-pages-deploy-action@v4.5.0
46 |         with:
47 |           clean: false
48 |           branch: gh-pages
49 |           folder: docs
50 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/hard-to-parse-normalized.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 2.3 
 6 | tiers? <exists> 
 7 | size = 4 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "! Fake Comment" 
12 |         xmin = 0 
13 |         xmax = 2.3 
14 |         intervals: size = 3 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 1 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 1 
21 |             xmax = 2 
22 |             text = """""" 
23 |         intervals [3]:
24 |             xmin = 2 
25 |             xmax = 2.3 
26 |             text = "Not a ! Comment" 
27 |     item [2]:
28 |         class = "IntervalTier" 
29 |         name = "Embedded ""String"" here" 
30 |         xmin = -0.5 
31 |         xmax = 2.3 
32 |         intervals: size = 1 
33 |         intervals [1]:
34 |             xmin = -0.1 
35 |             xmax = 2.8 
36 |             text = "this string
37 | has line breaks in it
38 | and a ! fake comment
39 | " 
40 |     item [3]:
41 |         class = "TextTier" 
42 |         name = "point tier 1" 
43 |         xmin = 0 
44 |         xmax = 2.3 
45 |         points: size = 2 
46 |         points [1]:
47 |             number = 0.1 
48 |             mark = "point label" 
49 |         points [2]:
50 |             number = 0.8000000000000003 
51 |             mark = "deliberate    extra spaces" 
52 |     item [4]:
53 |         class = "TextTier" 
54 |         name = "TextTier" 
55 |         xmin = 0 
56 |         xmax = 2.3 
57 |         points: size = 0 
58 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/hard-to-parse.TextGrid:
--------------------------------------------------------------------------------
 1 | "ooTextFile"
 2 | "TextGrid"
 3 | ! A deliberate comment starts with a bang and ends on a new line
 4 | 0  2.3  ! Here is a normal comment
 5 | <exists> ! This comment has a stray number 1
 6 | 
 7 | ! Extra unquoted text is also a comment like the word "tiers" below
 8 | 4 tiers         ! This comment has a stray quote "
 9 | "IntervalTier"  "! Fake Comment"
10 |     0              2.3  ! time domain of tier 1
11 | 3 interval coming
12 | 0  1  ""  ! interval 1 on tier 1
13 | 1  2  """"""  ! interval 2 on tier 1
14 | 2  2.3  "Not a ! Comment"  ! interval 3 on tier 1
15 | "IntervalTier"  "Embedded ""String"" here"  ! type and name of tier 2
16 | I guess some negative numbers are fine and make it into Praat
17 | -0.5s  2.3
18 | 1 interval coming
19 | -0.1  2.8s  "this string
20 | has line breaks in it
21 | and a ! fake comment
22 | "
23 | ! testing comments that touch strings and numbers
24 | ! 1. "string"! throws an error in Praat so don't test that
25 | "TextTier"  "point tier 1"
26 | ! 2. [NUMBER]!comment is handled fine by Praat
27 | 0.  2.3!touching a number
28 | okay this is weird it won't see letternumber like this10
29 | but it doesn't care about 2this
30 | 0.1
31 | "point label"
32 | 
33 | ! Finally include a bracketed number
34 | points [3]:
35 |     number = 0.8000000000000003
36 |     mark = "deliberate    extra spaces"
37 | 
38 | "TextTier"  "TextTier" 0.  2.3
39 | and now here is the weird part -10 ! points gets turned to 0
40 | 
41 | 
42 | ! and now here is another weird part. this doesn't get read because we said
43 | ! there are only 4 tiers
44 | "TextTier"  "point tier 3" 0.  2.3 0
45 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # readtextgrid 0.2.0
 2 | 
 3 | * `read_textgrid()` now manually parses textgrids and can handle short-format 
 4 |   textgrids. (#4, #16, initial parser by @JoFrhwld). Part of the parsing is done
 5 |   in C++ so the new manual parser is faster than the legacy version.
 6 | * The new parser is documented in a non-package supplemental vignette called 
 7 |   "Textgrid specification" on the package website.
 8 | * Original package functions are available in `legacy_read_textgrid()`.
 9 | * `legacy_read_textgrid()` un-escapes `A ""quoted"" word` to `A "quoted" word`.
10 | * `legacy_read_textgrid()` can handle interval text with line breaks in it.
11 | * New function `pivot_textgrid_tiers()` to pivot out nested textgrid 
12 |   intervals into a wide dataframe. For example, if a forced aligner has a 
13 |   `words` and `phones` tier, we can pivot the `words` and `phones` 
14 |   intervals into a dataframe with one row per `phones` interval and with
15 |   columns `words`, `words_xmin`, `words_xmax`, `phones`, `phones_xmin`, 
16 |   `phones_xmax`, etc. (#10, request of @stefanocoretta)
17 | * Testing suite includes a short-format textgrid, a short-format textgrid 
18 |   with inline comments, and a textgrid with escaped `"` characters. (@JoFrhwld) 
19 | * Testing suite includes an adversarial textgrid to challenge parsing.
20 | * Support ELAN-generated textgrids. (#11, @djvill)
21 | * Raised required R version to 4.3.0. (April 2023)
22 | 
23 | 
24 | # readtextgrid 0.1.2
25 | 
26 | * Add `encoding` argument to `read_textgrid()`. (#7, #8, #9, @jonorthwash)
27 | * Include example UTF-16 textgrid for testing.
28 | * Remove magrittr dependency. (#5, @JoFrhwld)
29 | 
30 | 
31 | # readtextgrid 0.1.1
32 | 
33 | * Fixes for CRAN resubmission.
34 | 
35 | 
36 | # readtextgrid 0.1.0
37 | 
38 | * Initial release.
39 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker001/s2T04.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.6841632653061225 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.6841632653061225 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.441 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.441 
21 |             xmax = 0.648 
22 |             text = "get" 
23 |         intervals [3]:
24 |             xmin = 0.648 
25 |             xmax = 1.098 
26 |             text = "off" 
27 |         intervals [4]:
28 |             xmin = 1.098 
29 |             xmax = 1.6841632653061225 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.6841632653061225 
36 |         intervals: size = 8 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.441 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.441 
43 |             xmax = 0.522 
44 |             text = "G" 
45 |         intervals [3]:
46 |             xmin = 0.522 
47 |             xmax = 0.5940000000000001 
48 |             text = "EH1" 
49 |         intervals [4]:
50 |             xmin = 0.5940000000000001 
51 |             xmax = 0.648 
52 |             text = "T" 
53 |         intervals [5]:
54 |             xmin = 0.648 
55 |             xmax = 0.855 
56 |             text = "AO1" 
57 |         intervals [6]:
58 |             xmin = 0.855 
59 |             xmax = 1.098 
60 |             text = "F" 
61 |         intervals [7]:
62 |             xmin = 1.098 
63 |             xmax = 1.665 
64 |             text = "sp" 
65 |         intervals [8]:
66 |             xmin = 1.665 
67 |             xmax = 1.6841632653061225 
68 |             text = "" 
69 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker002/s2T04.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.6841632653061225 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.6841632653061225 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.441 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.441 
21 |             xmax = 0.648 
22 |             text = "get" 
23 |         intervals [3]:
24 |             xmin = 0.648 
25 |             xmax = 1.098 
26 |             text = "off" 
27 |         intervals [4]:
28 |             xmin = 1.098 
29 |             xmax = 1.6841632653061225 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.6841632653061225 
36 |         intervals: size = 8 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.441 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.441 
43 |             xmax = 0.522 
44 |             text = "G" 
45 |         intervals [3]:
46 |             xmin = 0.522 
47 |             xmax = 0.5940000000000001 
48 |             text = "EH1" 
49 |         intervals [4]:
50 |             xmin = 0.5940000000000001 
51 |             xmax = 0.648 
52 |             text = "T" 
53 |         intervals [5]:
54 |             xmin = 0.648 
55 |             xmax = 0.855 
56 |             text = "AO1" 
57 |         intervals [6]:
58 |             xmin = 0.855 
59 |             xmax = 1.098 
60 |             text = "F" 
61 |         intervals [7]:
62 |             xmin = 1.098 
63 |             xmax = 1.665 
64 |             text = "sp" 
65 |         intervals [8]:
66 |             xmin = 1.665 
67 |             xmax = 1.6841632653061225 
68 |             text = "" 
69 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker001/s2T01.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.3485714285714285 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.3485714285714285 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.29700000000000004 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.29700000000000004 
21 |             xmax = 0.522 
22 |             text = "bird" 
23 |         intervals [3]:
24 |             xmin = 0.522 
25 |             xmax = 0.9720000000000001 
26 |             text = "house" 
27 |         intervals [4]:
28 |             xmin = 0.9720000000000001 
29 |             xmax = 1.3485714285714285 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.3485714285714285 
36 |         intervals: size = 9 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.29700000000000004 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.29700000000000004 
43 |             xmax = 0.36000000000000004 
44 |             text = "B" 
45 |         intervals [3]:
46 |             xmin = 0.36000000000000004 
47 |             xmax = 0.49500000000000005 
48 |             text = "ER1" 
49 |         intervals [4]:
50 |             xmin = 0.49500000000000005 
51 |             xmax = 0.522 
52 |             text = "D" 
53 |         intervals [5]:
54 |             xmin = 0.522 
55 |             xmax = 0.621 
56 |             text = "HH" 
57 |         intervals [6]:
58 |             xmin = 0.621 
59 |             xmax = 0.783 
60 |             text = "AW1" 
61 |         intervals [7]:
62 |             xmin = 0.783 
63 |             xmax = 0.9720000000000001 
64 |             text = "S" 
65 |         intervals [8]:
66 |             xmin = 0.9720000000000001 
67 |             xmax = 1.332 
68 |             text = "sp" 
69 |         intervals [9]:
70 |             xmin = 1.332 
71 |             xmax = 1.3485714285714285 
72 |             text = "" 
73 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker002/s2T01.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.3485714285714285 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.3485714285714285 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.29700000000000004 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.29700000000000004 
21 |             xmax = 0.522 
22 |             text = "bird" 
23 |         intervals [3]:
24 |             xmin = 0.522 
25 |             xmax = 0.9720000000000001 
26 |             text = "house" 
27 |         intervals [4]:
28 |             xmin = 0.9720000000000001 
29 |             xmax = 1.3485714285714285 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.3485714285714285 
36 |         intervals: size = 9 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.29700000000000004 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.29700000000000004 
43 |             xmax = 0.36000000000000004 
44 |             text = "B" 
45 |         intervals [3]:
46 |             xmin = 0.36000000000000004 
47 |             xmax = 0.49500000000000005 
48 |             text = "ER1" 
49 |         intervals [4]:
50 |             xmin = 0.49500000000000005 
51 |             xmax = 0.522 
52 |             text = "D" 
53 |         intervals [5]:
54 |             xmin = 0.522 
55 |             xmax = 0.621 
56 |             text = "HH" 
57 |         intervals [6]:
58 |             xmin = 0.621 
59 |             xmax = 0.783 
60 |             text = "AW1" 
61 |         intervals [7]:
62 |             xmin = 0.783 
63 |             xmax = 0.9720000000000001 
64 |             text = "S" 
65 |         intervals [8]:
66 |             xmin = 0.9720000000000001 
67 |             xmax = 1.332 
68 |             text = "sp" 
69 |         intervals [9]:
70 |             xmin = 1.332 
71 |             xmax = 1.3485714285714285 
72 |             text = "" 
73 | 


--------------------------------------------------------------------------------
/man/read_textgrid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readtextgrid.R, R/legacy.R
 3 | \name{read_textgrid}
 4 | \alias{read_textgrid}
 5 | \alias{read_textgrid_lines}
 6 | \alias{legacy_read_textgrid}
 7 | \alias{legacy_read_textgrid_lines}
 8 | \title{Read a textgrid file into a tibble}
 9 | \usage{
10 | read_textgrid(path, file = NULL, encoding = NULL)
11 | 
12 | read_textgrid_lines(lines, file = NULL)
13 | 
14 | legacy_read_textgrid(path, file = NULL, encoding = NULL)
15 | 
16 | legacy_read_textgrid_lines(lines, file = NULL)
17 | }
18 | \arguments{
19 | \item{path}{a path to a textgrid}
20 | 
21 | \item{file}{an optional value to use for the \code{file} column. For
22 | \code{read_textgrid()}, the default is the base filename of the input file. For
23 | \code{read_textgrid_lines()}, the default is \code{NA}.}
24 | 
25 | \item{encoding}{the encoding of the textgrid. The default value \code{NULL} uses
26 | \code{\link[readr:encoding]{readr::guess_encoding()}} to guess the encoding of the textgrid. If an
27 | encoding is provided, it is forwarded to \code{\link[readr:locale]{readr::locale()}} and
28 | \code{\link[readr:read_lines]{readr::read_lines()}}.}
29 | 
30 | \item{lines}{alternatively, the lines of a textgrid file}
31 | }
32 | \value{
33 | a tibble with one row per textgrid annotation
34 | }
35 | \description{
36 | Read a textgrid file into a tibble
37 | }
38 | \details{
39 | The \code{legacy_read_textgrid} functions are the original textgrid
40 | parsers provided by the package. They assume that the TextGrid file is a
41 | "long" format textgrid; this is the default format used by "Save a text
42 | file..." in Praat.
43 | 
44 | The current \code{read_textgrid()} functions are more
45 | flexible and can read in "short" format textgrids and textgrids with
46 | comments.
47 | 
48 | See \url{https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html}
49 | for a description of the textgrid file format. Note that this package does
50 | not strictly adhere to the format described in this document. For example,
51 | the document says that numbers should be freestanding (surrounded by spaces
52 | or string boundaries), but Praat.exe can handle malformed numbers like
53 | \verb{100ms}. Therefore, we tried to implement a parser that matched what Praat
54 | actually handles.
55 | }
56 | \examples{
57 | tg <- system.file("Mary_John_bell.TextGrid", package = "readtextgrid")
58 | read_textgrid(tg)
59 | }
60 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker001/s2T02.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.5918163265306122 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.5918163265306122 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.29700000000000004 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.29700000000000004 
21 |             xmax = 0.7020000000000001 
22 |             text = "cowboy" 
23 |         intervals [3]:
24 |             xmin = 0.7020000000000001 
25 |             xmax = 1.1700000000000002 
26 |             text = "boots" 
27 |         intervals [4]:
28 |             xmin = 1.1700000000000002 
29 |             xmax = 1.5918163265306122 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.5918163265306122 
36 |         intervals: size = 11 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.29700000000000004 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.29700000000000004 
43 |             xmax = 0.378 
44 |             text = "K" 
45 |         intervals [3]:
46 |             xmin = 0.378 
47 |             xmax = 0.5040000000000001 
48 |             text = "AW1" 
49 |         intervals [4]:
50 |             xmin = 0.5040000000000001 
51 |             xmax = 0.5760000000000001 
52 |             text = "B" 
53 |         intervals [5]:
54 |             xmin = 0.5760000000000001 
55 |             xmax = 0.7020000000000001 
56 |             text = "OY2" 
57 |         intervals [6]:
58 |             xmin = 0.7020000000000001 
59 |             xmax = 0.774 
60 |             text = "B" 
61 |         intervals [7]:
62 |             xmin = 0.774 
63 |             xmax = 0.918 
64 |             text = "UW1" 
65 |         intervals [8]:
66 |             xmin = 0.918 
67 |             xmax = 1.035 
68 |             text = "T" 
69 |         intervals [9]:
70 |             xmin = 1.035 
71 |             xmax = 1.1700000000000002 
72 |             text = "S" 
73 |         intervals [10]:
74 |             xmin = 1.1700000000000002 
75 |             xmax = 1.575 
76 |             text = "sp" 
77 |         intervals [11]:
78 |             xmin = 1.575 
79 |             xmax = 1.5918163265306122 
80 |             text = "" 
81 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker002/s2T02.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.5918163265306122 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.5918163265306122 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.29700000000000004 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.29700000000000004 
21 |             xmax = 0.7020000000000001 
22 |             text = "cowboy" 
23 |         intervals [3]:
24 |             xmin = 0.7020000000000001 
25 |             xmax = 1.1700000000000002 
26 |             text = "boots" 
27 |         intervals [4]:
28 |             xmin = 1.1700000000000002 
29 |             xmax = 1.5918163265306122 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.5918163265306122 
36 |         intervals: size = 11 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.29700000000000004 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.29700000000000004 
43 |             xmax = 0.378 
44 |             text = "K" 
45 |         intervals [3]:
46 |             xmin = 0.378 
47 |             xmax = 0.5040000000000001 
48 |             text = "AW1" 
49 |         intervals [4]:
50 |             xmin = 0.5040000000000001 
51 |             xmax = 0.5760000000000001 
52 |             text = "B" 
53 |         intervals [5]:
54 |             xmin = 0.5760000000000001 
55 |             xmax = 0.7020000000000001 
56 |             text = "OY2" 
57 |         intervals [6]:
58 |             xmin = 0.7020000000000001 
59 |             xmax = 0.774 
60 |             text = "B" 
61 |         intervals [7]:
62 |             xmin = 0.774 
63 |             xmax = 0.918 
64 |             text = "UW1" 
65 |         intervals [8]:
66 |             xmin = 0.918 
67 |             xmax = 1.035 
68 |             text = "T" 
69 |         intervals [9]:
70 |             xmin = 1.035 
71 |             xmax = 1.1700000000000002 
72 |             text = "S" 
73 |         intervals [10]:
74 |             xmin = 1.1700000000000002 
75 |             xmax = 1.575 
76 |             text = "sp" 
77 |         intervals [11]:
78 |             xmin = 1.575 
79 |             xmax = 1.5918163265306122 
80 |             text = "" 
81 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker001/s2T03.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.7316530612244898 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.7316530612244898 
14 |         intervals: size = 5 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.369 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.369 
21 |             xmax = 0.657 
22 |             text = "hug" 
23 |         intervals [3]:
24 |             xmin = 0.657 
25 |             xmax = 0.6930000000000001 
26 |             text = "" 
27 |         intervals [4]:
28 |             xmin = 0.6930000000000001 
29 |             xmax = 1.1520000000000001 
30 |             text = "daddy" 
31 |         intervals [5]:
32 |             xmin = 1.1520000000000001 
33 |             xmax = 1.7316530612244898 
34 |             text = "" 
35 |     item [2]:
36 |         class = "IntervalTier" 
37 |         name = "phones" 
38 |         xmin = 0 
39 |         xmax = 1.7316530612244898 
40 |         intervals: size = 11 
41 |         intervals [1]:
42 |             xmin = 0 
43 |             xmax = 0.369 
44 |             text = "sil" 
45 |         intervals [2]:
46 |             xmin = 0.369 
47 |             xmax = 0.45 
48 |             text = "HH" 
49 |         intervals [3]:
50 |             xmin = 0.45 
51 |             xmax = 0.5850000000000001 
52 |             text = "AH1" 
53 |         intervals [4]:
54 |             xmin = 0.5850000000000001 
55 |             xmax = 0.657 
56 |             text = "G" 
57 |         intervals [5]:
58 |             xmin = 0.657 
59 |             xmax = 0.6930000000000001 
60 |             text = "sp" 
61 |         intervals [6]:
62 |             xmin = 0.6930000000000001 
63 |             xmax = 0.774 
64 |             text = "D" 
65 |         intervals [7]:
66 |             xmin = 0.774 
67 |             xmax = 0.927 
68 |             text = "AE1" 
69 |         intervals [8]:
70 |             xmin = 0.927 
71 |             xmax = 0.9720000000000001 
72 |             text = "D" 
73 |         intervals [9]:
74 |             xmin = 0.9720000000000001 
75 |             xmax = 1.1520000000000001 
76 |             text = "IY0" 
77 |         intervals [10]:
78 |             xmin = 1.1520000000000001 
79 |             xmax = 1.71 
80 |             text = "sp" 
81 |         intervals [11]:
82 |             xmin = 1.71 
83 |             xmax = 1.7316530612244898 
84 |             text = "" 
85 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker002/s2T03.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.7316530612244898 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.7316530612244898 
14 |         intervals: size = 5 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.369 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.369 
21 |             xmax = 0.657 
22 |             text = "hug" 
23 |         intervals [3]:
24 |             xmin = 0.657 
25 |             xmax = 0.6930000000000001 
26 |             text = "" 
27 |         intervals [4]:
28 |             xmin = 0.6930000000000001 
29 |             xmax = 1.1520000000000001 
30 |             text = "daddy" 
31 |         intervals [5]:
32 |             xmin = 1.1520000000000001 
33 |             xmax = 1.7316530612244898 
34 |             text = "" 
35 |     item [2]:
36 |         class = "IntervalTier" 
37 |         name = "phones" 
38 |         xmin = 0 
39 |         xmax = 1.7316530612244898 
40 |         intervals: size = 11 
41 |         intervals [1]:
42 |             xmin = 0 
43 |             xmax = 0.369 
44 |             text = "sil" 
45 |         intervals [2]:
46 |             xmin = 0.369 
47 |             xmax = 0.45 
48 |             text = "HH" 
49 |         intervals [3]:
50 |             xmin = 0.45 
51 |             xmax = 0.5850000000000001 
52 |             text = "AH1" 
53 |         intervals [4]:
54 |             xmin = 0.5850000000000001 
55 |             xmax = 0.657 
56 |             text = "G" 
57 |         intervals [5]:
58 |             xmin = 0.657 
59 |             xmax = 0.6930000000000001 
60 |             text = "sp" 
61 |         intervals [6]:
62 |             xmin = 0.6930000000000001 
63 |             xmax = 0.774 
64 |             text = "D" 
65 |         intervals [7]:
66 |             xmin = 0.774 
67 |             xmax = 0.927 
68 |             text = "AE1" 
69 |         intervals [8]:
70 |             xmin = 0.927 
71 |             xmax = 0.9720000000000001 
72 |             text = "D" 
73 |         intervals [9]:
74 |             xmin = 0.9720000000000001 
75 |             xmax = 1.1520000000000001 
76 |             text = "IY0" 
77 |         intervals [10]:
78 |             xmin = 1.1520000000000001 
79 |             xmax = 1.71 
80 |             text = "sp" 
81 |         intervals [11]:
82 |             xmin = 1.71 
83 |             xmax = 1.7316530612244898 
84 |             text = "" 
85 | 


--------------------------------------------------------------------------------
/inst/make-logo.R:
--------------------------------------------------------------------------------
  1 | # install.packages("hexSticker")
  2 | library(hexSticker)
  3 | library(ggplot2)
  4 | # Builds the hex-sticker logo and saves it to man/figures/logo.png.
  5 | # dir.create("man/figures")
  6 | # Top row of the logo (row = 2): the single label "read" at x = .5.
  7 | df <- tibble::tibble(
  8 |   row = 2,
  9 |   x = .5,
 10 |   label = "read"
 11 | )
 12 | # Bottom row (row = 1): the eight letters of "textgrid", evenly spaced on [0, 1].
 13 | df2 <- tibble::tibble(
 14 |   row = 1,
 15 |   x = seq(0, 1, length.out = 8),
 16 |   label = c("t", "e", "x", "t", "g", "r", "i", "d")
 17 | )
 18 | # x-positions halfway between neighboring letters (7 values): the boundary lines.
 19 | lines <- purrr::map2_dbl(
 20 |   df2$x[1:7],
 21 |   df2$x[2:8],
 22 |   function(x, y) median(c(x, y))
 23 | )
 24 | 
 25 | # # from actual praat
 26 | # yellow <- "#FADF28"
 27 | # red <- "#DE0805"
 28 | # blue <- "#0000D3"
 29 | # Colors actually used in the logo.
 30 | text <- "#404e4d"
 31 | line <- "#747e7d"
 32 | yellow <- "#fde74c"
 33 | red <- "#c3423f"
 34 | blue <- "#4D85BD"
 35 | # Grid edges sit .143 / 2 outside [0, 1]; .143 is about one letter step (1 / 7).
 36 | grid_min <- -.143 / 2
 37 | grid_max <- 1 + .143 / 2
 38 | text_size <- 15
 39 | # Layers are drawn in the order they are added below, so later layers sit on top.
 40 | p <- ggplot(df) +
 41 |   aes(x = x, y = row) +
 42 |   geom_segment( # horizontal rules at y = .5, 1.5, 2.5 spanning the grid width
 43 |     aes(x = x, y = y, xend = xend, yend = yend),
 44 |     data = tibble::tibble(
 45 |       x = grid_min,
 46 |       xend = grid_max,
 47 |       y = c(.5, 1.5, 2.5),
 48 |       yend = y # tibble() lets a column refer to one defined just above
 49 |     ),
 50 |     color = line
 51 |   ) +
 52 |   geom_segment( # blue vertical edges at grid_min and grid_max, covering both rows
 53 |     aes(x = x, y = y, xend = xend, yend = yend),
 54 |     data = tibble::tibble(
 55 |       x = c(grid_min, grid_max),
 56 |       xend = x,
 57 |       y = .5,
 58 |       yend = 2.5
 59 |     ),
 60 |     size = 2,
 61 |     lineend = "round",
 62 |     color = blue
 63 |   ) +
 64 |   geom_ribbon( # yellow band in the bottom row between lines[5] and lines[6]
 65 |     aes(x = x, ymax = ymax, ymin = ymin),
 66 |     data = tibble::tibble(
 67 |       x = lines[5:6],
 68 |       ymin = .5,
 69 |       ymax = 1.5,
 70 |       row = .5 # presumably only needed because the plot-level aes maps y = row
 71 |     ),
 72 |     fill = yellow
 73 |   ) +
 74 |   geom_segment( # blue boundary lines in the bottom row, all except lines[5]
 75 |     aes(x = x, y = y, xend = xend, yend = yend),
 76 |     data = tibble::tibble(
 77 |       x = lines[-5],
 78 |       xend = x,
 79 |       y = .5,
 80 |       yend = 1.5
 81 |     ),
 82 |     size = 2,
 83 |     lineend = "round",
 84 |     color = blue
 85 |   ) +
 86 |   geom_segment( # the remaining boundary, lines[5], drawn in red
 87 |     aes(x = x, y = y, xend = xend, yend = yend),
 88 |     data = tibble::tibble(
 89 |       x = lines[5],
 90 |       xend = x,
 91 |       y = .5,
 92 |       yend = 1.5
 93 |     ),
 94 |     size = 2,
 95 |     lineend = "round",
 96 |     color = red
 97 |   ) +
 98 |   geom_text(aes(label = label), size = text_size, color = text) + # top row: "read"
 99 |   geom_text( # bottom-row letters except the 6th ("r")
100 |     aes(label = label),
101 |     data = df2[-6, ],
102 |     size = text_size,
103 |     color = text
104 |   ) +
105 |   geom_text( # the 6th letter ("r"), highlighted in red
106 |     aes(label = label),
107 |     data = df2[6, ],
108 |     size = text_size,
109 |     color = red
110 |   ) +
111 |   theme_void() +
112 |   theme_transparent()
113 | # Place the plot inside a white hexagon with a red border and write the PNG.
114 | sticker(
115 |   p,
116 |   package = "",
117 |   s_x = 1,
118 |   s_y = 1,
119 |   s_width = 1.65,
120 |   s_height = 1,
121 |   filename = "man/figures/logo.png",
122 |   h_fill = "white",
123 |   h_color = red
124 | )
125 | # Open the result. NOTE(review): relies on an `open` command (e.g. macOS) - confirm.
126 | system2("open", "man/figures/logo.png")
127 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker001/s2T05.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.6920816326530612 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.6920816326530612 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.333 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.333 
21 |             xmax = 0.7200000000000001 
22 |             text = "animal" 
23 |         intervals [3]:
24 |             xmin = 0.7200000000000001 
25 |             xmax = 1.332 
26 |             text = "crackers" 
27 |         intervals [4]:
28 |             xmin = 1.332 
29 |             xmax = 1.6920816326530612 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.6920816326530612 
36 |         intervals: size = 15 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.333 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.333 
43 |             xmax = 0.459 
44 |             text = "AE1" 
45 |         intervals [3]:
46 |             xmin = 0.459 
47 |             xmax = 0.531 
48 |             text = "N" 
49 |         intervals [4]:
50 |             xmin = 0.531 
51 |             xmax = 0.558 
52 |             text = "AH0" 
53 |         intervals [5]:
54 |             xmin = 0.558 
55 |             xmax = 0.6030000000000001 
56 |             text = "M" 
57 |         intervals [6]:
58 |             xmin = 0.6030000000000001 
59 |             xmax = 0.639 
60 |             text = "AH0" 
61 |         intervals [7]:
62 |             xmin = 0.639 
63 |             xmax = 0.7200000000000001 
64 |             text = "L" 
65 |         intervals [8]:
66 |             xmin = 0.7200000000000001 
67 |             xmax = 0.792 
68 |             text = "K" 
69 |         intervals [9]:
70 |             xmin = 0.792 
71 |             xmax = 0.846 
72 |             text = "R" 
73 |         intervals [10]:
74 |             xmin = 0.846 
75 |             xmax = 0.9540000000000001 
76 |             text = "AE1" 
77 |         intervals [11]:
78 |             xmin = 0.9540000000000001 
79 |             xmax = 1.062 
80 |             text = "K" 
81 |         intervals [12]:
82 |             xmin = 1.062 
83 |             xmax = 1.1880000000000002 
84 |             text = "ER0" 
85 |         intervals [13]:
86 |             xmin = 1.1880000000000002 
87 |             xmax = 1.332 
88 |             text = "Z" 
89 |         intervals [14]:
90 |             xmin = 1.332 
91 |             xmax = 1.6740000000000002 
92 |             text = "sp" 
93 |         intervals [15]:
94 |             xmin = 1.6740000000000002 
95 |             xmax = 1.6920816326530612 
96 |             text = "" 
97 | 


--------------------------------------------------------------------------------
/inst/speaker-data/speaker002/s2T05.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.6920816326530612 
 6 | tiers? <exists> 
 7 | size = 2 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.6920816326530612 
14 |         intervals: size = 4 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.333 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.333 
21 |             xmax = 0.7200000000000001 
22 |             text = "animal" 
23 |         intervals [3]:
24 |             xmin = 0.7200000000000001 
25 |             xmax = 1.332 
26 |             text = "crackers" 
27 |         intervals [4]:
28 |             xmin = 1.332 
29 |             xmax = 1.6920816326530612 
30 |             text = "" 
31 |     item [2]:
32 |         class = "IntervalTier" 
33 |         name = "phones" 
34 |         xmin = 0 
35 |         xmax = 1.6920816326530612 
36 |         intervals: size = 15 
37 |         intervals [1]:
38 |             xmin = 0 
39 |             xmax = 0.333 
40 |             text = "sil" 
41 |         intervals [2]:
42 |             xmin = 0.333 
43 |             xmax = 0.459 
44 |             text = "AE1" 
45 |         intervals [3]:
46 |             xmin = 0.459 
47 |             xmax = 0.531 
48 |             text = "N" 
49 |         intervals [4]:
50 |             xmin = 0.531 
51 |             xmax = 0.558 
52 |             text = "AH0" 
53 |         intervals [5]:
54 |             xmin = 0.558 
55 |             xmax = 0.6030000000000001 
56 |             text = "M" 
57 |         intervals [6]:
58 |             xmin = 0.6030000000000001 
59 |             xmax = 0.639 
60 |             text = "AH0" 
61 |         intervals [7]:
62 |             xmin = 0.639 
63 |             xmax = 0.7200000000000001 
64 |             text = "L" 
65 |         intervals [8]:
66 |             xmin = 0.7200000000000001 
67 |             xmax = 0.792 
68 |             text = "K" 
69 |         intervals [9]:
70 |             xmin = 0.792 
71 |             xmax = 0.846 
72 |             text = "R" 
73 |         intervals [10]:
74 |             xmin = 0.846 
75 |             xmax = 0.9540000000000001 
76 |             text = "AE1" 
77 |         intervals [11]:
78 |             xmin = 0.9540000000000001 
79 |             xmax = 1.062 
80 |             text = "K" 
81 |         intervals [12]:
82 |             xmin = 1.062 
83 |             xmax = 1.1880000000000002 
84 |             text = "ER0" 
85 |         intervals [13]:
86 |             xmin = 1.1880000000000002 
87 |             xmax = 1.332 
88 |             text = "Z" 
89 |         intervals [14]:
90 |             xmin = 1.332 
91 |             xmax = 1.6740000000000002 
92 |             text = "sp" 
93 |         intervals [15]:
94 |             xmin = 1.6740000000000002 
95 |             xmax = 1.6920816326530612 
96 |             text = "" 
97 | 


--------------------------------------------------------------------------------
/inst/nested-intervals.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.8634920634920635 
 6 | tiers? <exists> 
 7 | size = 3 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.8634920634920635 
14 |         intervals: size = 5 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.4193533480814183 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.4193533480814183 
21 |             xmax = 0.7609484705244939 
22 |             text = "hug" 
23 |         intervals [3]:
24 |             xmin = 0.7609484705244939 
25 |             xmax = 0.8544577498524867 
26 |             text = "" 
27 |         intervals [4]:
28 |             xmin = 0.8544577498524867 
29 |             xmax = 1.4422303627712987 
30 |             text = "daddy" 
31 |         intervals [5]:
32 |             xmin = 1.4422303627712987 
33 |             xmax = 1.8634920634920635 
34 |             text = "" 
35 |     item [2]:
36 |         class = "IntervalTier" 
37 |         name = "phones" 
38 |         xmin = 0 
39 |         xmax = 1.8634920634920635 
40 |         intervals: size = 11 
41 |         intervals [1]:
42 |             xmin = 0 
43 |             xmax = 0.4193533480814183 
44 |             text = "sil" 
45 |         intervals [2]:
46 |             xmin = 0.4193533480814183 
47 |             xmax = 0.5243127432454918 
48 |             text = "HH" 
49 |         intervals [3]:
50 |             xmin = 0.5243127432454918 
51 |             xmax = 0.6369055489669525 
52 |             text = "AH1" 
53 |         intervals [4]:
54 |             xmin = 0.6369055489669525 
55 |             xmax = 0.7609484705244939 
56 |             text = "G" 
57 |         intervals [5]:
58 |             xmin = 0.7609484705244939 
59 |             xmax = 0.8544577498524867 
60 |             text = "sp" 
61 |         intervals [6]:
62 |             xmin = 0.8544577498524867 
63 |             xmax = 1.052926424344553 
64 |             text = "D" 
65 |         intervals [7]:
66 |             xmin = 1.052926424344553 
67 |             xmax = 1.2304032198038046 
68 |             text = "AE1" 
69 |         intervals [8]:
70 |             xmin = 1.2304032198038046 
71 |             xmax = 1.3220041464924508 
72 |             text = "D" 
73 |         intervals [9]:
74 |             xmin = 1.3220041464924508 
75 |             xmax = 1.4422303627712987 
76 |             text = "IY0" 
77 |         intervals [10]:
78 |             xmin = 1.4422303627712987 
79 |             xmax = 1.785733837853721 
80 |             text = "sp" 
81 |         intervals [11]:
82 |             xmin = 1.785733837853721 
83 |             xmax = 1.8634920634920635 
84 |             text = "" 
85 |     item [3]:
86 |         class = "IntervalTier" 
87 |         name = "utterance" 
88 |         xmin = 0 
89 |         xmax = 1.8634920634920635 
90 |         intervals: size = 1 
91 |         intervals [1]:
92 |             xmin = 0 
93 |             xmax = 1.8634920634920635 
94 |             text = "hug daddy" 
95 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data/nested-intervals.TextGrid:
--------------------------------------------------------------------------------
 1 | File type = "ooTextFile"
 2 | Object class = "TextGrid"
 3 | 
 4 | xmin = 0 
 5 | xmax = 1.8634920634920635 
 6 | tiers? <exists> 
 7 | size = 3 
 8 | item []: 
 9 |     item [1]:
10 |         class = "IntervalTier" 
11 |         name = "words" 
12 |         xmin = 0 
13 |         xmax = 1.8634920634920635 
14 |         intervals: size = 5 
15 |         intervals [1]:
16 |             xmin = 0 
17 |             xmax = 0.4193533480814183 
18 |             text = "" 
19 |         intervals [2]:
20 |             xmin = 0.4193533480814183 
21 |             xmax = 0.7609484705244939 
22 |             text = "hug" 
23 |         intervals [3]:
24 |             xmin = 0.7609484705244939 
25 |             xmax = 0.8544577498524867 
26 |             text = "" 
27 |         intervals [4]:
28 |             xmin = 0.8544577498524867 
29 |             xmax = 1.4422303627712987 
30 |             text = "daddy" 
31 |         intervals [5]:
32 |             xmin = 1.4422303627712987 
33 |             xmax = 1.8634920634920635 
34 |             text = "" 
35 |     item [2]:
36 |         class = "IntervalTier" 
37 |         name = "phones" 
38 |         xmin = 0 
39 |         xmax = 1.8634920634920635 
40 |         intervals: size = 11 
41 |         intervals [1]:
42 |             xmin = 0 
43 |             xmax = 0.4193533480814183 
44 |             text = "sil" 
45 |         intervals [2]:
46 |             xmin = 0.4193533480814183 
47 |             xmax = 0.5243127432454918 
48 |             text = "HH" 
49 |         intervals [3]:
50 |             xmin = 0.5243127432454918 
51 |             xmax = 0.6369055489669525 
52 |             text = "AH1" 
53 |         intervals [4]:
54 |             xmin = 0.6369055489669525 
55 |             xmax = 0.7609484705244939 
56 |             text = "G" 
57 |         intervals [5]:
58 |             xmin = 0.7609484705244939 
59 |             xmax = 0.8544577498524867 
60 |             text = "sp" 
61 |         intervals [6]:
62 |             xmin = 0.8544577498524867 
63 |             xmax = 1.052926424344553 
64 |             text = "D" 
65 |         intervals [7]:
66 |             xmin = 1.052926424344553 
67 |             xmax = 1.2304032198038046 
68 |             text = "AE1" 
69 |         intervals [8]:
70 |             xmin = 1.2304032198038046 
71 |             xmax = 1.3220041464924508 
72 |             text = "D" 
73 |         intervals [9]:
74 |             xmin = 1.3220041464924508 
75 |             xmax = 1.4422303627712987 
76 |             text = "IY0" 
77 |         intervals [10]:
78 |             xmin = 1.4422303627712987 
79 |             xmax = 1.785733837853721 
80 |             text = "sp" 
81 |         intervals [11]:
82 |             xmin = 1.785733837853721 
83 |             xmax = 1.8634920634920635 
84 |             text = "" 
85 |     item [3]:
86 |         class = "IntervalTier" 
87 |         name = "utterance" 
88 |         xmin = 0 
89 |         xmax = 1.8634920634920635 
90 |         intervals: size = 1 
91 |         intervals [1]:
92 |             xmin = 0 
93 |             xmax = 1.8634920634920635 
94 |             text = "hug daddy" 
95 | 


--------------------------------------------------------------------------------
/man/pivot_textgrid_tiers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pivot.R
 3 | \name{pivot_textgrid_tiers}
 4 | \alias{pivot_textgrid_tiers}
 5 | \title{Pivot a textgrid into wide format, respecting nested tiers}
 6 | \usage{
 7 | pivot_textgrid_tiers(data, tiers, join_cols = "file")
 8 | }
 9 | \arguments{
10 | \item{data}{a textgrid dataframe created with \code{\link[=read_textgrid]{read_textgrid()}}}
11 | 
12 | \item{tiers}{character vector of tiers to pivot into wide format. When
13 | \code{tiers} has more than 1 element, the tiers are treated as nested. For
14 | example, if \code{tiers} is \code{c("utterance", "word", "phone")}, where
15 | \code{"utterance"} intervals contain \code{"word"} intervals which in turn contain
16 | \code{"phone"} intervals, the output will have one row per \code{"phone"} interval
17 | and include \verb{utterance_*} and \verb{word_*} columns for the utterance and word
18 | intervals that contain each phone interval. \code{tiers} should be ordered from
19 | broadest to narrowest (e.g., \code{"word"} preceding \code{"phone"}).}
20 | 
21 | \item{join_cols}{character vector of the columns that will uniquely identify
22 | a textgrid file. Defaults to \code{"file"} because
23 | these columns have identical values for tiers read from the same textgrid
24 | file.}
25 | }
26 | \value{
27 | a dataframe with just the intervals from tiers named in \code{tiers}
28 | converted into a wide format. Columns are renamed so that the \code{text} column
29 | is pivoted into columns named after the tier names. For example, the \code{text}
30 | column in a \code{words} tier is renamed to \code{words}. The \code{xmax}, \code{xmin},
31 | \code{annotation_num}, \code{tier_num}, \code{tier_type} are also prefixed with the tier
32 | name. For example, the \code{xmax} column in a \code{words} tier is renamed to
33 | \code{words_xmax}. An additional helper column \code{xmid} is added and prefixed
34 | appropriately. See examples below.
35 | }
36 | \description{
37 | Pivot a textgrid into wide format, respecting nested tiers
38 | }
39 | \details{
40 | For joining nested intervals, two intervals \emph{a} and \emph{b} are combined into
41 | the same row if they match on the values in the \code{join_cols} columns and if
42 | \code{a$xmin <= b$xmid} and \code{b$xmid <= a$xmax}. That is, if the midpoint of
43 | \emph{b} is contained inside the interval \emph{a}.
44 | }
45 | \examples{
46 | data <- example_textgrid(3) |>
47 |   read_textgrid()
48 | data
49 | 
50 | # With a single tier, we get just that tier with the columns prefixed with
51 | # the tier_name
52 | pivot_textgrid_tiers(data, "utterance")
53 | pivot_textgrid_tiers(data, "words")
54 | 
55 | # With multiple tiers, intervals in one tier that contain intervals in
56 | # another tier are combined into the same row.
57 | a <- pivot_textgrid_tiers(data, c("utterance", "words"))
58 | cols <- c(
59 |   "utterance", "utterance_xmin", "utterance_xmax",
60 |   "words", "words_xmin", "words_xmax"
61 | )
62 | a[cols]
63 | 
64 | a <- pivot_textgrid_tiers(data, c("utterance", "words", "phones"))
65 | cols <- c(cols, "phones", "phones_xmin", "phones_xmax")
66 | a[cols]
67 | }
68 | 


--------------------------------------------------------------------------------
/R/pivot.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Pivot a textgrid into wide format, respecting nested tiers
  3 | #'
  4 | #' @param data a textgrid dataframe created with [read_textgrid()]
  5 | #' @param tiers character vector of tiers to pivot into wide format. When
  6 | #'   `tiers` has more than 1 element, the tiers are treated as nested. For
  7 | #'   example, if `tiers` is `c("utterance", "word", "phone")`, where
  8 | #'   `"utterance"` intervals contain `"word"` intervals which in turn contain
  9 | #'   `"phone"` intervals, the output will have one row per `"phone"` interval
 10 | #'   and include `utterance_*` and `word_*` columns for the utterance and word
 11 | #'   intervals that contain each phone interval. `tiers` should be ordered from
 12 | #'   broadest to narrowest (e.g., `"word"` preceding `"phone"`).
 13 | #' @param join_cols character vector of the columns that will uniquely identify
 14 | #'   a textgrid file. Defaults to `"file"` because
 15 | #'   these columns have identical values for tiers read from the same textgrid
 16 | #'   file.
 17 | #' @return a dataframe with just the intervals from tiers named in `tiers`
 18 | #'   converted into a wide format. Columns are renamed so that the `text` column
 19 | #'   is pivoted into columns named after the tier names. For example, the `text`
 20 | #'   column in a `words` tier is renamed to `words`. The `xmax`, `xmin`,
 21 | #'   `annotation_num`, `tier_num`, `tier_type` are also prefixed with the tier
 22 | #'   name. For example, the `xmax` column in a `words` tier is renamed to
 23 | #'   `words_xmax`. An additional helper column `xmid` is added and prefixed
 24 | #'   appropriately. See examples below.
 25 | #' @export
 26 | #'
 27 | #' @details
 28 | #' For joining nested intervals, two intervals *a* and *b* are combined into
 29 | #' the same row if they match on the values in the `join_cols` columns and if
 30 | #' `a$xmin <= b$xmid` and `b$xmid <= a$xmax`. That is, if the midpoint of
 31 | #' *b* is contained inside the interval *a*.
 32 | #'
 33 | #'
 34 | #' @examples
 35 | #' data <- example_textgrid(3) |>
 36 | #'   read_textgrid()
 37 | #' data
 38 | #'
 39 | #' # With a single tier, we get just that tier with the columns prefixed with
 40 | #' # the tier_name
 41 | #' pivot_textgrid_tiers(data, "utterance")
 42 | #' pivot_textgrid_tiers(data, "words")
 43 | #'
 44 | #' # With multiple tiers, intervals in one tier that contain intervals in
 45 | #' # another tier are combined into the same row.
 46 | #' a <- pivot_textgrid_tiers(data, c("utterance", "words"))
 47 | #' cols <- c(
 48 | #'   "utterance", "utterance_xmin", "utterance_xmax",
 49 | #'   "words", "words_xmin", "words_xmax"
 50 | #' )
 51 | #' a[cols]
 52 | #'
 53 | #' a <- pivot_textgrid_tiers(data, c("utterance", "words", "phones"))
 54 | #' cols <- c(cols, "phones", "phones_xmin", "phones_xmax")
 55 | #' a[cols]
 56 | pivot_textgrid_tiers <- function(data, tiers, join_cols = "file") {
 57 |   # Fail early when a requested tier is absent from the textgrid.
 58 |   known_tiers <- unique(data[["tier_name"]])
 59 |   stopifnot(
 60 |     `tier names must be used in textgrid` = all(tiers %in% known_tiers)
 61 |   )
 62 | 
 63 |   # TODO:
 64 |   # allow only point tiers "TextTier" at last point in nesting
 65 | 
 66 |   # Keep each requested tier once, in the order the caller listed them.
 67 |   tiers <- unique(tiers)
 68 |   keep_row <- data[["tier_name"]] %in% tiers
 69 |   data <- data[keep_row, ]
 70 | 
 71 |   # The tier-level bounds always take part in the join keys.
 72 |   join_cols <- unique(c(join_cols, "tier_xmin", "tier_xmax"))
 73 | 
 74 |   # One wide table per tier, arranged in the order given by `tiers`.
 75 |   by_tier <- split(data, ~tier_name)[tiers]
 76 |   wide_tiers <- lapply(by_tier, pivot_single_tier, join_cols)
 77 | 
 78 |   # Fold the tables left to right, joining each tier onto the result so far.
 79 |   nest_step <- function(parent, child) {
 80 |     left_join_nested_tiers(parent, child, join_cols)
 81 |   }
 82 |   result <- Reduce(nest_step, wide_tiers)
 83 | 
 84 |   result[["tier_name"]] <- NULL
 85 |   result
 86 | }
 87 | 
 88 | pivot_single_tier <- function(data, join_cols) {
 89 |   # Exactly one tier is expected per call.
 90 |   tier_name <- unique(data[["tier_name"]])
 91 |   stopifnot(length(tier_name) == 1)
 92 | 
 93 |   # Midpoint of each annotation, stored as a helper column.
 94 |   lower <- data[["xmin"]]
 95 |   upper <- data[["xmax"]]
 96 |   data[["xmid"]] <- lower + (upper - lower) / 2
 97 | 
 98 |   # Columns kept under their own names: join keys first, tier info last.
 99 |   trailing <- c("tier_name", "tier_xmin", "tier_xmax")
100 |   leading <- setdiff(join_cols, trailing)
101 |   per_tier <- c("xmin", "xmax", "xmid", "annotation_num", "tier_num", "tier_type")
102 | 
103 |   # `text` takes the tier's name; per-tier columns get a "<tier>_" prefix.
104 |   old_names <- c(leading, "text", per_tier, trailing)
105 |   prefixed <- paste(tier_name, per_tier, sep = "_")
106 |   new_names <- c(leading, tier_name, prefixed, trailing)
107 | 
108 |   reordered <- data[old_names]
109 |   names(reordered) <- new_names
110 |   reordered
111 | }
112 | 
113 | # Declare `x` and `y`, the table names used in dplyr::join_by(), for R CMD check.
114 | utils::globalVariables(c("x", "y"))
115 | 
116 | # Left-join `data_child` onto `data_parent`: rows pair up when they agree on
117 | # `join_cols` and the child's midpoint lies inside the parent's interval.
118 | left_join_nested_tiers <- function(data_parent, data_child, join_cols) {
119 |   # Tier names come from the first non-missing `tier_name`: after an earlier
120 |   # left join, parent rows without a matching child carry NA in that column.
121 |   first_tier <- function(d) d[["tier_name"]][!is.na(d[["tier_name"]])][1]
122 |   x_names <- paste0(first_tier(data_parent), "_", c("xmin", "xmax"))
123 |   y_names <- paste0(first_tier(data_child), "_", "xmid")
124 | 
125 |   # Join condition: <parent>_xmin <= <child>_xmid <= <parent>_xmax.
126 |   e <- rlang::expr(
127 |     dplyr::between(
128 |       `$`(y, !! y_names), `$`(x, !! x_names[1]), `$`(x, !! x_names[2])
129 |     )
130 |   )
131 | 
132 |   # Drop the parent's `tier_name` so the result carries only the child's.
133 |   data_parent[["tier_name"]] <- NULL
134 |   dplyr::left_join(
135 |     data_parent, data_child,
136 |     dplyr::join_by(!!! join_cols, !! e),
137 |     relationship = "one-to-many"
138 |   )
139 | }
140 | 


--------------------------------------------------------------------------------
/src/code.cpp:
--------------------------------------------------------------------------------
  1 | #include "cpp11.hpp"
  2 | using namespace cpp11;
  3 | #include <cstdlib>   // for std::strtod
  4 | 
  5 | // Generated by asking ChatGPT to translate an R-based, no-AI version
  6 | // into cpp11. I reviewed the generated code, clarified the meaning of some
  7 | // lines with AI, and made code comment notes.
  8 | 
  9 | // early declaration to satisfy RStudio warning
 10 | list cpp_parse_praat_numbers(strings x);
 11 | 
 12 | // Split TextGrid source text into whitespace-delimited tokens. Outside a
 13 | // string, `!` starts a comment that runs to the next LF and ASCII whitespace
 14 | // ends a token; `"` toggles string mode and `""` inside a string is an
 15 | // escaped quote. Returns a named list: `tokens`, `is_string` (TRUE when a
 16 | // token starts and ends with `"`), and `num_prefix` / `num_value`, the two
 17 | // vectors cpp_parse_praat_numbers() yields for the tokens.
 18 | [[cpp11::register]]
 19 | list cpp_tg_scan_tokens(std::string src) {
 20 |   // Tokens are only emitted on whitespace, so make sure the input ends in a space.
 21 |   if (src.empty() || src.back() != 0x20) src.push_back(0x20);
 22 | 
 23 |   const size_t nbytes = src.size();
 24 | 
 25 |   writable::strings tokens;
 26 |   writable::logicals tokens_is_string;
 27 | 
 28 |   bool in_comment = false;
 29 |   bool in_string  = false;
 30 |   // Set after the first quote of a "" pair so that the second quote is skipped.
 31 |   bool esc_next   = false;
 32 | 
 33 |   bool   have_token = false;
 34 |   size_t tok_start_byte = 0;
 35 | 
 36 |   // Is this a UTF-8 continuation byte? (10xxxxxx)
 37 |   auto is_cont = [](unsigned char b)->bool {
 38 |     return (b & 0xC0) == 0x80;
 39 |   };
 40 |   // Is this an ASCII whitespace? (space, tab, CR, LF)
 41 |   auto is_ws   = [](unsigned char b)->bool {
 42 |     return b == 0x20 || b == 0x09 || b == 0x0D || b == 0x0A;
 43 |   };
 44 | 
 45 |   for (size_t i = 0; i < nbytes; ++i) {
 46 |     // char might be signed on some systems, so work with an unsigned byte.
 47 |     unsigned char b = static_cast<unsigned char>(src[i]);
 48 |     // Only the first byte of each character drives the state machine.
 49 |     if (is_cont(b)) continue;
 50 | 
 51 |     if (in_comment) {
 52 |       if (b == 0x0A) in_comment = false;
 53 |       continue;
 54 |     }
 55 |     if (!in_string && b == 0x21) { // '!'
 56 |       in_comment = true;
 57 |       continue;
 58 |     }
 59 |     if (esc_next) { esc_next = false; continue; }
 60 | 
 61 |     if (!in_string && is_ws(b)) {
 62 |       if (have_token) {
 63 |         // The token ends on the byte just before this whitespace. Ending it
 64 |         // on the first byte of the previous character would cut a token whose
 65 |         // last character is multi-byte UTF-8 and leave an invalid sequence.
 66 |         // A token always starts on an earlier byte, so i - 1 >= start.
 67 |         const size_t start = tok_start_byte;
 68 |         const size_t end   = i - 1;
 69 |         const size_t len   = end - start + 1;
 70 |         // do we have a string (start and end with ")
 71 |         const bool q =
 72 |           (static_cast<unsigned char>(src[start]) == 0x22) &&
 73 |           (static_cast<unsigned char>(src[end])   == 0x22);
 74 |         tokens.push_back(src.substr(start, len));
 75 |         tokens_is_string.push_back(q);
 76 |         have_token = false;
 77 |       }
 78 |       continue;
 79 |     }
 80 | 
 81 |     if (b == 0x22) { // '"'
 82 |       // Peek at the next character (not just the next byte, hence the skip
 83 |       // over continuation bytes) to detect a doubled "" escape.
 84 |       size_t j = i + 1;
 85 |       while (j < nbytes && is_cont(static_cast<unsigned char>(src[j]))) ++j;
 86 |       // Use a `0x00` dummy character if we are at the end of the input.
 87 |       unsigned char nextb = (j < nbytes) ? static_cast<unsigned char>(src[j]) : 0x00;
 88 | 
 89 |       if (in_string && nextb == 0x22) {
 90 |         esc_next = true;    // consume next '"' once
 91 |       } else {
 92 |         in_string = !in_string;
 93 |       }
 94 |     }
 95 | 
 96 |     if (!have_token) {
 97 |       have_token = true;
 98 |       tok_start_byte = i;
 99 |     }
100 |   }
101 | 
102 |   list number_data = cpp_parse_praat_numbers(tokens);
103 |   writable::list out(4);
104 |   out[0] = tokens;                         // strings
105 |   out[1] = tokens_is_string;               // logicals
106 |   out[2] = number_data[0];                 // prefix_len (doubles)
107 |   out[3] = number_data[1];                 // value (doubles)
108 |   out.attr("names") = writable::strings({"tokens", "is_string", "num_prefix", "num_value"});
109 |   return out;
110 | }
111 | 
112 | 
113 | [[cpp11::register]]
114 | list cpp_parse_praat_numbers(strings x) {
115 |   R_xlen_t n = x.size();
116 |   writable::doubles out_len(n);  // store prefix length
117 |   writable::doubles out_val(n);  // store parsed value (for testing)
118 | 
119 |   for (R_xlen_t i = 0; i < n; i++) {
120 |     if (x[i] == NA_STRING) {
121 |       out_len[i] = NA_REAL;
122 |       out_val[i] = NA_REAL;
123 |       continue;
124 |     }
125 | 
126 |     const char* str = Rf_translateCharUTF8(x[i]);
127 | 
128 |     // Reject leading '.' per Praat rule (".4" invalid)
129 |     if (str[0] == '.' ) {
130 |       out_len[i] = 0;
131 |       out_val[i] = NA_REAL;
132 |       continue;
133 |     }
134 | 
135 |     // Allow optional sign
136 |     const char* p = str;
137 |     if (*p == '+' || *p == '-') ++p;
138 |     if (!(*p >= '0' && *p <= '9')) {
139 |       out_len[i] = 0;
140 |       out_val[i] = NA_REAL;
141 |       continue;
142 |     }
143 | 
144 |     char* endptr = nullptr;
145 |     double val = std::strtod(str, &endptr);
146 | 
147 |     if (endptr == str) {
148 |       out_len[i] = 0;
149 |       out_val[i] = NA_REAL;
150 |     } else {
151 |       out_len[i] = static_cast<double>(endptr - str);
152 |       out_val[i] = val;
153 |     }
154 |   }
155 | 
156 |   // Return as a data.frame-like list
157 |   writable::list res;
158 |   res.push_back(out_len);
159 |   res.push_back(out_val);
160 |   res.attr("names") = writable::strings({"prefix_len", "value"});
161 |   return res;
162 | }
163 | 


--------------------------------------------------------------------------------
/R/pure-r-parser.R:
--------------------------------------------------------------------------------
  1 | # Implementation of the textgrid parsing written in pure R.
  2 | # This was ported to C++ for speed but it's important to have around
  3 | # for unit tests as a reference implementation.
  4 | 
  5 | r_read_textgrid <- function(path, file = NULL, encoding = NULL) {
  6 |   if (is.null(file)) {
  7 |     file <- basename(path)
  8 |   }
  9 | 
 10 |   if (is.null(encoding)) {
 11 |     encoding <- readr::guess_encoding(path)$encoding[1]
 12 |   }
 13 |   file_locale <- readr::locale(encoding = encoding)
 14 | 
 15 |   path |>
 16 |     readr::read_lines(locale = file_locale) |>
 17 |     r_read_textgrid_lines(file = file)
 18 | }
 19 | 
 20 | r_read_textgrid_lines <- function(lines, file = NULL) {
 21 |   if (is.null(file)) {
 22 |     file <- NA_character_
 23 |   }
 24 | 
 25 |   stopifnot(str_detect_any(lines, "ooTextFile"))
 26 | 
 27 |   lines |>
 28 |     r_parse_textgrid_lines() |>
 29 |     tibble::add_column(file = file, .before = 1) |>
 30 |     tibble::as_tibble()
 31 | }
 32 | 
 33 | r_parse_textgrid_lines <- function(lines) {
 34 |   tg_characters <- lines |>
 35 |     # collapse into one string
 36 |     stringr::str_c(collapse = "\n") |>
 37 |     # concat one trailing space
 38 |     stringr::str_c(" ") |>
 39 |     # split into individual characters
 40 |     stringr::str_split("") |>
 41 |     unlist()
 42 | 
 43 |   tg_tokens <- r_tokenize_textgrid_chars(tg_characters)
 44 |   tier_indices <- find_tier_boundaries(tg_tokens)
 45 |   tier_types <- tg_tokens[tier_indices$start] |> unlist()
 46 | 
 47 |   tier_info_df <- data.frame(
 48 |     tier_num = seq_along(tier_types),
 49 |     tier_type = tier_types,
 50 |     tier_start = tier_indices$start,
 51 |     tier_end = tier_indices$end
 52 |   )
 53 | 
 54 |   data <- tier_info_df |>
 55 |     split(~tier_num) |>
 56 |     lapply(parse_tier, tg_tokens = tg_tokens) |>
 57 |     dplyr::bind_rows()
 58 | 
 59 |   data[["tier_xmin"]] <- as.numeric(data[["tier_xmin"]])
 60 |   data[["tier_xmax"]] <- as.numeric(data[["tier_xmax"]])
 61 |   data[["xmin"]] <- as.numeric(data[["xmin"]])
 62 |   data[["xmax"]] <- as.numeric(data[["xmax"]])
 63 |   data[["tier_num"]] <- as.integer(data[["tier_num"]])
 64 |   data[["annotation_num"]] <- as.integer(data[["annotation_num"]])
 65 |   data[["text"]] <- as.character(data[["text"]])
 66 |   data
 67 | }
 68 | 
 69 | r_tokenize_textgrid_chars <- function(all_char) {
 70 |   # The parser rules here follow the textgrid specifications
 71 |   # <https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html> EXCEPT
 72 |   # when they contradict the behavior of Praat.exe. For example, the specs says
 73 |   # the main literals are freestanding strings and numbers, where freestanding
 74 |   # means that they have a whitespace or boundary (newline or file start/end).
 75 |   # But Praat.exe can handle numbers like "10.00!comment". So, this parser
 76 |   # gathers freestanding literals but only keeps ones that are strings or
 77 |   # start with a valid number (the non-numeric characters are lopped off.)
 78 | 
 79 |   in_strong_comment <- FALSE         # Comment mode: ! to new line \n
 80 |   in_string <- FALSE                 # String mode: "Quote to quote"
 81 |   in_escaped_quote <- FALSE          # Escaped quote: "" inside of a string
 82 | 
 83 |   token_start <- integer(0)          # Start of current token
 84 |   values <- vector(mode = "list")    # Collects completed values
 85 | 
 86 |   for (i in seq_along(all_char)) {
 87 |     cur_value_ready <- length(token_start) != 0
 88 |     c <- all_char[i]
 89 |     c_is_whitespace <- c %in% c(" ", "\n")
 90 |     c_starts_string <- c == "\""
 91 | 
 92 |     # Comments start with ! and end with \n. Skip characters in this mode.
 93 |     if (!in_string & c == "!") {
 94 |       in_strong_comment <- TRUE
 95 |       next
 96 |     }
 97 |     if (in_strong_comment) {
 98 |       if (c == "\n") in_strong_comment <- FALSE
 99 |       next
100 |     }
101 | 
102 |     # Whitespace delimits values so collect values if we see whitespace
103 |     if (c_is_whitespace & !in_string) {
104 |       # Skip whitespace if no values collected so far
105 |       if (!cur_value_ready) next
106 | 
107 |       total_value <- all_char[seq(token_start, i - 1)] |>
108 |         paste0(collapse = "")
109 |       is_string <- all_char[token_start] == "\"" && all_char[i - 1] == "\""
110 | 
111 |       # Collect only numbers and strings
112 |       if (r_tg_parse_is_number(total_value)) {
113 |         # Keep only the numeric part.
114 |         total_value <- total_value |> r_tg_parse_extract_number()
115 |         values <- c(values, total_value)
116 |       } else if (is_string) {
117 |         values <- c(values, total_value)
118 |       }
119 |       token_start <- integer(0)
120 |       next
121 |     }
122 | 
123 |     # Store character if ending an escaped quote
124 |     if (in_escaped_quote) {
125 |       in_escaped_quote <- !in_escaped_quote
126 |       next
127 |     }
128 | 
129 |     # Start or close string mode if we see "
130 |     if (c_starts_string) {
131 |       # Check for "" escapes
132 |       peek_c <- all_char[i + 1]
133 |       if (peek_c == "\"" & in_string) {
134 |         in_escaped_quote <- TRUE
135 |       } else {
136 |         in_string <- !in_string
137 |       }
138 |     }
139 | 
140 |     if (!cur_value_ready) {
141 |       token_start <- i
142 |     }
143 |   }
144 | 
145 |   values |>
146 |     lapply(r_tg_parse_convert_value)
147 | }
148 | 
# Does `x` start with a number? A numeric prefix is, from the string start:
# - an optional sign (+ or -)
# - one or more digits
# - an optional decimal point followed by zero or more digits
# - an optional exponent: e or E, an optional sign, and one or more digits
r_tg_parse_is_number <- function(x) {
  number_prefix_re <- "^[+-]?\\d+(?:\\.\\d*)?(?:[eE][+-]?\\d+)?"
  stringr::str_detect(x, number_prefix_re)
}
158 | 
# Pull the numeric prefix out of `x` and convert it to a double. Characters
# after the prefix are discarded.
r_tg_parse_extract_number <- function(x) {
  number_prefix_re <- "^[+-]?\\d+(?:\\.\\d*)?(?:[eE][+-]?\\d+)?"
  prefix <- stringr::str_extract(x, number_prefix_re)
  as.numeric(prefix)
}
165 | 
# Finalize a token value. Non-character values pass through untouched.
# Character values lose their first and last character (the surrounding
# quotes) and have each "" collapsed to a single ".
r_tg_parse_convert_value <- function(x) {
  if (!is.character(x)) {
    return(x)
  }
  unquoted <- substr(x, 2, nchar(x) - 1)
  stringr::str_replace_all(unquoted, "\"\"", "\"")
}
175 | 


--------------------------------------------------------------------------------
/R/legacy.R:
--------------------------------------------------------------------------------
  1 | # Old version of parsing code
  2 | 
  3 | 
  4 | #' @export
  5 | #' @rdname read_textgrid
  6 | #' @order 3
  7 | legacy_read_textgrid <- function(path, file = NULL, encoding = NULL) {
  8 |   if (is.null(file)) {
  9 |     file <- basename(path)
 10 |   }
 11 | 
 12 |   if (is.null(encoding)) {
 13 |     encoding <- readr::guess_encoding(path)$encoding[1]
 14 |   }
 15 |   file_locale <- readr::locale(encoding = encoding)
 16 | 
 17 |   path |>
 18 |     readr::read_lines(locale = file_locale) |>
 19 |     legacy_read_textgrid_lines(file = file)
 20 | }
 21 | 
 22 | #' @export
 23 | #' @rdname read_textgrid
 24 | #' @order 4
 25 | legacy_read_textgrid_lines <- function(lines, file = NULL) {
 26 |   if (is.null(file)) {
 27 |     file <- NA_character_
 28 |   }
 29 | 
 30 |   stopifnot(str_detect_any(lines, "ooTextFile"))
 31 | 
 32 |   df <- lines |>
 33 |     .v1_parse_textgrid_lines() |>
 34 |     tibble::as_tibble() |>
 35 |     tibble::add_column(file = file, .before = 1)
 36 | 
 37 |   df[["tier_name"]] <- .v1_str_unescape_quote(df[["tier_name"]])
 38 |   df[["text"]] <- .v1_str_unescape_quote(df[["text"]])
 39 |   df
 40 | }
 41 | 
 42 | .v1_parse_textgrid_lines <- function(lines) {
 43 |   lines |>
 44 |     .v1_slice_sections("item") |>
 45 |     purrr::map(.v1_parse_item_lines) |>
 46 |     dplyr::bind_rows()
 47 | }
 48 | 
 49 | .v1_slice_sections <- function(lines, section_head) {
 50 |   re <- sprintf("^\\s+%s ?\\[\\d+\\]:?", section_head)
 51 |   starts <- stringr::str_which(lines, re)
 52 |   ends <- c(starts[-1] - 1, length(lines))
 53 |   purrr::map2(starts, ends, function(x, y) lines[seq(x, y, by = 1)])
 54 | }
 55 | 
 56 | .v1_parse_item_lines <- function(lines_items) {
 57 |   item_num <- lines_items[1] |>
 58 |     stringr::str_extract("\\d+") |>
 59 |     as.numeric()
 60 | 
 61 |   tier_type <- .v1_get_field(lines_items, "class")
 62 |   tier_name <- .v1_get_field(lines_items, "name")
 63 |   tier_xmin <- .v1_get_field_dbl(lines_items, "xmin")
 64 |   tier_xmax <- .v1_get_field_dbl(lines_items, "xmax")
 65 | 
 66 |   stopifnot(tier_type %in% c("IntervalTier", "TextTier"))
 67 | 
 68 |   if (tier_type == "IntervalTier") {
 69 |     df <- .v1_parse_interval_tier(lines_items)
 70 |   } else {
 71 |     df <- .v1_parse_point_tier(lines_items)
 72 |   }
 73 | 
 74 |   df[["xmin"]] <- as.numeric(df[["xmin"]])
 75 |   df[["xmax"]] <- as.numeric(df[["xmax"]])
 76 | 
 77 |   tibble::add_column(
 78 |     .data = df,
 79 |     tier_num  = item_num,
 80 |     tier_name = tier_name,
 81 |     tier_type = tier_type,
 82 |     tier_xmin = tier_xmin,
 83 |     tier_xmax = tier_xmax,
 84 |     .before = 1
 85 |   )
 86 | }
 87 | 
 88 | .v1_parse_interval_tier <- function(lines_interval_tier) {
 89 |   lines_interval_tier |>
 90 |     .v1_slice_sections("intervals") |>
 91 |     purrr::map(.v1_combine_text_lines) |>
 92 |     purrr::map(.v1_get_field_list, fields = c("xmin", "xmax", "text")) |>
 93 |     purrr::imap(.v1_add_annotation_num) |>
 94 |     dplyr::bind_rows()
 95 | }
 96 | 
 97 | # If the text field spans multiple lines, combine them into one string
 98 | .v1_combine_text_lines <- function(lines_annotation) {
 99 |   loc_mark_start <- lines_annotation |> .v1_which_field("mark")
100 |   loc_text_start <- lines_annotation |> .v1_which_field("text")
101 |   loc_text_start <- c(loc_text_start, loc_mark_start)
102 | 
103 |   if (loc_text_start != length(lines_annotation)) {
104 |     loc_text_rest <- seq(loc_text_start + 1, length(lines_annotation), by = 1)
105 |     loc_text_full <- c(loc_text_start, loc_text_rest)
106 |     lines_annotation[loc_text_start] <- lines_annotation[loc_text_full] |>
107 |       paste0(collapse = "\n")
108 |     lines_annotation <- lines_annotation[-loc_text_rest]
109 |   }
110 |   lines_annotation
111 | }
112 | 
# Parse a point tier into a data frame with columns xmin, xmax, text and
# annotation_num. Points are represented as zero-width intervals.
.v1_parse_point_tier <- function(lines_point_tier) {
  no_points <- .v1_str_detect_any(lines_point_tier, "points: size = 0")

  if (no_points) {
    # A point tier with no points should still be represented in the results.
    empty_tier <- data.frame(
      xmin = NA,
      xmax = NA,
      text = NA_character_,
      annotation_num = NA,
      stringsAsFactors = FALSE
    )
    return(empty_tier)
  }

  sections <- .v1_slice_sections(lines_point_tier, "points")
  fields <- purrr::map(
    sections,
    .v1_get_field_list,
    fields = c("number", "mark")
  )
  df <- dplyr::bind_rows(purrr::imap(fields, .v1_add_annotation_num))

  # Both interval edges sit at the point's time; the mark is its text
  df[["xmin"]] <- df[["number"]]
  df[["xmax"]] <- df[["number"]]
  df[["text"]] <- df[["mark"]]

  # Selecting the output columns also drops `number` and `mark`
  df[c("xmin", "xmax", "text", "annotation_num")]
}
143 | 
# Store `y` in `x` under the name "annotation_num" and return the result.
# Has the (value, index) argument order that purrr::imap() supplies.
.v1_add_annotation_num <- function(x, y) {
  annotated <- x
  annotated[["annotation_num"]] <- y
  annotated
}
148 | 
# Look up several fields at once. Returns a list with one element per entry
# of `fields`, named after the fields.
.v1_get_field_list <- function(lines, fields) {
  values <- lapply(fields, function(field) .v1_get_field(lines, field))
  names(values) <- fields
  values
}
155 | 
# Value of the first line matching "[field] = [value]": everything after
# "[field] = ", trimmed of surrounding whitespace and with one leading and one
# trailing double quote removed.
.v1_get_field <- function(lines, field) {
  pattern <- paste0("(?<=", field, " = ).+")
  # dotall: a combined multi-line "text = ..." value contains newlines
  re <- stringr::regex(pattern, dotall = TRUE)

  matches <- stringr::str_extract(lines, re)
  first_match <- utils::head(.v1_remove_na(matches), 1)
  .v1_str_unquote(stringr::str_trim(first_match))
}
169 | 
# Indices of all lines that contain "[field] = " followed by at least one
# character.
.v1_which_field <- function(lines, field) {
  pattern <- paste0("(?<=", field, " = ).+")
  stringr::str_which(lines, pattern)
}
176 | 
# Like .v1_get_field(), but with the value converted to numeric.
.v1_get_field_dbl <- function(lines, field) {
  raw_value <- .v1_get_field(lines, field)
  as.numeric(raw_value)
}
181 | 
# Drop the NA elements of `xs`.
.v1_remove_na <- function(xs) {
  is_present <- !is.na(xs)
  xs[is_present]
}
185 | 
# Remove one leading and one trailing double quote from each string.
.v1_str_unquote <- function(xs) {
  gsub(pattern = "^\"|\"$", replacement = "", x = xs)
}
189 | 
# Undo Praat's quote escapement: every "" becomes a single ".
.v1_str_unescape_quote <- function(xs) {
  gsub(pattern = '""', replacement = '"', x = xs, perl = TRUE)
}
193 | 
# TRUE when at least one element of `xs` matches `pattern`.
.v1_str_detect_any <- function(xs, pattern) {
  hits <- stringr::str_detect(xs, pattern)
  any(hits)
}
197 | 


--------------------------------------------------------------------------------
/tests/testthat/test-read-textgrid.R:
--------------------------------------------------------------------------------
  1 | test_that("reading in point tiers", {
  2 |   path <- testthat::test_path("test-data/points.TextGrid")
  3 |   tg <- read_textgrid(path)
  4 |   expect_equal(nrow(tg), 3)
  5 | 
  6 |   # Validate against v1 and pure-r version
  7 |   tg2 <- legacy_read_textgrid(path)
  8 |   expect_equal(tg, tg2)
  9 |   tg3 <- r_read_textgrid(path)
 10 |   expect_equal(tg, tg3)
 11 | })
 12 | 
 13 | test_that("reading in empty point tiers", {
 14 |   path <- testthat::test_path("test-data/Mary_John_bell.TextGrid")
 15 |   tg <- read_textgrid(path)
 16 |   expect_equal(nrow(tg), 3)
 17 | 
 18 |   # Validate against v1 and pure-r version
 19 |   tg2 <- legacy_read_textgrid(path)
 20 |   expect_equal(tg, tg2)
 21 |   tg3 <- r_read_textgrid(path)
 22 |   expect_equal(tg, tg3)
 23 | })
 24 | 
 25 | test_that("result is a tibble", {
 26 |   path <- testthat::test_path("test-data/Mary_John_bell.TextGrid")
 27 |   tg <- read_textgrid(path)
 28 |   testthat::expect_s3_class(tg, "tbl")
 29 | })
 30 | 
 31 | test_that("we can parse numbers supported by Praat.exe", {
 32 |   # Files here are minimal tests for numbers that can be parsed by Praat.exe.
 33 |   # We need to match what Praat.exe supports, not what they say they support.
 34 |   path <- testthat::test_path("test-data/praat-test/okay-digit-dot-space.TextGrid")
 35 |   # if (interactive()) writeLines(readLines(path))
 36 |   tg <- read_textgrid(path)
 37 |   expect_equal(tg$xmax, 1.0)
 38 | 
 39 |   path <- testthat::test_path("test-data/praat-test/okay-plus-digit-or-minus-digit.TextGrid")
 40 |   # if (interactive()) writeLines(readLines(path))
 41 |   tg <- read_textgrid(path)
 42 |   expect_equal(tg$tier_xmin, -0.3)
 43 |   expect_equal(tg$tier_xmax, 2.0)
 44 | 
 45 |   path <- testthat::test_path("test-data/praat-test/okay-scientific-notation.TextGrid")
 46 |   # if (interactive()) writeLines(readLines(path))
 47 |   tg <- read_textgrid(path)
 48 |   expect_equal(tg$tier_xmin, c(0, 0, 0))
 49 |   expect_equal(tg$tier_xmax, c(20, 20, 20))
 50 |   expect_equal(tg$xmin, c(0, 0.5, 10))
 51 |   expect_equal(tg$xmax, c(0.5, 10, 20))
 52 | 
 53 |   path <- testthat::test_path("test-data/praat-test/okay-hex-numbers.TextGrid")
 54 |   # if (interactive()) writeLines(readLines(path))
 55 |   tg <- read_textgrid(path)
 56 |   expect_equal(tg$tier_xmin, c(0, 0, 0, 0))
 57 |   expect_equal(tg$tier_xmax, c(3, 3, 3, 3))
 58 |   expect_equal(tg$xmin, c(0, 0.5, 1.5, 2.5))
 59 |   expect_equal(tg$xmax, c(0.5, 1.5, 2.5, 3))
 60 | 
 61 |   path <- testthat::test_path("test-data/praat-test/okay-real-with-trailing-characters.TextGrid")
 62 |   # if (interactive()) writeLines(readLines(path))
 63 |   tg <- read_textgrid(path)
 64 |   expect_equal(tg$tier_xmin, c(0, 0, 0))
 65 |   expect_equal(tg$tier_xmax, c(20, 20, 20))
 66 |   expect_equal(tg$xmin, c(0, 0.5, 10))
 67 |   expect_equal(tg$xmax, c(0.5, 10, 20))
 68 | })
 69 | 
 70 | test_that("example_textgrid works", {
 71 |   path <- example_textgrid()
 72 |   tg <- read_textgrid(path)
 73 |   expect_equal(nrow(tg), 3)
 74 | 
 75 |   # Validate against v1 and pure-r version
 76 |   tg2 <- legacy_read_textgrid(path)
 77 |   expect_equal(tg, tg2)
 78 |   tg3 <- r_read_textgrid(path)
 79 |   expect_equal(tg, tg3)
 80 | })
 81 | 
 82 | test_that("comment textgrid works", {
 83 |   path <- testthat::test_path("test-data/comment.TextGrid")
 84 |   tg <- read_textgrid(path)
 85 |   testthat::expect_s3_class(tg, "tbl")
 86 |   expect_equal(nrow(tg), 3)
 87 | })
 88 | 
 89 | test_that("short format textgrid works", {
 90 |   path <- testthat::test_path("test-data/short.TextGrid")
 91 |   tg <- read_textgrid(path)
 92 |   testthat::expect_s3_class(tg, "tbl")
 93 |   expect_equal(nrow(tg), 3)
 94 | })
 95 | 
 96 | test_that("escaped quotes (\"\") are converted to single (\")", {
 97 |   path <- testthat::test_path("test-data/quoted.TextGrid")
 98 |   tg <- read_textgrid(path)
 99 |   has_double <- any(grepl('""', tg$text))
100 |   has_single <- any(grepl('"', tg$text))
101 |   expect_false(has_double)
102 |   expect_true(has_single)
103 | 
104 |   # Validate against v1 and pure-r version
105 |   tg2 <- legacy_read_textgrid(path)
106 |   expect_equal(tg, tg2)
107 |   tg3 <- r_read_textgrid(path)
108 |   expect_equal(tg, tg3)
109 | })
110 | 
test_that("can read in hard-to-parse file", {
  raw_path <- testthat::test_path("test-data/hard-to-parse.TextGrid")
  current <- read_textgrid(raw_path)

  # The pure-R parser must agree on the original file
  from_pure_r <- r_read_textgrid(raw_path)
  from_pure_r$file <- "hard-to-parse.TextGrid"
  expect_equal(current, from_pure_r)

  # A version of the TextGrid opened and saved by Praat to a long TextGrid.
  # Only the `file` column should differ, so it is overwritten before
  # comparing.
  normalized_path <- testthat::test_path(
    "test-data/hard-to-parse-normalized.TextGrid"
  )
  from_normalized <- read_textgrid(normalized_path)
  from_normalized$file <- "hard-to-parse.TextGrid"
  expect_equal(current, from_normalized)

  # The legacy (v1) parser is validated on the normalized version
  from_legacy <- legacy_read_textgrid(normalized_path)
  from_legacy$file <- "hard-to-parse.TextGrid"
  expect_equal(current, from_legacy)
})
131 | 
132 | 
133 | 
test_that("encoding support", {
  # First bundled file: explicit encoding, then guessed encoding
  utf8_explicit <- read_textgrid(example_textgrid(1), encoding = "UTF-8")
  expect_equal(nrow(utf8_explicit), 3)

  utf8_guessed <- read_textgrid(example_textgrid(1))
  expect_equal(nrow(utf8_guessed), 3)

  # Second bundled file: explicit encoding, then guessed encoding
  utf16_explicit <- read_textgrid(example_textgrid(2), encoding = "UTF-16")
  expect_equal(nrow(utf16_explicit), 3)

  utf16_guessed <- read_textgrid(example_textgrid(2))
  expect_equal(nrow(utf16_guessed), 3)
})
155 | 
test_that("reading in ELAN-generated textgrids (#11)", {
  elan <- read_textgrid(testthat::test_path("test-data/elan.TextGrid"))
  expect_equal(nrow(elan), 5)
})
164 | 
test_that("pivoting words on a single tier", {
  data <- read_textgrid(
    testthat::test_path("test-data/nested-intervals.TextGrid")
  )

  p1 <- pivot_textgrid_tiers(data, "utterance")
  expect_equal(p1$utterance, "hug daddy")

  # The pivoted tier contributes its time columns
  time_columns <- c("utterance_xmin", "utterance_xmid", "utterance_xmax")
  expect_true(all(hasName(p1, time_columns)))

  # A tier name that is not in the data is an error
  expect_error(pivot_textgrid_tiers(data, "fake name"), "must be used")
})
183 | 
test_that("pivoting works with multiple tiers", {
  expected_phones <- c(
    "sil", "HH", "AH1", "G", "sp", "D", "AE1", "D", "IY0", "sp", ""
  )
  expected_words <- rep(c("", "hug", "", "daddy", ""), c(1, 3, 1, 4, 2))

  data <- read_textgrid(
    testthat::test_path("test-data/nested-intervals.TextGrid")
  )
  p2 <- pivot_textgrid_tiers(data, c("words", "phones"))

  expect_equal(p2$words, expected_words)
  expect_equal(p2$phones, expected_phones)

  # Each pivoted tier contributes its own time columns
  word_columns <- c("words_xmin", "words_xmid", "words_xmax")
  expect_true(all(hasName(p2, word_columns)))

  phone_columns <- c("phones_xmin", "phones_xmid", "phones_xmax")
  expect_true(all(hasName(p2, phone_columns)))
})
211 | 
212 | 
213 | 
test_that("we match Praat.exe's parsing behavior", {
  tokens <- c("+1.0", "000", "3e", "3E", "-2", "0xA", ".5", "+.0")
  parsed <- cpp_parse_praat_numbers(tokens)
  expect_equal(parsed$value, c(1, 0, 3, 3, -2, 10, NA_real_, NA_real_))
})
220 | 


--------------------------------------------------------------------------------
/R/readtextgrid.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Read a textgrid file into a tibble
  3 | #'
  4 | #' @rdname read_textgrid
  5 | #' @param path a path to a textgrid
  6 | #' @param lines alternatively, the lines of a textgrid file
  7 | #' @param encoding the encoding of the textgrid. The default value `NULL` uses
  8 | #'   [readr::guess_encoding()] to guess the encoding of the textgrid. If an
  9 | #'   encoding is provided, it is forwarded to [readr::locale()] and
 10 | #'   [readr::read_lines()].
 11 | #' @param file an optional value to use for the `file` column. For
 12 | #'   `read_textgrid()`, the default is the base filename of the input file. For
 13 | #'   `read_textgrid_lines()`, the default is `NA`.
 14 | #' @return a tibble with one row per textgrid annotation
 15 | #'
 16 | #' @details The `legacy_read_textgrid` functions are the original textgrid
 17 | #'   parsers provided by the package. They assume that the TextGrid file is a
 18 | #'   "long" format textgrid; this is the default format used by "Save a text
 19 | #'   file..." in Praat.
 20 | #'
 21 | #'   The current `read_textgrid()` functions are more
 22 | #'   flexible and can read in "short" format textgrids and textgrids with
 23 | #'   comments.
 24 | #'
 25 | #'   See <https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html>
 26 | #'   for a description of the textgrid file format. Note that this package does
 27 | #'   not strictly adhere to format as described in this document. For example,
 28 | #'   the document says that numbers should be freestanding (surrounded by spaces
 29 | #'   or string boundaries), but Praat.exe can handle malformed numbers like
 30 | #'   `100ms`. Therefore, we tried to implement a parser that matched what Praat
 31 | #'   actually handles.
 32 | #'
 33 | #' @export
 34 | #' @order 1
 35 | #' @examples
 36 | #' tg <- system.file("Mary_John_bell.TextGrid", package = "readtextgrid")
 37 | #' read_textgrid(tg)
 38 | read_textgrid <- function(path, file = NULL, encoding = NULL) {
 39 |   if (is.null(file)) {
 40 |     file <- basename(path)
 41 |   }
 42 | 
 43 |   if (is.null(encoding)) {
 44 |     encoding <- readr::guess_encoding(path)$encoding[1]
 45 |   }
 46 |   file_locale <- readr::locale(encoding = encoding)
 47 | 
 48 |   path |>
 49 |     readr::read_lines(locale = file_locale) |>
 50 |     read_textgrid_lines(file = file)
 51 | }
 52 | 
 53 | #' @rdname read_textgrid
 54 | #' @order 2
 55 | #' @export
 56 | read_textgrid_lines <- function(lines, file = NULL) {
 57 |   if (is.null(file)) {
 58 |     file <- NA_character_
 59 |   }
 60 | 
 61 |   stopifnot(str_detect_any(lines, "ooTextFile"))
 62 | 
 63 |   lines |>
 64 |     parse_textgrid_lines() |>
 65 |     tibble::add_column(file = file, .before = 1) |>
 66 |     tibble::as_tibble()
 67 | }
 68 | 
 69 | parse_textgrid_lines <- function(lines) {
 70 |   tg_text <- lines |>
 71 |     # collapse into one string
 72 |     stringr::str_c(collapse = "\n") |>
 73 |     # concat one trailing space
 74 |     stringr::str_c(" ")
 75 | 
 76 |   tg_tokens <- tokenize_textgrid(tg_text)
 77 |   tier_indices <- find_tier_boundaries(tg_tokens)
 78 |   tier_types <- tg_tokens[tier_indices$start] |> unlist()
 79 | 
 80 |   tier_info_df <- data.frame(
 81 |     tier_num = seq_along(tier_types),
 82 |     tier_type = tier_types,
 83 |     tier_start = tier_indices$start,
 84 |     tier_end = tier_indices$end
 85 |   )
 86 | 
 87 |   data <- tier_info_df |>
 88 |     split(~tier_num) |>
 89 |     lapply(parse_tier, tg_tokens = tg_tokens) |>
 90 |     dplyr::bind_rows()
 91 | 
 92 |   data[["tier_xmin"]] <- as.numeric(data[["tier_xmin"]])
 93 |   data[["tier_xmax"]] <- as.numeric(data[["tier_xmax"]])
 94 |   data[["xmin"]] <- as.numeric(data[["xmin"]])
 95 |   data[["xmax"]] <- as.numeric(data[["xmax"]])
 96 |   data[["tier_num"]] <- as.integer(data[["tier_num"]])
 97 |   data[["annotation_num"]] <- as.integer(data[["annotation_num"]])
 98 |   data[["text"]] <- as.character(data[["text"]])
 99 |   data
100 | }
101 | 
102 | 
# Build the data frame for one tier.
#
# `tier_info` is a one-row description of the tier (tier_num, tier_type,
# tier_start, tier_end); `tg_tokens` is the full token list of the textgrid.
parse_tier <- function(tier_info, tg_tokens) {
  tier_tokens <- tg_tokens[tier_info$tier_start:tier_info$tier_end]

  # Every tier begins with 5 tier-level tokens:
  #   class, tier name, tier xmin, tier xmax, number of intervals/points.
  # An Interval tier always has at least one interval (3 more tokens: xmin,
  # xmax, text), so a tier of exactly 5 tokens is a Point tier with no points.
  n_tier_level_tokens <- 5

  if (length(tier_tokens) == n_tier_level_tokens) {
    # Represent the empty tier with a single all-NA annotation row
    empty_tier <- data.frame(
      tier_num = tier_info[["tier_num"]],
      tier_name = tier_tokens[[2]],
      tier_type = tier_tokens[[1]],
      tier_xmin = tier_tokens[[3]],
      tier_xmax = tier_tokens[[4]],
      xmin = NA_real_,
      xmax = NA_real_,
      text = NA_character_,
      annotation_num = NA_integer_
    )
    return(empty_tier)
  }

  tier_type <- tier_info$tier_type
  if (tier_type == "IntervalTier") {
    marks_df <- make_intervals(tier_tokens, tg_tokens)
  } else if (tier_type == "TextTier") {
    marks_df <- make_points(tier_tokens, tg_tokens)
  }

  # The builders leave tier_num as NA; fill it in here
  marks_df[["tier_num"]] <- tier_info[["tier_num"]]
  marks_df
}
140 | 
141 | 
# Data frame of the intervals in one interval tier. After the 5 tier-level
# tokens, the tokens come in (xmin, xmax, text) triples. `tier_num` is left
# as NA. `tg_tokens` is not used.
make_intervals <- function(tier_tokens, tg_tokens) {
  triples <- tier_tokens[-(1:5)]
  # Position of each triple's first token
  xmin_at <- seq(1, length(triples) - 2, by = 3)

  data.frame(
    tier_num = NA_integer_,
    tier_name = tier_tokens[[2]],
    tier_type = tier_tokens[[1]],
    tier_xmin = tier_tokens[[3]],
    tier_xmax = tier_tokens[[4]],
    xmin = unlist(triples[xmin_at]),
    xmax = unlist(triples[xmin_at + 1]),
    text = unlist(triples[xmin_at + 2]),
    annotation_num = seq_along(xmin_at)
  )
}
159 | 
160 | 
# Data frame of the points in one point tier. After the 5 tier-level tokens,
# the tokens come in (time, mark) pairs. A point is stored with xmin and xmax
# both equal to its time. `tier_num` is left as NA. `tg_tokens` is not used.
make_points <- function(tier_tokens, tg_tokens) {
  pairs <- tier_tokens[-(1:5)]
  # Position of each pair's first token
  time_at <- seq(1, length(pairs) - 1, by = 2)

  data.frame(
    tier_num = NA_integer_,
    tier_name = tier_tokens[[2]],
    tier_type = tier_tokens[[1]],
    tier_xmin = tier_tokens[[3]],
    tier_xmax = tier_tokens[[4]],
    xmin = unlist(pairs[time_at]),
    xmax = unlist(pairs[time_at]),
    text = unlist(pairs[time_at + 1]),
    annotation_num = seq_along(time_at)
  )
}
178 | 
# Tokenize textgrid text into a list of values. Strings are unquoted and have
# their "" escapes undone; numeric tokens are replaced by their parsed value.
# Tokens that are neither are dropped.
tokenize_textgrid <- function(tg_text) {
  # The scan runs with LC_NUMERIC set to "C"
  scan <- withr::with_locale(
    c(LC_NUMERIC = "C"),
    cpp_tg_scan_tokens(tg_text)
  )

  is_string <- scan$is_string
  # A number is a non-string token with a non-empty numeric prefix
  is_number <- (scan$num_prefix != 0) & !is_string
  keep <- is_number | is_string

  kept_tokens <- scan$tokens[keep]
  kept_is_string <- is_string[keep]
  kept_is_number <- is_number[keep]

  out <- vector("list", length(kept_tokens))

  # Strings: strip the surrounding quotes, then collapse "" to "
  quoted <- kept_tokens[kept_is_string]
  unquoted <- substring(quoted, 2L, nchar(quoted) - 1L)
  out[kept_is_string] <- gsub('""', '"', unquoted, fixed = TRUE)

  # Numbers: use the value parsed during the scan
  out[kept_is_number] <- scan$num_value[is_number]

  out
}
201 | 
202 | 
203 | 
# Locate each tier in the token list.
#
# Returns list(start =, end =) holding, for every tier, the index of its first
# token (the tier class) and of its last token. Aborts when a computed tier
# start does not hold "IntervalTier" or "TextTier".
find_tier_boundaries <- function(tg_tokens) {
  # TODO:
  # TextGrid_checkInvariants_e() in Praat source provides strong and weak
  # invariants
  # https://github.com/praat/praat.github.io/blob/master/fon/TextGrid.cpp#L1402

  # A textgrid interval might legitimately have the text "Tier" in it, so
  # tiers are found by counting tokens rather than by pattern matching.
  # Token 5 is the number of tiers and token 6 starts the first tier.
  n_tiers <- tg_tokens[[5]]
  starts <- integer(n_tiers)
  ends <- integer(n_tiers)
  starts[1] <- 6L

  for (k in seq_len(n_tiers)) {
    first <- starts[k]
    class_token <- tg_tokens[[first]]
    # The tier's size is its fifth token; a negative size counts as 0
    n_marks <- max(c(0, tg_tokens[[first + 4]]))

    # Intervals take 3 tokens each, points take 2 (and there may be none)
    tokens_per_mark <- if (class_token == "IntervalTier") 3 else 2
    last <- first + 4 + tokens_per_mark * n_marks

    if (k != n_tiers) {
      starts[k + 1] <- last + 1
    }
    ends[k] <- last
  }

  # Each computed start must land on a tier class token
  found_types <- unlist(tg_tokens[starts])
  all_valid <- all(is.element(found_types, c("IntervalTier", "TextTier")))

  if (!all_valid) {
    rlang::abort("TextGrid appears misformatted")
  }

  list(start = starts, end = ends)
}
248 | 
249 | 
# TRUE when at least one element of `xs` matches `pattern`.
str_detect_any <- function(xs, pattern) {
  hits <- stringr::str_detect(xs, pattern)
  any(hits)
}
253 | 
254 | 
255 | #' Locate the path of an example textgrid file
256 | #'
257 | #' Locate the path of an example textgrid file
258 | #'
259 | #' @param which index of the textgrid to locate (see Details). Defaults to `1`.
260 | #' @return Path of the selected example textgrid bundled with the
261 | #'   `readtextgrid` package.
262 | #'
263 | #' @details This function is a wrapper over [`system.file()`] to locate the
264 | #' paths to bundled textgrids. These files are used to test or demonstrate
265 | #' functionality of the package.
266 | #'
267 | #' Three files are included:
268 | #'
269 | #' 1. `"Mary_John_bell.TextGrid"` - the default TextGrid created by Praat's
270 | #'    Create TextGrid command. This file is saved as UTF-8 encoding.
271 | #' 2. `"utf_16_be.TextGrid"` - a TextGrid with some IPA characters entered using
272 | #'    Praat's IPA character selector. This file is saved with UTF-16 encoding.
273 | #' 3. `"nested-intervals.TextGrid"` - A textgrid containing an `"utterance"`
274 | #'    tier, a `"words"` tier, and a `"phones"` tier. This file is typical of
275 | #'    forced alignment textgrids where utterances contain words which contain
276 | #'    speech segments. In this case, alignment was made by hand so that word
277 | #'    and phone boundaries do not correspond exactly.
278 | #'
279 | #' @export
280 | example_textgrid <- function(which = 1) {
281 |   choices <- c(
282 |     "Mary_John_bell.TextGrid",
283 |     "utf_16_be.TextGrid",
284 |     "nested-intervals.TextGrid"
285 |   )
286 | 
287 |   system.file(choices[which], package = "readtextgrid")
288 | }
289 | 


--------------------------------------------------------------------------------
/inst/draw-tg-parts.Collection:
--------------------------------------------------------------------------------
  1 | File type = "ooTextFile"
  2 | Object class = "Collection"
  3 | 
  4 | size = 8 
  5 | item []: 
  6 |     item [1]:
  7 |         class = "TextGrid" 
  8 |         name = "Mary_John_bell" 
  9 |         xmin = 0 
 10 |         xmax = 1 
 11 |         tiers? <exists> 
 12 |         size = 3 
 13 |         item []: 
 14 |             item [1]:
 15 |                 class = "IntervalTier" 
 16 |                 name = "Mary" 
 17 |                 xmin = 0 
 18 |                 xmax = 1 
 19 |                 intervals: size = 1 
 20 |                 intervals [1]:
 21 |                     xmin = 0 
 22 |                     xmax = 1 
 23 |                     text = "" 
 24 |             item [2]:
 25 |                 class = "IntervalTier" 
 26 |                 name = "John" 
 27 |                 xmin = 0 
 28 |                 xmax = 1 
 29 |                 intervals: size = 1 
 30 |                 intervals [1]:
 31 |                     xmin = 0 
 32 |                     xmax = 1 
 33 |                     text = "" 
 34 |             item [3]:
 35 |                 class = "TextTier" 
 36 |                 name = "bell" 
 37 |                 xmin = 0 
 38 |                 xmax = 1 
 39 |                 points: size = 0 
 40 |     item [2]:
 41 |         class = "TextGrid" 
 42 |         name = "blue-text" 
 43 |         xmin = 0 
 44 |         xmax = 1 
 45 |         tiers? <exists> 
 46 |         size = 3 
 47 |         item []: 
 48 |             item [1]:
 49 |                 class = "IntervalTier" 
 50 |                 name = "Mary" 
 51 |                 xmin = 0 
 52 |                 xmax = 1 
 53 |                 intervals: size = 1 
 54 |                 intervals [1]:
 55 |                     xmin = 0 
 56 |                     xmax = 1 
 57 |                     text = "" 
 58 |             item [2]:
 59 |                 class = "IntervalTier" 
 60 |                 name = "John" 
 61 |                 xmin = 0 
 62 |                 xmax = 1 
 63 |                 intervals: size = 3 
 64 |                 intervals [1]:
 65 |                     xmin = 0 
 66 |                     xmax = 0.1850158439093439 
 67 |                     text = "\s{0}" 
 68 |                 intervals [2]:
 69 |                     xmin = 0.1850158439093439 
 70 |                     xmax = 0.7817226699265979 
 71 |                     text = "" 
 72 |                 intervals [3]:
 73 |                     xmin = 0.7817226699265979 
 74 |                     xmax = 1 
 75 |                     text = "\s{1.000000}" 
 76 |             item [3]:
 77 |                 class = "IntervalTier" 
 78 |                 name = "" 
 79 |                 xmin = 0 
 80 |                 xmax = 1 
 81 |                 intervals: size = 1 
 82 |                 intervals [1]:
 83 |                     xmin = 0 
 84 |                     xmax = 1 
 85 |                     text = "" 
 86 |     item [3]:
 87 |         class = "TextGrid" 
 88 |         name = "forehead-and-chin" 
 89 |         xmin = 0 
 90 |         xmax = 1 
 91 |         tiers? <exists> 
 92 |         size = 3 
 93 |         item []: 
 94 |             item [1]:
 95 |                 class = "IntervalTier" 
 96 |                 name = "Mary" 
 97 |                 xmin = 0 
 98 |                 xmax = 1 
 99 |                 intervals: size = 1 
100 |                 intervals [1]:
101 |                     xmin = 0 
102 |                     xmax = 1 
103 |                     text = "\s{1.00000 (1.000 / s)}" 
104 |             item [2]:
105 |                 class = "IntervalTier" 
106 |                 name = "John" 
107 |                 xmin = 0 
108 |                 xmax = 1 
109 |                 intervals: size = 1 
110 |                 intervals [1]:
111 |                     xmin = 0 
112 |                     xmax = 1 
113 |                     text = "\s{Visible part 1.000000 seconds}" 
114 |             item [3]:
115 |                 class = "IntervalTier" 
116 |                 name = "" 
117 |                 xmin = 0 
118 |                 xmax = 1 
119 |                 intervals: size = 1 
120 |                 intervals [1]:
121 |                     xmin = 0 
122 |                     xmax = 1 
123 |                     text = "\s{Total duration 1.000000 seconds}" 
124 |     item [4]:
125 |         class = "TextGrid" 
126 |         name = "right-marginal-text" 
127 |         xmin = 0 
128 |         xmax = 1 
129 |         tiers? <exists> 
130 |         size = 3 
131 |         item []: 
132 |             item [1]:
133 |                 class = "IntervalTier" 
134 |                 name = "Mary" 
135 |                 xmin = 0 
136 |                 xmax = 1 
137 |                 intervals: size = 1 
138 |                 intervals [1]:
139 |                     xmin = 0 
140 |                     xmax = 1 
141 |                     text = "\s{Mary}
142 | \s{(1)}" 
143 |             item [2]:
144 |                 class = "IntervalTier" 
145 |                 name = "John" 
146 |                 xmin = 0 
147 |                 xmax = 1 
148 |                 intervals: size = 1 
149 |                 intervals [1]:
150 |                     xmin = 0 
151 |                     xmax = 1 
152 |                     text = "\s{John}
153 | \s{(1)}" 
154 |             item [3]:
155 |                 class = "IntervalTier" 
156 |                 name = "" 
157 |                 xmin = 0 
158 |                 xmax = 1 
159 |                 intervals: size = 1 
160 |                 intervals [1]:
161 |                     xmin = 0 
162 |                     xmax = 1 
163 |                     text = "\s{bell}
164 | \s{(0)}" 
165 |     item [5]:
166 |         class = "TextGrid" 
167 |         name = "left-marginal-text" 
168 |         xmin = 0 
169 |         xmax = 1 
170 |         tiers? <exists> 
171 |         size = 3 
172 |         item []: 
173 |             item [1]:
174 |                 class = "IntervalTier" 
175 |                 name = "Mary" 
176 |                 xmin = 0 
177 |                 xmax = 1 
178 |                 intervals: size = 1 
179 |                 intervals [1]:
180 |                     xmin = 0 
181 |                     xmax = 1 
182 |                     text = "1" 
183 |             item [2]:
184 |                 class = "IntervalTier" 
185 |                 name = "John" 
186 |                 xmin = 0 
187 |                 xmax = 1 
188 |                 intervals: size = 1 
189 |                 intervals [1]:
190 |                     xmin = 0 
191 |                     xmax = 1 
192 |                     text = "2" 
193 |             item [3]:
194 |                 class = "IntervalTier" 
195 |                 name = "" 
196 |                 xmin = 0 
197 |                 xmax = 1 
198 |                 intervals: size = 1 
199 |                 intervals [1]:
200 |                     xmin = 0 
201 |                     xmax = 1 
202 |                     text = "3" 
203 |     item [6]:
204 |         class = "TextGrid" 
205 |         name = "blue-text-mask" 
206 |         xmin = 0 
207 |         xmax = 1 
208 |         tiers? <exists> 
209 |         size = 3 
210 |         item []: 
211 |             item [1]:
212 |                 class = "IntervalTier" 
213 |                 name = "Mary" 
214 |                 xmin = 0 
215 |                 xmax = 1 
216 |                 intervals: size = 1 
217 |                 intervals [1]:
218 |                     xmin = 0 
219 |                     xmax = 1 
220 |                     text = "" 
221 |             item [2]:
222 |                 class = "IntervalTier" 
223 |                 name = "John" 
224 |                 xmin = 0 
225 |                 xmax = 1 
226 |                 intervals: size = 3 
227 |                 intervals [1]:
228 |                     xmin = 0 
229 |                     xmax = 0.1850158439093439 
230 |                     text = "" 
231 |                 intervals [2]:
232 |                     xmin = 0.1850158439093439 
233 |                     xmax = 0.7817226699265979 
234 |                     text = "" 
235 |                 intervals [3]:
236 |                     xmin = 0.7817226699265979 
237 |                     xmax = 1 
238 |                     text = "" 
239 |             item [3]:
240 |                 class = "IntervalTier" 
241 |                 name = "" 
242 |                 xmin = 0 
243 |                 xmax = 1 
244 |                 intervals: size = 1 
245 |                 intervals [1]:
246 |                     xmin = 0 
247 |                     xmax = 1 
248 |                     text = "" 
249 |     item [7]:
250 |         class = "TextGrid" 
251 |         name = "left-marginal-text-mask" 
252 |         xmin = 0 
253 |         xmax = 1 
254 |         tiers? <exists> 
255 |         size = 3 
256 |         item []: 
257 |             item [1]:
258 |                 class = "IntervalTier" 
259 |                 name = "Mary" 
260 |                 xmin = 0 
261 |                 xmax = 1 
262 |                 intervals: size = 1 
263 |                 intervals [1]:
264 |                     xmin = 0 
265 |                     xmax = 1 
266 |                     text = "" 
267 |             item [2]:
268 |                 class = "IntervalTier" 
269 |                 name = "John" 
270 |                 xmin = 0 
271 |                 xmax = 1 
272 |                 intervals: size = 1 
273 |                 intervals [1]:
274 |                     xmin = 0 
275 |                     xmax = 1 
276 |                     text = "" 
277 |             item [3]:
278 |                 class = "IntervalTier" 
279 |                 name = "" 
280 |                 xmin = 0 
281 |                 xmax = 1 
282 |                 intervals: size = 1 
283 |                 intervals [1]:
284 |                     xmin = 0 
285 |                     xmax = 1 
286 |                     text = "" 
287 |     item [8]:
288 |         class = "TextGrid" 
289 |         name = "right-marginal-text-mask" 
290 |         xmin = 0 
291 |         xmax = 1 
292 |         tiers? <exists> 
293 |         size = 3 
294 |         item []: 
295 |             item [1]:
296 |                 class = "IntervalTier" 
297 |                 name = "Mary" 
298 |                 xmin = 0 
299 |                 xmax = 1 
300 |                 intervals: size = 1 
301 |                 intervals [1]:
302 |                     xmin = 0 
303 |                     xmax = 1 
304 |                     text = "" 
305 |             item [2]:
306 |                 class = "IntervalTier" 
307 |                 name = "John" 
308 |                 xmin = 0 
309 |                 xmax = 1 
310 |                 intervals: size = 1 
311 |                 intervals [1]:
312 |                     xmin = 0 
313 |                     xmax = 1 
314 |                     text = "" 
315 |             item [3]:
316 |                 class = "IntervalTier" 
317 |                 name = "" 
318 |                 xmin = 0 
319 |                 xmax = 1 
320 |                 intervals: size = 1 
321 |                 intervals [1]:
322 |                     xmin = 0 
323 |                     xmax = 1 
324 |                     text = "" 
325 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # readtextgrid <img src="man/figures/logo.png" width = "150" align="right" />
 17 | 
 18 | <!-- badges: start -->
 19 | [![CRAN status](https://www.r-pkg.org/badges/version/readtextgrid)](https://CRAN.R-project.org/package=readtextgrid)
 20 | [![readtextgrid status badge](https://tjmahr.r-universe.dev/readtextgrid/badges/version)](https://tjmahr.r-universe.dev/readtextgrid)
 21 | [![R-CMD-check](https://github.com/tjmahr/readtextgrid/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tjmahr/readtextgrid/actions/workflows/R-CMD-check.yaml)
 22 | <!-- badges: end -->
 23 | 
 24 | readtextgrid parses [Praat](https://www.fon.hum.uva.nl/praat/) textgrids
 25 | into tidy R dataframes.
 26 | 
 27 | ## Features
 28 | 
 29 |   - **Simple**: Minimal package with two core functions (`read_textgrid()` and
 30 |     `read_textgrid_lines()`).
 31 |   - **Tidy**: Returns rectangular tibbles ready for downstream processing with
 32 |     dplyr and tidyr.
 33 |   - **Flexible**: Supports both long and short textgrid file formats.
 34 |   - **Fast**: Uses a compiled C++ tokenizer for high-throughput parsing.
 35 | 
 36 | 
 37 | ## Installation
 38 | 
 39 | Install readtextgrid from CRAN:
 40 | 
 41 | ``` r 
 42 | install.packages("readtextgrid")
 43 | ```
 44 | 
 45 | **Development version**. Install a precompiled version of readtextgrid from 
 46 | R-universe:
 47 | 
 48 | ``` r
 49 | install.packages(
 50 |   "readtextgrid", 
 51 |   repos = c("https://tjmahr.r-universe.dev", "https://cloud.r-project.org")
 52 | )
 53 | ```
 54 | 
 55 | 
 56 | 
 57 | ## Basic usage
 58 | 
 59 | Here is the example textgrid created by Praat. It was created using 
 60 | `New > Create TextGrid...` with default settings in Praat.
 61 | 
 62 | <img src="man/figures/demo-textgrid.png" width="600" alt="Textgrid drawing from Praat with three tiers (Mary, John, and Bell)"/> 
 63 | 
 64 | This textgrid is bundled with this R package. We can locate the file with
 65 | `example_textgrid()`. We read in the textgrid with `read_textgrid()`.
 66 | 
 67 | ```{r example, R.options = list(tibble.width = 100)}
 68 | library(readtextgrid)
 69 | 
 70 | # Locates path to an example textgrid bundled with this package
 71 | tg <- example_textgrid()
 72 | 
 73 | read_textgrid(path = tg)
 74 | ```
 75 | 
 76 | The dataframe contains one row per annotation: one row for each interval on an
 77 | interval tier and one row for each point on a point tier. If a point tier has no
 78 | points, it is represented with a single row with `NA` values.
 79 | 
 80 | The columns encode the following information:
 81 | 
 82 |   - `file` filename of the textgrid. By default this column uses the filename in
 83 |     `path`. A user can override this value by setting the `file` argument in
 84 |     `read_textgrid(path, file)`, which can be useful if textgrids are stored in
 85 |     speaker-specific folders.
 86 |   - `tier_num` the number of the tier (as in the left margin of Praat's textgrid
 87 |     editor)
 88 |   - `tier_name` the name of the tier (as in the right margin of Praat's textgrid
 89 |     editor)
 90 |   - `tier_type` the type of the tier. `"IntervalTier"` for interval tiers and
 91 |     `"TextTier"` for point tiers (this is the terminology used inside of the
 92 |     textgrid file format).
 93 |   - `tier_xmin`, `tier_xmax` start and end times of the tier in seconds
 94 |   - `xmin`, `xmax` start and end times of the textgrid interval or point tier
 95 |     annotation in seconds
 96 |   - `text` the text in the annotation
 97 |   - `annotation_num` the number of the annotation in that tier (1 for the first
 98 |     annotation, etc.)
 99 | 
100 | ## Reading in directories of textgrids
101 | 
102 | Suppose we have data on multiple speakers with one folder of textgrids per
103 | speaker. As an example, this package has a folder called `speaker-data` bundled
104 | with it containing five textgrids from each of 2 speakers.
105 | 
106 | ```
107 | 📂 speaker-data
108 | ├── 📂 speaker001
109 | │   ├── s2T01.TextGrid
110 | │   ├── s2T02.TextGrid
111 | │   ├── s2T03.TextGrid
112 | │   ├── s2T04.TextGrid
113 | │   └── s2T05.TextGrid
114 | └── 📂 speaker002
115 |     ├── s2T01.TextGrid
116 |     ├── s2T02.TextGrid
117 |     ├── s2T03.TextGrid
118 |     ├── s2T04.TextGrid
119 |     └── s2T05.TextGrid
120 | ```
121 | 
122 | First, we create a vector of file-paths to read into R.
123 | 
124 | ```{r}
125 | # Get the path of the folder bundled with the package
126 | data_dir <- system.file(package = "readtextgrid", "speaker-data")
127 | 
128 | # Get the full paths to all the textgrids
129 | paths <- list.files(
130 |   path = data_dir, 
131 |   pattern = "TextGrid$",
132 |   full.names = TRUE, 
133 |   recursive = TRUE
134 | )
135 | ```
136 | 
137 | We can use `purrr::map()`---that is, *map* the `read_textgrid()` function over
138 | the `paths`---to read all these textgrids into R and then combine them from a
139 | list into a single dataframe with `purrr::list_rbind()`. But note that this
140 | approach doesn't track any speaker information.
141 | 
142 | ```{r, R.options = list(tibble.width = 100)}
143 | library(purrr)
144 | 
145 | paths |> 
146 |   map(read_textgrid) |> 
147 |   list_rbind()
148 | ```
149 | 
150 | By default, `read_textgrid()` uses the file basename (the file-path minus the
151 | directory part) for the `file` column. But we can manually set the `file` value.
152 | Here, we use `purrr::map2()` to map `read_textgrid(path, file)` over pairs of
153 | `path` and `file` values. Then we add the speaker information with some
154 | dataframe manipulation functions.
155 | 
156 | ```{r, R.options = list(tibble.width = 100), message = FALSE, warning = FALSE}
157 | library(dplyr)
158 | 
159 | # This tells read_textgrid() to set the file column to the full path
160 | data <- map2(paths, paths, read_textgrid) |> 
161 |   list_rbind() |> 
162 |   mutate(
163 |     # basename() removes the folder part from a path, 
164 |     # dirname() removes the file part from a path
165 |     speaker = basename(dirname(file)),
166 |     file = basename(file),
167 |   ) |> 
168 |   select(
169 |     speaker, everything()
170 |   )
171 | 
172 | data
173 | ```
174 | 
175 | Another strategy would be to read the textgrid dataframes into a list column and
176 | `tidyr::unnest()` them.
177 | 
178 | ```{r}
179 | # Read dataframes into a list column
180 | data_nested <- tibble(
181 |   speaker = basename(dirname(paths)),
182 |   data = map(paths, read_textgrid)
183 | )
184 | 
185 | # We have one row per textgrid dataframe because `data` is a list column
186 | data_nested
187 | 
188 | # promote the nested dataframes into the main dataframe
189 | tidyr::unnest(data_nested, "data")
190 | ```
191 | 
192 | 
193 | 
194 | 
195 | ## Pivoting nested intervals in textgrids
196 | 
197 | In the textgrids above, there is a natural nesting or hierarchy to the tiers.
198 | Intervals in the `words` tier contain intervals in the `phones` tier. It is often
199 | necessary to group intervals by their parent intervals (group phones by words).
200 | This package provides the `pivot_textgrid_tiers()` function to convert textgrids
201 | into a wide format in a way that respects the nesting/hierarchy of tiers.
202 | 
203 | ```{r}
204 | data_wide <- pivot_textgrid_tiers(
205 |   data, 
206 |   tiers = c("words", "phones"), 
207 |   join_cols = c("speaker", "file")
208 | )
209 | 
210 | data_wide
211 | 
212 | # more clearly
213 | data_wide |> 
214 |   select(
215 |     speaker, file, words, phones, 
216 |     words_xmin, words_xmax, phones_xmin, phones_xmax
217 |   )
218 | ```
219 | 
220 | Some remarks:
221 | 
222 |   - Each tier in `tiers` becomes a batch of columns. For example, the rows for
223 |     the `words` tier become the batch of columns `words` (the original `text`
224 |     value), `words_xmin`, `words_xmax`, etc.
225 |   - The columns in `join_cols` should uniquely identify a textgrid file, so the
226 |     combination of `speaker` and `file` is needed in the case where different
227 |     speakers have files with the same name.
228 |   - The tier names in `tiers` should be given in the order of their nesting from
229 |     outside to inside (e.g., `words` contain `phones`). Behind the scenes,
230 |     `dplyr::left_join(..., relationship = "one-to-many")` is used to constrain
231 |     how intervals are combined. 
232 | 
233 | This function also works on a single `tiers` value. In this case, the function
234 | returns just the intervals in that tier with the columns renamed and prefixed.
235 | 
236 | ```{r}
237 | data |> 
238 |   pivot_textgrid_tiers(
239 |     tiers = "words", 
240 |     join_cols = c("speaker", "file")
241 |   )
242 | ```
243 | 
244 | 
245 | ## Speeding things up
246 | 
247 | Do you have thousands of textgrids to read? The following workflow can speed
248 | things up. We are going to **read the textgrids in parallel**. Below are two 
249 | approaches:
250 | 
251 | - future backend and furrr frontend
252 | - mirai backend and purrr frontend
253 | 
254 | The backend manages the parallel computation, and the frontend provides the 
255 | syntax for calling a function with parallelism. 
256 | 
257 | **Approach 1**: We tell future to use a `multisession` `plan` for parallelism,
258 | so the computations are done on separate R sessions in the background. The
259 | syntax is like the above purrr code, but we replace `map()` with `future_map()`.
260 | 
261 | ```{r, warning = FALSE}
262 | library(future)
263 | library(furrr)
264 | plan(multisession, workers = 4)
265 | 
266 | data_nested <- tibble(
267 |   speaker = basename(dirname(paths)),
268 |   data = future_map(paths, read_textgrid)
269 | )
270 | ```
271 | 
272 | **Approach 2**: We have mirai set up 4 daemons (background processes), and then
273 | we use purrr's `in_parallel()` helper to signal to `map()` that the function
274 | should be run in parallel. We need to give *all* the information needed for the
275 | daemons to run the function, so we 1) provide a complete function definition
276 | (including `function(x) ...`) and 2) spell out the package namespace
277 | `readtextgrid::read_textgrid()`.
278 | 
279 | ```{r, warning = FALSE}
280 | mirai::daemons(4)
281 | data_nested <- tibble(
282 |   speaker = basename(dirname(paths)),
283 |   data = map(paths, in_parallel(function(x) readtextgrid::read_textgrid(x)))
284 | )
285 | mirai::daemons(0)
286 | ```
287 | 
288 | Another way to eke out performance is to set the encoding. By default,
289 | readtextgrid uses `readr::guess_encoding()` to determine the encoding of the
290 | textgrid before reading it in. But if you know the encoding beforehand, you can
291 | skip this guessing. In my limited testing, I found that **setting the encoding**
292 | could reduce benchmark times by 3--4% compared to guessing the encoding.
293 | 
294 | Here, we read 100 textgrids using different approaches to benchmark the 
295 | results. 
296 | 
297 | ```{r}
298 | paths_bench <- withr::with_seed(1, sample(paths, 100, replace = TRUE))
299 | 
300 | mirai::daemons(4)
301 | bench::mark(
302 |   lapply_guess = lapply(paths_bench, read_textgrid),
303 |   lapply_set   = lapply(paths_bench, read_textgrid, encoding = "UTF-8"),
304 |   future_guess = future_map(paths_bench, read_textgrid),
305 |   future_set   = future_map(paths_bench, read_textgrid, encoding = "UTF-8"), 
306 |   mirai_guess = purrr::map(
307 |     paths_bench, 
308 |     in_parallel(function(x) readtextgrid::read_textgrid(x))
309 |   ),
310 |   mirai_set = purrr::map(
311 |     paths_bench, 
312 |     in_parallel(function(x) readtextgrid::read_textgrid(x, encoding = "UTF-8"))
313 |   ),
314 |   check = TRUE
315 | )
316 | mirai::daemons(0)
317 | ```
318 | 
319 | ## Legacy behavior and supported textgrid formats
320 | 
321 | ```{r, include = FALSE}
322 | examples <- c(
323 | 'File type = "ooTextFile"
324 | Object class = "TextGrid"
325 | 
326 | xmin = 0 
327 | xmax = 1 
328 | tiers? <exists> 
329 | size = 1 
330 | item []: 
331 |     item [1]:
332 |         class = "IntervalTier" 
333 |         name = "Mary" 
334 |         xmin = 0 
335 |         xmax = 1 
336 |         intervals: size = 1 
337 |         intervals [1]:
338 |             xmin = 0 
339 |             xmax = 1 
340 |             text = "" 
341 | ',
342 | 
343 | 'File type = "ooTextFile"
344 | Object class = "TextGrid"
345 | 
346 | 0
347 | 1
348 | <exists>
349 | 1
350 | "IntervalTier"
351 | "Mary"
352 | 0
353 | 1
354 | 1
355 | 0
356 | 1
357 | ""
358 | ',
359 | 'File type = "ooTextFile"
360 | Object class = "TextGrid"
361 | 
362 | ! info about the grid
363 | 0s 1s <exists> 1
364 | ! info about the tier
365 | "IntervalTier" "Mary" 0s 1s 1 ! type, name, xmin, xmax, size
366 | 0s 1s "" ! interval xmin, xmax, size
367 | '
368 | )
369 | # make sure these don't actually error
370 | example_tgs <- lapply(examples, read_textgrid_lines)
371 | ```
372 | 
373 | 
374 | The original version of this package assumed the textgrid text files followed a
375 | "long" format with helpful labels and annotations. For example, in the following
376 | textgrid, each number has a label that makes it easy and fast to parse the
377 | textgrid with regular expressions:
378 | 
379 | ```{r, echo = FALSE, comment = ""}
380 | writeLines(examples[1])
381 | ```
382 | 
383 | The original version of the parser designed for this textgrid format is still
384 | provided with the `legacy_read_textgrid()` and `legacy_read_textgrid_lines()`
385 | functions.
386 | 
387 | Version 0.2.0 of readtextgrid added a C++ based parser that can handle many more
388 | textgrid formats. For example, it can read "short" format textgrids like the
389 | following:
390 | 
391 | ```{r, echo = FALSE, comment = ""}
392 | writeLines(examples[2])
393 | ```
394 | 
395 | The "long" format textgrids are outputted in Praat with `Save > Save as text
396 | file...`, and the "short" format textgrids are outputted with `Save > Save as
397 | short textfile...`.
398 | 
399 | readtextgrid's parser can also handle [esoteric
400 | features](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html)
401 | like comments (that start with `!`) or arbitrary text attached to a number, as 
402 | in the following example:
403 | 
404 | ```{r, echo = FALSE, comment = ""}
405 | writeLines(examples[3])
406 | ```
407 | 
408 | Because the new parser uses C++ for tokenization---that is, the part that scans
409 | the contents character by character and determines whether the inputs are
410 | strings, numbers, or skipped---it is much faster than the legacy version.
411 | 
412 | ```{r}
413 | paths_bench <- withr::with_seed(2, sample(paths, 10, replace = TRUE))
414 | 
415 | bench::mark(
416 |   current = lapply(paths_bench, read_textgrid),
417 |   legacy = lapply(paths_bench, legacy_read_textgrid),
418 |   min_iterations = 10, 
419 |   filter_gc = FALSE,
420 |   check = TRUE
421 | )
422 | ```
423 | 
424 | ## Other tips
425 | 
426 | ### Helpful columns
427 | 
428 | The following columns are often helpful:
429 | 
430 | - `duration` of an interval
431 | - `xmid` midpoint of an interval
432 | - `total_annotations` total number of annotations on a tier
433 | 
434 | Here is how to create them:
435 | 
436 | ```{r}
437 | data |>
438 |   # grouping needed for counting annotations per tier per file per speaker
439 |   group_by(speaker, file, tier_num) |>
440 |   mutate(
441 |     duration = xmax - xmin,
442 |     xmid = xmin + (xmax - xmin) / 2,
443 |     total_annotations = sum(!is.na(annotation_num))
444 |   ) |> 
445 |   ungroup() |> 
446 |   glimpse()
447 | ```
448 | 
449 | 
450 | ### Launching Praat
451 | 
452 | *This tip is written from the perspective of a Windows user who uses git bash
453 | for a terminal*. 
454 | 
455 | To open textgrids in Praat, you can tell R to call Praat from
456 | the command line. You have to know the location of the Praat binary,
457 | though. I like to keep a copy in my project directories. So, assuming that
458 | Praat.exe is in my working folder, the following would open the 10 textgrids in
459 | `paths` in Praat.
460 | 
461 | ```{r, eval = FALSE}
462 | system2(
463 |   command = "./Praat.exe",
464 |   args = c("--open", paths),
465 |   wait = FALSE
466 | )
467 | ```
468 | 
469 | 
470 | ## Acknowledgments
471 | 
472 | readtextgrid was created to process data from the [WISC Lab
473 | project](https://kidspeech.wisc.edu/). Thus, development of this package was
474 | supported by NIH R01DC009411 and NIH R01DC015653.
475 | 
476 | ***
477 | 
478 | Please note that the 'readtextgrid' project is released with a
479 | [Contributor Code of Conduct](https://www.contributor-covenant.org/version/1/0/0/code-of-conduct.html).
480 | By contributing to this project, you agree to abide by its terms.
481 | 


--------------------------------------------------------------------------------
/vignettes/articles/textgrid-specification.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Textgrid specification"
  3 | author: Tristan Mahr
  4 | ---
  5 | 
  6 | ```{r, include = FALSE}
  7 | knitr::opts_chunk$set(
  8 |   collapse = TRUE,
  9 |   comment = "#>"
 10 | )
 11 | 
 12 | examples <- c(
 13 | 'File type = "ooTextFile"
 14 | Object class = "TextGrid"
 15 | 
 16 | xmin = 0 
 17 | xmax = 1 
 18 | tiers? <exists> 
 19 | size = 1 
 20 | item []: 
 21 |     item [1]:
 22 |         class = "IntervalTier" 
 23 |         name = "Mary" 
 24 |         xmin = 0 
 25 |         xmax = 1 
 26 |         intervals: size = 1 
 27 |         intervals [1]:
 28 |             xmin = 0 
 29 |             xmax = 1 
 30 |             text = "" 
 31 | ',
 32 | 
 33 | 'File type = "ooTextFile"
 34 | Object class = "TextGrid"
 35 | 
 36 | 0
 37 | 1
 38 | <exists>
 39 | 1
 40 | "IntervalTier"
 41 | "Mary"
 42 | 0
 43 | 1
 44 | 1
 45 | 0
 46 | 1
 47 | ""
 48 | ',
 49 | 'File type = "ooTextFile"
 50 | Object class = "TextGrid"
 51 | 
 52 | ! info about the grid
 53 | 0s 1s <exists> 1
 54 | ! info about the tier
 55 | "IntervalTier" "Mary" 0s 1s 1 ! type, name, xmin, xmax, size
 56 | 0s 1s "" ! interval xmin, xmax, size
 57 | '
 58 | )
 59 | # make sure these don't actually error
 60 | example_tgs <- lapply(examples, readtextgrid::read_textgrid_lines)
 61 | stopifnot(length(unique(example_tgs)) == 1)
 62 | ```
 63 | 
 64 | 
 65 | _This vignette documents the internal parsing logic of the **readtextgrid**
 66 | package. It is intended for developers maintaining the parser or for developers
 67 | in other languages, not for end users of the package._
 68 | 
 69 | In this article, I describe the specification of the `.TextGrid` file format used
 70 | in this package, note how it differs from the documented specification provided
 71 | by Praat, and provide a high-level overview of R code and a C++ translation that
 72 | can parse `.TextGrid` files.
 73 | 
 74 | ## Example `.TextGrid` file contents
 75 | 
 76 | The `.TextGrid` file format used by Praat is very flexible. Below are three
 77 | different `.TextGrid` files representing the same Praat textgrid.
 78 | 
 79 | Long format:
 80 | 
 81 | ```{r, echo = FALSE, comment = ""}
 82 | writeLines(examples[1])
 83 | ```
 84 | 
 85 | Short format:
 86 | 
 87 | ```{r, echo = FALSE, comment = ""}
 88 | writeLines(examples[2])
 89 | ```
 90 | 
 91 | Custom format with comments and other noise:
 92 | 
 93 | ```{r, echo = FALSE, comment = ""}
 94 | writeLines(examples[3])
 95 | ```
 96 | 
 97 | readtextgrid can handle all three of these files in the same way because the
 98 | Praat textgrid specification is simple---once you figure it out. I developed the
 99 | readtextgrid specification by reading Praat's description of the format, testing
100 | various edge cases in the format and testing whether Praat would open the test
101 | file. If Praat could handle the file, it had to be supported by this package's
102 | textgrid parser.
103 | 
104 | 
105 | ## Package design
106 | 
107 | To read in a `.TextGrid` file, we do the following:
108 | 
109 | - read it in with the proper character encoding
110 | - tokenize the file contents from a sequence of characters into a list of 
111 |   Praat strings and Praat numbers 
112 | - identify the start and end tokens of each textgrid tier
113 | - split those tokens up into batches of data and assemble dataframes
114 | 
115 | This document concerns the tokenization step. The remaining parsing steps 
116 | follow straightforward split-apply-combine programming in R.
117 | 
118 | 
119 | ## Documented `.TextGrid` file format specification
120 | 
121 | First, let's start with [Paul Boersma's own
122 | description](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html)
123 | of the file format. He notes that the long format contains several comments to
124 | help a person read the file, and that these are ultimately ignored by Praat.
125 | Instead, there are only a few important tokens:
126 | 
127 | > Praat will consider as data only the following types of information in the
128 | > file:
129 | > 
130 | >   - free-standing numbers, such as `0` and `2.3` above, but not `[1]` or
131 | >     `[3]`;
132 | >   - free-standing text enclosed within double quotes, such as `"TextGrid"` and
133 | >     `""` above;
134 | >   - free-standing flags, such as `<exists>` above (this is the only flag that
135 | >     appears in TextGrid files [...]).
136 | > 
137 | > In this list, "free-standing" means that the number, text or flag is preceded
138 | > by the beginning of the file, the beginning of a line, or a space, and that it
139 | > is followed by the end of the file, the end of a line, or a space.
140 | 
141 | He also mentions additional features about the format:
142 | 
143 |   - `!` comments: "everything that follows an exclamation mark on the same line
144 |     is considered a comment".
145 |   - `""` escapement by doubling: "a double quote that appears in a text
146 |     [*i.e.*, a string] is written as a *doubled* double quote in the text
147 |     file."
148 |   - ignore the `<flag>` tokens anyway: "The flag `<exists>`, which tells us that
149 |     this TextGrid contains tiers (this value would be `<absent>` if the TextGrid
150 |     contained no tiers, in which case the file would end here; however, you
151 |     cannot really create TextGrid objects without tiers in Praat, so this issue
152 |     can be ignored)."
153 | 
154 | These details are mostly accurate and simple enough, but they don't specify
155 | what to do with `.1`, for example (Praat treats it as an error).
156 | 
157 | 
158 | ## Our specification of the `.TextGrid` file format
159 | 
160 | After testing, I developed the following specification for this R package. 
161 | 
162 |   - There are two kinds of tokens: strings and numbers.
163 |   
164 |   - **Strings** start and end with a `"`. If a string is supposed to have a
165 |     double-quote character `"` inside of it, double the quote characters
166 |     instead. The textgrid interval text *He said "hello" to me* would have the
167 |     string `"He said ""hello"" to me"`. Everything inside of the `"` pair
168 |     belongs to the string, even line breaks and comments.
169 |     
170 |   - A string is fully "free-standing". It should be preceded and followed by a
171 |     space, newline, or the start or end of a file. `I said"Hello"` does not
172 |     contain a string because there is no space before the `"` character.
173 | 
174 |   - **Numbers** start with a plus, minus or digit. Decimal, hexadecimal, and
175 |     scientific notation are supported. Fractions are supported. A number ending
176 |     with a `%` (a percentage) is divided by 100. Numbers use a `.` for the
177 |     decimal point character. `.5` is not a number because it doesn't start with
178 |     a plus, minus or digit.
179 |     
180 |   - A number is "left free-standing" (my terminology). It must be preceded by a
181 |     space or newline. (Using the file start doesn't make sense for a boundary).
182 |     From a valid start of a number, characters are read until the sequence of
183 |     characters would no longer yield a number. Any additional characters until
184 |     the next space, newline, or file boundary are ignored. In `100ms` and
185 |     `+100e1ms`, for example, the final `ms` characters are ignored.
186 |   
187 |   - Praat does not support real numbers with a stranded exponent (`1e`). These
188 |     kinds of numbers are an exception to the left-free-standing feature
189 |     described earlier.
190 |   
191 |   - Everything else is a comment and ignored. I differentiate between two kinds
192 |     of comments. This is my terminology, not Praat's.
193 |   
194 |   - **"Strong" comments** start with a `!` and end with a newline (`\n`).
195 |   
196 |   - **"Weak" comments** would be any token that does not start like a string or
197 |     number. In the long format textgrid, `size = 1` would be two ignored weak
198 |     comments (`size`, `=`) and a number (`1`).
199 | 
200 | The allowance for characters on the right side of numbers is the major
201 | difference between the description of the Praat format and the one used in this
202 | package.
203 | 
204 | 
205 | 
206 | 
207 | 
208 | ## Reference R implementation for textgrid tokenization
209 | 
210 | Given a vector of characters from a Praat `.TextGrid` file, we want a list of 
211 | strings and numbers contained in the file. For example, here are the characters
212 | from the short textgrid file and the output of the R-based tokenization:
213 | 
214 | ```{r}
215 | tg_characters <- examples[2] |> 
216 |   strsplit("") |> 
217 |   unlist()
218 | 
219 | tg_characters
220 | 
221 | tg_characters |> 
222 |   readtextgrid:::r_tokenize_textgrid_chars() |> 
223 |   str()
224 | ```
225 | 
226 | Some comments about this function:
227 | 
228 |   - `r_tokenize_textgrid_chars()` is not an exported or supported function. That
229 |     is why it needs to be accessed with the triple colon namespace operator
230 |     `:::`.
231 |   - The function was the intended implementation for the package until I
232 |     converted the implementation to C++. I keep this R version around as a
233 |     reference implementation for testing the current C++ implementation.
234 |   - Don't use it.
235 | 
236 | 
237 | The big ideas in `r_tokenize_textgrid_chars()` are the following:
238 | 
239 |   - We have three special states: `in_strong_comment`, `in_string`, and
240 |     `in_escaped_quote`. These determine how we interpret spaces, newlines, and
241 |     `"` characters. When `in_strong_comment` is true, we skip the character
242 |     iteration loop with `next` until we see a newline. When `in_escaped_quote`
243 |     is true, we skip the next iteration of the loop (to pass over the second `"`).
244 |     When `in_string` is true, we keep collecting characters for the current
245 |     token until we see a closing `"`.
246 | 
247 |   - When these states are all false *and* we see a space or newline, then we
248 |     have the end of the current token. We extract the characters for the current
249 |     token, combine them into a single value, check the value and keep it if it
250 |     is a Praat string or Praat number. Then we reset the current token position
251 |     and advance.
252 | 
253 | Everything else is book-keeping to check for a special state or initialize a new
254 | token.
255 | 
256 | The complete code is given below. It is fairly well-commented but you don't 
257 | have to read it---just knowing the high-level details is sufficient.
258 | 
259 | ```{r, eval = FALSE}
260 | function(all_char) {
261 |   # The parser rules here follow the textgrid specifications
262 |   # <https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html> EXCEPT
263 |   # when they contradict the behavior of Praat.exe. For example, the specs says
264 |   # the main literals are freestanding strings and numbers, where freestanding
265 |   # means that they have a whitespace or boundary (newline or file start/end).
266 |   # But Praat.exe can handle numbers like "10.00!comment". So, this parser
267 |   # gathers freestanding literals but only keeps ones that are strings or
268 |   # start with a valid number (the non-numeric characters are lopped off.)
269 | 
270 |   in_strong_comment <- FALSE         # Comment mode: ! to new line \n
271 |   in_string <- FALSE                 # String mode: "Quote to quote"
272 |   in_escaped_quote <- FALSE          # Escaped quote: "" inside of a string
273 | 
274 |   token_start <- integer(0)          # Start of current token
275 |   values <- vector(mode = "list")    # Collects completed values
276 | 
277 |   for (i in seq_along(all_char)) {
278 |     cur_value_ready <- length(token_start) != 0
279 |     c <- all_char[i]
280 |     c_is_whitespace <- c %in% c(" ", "\n")
281 |     c_starts_string <- c == "\""
282 | 
283 |     # Comments start with ! and end with \n. Skip characters in this mode.
284 |     if (!in_string & c == "!") {
285 |       in_strong_comment <- TRUE
286 |       next
287 |     }
288 |     if (in_strong_comment) {
289 |       if (c == "\n") in_strong_comment <- FALSE
290 |       next
291 |     }
292 | 
293 |     # Whitespace delimits values so collect values if we see whitespace
294 |     if (c_is_whitespace & !in_string) {
295 |       # Skip whitespace if no values collected so far
296 |       if (!cur_value_ready) next
297 | 
298 |       total_value <- all_char[seq(token_start, i - 1)] |>
299 |         paste0(collapse = "")
300 |       is_string <- all_char[token_start] == "\"" && all_char[i - 1] == "\""
301 | 
302 |       # Collect only numbers and strings
303 |       if (r_tg_parse_is_number(total_value)) {
304 |         # Keep only the numeric part.
305 |         total_value <- total_value |> r_tg_parse_extract_number()
306 |         values <- c(values, total_value)
307 |       } else if (is_string) {
308 |         values <- c(values, total_value)
309 |       }
310 |       token_start <- integer(0)
311 |       next
312 |     }
313 | 
314 |     # Store character if ending an escaped quote
315 |     if (in_escaped_quote) {
316 |       in_escaped_quote <- !in_escaped_quote
317 |       next
318 |     }
319 | 
320 |     # Start or close string mode if we see "
321 |     if (c_starts_string) {
322 |       # Check for "" escapes
323 |       peek_c <- all_char[i + 1]
324 |       if (peek_c == "\"" & in_string) {
325 |         in_escaped_quote <- TRUE
326 |       } else {
327 |         in_string <- !in_string
328 |       }
329 |     }
330 | 
331 |     if (!cur_value_ready) {
332 |       token_start <- i
333 |     }
334 |   }
335 | 
336 |   values |>
337 |     lapply(r_tg_parse_convert_value)
338 | }
339 | ```
340 | 
341 | 
342 | ## C++ implementation
343 | 
344 | Given the simple nature of the R code and its relatively slow performance
345 | compared to the legacy version of the parser, I used ChatGPT to help convert
346 | the R code into a C++ implementation built on the cpp11 package. I tried to make
347 | sure I understood every line and made my own comments to help my understanding.
348 | 
349 | The C++ code is a straightforward translation of the R version. For example, 
350 | here is the part of the function that collects tokens when we see
351 | a space or newline:
352 | 
353 | ```c++
354 |     if (!in_string && is_ws(b)) {
355 |       if (have_token) {
356 |         size_t start = tok_start_byte;
357 |         size_t end   = (curr_char_byte == 0 ? 0 : prev_char_byte);
358 |         size_t len   = (end >= start) ? (end - start + 1) : 0;
359 |         if (len > 0) {
360 |           // do we have a string (start and end with ")
361 |           bool q = (static_cast<unsigned char>(src[start]) == 0x22) &&
362 |             (static_cast<unsigned char>(src[end])   == 0x22);
363 |           tokens.push_back(src.substr(start, len));
364 |           tokens_is_string.push_back(q);
365 |         }
366 |         have_token = false;
367 |       }
368 |       continue;
369 |     }
370 | ```
371 | 
372 | Some details are different: The C++ version extracts tokens with a substring
373 | (`.substr()`) method, delays checking whether the token is a number until
374 | later on, and accumulates results into lists (`tokens` and
375 | `tokens_is_string`). But the underlying logic is the same as the R version.
376 | 
377 | The C++ function takes a single character value (one whole string) for the file
378 | contents and returns a list of the tokens in the file, whether each token is a
379 | Praat string, the number of characters of each token that form a number, and
380 | the value of that token's number:
381 | 
382 | ```{r}
383 | examples[2] |> 
384 |   readtextgrid:::cpp_tg_scan_tokens() |> 
385 |   as.data.frame()
386 | ```
387 | 
388 | Before I had figured out how to parse numbers with C++, I originally was going
389 | to use R code on the `token` column to figure out whether each token is a legal
390 | number or not. That's why this function returns a list of vectors with
391 | information about the tokens.
392 | 
393 | Back in the R layer, the final tokens are selected using really basic vector 
394 | operations:
395 | 
396 | ```{r}
397 | readtextgrid:::tokenize_textgrid
398 | ```
399 | 
400 | An important part of this function is the `withr::with_locale(c(LC_NUMERIC =
401 | "C"), ... )` call. We are setting the locale for numbers to the C locale which
402 | means that `.` is the decimal point character, and not a comma as in some
403 | locales. 
404 | 
405 | Parsing numbers is also handled by C++. I discovered that the standard 
406 | library `strtod()` function does exactly what we need:
407 | 
408 | > Interprets a floating-point value in a byte string pointed to by `str`.
409 | > 
410 | > Function discards any whitespace characters (as determined by `isspace`) until
411 | > first non-whitespace character is found. Then it takes as many characters as
412 | > possible to form a valid floating-point representation and converts them to a
413 | > floating-point value. 
414 | >
415 | > -- https://en.cppreference.com/w/c/string/byte/strtof
416 | 
417 | We include some additional logic to make sure that `.4` is illegal and to output `NA_real_` for missing values, but otherwise, `strtod()` does the work for us.
418 | 
419 | One consequence of this approach is that we can parse other kinds of numbers
420 | like hexadecimal with exponents. It turns out that Praat can also parse these 
421 | numbers in a `.TextGrid` file as well.
422 | 
423 | The number-parsing logic has its own function, so we can test how
424 | specific tokens are parsed:
425 | 
426 | ```{r}
427 | test_nums <- c("+1.0", "000ms", "-2", "0xA", ".5", "+.0") 
428 | 
429 | as.data.frame(c(
430 |   test_nums = list(test_nums),
431 |   readtextgrid:::cpp_parse_praat_numbers(test_nums)
432 | ))
433 | ```
434 | 
435 | There are two limitations with the number parser used in this package:
436 | 
437 | - We do not support fractions and percentages. (Praat does.) 
438 | - We accept stranded exponents. (Praat does not.)
439 | 
440 | ```{r}
441 | test_nums <- c("1e", "1E", "20/10", "1000%") 
442 | expected <- c(NA_real_, NA_real_, 2.0, 10.0) 
443 | 
444 | as.data.frame(c(
445 |   test_nums = list(test_nums),
446 |   readtextgrid:::cpp_parse_praat_numbers(test_nums),
447 |   expected_value = list(expected)
448 | ))
449 | ```
450 | 
451 | These are not high-priority limitations until we find a case where a software
452 | program writes out `.TextGrid` files that use these features.
453 | 
454 | ## Notes on testing 
455 | 
456 | The package's folder `tests/testthat/test-data`  includes a series of `.TextGrid` 
457 | files for testing the parsing functions. One of these, `hard-to-parse.TextGrid`, 
458 | collects as many edge cases as I can imagine.
459 | 
460 | The C++ implementation is tested against the legacy parser on easy long-format 
461 | textgrid files and against the pure R implementation on other test textgrid 
462 | files, including `hard-to-parse.TextGrid`.
463 | 
464 | The folder `tests/testthat/test-data/praat-test` includes some tests of whether
465 | Praat can open a file or not. Files that fail to open start with `fail-` and 
466 | files that open start with `okay-`. We support only the syntactic
467 | features in the `okay-` files.
468 | 
469 | ## Notes on the Praat source code
470 | 
471 | I did not rely on the Praat source code but I tried! The Praat source code has
472 | to read in all kinds of text files so there is not an obvious
473 | `read_textgrid()`-like function for parsing a `.TextGrid` file. Still, I was
474 | able to find how numbers are read in from a text file.
475 | 
476 | The primitive data types of Praat are defined in the `Melder` folder. The
477 | `abcio.cpp` file has functions like `getReal()` for reading a float from text.
478 | `getReal()` calls the `Melder_a8tof()` function in `melder_atof.cpp` to convert
479 | strings into numbers, and this function in turn calls `findEndOfNumericString()`
480 | which processes numbers character by character.
481 | 
482 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # readtextgrid <img src="man/figures/logo.png" width = "150" align="right" />
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![CRAN
  9 | status](https://www.r-pkg.org/badges/version/readtextgrid)](https://CRAN.R-project.org/package=readtextgrid)
 10 | [![readtextgrid status
 11 | badge](https://tjmahr.r-universe.dev/readtextgrid/badges/version)](https://tjmahr.r-universe.dev/readtextgrid)
 12 | [![R-CMD-check](https://github.com/tjmahr/readtextgrid/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tjmahr/readtextgrid/actions/workflows/R-CMD-check.yaml)
 13 | <!-- badges: end -->
 14 | 
 15 | readtextgrid parses [Praat](https://www.fon.hum.uva.nl/praat/) textgrids
 16 | into tidy R dataframes.
 17 | 
 18 | ## Features
 19 | 
 20 | - **Simple**: Minimal package with two core functions (`read_textgrid()`
 21 |   and `read_textgrid_lines()`).
 22 | - **Tidy**: Returns rectangular tibbles ready for downstream processing
 23 |   with dplyr and tidyr.
 24 | - **Flexible**: Supports both long and short textgrid file formats.
 25 | - **Fast**: Uses a compiled C++ tokenizer for high-throughput parsing.
 26 | 
 27 | ## Installation
 28 | 
 29 | Install readtextgrid from CRAN:
 30 | 
 31 | ``` r
 32 | install.packages("readtextgrid")
 33 | ```
 34 | 
 35 | **Development version**. Install a precompiled version of readtextgrid
 36 | from R-universe:
 37 | 
 38 | ``` r
 39 | install.packages(
 40 |   "readtextgrid", 
 41 |   repos = c("https://tjmahr.r-universe.dev", "https://cloud.r-project.org")
 42 | )
 43 | ```
 44 | 
 45 | ## Basic usage
 46 | 
 47 | Here is the example textgrid created by Praat. It was created using
 48 | `New > Create TextGrid...` with default settings in Praat.
 49 | 
 50 | <img src="man/figures/demo-textgrid.png" width="600" alt="Textgrid drawing from Praat with three tiers (Mary, John, and Bell)"/>
 51 | 
 52 | This textgrid is bundled with this R package. We can locate the file
 53 | with `example_textgrid()`. We read in the textgrid with
 54 | `read_textgrid()`.
 55 | 
 56 | ``` r
 57 | library(readtextgrid)
 58 | 
 59 | # Locates path to an example textgrid bundled with this package
 60 | tg <- example_textgrid()
 61 | 
 62 | read_textgrid(path = tg)
 63 | #> # A tibble: 3 × 10
 64 | #>   file                    tier_num tier_name tier_type    tier_xmin tier_xmax
 65 | #>   <chr>                      <int> <chr>     <chr>            <dbl>     <dbl>
 66 | #> 1 Mary_John_bell.TextGrid        1 Mary      IntervalTier         0         1
 67 | #> 2 Mary_John_bell.TextGrid        2 John      IntervalTier         0         1
 68 | #> 3 Mary_John_bell.TextGrid        3 bell      TextTier             0         1
 69 | #>    xmin  xmax text  annotation_num
 70 | #>   <dbl> <dbl> <chr>          <int>
 71 | #> 1     0     1 ""                 1
 72 | #> 2     0     1 ""                 1
 73 | #> 3    NA    NA <NA>              NA
 74 | ```
 75 | 
 76 | The dataframe contains one row per annotation: one row for each interval
 77 | on an interval tier and one row for each point on a point tier. If a
 78 | point tier has no points, it is represented with a single row with `NA`
 79 | values.
 80 | 
 81 | The columns encode the following information:
 82 | 
 83 | - `file` filename of the textgrid. By default this column uses the
 84 |   filename in `path`. A user can override this value by setting the
 85 |   `file` argument in `read_textgrid(path, file)`, which can be useful if
 86 |   textgrids are stored in speaker-specific folders.
 87 | - `tier_num` the number of the tier (as in the left margin of Praat’s
 88 |   textgrid editor)
 89 | - `tier_name` the name of the tier (as in the right margin of Praat’s
 90 |   textgrid editor)
 91 | - `tier_type` the type of the tier. `"IntervalTier"` for interval tiers
 92 |   and `"TextTier"` for point tiers (this is the terminology used inside
 93 |   of the textgrid file format).
 94 | - `tier_xmin`, `tier_xmax` start and end times of the tier in seconds
 95 | - `xmin`, `xmax` start and end times of the textgrid interval or point
 96 |   tier annotation in seconds
 97 | - `text` the text in the annotation
 98 | - `annotation_num` the number of the annotation in that tier (1 for the
 99 |   first annotation, etc.)
100 | 
101 | ## Reading in directories of textgrids
102 | 
103 | Suppose we have data on multiple speakers with one folder of textgrids
104 | per speaker. As an example, this package has a folder called
105 | `speaker-data` bundled with it representing five textgrids from each of 2
106 | speakers.
107 | 
108 |     📂 speaker-data
109 |     ├── 📂 speaker001
110 |     │   ├── s2T01.TextGrid
111 |     │   ├── s2T02.TextGrid
112 |     │   ├── s2T03.TextGrid
113 |     │   ├── s2T04.TextGrid
114 |     │   └── s2T05.TextGrid
115 |     └── 📂 speaker002
116 |         ├── s2T01.TextGrid
117 |         ├── s2T02.TextGrid
118 |         ├── s2T03.TextGrid
119 |         ├── s2T04.TextGrid
120 |         └── s2T05.TextGrid
121 | 
122 | First, we create a vector of file-paths to read into R.
123 | 
124 | ``` r
125 | # Get the path of the folder bundled with the package
126 | data_dir <- system.file(package = "readtextgrid", "speaker-data")
127 | 
128 | # Get the full paths to all the textgrids
129 | paths <- list.files(
130 |   path = data_dir, 
131 |   pattern = "TextGrid$",
132 |   full.names = TRUE, 
133 |   recursive = TRUE
134 | )
135 | ```
136 | 
137 | We can use `purrr::map()`—*map* the `read_textgrid()` function over the
138 | `paths`—to read all these textgrids into R and combine them from a list
139 | to a single dataframe with `purrr::list_rbind()`. But note that this way
140 | doesn’t track any speaker information.
141 | 
142 | ``` r
143 | library(purrr)
144 | 
145 | paths |> 
146 |   map(read_textgrid) |> 
147 |   list_rbind()
148 | #> # A tibble: 150 × 10
149 | #>    file           tier_num tier_name tier_type    tier_xmin tier_xmax  xmin
150 | #>    <chr>             <int> <chr>     <chr>            <dbl>     <dbl> <dbl>
151 | #>  1 s2T01.TextGrid        1 words     IntervalTier         0      1.35 0    
152 | #>  2 s2T01.TextGrid        1 words     IntervalTier         0      1.35 0.297
153 | #>  3 s2T01.TextGrid        1 words     IntervalTier         0      1.35 0.522
154 | #>  4 s2T01.TextGrid        1 words     IntervalTier         0      1.35 0.972
155 | #>  5 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0    
156 | #>  6 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0.297
157 | #>  7 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0.36 
158 | #>  8 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0.495
159 | #>  9 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0.522
160 | #> 10 s2T01.TextGrid        2 phones    IntervalTier         0      1.35 0.621
161 | #>     xmax text    annotation_num
162 | #>    <dbl> <chr>            <int>
163 | #>  1 0.297 ""                   1
164 | #>  2 0.522 "bird"               2
165 | #>  3 0.972 "house"              3
166 | #>  4 1.35  ""                   4
167 | #>  5 0.297 "sil"                1
168 | #>  6 0.36  "B"                  2
169 | #>  7 0.495 "ER1"                3
170 | #>  8 0.522 "D"                  4
171 | #>  9 0.621 "HH"                 5
172 | #> 10 0.783 "AW1"                6
173 | #> # ℹ 140 more rows
174 | ```
175 | 
176 | By default, `read_textgrid()` uses the file basename (the file-path
177 | minus the directory part) for the `file` column. But we can manually set
178 | the `file` value. Here, we use `purrr::map2()` to map the function
179 | `read_textgrid(path, file)` over `path` and `file` pairs. Then we add
180 | the speaker information with some dataframe manipulation functions.
181 | 
182 | ``` r
183 | library(dplyr)
184 | 
185 | # This tells read_textgrid() to set the file column to the full path
186 | data <- map2(paths, paths, read_textgrid) |> 
187 |   list_rbind() |> 
188 |   mutate(
189 |     # basename() removes the folder part from a path, 
190 |     # dirname() removes the file part from a path
191 |     speaker = basename(dirname(file)),
192 |     file = basename(file),
193 |   ) |> 
194 |   select(
195 |     speaker, everything()
196 |   )
197 | 
198 | data
199 | #> # A tibble: 150 × 11
200 | #>    speaker    file           tier_num tier_name tier_type    tier_xmin tier_xmax
201 | #>    <chr>      <chr>             <int> <chr>     <chr>            <dbl>     <dbl>
202 | #>  1 speaker001 s2T01.TextGrid        1 words     IntervalTier         0      1.35
203 | #>  2 speaker001 s2T01.TextGrid        1 words     IntervalTier         0      1.35
204 | #>  3 speaker001 s2T01.TextGrid        1 words     IntervalTier         0      1.35
205 | #>  4 speaker001 s2T01.TextGrid        1 words     IntervalTier         0      1.35
206 | #>  5 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
207 | #>  6 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
208 | #>  7 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
209 | #>  8 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
210 | #>  9 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
211 | #> 10 speaker001 s2T01.TextGrid        2 phones    IntervalTier         0      1.35
212 | #>     xmin  xmax text    annotation_num
213 | #>    <dbl> <dbl> <chr>            <int>
214 | #>  1 0     0.297 ""                   1
215 | #>  2 0.297 0.522 "bird"               2
216 | #>  3 0.522 0.972 "house"              3
217 | #>  4 0.972 1.35  ""                   4
218 | #>  5 0     0.297 "sil"                1
219 | #>  6 0.297 0.36  "B"                  2
220 | #>  7 0.36  0.495 "ER1"                3
221 | #>  8 0.495 0.522 "D"                  4
222 | #>  9 0.522 0.621 "HH"                 5
223 | #> 10 0.621 0.783 "AW1"                6
224 | #> # ℹ 140 more rows
225 | ```
226 | 
227 | Another strategy would be to read the textgrid dataframes into a list
228 | column and `tidyr::unnest()` them.
229 | 
230 | ``` r
231 | # Read dataframes into a list column
232 | data_nested <- tibble(
233 |   speaker = basename(dirname(paths)),
234 |   data = map(paths, read_textgrid)
235 | )
236 | 
237 | # We have one row per textgrid dataframe because `data` is a list column
238 | data_nested
239 | #> # A tibble: 10 × 2
240 | #>    speaker    data              
241 | #>    <chr>      <list>            
242 | #>  1 speaker001 <tibble [13 × 10]>
243 | #>  2 speaker001 <tibble [15 × 10]>
244 | #>  3 speaker001 <tibble [16 × 10]>
245 | #>  4 speaker001 <tibble [12 × 10]>
246 | #>  5 speaker001 <tibble [19 × 10]>
247 | #>  6 speaker002 <tibble [13 × 10]>
248 | #>  7 speaker002 <tibble [15 × 10]>
249 | #>  8 speaker002 <tibble [16 × 10]>
250 | #>  9 speaker002 <tibble [12 × 10]>
251 | #> 10 speaker002 <tibble [19 × 10]>
252 | 
253 | # promote the nested dataframes into the main dataframe
254 | tidyr::unnest(data_nested, "data")
255 | #> # A tibble: 150 × 11
256 | #>    speaker    file  tier_num tier_name tier_type tier_xmin tier_xmax  xmin  xmax
257 | #>    <chr>      <chr>    <int> <chr>     <chr>         <dbl>     <dbl> <dbl> <dbl>
258 | #>  1 speaker001 s2T0…        1 words     Interval…         0      1.35 0     0.297
259 | #>  2 speaker001 s2T0…        1 words     Interval…         0      1.35 0.297 0.522
260 | #>  3 speaker001 s2T0…        1 words     Interval…         0      1.35 0.522 0.972
261 | #>  4 speaker001 s2T0…        1 words     Interval…         0      1.35 0.972 1.35 
262 | #>  5 speaker001 s2T0…        2 phones    Interval…         0      1.35 0     0.297
263 | #>  6 speaker001 s2T0…        2 phones    Interval…         0      1.35 0.297 0.36 
264 | #>  7 speaker001 s2T0…        2 phones    Interval…         0      1.35 0.36  0.495
265 | #>  8 speaker001 s2T0…        2 phones    Interval…         0      1.35 0.495 0.522
266 | #>  9 speaker001 s2T0…        2 phones    Interval…         0      1.35 0.522 0.621
267 | #> 10 speaker001 s2T0…        2 phones    Interval…         0      1.35 0.621 0.783
268 | #> # ℹ 140 more rows
269 | #> # ℹ 2 more variables: text <chr>, annotation_num <int>
270 | ```
271 | 
272 | ## Pivoting nested intervals in textgrids
273 | 
274 | In the textgrids above, there is a natural nesting or hierarchy to the
275 | tiers. Intervals in the `words` tier contain intervals in the `phones` tier.
276 | It is often necessary to group intervals by their parent intervals
277 | (group phones by words). This package provides the
278 | `pivot_textgrid_tiers()` function to convert textgrids into a wide
279 | format in a way that respects the nesting/hierarchy of tiers.
280 | 
281 | ``` r
282 | data_wide <- pivot_textgrid_tiers(
283 |   data, 
284 |   tiers = c("words", "phones"), 
285 |   join_cols = c("speaker", "file")
286 | )
287 | 
288 | data_wide
289 | #> # A tibble: 108 × 18
290 | #>    speaker    file   words words_xmin words_xmax words_xmid words_annotation_num
291 | #>    <chr>      <chr>  <chr>      <dbl>      <dbl>      <dbl>                <int>
292 | #>  1 speaker001 s2T01… ""         0          0.297      0.149                    1
293 | #>  2 speaker001 s2T01… "bir…      0.297      0.522      0.410                    2
294 | #>  3 speaker001 s2T01… "bir…      0.297      0.522      0.410                    2
295 | #>  4 speaker001 s2T01… "bir…      0.297      0.522      0.410                    2
296 | #>  5 speaker001 s2T01… "hou…      0.522      0.972      0.747                    3
297 | #>  6 speaker001 s2T01… "hou…      0.522      0.972      0.747                    3
298 | #>  7 speaker001 s2T01… "hou…      0.522      0.972      0.747                    3
299 | #>  8 speaker001 s2T01… ""         0.972      1.35       1.16                     4
300 | #>  9 speaker001 s2T01… ""         0.972      1.35       1.16                     4
301 | #> 10 speaker001 s2T02… ""         0          0.297      0.149                    1
302 | #> # ℹ 98 more rows
303 | #> # ℹ 11 more variables: words_tier_num <int>, words_tier_type <chr>,
304 | #> #   tier_xmin <dbl>, tier_xmax <dbl>, phones <chr>, phones_xmin <dbl>,
305 | #> #   phones_xmax <dbl>, phones_xmid <dbl>, phones_annotation_num <int>,
306 | #> #   phones_tier_num <int>, phones_tier_type <chr>
307 | 
308 | # more clearly
309 | data_wide |> 
310 |   select(
311 |     speaker, file, words, phones, 
312 |     words_xmin, words_xmax, phones_xmin, phones_xmax
313 |   )
314 | #> # A tibble: 108 × 8
315 | #>    speaker    file    words phones words_xmin words_xmax phones_xmin phones_xmax
316 | #>    <chr>      <chr>   <chr> <chr>       <dbl>      <dbl>       <dbl>       <dbl>
317 | #>  1 speaker001 s2T01.… ""    "sil"       0          0.297       0           0.297
318 | #>  2 speaker001 s2T01.… "bir… "B"         0.297      0.522       0.297       0.36 
319 | #>  3 speaker001 s2T01.… "bir… "ER1"       0.297      0.522       0.36        0.495
320 | #>  4 speaker001 s2T01.… "bir… "D"         0.297      0.522       0.495       0.522
321 | #>  5 speaker001 s2T01.… "hou… "HH"        0.522      0.972       0.522       0.621
322 | #>  6 speaker001 s2T01.… "hou… "AW1"       0.522      0.972       0.621       0.783
323 | #>  7 speaker001 s2T01.… "hou… "S"         0.522      0.972       0.783       0.972
324 | #>  8 speaker001 s2T01.… ""    "sp"        0.972      1.35        0.972       1.33 
325 | #>  9 speaker001 s2T01.… ""    ""          0.972      1.35        1.33        1.35 
326 | #> 10 speaker001 s2T02.… ""    "sil"       0          0.297       0           0.297
327 | #> # ℹ 98 more rows
328 | ```
329 | 
330 | Some remarks:
331 | 
332 | - Each tier in `tiers` becomes a batch of columns. For example, the rows
333 |   for the `words` tier become the batch of columns `words` (the original
334 |   `text` value), `words_xmin`, `words_xmax`, etc.
335 | - The columns in `join_cols` should uniquely identify a textgrid file,
336 |   so the combination of `speaker` and `file` is needed in the case where
337 |   different speakers have the same file.
338 | - The tier names in `tiers` should be given in the order of their
339 |   nesting from outside to inside (e.g., `words` contain `phones`).
340 |   Behind the scenes,
341 |   `dplyr::left_join(..., relationship = "one-to-many")` is used to
342 |   constrain how intervals are combined.
343 | 
344 | This function also works on a single `tiers` value. In this case, the
345 | function returns just the intervals in that tier with the columns
346 | renamed and prefixed.
347 | 
348 | ``` r
349 | data |> 
350 |   pivot_textgrid_tiers(
351 |     tiers = "words", 
352 |     join_cols = c("speaker", "file")
353 |   )
354 | #> # A tibble: 42 × 11
355 | #>    speaker    file   words words_xmin words_xmax words_xmid words_annotation_num
356 | #>    <chr>      <chr>  <chr>      <dbl>      <dbl>      <dbl>                <int>
357 | #>  1 speaker001 s2T01… ""         0          0.297      0.149                    1
358 | #>  2 speaker001 s2T01… "bir…      0.297      0.522      0.410                    2
359 | #>  3 speaker001 s2T01… "hou…      0.522      0.972      0.747                    3
360 | #>  4 speaker001 s2T01… ""         0.972      1.35       1.16                     4
361 | #>  5 speaker001 s2T02… ""         0          0.297      0.149                    1
362 | #>  6 speaker001 s2T02… "cow…      0.297      0.702      0.500                    2
363 | #>  7 speaker001 s2T02… "boo…      0.702      1.17       0.936                    3
364 | #>  8 speaker001 s2T02… ""         1.17       1.59       1.38                     4
365 | #>  9 speaker001 s2T03… ""         0          0.369      0.184                    1
366 | #> 10 speaker001 s2T03… "hug"      0.369      0.657      0.513                    2
367 | #> # ℹ 32 more rows
368 | #> # ℹ 4 more variables: words_tier_num <int>, words_tier_type <chr>,
369 | #> #   tier_xmin <dbl>, tier_xmax <dbl>
370 | ```
371 | 
372 | ## Speeding things up
373 | 
374 | Do you have thousands of textgrids to read? The following workflow can
375 | speed things up. We are going to **read the textgrids in parallel**.
376 | Below are two approaches:
377 | 
378 | - future backend and furrr frontend
379 | - mirai backend and purrr frontend
380 | 
381 | The backend manages the parallel computation, and the frontend provides
382 | the syntax for calling a function with parallelism.
383 | 
384 | **Approach 1**: We tell future to use a `multisession` `plan` for
385 | parallelism, so the computations are done on separate R sessions in the
386 | background. The syntax is like the above purrr code, but we replace
387 | `map()` with `future_map()`.
388 | 
389 | ``` r
390 | library(future)
391 | library(furrr)
392 | plan(multisession, workers = 4)
393 | 
394 | data_nested <- tibble(
395 |   speaker = basename(dirname(paths)),
396 |   data = future_map(paths, read_textgrid)
397 | )
398 | ```
399 | 
400 | **Approach 2**: We have mirai set up 4 daemons (background processes),
401 | and then we use purrr’s `in_parallel()` helper to signal to `map()` that
402 | the function should be run in parallel. We need to give *all* the
403 | information needed for the daemons to run the function, so we 1) provide
404 | a complete function definition (including `function(x) ...`) and 2)
405 | spell out the package namespace `readtextgrid::read_textgrid()`.
406 | 
407 | ``` r
408 | mirai::daemons(4)
409 | data_nested <- tibble(
410 |   speaker = basename(dirname(paths)),
411 |   data = map(paths, in_parallel(function(x) readtextgrid::read_textgrid(x)))
412 | )
413 | mirai::daemons(0)
414 | ```
415 | 
416 | Another way to eke out performance is to set the encoding. By default,
417 | readtextgrid uses `readr::guess_encoding()` to determine the encoding of
418 | the textgrid before reading it in. But if you know the encoding
419 | beforehand, you can skip this guessing. In my limited testing, I found
420 | that **setting the encoding** could reduce benchmark times by 3–4%
421 | compared to guessing the encoding.
422 | 
423 | Here, we read 100 textgrids using different approaches to benchmark the
424 | results.
425 | 
426 | ``` r
427 | paths_bench <- withr::with_seed(1, sample(paths, 100, replace = TRUE))
428 | 
429 | mirai::daemons(4)
430 | bench::mark(
431 |   lapply_guess = lapply(paths_bench, read_textgrid),
432 |   lapply_set   = lapply(paths_bench, read_textgrid, encoding = "UTF-8"),
433 |   future_guess = future_map(paths_bench, read_textgrid),
434 |   future_set   = future_map(paths_bench, read_textgrid, encoding = "UTF-8"), 
435 |   mirai_guess = purrr::map(
436 |     paths_bench, 
437 |     in_parallel(function(x) readtextgrid::read_textgrid(x))
438 |   ),
439 |   mirai_set = purrr::map(
440 |     paths_bench, 
441 |     in_parallel(function(x) readtextgrid::read_textgrid(x, encoding = "UTF-8"))
442 |   ),
443 |   check = TRUE
444 | )
445 | #> Warning: Some expressions had a GC in every iteration; so filtering is
446 | #> disabled.
447 | #> # A tibble: 6 × 6
448 | #>   expression        min   median `itr/sec` mem_alloc `gc/sec`
449 | #>   <bch:expr>   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
450 | #> 1 lapply_guess    1.17s    1.17s     0.852   13.32MB     5.96
451 | #> 2 lapply_set   883.69ms 883.69ms     1.13     5.41MB     6.79
452 | #> 3 future_guess 407.83ms 421.37ms     2.37   627.53KB     2.37
453 | #> 4 future_set   356.49ms 358.09ms     2.79   627.53KB     2.79
454 | #> 5 mirai_guess  315.85ms 338.82ms     2.95  1006.66KB     0   
455 | #> 6 mirai_set    258.63ms 259.45ms     3.85  1006.66KB     0
456 | mirai::daemons(0)
457 | ```
458 | 
459 | ## Legacy behavior and supported textgrid formats
460 | 
461 | The original version of this package assumed the textgrid text files
462 | followed a “long” format with helpful labels and annotations. For
463 | example, in the following textgrid, each number has a label that makes
464 | it easy and fast to parse the textgrid with regular expressions:
465 | 
466 |     File type = "ooTextFile"
467 |     Object class = "TextGrid"
468 | 
469 |     xmin = 0 
470 |     xmax = 1 
471 |     tiers? <exists> 
472 |     size = 1 
473 |     item []: 
474 |         item [1]:
475 |             class = "IntervalTier" 
476 |             name = "Mary" 
477 |             xmin = 0 
478 |             xmax = 1 
479 |             intervals: size = 1 
480 |             intervals [1]:
481 |                 xmin = 0 
482 |                 xmax = 1 
483 |                 text = "" 
484 | 
485 | The original version of the parser designed for this textgrid format is
486 | still provided with the `legacy_read_textgrid()` and
487 | `legacy_read_textgrid_lines()` functions.
488 | 
489 | Version 0.2.0 of readtextgrid added a C++ based parser that can handle
490 | many more textgrid formats. For example, it can read “short” format
491 | textgrids like the following:
492 | 
493 |     File type = "ooTextFile"
494 |     Object class = "TextGrid"
495 | 
496 |     0
497 |     1
498 |     <exists>
499 |     1
500 |     "IntervalTier"
501 |     "Mary"
502 |     0
503 |     1
504 |     1
505 |     0
506 |     1
507 |     ""
508 | 
509 | The “long” format textgrids are outputted in Praat with
510 | `Save > Save as text file...`, and the “short” format textgrids are
511 | outputted with `Save > Save as short text file...`.
512 | 
513 | readtextgrid’s parser can also handle [esoteric
514 | features](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html)
515 | like comments (that start with `!`) or arbitrary text attached to a
516 | number, as in the following example:
517 | 
518 |     File type = "ooTextFile"
519 |     Object class = "TextGrid"
520 | 
521 |     ! info about the grid
522 |     0s 1s <exists> 1
523 |     ! info about the tier
524 |     "IntervalTier" "Mary" 0s 1s 1 ! type, name, xmin, xmax, size
525 |     0s 1s "" ! interval xmin, xmax, text
526 | 
527 | Because the new parser uses C++ for tokenization—that is, the part that scans
528 | the contents character by character and determines whether the inputs are
529 | strings, numbers, or skipped—it is much faster than the legacy version.
530 | 
531 | ``` r
532 | paths_bench <- withr::with_seed(2, sample(paths, 10, replace = TRUE))
533 | 
534 | bench::mark(
535 |   current = lapply(paths_bench, read_textgrid),
536 |   legacy = lapply(paths_bench, legacy_read_textgrid),
537 |   min_iterations = 10, 
538 |   filter_gc = FALSE,
539 |   check = TRUE
540 | )
541 | #> # A tibble: 2 × 6
542 | #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
543 | #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
544 | #> 1 current       114ms    123ms      8.11    1.31MB     4.86
545 | #> 2 legacy        332ms    342ms      2.89   19.57MB     6.06
546 | ```
547 | 
548 | ## Other tips
549 | 
550 | ### Helpful columns
551 | 
552 | The following columns are often helpful:
553 | 
554 | - `duration` of an interval
555 | - `xmid` midpoint of an interval
556 | - `total_annotations` total number of annotations on a tier
557 | 
558 | Here is how to create them:
559 | 
560 | ``` r
561 | data |>
562 |   # grouping needed for counting annotations per tier per file per speaker
563 |   group_by(speaker, file, tier_num) |>
564 |   mutate(
565 |     duration = xmax - xmin,
566 |     xmid = xmin + (xmax - xmin) / 2,
567 |     total_annotations = sum(!is.na(annotation_num))
568 |   ) |> 
569 |   ungroup() |> 
570 |   glimpse()
571 | #> Rows: 150
572 | #> Columns: 14
573 | #> $ speaker           <chr> "speaker001", "speaker001", "speaker001", "speaker00…
574 | #> $ file              <chr> "s2T01.TextGrid", "s2T01.TextGrid", "s2T01.TextGrid"…
575 | #> $ tier_num          <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2…
576 | #> $ tier_name         <chr> "words", "words", "words", "words", "phones", "phone…
577 | #> $ tier_type         <chr> "IntervalTier", "IntervalTier", "IntervalTier", "Int…
578 | #> $ tier_xmin         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
579 | #> $ tier_xmax         <dbl> 1.348571, 1.348571, 1.348571, 1.348571, 1.348571, 1.…
580 | #> $ xmin              <dbl> 0.000, 0.297, 0.522, 0.972, 0.000, 0.297, 0.360, 0.4…
581 | #> $ xmax              <dbl> 0.297000, 0.522000, 0.972000, 1.348571, 0.297000, 0.…
582 | #> $ text              <chr> "", "bird", "house", "", "sil", "B", "ER1", "D", "HH…
583 | #> $ annotation_num    <int> 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 1…
584 | #> $ duration          <dbl> 0.29700000, 0.22500000, 0.45000000, 0.37657143, 0.29…
585 | #> $ xmid              <dbl> 0.148500, 0.409500, 0.747000, 1.160286, 0.148500, 0.…
586 | #> $ total_annotations <int> 4, 4, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 4, 1…
587 | ```
588 | 
589 | ### Launching Praat
590 | 
591 | *This tip is written from the perspective of a Windows user who uses git
592 | bash for a terminal*.
593 | 
594 | To open textgrids in Praat, you can tell R to call Praat from the
595 | command line. You have to know where the Praat binary is located,
596 | though. I like to keep a copy in my project directories. So, assuming
597 | that Praat.exe is in my working folder, the following would open the 10
598 | textgrids in `paths` in Praat.
599 | 
600 | ``` r
601 | system2(
602 |   command = "./Praat.exe",
603 |   args = c("--open", paths),
604 |   wait = FALSE
605 | )
606 | ```
607 | 
608 | ## Acknowledgments
609 | 
610 | readtextgrid was created to process data from the [WISC Lab
611 | project](https://kidspeech.wisc.edu/). Thus, development of this package
612 | was supported by NIH R01DC009411 and NIH R01DC015653.
613 | 
614 | ------------------------------------------------------------------------
615 | 
616 | Please note that the ‘readtextgrid’ project is released with a
617 | [Contributor Code of
618 | Conduct](https://www.contributor-covenant.org/version/1/0/0/code-of-conduct.html).
619 | By contributing to this project, you agree to abide by its terms.
620 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | GNU General Public License
  2 | ==========================
  3 | 
  4 | _Version 3, 29 June 2007_  
  5 | _Copyright © 2007 Free Software Foundation, Inc. &lt;<http://fsf.org/>&gt;_
  6 | 
  7 | Everyone is permitted to copy and distribute verbatim copies of this license
  8 | document, but changing it is not allowed.
  9 | 
 10 | ## Preamble
 11 | 
 12 | The GNU General Public License is a free, copyleft license for software and other
 13 | kinds of works.
 14 | 
 15 | The licenses for most software and other practical works are designed to take away
 16 | your freedom to share and change the works. By contrast, the GNU General Public
 17 | License is intended to guarantee your freedom to share and change all versions of a
 18 | program--to make sure it remains free software for all its users. We, the Free
 19 | Software Foundation, use the GNU General Public License for most of our software; it
 20 | applies also to any other work released this way by its authors. You can apply it to
 21 | your programs, too.
 22 | 
 23 | When we speak of free software, we are referring to freedom, not price. Our General
 24 | Public Licenses are designed to make sure that you have the freedom to distribute
 25 | copies of free software (and charge for them if you wish), that you receive source
 26 | code or can get it if you want it, that you can change the software or use pieces of
 27 | it in new free programs, and that you know you can do these things.
 28 | 
 29 | To protect your rights, we need to prevent others from denying you these rights or
 30 | asking you to surrender the rights. Therefore, you have certain responsibilities if
 31 | you distribute copies of the software, or if you modify it: responsibilities to
 32 | respect the freedom of others.
 33 | 
 34 | For example, if you distribute copies of such a program, whether gratis or for a fee,
 35 | you must pass on to the recipients the same freedoms that you received. You must make
 36 | sure that they, too, receive or can get the source code. And you must show them these
 37 | terms so they know their rights.
 38 | 
 39 | Developers that use the GNU GPL protect your rights with two steps: **(1)** assert
 40 | copyright on the software, and **(2)** offer you this License giving you legal permission
 41 | to copy, distribute and/or modify it.
 42 | 
 43 | For the developers' and authors' protection, the GPL clearly explains that there is
 44 | no warranty for this free software. For both users' and authors' sake, the GPL
 45 | requires that modified versions be marked as changed, so that their problems will not
 46 | be attributed erroneously to authors of previous versions.
 47 | 
 48 | Some devices are designed to deny users access to install or run modified versions of
 49 | the software inside them, although the manufacturer can do so. This is fundamentally
 50 | incompatible with the aim of protecting users' freedom to change the software. The
 51 | systematic pattern of such abuse occurs in the area of products for individuals to
 52 | use, which is precisely where it is most unacceptable. Therefore, we have designed
 53 | this version of the GPL to prohibit the practice for those products. If such problems
 54 | arise substantially in other domains, we stand ready to extend this provision to
 55 | those domains in future versions of the GPL, as needed to protect the freedom of
 56 | users.
 57 | 
 58 | Finally, every program is threatened constantly by software patents. States should
 59 | not allow patents to restrict development and use of software on general-purpose
 60 | computers, but in those that do, we wish to avoid the special danger that patents
 61 | applied to a free program could make it effectively proprietary. To prevent this, the
 62 | GPL assures that patents cannot be used to render the program non-free.
 63 | 
 64 | The precise terms and conditions for copying, distribution and modification follow.
 65 | 
 66 | ## TERMS AND CONDITIONS
 67 | 
 68 | ### 0. Definitions
 69 | 
 70 | “This License” refers to version 3 of the GNU General Public License.
 71 | 
 72 | “Copyright” also means copyright-like laws that apply to other kinds of
 73 | works, such as semiconductor masks.
 74 | 
 75 | “The Program” refers to any copyrightable work licensed under this
 76 | License. Each licensee is addressed as “you”. “Licensees” and
 77 | “recipients” may be individuals or organizations.
 78 | 
 79 | To “modify” a work means to copy from or adapt all or part of the work in
 80 | a fashion requiring copyright permission, other than the making of an exact copy. The
 81 | resulting work is called a “modified version” of the earlier work or a
 82 | work “based on” the earlier work.
 83 | 
 84 | A “covered work” means either the unmodified Program or a work based on
 85 | the Program.
 86 | 
 87 | To “propagate” a work means to do anything with it that, without
 88 | permission, would make you directly or secondarily liable for infringement under
 89 | applicable copyright law, except executing it on a computer or modifying a private
 90 | copy. Propagation includes copying, distribution (with or without modification),
 91 | making available to the public, and in some countries other activities as well.
 92 | 
 93 | To “convey” a work means any kind of propagation that enables other
 94 | parties to make or receive copies. Mere interaction with a user through a computer
 95 | network, with no transfer of a copy, is not conveying.
 96 | 
 97 | An interactive user interface displays “Appropriate Legal Notices” to the
 98 | extent that it includes a convenient and prominently visible feature that **(1)**
 99 | displays an appropriate copyright notice, and **(2)** tells the user that there is no
100 | warranty for the work (except to the extent that warranties are provided), that
101 | licensees may convey the work under this License, and how to view a copy of this
102 | License. If the interface presents a list of user commands or options, such as a
103 | menu, a prominent item in the list meets this criterion.
104 | 
105 | ### 1. Source Code
106 | 
107 | The “source code” for a work means the preferred form of the work for
108 | making modifications to it. “Object code” means any non-source form of a
109 | work.
110 | 
111 | A “Standard Interface” means an interface that either is an official
112 | standard defined by a recognized standards body, or, in the case of interfaces
113 | specified for a particular programming language, one that is widely used among
114 | developers working in that language.
115 | 
116 | The “System Libraries” of an executable work include anything, other than
117 | the work as a whole, that **(a)** is included in the normal form of packaging a Major
118 | Component, but which is not part of that Major Component, and **(b)** serves only to
119 | enable use of the work with that Major Component, or to implement a Standard
120 | Interface for which an implementation is available to the public in source code form.
121 | A “Major Component”, in this context, means a major essential component
122 | (kernel, window system, and so on) of the specific operating system (if any) on which
123 | the executable work runs, or a compiler used to produce the work, or an object code
124 | interpreter used to run it.
125 | 
126 | The “Corresponding Source” for a work in object code form means all the
127 | source code needed to generate, install, and (for an executable work) run the object
128 | code and to modify the work, including scripts to control those activities. However,
129 | it does not include the work's System Libraries, or general-purpose tools or
130 | generally available free programs which are used unmodified in performing those
131 | activities but which are not part of the work. For example, Corresponding Source
132 | includes interface definition files associated with source files for the work, and
133 | the source code for shared libraries and dynamically linked subprograms that the work
134 | is specifically designed to require, such as by intimate data communication or
135 | control flow between those subprograms and other parts of the work.
136 | 
137 | The Corresponding Source need not include anything that users can regenerate
138 | automatically from other parts of the Corresponding Source.
139 | 
140 | The Corresponding Source for a work in source code form is that same work.
141 | 
142 | ### 2. Basic Permissions
143 | 
144 | All rights granted under this License are granted for the term of copyright on the
145 | Program, and are irrevocable provided the stated conditions are met. This License
146 | explicitly affirms your unlimited permission to run the unmodified Program. The
147 | output from running a covered work is covered by this License only if the output,
148 | given its content, constitutes a covered work. This License acknowledges your rights
149 | of fair use or other equivalent, as provided by copyright law.
150 | 
151 | You may make, run and propagate covered works that you do not convey, without
152 | conditions so long as your license otherwise remains in force. You may convey covered
153 | works to others for the sole purpose of having them make modifications exclusively
154 | for you, or provide you with facilities for running those works, provided that you
155 | comply with the terms of this License in conveying all material for which you do not
156 | control copyright. Those thus making or running the covered works for you must do so
157 | exclusively on your behalf, under your direction and control, on terms that prohibit
158 | them from making any copies of your copyrighted material outside their relationship
159 | with you.
160 | 
161 | Conveying under any other circumstances is permitted solely under the conditions
162 | stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
163 | 
164 | ### 3. Protecting Users' Legal Rights From Anti-Circumvention Law
165 | 
166 | No covered work shall be deemed part of an effective technological measure under any
167 | applicable law fulfilling obligations under article 11 of the WIPO copyright treaty
168 | adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention
169 | of such measures.
170 | 
171 | When you convey a covered work, you waive any legal power to forbid circumvention of
172 | technological measures to the extent such circumvention is effected by exercising
173 | rights under this License with respect to the covered work, and you disclaim any
174 | intention to limit operation or modification of the work as a means of enforcing,
175 | against the work's users, your or third parties' legal rights to forbid circumvention
176 | of technological measures.
177 | 
178 | ### 4. Conveying Verbatim Copies
179 | 
180 | You may convey verbatim copies of the Program's source code as you receive it, in any
181 | medium, provided that you conspicuously and appropriately publish on each copy an
182 | appropriate copyright notice; keep intact all notices stating that this License and
183 | any non-permissive terms added in accord with section 7 apply to the code; keep
184 | intact all notices of the absence of any warranty; and give all recipients a copy of
185 | this License along with the Program.
186 | 
187 | You may charge any price or no price for each copy that you convey, and you may offer
188 | support or warranty protection for a fee.
189 | 
190 | ### 5. Conveying Modified Source Versions
191 | 
192 | You may convey a work based on the Program, or the modifications to produce it from
193 | the Program, in the form of source code under the terms of section 4, provided that
194 | you also meet all of these conditions:
195 | 
196 | * **a)** The work must carry prominent notices stating that you modified it, and giving a
197 | relevant date.
198 | * **b)** The work must carry prominent notices stating that it is released under this
199 | License and any conditions added under section 7. This requirement modifies the
200 | requirement in section 4 to “keep intact all notices”.
201 | * **c)** You must license the entire work, as a whole, under this License to anyone who
202 | comes into possession of a copy. This License will therefore apply, along with any
203 | applicable section 7 additional terms, to the whole of the work, and all its parts,
204 | regardless of how they are packaged. This License gives no permission to license the
205 | work in any other way, but it does not invalidate such permission if you have
206 | separately received it.
207 | * **d)** If the work has interactive user interfaces, each must display Appropriate Legal
208 | Notices; however, if the Program has interactive interfaces that do not display
209 | Appropriate Legal Notices, your work need not make them do so.
210 | 
211 | A compilation of a covered work with other separate and independent works, which are
212 | not by their nature extensions of the covered work, and which are not combined with
213 | it such as to form a larger program, in or on a volume of a storage or distribution
214 | medium, is called an “aggregate” if the compilation and its resulting
215 | copyright are not used to limit the access or legal rights of the compilation's users
216 | beyond what the individual works permit. Inclusion of a covered work in an aggregate
217 | does not cause this License to apply to the other parts of the aggregate.
218 | 
219 | ### 6. Conveying Non-Source Forms
220 | 
221 | You may convey a covered work in object code form under the terms of sections 4 and
222 | 5, provided that you also convey the machine-readable Corresponding Source under the
223 | terms of this License, in one of these ways:
224 | 
225 | * **a)** Convey the object code in, or embodied in, a physical product (including a
226 | physical distribution medium), accompanied by the Corresponding Source fixed on a
227 | durable physical medium customarily used for software interchange.
228 | * **b)** Convey the object code in, or embodied in, a physical product (including a
229 | physical distribution medium), accompanied by a written offer, valid for at least
230 | three years and valid for as long as you offer spare parts or customer support for
231 | that product model, to give anyone who possesses the object code either **(1)** a copy of
232 | the Corresponding Source for all the software in the product that is covered by this
233 | License, on a durable physical medium customarily used for software interchange, for
234 | a price no more than your reasonable cost of physically performing this conveying of
235 | source, or **(2)** access to copy the Corresponding Source from a network server at no
236 | charge.
237 | * **c)** Convey individual copies of the object code with a copy of the written offer to
238 | provide the Corresponding Source. This alternative is allowed only occasionally and
239 | noncommercially, and only if you received the object code with such an offer, in
240 | accord with subsection 6b.
241 | * **d)** Convey the object code by offering access from a designated place (gratis or for
242 | a charge), and offer equivalent access to the Corresponding Source in the same way
243 | through the same place at no further charge. You need not require recipients to copy
244 | the Corresponding Source along with the object code. If the place to copy the object
245 | code is a network server, the Corresponding Source may be on a different server
246 | (operated by you or a third party) that supports equivalent copying facilities,
247 | provided you maintain clear directions next to the object code saying where to find
248 | the Corresponding Source. Regardless of what server hosts the Corresponding Source,
249 | you remain obligated to ensure that it is available for as long as needed to satisfy
250 | these requirements.
251 | * **e)** Convey the object code using peer-to-peer transmission, provided you inform
252 | other peers where the object code and Corresponding Source of the work are being
253 | offered to the general public at no charge under subsection 6d.
254 | 
255 | A separable portion of the object code, whose source code is excluded from the
256 | Corresponding Source as a System Library, need not be included in conveying the
257 | object code work.
258 | 
259 | A “User Product” is either **(1)** a “consumer product”, which
260 | means any tangible personal property which is normally used for personal, family, or
261 | household purposes, or **(2)** anything designed or sold for incorporation into a
262 | dwelling. In determining whether a product is a consumer product, doubtful cases
263 | shall be resolved in favor of coverage. For a particular product received by a
264 | particular user, “normally used” refers to a typical or common use of
265 | that class of product, regardless of the status of the particular user or of the way
266 | in which the particular user actually uses, or expects or is expected to use, the
267 | product. A product is a consumer product regardless of whether the product has
268 | substantial commercial, industrial or non-consumer uses, unless such uses represent
269 | the only significant mode of use of the product.
270 | 
271 | “Installation Information” for a User Product means any methods,
272 | procedures, authorization keys, or other information required to install and execute
273 | modified versions of a covered work in that User Product from a modified version of
274 | its Corresponding Source. The information must suffice to ensure that the continued
275 | functioning of the modified object code is in no case prevented or interfered with
276 | solely because modification has been made.
277 | 
278 | If you convey an object code work under this section in, or with, or specifically for
279 | use in, a User Product, and the conveying occurs as part of a transaction in which
280 | the right of possession and use of the User Product is transferred to the recipient
281 | in perpetuity or for a fixed term (regardless of how the transaction is
282 | characterized), the Corresponding Source conveyed under this section must be
283 | accompanied by the Installation Information. But this requirement does not apply if
284 | neither you nor any third party retains the ability to install modified object code
285 | on the User Product (for example, the work has been installed in ROM).
286 | 
287 | The requirement to provide Installation Information does not include a requirement to
288 | continue to provide support service, warranty, or updates for a work that has been
289 | modified or installed by the recipient, or for the User Product in which it has been
290 | modified or installed. Access to a network may be denied when the modification itself
291 | materially and adversely affects the operation of the network or violates the rules
292 | and protocols for communication across the network.
293 | 
294 | Corresponding Source conveyed, and Installation Information provided, in accord with
295 | this section must be in a format that is publicly documented (and with an
296 | implementation available to the public in source code form), and must require no
297 | special password or key for unpacking, reading or copying.
298 | 
299 | ### 7. Additional Terms
300 | 
301 | “Additional permissions” are terms that supplement the terms of this
302 | License by making exceptions from one or more of its conditions. Additional
303 | permissions that are applicable to the entire Program shall be treated as though they
304 | were included in this License, to the extent that they are valid under applicable
305 | law. If additional permissions apply only to part of the Program, that part may be
306 | used separately under those permissions, but the entire Program remains governed by
307 | this License without regard to the additional permissions.
308 | 
309 | When you convey a copy of a covered work, you may at your option remove any
310 | additional permissions from that copy, or from any part of it. (Additional
311 | permissions may be written to require their own removal in certain cases when you
312 | modify the work.) You may place additional permissions on material, added by you to a
313 | covered work, for which you have or can give appropriate copyright permission.
314 | 
315 | Notwithstanding any other provision of this License, for material you add to a
316 | covered work, you may (if authorized by the copyright holders of that material)
317 | supplement the terms of this License with terms:
318 | 
319 | * **a)** Disclaiming warranty or limiting liability differently from the terms of
320 | sections 15 and 16 of this License; or
321 | * **b)** Requiring preservation of specified reasonable legal notices or author
322 | attributions in that material or in the Appropriate Legal Notices displayed by works
323 | containing it; or
324 | * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that
325 | modified versions of such material be marked in reasonable ways as different from the
326 | original version; or
327 | * **d)** Limiting the use for publicity purposes of names of licensors or authors of the
328 | material; or
329 | * **e)** Declining to grant rights under trademark law for use of some trade names,
330 | trademarks, or service marks; or
331 | * **f)** Requiring indemnification of licensors and authors of that material by anyone
332 | who conveys the material (or modified versions of it) with contractual assumptions of
333 | liability to the recipient, for any liability that these contractual assumptions
334 | directly impose on those licensors and authors.
335 | 
336 | All other non-permissive additional terms are considered “further
337 | restrictions” within the meaning of section 10. If the Program as you received
338 | it, or any part of it, contains a notice stating that it is governed by this License
339 | along with a term that is a further restriction, you may remove that term. If a
340 | license document contains a further restriction but permits relicensing or conveying
341 | under this License, you may add to a covered work material governed by the terms of
342 | that license document, provided that the further restriction does not survive such
343 | relicensing or conveying.
344 | 
345 | If you add terms to a covered work in accord with this section, you must place, in
346 | the relevant source files, a statement of the additional terms that apply to those
347 | files, or a notice indicating where to find the applicable terms.
348 | 
349 | Additional terms, permissive or non-permissive, may be stated in the form of a
350 | separately written license, or stated as exceptions; the above requirements apply
351 | either way.
352 | 
353 | ### 8. Termination
354 | 
355 | You may not propagate or modify a covered work except as expressly provided under
356 | this License. Any attempt otherwise to propagate or modify it is void, and will
357 | automatically terminate your rights under this License (including any patent licenses
358 | granted under the third paragraph of section 11).
359 | 
360 | However, if you cease all violation of this License, then your license from a
361 | particular copyright holder is reinstated **(a)** provisionally, unless and until the
362 | copyright holder explicitly and finally terminates your license, and **(b)** permanently,
363 | if the copyright holder fails to notify you of the violation by some reasonable means
364 | prior to 60 days after the cessation.
365 | 
366 | Moreover, your license from a particular copyright holder is reinstated permanently
367 | if the copyright holder notifies you of the violation by some reasonable means, this
368 | is the first time you have received notice of violation of this License (for any
369 | work) from that copyright holder, and you cure the violation prior to 30 days after
370 | your receipt of the notice.
371 | 
372 | Termination of your rights under this section does not terminate the licenses of
373 | parties who have received copies or rights from you under this License. If your
374 | rights have been terminated and not permanently reinstated, you do not qualify to
375 | receive new licenses for the same material under section 10.
376 | 
377 | ### 9. Acceptance Not Required for Having Copies
378 | 
379 | You are not required to accept this License in order to receive or run a copy of the
380 | Program. Ancillary propagation of a covered work occurring solely as a consequence of
381 | using peer-to-peer transmission to receive a copy likewise does not require
382 | acceptance. However, nothing other than this License grants you permission to
383 | propagate or modify any covered work. These actions infringe copyright if you do not
384 | accept this License. Therefore, by modifying or propagating a covered work, you
385 | indicate your acceptance of this License to do so.
386 | 
387 | ### 10. Automatic Licensing of Downstream Recipients
388 | 
389 | Each time you convey a covered work, the recipient automatically receives a license
390 | from the original licensors, to run, modify and propagate that work, subject to this
391 | License. You are not responsible for enforcing compliance by third parties with this
392 | License.
393 | 
394 | An “entity transaction” is a transaction transferring control of an
395 | organization, or substantially all assets of one, or subdividing an organization, or
396 | merging organizations. If propagation of a covered work results from an entity
397 | transaction, each party to that transaction who receives a copy of the work also
398 | receives whatever licenses to the work the party's predecessor in interest had or
399 | could give under the previous paragraph, plus a right to possession of the
400 | Corresponding Source of the work from the predecessor in interest, if the predecessor
401 | has it or can get it with reasonable efforts.
402 | 
403 | You may not impose any further restrictions on the exercise of the rights granted or
404 | affirmed under this License. For example, you may not impose a license fee, royalty,
405 | or other charge for exercise of rights granted under this License, and you may not
406 | initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging
407 | that any patent claim is infringed by making, using, selling, offering for sale, or
408 | importing the Program or any portion of it.
409 | 
410 | ### 11. Patents
411 | 
412 | A “contributor” is a copyright holder who authorizes use under this
413 | License of the Program or a work on which the Program is based. The work thus
414 | licensed is called the contributor's “contributor version”.
415 | 
416 | A contributor's “essential patent claims” are all patent claims owned or
417 | controlled by the contributor, whether already acquired or hereafter acquired, that
418 | would be infringed by some manner, permitted by this License, of making, using, or
419 | selling its contributor version, but do not include claims that would be infringed
420 | only as a consequence of further modification of the contributor version. For
421 | purposes of this definition, “control” includes the right to grant patent
422 | sublicenses in a manner consistent with the requirements of this License.
423 | 
424 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license
425 | under the contributor's essential patent claims, to make, use, sell, offer for sale,
426 | import and otherwise run, modify and propagate the contents of its contributor
427 | version.
428 | 
429 | In the following three paragraphs, a “patent license” is any express
430 | agreement or commitment, however denominated, not to enforce a patent (such as an
431 | express permission to practice a patent or covenant not to sue for patent
432 | infringement). To “grant” such a patent license to a party means to make
433 | such an agreement or commitment not to enforce a patent against the party.
434 | 
435 | If you convey a covered work, knowingly relying on a patent license, and the
436 | Corresponding Source of the work is not available for anyone to copy, free of charge
437 | and under the terms of this License, through a publicly available network server or
438 | other readily accessible means, then you must either **(1)** cause the Corresponding
439 | Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the
440 | patent license for this particular work, or **(3)** arrange, in a manner consistent with
441 | the requirements of this License, to extend the patent license to downstream
442 | recipients. “Knowingly relying” means you have actual knowledge that, but
443 | for the patent license, your conveying the covered work in a country, or your
444 | recipient's use of the covered work in a country, would infringe one or more
445 | identifiable patents in that country that you have reason to believe are valid.
446 | 
447 | If, pursuant to or in connection with a single transaction or arrangement, you
448 | convey, or propagate by procuring conveyance of, a covered work, and grant a patent
449 | license to some of the parties receiving the covered work authorizing them to use,
450 | propagate, modify or convey a specific copy of the covered work, then the patent
451 | license you grant is automatically extended to all recipients of the covered work and
452 | works based on it.
453 | 
454 | A patent license is “discriminatory” if it does not include within the
455 | scope of its coverage, prohibits the exercise of, or is conditioned on the
456 | non-exercise of one or more of the rights that are specifically granted under this
457 | License. You may not convey a covered work if you are a party to an arrangement with
458 | a third party that is in the business of distributing software, under which you make
459 | payment to the third party based on the extent of your activity of conveying the
460 | work, and under which the third party grants, to any of the parties who would receive
461 | the covered work from you, a discriminatory patent license **(a)** in connection with
462 | copies of the covered work conveyed by you (or copies made from those copies), or **(b)**
463 | primarily for and in connection with specific products or compilations that contain
464 | the covered work, unless you entered into that arrangement, or that patent license
465 | was granted, prior to 28 March 2007.
466 | 
467 | Nothing in this License shall be construed as excluding or limiting any implied
468 | license or other defenses to infringement that may otherwise be available to you
469 | under applicable patent law.
470 | 
471 | ### 12. No Surrender of Others' Freedom
472 | 
473 | If conditions are imposed on you (whether by court order, agreement or otherwise)
474 | that contradict the conditions of this License, they do not excuse you from the
475 | conditions of this License. If you cannot convey a covered work so as to satisfy
476 | simultaneously your obligations under this License and any other pertinent
477 | obligations, then as a consequence you may not convey it at all. For example, if you
478 | agree to terms that obligate you to collect a royalty for further conveying from
479 | those to whom you convey the Program, the only way you could satisfy both those terms
480 | and this License would be to refrain entirely from conveying the Program.
481 | 
482 | ### 13. Use with the GNU Affero General Public License
483 | 
484 | Notwithstanding any other provision of this License, you have permission to link or
485 | combine any covered work with a work licensed under version 3 of the GNU Affero
486 | General Public License into a single combined work, and to convey the resulting work.
487 | The terms of this License will continue to apply to the part which is the covered
488 | work, but the special requirements of the GNU Affero General Public License, section
489 | 13, concerning interaction through a network will apply to the combination as such.
490 | 
491 | ### 14. Revised Versions of this License
492 | 
493 | The Free Software Foundation may publish revised and/or new versions of the GNU
494 | General Public License from time to time. Such new versions will be similar in spirit
495 | to the present version, but may differ in detail to address new problems or concerns.
496 | 
497 | Each version is given a distinguishing version number. If the Program specifies that
498 | a certain numbered version of the GNU General Public License “or any later
499 | version” applies to it, you have the option of following the terms and
500 | conditions either of that numbered version or of any later version published by the
501 | Free Software Foundation. If the Program does not specify a version number of the GNU
502 | General Public License, you may choose any version ever published by the Free
503 | Software Foundation.
504 | 
505 | If the Program specifies that a proxy can decide which future versions of the GNU
506 | General Public License can be used, that proxy's public statement of acceptance of a
507 | version permanently authorizes you to choose that version for the Program.
508 | 
509 | Later license versions may give you additional or different permissions. However, no
510 | additional obligations are imposed on any author or copyright holder as a result of
511 | your choosing to follow a later version.
512 | 
513 | ### 15. Disclaimer of Warranty
514 | 
515 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
516 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
517 | PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
518 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
519 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
520 | QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
521 | DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
522 | 
523 | ### 16. Limitation of Liability
524 | 
525 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
526 | COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
527 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
528 | INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
529 | PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
530 | OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
531 | WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
532 | POSSIBILITY OF SUCH DAMAGES.
533 | 
534 | ### 17. Interpretation of Sections 15 and 16
535 | 
536 | If the disclaimer of warranty and limitation of liability provided above cannot be
537 | given local legal effect according to their terms, reviewing courts shall apply local
538 | law that most closely approximates an absolute waiver of all civil liability in
539 | connection with the Program, unless a warranty or assumption of liability accompanies
540 | a copy of the Program in return for a fee.
541 | 
542 | _END OF TERMS AND CONDITIONS_
543 | 
544 | ## How to Apply These Terms to Your New Programs
545 | 
546 | If you develop a new program, and you want it to be of the greatest possible use to
547 | the public, the best way to achieve this is to make it free software which everyone
548 | can redistribute and change under these terms.
549 | 
550 | To do so, attach the following notices to the program. It is safest to attach them
551 | to the start of each source file to most effectively state the exclusion of warranty;
552 | and each file should have at least the “copyright” line and a pointer to
553 | where the full notice is found.
554 | 
555 |     <one line to give the program's name and a brief idea of what it does.>
556 |     Copyright (C) 2019 Tristan Mahr
557 | 
558 |     This program is free software: you can redistribute it and/or modify
559 |     it under the terms of the GNU General Public License as published by
560 |     the Free Software Foundation, either version 3 of the License, or
561 |     (at your option) any later version.
562 | 
563 |     This program is distributed in the hope that it will be useful,
564 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
565 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
566 |     GNU General Public License for more details.
567 | 
568 |     You should have received a copy of the GNU General Public License
569 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
570 | 
571 | Also add information on how to contact you by electronic and paper mail.
572 | 
573 | If the program does terminal interaction, make it output a short notice like this
574 | when it starts in an interactive mode:
575 | 
576 |     readtextgrid Copyright (C) 2019 Tristan Mahr
577 |     This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
578 |     This is free software, and you are welcome to redistribute it
579 |     under certain conditions; type 'show c' for details.
580 | 
581 | The hypothetical commands `show w` and `show c` should show the appropriate parts of
582 | the General Public License. Of course, your program's commands might be different;
583 | for a GUI interface, you would use an “about box”.
584 | 
585 | You should also get your employer (if you work as a programmer) or school, if any, to
586 | sign a “copyright disclaimer” for the program, if necessary. For more
587 | information on this, and how to apply and follow the GNU GPL, see
588 | &lt;<https://www.gnu.org/licenses/>&gt;.
589 | 
590 | The GNU General Public License does not permit incorporating your program into
591 | proprietary programs. If your program is a subroutine library, you may consider it
592 | more useful to permit linking proprietary applications with the library. If this is
593 | what you want to do, use the GNU Lesser General Public License instead of this
594 | License. But first, please read
595 | &lt;<https://www.gnu.org/licenses/why-not-lgpl.html>&gt;.
596 | 


--------------------------------------------------------------------------------