├── .gitignore
├── audio_cases
    ├── afjiv.wav
    ├── afjiv.rttm
    └── afjiv.txt
├── imgs
    ├── praat_import.png
    ├── via_example.png
    ├── via_import.png
    ├── via_shortcut.png
    └── praat_overview.png
├── audio_visual_cases
    ├── 00115.mp4
    └── 00115.rttm
├── via_template.json
├── README.md
├── audio_visualized.py
└── audio_visual_visualized.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/audio_cases/afjiv.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/audio_cases/afjiv.wav


--------------------------------------------------------------------------------
/imgs/praat_import.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/praat_import.png


--------------------------------------------------------------------------------
/imgs/via_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_example.png


--------------------------------------------------------------------------------
/imgs/via_import.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_import.png


--------------------------------------------------------------------------------
/imgs/via_shortcut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_shortcut.png


--------------------------------------------------------------------------------
/imgs/praat_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/praat_overview.png


--------------------------------------------------------------------------------
/audio_visual_cases/00115.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/audio_visual_cases/00115.mp4


--------------------------------------------------------------------------------
/audio_visual_cases/00115.rttm:
--------------------------------------------------------------------------------
 1 | SPEAKER 00115 0 0.000000 1.606000 <NA> <NA> 1 <NA> <NA>
 2 | SPEAKER 00115 0 2.315000 3.521000 <NA> <NA> 1 <NA> <NA>
 3 | SPEAKER 00115 0 6.899000 1.978000 <NA> <NA> 1 <NA> <NA>
 4 | SPEAKER 00115 0 9.294000 10.225000 <NA> <NA> 1 <NA> <NA>
 5 | SPEAKER 00115 0 1.898000 0.771000 <NA> <NA> 0 <NA> <NA>
 6 | SPEAKER 00115 0 5.885000 4.118750 <NA> <NA> 0 <NA> <NA>
 7 | SPEAKER 00115 0 10.927000 1.000000 <NA> <NA> 0 <NA> <NA>
 8 | SPEAKER 00115 0 13.836000 0.625000 <NA> <NA> 0 <NA> <NA>
 9 | SPEAKER 00115 0 19.565000 0.459000 <NA> <NA> 0 <NA> <NA>
10 | 


--------------------------------------------------------------------------------
/audio_cases/afjiv.rttm:
--------------------------------------------------------------------------------
 1 | SPEAKER afjiv 1 41.120000 39.360000 <NA> <NA> spk00 <NA> <NA>
 2 | SPEAKER afjiv 1 140.640000 1.000000 <NA> <NA> spk01 <NA> <NA>
 3 | SPEAKER afjiv 1 142.200000 2.120000 <NA> <NA> spk01 <NA> <NA>
 4 | SPEAKER afjiv 1 144.600000 0.720000 <NA> <NA> spk01 <NA> <NA>
 5 | SPEAKER afjiv 1 80.920000 2.800000 <NA> <NA> spk02 <NA> <NA>
 6 | SPEAKER afjiv 1 84.840000 1.240000 <NA> <NA> spk02 <NA> <NA>
 7 | SPEAKER afjiv 1 87.080000 0.920000 <NA> <NA> spk02 <NA> <NA>
 8 | SPEAKER afjiv 1 88.720000 4.320000 <NA> <NA> spk02 <NA> <NA>
 9 | SPEAKER afjiv 1 93.080000 1.280000 <NA> <NA> spk02 <NA> <NA>
10 | SPEAKER afjiv 1 5.280000 29.240000 <NA> <NA> spk03 <NA> <NA>
11 | SPEAKER afjiv 1 34.680000 5.400000 <NA> <NA> spk03 <NA> <NA>
12 | SPEAKER afjiv 1 119.440000 2.880000 <NA> <NA> spk01 <NA> <NA>
13 | SPEAKER afjiv 1 40.120000 0.840000 <NA> <NA> spk03 <NA> <NA>
14 | SPEAKER afjiv 1 95.120000 2.880000 <NA> <NA> spk04 <NA> <NA>
15 | SPEAKER afjiv 1 98.720000 1.720000 <NA> <NA> spk04 <NA> <NA>
16 | SPEAKER afjiv 1 101.120000 3.480000 <NA> <NA> spk04 <NA> <NA>
17 | SPEAKER afjiv 1 105.560000 1.520000 <NA> <NA> spk04 <NA> <NA>
18 | SPEAKER afjiv 1 107.160000 0.440000 <NA> <NA> spk04 <NA> <NA>
19 | SPEAKER afjiv 1 108.160000 1.880000 <NA> <NA> spk04 <NA> <NA>
20 | SPEAKER afjiv 1 111.000000 2.280000 <NA> <NA> spk04 <NA> <NA>
21 | SPEAKER afjiv 1 113.840000 4.280000 <NA> <NA> spk04 <NA> <NA>
22 | SPEAKER afjiv 1 122.960000 1.600000 <NA> <NA> spk01 <NA> <NA>
23 | SPEAKER afjiv 1 125.200000 3.200000 <NA> <NA> spk01 <NA> <NA>
24 | SPEAKER afjiv 1 128.480000 1.160000 <NA> <NA> spk01 <NA> <NA>
25 | SPEAKER afjiv 1 130.880000 2.640000 <NA> <NA> spk01 <NA> <NA>
26 | SPEAKER afjiv 1 133.680000 0.440000 <NA> <NA> spk01 <NA> <NA>
27 | SPEAKER afjiv 1 135.720000 2.440000 <NA> <NA> spk01 <NA> <NA>
28 | SPEAKER afjiv 1 138.920000 1.560000 <NA> <NA> spk01 <NA> <NA>
29 | 


--------------------------------------------------------------------------------
/via_template.json:
--------------------------------------------------------------------------------
1 | {"project":{"pid":"__VIA_PROJECT_ID__","rev":"__VIA_PROJECT_REV_ID__","rev_timestamp":"__VIA_PROJECT_REV_TIMESTAMP__","pname":"multimodalspeakerdarization","creator":"VGG Image Annotator (http://www.robots.ox.ac.uk/~vgg/software/via)","created":1631583533823,"vid_list":["1","2","3","4"]},"config":{"file":{"loc_prefix":{"1":"","2":"","3":"","4":""}},"ui":{"file_content_align":"center","file_metadata_editor_visible":true,"spatial_metadata_editor_visible":true,"spatial_region_label_attribute_id":"","gtimeline_visible_row_count":"4"}},"attribute":{"1":{"aname":"Speaker","anchor_id":"FILE1_Z2_XY0","type":1,"desc":"Speaker timeline","options":{},"default_option_id":""}},"file":{"1":{"fid":"1","fname":"demo1.mp4","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1.mp4"},"2":{"fid":"2","fname":"demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"},"3":{"fid":"3","fname":"http://xxx.cn:8111/videos/demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"},"4":{"fid":"4","fname":"http://xxx.cn:8111/videos/demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"}},"metadata":{"1_hm4qcmrT":{"vid":"1","flg":0,"z":[0,1],"xy":[],"av":{"1":"speaker0"}},"1_S5f7fUv5":{"vid":"1","flg":0,"z":[2.13,4.484],"xy":[],"av":{"1":"speaker1"}},"1_Fsx9V7km":{"vid":"1","flg":0,"z":[1.65,3.004],"xy":[],"av":{"1":"speaker2"}},"1_C1021UfQ":{"vid":"1","flg":0,"z":[4.962,11.837],"xy":[],"av":{"1":"speaker2"}},"1_vAtWYixe":{"vid":"1","flg":0,"z":[11.795,14.358],"xy":[],"av":{"1":"speaker0"}},"1_UxW4rCi0":{"vid":"1","flg":0,"z":[14.129,17.087],"xy":[],"av":{"1":"speaker2"}},"1_ofdOvLZX":{"vid":"1","flg":0,"z":[16.9,20.733],"xy":[],"av":{"1":"speaker0"}},"2_nbCgiha8":{"vid":"2","flg":0,"z":[0.566,5.775],"xy":[],"av":{"1":"speaker3"}},"2_0FlFBaWZ":{"vid":"2","flg":0,"z":[6.016,10.691],"xy":[],"av":{"1":"speaker0"}},"2_C3lubRiL":{"vid":"2","flg":0,"z":[13.004,14.945],"xy":[],"av":{"1":"speaker2"}},"2_VE9c7l1A":{"vid":"2","flg":0,"z":[18.233,19.379],"xy":[],"av":{"1":"speaker
2"}},"2_Y6JIKqiy":{"vid":"2","flg":0,"z":[21.858,28.712],"xy":[],"av":{"1":"speaker3"}}},"view":{"1":{"fid_list":["1"]},"2":{"fid_list":["2"]},"3":{"fid_list":["3"]},"4":{"fid_list":["4"]}}}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Visualization Tools for Speaker Diarization
 2 | ## Introduction
 3 | 
 4 | The current landscape lacks a robust tool for diarization visualization, which is critical for the analysis of datasets and algorithm outcomes. In this repository, we offer intuitive methods to illustrate speaker diarization results. A pivotal criterion for selecting this visualization software was its capacity for interactive operation. While these visualization tools have room for improvement, they are the best available options at present.
 5 | 
 6 | [Go to: Visualization tool for Audio-only datasets ](#anchor_ao) 
 7 | 
 8 | [Go to: Visualization tool for Audio-visual datasets ](#anchor_av) 
 9 | 
10 | 
11 | <p id="anchor_ao"></p>  
12 | 
13 | ## Visualization for Audio-only datasets 
14 | 
15 | ### Step 1: Generating praat format:
16 | 
17 | ```
18 | python audio_visualized.py -rttm audio_cases/afjiv.rttm -audio_path audio_cases/afjiv.wav -praat_result audio_cases/afjiv.txt
19 | ```
20 | 
21 | * ``rttm`` --- the reference or system rttm 
22 | * ``audio_path`` --- the audio path
23 | * ``praat_result`` --- visualized result for praat software
24 | 
25 | (Example is from [VoxConverse](https://github.com/joonson/voxconverse))
26 | 
27 | ### Step 2: Import ``praat_result`` into Praat:
28 | - Install Praat [Mac](https://www.fon.hum.uva.nl/praat/download_mac.html) or [Windows](https://www.fon.hum.uva.nl/praat/download_win.html)
29 | - import ``praat_result`` into Praat 
30 |     - Open ``praat_result`` and ``audio``
31 |     - <img src='imgs/praat_import.png' width=50% />
32 |     - Select them all
33 |     - Click ``View & Edit``
34 | 
35 | ### Step 3: Overview
36 | 
37 | ![](imgs/praat_overview.png)
38 | 
39 | You can slide with a horizontal scroll. Speaker labels are shown in each timeline (e.g., ``spk00``, ``spk01`` ...).
40 | 
41 | Some useful shortcuts:
42 | 
43 | - ``CMD + A``: Show all utterances in one screen.
44 | - ``CMD + N``: Zoom in to the selected area.
45 | 
46 | <p id="anchor_av"></p> 
47 | 
48 | ## Visualization for Audio-visual datasets
49 | 
50 | ### Step 1: Generating VIA format
51 | 
52 | ```
53 | python audio_visual_visualized.py -rttm audio_visual_cases/00115.rttm -mp4_path audio_visual_cases/00115.mp4 -via_json_result audio_visual_cases/00115.json
54 | ```
55 | 
56 | * ``rttm`` --- the reference or system rttm 
57 | * ``mp4_path`` --- the mp4 path
58 | * ``via_json_result`` --- visualized result for VIA software
59 | 
60 | (Example is from [MSDWild](https://github.com/X-LANCE/MSDWILD))
61 | 
62 | > If the video cannot be previewed, or previews slowly, try converting it to an MP4 format that HTML5 supports:
63 | > ```
64 | > ffmpeg -i original.mp4 -vcodec libx264 -acodec aac -preset fast -movflags +faststart  previewed.mp4
65 | > ```
66 | 
67 | ### Step 2: Import ``via_json_result`` into VIA tools
68 | 
69 | - Download ``via_video_annotator.html`` from [URL](https://www.robots.ox.ac.uk/~vgg/software/via/downloads/via3/via-3.0.11.zip) or directly use an [online demo](https://www.robots.ox.ac.uk/~vgg/software/via/demo/via_video_annotator.html). This website is an offline client, and we have tested it on version ``via-3.0.11`` (see file: ``via_video_annotator_3.0.11.html`` in this repo).
70 | - Import JSON by clicking the ``folder button`` as follows:<img src='imgs/via_import.png' width=90% />
71 | - You can also modify the script to support online URLs from OSS (Object Storage Service).
72 | 
73 | ### Step 3: Overview
74 | 
75 | ![](imgs/via_example.png)
76 | 
77 | You can use the ``Space`` key to control ``Play/Pause Media.``
78 | 
79 | More keys can be found on:
80 | 
81 |  <img src='imgs/via_shortcut.png' width=20% />
82 |  
83 | 
84 | References
85 | =========
86 | - https://www.fon.hum.uva.nl/praat/
87 | - https://www.robots.ox.ac.uk/~vgg/software/via/
88 | 


--------------------------------------------------------------------------------
/audio_visualized.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | from scipy.io import wavfile
 3 | 
 4 | def get_rttm_dict(rttm_file):
 5 |     rttm_dict = dict()
 6 |     for line in open(rttm_file).readlines():
 7 |         items = line.replace("\n", "").split()
 8 |         filename, start_time, duration, spk_name = items[1], float(items[3]), float(items[4]), items[7]
 9 |         end_time = start_time + duration
10 | 
11 |         if spk_name not in rttm_dict.keys():
12 |             rttm_dict[spk_name] = []
13 |         rttm_dict[spk_name].append((start_time,end_time))
14 |     return rttm_dict
15 | 
def _build_tier_intervals(segments, wav_end_time):
    """Turn one speaker's speech segments into an ordered, gap-filled tier.

    Returns a list of ``(xmin, xmax, text)`` tuples sorted by time, where
    ``text`` is ``'speech'`` for a segment and ``''`` for the silence
    between segments (and before the first / after the last one).
    """
    # Merge segments that strictly overlap so the tier never goes backwards.
    merged = []
    for start, end in sorted(segments):
        if merged and start < merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))

    all_intervals = []
    cursor = 0  # end of the last interval emitted so far
    for start, end in merged:
        if start > cursor:
            # Only emit a silence interval when it has positive length.
            all_intervals.append((cursor, start, ''))
        all_intervals.append((start, end, 'speech'))
        cursor = end
    if wav_end_time > cursor:
        all_intervals.append((cursor, wav_end_time, ''))
    return all_intervals


def saved_to_text_grid(saved_to_path, current_speaker_num, wav_end_time, spk_dict):
    """Write per-speaker segments as a TextGrid text file with one tier per speaker.

    Args:
        saved_to_path: output file path (overwritten).
        current_speaker_num: number written to the top-level ``size`` field.
        wav_end_time: audio length in seconds, used as ``xmax`` of the file
            and of every tier.
        spk_dict: dict mapping speaker name -> iterable of
            ``(start_time, end_time)`` tuples.

    Intervals inside each tier are written in increasing time order: speech
    segments are labelled ``speech`` and the silences around them get an
    empty label. Overlapping segments of one speaker are merged and
    zero-length silences are omitted.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(saved_to_path, "w") as saved_obj:
        saved_obj.write("File type = \"ooTextFile\"\n")
        saved_obj.write("Object class = \"TextGrid\"\n")
        saved_obj.write("xmin = 0\n")
        saved_obj.write("xmax = %f\n"%(wav_end_time))
        saved_obj.write("tiers? <exists>\n")
        saved_obj.write("size = %d\n"%(current_speaker_num))
        saved_obj.write("item []: \n")

        for i, speaker_id in enumerate(spk_dict):
            saved_obj.write("    item [%d]:\n"%(i))
            saved_obj.write(" \n")
            saved_obj.write("        class = \"IntervalTier\"  \n")
            saved_obj.write("        name = \"%s\"  \n"%(speaker_id))
            saved_obj.write("        xmin = 0  \n")
            saved_obj.write("        xmax = %f  \n"%(wav_end_time))
            intervals = list(spk_dict[speaker_id])

            if len(intervals) == 0:
                saved_obj.write("        intervals: size = 0  \n")
                continue

            all_intervals = _build_tier_intervals(intervals, wav_end_time)

            saved_obj.write("        intervals: size = %d  \n"%(len(all_intervals)))

            for index, items in enumerate(all_intervals):
                saved_obj.write("        intervals [%d]:\n"%(index))
                saved_obj.write("            xmin = %0.15f \n"%(items[0]))
                saved_obj.write("            xmax = %0.15f \n"%(items[1]))
                saved_obj.write("            text = \"%s\" \n"%(items[2]))
66 | 
def main():
    """Command-line entry point: convert an RTTM file into a Praat TextGrid.

    Reads the RTTM given by ``-rttm``, takes the audio length from the WAV
    file given by ``-audio_path`` and writes the TextGrid to the path given
    by ``-praat_result`` (default ``praat_result.txt``).
    """
    parser = ArgumentParser(
        description='Speaker diarization visualization tool for audio modality.', add_help=True,
        usage='%(prog)s [options]')
    # Both inputs are mandatory: without them the script would only fail later
    # with an unhelpful error when trying to open ``None``.
    parser.add_argument('-rttm', dest='rttm_fns', required=True, help='reference or system RTTM file (required)')
    parser.add_argument('-audio_path', dest='audio_path', required=True, help='audio (wav) file matching the RTTM (required)')
    parser.add_argument('-praat_result', dest='praat_result_path', help='praat_result_path', default='praat_result.txt')
    args = parser.parse_args()

    rttm_dict = get_rttm_dict(args.rttm_fns)

    # Audio length in seconds = number of samples / sample rate.
    sr, audio = wavfile.read(args.audio_path)
    length = audio.shape[0] / sr

    saved_to_text_grid(args.praat_result_path, len(rttm_dict), length, rttm_dict)
82 | 
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
85 | 


--------------------------------------------------------------------------------
/audio_visual_visualized.py:
--------------------------------------------------------------------------------
  1 | from argparse import ArgumentParser
  2 | from scipy.io import wavfile
  3 | import os,pdb,json,random
  4 | 
  5 | def get_random_sample():
  6 |     return random.sample('zyxwvutsrqponmlkjihgfedcba',8)
  7 | 
  8 | def read_rttm_duration(rttm_file_path):
  9 |     duration_dict = dict()
 10 |     duration_dict_index_to_file = dict()
 11 |     duration_dict_file_to_index = dict()
 12 | 
 13 |     index = 0
 14 |     for line in open(rttm_file_path).readlines():
 15 |         items = line.replace("\n","").split()
 16 |         #SPEAKER xxx 0 start duration <NA> <NA> spk_name <NA> <NA>
 17 |         filename = items[1]
 18 |         start_time = float(items[3])
 19 |         duration_time = float(items[4])
 20 |         end_time = start_time + duration_time
 21 |         speaker_name = items[7]
 22 |         if filename not in duration_dict.keys():
 23 |             duration_dict[filename] = []
 24 |             duration_dict_index_to_file[str(index)] = filename
 25 |             duration_dict_file_to_index[filename] = str(index)
 26 |             index += 1
 27 | 
 28 |         duration_dict[filename].append((duration_dict_file_to_index[filename],start_time, end_time,speaker_name))
 29 | 
 30 |     return duration_dict, duration_dict_index_to_file, duration_dict_file_to_index
 31 | 
 32 | def get_local_prefix(duration_dict_index_to_file):
 33 |     dict_ = dict()
 34 | 
 35 |     for i in duration_dict_index_to_file.keys():
 36 |         dict_[i] = ''
 37 |     return dict_
 38 | 
 39 | 
 40 | def get_file_json(duration_dict_index_to_file):
 41 |     file_json = dict()
 42 | 
 43 |     for index in duration_dict_index_to_file.keys():
 44 |         filename = duration_dict_index_to_file[index]
 45 |         file_json[index] = dict()
 46 |         file_json[index]["fid"] = index
 47 |         file_json[index]["fname"] = filename
 48 |         file_json[index]["type"] = 4
 49 |         file_json[index]["loc"] = 0
 50 |         file_json[index]["src"] =  "file:///%s/audio_visual_cases/%s.mp4"%(os.path.abspath('.'),filename) # local file or oss file path
 51 | 
 52 |     return file_json
 53 | 
 54 | 
 55 | def get_duration_json(duration_dict):
 56 |     dict_ = dict()
 57 | 
 58 |     count = 0
 59 |     for key in duration_dict:
 60 |         for item in duration_dict[key]:
 61 |             index = item[0]
 62 |             id_ =  str(count)+ "_"+''.join(get_random_sample())
 63 |             start_time = item[1]
 64 |             end_time = item[2]
 65 |             spk_name = item[3]
 66 |             dict_[id_] = dict()
 67 |             dict_[id_]['vid'] = index
 68 |             dict_[id_]['flg'] = 0
 69 |             dict_[id_]['z'] = [start_time, end_time]
 70 |             dict_[id_]['xy'] = []
 71 |             dict_[id_]['av'] = dict()
 72 |             dict_[id_]['av']["1"] = spk_name
 73 |             dict_[id_]['av']["5"] = "0"
 74 |             count += 1
 75 |     return dict_
 76 | 
 77 | def get_view_dict(duration_dict_index_to_file):
 78 |     dict_ = dict()
 79 |     for index in duration_dict_index_to_file.keys():
 80 |         dict_[index] = dict()
 81 |         dict_[index]['fid_list'] = [index]
 82 |     return dict_
 83 | 
 84 | def generating_json(duration_dict,duration_dict_index_to_file, duration_dict_file_to_index, output_json_file):
 85 |     via_json_obj = json.load(open("via_template.json"))
 86 | 
 87 |     via_json_obj["config"]["ui"]["gtimeline_visible_row_count"] = 6
 88 |     via_json_obj["config"]["ui"]["file_content_align"] = "center"
 89 |     via_json_obj["config"]["ui"]["file_metadata_editor_visible"] = True
 90 |     via_json_obj["config"]["ui"]["spatial_metadata_editor_visible"] = True
 91 |     via_json_obj["config"]["ui"]["temporal_segment_metadata_editor_visible"] = True
 92 |     via_json_obj["config"]["file"] = {
 93 |                                         "loc_prefix": {
 94 |                                             "0": "",
 95 |                                         }
 96 |                                      }
 97 | 
 98 |     via_json_obj['project']["vid_list"] = list(duration_dict_index_to_file.keys())
 99 | 
100 |     via_json_obj['config']['file']['loc_prefix'] = get_local_prefix(duration_dict_index_to_file)
101 | 
102 |     via_json_obj['file'] = get_file_json(duration_dict_index_to_file)
103 | 
104 |     via_json_obj['metadata'] = get_duration_json(duration_dict)
105 | 
106 |     via_json_obj['view'] = get_view_dict(duration_dict_index_to_file)
107 | 
108 |     open(output_json_file,"w").write(json.dumps(via_json_obj))
109 | 
def main():
    """Command-line entry point: convert an RTTM file into a VIA project JSON.

    Reads the RTTM given by ``-rttm`` and writes the VIA project to the path
    given by ``-via_json_result`` (default ``via_result.json``).
    """
    parser = ArgumentParser(
        description='Speaker diarization visualization tool for audio-visual modality.', add_help=True,
        usage='%(prog)s [options]')
    # The RTTM input is mandatory: without it the script would only fail
    # later with an unhelpful error when trying to open ``None``.
    parser.add_argument('-rttm', dest='rttm_fns', required=True, help='reference or system RTTM file (required)')
    # NOTE(review): -mp4_path is parsed but its value is not used anywhere in
    # this function; the video location is decided inside get_file_json.
    parser.add_argument('-mp4_path', dest='video_path', help='mp4 local files (default: %(default)s)')
    parser.add_argument('-via_json_result', dest='via_json_result', help='VIA JSON output path', default='via_result.json')
    args = parser.parse_args()

    duration_dict,duration_dict_index_to_file, duration_dict_file_to_index = read_rttm_duration(args.rttm_fns)
    generating_json(duration_dict,duration_dict_index_to_file, duration_dict_file_to_index, args.via_json_result)
121 | 
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/audio_cases/afjiv.txt:
--------------------------------------------------------------------------------
  1 | File type = "ooTextFile"
  2 | Object class = "TextGrid"
  3 | xmin = 0
  4 | xmax = 151.248000
  5 | tiers? <exists>
  6 | size = 5
  7 | item []: 
  8 |     item [0]:
  9 |  
 10 |         class = "IntervalTier"  
 11 |         name = "spk00"  
 12 |         xmin = 0  
 13 |         xmax = 151.248000  
 14 |         intervals: size = 3  
 15 |         intervals [0]:
 16 |             xmin = 0.000000000000000 
 17 |             xmax = 41.119999999999997 
 18 |             text = "" 
 19 |         intervals [1]:
 20 |             xmin = 41.119999999999997 
 21 |             xmax = 80.479999999999990 
 22 |             text = "speech" 
 23 |         intervals [2]:
 24 |             xmin = 80.479999999999990 
 25 |             xmax = 151.247999999999990 
 26 |             text = "" 
 27 |     item [1]:
 28 |  
 29 |         class = "IntervalTier"  
 30 |         name = "spk01"  
 31 |         xmin = 0  
 32 |         xmax = 151.248000  
 33 |         intervals: size = 22  
 34 |         intervals [0]:
 35 |             xmin = 0.000000000000000 
 36 |             xmax = 119.439999999999998 
 37 |             text = "" 
 38 |         intervals [1]:
 39 |             xmin = 119.439999999999998 
 40 |             xmax = 122.319999999999993 
 41 |             text = "speech" 
 42 |         intervals [2]:
 43 |             xmin = 122.319999999999993 
 44 |             xmax = 122.959999999999994 
 45 |             text = "" 
 46 |         intervals [3]:
 47 |             xmin = 122.959999999999994 
 48 |             xmax = 124.559999999999988 
 49 |             text = "speech" 
 50 |         intervals [4]:
 51 |             xmin = 124.559999999999988 
 52 |             xmax = 125.200000000000003 
 53 |             text = "" 
 54 |         intervals [5]:
 55 |             xmin = 125.200000000000003 
 56 |             xmax = 128.400000000000006 
 57 |             text = "speech" 
 58 |         intervals [6]:
 59 |             xmin = 128.400000000000006 
 60 |             xmax = 128.479999999999990 
 61 |             text = "" 
 62 |         intervals [7]:
 63 |             xmin = 128.479999999999990 
 64 |             xmax = 129.639999999999986 
 65 |             text = "speech" 
 66 |         intervals [8]:
 67 |             xmin = 129.639999999999986 
 68 |             xmax = 130.879999999999995 
 69 |             text = "" 
 70 |         intervals [9]:
 71 |             xmin = 130.879999999999995 
 72 |             xmax = 133.519999999999982 
 73 |             text = "speech" 
 74 |         intervals [10]:
 75 |             xmin = 133.519999999999982 
 76 |             xmax = 133.680000000000007 
 77 |             text = "" 
 78 |         intervals [11]:
 79 |             xmin = 133.680000000000007 
 80 |             xmax = 134.120000000000005 
 81 |             text = "speech" 
 82 |         intervals [12]:
 83 |             xmin = 134.120000000000005 
 84 |             xmax = 135.719999999999999 
 85 |             text = "" 
 86 |         intervals [13]:
 87 |             xmin = 135.719999999999999 
 88 |             xmax = 138.159999999999997 
 89 |             text = "speech" 
 90 |         intervals [14]:
 91 |             xmin = 138.159999999999997 
 92 |             xmax = 138.919999999999987 
 93 |             text = "" 
 94 |         intervals [15]:
 95 |             xmin = 138.919999999999987 
 96 |             xmax = 140.479999999999990 
 97 |             text = "speech" 
 98 |         intervals [16]:
 99 |             xmin = 140.479999999999990 
100 |             xmax = 140.639999999999986 
101 |             text = "" 
102 |         intervals [17]:
103 |             xmin = 140.639999999999986 
104 |             xmax = 141.639999999999986 
105 |             text = "speech" 
106 |         intervals [18]:
107 |             xmin = 141.639999999999986 
108 |             xmax = 142.199999999999989 
109 |             text = "" 
110 |         intervals [19]:
111 |             xmin = 142.199999999999989 
112 |             xmax = 144.319999999999993 
113 |             text = "speech" 
114 |         intervals [20]:
115 |             xmin = 145.319999999999993 
116 |             xmax = 151.247999999999990 
117 |             text = "" 
118 |         intervals [21]:
119 |             xmin = 144.599999999999994 
120 |             xmax = 145.319999999999993 
121 |             text = "speech" 
122 |     item [2]:
123 |  
124 |         class = "IntervalTier"  
125 |         name = "spk02"  
126 |         xmin = 0  
127 |         xmax = 151.248000  
128 |         intervals: size = 10  
129 |         intervals [0]:
130 |             xmin = 0.000000000000000 
131 |             xmax = 80.920000000000002 
132 |             text = "" 
133 |         intervals [1]:
134 |             xmin = 80.920000000000002 
135 |             xmax = 83.719999999999999 
136 |             text = "speech" 
137 |         intervals [2]:
138 |             xmin = 83.719999999999999 
139 |             xmax = 84.840000000000003 
140 |             text = "" 
141 |         intervals [3]:
142 |             xmin = 84.840000000000003 
143 |             xmax = 86.079999999999998 
144 |             text = "speech" 
145 |         intervals [4]:
146 |             xmin = 86.079999999999998 
147 |             xmax = 87.079999999999998 
148 |             text = "" 
149 |         intervals [5]:
150 |             xmin = 87.079999999999998 
151 |             xmax = 88.000000000000000 
152 |             text = "speech" 
153 |         intervals [6]:
154 |             xmin = 88.000000000000000 
155 |             xmax = 88.719999999999999 
156 |             text = "" 
157 |         intervals [7]:
158 |             xmin = 88.719999999999999 
159 |             xmax = 93.039999999999992 
160 |             text = "speech" 
161 |         intervals [8]:
162 |             xmin = 94.359999999999999 
163 |             xmax = 151.247999999999990 
164 |             text = "" 
165 |         intervals [9]:
166 |             xmin = 93.079999999999998 
167 |             xmax = 94.359999999999999 
168 |             text = "speech" 
169 |     item [3]:
170 |  
171 |         class = "IntervalTier"  
172 |         name = "spk03"  
173 |         xmin = 0  
174 |         xmax = 151.248000  
175 |         intervals: size = 6  
176 |         intervals [0]:
177 |             xmin = 0.000000000000000 
178 |             xmax = 5.280000000000000 
179 |             text = "" 
180 |         intervals [1]:
181 |             xmin = 5.280000000000000 
182 |             xmax = 34.519999999999996 
183 |             text = "speech" 
184 |         intervals [2]:
185 |             xmin = 34.519999999999996 
186 |             xmax = 34.680000000000000 
187 |             text = "" 
188 |         intervals [3]:
189 |             xmin = 34.680000000000000 
190 |             xmax = 40.079999999999998 
191 |             text = "speech" 
192 |         intervals [4]:
193 |             xmin = 40.960000000000001 
194 |             xmax = 151.247999999999990 
195 |             text = "" 
196 |         intervals [5]:
197 |             xmin = 40.119999999999997 
198 |             xmax = 40.960000000000001 
199 |             text = "speech" 
200 |     item [4]:
201 |  
202 |         class = "IntervalTier"  
203 |         name = "spk04"  
204 |         xmin = 0  
205 |         xmax = 151.248000  
206 |         intervals: size = 16  
207 |         intervals [0]:
208 |             xmin = 0.000000000000000 
209 |             xmax = 95.120000000000005 
210 |             text = "" 
211 |         intervals [1]:
212 |             xmin = 95.120000000000005 
213 |             xmax = 98.000000000000000 
214 |             text = "speech" 
215 |         intervals [2]:
216 |             xmin = 98.000000000000000 
217 |             xmax = 98.719999999999999 
218 |             text = "" 
219 |         intervals [3]:
220 |             xmin = 98.719999999999999 
221 |             xmax = 100.439999999999998 
222 |             text = "speech" 
223 |         intervals [4]:
224 |             xmin = 100.439999999999998 
225 |             xmax = 101.120000000000005 
226 |             text = "" 
227 |         intervals [5]:
228 |             xmin = 101.120000000000005 
229 |             xmax = 104.600000000000009 
230 |             text = "speech" 
231 |         intervals [6]:
232 |             xmin = 104.600000000000009 
233 |             xmax = 105.560000000000002 
234 |             text = "" 
235 |         intervals [7]:
236 |             xmin = 105.560000000000002 
237 |             xmax = 107.079999999999998 
238 |             text = "speech" 
239 |         intervals [8]:
240 |             xmin = 107.079999999999998 
241 |             xmax = 107.159999999999997 
242 |             text = "" 
243 |         intervals [9]:
244 |             xmin = 107.159999999999997 
245 |             xmax = 107.599999999999994 
246 |             text = "speech" 
247 |         intervals [10]:
248 |             xmin = 107.599999999999994 
249 |             xmax = 108.159999999999997 
250 |             text = "" 
251 |         intervals [11]:
252 |             xmin = 108.159999999999997 
253 |             xmax = 110.039999999999992 
254 |             text = "speech" 
255 |         intervals [12]:
256 |             xmin = 110.039999999999992 
257 |             xmax = 111.000000000000000 
258 |             text = "" 
259 |         intervals [13]:
260 |             xmin = 111.000000000000000 
261 |             xmax = 113.280000000000001 
262 |             text = "speech" 
263 |         intervals [14]:
264 |             xmin = 118.120000000000005 
265 |             xmax = 151.247999999999990 
266 |             text = "" 
267 |         intervals [15]:
268 |             xmin = 113.840000000000003 
269 |             xmax = 118.120000000000005 
270 |             text = "speech" 
271 | 


--------------------------------------------------------------------------------