├── .gitignore ├── audio_cases ├── afjiv.wav ├── afjiv.rttm └── afjiv.txt ├── imgs ├── praat_import.png ├── via_example.png ├── via_import.png ├── via_shortcut.png └── praat_overview.png ├── audio_visual_cases ├── 00115.mp4 └── 00115.rttm ├── via_template.json ├── README.md ├── audio_visualized.py └── audio_visual_visualized.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /audio_cases/afjiv.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/audio_cases/afjiv.wav -------------------------------------------------------------------------------- /imgs/praat_import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/praat_import.png -------------------------------------------------------------------------------- /imgs/via_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_example.png -------------------------------------------------------------------------------- /imgs/via_import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_import.png -------------------------------------------------------------------------------- /imgs/via_shortcut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/via_shortcut.png -------------------------------------------------------------------------------- /imgs/praat_overview.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/imgs/praat_overview.png -------------------------------------------------------------------------------- /audio_visual_cases/00115.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liutaocode/DiarizationVisualization/HEAD/audio_visual_cases/00115.mp4 -------------------------------------------------------------------------------- /audio_visual_cases/00115.rttm: -------------------------------------------------------------------------------- 1 | SPEAKER 00115 0 0.000000 1.606000 1 2 | SPEAKER 00115 0 2.315000 3.521000 1 3 | SPEAKER 00115 0 6.899000 1.978000 1 4 | SPEAKER 00115 0 9.294000 10.225000 1 5 | SPEAKER 00115 0 1.898000 0.771000 0 6 | SPEAKER 00115 0 5.885000 4.118750 0 7 | SPEAKER 00115 0 10.927000 1.000000 0 8 | SPEAKER 00115 0 13.836000 0.625000 0 9 | SPEAKER 00115 0 19.565000 0.459000 0 10 | -------------------------------------------------------------------------------- /audio_cases/afjiv.rttm: -------------------------------------------------------------------------------- 1 | SPEAKER afjiv 1 41.120000 39.360000 spk00 2 | SPEAKER afjiv 1 140.640000 1.000000 spk01 3 | SPEAKER afjiv 1 142.200000 2.120000 spk01 4 | SPEAKER afjiv 1 144.600000 0.720000 spk01 5 | SPEAKER afjiv 1 80.920000 2.800000 spk02 6 | SPEAKER afjiv 1 84.840000 1.240000 spk02 7 | SPEAKER afjiv 1 87.080000 0.920000 spk02 8 | SPEAKER afjiv 1 88.720000 4.320000 spk02 9 | SPEAKER afjiv 1 93.080000 1.280000 spk02 10 | SPEAKER afjiv 1 5.280000 29.240000 spk03 11 | SPEAKER afjiv 1 34.680000 5.400000 spk03 12 | SPEAKER afjiv 1 119.440000 2.880000 spk01 13 | SPEAKER afjiv 1 40.120000 0.840000 spk03 14 | SPEAKER afjiv 1 95.120000 2.880000 spk04 15 | SPEAKER afjiv 1 98.720000 1.720000 spk04 16 | SPEAKER afjiv 1 101.120000 3.480000 spk04 17 | SPEAKER afjiv 1 105.560000 
1.520000 spk04 18 | SPEAKER afjiv 1 107.160000 0.440000 spk04 19 | SPEAKER afjiv 1 108.160000 1.880000 spk04 20 | SPEAKER afjiv 1 111.000000 2.280000 spk04 21 | SPEAKER afjiv 1 113.840000 4.280000 spk04 22 | SPEAKER afjiv 1 122.960000 1.600000 spk01 23 | SPEAKER afjiv 1 125.200000 3.200000 spk01 24 | SPEAKER afjiv 1 128.480000 1.160000 spk01 25 | SPEAKER afjiv 1 130.880000 2.640000 spk01 26 | SPEAKER afjiv 1 133.680000 0.440000 spk01 27 | SPEAKER afjiv 1 135.720000 2.440000 spk01 28 | SPEAKER afjiv 1 138.920000 1.560000 spk01 29 | -------------------------------------------------------------------------------- /via_template.json: -------------------------------------------------------------------------------- 1 | {"project":{"pid":"__VIA_PROJECT_ID__","rev":"__VIA_PROJECT_REV_ID__","rev_timestamp":"__VIA_PROJECT_REV_TIMESTAMP__","pname":"multimodalspeakerdarization","creator":"VGG Image Annotator (http://www.robots.ox.ac.uk/~vgg/software/via)","created":1631583533823,"vid_list":["1","2","3","4"]},"config":{"file":{"loc_prefix":{"1":"","2":"","3":"","4":""}},"ui":{"file_content_align":"center","file_metadata_editor_visible":true,"spatial_metadata_editor_visible":true,"spatial_region_label_attribute_id":"","gtimeline_visible_row_count":"4"}},"attribute":{"1":{"aname":"Speaker","anchor_id":"FILE1_Z2_XY0","type":1,"desc":"Speaker 
timeline","options":{},"default_option_id":""}},"file":{"1":{"fid":"1","fname":"demo1.mp4","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1.mp4"},"2":{"fid":"2","fname":"demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"},"3":{"fid":"3","fname":"http://xxx.cn:8111/videos/demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"},"4":{"fid":"4","fname":"http://xxx.cn:8111/videos/demo1","type":4,"loc":2,"src":"http://xxx.cn:8111/videos/demo1"}},"metadata":{"1_hm4qcmrT":{"vid":"1","flg":0,"z":[0,1],"xy":[],"av":{"1":"speaker0"}},"1_S5f7fUv5":{"vid":"1","flg":0,"z":[2.13,4.484],"xy":[],"av":{"1":"speaker1"}},"1_Fsx9V7km":{"vid":"1","flg":0,"z":[1.65,3.004],"xy":[],"av":{"1":"speaker2"}},"1_C1021UfQ":{"vid":"1","flg":0,"z":[4.962,11.837],"xy":[],"av":{"1":"speaker2"}},"1_vAtWYixe":{"vid":"1","flg":0,"z":[11.795,14.358],"xy":[],"av":{"1":"speaker0"}},"1_UxW4rCi0":{"vid":"1","flg":0,"z":[14.129,17.087],"xy":[],"av":{"1":"speaker2"}},"1_ofdOvLZX":{"vid":"1","flg":0,"z":[16.9,20.733],"xy":[],"av":{"1":"speaker0"}},"2_nbCgiha8":{"vid":"2","flg":0,"z":[0.566,5.775],"xy":[],"av":{"1":"speaker3"}},"2_0FlFBaWZ":{"vid":"2","flg":0,"z":[6.016,10.691],"xy":[],"av":{"1":"speaker0"}},"2_C3lubRiL":{"vid":"2","flg":0,"z":[13.004,14.945],"xy":[],"av":{"1":"speaker2"}},"2_VE9c7l1A":{"vid":"2","flg":0,"z":[18.233,19.379],"xy":[],"av":{"1":"speaker2"}},"2_Y6JIKqiy":{"vid":"2","flg":0,"z":[21.858,28.712],"xy":[],"av":{"1":"speaker3"}}},"view":{"1":{"fid_list":["1"]},"2":{"fid_list":["2"]},"3":{"fid_list":["3"]},"4":{"fid_list":["4"]}}} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visualization Tools for Speaker Diarization 2 | ## Introduction 3 | 4 | The current landscape lacks a robust tool for diarization visualization, which is critical for the analysis of datasets and algorithm outcomes. 
In this repository, we offer intuitive methods to illustrate speaker diarization results. A pivotal criterion for selecting this visualization software was its capacity for interactive operation. While these visualization tools have room for improvement, they are the best available options at present. 5 | 6 | [Go to: Visualization tool for Audio-only datasets ](#anchor_ao) 7 | 8 | [Go to: Visualization tool for Audio-visual datasets ](#anchor_av) 9 | 10 | 11 |

12 | 13 | ## Visualization for Audio-only datasets 14 | 15 | ### Step 1: Generating Praat format 16 | 17 | ``` 18 | python audio_visualized.py -rttm audio_cases/afjiv.rttm -audio_path audio_cases/afjiv.wav -praat_result audio_cases/afjiv.txt 19 | ``` 20 | 21 | * ``rttm`` --- the reference or system rttm 22 | * ``audio_path`` --- the audio path 23 | * ``praat_result`` --- visualized result for the Praat software 24 | 25 | (Example is from [VoxConverse](https://github.com/joonson/voxconverse)) 26 | 27 | ### Step 2: Import ``praat_result`` into Praat 28 | - Install Praat for [Mac](https://www.fon.hum.uva.nl/praat/download_mac.html) or [Windows](https://www.fon.hum.uva.nl/praat/download_win.html) 29 | - Import ``praat_result`` into Praat 30 | - Open ``praat_result`` and ``audio`` 31 | - ![](imgs/praat_import.png) 32 | - Select them all 33 | - Click ``View & Edit`` 34 | 35 | ### Step 3: Overview 36 | 37 | ![](imgs/praat_overview.png) 38 | 39 | You can scroll horizontally along the timeline. Speaker labels are shown in each timeline (e.g., ``spk00``, ``spk01`` ...). 40 | 41 | Some useful shortcuts: 42 | 43 | - ``CMD + A``: Show all utterances in one screen. 44 | - ``CMD + N``: Zoom in to the selected area. 45 | 46 |

47 | 48 | ## Visualization for Audio-visual datasets 49 | 50 | ### Step 1: Generating VIA format 51 | 52 | ``` 53 | python audio_visual_visualized.py -rttm audio_visual_cases/00115.rttm -mp4_path audio_visual_cases/00115.mp4 -via_json_result audio_visual_cases/00115.json 54 | ``` 55 | 56 | * ``rttm`` --- the reference or system rttm 57 | * ``mp4_path`` --- the mp4 path 58 | * ``via_json_result`` --- visualized result for VIA software 59 | 60 | (Example is from [MSDWild](https://github.com/X-LANCE/MSDWILD)) 61 | 62 | > If the video cannot be previewed, or the preview is slow to load, try converting it to an HTML5-compatible mp4: 63 | > ``` 64 | > ffmpeg -i original.mp4 -vcodec libx264 -acodec aac -preset fast -movflags +faststart previewed.mp4 65 | > ``` 66 | 67 | ### Step 2: Import ``via_json_result`` into VIA tools 68 | 69 | - Download ``via_video_annotator.html`` from [URL](https://www.robots.ox.ac.uk/~vgg/software/via/downloads/via3/via-3.0.11.zip) or directly use an [online demo](https://www.robots.ox.ac.uk/~vgg/software/via/demo/via_video_annotator.html). This page works as an offline client, and we have tested version ``via-3.0.11`` (see file: ``via_video_annotator_3.0.11.html`` in this repo). 70 | - Import JSON by clicking the ``folder button`` as follows: ![](imgs/via_import.png) 71 | - You can also modify the script to support online URLs from OSS (Object Storage Service). 
# audio_visualized.py -- turn an RTTM file into a Praat TextGrid (one tier per speaker).
from argparse import ArgumentParser
from scipy.io import wavfile


def get_rttm_dict(rttm_file):
    """Group the segments of an RTTM file by speaker name.

    Every non-blank line is split on whitespace; field 3 is read as the
    segment start, field 4 as its duration and field 7 as the speaker name
    (0-based field indices).

    Args:
        rttm_file: path of the RTTM file to read.

    Returns:
        dict mapping speaker name -> list of ``(start_time, end_time)``
        float tuples, in the order the lines appear in the file.

    Raises:
        IndexError: a non-blank line has fewer than 8 fields.
        ValueError: the start or duration field is not a number.
    """
    rttm_dict = dict()
    with open(rttm_file) as rttm_obj:
        for line in rttm_obj:
            items = line.split()
            if not items:
                # Blank lines (e.g. a trailing newline) carry no segment.
                continue
            start_time, duration, spk_name = float(items[3]), float(items[4]), items[7]
            rttm_dict.setdefault(spk_name, []).append((start_time, start_time + duration))
    return rttm_dict


def _build_tier_intervals(segments, wav_end_time):
    """Return an ordered, gap-free list of ``(xmin, xmax, text)`` for one tier.

    Segments are sorted, clipped to ``[0, wav_end_time]`` and merged when they
    overlap, then the silence between them is filled with empty-text
    intervals so the result covers ``0 .. wav_end_time`` without holes.
    """
    merged = []
    for start, end in sorted(segments):
        start = max(0.0, start)
        end = min(end, wav_end_time)
        if end <= start:
            # Empty after clipping (zero duration or entirely past the audio).
            continue
        if merged and start < merged[-1][1]:
            # Overlaps the previous segment of the same speaker: extend it.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    tier = []
    cursor = 0
    for start, end in merged:
        if start > cursor:
            tier.append((cursor, start, ''))
        tier.append((start, end, 'speech'))
        cursor = end
    if cursor < wav_end_time or not tier:
        # Trailing silence; also the single interval of a tier with no speech.
        tier.append((cursor, wav_end_time, ''))
    return tier


def saved_to_text_grid(saved_to_path, current_speaker_num, wav_end_time, spk_dict):
    """Write a long-format Praat TextGrid with one IntervalTier per speaker.

    Each tier alternates empty-text and ``"speech"`` intervals in increasing
    time order and spans ``0 .. wav_end_time``. (The previous implementation
    appended the trailing silence before the last gap/speech pair, so the
    final intervals of every tier were out of order.)

    Args:
        saved_to_path: output file path.
        current_speaker_num: kept for backward compatibility; the tier count
            written to the header is taken from ``spk_dict`` so it always
            matches the tiers that follow.
        wav_end_time: total duration, used as ``xmax`` of the file and tiers.
        spk_dict: dict mapping speaker name -> iterable of
            ``(start_time, end_time)`` pairs, as built by ``get_rttm_dict``.
    """
    speaker_ids = list(spk_dict.keys())

    with open(saved_to_path, "w") as saved_obj:
        saved_obj.write("File type = \"ooTextFile\"\n")
        saved_obj.write("Object class = \"TextGrid\"\n")
        saved_obj.write("xmin = 0\n")
        saved_obj.write("xmax = %f\n" % (wav_end_time))
        saved_obj.write("tiers? <exists> \n")
        saved_obj.write("size = %d\n" % (len(speaker_ids)))
        saved_obj.write("item []: \n")

        # Praat numbers tiers and intervals from 1 in its own output.
        for tier_no, speaker_id in enumerate(speaker_ids, 1):
            # A double quote inside a Praat string is written as two quotes.
            tier_name = str(speaker_id).replace('"', '""')
            saved_obj.write("    item [%d]:\n" % (tier_no))
            saved_obj.write("        class = \"IntervalTier\" \n")
            saved_obj.write("        name = \"%s\" \n" % (tier_name))
            saved_obj.write("        xmin = 0 \n")
            saved_obj.write("        xmax = %f \n" % (wav_end_time))

            all_intervals = _build_tier_intervals(spk_dict[speaker_id], wav_end_time)
            saved_obj.write("        intervals: size = %d \n" % (len(all_intervals)))

            for interval_no, (xmin, xmax, text) in enumerate(all_intervals, 1):
                saved_obj.write("        intervals [%d]:\n" % (interval_no))
                saved_obj.write("            xmin = %0.15f \n" % (xmin))
                saved_obj.write("            xmax = %0.15f \n" % (xmax))
                saved_obj.write("            text = \"%s\" \n" % (text))


def main():
    """Command-line entry point: RTTM + WAV in, Praat TextGrid out."""
    parser = ArgumentParser(
        description='Speaker diarization visualization tool for audio modality.', add_help=True,
        usage='%(prog)s [options]')
    parser.add_argument('-rttm', dest='rttm_fns', required=True,
                        help='reference or system RTTM file')
    parser.add_argument('-audio_path', dest='audio_path', required=True,
                        help='WAV file the RTTM refers to (used for the total duration)')
    parser.add_argument('-praat_result', dest='praat_result_path',
                        help='output TextGrid path (default: %(default)s)', default='praat_result.txt')
    args = parser.parse_args()

    rttm_dict = get_rttm_dict(args.rttm_fns)

    # Duration = number of samples / sample rate.
    sr, audio = wavfile.read(args.audio_path)
    length = audio.shape[0] / float(sr)

    saved_to_text_grid(args.praat_result_path, len(rttm_dict.keys()), length, rttm_dict)


if __name__ == "__main__":
    main()
# audio_visual_visualized.py -- turn an RTTM file into a VIA video-annotator project JSON.
from argparse import ArgumentParser
from pathlib import Path
from scipy.io import wavfile
import os,pdb,json,random


def get_random_sample():
    """Return a list of 8 distinct random lowercase letters."""
    return random.sample('zyxwvutsrqponmlkjihgfedcba',8)


def read_rttm_duration(rttm_file_path):
    """Parse an RTTM file into per-recording segment lists.

    Every non-blank line is split on whitespace; field 1 is the recording
    name, field 3 the start, field 4 the duration and field 7 the speaker
    name (0-based field indices). Recordings are numbered "0", "1", ... in
    order of first appearance.

    Args:
        rttm_file_path: path of the RTTM file to read.

    Returns:
        A 3-tuple ``(duration_dict, index_to_file, file_to_index)``:
        ``duration_dict`` maps recording name -> list of
        ``(index, start_time, end_time, speaker_name)``; the other two map
        the string index to the recording name and back.
    """
    duration_dict = dict()
    duration_dict_index_to_file = dict()
    duration_dict_file_to_index = dict()

    index = 0
    with open(rttm_file_path) as rttm_obj:
        for line in rttm_obj:
            items = line.split()
            if not items:
                # Blank lines (e.g. a trailing newline) carry no segment.
                continue
            # SPEAKER xxx 0 start duration <NA> <NA> spk_name
            filename = items[1]
            start_time = float(items[3])
            end_time = start_time + float(items[4])
            speaker_name = items[7]
            if filename not in duration_dict:
                duration_dict[filename] = []
                duration_dict_index_to_file[str(index)] = filename
                duration_dict_file_to_index[filename] = str(index)
                index += 1

            duration_dict[filename].append(
                (duration_dict_file_to_index[filename], start_time, end_time, speaker_name))

    return duration_dict, duration_dict_index_to_file, duration_dict_file_to_index


def get_local_prefix(duration_dict_index_to_file):
    """Return ``{index: ''}`` for every recording index (empty location prefix)."""
    return {index: '' for index in duration_dict_index_to_file.keys()}


def _video_file_for(filename, video_path, single_recording):
    """Pick the local mp4 path for one recording.

    * ``video_path`` is None: ``<cwd>/audio_visual_cases/<filename>.mp4``
      (the location the script always used before).
    * ``video_path`` is a directory: ``<video_path>/<filename>.mp4``.
    * otherwise it is treated as a file path: used as-is when it is the only
      recording or its stem equals ``filename``, else the sibling
      ``<filename>.mp4`` in the same directory.
    """
    if video_path is None:
        return os.path.join(os.path.abspath('.'), 'audio_visual_cases', filename + '.mp4')

    video_path = os.path.abspath(video_path)
    if os.path.isdir(video_path):
        return os.path.join(video_path, filename + '.mp4')

    stem = os.path.splitext(os.path.basename(video_path))[0]
    if single_recording or stem == filename:
        return video_path
    return os.path.join(os.path.dirname(video_path), filename + '.mp4')


def get_file_json(duration_dict_index_to_file, video_path=None):
    """Build the ``file`` section of the VIA project.

    Args:
        duration_dict_index_to_file: dict mapping string index -> recording name.
        video_path: optional mp4 file or directory given on the command line;
            see ``_video_file_for`` for how it is resolved. When omitted the
            videos are expected under ``./audio_visual_cases``.

    Returns:
        dict mapping index -> ``{"fid", "fname", "type", "loc", "src"}`` where
        ``src`` is a ``file://`` URI of the absolute video path.
    """
    file_json = dict()
    single_recording = len(duration_dict_index_to_file) == 1

    for index, filename in duration_dict_index_to_file.items():
        local_file = _video_file_for(filename, video_path, single_recording)
        file_json[index] = {
            "fid": index,
            "fname": filename,
            # NOTE(review): 4 / 0 are kept from the original script; presumably
            # VIA's file-type and location codes -- confirm against VIA.
            "type": 4,
            "loc": 0,
            # as_uri() yields a well-formed, percent-encoded file:// URI
            # (the old "file:///" + abspath gave four slashes on POSIX).
            "src": Path(local_file).as_uri(),
        }

    return file_json


def get_duration_json(duration_dict):
    """Build the ``metadata`` section: one temporal segment per RTTM line.

    Args:
        duration_dict: recording name -> list of
            ``(index, start_time, end_time, speaker_name)``.

    Returns:
        dict mapping a metadata id ``"<running count>_<8 random letters>"`` to
        ``{"vid", "flg", "z", "xy", "av"}`` with ``z = [start, end]`` and the
        speaker name stored under attribute ``"1"``.
    """
    dict_ = dict()

    count = 0
    for key in duration_dict:
        for index, start_time, end_time, spk_name in duration_dict[key]:
            id_ = str(count) + "_" + ''.join(get_random_sample())
            dict_[id_] = {
                'vid': index,
                'flg': 0,
                'z': [start_time, end_time],
                'xy': [],
                # NOTE(review): attribute "5" is not declared in
                # via_template.json (only "1" is); kept as in the original.
                'av': {"1": spk_name, "5": "0"},
            }
            count += 1
    return dict_


def get_view_dict(duration_dict_index_to_file):
    """Return the ``view`` section: each index shows exactly its own file."""
    return {index: {'fid_list': [index]} for index in duration_dict_index_to_file.keys()}


def _locate_template(template_path=None):
    """Return the VIA template path: explicit path, else cwd, else next to this script."""
    if template_path is not None:
        return template_path
    candidates = ["via_template.json"]
    script_file = globals().get("__file__")
    if script_file:
        candidates.append(
            os.path.join(os.path.dirname(os.path.abspath(script_file)), "via_template.json"))
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    # Nothing found: return the historical location so open() raises as before.
    return candidates[0]


def generating_json(duration_dict,duration_dict_index_to_file, duration_dict_file_to_index, output_json_file,
                    video_path=None, template_path=None):
    """Fill the VIA template with the RTTM content and write it as JSON.

    Args:
        duration_dict: per-recording segments from ``read_rttm_duration``.
        duration_dict_index_to_file: string index -> recording name.
        duration_dict_file_to_index: recording name -> string index (unused
            here; kept so existing callers keep working).
        output_json_file: path of the JSON file to write.
        video_path: optional mp4 file or directory, forwarded to
            ``get_file_json``.
        template_path: optional path of the VIA template; by default
            ``via_template.json`` is looked up in the current directory and
            then next to this script.
    """
    with open(_locate_template(template_path)) as template_obj:
        via_json_obj = json.load(template_obj)

    ui_config = via_json_obj["config"]["ui"]
    ui_config["gtimeline_visible_row_count"] = 6
    ui_config["file_content_align"] = "center"
    ui_config["file_metadata_editor_visible"] = True
    ui_config["spatial_metadata_editor_visible"] = True
    ui_config["temporal_segment_metadata_editor_visible"] = True

    via_json_obj["config"]["file"] = {
        "loc_prefix": get_local_prefix(duration_dict_index_to_file)
    }

    via_json_obj['project']["vid_list"] = list(duration_dict_index_to_file.keys())
    via_json_obj['file'] = get_file_json(duration_dict_index_to_file, video_path)
    via_json_obj['metadata'] = get_duration_json(duration_dict)
    via_json_obj['view'] = get_view_dict(duration_dict_index_to_file)

    with open(output_json_file, "w") as output_obj:
        json.dump(via_json_obj, output_obj)


def main():
    """Command-line entry point: RTTM (+ optional mp4 location) in, VIA JSON out."""
    parser = ArgumentParser(
        description='Speaker diarization visualization tool for audio-visual modality.', add_help=True,
        usage='%(prog)s [options]')
    parser.add_argument('-rttm', dest='rttm_fns', required=True,
                        help='reference or system RTTM file')
    parser.add_argument('-mp4_path', dest='video_path', default=None,
                        help='mp4 file, or directory holding <recording>.mp4 '
                             '(default: ./audio_visual_cases)')
    parser.add_argument('-via_json_result', dest='via_json_result',
                        help='VIA JSON output path (default: %(default)s)', default='via_result.json')
    args = parser.parse_args()

    duration_dict,duration_dict_index_to_file, duration_dict_file_to_index = read_rttm_duration(args.rttm_fns)
    generating_json(duration_dict,duration_dict_index_to_file, duration_dict_file_to_index,
                    args.via_json_result, video_path=args.video_path)


if __name__ == "__main__":
    main()
6 | size = 5 7 | item []: 8 | item [0]: 9 | 10 | class = "IntervalTier" 11 | name = "spk00" 12 | xmin = 0 13 | xmax = 151.248000 14 | intervals: size = 3 15 | intervals [0]: 16 | xmin = 0.000000000000000 17 | xmax = 41.119999999999997 18 | text = "" 19 | intervals [1]: 20 | xmin = 41.119999999999997 21 | xmax = 80.479999999999990 22 | text = "speech" 23 | intervals [2]: 24 | xmin = 80.479999999999990 25 | xmax = 151.247999999999990 26 | text = "" 27 | item [1]: 28 | 29 | class = "IntervalTier" 30 | name = "spk01" 31 | xmin = 0 32 | xmax = 151.248000 33 | intervals: size = 22 34 | intervals [0]: 35 | xmin = 0.000000000000000 36 | xmax = 119.439999999999998 37 | text = "" 38 | intervals [1]: 39 | xmin = 119.439999999999998 40 | xmax = 122.319999999999993 41 | text = "speech" 42 | intervals [2]: 43 | xmin = 122.319999999999993 44 | xmax = 122.959999999999994 45 | text = "" 46 | intervals [3]: 47 | xmin = 122.959999999999994 48 | xmax = 124.559999999999988 49 | text = "speech" 50 | intervals [4]: 51 | xmin = 124.559999999999988 52 | xmax = 125.200000000000003 53 | text = "" 54 | intervals [5]: 55 | xmin = 125.200000000000003 56 | xmax = 128.400000000000006 57 | text = "speech" 58 | intervals [6]: 59 | xmin = 128.400000000000006 60 | xmax = 128.479999999999990 61 | text = "" 62 | intervals [7]: 63 | xmin = 128.479999999999990 64 | xmax = 129.639999999999986 65 | text = "speech" 66 | intervals [8]: 67 | xmin = 129.639999999999986 68 | xmax = 130.879999999999995 69 | text = "" 70 | intervals [9]: 71 | xmin = 130.879999999999995 72 | xmax = 133.519999999999982 73 | text = "speech" 74 | intervals [10]: 75 | xmin = 133.519999999999982 76 | xmax = 133.680000000000007 77 | text = "" 78 | intervals [11]: 79 | xmin = 133.680000000000007 80 | xmax = 134.120000000000005 81 | text = "speech" 82 | intervals [12]: 83 | xmin = 134.120000000000005 84 | xmax = 135.719999999999999 85 | text = "" 86 | intervals [13]: 87 | xmin = 135.719999999999999 88 | xmax = 138.159999999999997 89 | 
text = "speech" 90 | intervals [14]: 91 | xmin = 138.159999999999997 92 | xmax = 138.919999999999987 93 | text = "" 94 | intervals [15]: 95 | xmin = 138.919999999999987 96 | xmax = 140.479999999999990 97 | text = "speech" 98 | intervals [16]: 99 | xmin = 140.479999999999990 100 | xmax = 140.639999999999986 101 | text = "" 102 | intervals [17]: 103 | xmin = 140.639999999999986 104 | xmax = 141.639999999999986 105 | text = "speech" 106 | intervals [18]: 107 | xmin = 141.639999999999986 108 | xmax = 142.199999999999989 109 | text = "" 110 | intervals [19]: 111 | xmin = 142.199999999999989 112 | xmax = 144.319999999999993 113 | text = "speech" 114 | intervals [20]: 115 | xmin = 145.319999999999993 116 | xmax = 151.247999999999990 117 | text = "" 118 | intervals [21]: 119 | xmin = 144.599999999999994 120 | xmax = 145.319999999999993 121 | text = "speech" 122 | item [2]: 123 | 124 | class = "IntervalTier" 125 | name = "spk02" 126 | xmin = 0 127 | xmax = 151.248000 128 | intervals: size = 10 129 | intervals [0]: 130 | xmin = 0.000000000000000 131 | xmax = 80.920000000000002 132 | text = "" 133 | intervals [1]: 134 | xmin = 80.920000000000002 135 | xmax = 83.719999999999999 136 | text = "speech" 137 | intervals [2]: 138 | xmin = 83.719999999999999 139 | xmax = 84.840000000000003 140 | text = "" 141 | intervals [3]: 142 | xmin = 84.840000000000003 143 | xmax = 86.079999999999998 144 | text = "speech" 145 | intervals [4]: 146 | xmin = 86.079999999999998 147 | xmax = 87.079999999999998 148 | text = "" 149 | intervals [5]: 150 | xmin = 87.079999999999998 151 | xmax = 88.000000000000000 152 | text = "speech" 153 | intervals [6]: 154 | xmin = 88.000000000000000 155 | xmax = 88.719999999999999 156 | text = "" 157 | intervals [7]: 158 | xmin = 88.719999999999999 159 | xmax = 93.039999999999992 160 | text = "speech" 161 | intervals [8]: 162 | xmin = 94.359999999999999 163 | xmax = 151.247999999999990 164 | text = "" 165 | intervals [9]: 166 | xmin = 93.079999999999998 167 | xmax = 
94.359999999999999 168 | text = "speech" 169 | item [3]: 170 | 171 | class = "IntervalTier" 172 | name = "spk03" 173 | xmin = 0 174 | xmax = 151.248000 175 | intervals: size = 6 176 | intervals [0]: 177 | xmin = 0.000000000000000 178 | xmax = 5.280000000000000 179 | text = "" 180 | intervals [1]: 181 | xmin = 5.280000000000000 182 | xmax = 34.519999999999996 183 | text = "speech" 184 | intervals [2]: 185 | xmin = 34.519999999999996 186 | xmax = 34.680000000000000 187 | text = "" 188 | intervals [3]: 189 | xmin = 34.680000000000000 190 | xmax = 40.079999999999998 191 | text = "speech" 192 | intervals [4]: 193 | xmin = 40.960000000000001 194 | xmax = 151.247999999999990 195 | text = "" 196 | intervals [5]: 197 | xmin = 40.119999999999997 198 | xmax = 40.960000000000001 199 | text = "speech" 200 | item [4]: 201 | 202 | class = "IntervalTier" 203 | name = "spk04" 204 | xmin = 0 205 | xmax = 151.248000 206 | intervals: size = 16 207 | intervals [0]: 208 | xmin = 0.000000000000000 209 | xmax = 95.120000000000005 210 | text = "" 211 | intervals [1]: 212 | xmin = 95.120000000000005 213 | xmax = 98.000000000000000 214 | text = "speech" 215 | intervals [2]: 216 | xmin = 98.000000000000000 217 | xmax = 98.719999999999999 218 | text = "" 219 | intervals [3]: 220 | xmin = 98.719999999999999 221 | xmax = 100.439999999999998 222 | text = "speech" 223 | intervals [4]: 224 | xmin = 100.439999999999998 225 | xmax = 101.120000000000005 226 | text = "" 227 | intervals [5]: 228 | xmin = 101.120000000000005 229 | xmax = 104.600000000000009 230 | text = "speech" 231 | intervals [6]: 232 | xmin = 104.600000000000009 233 | xmax = 105.560000000000002 234 | text = "" 235 | intervals [7]: 236 | xmin = 105.560000000000002 237 | xmax = 107.079999999999998 238 | text = "speech" 239 | intervals [8]: 240 | xmin = 107.079999999999998 241 | xmax = 107.159999999999997 242 | text = "" 243 | intervals [9]: 244 | xmin = 107.159999999999997 245 | xmax = 107.599999999999994 246 | text = "speech" 247 | 
intervals [10]: 248 | xmin = 107.599999999999994 249 | xmax = 108.159999999999997 250 | text = "" 251 | intervals [11]: 252 | xmin = 108.159999999999997 253 | xmax = 110.039999999999992 254 | text = "speech" 255 | intervals [12]: 256 | xmin = 110.039999999999992 257 | xmax = 111.000000000000000 258 | text = "" 259 | intervals [13]: 260 | xmin = 111.000000000000000 261 | xmax = 113.280000000000001 262 | text = "speech" 263 | intervals [14]: 264 | xmin = 118.120000000000005 265 | xmax = 151.247999999999990 266 | text = "" 267 | intervals [15]: 268 | xmin = 113.840000000000003 269 | xmax = 118.120000000000005 270 | text = "speech" 271 | --------------------------------------------------------------------------------