├── README.md
├── trans_to_wav.py
├── test.py
├── trian_crnn.py
├── get_feature.py
└── detect_gui.py
/README.md:
--------------------------------------------------------------------------------
# CRNN-Based Speech Emotion Recognition

### Download the original eNTERFACE database
Link: [https://pan.baidu.com/s/1AXb31ov3kJhg5_Bo4C-ElA?pwd=5kxk](https://pan.baidu.com/s/1AXb31ov3kJhg5_Bo4C-ElA?pwd=5kxk)
Extraction code: 5kxk

## Requirements
Python 3 is recommended.

## Dataset layout
Arrange the dataset as follows:
- The dataset root contains one folder per subject.
- Each subject folder contains six emotion subfolders, named after the emotion labels.
- Each emotion subfolder contains several sentence subfolders, each holding one audio file.

Update the path parameters in each .py script to match your own paths.

## Workflow
1. Run `trans_to_wav.py` to convert the audio files to WAV format (skip this step if they already are).
2. Run `get_feature.py` to extract the features.
3. Run `trian_crnn.py` to train the model.
4. Run `test.py` to evaluate the model.

## GUI
The project includes a GUI, `detect_gui.py`, which integrates the full pipeline for convenient use.

## Usage
1. Download and unpack the project files.
2. Set your working directory to the project root.
3. Run the Python scripts in the order given in the workflow above.
4. Run `detect_gui.py` to load a model, load the audio to test, preprocess it, extract features, and recognize the emotion from the GUI.

## Contact
If you have any questions, you can reach me at:
- Email: w1372988970@gmail.com

![star-history-2025627 (2)](https://github.com/user-attachments/assets/abf72bca-c068-4839-a220-c3c9cea6e789)
--------------------------------------------------------------------------------
/trans_to_wav.py:
--------------------------------------------------------------------------------
import os
import glob
import moviepy.editor as mp
from scipy.io import wavfile

# Dataset root
root_path = 'E:/代码接单/rcnn语音情感识别/project2_database/enterface database'
subject_folders = glob.glob(os.path.join(root_path, 'subject *'))
emotions = ['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']

# Iterate over all subject folders
for subject_folder in subject_folders:

    # Iterate over all emotion folders
    for emotion in emotions:
        emotion_folder = os.path.join(subject_folder, emotion)

        # Iterate over all sentence folders
        for sentence_index in range(1, 6):
            sentence_folder = os.path.join(emotion_folder, f'sentence {sentence_index}')

            # Skip missing sentence folders
            if os.path.isdir(sentence_folder):
                avi_files = glob.glob(os.path.join(sentence_folder, '*.avi'))

                # Skip folders without an AVI file
                if avi_files:
                    avi_file_path = avi_files[0]

                    # Extract the audio track from the video
                    video = mp.VideoFileClip(avi_file_path)
                    audio = video.audio
                    audio_data = audio.to_soundarray()  # float samples in [-1, 1]
                    audio_fs = audio.fps

                    # Save the audio as a WAV file next to the video
                    wav_file_path = os.path.join(sentence_folder,
                                                 f'{os.path.splitext(os.path.basename(avi_file_path))[0]}.wav')
                    wavfile.write(wav_file_path, audio_fs, audio_data)
                    video.close()  # release the file handle
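# Optional sanity check (an addition to the original script, not part of the
# pipeline): count the WAV files produced above so a partially converted
# dataset is easy to spot. The glob pattern assumes the folder layout
# described in the README.
wav_count = len(glob.glob(os.path.join(root_path, 'subject *', '*', 'sentence *', '*.wav')))
print(f'WAV files found under the dataset root: {wav_count}')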
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

# Emotion labels
emotions = ['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']

# Load the feature matrices and label vectors
pitch_features_labels = np.load('pitch_features_labels.npy')
timbre_features_labels = np.load('timbre_features_labels.npy')
loudness_features_labels = np.load('loudness_features_labels.npy')
duration_features_labels = np.load('duration_features_labels.npy')

# Split features from labels (the last column of each matrix is the label)
pitch_features, pitch_labels = pitch_features_labels[:, :-1], pitch_features_labels[:, -1]
timbre_features, timbre_labels = timbre_features_labels[:, :-1], timbre_features_labels[:, -1]
loudness_features, loudness_labels = loudness_features_labels[:, :-1], loudness_features_labels[:, -1]
duration_features, duration_labels = duration_features_labels[:, :-1], duration_features_labels[:, -1]

# Stack the feature groups side by side
stacked_features = np.hstack((pitch_features, timbre_features, loudness_features, duration_features))

# Reshape into the (samples, features, channels) form the CRNN expects
n_features = stacked_features.shape[1]
all_data = stacked_features.reshape(-1, n_features, 1)

# One-hot encode the labels (all four label vectors are identical, so pitch_labels is used)
num_classes = len(emotions)
all_labels = to_categorical(pitch_labels, num_classes)

# Load the trained model
model_path = 'C:/Users/13729/PycharmProjects/mood/models/emotion_recognition_crnn_epoch067.h5'
trained_model = load_model(model_path)

# Evaluate on the full dataset. Note that this includes the frames the model
# was trained on, so the reported accuracy is optimistic.
_, accuracy = trained_model.evaluate(all_data, all_labels, batch_size=32)
print(f"Accuracy on the full dataset: {accuracy * 100:.2f}%")
--------------------------------------------------------------------------------
/trian_crnn.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

# Emotion labels
emotions = ['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']

# Load the feature matrices and label vectors
pitch_features_labels = np.load('pitch_features_labels.npy')
timbre_features_labels = np.load('timbre_features_labels.npy')
loudness_features_labels = np.load('loudness_features_labels.npy')
duration_features_labels = np.load('duration_features_labels.npy')

# Split features from labels (the last column of each matrix is the label)
pitch_features, pitch_labels = pitch_features_labels[:, :-1], pitch_features_labels[:, -1]
timbre_features, timbre_labels = timbre_features_labels[:, :-1], timbre_features_labels[:, -1]
loudness_features, loudness_labels = loudness_features_labels[:, :-1], loudness_features_labels[:, -1]
duration_features, duration_labels = duration_features_labels[:, :-1], duration_features_labels[:, -1]

# Stack the feature groups side by side
stacked_features = np.hstack((pitch_features, timbre_features, loudness_features, duration_features))

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(stacked_features, pitch_labels, test_size=0.2, random_state=42)

# Reshape into the (samples, features, channels) form the CRNN expects
n_features = stacked_features.shape[1]
X_train = X_train.reshape(-1, n_features, 1)
X_test = X_test.reshape(-1, n_features, 1)

# One-hot encode the labels
num_classes = len(emotions)
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# Build the CRNN model
model = Sequential()

# Convolutional layer
model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(n_features, 1)))

# Recurrent layers
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.5))

# Fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])
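# Expected tensor flow through the network, assuming the default 13 MFCC
# coefficients (16 stacked features per frame: 1 pitch + 13 MFCC + 1 RMS +
# 1 duration):
#   (batch, 16, 1) -> Conv1D(64, k=3)      -> (batch, 14, 64)
#                  -> LSTM(128, sequences) -> (batch, 14, 128)
#                  -> LSTM(128)            -> (batch, 128)
#                  -> Dense(128), Dense(6) -> (batch, 6)
model.summary()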
# Optionally resume from a previously saved model
# model_path = 'emotion_recognition_crnn_epoch030.h5'
# trained_model = load_model(model_path)

# Checkpoint callback that saves the model after every epoch
checkpoint = ModelCheckpoint('emotion_recognition_crnn_epoch{epoch:03d}.h5', save_freq='epoch')

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[checkpoint])

# To resume training from epoch 30 instead:
# initial_epoch = 30
# trained_model.fit(X_train, y_train, epochs=100, initial_epoch=initial_epoch, batch_size=32, validation_data=(X_test, y_test), callbacks=[checkpoint])

# Save the final model
model.save('emotion_recognition_crnn.h5')
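# Optional sanity check (an addition to the original script): reload the saved
# model and confirm it reproduces the final test accuracy on the same split.
reloaded = load_model('emotion_recognition_crnn.h5')
_, test_acc = reloaded.evaluate(X_test, y_test, batch_size=32, verbose=0)
print(f'Reloaded model test accuracy: {test_acc:.4f}')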
--------------------------------------------------------------------------------
/get_feature.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import scipy.io.wavfile as wavfile
import librosa
from python_speech_features import mfcc
from scipy.signal.windows import hamming

# Dataset root
root_path = r'E:\代码接单\rcnn语音情感识别\project2_database\enterface database'
subject_folders = [folder for folder in os.listdir(root_path) if folder.startswith('subject')]
emotions = ['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']

# Preprocessing parameters
target_sample_rate = 16000  # resampling target (Hz)
frame_length = 0.025        # frame length (s)
frame_overlap = 0.01        # frame hop: consecutive frames start this far apart (s)

# Feature matrices and label vector, filled incrementally
pitch_features = None
timbre_features = None
loudness_features = None
duration_features = None
labels = None

# Iterate over all subject folders
for subject_index in range(len(subject_folders)):
    subject_folder = os.path.join(root_path, subject_folders[subject_index])

    # Iterate over all emotion folders
    for emotion_index in range(len(emotions)):
        emotion_folder = os.path.join(subject_folder, emotions[emotion_index])

        # Iterate over all sentence folders
        for sentence_index in range(1, 6):
            sentence_folder = os.path.join(emotion_folder, f'sentence {sentence_index}')

            # Skip missing sentence folders
            if os.path.isdir(sentence_folder):
                wav_files = [file for file in os.listdir(sentence_folder) if file.endswith('.wav')]

                # Skip folders without a WAV file
                if wav_files:
                    wav_file_path = os.path.join(sentence_folder, wav_files[0])

                    # Read the audio file
                    audio_fs, audio_data = wavfile.read(wav_file_path)

                    # Down-mix to mono first (librosa expects mono or
                    # channel-first input), then convert to float
                    if audio_data.ndim > 1:
                        audio_data = np.mean(audio_data, axis=1)
                    audio_data = audio_data.astype(np.float32)

                    # Resample if needed
                    if audio_fs != target_sample_rate:
                        audio_data = librosa.resample(audio_data, orig_sr=audio_fs,
                                                      target_sr=target_sample_rate)
                        audio_fs = target_sample_rate

                    # Timbre features (MFCC)
                    mfccs = mfcc(audio_data, audio_fs)

                    # Pitch features (utterance mean of YIN f0, repeated per frame)
                    pitch_values = librosa.yin(audio_data, fmin=librosa.note_to_hz('C2'),
                                               fmax=librosa.note_to_hz('C7'), sr=audio_fs)
                    pitch_values = np.mean(pitch_values) * np.ones((mfccs.shape[0], 1))

                    # Loudness features (Hamming-windowed RMS per frame)
                    frame_length_samples = int(round(frame_length * audio_fs))
                    frame_overlap_samples = int(round(frame_overlap * audio_fs))
                    rms_window = hamming(frame_length_samples, sym=False)
                    frame_starts = np.arange(0, len(audio_data) - frame_length_samples + 1, frame_overlap_samples)
                    rms_values = np.zeros((len(frame_starts), 1))
                    for i in range(len(frame_starts)):
                        frame = audio_data[frame_starts[i]:frame_starts[i] + frame_length_samples] * rms_window
                        rms_values[i] = np.sqrt(np.mean(frame ** 2))
                    rms_values = rms_values[:mfccs.shape[0], :]

                    # Duration feature (utterance length in seconds)
                    duration_value = len(audio_data) / audio_fs

                    # Append the features to the feature matrices
                    if pitch_features is None:
                        pitch_features = pitch_values
                    else:
                        pitch_features = np.vstack((pitch_features, pitch_values))

                    if timbre_features is None:
                        timbre_features = mfccs
                    else:
                        timbre_features = np.vstack((timbre_features, mfccs))

                    if loudness_features is None:
                        loudness_features = rms_values
                    else:
                        loudness_features = np.vstack((loudness_features, rms_values))

                    if duration_features is None:
                        duration_features = np.full((mfccs.shape[0], 1), duration_value)
                    else:
                        duration_features = np.vstack((duration_features, np.full((mfccs.shape[0], 1), duration_value)))

                    # Append the emotion label, one row per frame
                    emotion_label = emotion_index
                    if labels is None:
                        labels = np.full((len(pitch_values), 1), emotion_label)
                    else:
                        labels = np.vstack((labels, np.full((len(pitch_values), 1), emotion_label)))

# Truncate all feature matrices and the label vector to a common length
min_length = min([len(pitch_features), len(timbre_features), len(loudness_features),
                  len(duration_features), len(labels)])
pitch_features = pitch_features[:min_length, :]
timbre_features = timbre_features[:min_length, :]
loudness_features = loudness_features[:min_length, :]
duration_features = duration_features[:min_length, :]
labels = labels[:min_length]

# Save each feature matrix with the labels appended as the last column
np.save('pitch_features_labels.npy', np.hstack((pitch_features, labels)))
np.save('timbre_features_labels.npy', np.hstack((timbre_features, labels)))
np.save('loudness_features_labels.npy', np.hstack((loudness_features, labels)))
np.save('duration_features_labels.npy', np.hstack((duration_features, labels)))
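# Optional consistency report (an addition to the original script): every
# matrix written above should have the same number of rows (min_length).
print('pitch:', pitch_features.shape, 'timbre:', timbre_features.shape,
      'loudness:', loudness_features.shape, 'duration:', duration_features.shape,
      'labels:', labels.shape)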
--------------------------------------------------------------------------------
/detect_gui.py:
--------------------------------------------------------------------------------
import tkinter as tk
from tkinter import filedialog, messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np
import scipy.io.wavfile as wavfile
import librosa
from python_speech_features import mfcc
from scipy.signal.windows import hamming
from tensorflow.keras.models import load_model as keras_load_model

# Use a CJK-capable font so any Chinese text (e.g. dataset paths) renders in plots
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

def load_saved_model(model_path):
    """Load a trained Keras model from disk."""
    global model
    model = keras_load_model(model_path)

def load_wav_file():
    """Ask the user for a WAV file and plot its waveform."""
    global wav_file
    wav_file = filedialog.askopenfilename(filetypes=[('WAV files', '*.wav')])
    if not wav_file:  # the dialog was cancelled
        return
    rate, data = wavfile.read(wav_file)
    plot_waveform(rate, data)

def plot_waveform(rate, data):
    """Embed the raw waveform of the selected file in the main window."""
    fig, ax = plt.subplots()
    ax.plot(data)
    ax.set_title('Raw waveform of the input audio')
    ax.set_xlabel('Time')
    ax.set_ylabel('Amplitude')
    waveform_plot = FigureCanvasTkAgg(fig, window)
    waveform_plot.get_tk_widget().grid(row=2, column=0)

def extract_features():
    """Read the selected WAV file and extract its feature matrix."""
    global wav_file, extracted_features
    rate, data = wavfile.read(wav_file)
    extracted_features = extract_features_from_audio(data, rate)

def extract_features_from_audio(audio_data, audio_fs):
    """Extract pitch, MFCC, RMS and duration features, mirroring get_feature.py."""
    # Preprocessing parameters
    target_sample_rate = 16000  # resampling target (Hz)
    frame_length = 0.025        # frame length (s)
    frame_overlap = 0.01        # frame hop (s)

    # Down-mix to mono first (librosa expects mono input), then convert to float
    if audio_data.ndim > 1:
        audio_data = np.mean(audio_data, axis=1)
    audio_data = audio_data.astype(np.float32)

    # Resample if needed
    if audio_fs != target_sample_rate:
        audio_data = librosa.resample(audio_data, orig_sr=audio_fs,
                                      target_sr=target_sample_rate)
        audio_fs = target_sample_rate

    # Timbre features (MFCC)
    mfccs = mfcc(audio_data, audio_fs)

    # Pitch features (utterance mean of YIN f0, repeated per frame)
    pitch_values = librosa.yin(audio_data, fmin=librosa.note_to_hz('C2'),
                               fmax=librosa.note_to_hz('C7'), sr=audio_fs)
    pitch_values = np.mean(pitch_values) * np.ones((mfccs.shape[0], 1))

    # Loudness features (Hamming-windowed RMS per frame)
    frame_length_samples = int(round(frame_length * audio_fs))
    frame_overlap_samples = int(round(frame_overlap * audio_fs))
    rms_window = hamming(frame_length_samples, sym=False)
    frame_starts = np.arange(0, len(audio_data) - frame_length_samples + 1, frame_overlap_samples)
    rms_values = np.zeros((len(frame_starts), 1))
    for i in range(len(frame_starts)):
        frame = audio_data[frame_starts[i]:frame_starts[i] + frame_length_samples] * rms_window
        rms_values[i] = np.sqrt(np.mean(frame ** 2))
    rms_values = rms_values[:mfccs.shape[0], :]

    # Trim all per-frame features to a common length
    min_len = min(mfccs.shape[0], rms_values.shape[0], pitch_values.shape[0])
    mfccs = mfccs[:min_len, :]
    rms_values = rms_values[:min_len, :]
    pitch_values = pitch_values[:min_len, :]

    # Duration feature (utterance length in seconds)
    duration_value = len(audio_data) / audio_fs

    # Stack: pitch | MFCC | RMS | duration, one row per frame
    stacked_features = np.hstack((pitch_values, mfccs, rms_values,
                                  np.full((mfccs.shape[0], 1), duration_value)))

    return stacked_features

def plot_features():
    """Plot the four feature groups of the current utterance."""
    global extracted_features

    # Lengths of each feature track
    pitch_length = len(extracted_features[:, 0])
    mfcc_length = extracted_features.shape[1] - 3  # total columns minus pitch, RMS, duration
    rms_length = len(extracted_features[:, -2])
    duration_length = len(extracted_features[:, -1])

    # One figure with four stacked subplots
    fig, axes = plt.subplots(4, 1, figsize=(12, 16))

    # Pitch
    axes[0].plot(np.arange(pitch_length), extracted_features[:, 0], color='blue', label='Pitch')
    axes[0].set_title('Pitch feature')
    axes[0].set_xlabel('Time')
    axes[0].set_ylabel('Frequency')
    axes[0].legend()

    # Timbre (MFCC)
    img = axes[1].imshow(extracted_features[:, 1:1 + mfcc_length].T, origin='lower', aspect='auto', cmap='viridis')
    axes[1].set_title('Timbre feature (MFCC)')
    axes[1].set_xlabel('Time')
    axes[1].set_ylabel('MFCC coefficient')
    fig.colorbar(img, ax=axes[1], label='MFCC Value')

    # Loudness (RMS)
    axes[2].plot(np.arange(rms_length), extracted_features[:, -2], color='green', label='RMS')
    axes[2].set_title('Loudness feature')
    axes[2].set_xlabel('Time')
    axes[2].set_ylabel('RMS amplitude')
    axes[2].legend()

    # Duration
    axes[3].bar(np.arange(duration_length), extracted_features[:, -1], color='red', label='Duration')
    axes[3].set_title('Duration feature')
    axes[3].set_xlabel('Time')
    axes[3].set_ylabel('Duration')
    axes[3].legend()

    # Adjust the layout and show the figure
    plt.tight_layout()
    plt.show()
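# The model classifies one frame at a time, so the per-frame predictions have
# to be pooled into a single utterance-level decision. Averaging the per-frame
# softmax outputs is the pooling strategy assumed in this sketch; it is used
# by detect_emotion() below.
def average_frame_probabilities(per_frame_probs):
    """Average the softmax outputs over all frames of one utterance."""
    return np.mean(per_frame_probs, axis=0)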
def detect_emotion():
    """Classify the extracted features with the loaded model and show the result."""
    global model, extracted_features

    # One sample per frame, shaped (frames, n_features, 1) to match training
    n_features = extracted_features.shape[1]
    input_features = extracted_features.reshape(-1, n_features, 1)

    # Predict per-frame class probabilities and pool them over the utterance
    emotion_probabilities = average_frame_probabilities(model.predict(input_features))

    # Take the label with the highest pooled probability
    emotions = ['anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']
    predicted_emotion = emotions[np.argmax(emotion_probabilities)]

    # Show the prediction
    messagebox.showinfo('Prediction', f'Predicted emotion: {predicted_emotion}\nClass probabilities: {emotion_probabilities}')

# Main window
window = tk.Tk()
window.title('Speech Emotion Recognition')

# Create and place the buttons
load_model_button = tk.Button(window, text='Load model',
                              command=lambda: load_saved_model(filedialog.askopenfilename(filetypes=[('HDF5 files', '*.h5')])))
load_model_button.grid(row=0, column=0)

load_wav_button = tk.Button(window, text='Load WAV file', command=load_wav_file)
load_wav_button.grid(row=1, column=0)

extract_features_button = tk.Button(window, text='Extract features', command=extract_features)
extract_features_button.grid(row=3, column=0)

plot_features_button = tk.Button(window, text='Show features', command=plot_features)
plot_features_button.grid(row=4, column=0)

detect_emotion_button = tk.Button(window, text='Detect emotion', command=detect_emotion)
detect_emotion_button.grid(row=5, column=0)

# Run the main loop
window.mainloop()
--------------------------------------------------------------------------------