├── .gitignore ├── README.md ├── part_voice_files ├── 12.wav ├── minute_part_wav_path.wav ├── ms_part_voice.wav └── second_part_wav_path.wav ├── run.py ├── voice_files ├── demo.pcm ├── demo.wav └── test.wav └── wavTools.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 
| .mypy_cache/ 105 | 106 | .idea/ 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python_wav 2 | 对音频文件的处理:音频信息,读取内容,获取时长,切割音频,pcm与wav互转 3 | 4 | 文章简介:公众号-Python疯子 5 | https://mp.weixin.qq.com/s/Kw_n3RgYfZCn_0ZOJpaxHg 6 | 7 | ###### 获取音频信息 8 | ret = wav_infos(wav_path) 9 | print(ret) 10 | 11 | ###### 读取音频文件内容 12 | ret = read_wav(wav_path) 13 | print(ret) 14 | 15 | ###### 获取音频时长(单位秒) 16 | ret = get_wav_time(wav_path) 17 | print(ret) 18 | 19 | 20 | ###### 音频切片,获取部分音频 时间的单位是毫秒 21 | start_time = 13950 22 | end_time = 15200 23 | get_ms_part_wav(main_wav_path, start_time, end_time, part_wav_path) 24 | 25 | 26 | ###### 音频切片,获取部分音频 时间的单位是秒 27 | start_time = 35 28 | end_time = 38 29 | get_second_part_wav(main_wav_path, start_time, end_time, second_part_wav_path) 30 | 31 | ###### 音频切片,获取部分音频 时间的单位是分钟和秒 样式:0:12 32 | start_time = "0:35" 33 | end_time = "0:38" 34 | get_minute_part_wav(main_wav_path, start_time, end_time, minute_part_wav_path) 35 | 36 | 37 | ###### wav文件转为pcm文件 38 | wav_to_pcm(wav_path, pcm_path) 39 | 40 | ###### pcm文件转为wav文件 41 | pcm_to_wav(pcm_path, wav_path2) 42 | -------------------------------------------------------------------------------- /part_voice_files/12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/silencesmile/python_wav/6032da475fac3cfd20178a9bb1cddefeb67b1927/part_voice_files/12.wav -------------------------------------------------------------------------------- /part_voice_files/minute_part_wav_path.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/silencesmile/python_wav/6032da475fac3cfd20178a9bb1cddefeb67b1927/part_voice_files/minute_part_wav_path.wav -------------------------------------------------------------------------------- /part_voice_files/ms_part_voice.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/silencesmile/python_wav/6032da475fac3cfd20178a9bb1cddefeb67b1927/part_voice_files/ms_part_voice.wav -------------------------------------------------------------------------------- /part_voice_files/second_part_wav_path.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/silencesmile/python_wav/6032da475fac3cfd20178a9bb1cddefeb67b1927/part_voice_files/second_part_wav_path.wav -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | 3 | from wavTools import * 4 | 5 | wav_path = "./voice_files/demo.wav" 6 | pcm_path = "./voice_files/demo.pcm" 7 | wav_path2 = "./voice_files/test.wav" 8 | 9 | # 音频切割的文件路径 10 | main_wav_path = "./part_voice_files/12.wav" 11 | part_wav_path = "./part_voice_files/ms_part_voice.wav" 12 | second_part_wav_path = "./part_voice_files/second_part_wav_path.wav" 13 | minute_part_wav_path = "./part_voice_files/minute_part_wav_path.wav" 14 | 15 | # 获取音频信息 16 | ret = wav_infos(wav_path) 17 | print(ret) 18 | 19 | # 读取音频文件内容 20 | ret = read_wav(wav_path) 21 | print(ret) 22 | 23 | # 获取音频时长(单位秒) 24 | ret = get_wav_time(wav_path) 25 | print(ret) 26 | 27 | 28 | # 音频切片,获取部分音频 时间的单位是毫秒 29 | start_time = 13950 30 | end_time = 15200 31 | get_ms_part_wav(main_wav_path, start_time, end_time, part_wav_path) 32 | 33 | 34 | # 音频切片,获取部分音频 时间的单位是秒 35 | start_time = 35 36 | end_time = 38 37 | get_second_part_wav(main_wav_path, start_time, end_time, second_part_wav_path) 38 | 39 | # 音频切片,获取部分音频 时间的单位是分钟和秒 样式:0:12 40 | start_time = "0:35" 41 | end_time = "0:38" 42 | get_minute_part_wav(main_wav_path, start_time, end_time, minute_part_wav_path) 43 | 44 | 45 | # wav文件转为pcm文件 46 | wav_to_pcm(wav_path, pcm_path) 47 | 48 | # pcm文件转为wav文件 49 | 
# -*- coding:utf8 -*-
"""Helpers for inspecting, slicing, converting and plotting WAV audio files.

Author: Young
WeChat official account: Python Fengzi (Hold2Crazy)
"""
import contextlib
import wave

import numpy as np
from scipy.io import wavfile

# pydub (slicing) and matplotlib (plotting) are only needed by some helpers.
# Import them defensively so the pure-stdlib helpers stay usable without
# them; the functions that need them raise a clear ImportError on use.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

try:
    from pydub import AudioSegment
except ImportError:
    AudioSegment = None

# Size of a canonical RIFF/WAVE header; only used by the legacy fallback in
# wav_to_pcm for files the wave module refuses to parse.
_WAV_HEADER_SIZE = 44


def wav_infos(wav_path):
    """Return the header parameters of a WAV file.

    :param wav_path: path of the WAV file
    :return: list ``[nchannels, sampwidth, framerate, nframes, comptype,
        compname]``, e.g. ``[1, 2, 8000, 51158, 'NONE', 'not compressed']``
    """
    # The file is opened exactly once and closed by the context manager
    # (the previous version opened a second handle that was never closed).
    with wave.open(wav_path, "rb") as wav_file:
        return list(wav_file.getparams())


def read_wav(wav_path):
    """Read all audio frames of a WAV file.

    :param wav_path: path of the WAV file
    :return: the raw frame data as ``bytes`` exactly as returned by
        ``wave.Wave_read.readframes`` for every frame in the file
    """
    with wave.open(wav_path, "rb") as wav_file:
        return wav_file.readframes(wav_file.getnframes())


def get_wav_time(wav_path):
    """Return the duration of a WAV file.

    :param wav_path: path of the WAV file
    :return: duration in seconds (frame count divided by frame rate)
    """
    with contextlib.closing(wave.open(wav_path, 'r')) as wav_file:
        frame_count = wav_file.getnframes()
        frame_rate = wav_file.getframerate()
    return frame_count / float(frame_rate)


def _export_slice(main_wav_path, start_ms, end_ms, part_wav_path):
    """Export the [start_ms, end_ms) slice of an audio file as WAV."""
    if AudioSegment is None:
        raise ImportError("pydub is required to slice audio files")
    # from_file() instead of from_mp3(): the input is normally a WAV file,
    # and from_file() still accepts other formats by letting the decoder
    # detect them instead of forcing the MP3 format.
    sound = AudioSegment.from_file(main_wav_path)
    sound[start_ms:end_ms].export(part_wav_path, format="wav")


def _clock_to_ms(stamp):
    """Convert 'SS', 'MM:SS' or 'HH:MM:SS' into milliseconds."""
    fields = str(stamp).split(":")
    if len(fields) > 3:
        raise ValueError(
            "invalid time stamp %r; expected 'SS', 'MM:SS' or 'HH:MM:SS'"
            % (stamp,)
        )
    seconds = 0
    for field in fields:
        # Each further field is one base-60 unit finer than the previous.
        seconds = seconds * 60 + int(field)
    return seconds * 1000


def get_ms_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    """Cut a part out of an audio file; times are given in milliseconds.

    :param main_wav_path: path of the source audio file
    :param start_time: start of the cut, in milliseconds
    :param end_time: end of the cut, in milliseconds
    :param part_wav_path: path the extracted WAV file is written to
    :return: None
    """
    _export_slice(main_wav_path, int(start_time), int(end_time), part_wav_path)


def get_second_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    """Cut a part out of an audio file; times are given in whole seconds.

    :param main_wav_path: path of the source audio file
    :param start_time: start of the cut, in seconds (converted with ``int``)
    :param end_time: end of the cut, in seconds (converted with ``int``)
    :param part_wav_path: path the extracted WAV file is written to
    :return: None
    """
    _export_slice(
        main_wav_path,
        int(start_time) * 1000,
        int(end_time) * 1000,
        part_wav_path,
    )


def get_minute_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    """Cut a part out of an audio file; times are clock strings.

    :param main_wav_path: path of the source audio file
    :param start_time: start of the cut, e.g. ``"12:35"`` (minutes:seconds);
        ``"SS"`` and ``"HH:MM:SS"`` are accepted as well
    :param end_time: end of the cut, same format as ``start_time``
    :param part_wav_path: path the extracted WAV file is written to
    :return: None
    :raises ValueError: if a time stamp is not numeric or has too many fields
    """
    # Parse both stamps first so malformed input fails before any audio
    # decoding work is started.
    start_ms = _clock_to_ms(start_time)
    end_ms = _clock_to_ms(end_time)
    _export_slice(main_wav_path, start_ms, end_ms, part_wav_path)


def wav_to_pcm(wav_path, pcm_path):
    """Convert a WAV file into a headerless PCM file.

    :param wav_path: path of the WAV file to read
    :param pcm_path: path of the PCM file to write
    :return: None
    """
    try:
        # Let the wave module locate the data chunk: WAV headers are not
        # always 44 bytes, and extra chunks must not leak into the PCM data.
        with wave.open(wav_path, "rb") as wav_file:
            pcm_bytes = wav_file.readframes(wav_file.getnframes())
    except wave.Error:
        # The wave module rejected the file; keep the legacy best-effort
        # behaviour of skipping a fixed-size header.
        with open(wav_path, "rb") as raw_file:
            raw_file.seek(_WAV_HEADER_SIZE)
            pcm_bytes = raw_file.read()

    with open(pcm_path, "wb") as pcm_file:
        pcm_file.write(pcm_bytes)


def pcm_to_wav(pcm_path, wav_path, channels=1, sample_width=2,
               frame_rate=8000):
    """Wrap raw PCM data in a WAV container.

    :param pcm_path: path of the PCM file to read
    :param wav_path: path of the WAV file to write
    :param channels: number of channels written to the header (default 1)
    :param sample_width: sample width in bytes (default 2)
    :param frame_rate: frames per second (default 8000)
    :return: None
    """
    with open(pcm_path, 'rb') as pcm_file:
        pcm_bytes = pcm_file.read()

    # Closing the writer is what finalises the header sizes, so it must not
    # be left to garbage collection.
    with wave.open(wav_path, 'wb') as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(frame_rate)
        wav_file.writeframes(pcm_bytes)


def wav_waveform(wave_path):
    """Plot the waveform of a WAV file against time in seconds.

    :param wave_path: path of the WAV file
    :return: None (opens a matplotlib window via ``plt.show()``)
    """
    if plt is None:
        raise ImportError("matplotlib is required to plot a waveform")

    sample_frequency, audio_sequence = wavfile.read(wave_path)
    # Build the time axis from the sample count so it always has exactly
    # one entry per frame; np.arange with a float step can be off by one
    # element, which makes plt.plot fail on mismatched shapes.
    x_seq = np.arange(len(audio_sequence)) / float(sample_frequency)

    plt.plot(x_seq, audio_sequence, 'blue')
    plt.xlabel("time (s)")
    plt.show()