目录
- 麦克风获取
- 设置参数
- 读取数据块
- 阿里语音识别模型加载
- 阿里语音识别
- 整体代码
Python 麦克风获取音频并识别
麦克风获取
# Open the microphone input stream (audio is a pyaudio.PyAudio() instance).
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
设置参数
# Recording parameters
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz
CHUNK = 1024  # frames per data chunk
RECORD_SECONDS = 5  # recording duration in seconds
WAVE_OUTPUT_FILENAME = "output.wav"
读取数据块
# Read the stream chunk by chunk until about RECORD_SECONDS of audio is
# captured (the chunk count is truncated to a whole number).
frames = []
for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
阿里语音识别模型加载
# Local directories of the downloaded models
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build the pipeline: ASR model plus VAD, punctuation and speaker models.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里语音识别
# Run recognition on the recorded WAV file.
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整体代码
import os
import threading
import wave

import keyboard
import pyaudio
from funasr import AutoModel

# Recording parameters
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz
CHUNK = 1024  # frames per data chunk
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# Shared recording state
audio = pyaudio.PyAudio()
frames = []
stream = None
record_thread = None
recording = False

# Local directories of the downloaded models
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加载模型完成!!!")


def start_recording():
    """Open the microphone stream and start the background reader thread.

    Does nothing if a recording is already in progress.
    """
    global stream, recording, record_thread
    if recording:
        return
    print("开始录音...")
    # Drop audio from any previous recording so takes are not concatenated.
    frames.clear()
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    # Set the flag only after the stream opened, so a failed open does not
    # leave the program stuck in the "recording" state.
    recording = True
    record_thread = threading.Thread(target=record_audio, daemon=True)
    record_thread.start()


def _stop_capture():
    """Stop the reader thread, then close the input stream."""
    global stream, recording, record_thread
    recording = False
    # Wait for the reader to leave stream.read() before closing the stream.
    if record_thread is not None:
        record_thread.join()
        record_thread = None
    if stream is not None:
        stream.stop_stream()
        stream.close()
        stream = None


def stop_recording():
    """Stop recording, save the audio to disk and run speech recognition.

    Does nothing if no recording is in progress. The PyAudio instance is
    kept alive so that another recording can be started afterwards.
    """
    if not recording:
        return
    print("录音结束.")
    _stop_capture()
    save_audio()
    print("开始识别!!!")
    res = model.generate(
        input=WAVE_OUTPUT_FILENAME,
        batch_size_s=16000,
        hotword='魔搭',
    )
    print("识别结束!!!")
    print("识别结果:", res)


def record_audio():
    """Append chunks read from the stream to frames while recording is set."""
    while recording:
        # Do not raise on input overflow; an exception would kill this thread.
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)


def save_audio():
    """Write the captured frames to WAVE_OUTPUT_FILENAME as a WAV file."""
    out_dir = os.path.dirname(WAVE_OUTPUT_FILENAME)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


# Ctrl starts recording
keyboard.add_hotkey('ctrl', start_recording)

# Alt stops recording and triggers recognition
keyboard.add_hotkey('alt', stop_recording)

print("按 Ctrl 开始录音,按 Alt 结束录音")
try:
    keyboard.wait()  # keep the program running
finally:
    # Release audio resources once, when the program exits.
    _stop_capture()
    audio.terminate()
到此,这篇关于使用Python实现从麦克风获取音频并识别的文章就介绍到这了。更多相关Python麦克风获取音频内容,请搜索编程客栈(www.devze.com)以前的文章或继续浏览下面的相关文章,希望大家以后多多支持编程客栈(www.devze.com)!
精彩评论