-
Notifications
You must be signed in to change notification settings - Fork 69
/
Copy pathAudioToText.py
140 lines (128 loc) · 4.67 KB
/
AudioToText.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
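# Speech-to-text using the Baidu API [it is very demanding about the quality of the source audio]. If the code in this file fails to run, please do not contact the author: it is almost certainly an environment problem, setting the environment up takes a long time and cannot be explained briefly, so search online and resolve it yourself.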
import requests as req
import json,base64,os
import speech_recognition as sr
from pocketsphinx import AudioFile
from Speech_and_Text import speech_to_text_cmu
import ffmpeg
baiduaudioappapiid='xxx' # Baidu application id (the original note calls it the text-to-speech app id); 'xxx' is a placeholder
baiduaudioappAPI_Key='xxx' # API key, sent as client_id when requesting the access token; 'xxx' is a placeholder
baiduaudioappSecret_Key='xxx' # Secret key, sent as client_secret when requesting the access token; 'xxx' is a placeholder
# Fetch the access token that belongs to the given Baidu application credentials.
def getbaiduApiToken(grant_type, API_Key, Secret_Key):
    """Request an OAuth access token from the Baidu token endpoint.

    Args:
        grant_type: OAuth grant type sent to the endpoint. A falsy value
            falls back to ``'client_credentials'``.
        API_Key: Application API key, sent as ``client_id``.
        Secret_Key: Application secret key, sent as ``client_secret``.

    Returns:
        The ``access_token`` value of the JSON response, or ``None`` when the
        request fails, the HTTP status is an error, or the response carries
        no ``access_token``.
    """
    try:
        response = req.get(
            'https://aip.baidubce.com/oauth/2.0/token',
            # Passing params lets requests URL-encode the credentials instead
            # of interpolating them raw into the query string.
            params={
                'grant_type': grant_type or 'client_credentials',
                'client_id': API_Key,
                'client_secret': Secret_Key,
            },
            # Without a timeout a stalled connection would block forever.
            timeout=60,
        )
        # A Response object is falsy for 4xx/5xx status codes.
        if response:
            return response.json()['access_token']
    except Exception as b:
        # Best effort: report the problem and signal failure with None.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        print(b)
    return None
# Speech recognition through the Baidu REST API.
def audioTotextForBaidu(audiofile):
    """Send an audio file to the Baidu speech recognition service.

    The file content is posted as base64 text, declared as 16 kHz
    single-channel PCM.

    Args:
        audiofile: Path of the audio file to recognise.

    Returns:
        The ``result`` field of the service's JSON response, or ``None`` when
        the file is missing or empty, no access token could be obtained, or
        the request/response handling fails.
    """
    try:
        if not os.path.isfile(audiofile):
            return None
        with open(audiofile, 'rb') as fr:
            rawaudio = fr.read()
        # Check the file before asking for a token so that an unusable input
        # does not cost a network round trip.
        if not rawaudio:
            return None
        token = getbaiduApiToken('client_credentials', baiduaudioappAPI_Key, baiduaudioappSecret_Key)
        if token is None:
            return None
        url = 'http://vop.baidu.com/server_api'
        header = {'Content-Type': 'application/json'}
        postdata = {
            "format": "pcm",
            "rate": 16000,
            # "dev_pid": 1537,  # optional recognition-model selector, left disabled
            "channel": 1,
            "token": token,
            "cuid": 'baidu_workshop',  # unique user identifier
            # Length of the raw audio in bytes, before base64 encoding.
            "len": len(rawaudio),
            # b64encode returns bytes, which the JSON encoder rejects; the
            # payload needs a str.
            "speech": base64.b64encode(rawaudio).decode('ascii'),
        }
        response = req.post(url=url, json=postdata, headers=header, timeout=60)
        result = response.json()['result']
        print(result)
        return result
    except Exception as b:
        # Report the failure and return None like every other failure path,
        # rather than handing the exception object back as if it were a result.
        print(b)
        return None
# Offline recognition with the locally installed speech_recognition package [PocketSphinx].
def _recognize_with_sphinx(audiofile, language):
    """Recognise the whole of an audio file with PocketSphinx.

    Shared implementation behind the per-language entry points below.

    Args:
        audiofile: Path handed to ``sr.AudioFile``.
        language: Language code passed to ``recognize_sphinx``.

    Returns:
        The recognised text, or ``None`` when recognition fails for any
        reason (the reason is printed).
    """
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audiofile) as source:
            # Read the entire file into a single audio segment.
            audio = recognizer.record(source)
        return recognizer.recognize_sphinx(audio, language=language)
    except sr.UnknownValueError:
        print("无法理解")
    except sr.RequestError as e:
        print("error; {0}".format(e))
    except Exception as b:
        # Anything else (unreadable file, unsupported format, ...) is reported
        # and turned into None; KeyboardInterrupt / SystemExit still propagate.
        print(b)
    return None


# Offline recognition of Mandarin Chinese audio [PocketSphinx].
def audioTotextForlocal_zh_CN(audiofile):
    """Recognise ``audiofile`` with the ``zh-CN`` Sphinx language setting.

    Returns the recognised text, or ``None`` on failure.
    """
    return _recognize_with_sphinx(audiofile, "zh-CN")


# Offline recognition of US English audio [PocketSphinx].
def audioTotextForlocal_en_US(audiofile):
    """Recognise ``audiofile`` with the ``en-US`` Sphinx language setting.

    Returns the recognised text, or ``None`` on failure.
    """
    return _recognize_with_sphinx(audiofile, "en-US")
# Speech recognition through iFLYTEK.
def audioTotext_by_ifly():
    """Run the iFLYTEK speech-to-text helper on microphone input.

    Takes no arguments and returns ``None``; whatever the helper returns is
    discarded.
    """
    # Imported at call time, so the helper is only loaded when this function runs.
    from Speech_and_Text import speech_to_text_ifly as recognize_ifly

    # File-based alternative:
    #   recognize_ifly(audio_path="path_of_audio", if_microphone=False)
    recognize_ifly(if_microphone=True)
# Speech-to-text with the CMU helper. Despite the function name (and the
# original note, which mentioned the microphone), the audio is read from a file.
def frommkaudiotoText_by_cmu(filename):
    """Run ``speech_to_text_cmu`` on the audio file ``filename``.

    Returns ``None``; whatever the helper returns is discarded.
    """
    # Microphone alternative: speech_to_text_cmu(if_microphone=True)
    options = {"audio_path": filename, "if_microphone": False}
    speech_to_text_cmu(**options)
# Sample-rate conversion, which also has the effect of compressing the audio.
def cylchange(srcfile, targetfile, cyl):
    """Convert ``srcfile`` to ``targetfile`` with ffmpeg at sample rate ``cyl``.

    Args:
        srcfile: Path of the input audio file.
        targetfile: Path of the file to write.
        cyl: Value passed to ffmpeg as the ``ar`` output option.

    Returns:
        Always ``None``; a failure is printed rather than raised.
    """
    try:
        stream = ffmpeg.input(srcfile)
        stream = stream.output(targetfile, ar=cyl)
        stream.run()
    except BaseException as err:
        print(err)
    return None
# Convert the sample data of a local audio file from float to int.
def local_audiofile_OnFloatToInt(srcfile):
    """Placeholder for a float-to-int conversion; not implemented.

    Ignores ``srcfile`` and returns ``None``.
    """
    return None
if __name__ == '__main__':
    # Manual demo: recognise a sample file offline, resample another one,
    # then run the English recogniser.
    # audiofile_cn = u'./data/textToaudioTest.mp3'
    audiofile_cn = u'./data/test.wav'
    audiofile_en = u'./data/testfile.mp3'
    # NOTE(review): 25050 is an unusual rate (22050 is the common one) - confirm.
    sample_rate = 25050
    targetfile = f'./data/testfile{sample_rate}.mp3'

    # audioTotextForBaidu(audiofile_cn)
    text_cn = audioTotextForlocal_zh_CN(audiofile_cn)
    print(text_cn)

    cylchange(audiofile_en, targetfile, sample_rate)

    # NOTE(review): the English recogniser is given the Chinese sample file,
    # not the English file or the resampled output - confirm this is intended.
    text_en = audioTotextForlocal_en_US(audiofile_cn)
    print(text_en)
    # frommkaudiotoText_by_cmu(audiofile_en)