python调用百度语音识别实现大音频文件语音识别功能
作者:septwolves2015 发布时间:2023-11-29 00:59:53
标签:python,语音识别,百度语音
本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下
实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。
# coding: utf-8
import base64
import json
import os
import subprocess
import time
import urllib2
import uuid

import requests

from inc import db_config
from inc import rtysdb
class BaiduRest:
    """Small client for Baidu's REST speech endpoints (synthesis and recognition).

    An access token is requested when the instance is created and is reused
    by every later call.
    """

    def __init__(self, cu_id, api_key, api_secert):
        """Store the endpoint templates and fetch an access token.

        Args:
            cu_id: Value sent as ``cuid`` with every request.
            api_key: Sent as ``client_id`` when requesting the token.
            api_secert: Sent as ``client_secret`` when requesting the token.
                The misspelled name is kept so keyword callers keep working.
        """
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
        self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
        self.upvoice_url = 'http://vop.baidu.com/server_api'
        self.cu_id = cu_id
        self.get_token(api_key, api_secert)

    def get_token(self, api_key, api_secert):
        """Request an access token and store it in ``self.token_str``.

        Returns:
            True once the token has been stored.

        Raises:
            KeyError: If the JSON response has no ``access_token`` field.
        """
        token_url = self.token_url % (api_key, api_secert)
        response = urllib2.urlopen(token_url)
        try:
            r_str = response.read()
        finally:
            # Close the HTTP response explicitly instead of waiting for GC.
            response.close()
        token_data = json.loads(r_str)
        self.token_str = token_data['access_token']
        return True

    # Speech synthesis
    def text2audio(self, text, filename):
        """Synthesize *text* and write the raw response body to *filename*.

        The response is written as-is; it is not checked for being audio.

        Returns:
            True after the file has been written.
        """
        get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
        response = urllib2.urlopen(get_url)
        try:
            voice_data = response.read()
        finally:
            response.close()
        # ``with`` guarantees the file is closed even if the write fails.
        with open(filename, 'wb') as voice_fp:
            voice_fp.write(voice_data)
        return True

    # Speech recognition
    def audio2text(self, filename):
        """Upload the audio file *filename* and return the recognized text.

        The request declares the payload as 8000 Hz, single-channel WAV.

        Returns:
            The first entry of the response's ``result`` list when the
            response's ``err_msg`` equals ``'success.'``; otherwise False.
        """
        with open(filename, 'rb') as wav_fp:
            voice_data = wav_fp.read()
        data = {
            'format': 'wav',
            'rate': 8000,
            'channel': 1,
            'cuid': self.cu_id,
            'token': self.token_str,
            # Length of the raw audio in bytes, before base64 encoding.
            'len': len(voice_data),
            # b64encode never emits newlines; decoding to text keeps the
            # payload JSON-serializable on both Python 2 and Python 3.
            'speech': base64.b64encode(voice_data).decode('ascii'),
        }
        result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
        data_result = result.json()
        candidates = data_result.get('result')
        # A missing ``err_msg`` or an empty ``result`` list is treated as a
        # failed recognition rather than raising KeyError / IndexError.
        if data_result.get('err_msg') == 'success.' and candidates:
            return candidates[0]
        return False
def test_voice(voice_file):
    """Run speech recognition on *voice_file* using the built-in account.

    A new ``BaiduRest`` client is created on every call.

    Returns:
        Whatever ``BaiduRest.audio2text`` returns for the file.
    """
    # NOTE(review): the API key and secret are hard-coded in source; consider
    # loading them from configuration instead.
    credentials = ("vossGHIgEETS6IMRxBDeahv8", "3c1fe6a6312f41fa21fa2c394dad5510")
    recognizer = BaiduRest("0-57-7B-9F-1F-A1", *credentials)
    return recognizer.audio2text(voice_file)
def get_master_audio(check_status='cut_status'):
    """Fetch the master recordings that still need a given processing step.

    Args:
        check_status: ``'cut_status'`` selects rows with ``status=0``;
            ``'finished_status'`` selects rows with ``finished_status=0``.

    Returns:
        The rows returned by ``rtysdb.select_data`` (columns id, url,
        time_long, sharps), or False when *check_status* is not one of the
        two known values or the query yields nothing truthy.
    """
    queries = (
        ('cut_status',
         "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"),
        ('finished_status',
         "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"),
    )
    for status_name, query in queries:
        if check_status == status_name:
            rows = rtysdb.select_data(query, 'more')
            return rows or False
    # Unknown selector: nothing to query.
    return False
def go_recognize(master_id):
    """Recognize pending segments of one master recording and store the text.

    Up to 10 ``ocenter_section`` rows with ``rid = master_id`` and
    ``status = 0`` are processed in ascending ``id`` order. For each segment
    whose file exists, the recognized text is saved on the segment row and
    appended to the parent ``ocenter_recognition.content``. A segment that
    cannot be recognized is only marked ``status = 1``. After the batch the
    master row gets ``finished_status = 1``.

    Args:
        master_id: Integer id of the ``ocenter_recognition`` row.

    Returns:
        False when no pending segment rows exist, otherwise None.
    """
    section_path = db_config.SYS_PATH
    sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
    record = rtysdb.select_data(sql, 'more')
    if not record:
        return False
    for rec in record:
        section_id, parent_id, relative_url = rec[0], rec[1], rec[2]
        voice_file = section_path + '/' + relative_url
        if not os.path.exists(voice_file):
            continue
        # The leftover debug ``print`` / ``exit(0)`` that followed this call
        # was removed: it ended the process before any result was stored.
        result = test_voice(voice_file)
        if result:
            # NOTE(review): the recognized text is interpolated into SQL
            # unescaped; a quote in the text will break the statement. Switch
            # to a parameterized call if rtysdb offers one.
            sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result, 1, section_id)
            rtysdb.do_exec_sql(sql)
            parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (parent_id))
            if parent_content:
                # Treat an empty/NULL parent content as '' so the first
                # segment can be appended without a TypeError.
                new_content = (parent_content[1] or '') + result
                update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content, parent_id)
                rtysdb.do_exec_sql(update_content_sql)
        else:
            # The original passed three values to a two-placeholder format
            # string, which raised TypeError; only status and id belong here.
            rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (1, section_id))
        # NOTE(review): indentation was lost in the source; the pause is
        # assumed to apply after every processed segment.
        time.sleep(5)
    # The source attaches this to the loop as a for/else; with no ``break``
    # in the loop it always runs, so it is written as a plain statement.
    # NOTE(review): the master is marked finished after one batch of at most
    # 10 segments - confirm this is intended for longer recordings.
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
def ffmpeg_convert():
    """Convert each not-yet-cut master recording to 8000 Hz mono WAV and cut it.

    For every row returned by ``get_master_audio('cut_status')`` the source
    file is converted with ffmpeg into ``Uploads/Convert/convert_<uuid>.wav``
    under ``db_config.SYS_PATH``. When the converted file exists it is handed
    to ``ffmpeg_cut`` and the master row is updated with ``status=1`` and the
    converted file's relative name.
    """
    section_path = db_config.SYS_PATH
    used_audio = get_master_audio('cut_status')
    if not used_audio:
        return
    for audio in used_audio:
        master_id, source_url, sharps = audio[0], audio[1], audio[3]
        audio_path = section_path + '/' + source_url
        convert_name = "Uploads/Convert/convert_" + str(uuid.uuid1()) + ".wav"
        convert_path = section_path + "/" + convert_name
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact, and call() waits for ffmpeg to finish so the
        # existence check below sees the completed file.
        command = ["ffmpeg", "-i", audio_path, "-ar", "8000", "-ac", "1", "-f", "wav", convert_path]
        try:
            subprocess.call(command)
        except OSError:
            # ffmpeg could not be started; skip this recording, as the
            # original shell-based call effectively did.
            continue
        if os.path.exists(convert_path):
            ffmpeg_cut(convert_name, sharps, master_id)
            sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name, master_id)
            rtysdb.do_exec_sql(sql)
def _segment_offset(index, segment_seconds=30):
    """Return the start offset of segment *index* formatted as ``HH:MM:SS``."""
    hours, remainder = divmod(index * segment_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


def ffmpeg_cut(convert_name, sharps, master_id):
    """Cut a converted recording into 30-second segments and register them.

    Each segment is written to ``Uploads/Section/<uuid>-<n>.wav`` under
    ``db_config.SYS_PATH`` and a matching ``ocenter_section`` row with
    ``status`` 0 is inserted.

    Args:
        convert_name: Path of the converted WAV relative to ``SYS_PATH``.
        sharps: Number of segments to produce; nothing happens when it is
            empty or not positive.
        master_id: Stored as ``rid`` on every inserted segment row.
    """
    if not sharps or sharps <= 0:
        return
    section_path = db_config.SYS_PATH
    source_path = section_path + '/' + convert_name
    for i in range(sharps):
        # Plain arithmetic replaces the localtime()/"- 8 hours" trick, which
        # was only correct on a UTC+8 host and broke at 10 hours or more.
        start_time = _segment_offset(i)
        db_store_name = "Uploads/Section/" + str(uuid.uuid1()) + '-' + str(i + 1) + ".wav"
        section_name = section_path + "/" + db_store_name
        # Argument list instead of a shell string, and call() waits for each
        # cut to finish. "ffmpeg" (not "ffmpeg.exe") matches ffmpeg_convert.
        subprocess.call([
            "ffmpeg", "-i", source_path, "-vn", "-acodec", "copy",
            "-ss", start_time, "-t", "00:00:30", section_name,
        ])
        # As before, the row is inserted whether or not ffmpeg succeeded.
        rtysdb.insert_one('ocenter_section', {
            'rid': master_id,
            'url': db_store_name,
            'create_time': int(time.time()),
            'status': 0,
        })
if __name__ == "__main__":
    # Convert and cut the recordings that are still waiting, then run
    # recognition for every master recording not yet marked finished.
    ffmpeg_convert()
    for pending in get_master_audio('finished_status') or ():
        go_recognize(pending[0])
来源:https://blog.csdn.net/septwolves2015/article/details/78554524
0
投稿
猜你喜欢
- javascript代码编写在页面中实现页内搜索功能,类似Word等文本编辑软件里的搜索功能,只要是页面中的字符(别管是显在的还是隐蔽在文本
- ‘Microsoft OLE DB Provider for ODBC Drivers (0x80004005) [Microsoft][O
- 关于IE9,微软逐渐提供越来越多的内容。很多人想知道IE9有多少模式和渲染引擎,今天IE项目主管Marc Silbey就此问题进行了解答。S
- php中session_id()函数原型及说明session_id()函数说明:stringsession_id([string$id])s
- 1.漏洞介绍在XHTML 1.0标准下,使用特殊构造的CSS样式,在Internet Explorer 7.0
- 基本信息程序包名称:基于.net开发的遵循web标准的个人站点程序包下载开发者主页:www.ceocio.net软件大小:1.84mb安装环
- Python支持多种图形界面的第三方库,包括:wxWidgetsQtGTKTkinter: Tkinter 模块(Tk 接口)是 Pytho
- 一、图像二值化1.效果2.源码import cv2import numpy as npimport matplotlib.pyplot as
- #/usr/bin/env python#-*- coding:utf-8 -*-"""1.解析 cronta
- 全文索引在 MySQL 中是一个 FULLTEXT 类型索引。FULLTEXT 索引用于 MyISAM 表,可以在 CREATE TABLE
- 对于Dreamweaver这个广大用户早有听闻的网页编辑工具,除功能强大外,Dreamweaver一向被推崇为同类网页编辑软件中产生垃圾代码
- documentFragment 是一個無父對象的document對象.他支持以下DOM2方法:appendChild, cloneNode
- \\create by ahuinan 2009-6-22 \\up by ahuian 2009-6-23 \\up by ahuinan
- 前言说到二手房信息,不知道你们心里最先跳出来的公司(网站)是什么,反正我心里第一个跳出来的是网站是 58 同城。哎呦,我这暴脾气,想到就赶紧
- 获取首页元素信息:目标 test_URL:http://www.xxx.com.cn/首先检查元素,a 标签下是我们需要爬取得链接,通过获取
- 前言其实Beautiful Soup 模块除了能够搜索和导航之外,还能够修改 HTML/XML 文档的内容。这就意味着能够添加或删除标签、修
- 事件流浏览器中的事件流意味着页面上可有不仅一个,甚至多个元素响应同一个事件。而这一个或多个元素响应事件发生的先后顺序在各个浏览器(主要针对I
- 我们到目前为止所谈到的SQL语句相对较为简单,如果再能通过标准的recordset循环查询,那么这些语句也能满足一些更复杂的要求。不过,何必
- 由于分形树具有对称性,自相似性,所以我们可以用递归来完成绘制。只要确定开始树枝长、每层树枝的减短长度和树枝分叉的角度,我们就可以把分形树画出
- 本文实例为大家分享了JS实现canvas简单小画板的具体代码,供大家参考,具体内容如下Html部分:<!DOCTYPE html>