用python呼叫百度語音識別api批量處理本地語音檔案

Mr.Hou(2015)發表於2020-11-08

需要對本地的錄音檔案(.wav 格式)進行語音識別。因阿里賬號的試用版已過期,所以嘗試透過百度的 API 進行識別,程式碼如下:

#匯入需要用到的庫
import os
import re
import requests
import time
import base64
import webbrowser
import xlrd
import pandas as pd
from copy import deepcopy

#需要安裝百度api的庫 pip install baidu_aip
from aip import AipSpeech

# Folder that holds the recordings (placeholder: replace with a real path).
audio_dir = r'你的錄音檔案存放的資料夾地址'

# List every entry found in the recordings folder.
allfiles = os.listdir(audio_dir)

# Full path of each entry, in the same order as `allfiles`.
# os.path.join is used because a raw string literal cannot end with a single
# backslash (r'...\' is a SyntaxError), and it picks the right separator.
filesList = [os.path.join(audio_dir, name) for name in allfiles]

def get_audio(file):
    """Read a file and return its raw contents.

    Args:
        file: Path of the file to read.

    Returns:
        The file's contents as bytes.
    """
    with open(file, 'rb') as f:
        return f.read()

def getToken(HOST):
    """Request an access token from the OAuth endpoint.

    Args:
        HOST: Full token URL, already containing the client id and client
            secret as query parameters.

    Returns:
        The value of the 'access_token' field of the JSON response.

    Raises:
        KeyError: If the response has no 'access_token' field.
        ValueError: If the response body cannot be parsed as JSON.
    """
    # Parse the body as JSON instead of eval()-ing it: eval would execute
    # whatever text the server returned, and it also fails on JSON literals
    # such as true/false/null.
    payload = requests.get(HOST).json()
    return payload['access_token']

# --- Credentials ---
# Creating a recognition task in the Baidu console provides the ID and keys
# below, together with a free quota (valid for 6 months, 20,000 calls only;
# presumably paid once that is used up).
APP_ID = '*******'
API_KEY = '*************'
SECRET_KEY = '*********'
APIKey, SecretKey = API_KEY, SECRET_KEY

# --- Token endpoint ---
base_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
HOST = base_url % (APIKey, SecretKey)

# --- Audio parameters ---
dev_pid = 1637      # sent as 'dev_pid' in the recognition request
framerate = 16000   # sampling rate
num_samples = 2000  # number of sample points
channels = 1        # number of channels
sampwidth = 2       # sample width, 2 bytes

# --- Request fields ---
FORMAT = 'wav'
RATE = '16000'
CHANNEL = 1
CUID = APP_ID  # the application ID doubles as the 'cuid' request field

def speech_to_text(fpath,HOST):
    """Send one audio file to the recognition endpoint and return the outcome.

    Args:
        fpath: Path of the audio file to recognise.
        HOST: Token URL handed to getToken().

    Returns:
        A dict with two keys:
        'result_text' - the 'result' field of the JSON response, or ['異常']
            when the response lacks the expected fields;
        'err_msg' - the 'err_msg' field of the JSON response, or "異常" in
            that same fallback case.
    """
    speech_data = get_audio(fpath)
    # NOTE(review): a new token is requested for every file; consider
    # fetching it once and reusing it if the call volume matters.
    token = getToken(HOST)

    data = {
        'format': FORMAT,
        'rate': RATE,
        'channel': CHANNEL,
        'cuid': CUID,
        # Length of the raw audio bytes, not of the base64 text.
        'len': len(speech_data),
        'speech': base64.b64encode(speech_data).decode('utf-8'),
        'token': token,
        'dev_pid': dev_pid,
    }

    url = 'https://vop.baidu.com/server_api'
    headers = {'Content-Type': 'application/json'}
    r = requests.post(url, json=data, headers=headers)
    Result = r.json()
    try:
        result_text = Result['result']
        err_msg = Result['err_msg']
    except (KeyError, TypeError):
        # The response did not have the expected shape (for example an error
        # reply without 'result'); report the placeholder values instead.
        result_text = ['異常']
        err_msg = "異常"
    return {
        "result_text": result_text,
        "err_msg": err_msg,
    }

# Recognise every recording and save the results in batches.
Result_text0 = []  # first entry of 'result_text' for each file ("" if none)
Result_text1 = []  # complete 'result_text' value for each file
err_msg = []       # 'err_msg' value for each file
allfiles_da = []   # file name for each processed file
testcount = 0


def _save_results():
    """Write everything collected so far to the Excel file and return the frame."""
    frame = pd.DataFrame({'allfiles_da':allfiles_da,'Result_text1':Result_text1,'Result_text':Result_text0,'err_msg':err_msg})
    frame.to_excel(r'你需要儲存的路徑',index=False)
    return frame


for testcount, (fname, fs) in enumerate(zip(allfiles, filesList)):
    # Work out all values for this file first and append afterwards, so the
    # four result lists can never end up with different lengths.
    try:
        yuyins = speech_to_text(fs,HOST)
        full_text = yuyins["result_text"]
        msg = yuyins["err_msg"]
    except Exception:
        # Best effort: record the failure for this file and keep going.
        print("全部異常")
        full_text = first_text = msg = "全部異常"
    else:
        try:
            first_text = full_text[0]
        except (IndexError, KeyError, TypeError):
            first_text = ""

    Result_text1.append(full_text)
    Result_text0.append(first_text)
    err_msg.append(msg)
    allfiles_da.append(fname)

    if testcount % 10 == 0:  # checkpoint once every 10 files
        f1 = _save_results()
    print("正在識別第{}個".format(testcount))

# Final save: without it, files processed after the last checkpoint would
# never be written to disk.
if allfiles_da:
    f1 = _save_results()
testcount = len(allfiles_da)









 

相關文章