Bing Speech微軟必應語音認知服務-文字語音互轉

山谷大叔發表於2017-09-06

原文網址 : https://blog.csdn.net/shanguuncle/article/details/77867424

微軟

本專案使用 Bing Speech API, Bot Framework, LUIS 實現在Unity內的中文語音文字互轉，還可以在UWP應用完成語義分析。

1.新增必應語音 API 到你的訂閱（原文此處的連結已遺失；一般是在 Azure 入口網站中新增），點選建立，並獲取API金鑰；

2.修改 SpeechManager.cs的Subscription Key；

3.進入 LUIS管理後臺，新增LUIS App　　

4.獲取 LUIS App Id 和 LUIS Endpoint Key

5.修改SpeechToText.cs的App Id

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using System.IO;
using System;

/// <summary>
/// Converts text to Chinese speech through the Bing Speech synthesize endpoint and plays
/// the returned audio on the <see cref="AudioSource"/> attached to this GameObject.
/// </summary>
public class TextToSpeech : Singleton<TextToSpeech>
{
    // SSML request template; {0} is replaced with the XML-escaped text to speak.
    private const string SSML = "<speak version='1.0' xml:lang='zh-CN'><voice xml:lang='zh-CN' xml:gender='Male' name='Microsoft Server Speech Text to Speech Voice (zh-CN, Kangkang, Apollo)'>{0}</voice></speak>";

    AudioSource audioSource;

    // Input field whose current text is spoken by the parameterless SpeakText().
    public InputField inputText;

    // Caches the AudioSource used for playback.
    void Start () {
        audioSource = gameObject.GetComponent<AudioSource>();
    }

    /// <summary>
    /// Coroutine that posts <paramref name="text"/> (wrapped in SSML) to the Bing Speech
    /// synthesize endpoint and plays the returned WAV data.
    /// </summary>
    /// <param name="text">Plain text to speak; null is treated as empty text.</param>
    /// <returns>An iterator intended to be run with StartCoroutine.</returns>
    public IEnumerator<object> TextToAudio(string text)
    {
        // The coroutine can be started before Start() has run, so resolve the component lazily.
        if (audioSource == null)
        {
            audioSource = gameObject.GetComponent<AudioSource>();
        }
        if (audioSource == null)
        {
            Debug.LogError("TextToSpeech: no AudioSource component found on this GameObject.");
            yield break;
        }

        string requestUri = "https://speech.platform.bing.com/synthesize";
        // Escape the text so characters such as '&' or '<' cannot break the SSML document.
        byte[] buffer = System.Text.Encoding.UTF8.GetBytes(string.Format(SSML, EscapeXml(text)));
        var headers = new Dictionary<string, string>() {
            { "Authorization", "Bearer " + SpeechManager.Instance.GetToken() },
            { "Content-Type", @"application/ssml+xml" },
            { "X-Microsoft-OutputFormat", @"riff-16khz-16bit-mono-pcm"},
            { "X-Search-AppId", Guid.NewGuid().ToString().Replace("-", "")},
            { "X-Search-ClientID", Guid.NewGuid().ToString().Replace("-", "")},
            { "User-Agent", "TTSHololens"}
        };

        // Stop whatever is currently playing before requesting the new clip.
        audioSource.Stop();
        WWW www = new WWW(requestUri, buffer, headers);
        yield return www;

        // Do not try to decode an error response as audio.
        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogError("TextToSpeech: synthesize request failed: " + www.error);
            yield break;
        }

        audioSource.clip = www.GetAudioClip(false, true, AudioType.WAV);
        audioSource.Play();
    }

    /// <summary>
    /// Starts speaking the given text.
    /// </summary>
    /// <param name="text">Plain text to speak.</param>
    public void SpeakText(string text)
    {
        StartCoroutine(TextToAudio(text));
    }

    /// <summary>
    /// Starts speaking the current content of <see cref="inputText"/>.
    /// </summary>
    public void SpeakText()
    {
        if (inputText == null)
        {
            Debug.LogWarning("TextToSpeech: inputText is not assigned; nothing to speak.");
            return;
        }
        StartCoroutine(TextToAudio(inputText.text));
    }

    /// <summary>
    /// Replaces the five XML special characters with their predefined entities.
    /// </summary>
    /// <param name="text">Raw text; may be null.</param>
    /// <returns>The escaped text, or an empty string when <paramref name="text"/> is null or empty.</returns>
    private static string EscapeXml(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }
        // '&' must be replaced first so the entities produced below are not escaped again.
        return text.Replace("&", "&amp;")
                   .Replace("<", "&lt;")
                   .Replace(">", "&gt;")
                   .Replace("\"", "&quot;")
                   .Replace("'", "&apos;");
    }
}

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.IO;
using System;
using BotClient;
using UnityEngine.UI;
#if WINDOWS_UWP
using System.Threading.Tasks;
#endif

/// <summary>
/// Records a short microphone clip, posts it to the Bing Speech recognize endpoint and
/// passes the recognized text to the UI, to <see cref="TextToSpeech"/> and, on UWP builds,
/// to the bot service.
/// </summary>
public class SpeechToText :Singleton<SpeechToText>
{
    // Message shown and spoken whenever recognition cannot produce a result.
    private const string NotUnderstoodMessage = "對不起，沒聽清";

    // Frequency passed to Microphone.Start; the recognize request declares samplerate=16000.
    private const int RecordingFrequency = 16000;

    public int messageLength = 3;      // Recording length, in seconds.
    private bool recording = false;
    private static string deviceName = string.Empty;  // Microphone device name passed to the Microphone API.
    private int samplingRate;          // Second out value of Microphone.GetDeviceCaps; recording itself uses RecordingFrequency.
    private AudioClip audioClip;

    BotService botService;

    private AudioSource audioSource;

    // Queries the microphone capabilities, creates the bot service and caches the AudioSource.
    void Start () {
        int unUsed;
        Microphone.GetDeviceCaps(deviceName, out unUsed, out samplingRate);
        botService = new BotService();
        audioSource = gameObject.GetComponent<AudioSource>();
#if WINDOWS_UWP
        botService.StartConversation();
#endif
    }

    // Finishes the capture once the microphone reports it is no longer recording
    // while this component still believes a recording is in progress.
    void Update () {
        if (recording && !Microphone.IsRecording(deviceName))
        {
            RecordStop();
        }
    }


    /// <summary>
    /// Reports whether a recording started by <see cref="Record"/> is still in progress.
    /// </summary>
    /// <returns>True between Record() and RecordStop().</returns>
    public bool IsRecording()
    {
        return recording;
    }

    /// <summary>
    /// Coroutine that uploads the audio file to the Bing Speech recognize endpoint and
    /// handles the recognized text.
    /// </summary>
    /// <param name="filepath">Path of the WAV file to upload.</param>
    /// <returns>An iterator intended to be run with StartCoroutine.</returns>
    private IEnumerator<object> AudioToText(string filepath)
    {
        string requestUri = "https://speech.platform.bing.com/recognize";
        requestUri += @"?scenarios=smd";
        requestUri += @"&appid=xxxxxxxxxxxxxxxxxxxxxxxxxxxxx";// Your APP ID.
        requestUri += @"&locale=zh-CN";
        requestUri += @"&device.os=win10";
        requestUri += @"&version=3.0";
        requestUri += @"&format=json";
        requestUri += @"&instanceid=565D69FF-E928-4B7E-87DA-9A750B96D9E3";
        requestUri += @"&requestid=" + Guid.NewGuid().ToString();

        // Read the whole file and close it before the request starts, so the file handle
        // is not held open while the coroutine is suspended.
        byte[] buffer = null;
        try
        {
            buffer = ReadAudioFile(filepath);
        }
        catch (Exception ex)
        {
            Debug.LogWarning("SpeechToText: could not read '" + filepath + "': " + ex.Message);
        }
        if (buffer == null)
        {
            ReportNotUnderstood();
            yield break;
        }

        var headers = new Dictionary<string, string>() {
            { "Authorization", "Bearer " + SpeechManager.Instance.GetToken()},
            { "Content-Type", @"audio/wav; codec=""audio/pcm""; samplerate=16000" }
        };
        WWW www = new WWW(requestUri, buffer, headers);

        yield return www;

        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogWarning("SpeechToText: recognize request failed: " + www.error);
            ReportNotUnderstood();
            yield break;
        }

        try
        {
            string result = www.text;
            JSONObject jsonObj = new JSONObject(result);
            string resultStr = jsonObj.GetField("header").GetField("name").str;
            resultStr = TrimResultStr(resultStr);
            SpeechContronller.Instance.SetResponseText("口令：" + resultStr);
            TextToSpeech.Instance.SpeakText(resultStr);
            SpeechContronller.Instance.tipText.text ="";
#if WINDOWS_UWP
            SendMessage(resultStr);
#endif
        }
        catch (Exception ex)
        {
            // A response without the expected header/name fields ends up here.
            Debug.LogWarning("SpeechToText: could not handle recognize response: " + ex.Message);
            ReportNotUnderstood();
        }
    }

    /// <summary>
    /// Reads the complete content of a file into memory.
    /// </summary>
    /// <param name="filepath">Path of the file to read.</param>
    /// <returns>All bytes of the file.</returns>
    private static byte[] ReadAudioFile(string filepath)
    {
        using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read))
        {
            byte[] buffer = new byte[(int)fs.Length];
            int offset = 0;
            // Stream.Read may return fewer bytes than requested, so loop until the buffer is full.
            while (offset < buffer.Length)
            {
                int read = fs.Read(buffer, offset, buffer.Length - offset);
                if (read <= 0)
                {
                    throw new EndOfStreamException("Unexpected end of file: " + filepath);
                }
                offset += read;
            }
            return buffer;
        }
    }

    /// <summary>
    /// Shows and speaks the "not understood" message.
    /// </summary>
    private static void ReportNotUnderstood()
    {
        SpeechContronller.Instance.tipText.text = NotUnderstoodMessage;
        TextToSpeech.Instance.SpeakText(NotUnderstoodMessage);
    }

    // Semantic analysis through BotService (UWP builds only).
#if WINDOWS_UWP
    /// <summary>
    /// Sends the recognized text to the bot and speaks the text of the last returned
    /// activity (starting at index 1), or a fallback message when nothing usable comes back.
    /// </summary>
    /// <param name="message">Text to send to the bot.</param>
    private async void SendMessage(string message)
    {
        string result = "對不起，無法回答您的問題";
        try
        {
            if (await botService.SendMessage(message))
            {
                ActivitySet messages = await botService.GetMessages();
                if (messages != null && messages.activities != null)
                {
                    for (int i = 1; i < messages.activities.Length; i++)
                    {
                        result = messages.activities[i].text;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Exceptions escaping an async void method cannot be observed by the caller,
            // so log them here and fall back to the default answer.
            Debug.LogWarning("SpeechToText: bot request failed: " + ex.Message);
        }
        UnityEngine.WSA.Application.InvokeOnAppThread(() =>
        {
            // Read the returned text aloud.
            TextToSpeech.Instance.SpeakText(result);
        }, false); 
    } 
#endif

    /// <summary>
    /// Removes the last ideographic full stop ("。") from the recognized text so it does
    /// not affect later processing.
    /// </summary>
    /// <param name="result">Text returned by the Speech API; may be null.</param>
    /// <returns>The text without its last "。", or the input unchanged when there is none.</returns>
    private string TrimResultStr(string result)
    {
        if (string.IsNullOrEmpty(result))
        {
            return result;
        }
        // The char overload is an ordinal search; index 0 is a valid match position.
        int index = result.LastIndexOf('。');
        return index >= 0 ? result.Remove(index, 1) : result;
    }

    /// <summary>
    /// Starts a recording and switches the UI to its listening state.
    /// </summary>
    public void Record()
    {
        recording = true;
        if (audioSource != null)
        {
            audioSource.Stop();
        }

        SpeechContronller.Instance.SetMicrophoneIcon(true);
        SpeechContronller.Instance.SetTipText("正在聆聽中");
        SpeechContronller.Instance.SetResponseText("");

        // A capture is already running; keep it instead of starting a second one.
        if (Microphone.IsRecording(deviceName))
        {
            return;
        }
        audioClip = StartRecording();
    }

    /// <summary>
    /// Stops the recording, saves the captured audio to a file and starts recognition.
    /// </summary>
    public void RecordStop()
    {
        recording = false;

        SpeechContronller.Instance.SetMicrophoneIcon(false);
        SpeechContronller.Instance.SetTipText("思考中，請稍候");

        StopRecording();

        // No clip was captured (Record() was never called, or Microphone.Start returned null):
        // there is nothing to save or upload.
        if (audioClip == null)
        {
            Debug.LogWarning("SpeechToText: RecordStop called without a recorded clip.");
            ReportNotUnderstood();
            return;
        }

        string filename = "myfile.wav";
        var filepath = Path.Combine(Application.persistentDataPath, filename);
        // NOTE(review): assumes SavWav.Save resolves the bare file name against
        // Application.persistentDataPath, matching filepath above - confirm in SavWav.
        SavWav.Save(filename, audioClip);
        StartCoroutine(AudioToText(filepath));
    }

    /// <summary>
    /// Starts a non-looping microphone capture of <see cref="messageLength"/> seconds.
    /// </summary>
    /// <returns>The clip returned by Microphone.Start.</returns>
    private AudioClip StartRecording()
    {
        return Microphone.Start(deviceName, false, messageLength, RecordingFrequency);
    }

    /// <summary>
    /// Ends the microphone capture if one is running.
    /// </summary>
    private void StopRecording()
    {
        if (Microphone.IsRecording(deviceName))
        {
            Microphone.End(deviceName);
        }
    }
}

工程檔案地址

http://download.csdn.net/download/shanguuncle/9967050

參考資料

Bing Speech API文件
Bot Framework API文件

chrome語音文字互轉
2024-11-04
Chrome
語音轉文字工具，語音轉文字怎樣轉？
2019-06-12
Mac字轉語音工具Speech for Mac
2022-08-17
Mac
構建一個語音轉文字的WebApi服務
2023-12-07
WebAPI
Fish Speech 更新V1.5：領先的多語言文字轉語音模型
2024-12-06
模型
如何用Python語音合成，以及文字轉語音~
2022-09-23
Python
iOS---語音轉文字
2018-05-26
iOS
文字轉語音軟體 VPot v2411
2024-11-22
[js常用]文字轉化成語音
2018-12-01
JS
微軟利用AI技術使文字轉語音只需20分鐘
2019-05-28
微軟AI
AVFoundation 文字轉語音和音訊錄製播放
2019-04-19
音訊
PR語音轉字幕轉換外掛Speech to Text for Premiere Pro 2022
2023-02-06
REM
C# TTS-文字轉語音
2018-03-28
C#TTS
蘋果手機文字轉語音方法
2019-01-04
蘋果
Windows部署語音轉文字專案_Whisper
2024-07-04
Windows
前端語音轉文字實踐總結
2022-05-19
前端
文字到語音(tts)
2024-05-02
TTS
口播神器,基於Edge,微軟TTS(text-to-speech)文字轉語音免費開源庫edge-tts實踐(Python3.10)
2023-03-07
微軟TTSPython
使用科大訊飛語音轉文字的服務進行電話錄音分析
2020-02-16
5 款不錯的開源語音識別/語音文字轉換系統
2019-06-22
文字語音互相轉換系統設計
2024-04-24
gTTS: 強大的Python文字轉語音庫
2024-10-18
TTSPython
樹莓派語音互動--語音輸入識別
2020-11-24
樹莓派
雲知聲推出音庫定製服務，“硬核”智慧語音技術再上新高
2022-01-18
HTML5語音合成Speech Synthesis API簡介
2019-03-08
HTMLAPI
#801 - 認知協作#1：語音助手Webex Assistant
2019-11-30
Web
有這5款開源軟體，語音轉文字很簡單！
2020-04-06
aardio實現語音閱讀文字【包含選擇語音庫】
2024-08-02
語音轉文字從裡面擷取出時間
2018-11-02
如何在Python中將語音轉換為文字
2020-07-29
Python
語音互動的前世今生
2018-06-12
快速實現語音轉文字，還自帶翻譯
2019-06-25
Premiere Pro 2022離線語音轉文字教程，圖文！
2022-03-10
REM
搜狗輸入法語音轉文字體驗報告
2020-11-26
利用FastAPI和OpenAI-Whisper打造高效的語音轉錄服務
2024-07-17
ASTAPIOpenAI
ChatTTS,語氣韻律媲美真人的開源TTS模型,文字轉語音界的新魁首,對標微軟Azure-tts
2024-05-31
TTS模型微軟
微軟成功收購語音技術公司Nuance
2022-03-08
微軟
OpenAI Java SDK——chatgpt-java-v1.0.3更新支援GPT-3.5-Turbo，支援語音轉文字，語音翻譯。
2023-03-03
OpenAIJavaChatGPT
TTS 擂臺: 文字轉語音模型的自由搏擊場
2024-03-16
TTS模型

Bing Speech微軟必應語音認知服務-文字語音互轉

1.新增必應語音 API 到你的訂閱，地址 ，點選建立，並獲取API金鑰；

2.修改 SpeechManager.cs的Subscription Key；

3.進入 LUIS管理後臺 ，新增LUIS App

4、獲取 LUIS App Id 和 LUIS Endpoint Key

參考資料

相關文章

1.新增必應語音 API 到你的訂閱，地址，點選建立，並獲取API金鑰；

3.進入 LUIS管理後臺，新增LUIS App