好玩的github專案-golang實現的gmm-ubm演算法的說話人識別(聲紋識別)引擎

wwdyy發表於2016-11-05

轉自github使用者liuxp0827

簡介

govpr 是以 golang 實現、基於 GMM-UBM 的說話人識別(聲紋識別)引擎,可用於語音驗證、身份識別等場景. 目前暫時僅支援漢語數字的語音,語音格式為 wav 格式(取樣率 16000 Hz,16 bits,單聲道)

安裝

go get github.com/liuxp0827/govpr

示例

如下是一個簡單的示例. 可跳轉至 example 檢視詳細的例子,示例中的語音內容為8位純數字. 語音驗證後會得到一個得分,可設定閾值來判斷驗證語音是否為註冊訓練者本人.

package main

import (
    "github.com/liuxp0827/govpr"
    "github.com/liuxp0827/govpr/log"
    "github.com/liuxp0827/govpr/waveIO"
    "io/ioutil"
)

// engine is a thin wrapper around a govpr.VPREngine used by this example.
type engine struct {
    // vprEngine is the wrapped govpr engine; DestroyEngine sets it to nil.
    vprEngine *govpr.VPREngine
}

// NewEngine returns an engine wrapping the govpr.VPREngine that
// govpr.NewVPREngine creates from the given arguments, which are forwarded
// unchanged.
func NewEngine(sampleRate, delSilRange int, ubmFile, userModelFile string) *engine {
    wrapped := new(engine)
    wrapped.vprEngine = govpr.NewVPREngine(sampleRate, delSilRange, ubmFile, userModelFile)
    return wrapped
}

// DestroyEngine drops this wrapper's reference to the underlying
// govpr.VPREngine by setting the field to nil.
func (e *engine) DestroyEngine() {
    e.vprEngine = nil
}

// TrainSpeech adds every buffer in buffers to the engine via AddTrainBuffer
// and then calls TrainModel.
//
// ClearTrainBuffer and ClearAllBuffer are invoked when TrainSpeech returns,
// on success and on every failure path, so buffers added before a failed
// AddTrainBuffer call are not left behind for a later call.
//
// The first error encountered is logged and returned; nil means the model
// was trained.
func (e *engine) TrainSpeech(buffers [][]byte) error {
    // Register the cleanup before anything is added so that an early return
    // from the loop below is covered too. Deferred calls run in reverse
    // order: ClearAllBuffer first, then ClearTrainBuffer.
    defer e.vprEngine.ClearTrainBuffer()
    defer e.vprEngine.ClearAllBuffer()

    for _, buf := range buffers {
        if err := e.vprEngine.AddTrainBuffer(buf); err != nil {
            log.Error(err)
            return err
        }
    }

    if err := e.vprEngine.TrainModel(); err != nil {
        log.Error(err)
        return err
    }

    return nil
}

// RecSpeech submits buffer for verification, runs VerifyModel and logs the
// resulting score at info level.
//
// The score is only logged, not returned. Any error from AddVerifyBuffer or
// VerifyModel is logged and returned; nil means verification ran and the
// score was logged.
func (e *engine) RecSpeech(buffer []byte) error {
    addErr := e.vprEngine.AddVerifyBuffer(buffer)
    // Registered right after the add, before its error is inspected, so the
    // verify buffer is cleared on every return path below.
    defer e.vprEngine.ClearVerifyBuffer()
    if addErr != nil {
        log.Error(addErr)
        return addErr
    }

    if verifyErr := e.vprEngine.VerifyModel(); verifyErr != nil {
        log.Error(verifyErr)
        return verifyErr
    }

    log.Infof("vpr score: %f", e.vprEngine.GetScore())
    return nil
}

// main loads five training recordings and one verification recording,
// trains the user model from the former and then verifies the latter.
//
// Any failure is logged and ends the program; verification is only attempted
// when training succeeded.
func main() {
    log.SetLevel(log.LevelDebug)

    // Arguments: sampleRate=16000, delSilRange=50, UBM file, user model file.
    vprEngine := NewEngine(16000, 50, "../ubm/ubm", "model/test.dat")
    trainlist := []string{
        "wav/train/01_32468975.wav",
        "wav/train/02_58769423.wav",
        "wav/train/03_59682734.wav",
        "wav/train/04_64958273.wav",
        "wav/train/05_65432978.wav",
    }

    trainBuffer := make([][]byte, 0, len(trainlist))
    for _, file := range trainlist {
        buf, err := loadWaveData(file)
        if err != nil {
            log.Error(err)
            return
        }
        trainBuffer = append(trainBuffer, buf)
    }

    verifyBuffer, err := waveIO.WaveLoad("wav/verify/34986527.wav")
    if err != nil {
        log.Error(err)
        return
    }

    // TrainSpeech logs its own failure, so there is nothing to add here;
    // just do not go on to verify when training failed.
    if err := vprEngine.TrainSpeech(trainBuffer); err != nil {
        return
    }

    // RecSpeech logs its own failure and nothing follows it, so its error
    // needs no further handling here.
    _ = vprEngine.RecSpeech(verifyBuffer)
}

// waveHeaderSize is the number of leading bytes loadWaveData strips as the
// .wav header.
// NOTE(review): a fixed 44-byte header is assumed; files carrying extra
// header chunks would need real header parsing — confirm against the inputs.
const waveHeaderSize = 44

// shortWaveFileError is returned by loadWaveData for a file too small to
// contain the header. Its value is the path of the offending file.
type shortWaveFileError string

// Error implements the error interface.
func (e shortWaveFileError) Error() string {
    return "wave file is shorter than the 44-byte header: " + string(e)
}

// loadWaveData reads file and returns its contents with the first
// waveHeaderSize bytes (the .wav header) removed.
//
// It returns the read error if the file cannot be read, and a
// shortWaveFileError if the file holds fewer than waveHeaderSize bytes. A
// file of exactly waveHeaderSize bytes yields an empty, non-nil slice.
func loadWaveData(file string) ([]byte, error) {
    data, err := ioutil.ReadFile(file)
    if err != nil {
        return nil, err
    }
    // Slicing past the end would panic, so reject truncated files up front.
    if len(data) < waveHeaderSize {
        return nil, shortWaveFileError(file)
    }
    return data[waveHeaderSize:], nil
}

Github地址:https://github.com/liuxp0827/govpr

相關文章