ONNXRuntime學習筆記(四)

zq發表於2022-05-03

原文網址 : https://www.cnblogs.com/lee-zq/p/16219963.html

接上一篇在Python端的onnx模型驗證結果：上一篇在Pytorch和onnxruntime-gpu推理庫上分別進行了效果與效率的統計分析，結論是單張推理耗時遠低於最初設定的50ms目標（也就是速度比預期快很多）。這一篇我將在C++端寫個測試程式碼進行推理驗證。

一、onnxruntime的C++庫

AI模型部署肯定是要用C++的，這是毋庸置疑的，目前onnxruntime提供了適配很多程式語言介面的API，最常用的就是Python和C++，一個簡易一個高效，Python介面可用於快速驗證idea，C++介面適用於整合到推理引擎中來呼叫。C++總的來說是把效率排在第一位的，所以沒有像Python那樣強封裝，相對而言比較靈活，但又不像C那樣瑣碎，畢竟C++也是OOP語言。扯遠了，onnxruntime的c++庫可以從官方github下載到，可以直接下載對應的release版本，裡面包含了動態庫和標頭檔案，如下圖，我下載的是windows版本的。直接匯入到我們的推理引擎中來呼叫就可以了。

一般我們引入第三方庫會包含兩部分內容：一個是標頭檔案，這裡面是所有我們可以呼叫的函式宣告、錯誤型別等等；另一部分是庫檔案。庫檔案分動態庫和靜態庫，win版的動態庫檔案還有對應的動態庫的匯入庫（.lib結尾），這很容易和靜態庫混淆。對於linux來說，動態庫.so檔案中已經包含了符號表，符號表儲存所有函式地址；而對於win來說，動態庫的函式實現都儲存在.dll中，與之還有一個配套的同名.lib檔案單獨儲存函式符號表。這個匯入庫在建置（連結）階段就需要明確位置，需要配置到庫目錄列表裡面，並確定是哪一個.lib檔案，連結的時候會將其中的符號資訊併入可執行檔案；而真正的dll是在執行期間才去載入的，所以dll需要放置到合適的位置，讓可執行檔案能找到。

二、測試程式碼

這裡我把建立一個呼叫onnxruntime庫進行推理的相關配置都打包到一個class裡面，這樣方便管理，程式碼如下：

#include <time.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <onnxruntime_cxx_api.h>
#include "opencv2/opencv.hpp"

using namespace cv;
using namespace std;

// Number of classes the model scores; sizes the output buffer and the second
// dimension of the output tensor shape.
const int class_num = 10;
// Height, width and channel count of one network input; together with
// batch_size they size the input buffer and the input tensor shape.
const int input_height = 32;
const int input_width = 32;
const int input_channel = 3;

// Number of images per inference call; first dimension of both tensor shapes.
const int batch_size = 1;

class Classifier {
public:
	Classifier(const wchar_t* onnx_path) {
		auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
		input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_.data(), input_.size(), input_shape_.data(), input_shape_.size());
		output_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, output_.data(), output_.size(), output_shape_.data(), output_shape_.size());

		OrtSessionOptionsAppendExecutionProvider_CUDA(session_option, 0);
		session =  Ort::Session(env, onnx_path, session_option);
	}

	int set_input(string& img_paht) {
		Mat img = imread(img_paht);

		//Mat dst(input_height, input_width, CV_8UC3);
		//resize(img, dst, Size(row, col));
		//cvtColor(img, dst, COLOR_BGR2RGB);
		float* input_prt = input_.data();
		for (int c = 0; c < 3; c++) {
			for (int i = 0; i < input_height; i++) {
				for (int j = 0; j < input_width; j++) {
					float tmp = img.ptr<uchar>(i)[j * 3 + c];
					input_prt[c * input_height * input_width + i * input_width + j] = ((tmp) / 255.0 - mean_[c]) / std_[c];
				}
			}
		}
		return 0;
	}

	int forward() {
		session.Run(Ort::RunOptions{ nullptr }, input_names.data(), &input_tensor_, 1, output_names.data(), &output_tensor_, 1);
		return 0;
	}

	int get_result(int& result) {
		result = std::distance(output_.begin(), std::max_element(output_.begin(), output_.end()));
		return 0;
	}

private:
	vector<const char*> input_names{ "img" };
	vector<const char*> output_names{ "output" };
	std::array<float, batch_size* input_height* input_width* input_channel> input_;
	std::array<float, batch_size* class_num> output_;
	std::array<int64_t, 4> input_shape_{ batch_size, input_channel, input_width, input_height };
	std::array<int64_t, 2> output_shape_{ batch_size, class_num };

	Ort::Value input_tensor_{ nullptr };
	Ort::Value output_tensor_{ nullptr };


	Ort::SessionOptions session_option;
	Ort::Env env{ ORT_LOGGING_LEVEL_WARNING, "test" };

	Ort::Session session{ nullptr };

	std::vector<float> mean_{ 0.4914, 0.4822, 0.4465 };
	std::vector<float> std_{ 0.2023, 0.1994, 0.2010 };
};

/// Reads a list file with one image path per line and derives each label
/// from the file name.
///
/// The label is the single decimal digit immediately before the first '.'
/// of the file name (the part after the last '/' or '\\'), e.g.
/// "data/12_7.png" -> 7. Dots in directory names are ignored. When no such
/// digit exists the entry is still recorded, with label -1, so img_lst and
/// label_lst always stay the same length.
///
/// @param file_path  Path of the list file.
/// @param img_lst    Receives the non-empty lines, in file order (appended).
/// @param label_lst  Receives the label for each appended path.
/// @return 0 on success, -1 if the list file cannot be opened.
int load_img_path(const std::string& file_path, std::vector<std::string>& img_lst, std::vector<int>& label_lst) {
	std::ifstream f(file_path.c_str());
	if (!f.is_open()) {
		std::cout << "檔案開啟失敗" << std::endl;
		return -1;
	}

	std::string img_path;
	while (std::getline(f, img_path)) {
		// Tolerate CRLF list files read on platforms that do not translate them.
		if (!img_path.empty() && img_path.back() == '\r') {
			img_path.pop_back();
		}
		if (img_path.empty()) {
			continue;
		}

		const std::size_t sep = img_path.find_last_of("/\\");
		const std::size_t name_start = (sep == std::string::npos) ? 0 : sep + 1;
		const std::size_t dot = img_path.find('.', name_start);

		int label = -1;
		// Need at least one character of file name in front of the dot.
		if (dot != std::string::npos && dot > name_start) {
			const char digit = img_path[dot - 1];
			if (digit >= '0' && digit <= '9') {
				label = digit - '0';
			}
		}

		img_lst.push_back(img_path);
		label_lst.push_back(label);
	}
	return 0;  // the stream closes itself when it goes out of scope
}

/// Fraction of labels whose prediction at the same index matches.
///
/// @param labels   Ground-truth class indices.
/// @param results  Predicted class indices, aligned with labels by position.
/// @return matches / labels.size(); 0 when labels is empty. Labels without a
///         corresponding entry in results count as misses.
float cal_acc(const std::vector<int>& labels, const std::vector<int>& results) {
	if (labels.empty()) {
		return 0.0f;  // avoid 0/0 -> NaN
	}
	// Never index past the shorter of the two vectors.
	const std::size_t compared = std::min(labels.size(), results.size());
	std::size_t hits = 0;
	for (std::size_t i = 0; i < compared; ++i) {
		if (labels[i] == results[i]) {
			++hits;
		}
	}
	return static_cast<float>(hits) / static_cast<float>(labels.size());
}

/// Runs the classifier over every image in the list file and prints the
/// total and average wall-clock time in milliseconds plus the accuracy.
/// Returns 0 on success and 1 if the list cannot be loaded or inference throws.
int main()
{
	const wchar_t* onnx_path = L"D:/Files/projects/vs/onnxruntimelib/onnxruntime-win-x64-gpu-1.11.1/output/resnet_best.onnx";
	std::string img_path_file = "D:/Files/projects/Py/CNN-Backbone/data/testimg.lst";
	std::vector<std::string> img_lst;
	std::vector<int> label_lst;

	// Without images there is nothing to time, and the average would divide by zero.
	if (load_img_path(img_path_file, img_lst, label_lst) != 0 || img_lst.empty()) {
		std::cerr << "No test images loaded from " << img_path_file << std::endl;
		return 1;
	}

	try {
		Classifier classifier(onnx_path);
		std::vector<int> results;
		results.reserve(img_lst.size());

		// steady_clock gives wall-clock time in real units; clock() counts
		// implementation-defined ticks, which are milliseconds only where
		// CLOCKS_PER_SEC happens to be 1000.
		const auto start = std::chrono::steady_clock::now();
		for (std::string& img_path : img_lst) {
			int result = -1;  // stays -1 (a guaranteed miss) if the image cannot be loaded
			if (classifier.set_input(img_path) == 0) {
				classifier.forward();
				classifier.get_result(result);
			}
			results.push_back(result);
		}
		const float time_cost = std::chrono::duration<float, std::milli>(
			std::chrono::steady_clock::now() - start).count();

		const float acc = cal_acc(label_lst, results);
		std::cout << "Total Time cost: " << time_cost << "ms" << std::endl;
		std::cout << "Average Time cost: " << time_cost / img_lst.size() << "ms" << std::endl;
		std::cout << "Test Acc:  " << acc << std::endl;
	}
	catch (const std::exception& e) {
		// Ort::Exception derives from std::exception.
		std::cerr << "Inference failed: " << e.what() << std::endl;
		return 1;
	}

	system("pause");  // Windows console convenience: keep the window open
	return 0;
}

測試程式碼比較簡單，裡面核心呼叫onnxruntime的程式碼是Ort::Session和Ort::SessionOptions。Ort::SessionOptions是呼叫onnxruntime的一些配置選項，預設使用CPU推理，這裡使用OrtSessionOptionsAppendExecutionProvider_CUDA(session_option, 0)可以選用0號gpu計算；建立好的session_option再拿去初始化session。輸入輸出則使用定義好的特殊型別Ort::Value，這裡分別採用一個固定大小的array去構建輸入輸出。最後測試結論為：
CPU下：

Total Time cost: 36289ms
Average Time cost: 3.6289ms
Test Acc:  0.9483

GPU下：

Total Time cost: 29861ms
Average Time cost: 2.9861ms
Test Acc:  0.9483

效果和在Python介面上測試的一致，GPU下的平均響應時間比Python介面的3.1ms更快一些。神奇的是CPU下的速度也很快，可能是我這個模型太小，沒體現出GPU的優勢。另外有一個問題：在gpu上測試時，退出main函式的時候析構失敗，沒查出原因。

三、總結

技術總結：原型訓練中大模型的擬合能力比小模型強得多，此外資料增強帶來的收益也很明顯；模型較簡單，所以匯出onnx沒出啥問題，匯出的onnx效果也沒降低；C++端的驗證表明，單張推理耗時遠低於最初設定的50ms/張的目標，符合預期。
反思：走了一遍這整個流程，發現還是有很多地方不瞭解，有待進一步學習，五月份主攻工程方向，這個系列到這裡暫時告一段落，接下來要深入一下onnxruntime的介面設計，有突破了再繼續更新。

ONNXRuntime學習筆記(三)
2022-05-01
筆記
springboot 學習筆記（四）
2019-01-14
Spring Boot筆記
goLang學習筆記(四）
2018-08-20
Golang筆記
TS學習筆記（四）
2024-10-30
筆記
activiti學習筆記(四)managementService
2018-10-07
筆記
四元數學習筆記
2024-04-20
筆記
DP學習筆記（四）（2024.10.2）
2024-10-02
筆記
c++學習筆記（四）
2024-07-25
C++筆記
Gradle外掛學習筆記（四)
2019-03-03
Gradle筆記
ES6 學習筆記四
2019-11-09
筆記
Kubernetes學習筆記（四）：服務
2020-05-23
筆記
TS學習筆記（四）：函式
2019-04-20
筆記函式
Java學習筆記第四天
2020-11-12
Java筆記
springcloud學習筆記（四）Spring Cloud Hystrix
2019-02-14
SpringGCCloud筆記
李巨集毅深度學習筆記(四)
2020-12-10
深度學習筆記
HexMap學習筆記(四)——不規則化
2019-03-20
筆記
機器學習筆記（四）決策樹
2020-10-28
機器學習筆記
圖論進階學習筆記（四）（2024.10.4）
2024-10-05
圖論筆記
Vue學習筆記(四) 久處不厭
2023-01-01
Vue筆記
hive學習筆記之四：分割槽表
2021-07-02
Hive筆記
Java IO學習筆記四：Socket基礎
2021-06-14
Java筆記
ES6學習筆記(四)【正則，集合】
2019-04-06
筆記
kettle學習筆記（四）——kettle輸入步驟
2020-09-26
筆記
async-validator 原始碼學習筆記（四）：validator
2022-03-24
原始碼筆記
numpy的學習筆記\pandas學習筆記
2018-03-18
筆記
Java_EE企業級開發學習筆記——spring學習筆記第四章
2024-03-19
Java筆記Spring
SpringBoot + Spring Security 學習筆記（四）記住我功能實現
2019-04-14
Spring Boot筆記
Nginx虛擬主機常用配置(學習筆記四)
2018-05-16
Nginx筆記
我的 golang 學習筆記系列四：反射初識
2021-10-16
Golang筆記反射
Docker學習筆記 - 第四篇：映象倉庫
2021-09-09
Docker筆記
TypeScript 學習筆記 — 函式中的型別（四）
2023-02-08
TypeScript筆記函式型別
學習筆記
2024-04-14
筆記
《跟我學Shiro》學習筆記第四章:編碼/加密
2018-05-02
筆記加密
重學前端學習筆記（四）--div和span不是夠用嗎？
2019-04-26
前端筆記
架構學習筆記系列四——架構師軟文
2018-06-26
架構筆記
Java設計模式學習筆記(四) 抽象工廠模式
2019-07-17
Java設計模式筆記抽象
Vue.js 學習筆記之四：Vue 元件基礎
2020-10-12
Vue.js筆記元件
計算機網路傳輸層學習筆記---（四）
2020-10-19
計算機網路筆記

ONNXRuntime學習筆記(四)

一、onnxruntime的C++庫

二、測試程式碼

三、總結

相關文章