Setting Up YOLOv5 on the Jetson Nano and Reaching 25 FPS
I. Version Notes
The versions used throughout this guide: CUDA 10.2, Python 3.6, PyTorch 1.8.0, torchvision 0.9.0.
II. Configuring CUDA
sudo gedit ~/.bashrc   # append the three lines below at the end, then save and close
export CUDA_HOME=/usr/local/cuda-10.2
export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda-10.2/bin:$PATH
source ~/.bashrc
nvcc -V   # if the configuration succeeded, this prints the CUDA version number
III. Enlarging the Nano's Swap Space (zram)
1. Open a terminal and run:
sudo gedit /etc/systemd/nvzramconfig.sh
2. Edit nvzramconfig.sh
Change
mem=$((("${totalmem}"/2/"${NRDEVICES}")*1024))
to
mem=$((("${totalmem}"*2/"${NRDEVICES}")*1024))
This sizes the total zram swap at twice the physical RAM instead of half of it.
3. Reboot the Jetson Nano.
4. In a terminal, run:
free -h   # confirm the enlarged swap (roughly twice the physical RAM)
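To see what the edit in step 2 actually changes, here is the script's arithmetic in a minimal Python sketch. The 4 GB of RAM and NRDEVICES=4 (one zram device per CPU core) are assumptions for illustration, not values from the original post:

# Sketch of the nvzramconfig.sh arithmetic (assumed: 4 GB RAM, 4 zram devices).
totalmem_kb = 4 * 1024 * 1024   # `free` reports total memory in KB
nrdevices = 4                   # one zram device per CPU core on the Nano

old_per_device = totalmem_kb // 2 // nrdevices * 1024   # original: half of RAM split across devices
new_per_device = totalmem_kb * 2 // nrdevices * 1024    # edited: twice the RAM split across devices

print(f"old swap total: {old_per_device * nrdevices / 2**30:.1f} GiB")  # ~2.0 GiB
print(f"new swap total: {new_per_device * nrdevices / 2**30:.1f} GiB")  # ~8.0 GiB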
IV. Installing PyTorch 1.8
1. Download torch-1.8.0-cp36-cp36m-linux_aarch64.whl (NVIDIA's prebuilt PyTorch wheel for Jetson).
2. Install the required dependencies and PyTorch
sudo apt-get update
sudo apt-get upgrade
sudo apt-get dist-upgrade
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
mkdir ~/.pip
sudo gedit ~/.pip/pip.conf
[global]
# the index URL was truncated in the original; the Aliyun PyPI mirror is inferred from trusted-host below
index-url=https://mirrors.aliyun.com/pypi/simple/
[install]
trusted-host=mirrors.aliyun.com
pip3 install --upgrade pip   # skip if pip is already current
pip3 install Cython
pip3 install numpy
pip3 install torch-1.8.0-cp36-cp36m-linux_aarch64.whl   # run this from the directory that holds the wheel
sudo apt-get install libjpeg-dev zlib1g-dev libpython3-dev libavcodec-dev libavformat-dev libswscale-dev
# the clone URL was missing in the original; the official torchvision repo is assumed:
git clone --branch v0.9.0 https://github.com/pytorch/vision torchvision
cd torchvision   # or open a terminal inside the downloaded folder
export BUILD_VERSION=0.9.0
python3 setup.py install --user   # this build takes quite a while

# verify that the torch and torchvision modules are installed:
python3
>>> import torch
>>> print(torch.__version__)        # note: two underscores on each side of "version"
>>> import torchvision
>>> print(torchvision.__version__)  # prints the version number if the install succeeded
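The interactive check above can also be run as one script. The CUDA check is an addition not in the original post, but it is the key thing to confirm on the Nano before going further; a minimal sketch:

# verify_torch.py -- confirm torch/torchvision import and that CUDA is usable
import torch
import torchvision

print("torch:", torch.__version__)              # expect 1.8.0
print("torchvision:", torchvision.__version__)  # expect 0.9.0
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))  # should report the Nano's Maxwell GPU
    x = torch.rand(2, 3).cuda()                      # small tensor round-trip through the GPU
    print("GPU tensor OK:", (x + x).cpu().shape)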
V. Setting Up the YOLOv5 Environment
# the clone URL was missing in the original; the official repo is assumed. Cloning often fails
# without a VPN, so downloading on a PC and copying the result to the Jetson Nano is recommended:
git clone https://github.com/ultralytics/yolov5
python3 -m pip install --upgrade pip
cd yolov5   # a manual download arrives as yolov5-master.zip: run `unzip yolov5-master.zip`, then cd into the folder
pip3 install -r requirements.txt   # in my case matplotlib would not install; the fix is below.
                                   # if other packages fail, redo the mirror step with a different domestic mirror
python3 -m pip list   # list the installed Python packages

# the following commands test yolov5:
python3 detect.py --source data/images/bus.jpg --weights yolov5n.pt --img 640   # image test
python3 detect.py --source video.mp4 --weights yolov5n.pt --img 640             # video test; supply your own video
python3 detect.py --source 0 --weights yolov5n.pt --img 640                     # camera test
The fix for the matplotlib problem mentioned above:
sudo gedit ~/.bashrc
# append this line at the end, then save and close:
export OPENBLAS_CORETYPE=ARMV8
source ~/.bashrc
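Independently of detect.py, the install can also be smoke-tested through the torch.hub API. A minimal sketch, assuming network access on first run (the model is downloaded automatically); the image path is the one from the cloned yolov5 repo:

# hub_test.py -- quick YOLOv5 sanity check via torch.hub
import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5n')  # loads yolov5n with pretrained weights
results = model('data/images/bus.jpg')                   # run inference on the sample image
results.print()                                          # prints detections and per-image timing
results.save()                                           # writes annotated images under runs/detect/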
VI. Accelerating Inference with tensorrtx
1. Download tensorrtx
# the clone URL was missing in the original; wang-xinyu's tensorrtx repo is assumed:
git clone https://github.com/wang-xinyu/tensorrtx
2. Build
python3 gen_wts.py -w yolov5n.pt -o yolov5n.wts   # generate the .wts file; place yolov5n.pt here first
cd ~/tensorrtx/yolov5/   # a manual download may be named tensorrtx-master
mkdir build
cd build
# copy the generated .wts file into build/  (a manually downloaded yolov5 folder is named yolov5-master)
cmake ..
make -j4
sudo ./yolov5 -s yolov5n.wts yolov5n.engine n   # serialize the engine file
sudo ./yolov5 -d yolov5n.engine ../samples/     # test on the sample images
# If zidane.jpg shows missed detections, go up one level, set CONF_THRESH to 0.25 in yolov5.cpp,
# return to build/, run make -j4 again, and re-run the command above.
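For context, the .wts file that gen_wts.py produces is just a plain-text dump of the model's state dict. The sketch below shows the general idea, simplified from tensorrtx's gen_wts.py rather than copied from it; run it from inside the yolov5 repo so the pickled model classes can be resolved:

# Sketch of how a .wts file is written (simplified; assumes the YOLOv5 checkpoint layout).
import struct
import torch

model = torch.load('yolov5n.pt', map_location='cpu')['model'].float()
with open('yolov5n.wts', 'w') as f:
    state = model.state_dict()
    f.write(f'{len(state)}\n')                  # first line: number of tensors
    for name, tensor in state.items():
        values = tensor.reshape(-1).cpu().numpy()
        f.write(f'{name} {len(values)}')        # tensor name and element count
        for v in values:
            f.write(' ' + struct.pack('>f', float(v)).hex())  # each float as big-endian hex
        f.write('\n')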
3. Using a USB Camera
Replace tensorrtx's yolov5.cpp with the following version, which reads frames from a video file or camera and overlays the measured FPS:
#include <iostream>
#include <chrono>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"

#define USE_FP32   // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0   // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.25   // confidence threshold; the default 0.5 missed detections, 0.25 gave better results
#define BATCH_SIZE 1

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1;  // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;

char* my_classes[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush" };

static int get_width(int x, float gw, int divisor = 8) {
    // return math.ceil(x / divisor) * divisor
    if (int(x * gw) % divisor == 0) {
        return int(x * gw);
    }
    return (int(x * gw / divisor) + 1) * divisor;
}

static int get_depth(int x, float gd) {
    if (x == 1) {
        return 1;
    } else {
        return round(x * gd) > 1 ? round(x * gd) : 1;
    }
}

ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights(wts_name);

    /* ------ yolov5 backbone ------ */
    auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
    auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");

    /* ------ yolov5 head ------ */
    auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
    auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");

    auto upsample11 = network->addResize(*conv10->getOutput(0));
    assert(upsample11);
    upsample11->setResizeMode(ResizeMode::kNEAREST);
    upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());

    ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");

    auto upsample15 = network->addResize(*conv14->getOutput(0));
    assert(upsample15);
    upsample15->setResizeMode(ResizeMode::kNEAREST);
    upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());

    ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
    auto cat16 = network->addConcatenation(inputTensors16, 2);
    auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");

    // yolo layer 0
    IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(inputTensors19, 2);
    auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
    // yolo layer 1
    IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors22, 2);
    auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    // yolo layer 2
    IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);

    auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
    config->setInt8Calibrator(calibrator);
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }

    return engine;
}

ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights(wts_name);

    /* ------ yolov5 backbone ------ */
    auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
    auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
    auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
    auto spp10 = SPP(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), 3, 5, 7, "model.10");
    auto c3_11 = C3(network, weightMap, *spp10->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.11");

    /* ------ yolov5 head ------ */
    auto conv12 = convBlock(network, weightMap, *c3_11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
    auto upsample13 = network->addResize(*conv12->getOutput(0));
    assert(upsample13);
    upsample13->setResizeMode(ResizeMode::kNEAREST);
    upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
    ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
    auto cat14 = network->addConcatenation(inputTensors14, 2);
    auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");

    auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
    auto upsample17 = network->addResize(*conv16->getOutput(0));
    assert(upsample17);
    upsample17->setResizeMode(ResizeMode::kNEAREST);
    upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
    ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
    auto cat18 = network->addConcatenation(inputTensors18, 2);
    auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");

    auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
    auto upsample21 = network->addResize(*conv20->getOutput(0));
    assert(upsample21);
    upsample21->setResizeMode(ResizeMode::kNEAREST);
    upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
    ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors21, 2);
    auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");

    auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
    ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
    auto cat25 = network->addConcatenation(inputTensors25, 2);
    auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");

    auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
    ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
    auto cat28 = network->addConcatenation(inputTensors28, 2);
    auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");

    auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
    ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
    auto cat31 = network->addConcatenation(inputTensors31, 2);
    auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");

    /* ------ detect ------ */
    IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
    IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
    IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
    IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);

    auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
    config->setInt8Calibrator(calibrator);
#endif

    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }

    return engine;
}

void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, float& gd, float& gw, std::string& wts_name) {
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down
    engine->destroy();
    builder->destroy();
    config->destroy();
}

void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}

bool parse_args(int argc, char** argv, std::string& engine) {
    if (argc < 3) return false;
    if (std::string(argv[1]) == "-v" && argc == 3) {
        engine = std::string(argv[2]);
    } else {
        return false;
    }
    return true;
}

int main(int argc, char** argv) {
    cudaSetDevice(DEVICE);

    //std::string wts_name = "";
    std::string engine_name = "";
    //float gd = 0.0f, gw = 0.0f;
    //std::string img_dir;
    if (!parse_args(argc, argv, engine_name)) {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./yolov5 -v [.engine] // run inference with camera" << std::endl;
        return -1;
    }

    std::ifstream file(engine_name, std::ios::binary);
    if (!file.good()) {
        std::cerr << " read " << engine_name << " error! " << std::endl;
        return -1;
    }
    char* trtModelStream{ nullptr };
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();

    // prepare input data ---------------------------
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    //    data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    assert(engine->getNbBindings() == 2);
    void* buffers[2];

    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);

    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    // Change the path below to the video or image you want to detect (use a full path).
    // To read from a camera instead, pass the integer 0 (no quotes) as the argument.
    cv::VideoCapture capture("/home/cao-yolox/yolov5/tensorrtx-master/yolov5/samples/1.mp4");
    //cv::VideoCapture capture("../overpass.mp4");
    //int fourcc = cv::VideoWriter::fourcc('M','J','P','G');
    //capture.set(cv::CAP_PROP_FOURCC, fourcc);
    if (!capture.isOpened()) {
        std::cout << "Error opening video stream or file" << std::endl;
        return -1;
    }

    int key;
    int fcount = 0;
    while (1) {
        cv::Mat frame;
        capture >> frame;
        if (frame.empty()) {
            std::cout << "Fail to read image from camera!" << std::endl;
            break;
        }
        fcount++;
        //if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
        for (int b = 0; b < fcount; b++) {
            //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            cv::Mat img = frame;
            if (img.empty()) continue;
            cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H);  // letterbox BGR to RGB
            int i = 0;
            for (int row = 0; row < INPUT_H; ++row) {
                uchar* uc_pixel = pr_img.data + row * pr_img.step;
                for (int col = 0; col < INPUT_W; ++col) {
                    data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
                    uc_pixel += 3;
                    ++i;
                }
            }
        }

        // Run inference
        auto start = std::chrono::system_clock::now();
        doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now();
        //std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
        int fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();  // instantaneous FPS from this frame's latency
        std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
        }
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            //std::cout << res.size() << std::endl;
            //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            for (size_t j = 0; j < res.size(); j++) {
                cv::Rect r = get_rect(frame, res[j].bbox);
                cv::rectangle(frame, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
                std::string label = my_classes[(int)res[j].class_id];
                cv::putText(frame, label, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
                std::string jetson_fps = "Jetson Nano FPS: " + std::to_string(fps);
                cv::putText(frame, jetson_fps, cv::Point(11, 80), cv::FONT_HERSHEY_PLAIN, 3, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
            }
            //cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
        }
        cv::imshow("yolov5", frame);
        key = cv::waitKey(1);
        if (key == 'q') {
            break;
        }
        fcount = 0;
    }

    capture.release();

    // Release stream and buffers
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    return 0;
}
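For reference, the pixel loop in main() implements the usual YOLOv5 letterbox preprocessing. In NumPy terms it amounts to roughly the following; this is a sketch of the logic, not the exact preprocess_img implementation from tensorrtx:

# Sketch of the preprocessing done by the C++ pixel loop: letterbox, BGR->RGB, HWC->CHW, [0,1].
import cv2
import numpy as np

def preprocess(frame, input_w=640, input_h=640):
    h, w = frame.shape[:2]
    scale = min(input_w / w, input_h / h)             # letterbox: fit while keeping aspect ratio
    nw, nh = int(w * scale), int(h * scale)
    resized = cv2.resize(frame, (nw, nh))
    canvas = np.full((input_h, input_w, 3), 128, dtype=np.uint8)  # gray padding
    top, left = (input_h - nh) // 2, (input_w - nw) // 2
    canvas[top:top + nh, left:left + nw] = resized
    rgb = canvas[:, :, ::-1]                          # BGR -> RGB (uc_pixel[2], [1], [0])
    chw = rgb.transpose(2, 0, 1).astype(np.float32) / 255.0  # HWC -> CHW, scale to [0,1]
    return np.ascontiguousarray(chw)[None]            # add the batch dimension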
4. Recompile
After editing yolov5.cpp, run make -j4 in build/ again, then launch:
sudo ./yolov5 -v yolov5n.engine   # make sure the camera is plugged in beforehand
sudo apt-get install libcanberra-gtk-module   # install this if the OpenCV display window complains about a missing libcanberra-gtk-module
5. Results
With the yolov5n engine, the on-screen FPS overlay reaches about 25 FPS, as the title promises.
Source: ITPUB blog, http://blog.itpub.net/70003733/viewspace-2888852/ (please credit the source when reposting).