C++實現客戶端與伺服器的通訊(三)：在遠端伺服器中處理本地攝像頭資料

這次我們要走得稍微遠一點，我需要直接從本地攝像頭中讀取視訊流、傳送到伺服器、經過伺服器上的dlib人臉檢測演算法、返回bounding box並在本地顯示。

不過，有了前面資料傳輸的基礎，只要在合適的位置新增一些程式就可以了。

一、影象的序列化

當我們從攝像頭中讀取出Mat型別的影象資料（img）後，需要先對其進行序列化，使其能夠在http下傳輸：先藉助opencv的imencode函式將影象壓縮成JPEG位元組流，再用base64編碼成字串，程式如下：

// Compress the frame into an in-memory JPEG byte buffer.
std::vector<unsigned char> buffer;
imencode(".jpg", img, buffer);
// Copy the JPEG bytes into a string and base64-encode them so they can be sent as text.
string src(buffer.begin(), buffer.end());
string base64_src = base64_encode((BYTE const*)src.c_str(), src.length());

影象的解碼需要先解碼base64字串，存放到vector<BYTE>中，然後使用opencv的imdecode函式轉換為Mat格式，程式如下：

// Turn the base64 text back into raw bytes, then decode those bytes into a colour Mat.
// NOTE(review): CV_LOAD_IMAGE_COLOR is the legacy constant name; newer OpenCV
// releases spell it cv::IMREAD_COLOR — confirm against the installed version.
std::vector<BYTE> str_decoded_byte = base64_decode(str_encoded);
Mat mat = imdecode(str_decoded_byte, CV_LOAD_IMAGE_COLOR);

二、bounding box的序列化

這一步本來準備用cJSON的，但是後來想到人臉檢測的標註資料格式比較簡單，例如程式檢測到了一張人臉，那麼就可以返回一個這樣的字串：

"127 131 200 204"

上述四個數字分別表示人臉的左、上、右、下位置的畫素座標。如果程式檢測到多張人臉，也可以用類似方法：

"127 131 200 204 235 87 309 156"

然後只要通過一個split函式就可以將資料分隔開了。C++的string庫並沒有提供標準的split函式，所以我們要自己定義一個：

// Splits `s` on every occurrence of the delimiter `c` and appends the pieces
// to `v` in order.
//
//  - Existing contents of `v` are kept; tokens are only appended.
//  - Adjacent delimiters yield empty tokens; a trailing delimiter (or an
//    empty `s`) does not yield a trailing empty token.
//  - An empty delimiter never advances the search, so instead of looping
//    forever the whole of `s` is appended as a single token (if non-empty).
void split(const std::string& s, std::vector<std::string>& v, const std::string& c)
{
  if (c.empty())
  {
    // find("") always succeeds at the current position, so the loop below
    // would never terminate and would grow `v` without bound.
    if (!s.empty())
      v.push_back(s);
    return;
  }

  std::string::size_type pos1 = 0;          // start of the current token
  std::string::size_type pos2 = s.find(c);  // start of the next delimiter
  while (std::string::npos != pos2)
  {
    v.push_back(s.substr(pos1, pos2 - pos1));

    pos1 = pos2 + c.size();
    pos2 = s.find(c, pos1);
  }
  // Whatever follows the last delimiter is the final token, unless nothing is left.
  if (pos1 != s.length())
    v.push_back(s.substr(pos1));
}

三、C++ dlib人臉檢測演算法

以前用conda或pip安裝的dlib只有python介面，要使用C++介面還是需要原始碼安裝。

github原始碼地址：https://github.com/davisking/dlib

在dlib根目錄下依次執行以下指令：

# Configure and build dlib out-of-tree in ./build
mkdir build
cd build
cmake ..
cmake --build . --config Release
# Install the headers and library system-wide, then refresh the linker cache
sudo make install
sudo ldconfig

不出意外的話，就可以安裝成功了。

dlib人臉檢測演算法需要先將Mat格式圖片轉換成dlib圖片格式（dlib::array2d<bgr_pixel>），然後使用dlib人臉檢測器做檢測：

// Obtain dlib's frontal face detector.
dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
// Wrap the OpenCV Mat as a dlib image view and copy it into a dlib-owned image.
dlib::array2d<bgr_pixel> img;  
dlib::assign_image(img, dlib::cv_image<bgr_pixel>(mat));
// Run the detector; the result holds one rectangle per detection.
std::vector<dlib::rectangle> dets = detector(img);

四、修改程式

然後，只要結合上述方法，對程式進行一些修改就可以了，修改後的程式如下：

client test.cpp：

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/imgproc/imgproc.hpp>  
#include <opencv2/core/core.hpp>
#include <iostream>

#include "http.h"
#include "base64.h"

using namespace std;

void split(const std::string& s, std::vector<std::string>& v, const std::string& c);

int main(int argc, char *argv[])
{
  CurlHttp curl_http;
  string str_url = "http://10.108.233.26:8003";  // 地址、埠號

  cv::VideoCapture cap(0);
  if(!cap.isOpened())
  {
    cout << "Something Wrong with the Camera!!" << endl;
    return -1;
  }

  cv::Mat frame, img;
  timeval start, end;
  while (1) {

    gettimeofday(&start, NULL);

    cap >> frame;
    if (frame.empty()) {
      cout << "No Image!!";
      continue;
    }

    float scale = 0.5;
    int width = frame.size().width * scale;
    int height = frame.size().height * scale;
    cv::Size size(width, height);
    cv::resize(frame, img, size);

    std::vector<unsigned char> buffer;
    cv::imencode(".jpg", img, buffer);
    string src(buffer.begin(), buffer.end());
    string base64_src  = base64_encode((BYTE const*)src.c_str(), src.length());
    
    string result;
    int res = curl_http.http_post(str_url.c_str(), base64_src.c_str(), &result);
    cout << "[Response]: " << result << '\n';

    vector<string> str_bbox;
    split(result, str_bbox, " ");
    if (str_bbox.size() % 4 != 0) {
      cout << "Response Error!!";
      return -1;
    }
    int num_bbox = str_bbox.size() / 4;
    for(int i = 0; i < num_bbox; i++) {
      int left   = atoi(str_bbox[i*4].c_str());
      int top    = atoi(str_bbox[i*4 + 1].c_str());
      int right  = atoi(str_bbox[i*4 + 2].c_str());
      int bottom = atoi(str_bbox[i*4 + 3].c_str());
      cv::rectangle(img, cv::Point(left, top + 10), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);
    }

    cv::imshow("new", img);
    if (cv::waitKey(5) == 27) {
        break;
    }

    gettimeofday(&end, NULL);
    printf("[Time]: %f\n", (double)((end.tv_sec - start.tv_sec)*1000.0 + (end.tv_usec - start.tv_usec)/1000.0));
  }
  
  return 0;
}

// Splits `s` on every occurrence of the delimiter `c` and appends the pieces
// to `v` in order.
//
//  - Existing contents of `v` are kept; tokens are only appended.
//  - Adjacent delimiters yield empty tokens; a trailing delimiter (or an
//    empty `s`) does not yield a trailing empty token.
//  - An empty delimiter never advances the search, so instead of looping
//    forever the whole of `s` is appended as a single token (if non-empty).
void split(const std::string& s, std::vector<std::string>& v, const std::string& c)
{
  if (c.empty())
  {
    // find("") always succeeds at the current position, so the loop below
    // would never terminate and would grow `v` without bound.
    if (!s.empty())
      v.push_back(s);
    return;
  }

  std::string::size_type pos1 = 0;          // start of the current token
  std::string::size_type pos2 = s.find(c);  // start of the next delimiter
  while (std::string::npos != pos2)
  {
    v.push_back(s.substr(pos1, pos2 - pos1));

    pos1 = pos2 + c.size();
    pos2 = s.find(c, pos1);
  }
  // Whatever follows the last delimiter is the final token, unless nothing is left.
  if (pos1 != s.length())
    v.push_back(s.substr(pos1));
}

server test.cpp：

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/imgproc/imgproc.hpp>  
#include <opencv2/core/core.hpp>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv.h>
#include <iostream>

#include "mongoose.h"
#include "base64.h"

using namespace std;
using namespace cv;
using namespace dlib;

int env_handler(struct mg_connection *conn);

// Server entry point: creates a mongoose server on port 8003, routes "/" to
// env_handler and then polls for requests indefinitely.
int main(int argc, char *argv[])
{
  struct mg_server* srv = mg_create_server(NULL);  // create the mongoose server instance
  mg_set_option(srv, "listening_port", "8003");    // listen on port 8003
  mg_add_uri_handler(srv, "/", env_handler);       // register the request callback
  printf("Starting on port %s ...\n", mg_get_option(srv, "listening_port"));

  // Serve requests; the second argument is the poll timeout in milliseconds.
  for (;;) {
    mg_poll_server(srv, 100);
  }

  // Not reached: the loop above has no exit condition.
  mg_destroy_server(&srv);

  return 0;
}


int env_handler(struct mg_connection *conn) 
{
  static dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
  static int counter = 0;
  counter++;

  const char * encoded_data = conn->content;  // 服務端收到的訊息
  int encoded_len = conn->content_len;        // 服務端收到的訊息長度
  string str_encoded(encoded_data, encoded_len);

  std::vector<BYTE> str_decoded_byte = base64_decode(str_encoded);
  Mat mat = imdecode(str_decoded_byte, CV_LOAD_IMAGE_COLOR);

  // 開始人臉檢測演算法
  dlib::array2d<bgr_pixel> img;  
  dlib::assign_image(img, dlib::cv_image<bgr_pixel>(mat));

  timeval start, end;
  gettimeofday(&start, NULL);
  std::vector<dlib::rectangle> dets = detector(img);
  gettimeofday(&end, NULL);

  std::string detect_result = "";
  for (int i = 0; i < dets.size(); i++)
  {
    if (!detect_result.empty()) detect_result += " ";

    char ptr_result[30];
    sprintf(ptr_result, "%d %d %d %d", (int)dets[i].left(), (int)dets[i].top(), (int)dets[i].right(), (int)dets[i].bottom());
    string str_result(ptr_result);
    detect_result += str_result;
  }

  printf("Counter: %3d, BBOX: %s, Time of Detect: %f\n", counter, 
                                                         detect_result.empty() ? "Null" : detect_result.c_str(), 
                                                         (double)((end.tv_sec - start.tv_sec)*1000.0 + (end.tv_usec - start.tv_usec)/1000.0));
  mg_printf(conn, "%s", detect_result.c_str());
  
  return 0;
}

五、後記

完成上述修改後，執行程式，發現程式執行速度巨慢。經過簡單的測試就能發現，dlib人臉檢測演算法佔用了大量的時間（0.6s左右），看一下gpu發現並沒有呼叫，看起來dlib在cpu下執行速度大概就是這樣了，而且我還不知道怎麼配置dlib支援gpu，豹怒。。

不過程式還是達成了預定的目標，只要將dlib替換成任何需要進行實時檢測的其它演算法的介面，就可以利用遠端伺服器來跑演算法了。缺點是只有C++介面，遇到python指令碼還是沒辦法。

以後會考慮解決python介面的問題。

C++實現客戶端與伺服器的通訊(三)：在遠端伺服器中處理本地攝像頭資料

相關文章