Linux企業級專案實踐之網路爬蟲（26）——執行緒池

一旦有一個抓取請求開始，就建立一個新的執行緒，由該執行緒執行任務，任務執行完畢之後，執行緒就退出。這就是"即時建立，即時銷燬"的策略。儘管與建立行程相比，建立執行緒的時間已經大大縮短，但是如果提交給執行緒的任務執行時間較短，而且執行次數非常頻繁，那麼伺服器就將處於不停地建立執行緒和銷燬執行緒的狀態。這筆開銷是不可忽略的，尤其是在執行緒執行時間非常短的情況下。

執行緒池就是為了解決上述問題的，它的實現原理是這樣的：在應用程式啟動之後，就馬上建立一定數量的執行緒，放入空閒的佇列中。這些執行緒都處於阻塞狀態，只佔一點記憶體，不佔用CPU。當任務到來後，執行緒池將選擇一個空閒的執行緒，將任務傳入此執行緒中執行。當所有的執行緒都在處理任務的時候，執行緒池將自動建立一定數量的新執行緒，用於處理更多的任務。執行任務完成之後執行緒並不退出，而是繼續在執行緒池中等待下一次任務。當大部分執行緒處於阻塞狀態時，執行緒池將自動銷燬一部分的執行緒，回收系統資源。

下面通過實現一個執行緒池，來解決多工抓取問題。

處理流程如下：

程式啟動時，先初始化執行緒池並啟動其中的執行緒。由於還沒有任務到來，執行緒池中的所有執行緒都處在阻塞狀態；一旦有任務到達，就從執行緒池中取出一個空閒執行緒來處理。如果所有的執行緒都處於工作狀態，就把任務新增到佇列中排隊。如果佇列中的任務個數已達到佇列所能容納的最大數量，就不能再新增任務，只能等到佇列不滿時才能把任務新增到佇列中。

/* One queued unit of work: a callback plus its argument, linked into a
 * singly linked list of pending jobs. */
struct job
{
   void* (*callback_function)(void *arg);   // function invoked to perform the job
   void *arg;                               // argument passed to callback_function
   struct job *next;                        // next job in the list, NULL at the end
};
 
/* State of a thread pool: the worker thread handles, a bounded linked-list
 * job queue, and the synchronisation objects used around the queue. */
struct threadpool
{
   int thread_num;                  // number of threads started in the pool
   int queue_max_num;               // maximum number of jobs allowed in the queue
   struct job *head;                // first job in the queue (NULL when empty)
   struct job *tail;                // last job in the queue (NULL when empty)
   pthread_t *pthreads;             // array holding the pthread_t of every pool thread
   pthread_mutex_t mutex;            // mutex held while the fields of this struct are accessed
   pthread_cond_t queue_empty;      // condition variable: the queue is empty
   pthread_cond_t queue_not_empty;  // condition variable: the queue is not empty
   pthread_cond_t queue_not_full;   // condition variable: the queue is not full
   int queue_cur_num;               // number of jobs currently in the queue
   int queue_close;                 // non-zero once the queue has been closed
   int pool_close;                  // non-zero once the pool has been closed
};
 
 
struct threadpool* threadpool_init(intthread_num, int queue_max_num)
{
   struct threadpool *pool = NULL;
   do
    {
       pool = malloc(sizeof(struct threadpool));
       if (NULL == pool)
       {
           printf("failed to malloc threadpool!\n");
           break;
       }
       pool->thread_num = thread_num;
       pool->queue_max_num = queue_max_num;
       pool->queue_cur_num = 0;
       pool->head = NULL;
       pool->tail = NULL;
       if (pthread_mutex_init(&(pool->mutex), NULL))
       {
           printf("failed to init mutex!\n");
           break;
       }
       if (pthread_cond_init(&(pool->queue_empty), NULL))
       {
           printf("failed to init queue_empty!\n");
           break;
       }
       if (pthread_cond_init(&(pool->queue_not_empty), NULL))
       {
           printf("failed to init queue_not_empty!\n");
           break;
       }
       if (pthread_cond_init(&(pool->queue_not_full), NULL))
       {
           printf("failed to init queue_not_full!\n");
           break;
       }
       pool->pthreads = malloc(sizeof(pthread_t) * thread_num);
       if (NULL == pool->pthreads)
       {
           printf("failed to malloc pthreads!\n");
           break;
       }
       pool->queue_close = 0;
       pool->pool_close = 0;
       int i;
       for (i = 0; i < pool->thread_num; ++i)
       {
           pthread_create(&(pool->pthreads[i]), NULL, threadpool_function,(void *)pool);
       }
       
       return pool;   
    }while (0);
   
   return NULL;
}
 
/**
 * Append a job to the pool's queue, blocking while the queue is full.
 *
 * @param pool               the pool to submit to (asserted non-NULL)
 * @param callback_function  function the job will run (asserted non-NULL)
 * @param arg                argument for the callback (asserted non-NULL)
 * @return 0 when the job was queued; -1 when the queue or the pool has been
 *         closed, or when the job node could not be allocated.
 */
int threadpool_add_job(struct threadpool*pool, void* (*callback_function)(void *arg), void *arg)
{
    struct job *node;
    int rc = -1;

    assert(pool != NULL);
    assert(callback_function != NULL);
    assert(arg != NULL);

    pthread_mutex_lock(&pool->mutex);

    /* Wait for free space; give up as soon as a close flag is seen. */
    for (;;)
    {
        if (pool->queue_close || pool->pool_close)
        {
            goto unlock;
        }
        if (pool->queue_cur_num != pool->queue_max_num)
        {
            break;
        }
        pthread_cond_wait(&pool->queue_not_full, &pool->mutex);
    }

    node = (struct job *)malloc(sizeof(struct job));
    if (node == NULL)
    {
        goto unlock;
    }
    node->callback_function = callback_function;
    node->arg = arg;
    node->next = NULL;

    if (pool->head != NULL)
    {
        /* Non-empty queue: link the new node after the current tail. */
        pool->tail->next = node;
        pool->tail = node;
    }
    else
    {
        /* The queue was empty: the node becomes both ends, and every
         * waiter on queue_not_empty is woken. */
        pool->head = node;
        pool->tail = node;
        pthread_cond_broadcast(&pool->queue_not_empty);
    }
    pool->queue_cur_num += 1;
    rc = 0;

unlock:
    pthread_mutex_unlock(&pool->mutex);
    return rc;
}
 
/**
 * Worker thread entry point.
 *
 * Repeatedly takes the job at the head of the queue and runs its callback
 * outside the lock. The thread exits through pthread_exit() once it
 * observes pool_close set.
 *
 * @param arg  the struct threadpool this worker serves
 */
void* threadpool_function(void* arg)
{
    struct threadpool *tp = (struct threadpool *)arg;
    struct job *task = NULL;

    for (;;)
    {
        pthread_mutex_lock(&tp->mutex);

        /* Sleep until there is a job to take or the pool is closing. */
        while (!tp->pool_close && tp->queue_cur_num == 0)
        {
            pthread_cond_wait(&tp->queue_not_empty, &tp->mutex);
        }
        if (tp->pool_close)
        {
            pthread_mutex_unlock(&tp->mutex);
            pthread_exit(NULL);
        }

        /* Detach the head job from the queue. */
        task = tp->head;
        tp->queue_cur_num -= 1;
        if (tp->queue_cur_num != 0)
        {
            tp->head = task->next;
        }
        else
        {
            tp->head = NULL;
            tp->tail = NULL;
            /* Queue drained: wake one waiter on queue_empty. */
            pthread_cond_signal(&tp->queue_empty);
        }

        /* The queue just went from full to one free slot: wake every
         * waiter on queue_not_full. */
        if (tp->queue_cur_num == tp->queue_max_num - 1)
        {
            pthread_cond_broadcast(&tp->queue_not_full);
        }
        pthread_mutex_unlock(&tp->mutex);

        /* Run the job without holding the mutex, then release its node. */
        task->callback_function(task->arg);
        free(task);
        task = NULL;
    }
}
/**
 * Shut the pool down and release everything it owns.
 *
 * Sets queue_close, waits until the queue has been drained, sets
 * pool_close, wakes all waiters, joins every worker thread, and then
 * destroys the synchronisation objects and frees the memory.
 *
 * @param pool  the pool to destroy (asserted non-NULL)
 * @return 0 on success; -1 if the queue or pool was already marked closed.
 */
int threadpool_destroy(struct threadpool*pool)
{
    int idx;
    struct job *node;

    assert(pool != NULL);

    pthread_mutex_lock(&pool->mutex);
    if (pool->queue_close || pool->pool_close)
    {
        /* A shutdown is already under way or finished. */
        pthread_mutex_unlock(&pool->mutex);
        return -1;
    }

    /* Close the queue, then wait until the queue is empty. */
    pool->queue_close = 1;
    while (pool->queue_cur_num != 0)
    {
        pthread_cond_wait(&pool->queue_empty, &pool->mutex);
    }

    /* Mark the pool closed. */
    pool->pool_close = 1;
    pthread_mutex_unlock(&pool->mutex);

    /* Wake every thread blocked on either queue condition so that it can
     * see the close flags. */
    pthread_cond_broadcast(&pool->queue_not_empty);
    pthread_cond_broadcast(&pool->queue_not_full);

    /* Wait for all worker threads to finish. */
    for (idx = 0; idx < pool->thread_num; ++idx)
    {
        pthread_join(pool->pthreads[idx], NULL);
    }

    /* Tear down the synchronisation objects and free the memory. */
    pthread_mutex_destroy(&pool->mutex);
    pthread_cond_destroy(&pool->queue_empty);
    pthread_cond_destroy(&pool->queue_not_empty);
    pthread_cond_destroy(&pool->queue_not_full);
    free(pool->pthreads);

    /* Free any job nodes still linked from head. */
    while ((node = pool->head) != NULL)
    {
        pool->head = node->next;
        free(node);
    }
    free(pool);
    return 0;
}

Linux企業級專案實踐之網路爬蟲（26）——執行緒池

相關文章