swoole 協程原始碼解讀 (協程的排程)

樂觀的摸一摸頭發表於2019-09-10

以下程式碼基於swoole4.4.5-alpha, php7.1.26

那麼定時事件什麼時候會被執行呢? 這是透過內部的Reactor事件迴圈去實現的, 下面來看具體實現:

建立協程時會判斷reactor是否已經初始化, 沒有初始化則會呼叫activate函式初始化reactor, activate函式大概有這幾個步驟:

  • 初始化reactor結構, 註冊各種回撥函式(讀寫事件採用對應平臺效率最高的多路複用api, 封裝成統一的回撥函式有助於遮蔽不同api實現細節)
  • 透過php_swoole_register_shutdown_function("Swoole\Event::rshutdown")註冊一個在request_shutdown階段呼叫的函式(回憶一下php的生命週期, 指令碼結束的時候會呼叫此函式), 實際上事件迴圈就在這個階段執行
  • 開啟搶佔式排程執行緒(這個後面會說)

    long PHPCoroutine::create(zend_fcall_info_cache *fci_cache, uint32_t argc, zval *argv)
    {
        ...
        if (sw_unlikely(!active))
        {
            activate();
        }
            ...
    }
    
    inline void PHPCoroutine::activate()
    {
        ...
        /* init reactor and register event wait */
        php_swoole_check_reactor();
    
        /* replace interrupt function */
        orig_interrupt_function = zend_interrupt_function; // 儲存原來的中斷回撥函式
        zend_interrupt_function = coro_interrupt_function; // 替換中斷函式
    
          // 開啟搶佔式排程
        if (SWOOLE_G(enable_preemptive_scheduler) || config.enable_preemptive_scheduler)
        {
            /* create a thread to interrupt the coroutine that takes up too much time */
            interrupt_thread_start();
        }
        ...
        active = true;
    }
    
    static sw_inline int php_swoole_check_reactor()
    {
        ...
        if (sw_unlikely(!SwooleG.main_reactor))
        {
            return php_swoole_reactor_init() == SW_OK ? 1 : -1;
        }
        ...
    }
    
    int php_swoole_reactor_init()
    {
            ...
        if (!SwooleG.main_reactor)
        {
            swoole_event_init();
            SwooleG.main_reactor->wait_exit = 1;
                 // 註冊rshutdown函式
            php_swoole_register_shutdown_function("Swoole\\Event::rshutdown");
        }
            ...
    }
    
    #define sw_reactor()           (SwooleG.main_reactor)
    #define SW_REACTOR_MAXEVENTS             4096
    
    int swoole_event_init()
    {
        SwooleG.main_reactor = (swReactor *) sw_malloc(sizeof(swReactor));
    
        if (swReactor_create(sw_reactor(), SW_REACTOR_MAXEVENTS) < 0)
        {
            ...
        }
            ...
    }
    
    int swReactor_create(swReactor *reactor, int max_event)
    {
        int ret;
        bzero(reactor, sizeof(swReactor));
    
    #ifdef HAVE_EPOLL
        ret = swReactorEpoll_create(reactor, max_event);
    #elif defined(HAVE_KQUEUE)
        ret = swReactorKqueue_create(reactor, max_event);
    #elif defined(HAVE_POLL)
        ret = swReactorPoll_create(reactor, max_event);
    #else
        ret = swReactorSelect_create(reactor);
    #endif
            ...
        reactor->onTimeout = reactor_timeout; // 有定時器超時時觸發的回撥
            ...
    
        Socket::init_reactor(reactor);
        ...
    }
    
    int swReactorEpoll_create(swReactor *reactor, int max_event_num)
    {
        ...
        //binding method
        reactor->add = swReactorEpoll_add;
        reactor->set = swReactorEpoll_set;
        reactor->del = swReactorEpoll_del;
        reactor->wait = swReactorEpoll_wait;
        reactor->free = swReactorEpoll_free;
    }

request_shutdown階段會執行註冊的Swoole\Event::rshutdown函式, swoole_event_rshutdown會執行之前註冊的wait函式:

/*
 * PHP function swoole_event_rshutdown.
 * Forwards the call, with the same internal-function parameters, to the
 * swoole_event_wait PHP function inside a zend_try / zend_end_try guard.
 */
static PHP_FUNCTION(swoole_event_rshutdown)
{
    /* prevent the program from jumping out of the rshutdown */
    zend_try
    {
        PHP_FN(swoole_event_wait)(INTERNAL_FUNCTION_PARAM_PASSTHRU);
    }
    zend_end_try();
}

/*
 * Runs the main reactor's wait callback, then frees the event resources.
 * Returns whatever the reactor's wait callback returned.
 */
int swoole_event_wait()
{
    /* wait is the backend-specific callback bound at reactor creation; no timeout struct is passed (NULL) */
    int retval = sw_reactor()->wait(sw_reactor(), NULL);
    /* called unconditionally once wait returns */
    swoole_event_free();
    return retval;
}

我們再來看看定時事件的註冊, 首先會初始化timer:

int System::sleep(double sec)
{
    Coroutine* co = Coroutine::get_current_safe(); // 獲取當前coroutine
    if (swoole_timer_add((long) (sec * 1000), SW_FALSE, sleep_timeout, co) == NULL)
    {
        ...
    }
}

/*
 * Convenience wrapper: adds a timer node to the timer returned by sw_timer().
 *
 * ms           - delay in milliseconds, forwarded as swTimer_add's _msec
 * persistent   - forwarded as swTimer_add's interval flag
 * callback     - function invoked when the timer fires
 * private_data - opaque pointer forwarded as swTimer_add's data
 *
 * Returns swTimer_add's result unchanged.
 * Note the argument order differs: swTimer_add takes data before callback.
 */
swTimer_node* swoole_timer_add(long ms, uchar persistent, swTimerCallback callback, void *private_data)
{
    return swTimer_add(sw_timer(), ms, persistent, private_data, callback);
}

swTimer_node* swTimer_add(swTimer *timer, long _msec, int interval, void *data, swTimerCallback callback)
{
        if (sw_unlikely(!timer->initialized))
    {
        if (sw_unlikely(swTimer_init(timer, _msec) != SW_OK)) // 初始化timer
        {
            return NULL;
        }
    }
        ...
}

static int swTimer_init(swTimer *timer, long msec)
{
      ...
    timer->heap = swHeap_new(1024, SW_MIN_HEAP); // 初始化最小堆
    timer->map = swHashMap_new(SW_HASHMAP_INIT_BUCKET_N, NULL);
    timer->_current_id = -1; // 當前定時器id
    timer->_next_msec = msec; // 定時器裡最短的超時時間
    timer->_next_id = 1;
    timer->round = 0;
    ret = swReactorTimer_init(SwooleG.main_reactor, timer, msec);
    ...
}

static int swReactorTimer_init(swReactor *reactor, swTimer *timer, long exec_msec)
{
    reactor->check_timer = SW_TRUE;
    reactor->timeout_msec = exec_msec; // 定時器裡最短的超時時間
    reactor->timer = timer;
    timer->reactor = reactor;
    timer->set = swReactorTimer_set;
    timer->close = swReactorTimer_close;
        ...
}

接著是新增事件, 需要注意的是:

  • timer._next_msec和reactor.timeout_msec一直保持所有計時器裡最短的超時時間(相對值)
  • 以tnode.exec_msec為優先順序將tnode存入最小堆, 這樣一來堆頂的元素就是最早超時的元素

    swTimer_node* swTimer_add(swTimer *timer, long _msec, int interval, void *data, swTimerCallback callback)
    {
        swTimer_node *tnode = sw_malloc(sizeof(swTimer_node));
    
        int64_t now_msec = swTimer_get_relative_msec();
    
        tnode->data = data;
        tnode->type = SW_TIMER_TYPE_KERNEL;
        tnode->exec_msec = now_msec + _msec; // 絕對時間
        tnode->interval = interval ? _msec : 0; // 是否需要一直呼叫
        tnode->removed = 0;
        tnode->callback = callback;
        tnode->round = timer->round;
        tnode->dtor = NULL;
    
        if (timer->_next_msec < 0 || timer->_next_msec > _msec) // 必要時更新, 始終保持最小超時時間
        {
            timer->set(timer, _msec);
            timer->_next_msec = _msec;
        }
    
        tnode->id = timer->_next_id++;
    
        tnode->heap_node = swHeap_push(timer->heap, tnode->exec_msec, tnode); // 放入堆, priority = tnode->exec_msec
        if (sw_unlikely(swHashMap_add_int(timer->map, tnode->id, tnode) != SW_OK)) // hashmap儲存tnodeid和tnode對映關係
        {
            ...
        }
        ...
    }

定時事件註冊完就可以等待被事件迴圈執行了, 我們以epoll為例:

使用epoll_wait等待fd讀寫事件, 傳入reactor->timeout_msec, 等待fd事件到來

  • 如果epoll_wait超時時還未獲取到任何fd讀寫事件, 執行onTimeout函式, 處理定時事件
  • 有fd事件則處理fd讀寫事件, 處理完這次所有觸發的事件後, 進入下一次迴圈

    static int swReactorEpoll_wait(swReactor *reactor, struct timeval *timeo)
    {
        ...
        reactor->running = 1;
        reactor->start = 1;
    
        while (reactor->running > 0)
        {
            ...
            n = epoll_wait(epoll_fd, events, max_event_num, reactor->timeout_msec);
            if (n < 0)
            {
                            ...
                // 錯誤處理
            }
            else if (n == 0)
            {
                reactor->onTimeout(reactor);
            }
            for (i = 0; i < n; i++)
            {
                            ...
                            // fd讀寫事件處理
            }
                    ...
        }
        return 0;
    }

如果這期間沒有任何fd事件, 定時事件會被執行, onTimeout是之前已經註冊過的函式reactor_timeout, swTimer_select函式會把當前所有已經到期的事件執行完再退出迴圈, 執行到上文我們註冊的sleep_timeout函式時, 就會喚醒因為sleep休眠的協程繼續執行:

static void reactor_timeout(swReactor *reactor)
{
    reactor_finish(reactor);
        ...
}

static void reactor_finish(swReactor *reactor)
{
    //check timer
    if (reactor->check_timer)
    {
        swTimer_select(reactor->timer);
    }
        ...
    //the event loop is empty
    if (reactor->wait_exit && reactor->is_empty(reactor)) // 沒有任務了, 退出迴圈
    {
        reactor->running = 0;
    }
}

int swTimer_select(swTimer *timer)
{
    int64_t now_msec = swTimer_get_relative_msec(); // 當前時間

    while ((tmp = swHeap_top(timer->heap))) // 獲取最早到期的事件
    {
        tnode = tmp->data;
        if (tnode->exec_msec > now_msec) // 未到時間
        {
            break;
        }

                if (!tnode->removed)
        {
                        tnode->callback(timer, tnode); // 執行定時事件註冊的回撥函式
        }

        timer->num--;
        swHeap_pop(timer->heap);
        swHashMap_del_int(timer->map, tnode->id);
    }
       ...
}

到這裡, 整個流程都已經介紹完了, 總結一下:

  • 在沒有主動干預協程排程的情況下, 協程都是在執行IO/定時事件時主動讓出, 註冊對應事件, 然後透過request_shutdown階段裡的事件迴圈等待事件到來, 觸發協程的resume, 達到多協程併發的效果
  • IO/定時事件不一定準時

透過上面我們可以知道, 如果協程裡沒有任何IO/定時事件, 實際上協程是沒有切換時機的, 對於CPU密集型的場景,一些協程會因為得不到CPU時間片被餓死, Swoole 4.4引入了搶佔式排程就是為了解決這個問題.

vm interrupt是php7.1.0後引入的執行機制, swoole就是使用這個特性實現的搶佔式排程:

  • ZEND_VM_INTERRUPT_CHECK會在指令是jump和call的時候執行
  • ZEND_VM_INTERRUPT_CHECK會檢查EG(vm_interrupt)這個標誌位, 如果為1, 則觸發zend_interrupt_function的執行

    // php 7.1.26 src
    #define ZEND_VM_INTERRUPT_CHECK() do { \
        if (UNEXPECTED(EG(vm_interrupt))) { \
                ZEND_VM_INTERRUPT(); \
          } \
    } while (0)
    
    #define ZEND_VM_INTERRUPT()      ZEND_VM_TAIL_CALL(zend_interrupt_helper_SPEC(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU));
    
    static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_interrupt_helper_SPEC(ZEND_OPCODE_HANDLER_ARGS)
    {
          ...
          EG(vm_interrupt) = 0;
          if (zend_interrupt_function) {
                zend_interrupt_function(execute_data);
          }
    }

下面來看具體實現:
初始化:

  • 儲存原來的中斷函式, zend_interrupt_function替換成新的中斷函式
  • 開啟執行緒執行interrupt_thread_loop
  • interrupt_thread_loop裡每隔5ms將EG(vm_interrupt)設定為1

    inline void PHPCoroutine::activate()
    {
        ...
        /* replace interrupt function */
        orig_interrupt_function = zend_interrupt_function; // 儲存原來的中斷回撥函式
        zend_interrupt_function = coro_interrupt_function; // 替換中斷函式
    
          // 開啟搶佔式排程
        if (SWOOLE_G(enable_preemptive_scheduler) || config.enable_preemptive_scheduler) // 配置要開啟enable_preemptive_scheduler選項
        {
            /* create a thread to interrupt the coroutine that takes up too much time */
            interrupt_thread_start();
        }
    }
    
    // Starts the background thread that runs interrupt_thread_loop.
    void PHPCoroutine::interrupt_thread_start()
    {
        // Keep the address of EG(vm_interrupt); interrupt_thread_loop writes the flag through this pointer.
        zend_vm_interrupt = &EG(vm_interrupt);
        // Must be true before the thread starts: it is the loop condition of interrupt_thread_loop.
        interrupt_thread_running = true;
        // NOTE(review): POSIX pthread_create returns 0 on success and a positive error number on failure,
        // so this `< 0` check looks like it can never be true -- confirm, and consider `!= 0`.
        if (pthread_create(&interrupt_thread_id, NULL, (void * (*)(void *)) interrupt_thread_loop, NULL) < 0)
        {
            ...
        }
    }
    
    // Time budget in milliseconds; the loop below fires at half of this value.
    static const uint8_t MAX_EXEC_MSEC = 10;
    // Thread body: repeatedly raises the VM interrupt flag and sleeps,
    // for as long as interrupt_thread_running stays true.
    void PHPCoroutine::interrupt_thread_loop()
    {
        // (10 / 2) * 1000 = 5000 microseconds between two flag writes
        static const useconds_t interval = (MAX_EXEC_MSEC / 2) * 1000;
        while (interrupt_thread_running)
        {
            *zend_vm_interrupt = 1; // EG(vm_interrupt) = 1
            usleep(interval); // sleep for 5 ms
        }
        pthread_exit(0);
    }

中斷函式coro_interrupt_function會檢查當前的協程是否可排程(距離上一次切換時間超過10ms), 如果可以, 直接讓出當前協程, 完成搶佔排程

/*
 * Interrupt hook installed in place of zend_interrupt_function.
 * Yields the current coroutine when its task exists, has a coroutine attached
 * and PHPCoroutine::is_schedulable(task) is true; afterwards chains to the
 * previously installed interrupt function, if there was one.
 *
 * execute_data - passed through unchanged to the original interrupt function
 */
static void coro_interrupt_function(zend_execute_data *execute_data)
{
    php_coro_task *task = PHPCoroutine::get_task();
    if (task && task->co && PHPCoroutine::is_schedulable(task))
    {
        task->co->yield(); // yield the current coroutine
    }
    if (orig_interrupt_function)
    {
        orig_interrupt_function(execute_data); // invoke the original interrupt function
    }
}

// Threshold in milliseconds used by is_schedulable below.
static const uint8_t MAX_EXEC_MSEC = 10;
/*
 * Returns true when the task may be preempted: its enable_scheduler flag is set
 * and the current absolute time exceeds task->last_msec by more than MAX_EXEC_MSEC.
 * The comparison is strict, so a difference of exactly 10 ms returns false.
 * NOTE(review): last_msec is presumably the time of the task's last switch -- confirm where it is updated.
 */
static inline bool is_schedulable(php_coro_task *task)
{
        // enable_scheduler is set and the task has been running for more than 10 ms
    return task->enable_scheduler && (swTimer_get_absolute_msec() - task->last_msec > MAX_EXEC_MSEC); 
}
本作品採用《CC 協議》,轉載必須註明作者和本文連結

相關文章