swPort_onRead_check_eof
EOF 自動分包
- 我們前面說過,
swPort_onRead_raw
是最簡單的向worker
程式傳送資料包的方法,swoole
會將從客戶端接受到的資料包,立刻傳送給worker
程式,使用者自己把資料包拼接起來 - 如果啟用了
EOF
自動分包,那麼swoole
會檢測EOF
符號,拼接完畢資料之後再向worker
傳送資料 -
swProtocol_recv_check_eof
用於檢測EOF
符號,如果沒有檢測到資料就儲存到buffer
。
/**
 * Read-ready handler for listen ports that frame packages with an EOF marker.
 *
 * Fetches the per-connection receive buffer and lets
 * swProtocol_recv_check_eof() read and dispatch data. A negative result from
 * the protocol layer closes the connection.
 *
 * @return SW_ERR when no receive buffer is available, SW_OK otherwise
 *         (including the case where the connection has just been closed).
 */
static int swPort_onRead_check_eof(swReactor *reactor, swListenPort *port, swEvent *event)
{
    swServer *server = reactor->ptr;
    swString *recv_buf = swServer_get_buffer(server, event->fd);

    if (recv_buf == NULL)
    {
        return SW_ERR;
    }

    int rc = swProtocol_recv_check_eof(&port->protocol, event->socket, recv_buf);
    if (rc < 0)
    {
        swReactorThread_onClose(reactor, event);
    }

    return SW_OK;
}
/**
 * Return the receive buffer attached to connection `fd`, creating it with
 * SW_BUFFER_SIZE_STD bytes on first use.
 *
 * @return the buffer, or NULL when a new buffer could not be allocated
 *         (the connection slot is left untouched in that case).
 */
static sw_inline swString *swServer_get_buffer(swServer *serv, int fd)
{
    if (serv->connection_list[fd].recv_buffer != NULL)
    {
        return serv->connection_list[fd].recv_buffer;
    }

    swString *fresh = swString_new(SW_BUFFER_SIZE_STD);
    if (fresh != NULL)
    {
        serv->connection_list[fd].recv_buffer = fresh;
    }
    return fresh;
}
swProtocol_recv_check_eof
檢測 EOF
- 首先需要呼叫
swConnection_recv
函式接受客戶端發來的資料,如果發生錯誤返回SW_OK
,等待socket
讀就緒重新讀取;如果錯誤是SW_CLOSE
,那麼就要返回SW_ERR
,然後讓swPort_onRead_check_eof
函式呼叫swReactorThread_onClose
函式。 -
EOF
自動分包也有兩種方式,分別是open_eof_check
和open_eof_split
,open_eof_check
只檢查接收資料的末尾是否為EOF
,因此它的效能最好,幾乎沒有消耗,但是無法解決多個資料包合併的問題,比如同時傳送兩條帶有EOF
的資料,底層可能會一次全部返回;open_eof_split
會從左到右對資料進行逐位元組對比,查詢資料中的EOF
進行分包,效能較差。但是每次只會返回一個資料包 - 如果採用
open_eof_check
,那麼只需要簡單的memcmp
對比資料包的最後字元即可,如果符合條件就會呼叫protocol->onPackage
函式,也就是swReactorThread_dispatch
- 如果採用的是
open_eof_split
就會比較麻煩,需要呼叫swProtocol_split_package_by_eof
逐個去找EOF
- 如果超過了
protocol->package_max_length
大小,那麼說明一直沒有傳送成功,就會返回錯誤,結束當前連線 - 如果緩衝區不足,那麼就將緩衝區擴容到
protocol->package_max_length
,繼續接受資料
/**
 * Receive data from `conn` into `buffer` and dispatch complete EOF-terminated
 * packages through protocol->onPackage.
 *
 * With protocol->split_by_eof set, the buffer is scanned for every EOF marker
 * (swProtocol_split_package_by_eof); otherwise only the tail of the buffer is
 * compared against the marker and the whole buffer is dispatched as one
 * package.
 *
 * @return SW_OK  to keep the connection (package dispatched, or waiting for
 *                more data),
 *         SW_ERR when the caller should close the connection: peer closed,
 *                fatal recv error, onPackage failure, package reached
 *                package_max_length, or the buffer could not be extended.
 */
int swProtocol_recv_check_eof(swProtocol *protocol, swConnection *conn, swString *buffer)
{
    /* once set, stays set: the loop then runs until recv() reports an error or EOF */
    int recv_again = SW_FALSE;
    int buf_size;

    recv_data: buf_size = buffer->size - buffer->length;
    char *buf_ptr = buffer->str + buffer->length;

    /* never read more than one standard buffer per recv() call */
    if (buf_size > SW_BUFFER_SIZE_STD)
    {
        buf_size = SW_BUFFER_SIZE_STD;
    }

    int n = swConnection_recv(conn, buf_ptr, buf_size, 0);
    if (n < 0)
    {
        switch (swConnection_error(errno))
        {
        case SW_ERROR:
            swSysError("recv from socket#%d failed.", conn->fd);
            return SW_OK;
        case SW_CLOSE:
            conn->close_errno = errno;
            return SW_ERR;
        default:
            return SW_OK;
        }
    }
    else if (n == 0)
    {
        /* recv() returned 0: report it so the caller closes the connection */
        return SW_ERR;
    }
    else
    {
        buffer->length += n;

        /* not even one full EOF marker buffered yet */
        if (buffer->length < protocol->package_eof_len)
        {
            return SW_OK;
        }

        if (protocol->split_by_eof)
        {
            int retval = swProtocol_split_package_by_eof(protocol, conn, buffer);
            if (retval == 0)
            {
                return SW_OK;
            }
            else if (retval < 0)
            {
                /*
                 * onPackage failed inside the splitter. Report the error like
                 * the non-split branch below does, instead of reading again.
                 */
                return SW_ERR;
            }
            else
            {
                /* no EOF found yet: keep receiving */
                recv_again = SW_TRUE;
            }
        }
        else if (memcmp(buffer->str + buffer->length - protocol->package_eof_len, protocol->package_eof, protocol->package_eof_len) == 0)
        {
            if (protocol->onPackage(conn, buffer->str, buffer->length) < 0)
            {
                return SW_ERR;
            }
            if (conn->removed)
            {
                return SW_OK;
            }
            swString_clear(buffer);
            return SW_OK;
        }

        //over max length, will discard
        if (buffer->length == protocol->package_max_length)
        {
            swWarn("Package is too big. package_length=%d", (int )buffer->length);
            return SW_ERR;
        }

        //buffer is full, may have not read data
        if (buffer->length == buffer->size)
        {
            recv_again = SW_TRUE;
            if (buffer->size < protocol->package_max_length)
            {
                /* grow to twice the size (page aligned), capped at package_max_length */
                uint32_t extend_size = swoole_size_align(buffer->size * 2, SwooleG.pagesize);
                if (extend_size > protocol->package_max_length)
                {
                    extend_size = protocol->package_max_length;
                }
                if (swString_extend(buffer, extend_size) < 0)
                {
                    return SW_ERR;
                }
            }
        }

        //no eof
        if (recv_again)
        {
            goto recv_data;
        }
    }
    return SW_OK;
}
swProtocol_split_package_by_eof
尋找 EOF
- 如果當前快取中資料連
package_eof_len
也就是EOF
的長度都不夠,那麼就直接返回,繼續接受資料 - 根據
package_eof
來查詢第一個EOF
的位置,如果沒有找到EOF
,那麼遞增buffer->offset
,返回繼續接受資料 - 找到了
EOF
之後,就要呼叫protocol->onPackage
函式,傳送給worker
程式 - 接著就要從剩餘的資料裡面迴圈不斷尋找
EOF
,呼叫protocol->onPackage
函式
/**
 * Split the data accumulated in `buffer` into EOF-terminated packages and
 * hand every complete package to protocol->onPackage.
 *
 * Return value:
 *  - SW_ERR when an onPackage call fails;
 *  - SW_OK after at least one package was dispatched, or when the callback
 *    removed the connection;
 *  - buffer->length when no EOF marker has been found yet
 *    (swProtocol_recv_check_eof treats that as "receive again").
 */
static sw_inline int swProtocol_split_package_by_eof(swProtocol *protocol, swConnection *conn, swString *buffer)
{
#if SW_LOG_TRACE_OPEN > 0
// call counter, referenced only by the trace messages below
// NOTE(review): `count` is used in swTraceLog() outside this #if; presumably
// swTraceLog() expands to nothing when tracing is disabled -- confirm.
static int count;
count++;
#endif
int eof_pos;
// fewer unscanned bytes than one EOF marker: nothing to search
if (buffer->length - buffer->offset < protocol->package_eof_len)
{
eof_pos = -1;
}
else
{
// only the region after buffer->offset is searched; eof_pos is relative to that region
eof_pos = swoole_strnpos(buffer->str + buffer->offset, buffer->length - buffer->offset, protocol->package_eof, protocol->package_eof_len);
}
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[0] count=%d, length=%ld, size=%ld, offset=%ld.", count, buffer->length, buffer->size, (long)buffer->offset);
//waiting for more data
if (eof_pos < 0)
{
// the next search restarts package_eof_len bytes before the current end,
// so a marker split across two reads is still found
buffer->offset = buffer->length - protocol->package_eof_len;
return buffer->length;
}
// length of the first package, measured from the start of the buffer and including the EOF marker
uint32_t length = buffer->offset + eof_pos + protocol->package_eof_len;
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[4] count=%d, length=%d", count, length);
if (protocol->onPackage(conn, buffer->str, length) < 0)
{
return SW_ERR;
}
// the callback removed the connection: stop without touching the buffer
if (conn->removed)
{
return SW_OK;
}
//there are remaining data
if (length < buffer->length)
{
uint32_t remaining_length = buffer->length - length;
char *remaining_data = buffer->str + length;
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[5] count=%d, remaining_length=%d", count, remaining_length);
// keep dispatching packages while the remainder contains another EOF marker
while (1)
{
if (remaining_length < protocol->package_eof_len)
{
goto wait_more_data;
}
eof_pos = swoole_strnpos(remaining_data, remaining_length, protocol->package_eof, protocol->package_eof_len);
if (eof_pos < 0)
{
// incomplete tail: move it to the front of the buffer and restart scanning from offset 0
wait_more_data:
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[1] count=%d, remaining_length=%d, length=%d", count, remaining_length, length);
memmove(buffer->str, remaining_data, remaining_length);
buffer->length = remaining_length;
buffer->offset = 0;
return SW_OK;
}
else
{
// from here on `length` is the size of the current package only (relative to remaining_data)
length = eof_pos + protocol->package_eof_len;
if (protocol->onPackage(conn, remaining_data, length) < 0)
{
return SW_ERR;
}
if (conn->removed)
{
return SW_OK;
}
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[2] count=%d, remaining_length=%d, length=%d", count, remaining_length, length);
remaining_data += length;
remaining_length -= length;
}
}
}
// the first package consumed the whole buffer
swTraceLog(SW_TRACE_EOF_PROTOCOL, "#[3] length=%ld, size=%ld, offset=%ld", buffer->length, buffer->size, (long)buffer->offset);
swString_clear(buffer);
return SW_OK;
}
swPort_onRead_check_length
包長檢測
- 類似地本函式也是呼叫
swProtocol_recv_check_length
來進行包長檢測
/**
 * Read-ready handler for listen ports that frame packages with a length
 * header.
 *
 * Fetches the per-connection receive buffer and lets
 * swProtocol_recv_check_length() read and dispatch data. A negative result
 * from the protocol layer closes the connection.
 *
 * @return SW_ERR when no receive buffer is available, SW_OK otherwise.
 */
static int swPort_onRead_check_length(swReactor *reactor, swListenPort *port, swEvent *event)
{
    swServer *server = reactor->ptr;
    swString *recv_buf = swServer_get_buffer(server, event->fd);

    if (recv_buf == NULL)
    {
        return SW_ERR;
    }

    int rc = swProtocol_recv_check_length(&port->protocol, event->socket, recv_buf);
    if (rc < 0)
    {
        swTrace("Close Event.FD=%d|From=%d", event->fd, event->from_id);
        swReactorThread_onClose(reactor, event);
    }

    return SW_OK;
}
swProtocol_recv_check_length
函式
- 進行包長檢測的時候,每次讀取資料之前都要先讀取
header
,從header
中獲取到資料包的大小後,再去讀取真正的資料 - 當我們不知道包長大小的時候,
buffer->offset
為 0,此時需要讀取length
大小,但是這個資料位於header
的protocol->package_length_offset
位置,假設length
位於header
的第 8 個位元組;length
自身資料大小為protocol->package_length_size
,例如int_32
型別,這個值就是 4,因此我們需要先讀取 12 個位元組,這 12 個位元組的最後 4 個位元組就是length
的值,也就是包長。 -
將資料拿到後(此時
recv_wait
為 0),呼叫protocol->get_package_length
就可以獲取length
的值,並把buffer->offset
設定為這個包長值,- 如果此時
buffer->length
已接收的資料大於這個包長,那麼就呼叫onPackage
傳送給worker
程式 -
如果此時已接收的資料不足,那麼
recv_size
就是剩餘需要接受的資料大小,此時recv_wait
為 1,繼續接受資料- 如果接受到的資料已經大於包長,那麼就呼叫
onPackage
傳送。之後如果仍然有剩餘未傳送的資料,那麼就do_get_length
;如果已經沒有剩餘資料了,繼續去取下一個資料包。 - 如果資料還是不夠,那麼就返回,等待讀就緒事件
- 如果接受到的資料已經大於包長,那麼就呼叫
- 如果此時
/**
 * Receive data from `conn` into `buffer` for a length-prefixed protocol and
 * dispatch every complete package through protocol->onPackage.
 *
 * State kept between calls:
 *  - buffer->offset  0 while the package length is unknown, otherwise the
 *                    total length of the package being received;
 *  - conn->recv_wait 1 while the body of a package with known length is
 *                    being collected;
 *  - conn->skip_recv when set, the data already in `buffer` is parsed before
 *                    any new recv() call.
 *
 * @return SW_OK  to keep the connection (waiting for more data, connection
 *                inactive or removed),
 *         SW_ERR when the caller should close the connection: peer closed,
 *                fatal recv error, invalid or oversized length, onPackage
 *                failure, or the buffer could not be extended.
 */
int swProtocol_recv_check_length(swProtocol *protocol, swConnection *conn, swString *buffer)
{
    int package_length;
    uint32_t recv_size;

    if (conn->skip_recv)
    {
        conn->skip_recv = 0;
        goto do_get_length;
    }

    do_recv:
    if (conn->active == 0)
    {
        return SW_OK;
    }
    if (buffer->offset > 0)
    {
        /* length known: read exactly the bytes still missing from this package */
        recv_size = buffer->offset - buffer->length;
    }
    else
    {
        /* length unknown: read enough to cover the length field of the header */
        recv_size = protocol->package_length_offset + protocol->package_length_size;
    }

    int n = swConnection_recv(conn, buffer->str + buffer->length, recv_size, 0);
    if (n < 0)
    {
        switch (swConnection_error(errno))
        {
        case SW_ERROR:
            swSysError("recv(%d, %d) failed.", conn->fd, recv_size);
            return SW_OK;
        case SW_CLOSE:
            conn->close_errno = errno;
            return SW_ERR;
        default:
            return SW_OK;
        }
    }
    else if (n == 0)
    {
        /* recv() returned 0: report it so the caller closes the connection */
        return SW_ERR;
    }
    else
    {
        buffer->length += n;

        if (conn->recv_wait)
        {
            if (buffer->length >= buffer->offset)
            {
                do_dispatch:
                if (protocol->onPackage(conn, buffer->str, buffer->offset) < 0)
                {
                    return SW_ERR;
                }
                if (conn->removed)
                {
                    return SW_OK;
                }
                conn->recv_wait = 0;

                int remaining_length = buffer->length - buffer->offset;
                if (remaining_length > 0)
                {
                    /*
                     * Shift the bytes that follow the dispatched package to
                     * the front of the buffer. memmove() handles the
                     * overlapping regions in place, so no bounded scratch
                     * buffer (and no size assertion) is needed.
                     */
                    memmove(buffer->str, buffer->str + buffer->offset, remaining_length);
                    buffer->offset = 0;
                    buffer->length = remaining_length;
                    goto do_get_length;
                }
                else
                {
                    swString_clear(buffer);
                    goto do_recv;
                }
            }
            else
            {
                /* body still incomplete: wait for the next read event */
                return SW_OK;
            }
        }
        else
        {
            do_get_length: package_length = protocol->get_package_length(protocol, conn, buffer->str, buffer->length);
            //invalid package, close connection.
            if (package_length < 0)
            {
                return SW_ERR;
            }
            //no length
            else if (package_length == 0)
            {
                return SW_OK;
            }
            else if (package_length > protocol->package_max_length)
            {
                swWarn("package is too big, remote_addr=%s:%d, length=%d.", swConnection_get_ip(conn), swConnection_get_port(conn), package_length);
                return SW_ERR;
            }
            //get length success
            else
            {
                if (buffer->size < package_length)
                {
                    if (swString_extend(buffer, package_length) < 0)
                    {
                        return SW_ERR;
                    }
                }
                conn->recv_wait = 1;
                buffer->offset = package_length;

                if (buffer->length >= package_length)
                {
                    goto do_dispatch;
                }
                else
                {
                    goto do_recv;
                }
            }
        }
    }
    return SW_OK;
}
swProtocol_get_package_length
獲取包長
本函式邏輯很簡單,如果長度連 length
都不夠,那麼包長資訊並不在 data
中,直接返回繼續接受資料。拿到 length
後,要用 swoole_unpack
函式轉化為相應的型別即可得到包長值。
/**
 * Compute the total length of a length-prefixed package from its header.
 *
 * The length field sits at protocol->package_length_offset inside `data` and
 * is decoded with swoole_unpack() according to protocol->package_length_type.
 *
 * @param size number of bytes currently available in `data`
 * @return 0 when `data` does not yet contain the whole length field,
 *         SW_ERR when the decoded body length is negative,
 *         otherwise protocol->package_body_offset plus the body length.
 */
int swProtocol_get_package_length(swProtocol *protocol, swConnection *conn, char *data, uint32_t size)
{
    const uint16_t field_pos = protocol->package_length_offset;

    /* the length field has not been fully received yet: wait for more data */
    if (size < field_pos + protocol->package_length_size)
    {
        return 0;
    }

    const int32_t payload_len = swoole_unpack(protocol->package_length_type, data + field_pos);
    if (payload_len >= 0)
    {
        /* total package length */
        return protocol->package_body_offset + payload_len;
    }

    /* a negative length is not legitimate */
    swWarn("invalid package, remote_addr=%s:%d, length=%d, size=%d.", swConnection_get_ip(conn), swConnection_get_port(conn), payload_len, size);
    return SW_ERR;
}
/**
 * Decode an integer stored at `data` according to a pack()-style format
 * character and return it as a 32-bit signed value.
 *
 * Supported formats:
 *   'c' / 'C'  signed / unsigned 8 bit
 *   's' / 'S'  signed / unsigned 16 bit, machine byte order
 *   'n'        unsigned 16 bit, big endian
 *   'v'        unsigned 16 bit, little endian
 *   'l' / 'L'  signed / unsigned 32 bit, machine byte order
 *   'N'        unsigned 32 bit, big endian
 *   'V'        unsigned 32 bit, little endian
 * Any other character is decoded like 'L'.
 *
 * Multi-byte values are copied out with memcpy() because `data` points at an
 * arbitrary offset inside a receive buffer and may not be suitably aligned.
 */
static sw_inline int32_t swoole_unpack(char type, void *data)
{
    int16_t s16;
    uint16_t u16;
    int s32;
    uint32_t u32;

    switch (type)
    {
    /*-------------------------8bit-----------------------------*/
    case 'c':
        return *((int8_t *) data);
    case 'C':
        return *((uint8_t *) data);
    /*-------------------------16bit-----------------------------*/
    /**
     * signed short (always 16 bit, machine byte order)
     */
    case 's':
        memcpy(&s16, data, sizeof(s16));
        return s16;
    /**
     * unsigned short (always 16 bit, machine byte order)
     */
    case 'S':
        memcpy(&u16, data, sizeof(u16));
        return u16;
    /**
     * unsigned short (always 16 bit, big endian byte order)
     */
    case 'n':
        memcpy(&u16, data, sizeof(u16));
        return ntohs(u16);
    /**
     * unsigned short (always 16 bit, little endian byte order)
     */
    case 'v':
        memcpy(&u16, data, sizeof(u16));
        return swoole_swap_endian16(ntohs(u16));
    /*-------------------------32bit-----------------------------*/
    /**
     * unsigned long (always 32 bit, machine byte order)
     */
    case 'L':
        memcpy(&u32, data, sizeof(u32));
        return u32;
    /**
     * signed long (always 32 bit, machine byte order)
     */
    case 'l':
        memcpy(&s32, data, sizeof(s32));
        return s32;
    /**
     * unsigned long (always 32 bit, big endian byte order)
     */
    case 'N':
        memcpy(&u32, data, sizeof(u32));
        return ntohl(u32);
    /**
     * unsigned long (always 32 bit, little endian byte order)
     */
    case 'V':
        memcpy(&u32, data, sizeof(u32));
        return swoole_swap_endian32(ntohl(u32));
    default:
        memcpy(&u32, data, sizeof(u32));
        return u32;
    }
}
swReactorThread_onPipeWrite
寫事件回撥
- 當
reactor
執行緒檢測到相對應的worker
程式的pipe_master
寫就緒的時候,就會呼叫swReactorThread_onPipeWrite
- 當
in_buffer
不是空的話,就會迴圈拿出單連結串列的資料,呼叫swServer_connection_verify
驗證session_id
是否正確,然後呼叫write
傳送資料 - 當返回的錯誤是
EAGAIN
的時候,說明管道的緩衝區已滿、暫時無法寫入
,返回等待下一次寫就緒即可 - 值得注意的是
write
的返回結果不需要關心到底寫入了多少,因為對於linux
來說,pipe
可以保證write
小於PIPE_BUF
大小資料的原子性,不是全部寫入成功,就是寫入失敗,不會出現寫入部分資料的可能。 - 當所有的資料都傳送成功後,取消寫就緒監控,防止重複浪費呼叫
/**
 * Write-ready callback for a pipe fd: flushes the chunks queued in the
 * in_buffer of connection_list[ev->fd] to that fd.
 *
 * The whole drain runs under the lock stored in connection_list[ev->fd].object.
 *
 * Returns SW_OK when the queue was drained or the write would block
 * (EAGAIN, plus ENOBUFS when HAVE_KQUEUE is defined); SW_ERR on any other
 * write error. In both failure cases the current chunk stays queued.
 */
static int swReactorThread_onPipeWrite(swReactor *reactor, swEvent *ev)
{
int ret;
swBuffer_trunk *trunk = NULL;
swEventData *send_data;
swConnection *conn;
swServer *serv = reactor->ptr;
swBuffer *buffer = serv->connection_list[ev->fd].in_buffer;
swLock *lock = serv->connection_list[ev->fd].object;
//lock thread
lock->lock(lock);
while (!swBuffer_empty(buffer))
{
trunk = swBuffer_get_trunk(buffer);
send_data = trunk->store.ptr;
//server active close, discard data.
if (swEventData_is_stream(send_data->info.type))
{
//send_data->info.fd is session_id
conn = swServer_connection_verify(serv, send_data->info.fd);
// session no longer valid or already closed: drop the chunk instead of forwarding it
if (conn == NULL || conn->closed)
{
#ifdef SW_USE_RINGBUFFER
// the chunk payload is a swPackage; hand its data pointer back to this thread's buffer_input
swReactorThread *thread = swServer_get_thread(SwooleG.serv, SwooleTG.id);
swPackage package;
memcpy(&package, send_data->data, sizeof(package));
thread->buffer_input->free(thread->buffer_input, package.data);
#endif
if (conn && conn->closed)
{
swoole_error_log(SW_LOG_NOTICE, SW_ERROR_SESSION_CLOSED_BY_SERVER, "Session#%d is closed by server.", send_data->info.fd);
}
swBuffer_pop_trunk(buffer, trunk);
continue;
}
}
// NOTE(review): only ret < 0 is checked; a short write is handled like a
// complete one (the chunk is popped below) -- confirm chunks never exceed
// the size the pipe writes atomically.
ret = write(ev->fd, trunk->store.ptr, trunk->length);
if (ret < 0)
{
//release lock
lock->unlock(lock);
#ifdef HAVE_KQUEUE
return (errno == EAGAIN || errno == ENOBUFS) ? SW_OK : SW_ERR;
#else
return errno == EAGAIN ? SW_OK : SW_ERR;
#endif
}
else
{
swBuffer_pop_trunk(buffer, trunk);
}
}
//remove EPOLLOUT event
if (swBuffer_empty(buffer))
{
// fd's from_id is this thread: keep the fd registered for reading only; otherwise remove it from this reactor
if (SwooleG.serv->connection_list[ev->fd].from_id == SwooleTG.id)
{
ret = reactor->set(reactor, ev->fd, SW_FD_PIPE | SW_EVENT_READ);
}
else
{
ret = reactor->del(reactor, ev->fd);
}
if (ret < 0)
{
// the same message is logged whether set() or del() failed
swSysError("reactor->set(%d) failed.", ev->fd);
}
}
//release lock
lock->unlock(lock);
return SW_OK;
}
swReactorThread_onPipeReceive
讀事件就緒
- 從
worker
程式返回的資料有三種:SW_RESPONSE_SMALL
(少量資料)、SW_RESPONSE_SHM
(大資料包儲存在共享記憶體中)、SW_RESPONSE_TMPFILE
(臨時檔案) - 需要將從
worker
接受到的swEventData
物件轉化為swSendData
- 對於大資料包,
worker
並不會將資料通過socket
來傳遞,而是將work_id
傳送過來,資料存放在worker->send_shm
中 - 如果是臨時檔案,
worker
傳送過來的資料是臨時檔案的名字,需要呼叫swTaskWorker_large_unpack
將檔案內容讀取到SwooleTG.buffer_stack
中去 -
swReactorThread_send
函式用於向客戶端傳送資料
/**
 * Description of one send request handled by swReactorThread_send().
 */
typedef struct _swSendData
{
// header copied from the worker's swEventData; swReactorThread_send() reads info.fd as the session id and info.type as the event type
swDataHead info;
/**
 * for big package
 */
// number of bytes at `data`
uint32_t length;
// payload to send
char *data;
} swSendData;
/**
 * Payload of a SW_RESPONSE_SHM message, copied out of swEventData.data by
 * swReactorThread_onPipeReceive().
 */
typedef struct
{
// number of bytes to send from the worker's send_shm
int length;
// id of the worker whose send_shm holds the data
int worker_id;
} swPackage_response;
/**
 * Read-ready callback for a worker pipe: reads one swEventData response,
 * converts it into a swSendData and passes it to swReactorThread_send().
 *
 * The response kind is carried in info.from_fd:
 *  - SW_RESPONSE_SMALL:   payload is inline in the message;
 *  - SW_RESPONSE_SHM:     payload is in the worker's send_shm;
 *  - SW_RESPONSE_TMPFILE: payload is loaded by swTaskWorker_large_unpack();
 *  - SW_RESPONSE_EXIT:    stops the reactor loop.
 *
 * When SW_REACTOR_RECV_AGAIN is defined the body loops until read() fails;
 * otherwise a single message is processed per call.
 *
 * Returns SW_OK normally (including EAGAIN), SW_ERR when read() fails with
 * another error or the temp-file payload cannot be loaded.
 */
static int swReactorThread_onPipeReceive(swReactor *reactor, swEvent *ev)
{
int n;
swEventData resp;
swSendData _send;
swPackage_response pkg_resp;
swWorker *worker;
#ifdef SW_REACTOR_RECV_AGAIN
while (1)
#endif
{
n = read(ev->fd, &resp, sizeof(resp));
if (n > 0)
{
memcpy(&_send.info, &resp.info, sizeof(resp.info));
//pipe data
if (_send.info.from_fd == SW_RESPONSE_SMALL)
{
// _send.data points into the local `resp`; it is only valid for this call
_send.data = resp.data;
_send.length = resp.info.len;
swReactorThread_send(&_send);
}
//use send shm
else if (_send.info.from_fd == SW_RESPONSE_SHM)
{
memcpy(&pkg_resp, resp.data, sizeof(pkg_resp));
worker = swServer_get_worker(SwooleG.serv, pkg_resp.worker_id);
_send.data = worker->send_shm;
_send.length = pkg_resp.length;
swReactorThread_send(&_send);
// NOTE(review): the lock is released here but not acquired in this function;
// presumably the worker locked it before filling send_shm -- confirm.
worker->lock.unlock(&worker->lock);
}
//use tmp file
else if (_send.info.from_fd == SW_RESPONSE_TMPFILE)
{
swString *data = swTaskWorker_large_unpack(&resp);
if (data == NULL)
{
return SW_ERR;
}
_send.data = data->str;
_send.length = data->length;
swReactorThread_send(&_send);
}
//reactor thread exit
else if (_send.info.from_fd == SW_RESPONSE_EXIT)
{
reactor->running = 0;
return SW_OK;
}
//will never be here
else
{
abort();
}
}
// NOTE(review): n == 0 also reaches this errno test, although read() does
// not set errno in that case, so the value examined may be stale.
else if (errno == EAGAIN)
{
return SW_OK;
}
else
{
swWarn("read(worker_pipe) failed. Error: %s[%d]", strerror(errno), errno);
return SW_ERR;
}
}
return SW_OK;
}
/**
 * Load a payload that was passed through a temporary file.
 *
 * `task_result->data` carries a swPackage_task with the file name and the
 * payload length. The file content is read into SwooleTG.buffer_stack, which
 * is grown first when it is too small. Unless the task is flagged with
 * SW_TASK_PEEK, the temporary file is unlinked after a successful read.
 *
 * @return SwooleTG.buffer_stack with `length` set to the payload size, or
 *         NULL when the file cannot be opened, the buffer cannot be extended
 *         or reading fails.
 */
static sw_inline swString* swTaskWorker_large_unpack(swEventData *task_result)
{
    swPackage_task pkg;
    memcpy(&pkg, task_result->data, sizeof(pkg));

    int fd = open(pkg.tmpfile, O_RDONLY);
    if (fd < 0)
    {
        swSysError("open(%s) failed.", pkg.tmpfile);
        return NULL;
    }

    int failed = 0;
    /* grow the buffer only when the payload does not fit */
    if (SwooleTG.buffer_stack->size < pkg.length)
    {
        failed = swString_extend_align(SwooleTG.buffer_stack, pkg.length) < 0;
    }
    if (!failed)
    {
        failed = swoole_sync_readfile(fd, SwooleTG.buffer_stack->str, pkg.length) < 0;
    }

    /* the descriptor is closed on every path once it has been opened */
    close(fd);
    if (failed)
    {
        return NULL;
    }

    if ((swTask_type(task_result) & SW_TASK_PEEK) == 0)
    {
        unlink(pkg.tmpfile);
    }

    SwooleTG.buffer_stack->length = pkg.length;
    return SwooleTG.buffer_stack;
}
swReactorThread_send
函式
- 首先要獲取連線的
session_id
,利用session_id
獲取swConnection
物件,進而拿到負責該連線的reactor
物件 -
SW_EVENT_CONFIRM
代表worker
確認接收該連線(當服務端使用enable_delay_receive
選項時) - 當呼叫
swoole_server->pause
函式時,BASE
模式會呼叫本函式,將不會讀取客戶端資料,去除reactor
對讀就緒事件的監聽 - 類似地
swoole_server->resume
函式用於恢復當前連線,重新將讀就緒放入reactor
的監聽事件中 - 如果
conn->out_buffer
為空,那麼就嘗試向socket
寫資料,如果沒有全部寫入成功,那麼就將資料放入conn->out_buffer
中去,並開啟事件監聽 - 如果
conn->out_buffer
資料量過大,需要設定conn->high_watermark
為 1,呼叫onBufferFull
回撥
/**
 * Handle one send request for the session stored in _send->info.fd.
 *
 * Depending on _send->info.type this closes the connection, confirms it
 * (SW_EVENT_CONFIRM), pauses or resumes reading (SW_EVENT_PAUSE_RECV /
 * SW_EVENT_RESUME_RECV), queues a file (SW_EVENT_SENDFILE) or queues/sends
 * payload bytes. Anything queued in conn->out_buffer is sent later, once the
 * reactor reports the socket writable.
 *
 * Returns SW_OK on success; SW_ERR when the output buffer cannot be created
 * or the connection has already been removed. The pause/resume branches
 * return the result of reactor->set/add/del directly.
 */
int swReactorThread_send(swSendData *_send)
{
swServer *serv = SwooleG.serv;
uint32_t session_id = _send->info.fd;
void *_send_data = _send->data;
uint32_t _send_length = _send->length;
swConnection *conn;
// close requests look the session up through the *_no_ssl variant
if (_send->info.type != SW_EVENT_CLOSE)
{
conn = swServer_connection_verify(serv, session_id);
}
else
{
conn = swServer_connection_verify_no_ssl(serv, session_id);
}
// NOTE(review): conn is dereferenced without a NULL check, although
// swReactorThread_onPipeWrite treats a NULL result of
// swServer_connection_verify as possible -- confirm a check was not dropped here.
int fd = conn->fd;
swReactor *reactor;
{
// the reactor that owns this connection, selected by conn->from_id
reactor = &(serv->reactor_threads[conn->from_id].reactor);
assert(fd % serv->reactor_num == reactor->id);
assert(fd % serv->reactor_num == SwooleTG.id);
}
/**
 * Reset send buffer, Immediately close the connection.
 */
if (_send->info.type == SW_EVENT_CLOSE && (conn->close_reset || conn->removed))
{
goto close_fd;
}
else if (_send->info.type == SW_EVENT_CONFIRM)
{
// start watching the connection for reads and clear its listen_wait flag
reactor->add(reactor, conn->fd, conn->fdtype | SW_EVENT_READ);
conn->listen_wait = 0;
return SW_OK;
}
/**
 * pause recv data
 */
else if (_send->info.type == SW_EVENT_PAUSE_RECV)
{
// keep write monitoring when it is active; otherwise drop the fd from the reactor
if (conn->events & SW_EVENT_WRITE)
{
return reactor->set(reactor, conn->fd, conn->fdtype | SW_EVENT_WRITE);
}
else
{
return reactor->del(reactor, conn->fd);
}
}
/**
 * resume recv data
 */
else if (_send->info.type == SW_EVENT_RESUME_RECV)
{
// re-enable read monitoring, preserving write monitoring when it is active
if (conn->events & SW_EVENT_WRITE)
{
return reactor->set(reactor, conn->fd, conn->fdtype | SW_EVENT_READ | SW_EVENT_WRITE);
}
else
{
return reactor->add(reactor, conn->fd, conn->fdtype | SW_EVENT_READ);
}
}
// NOTE(review): conn->out_buffer can still be NULL here (it is created further
// down); presumably swBuffer_empty() accepts NULL -- confirm.
if (swBuffer_empty(conn->out_buffer))
{
/**
 * close connection.
 */
// nothing is queued, so a close request can be carried out right away
if (_send->info.type == SW_EVENT_CLOSE)
{
close_fd:
reactor->close(reactor, fd);
return SW_OK;
}
#ifdef SW_REACTOR_SYNC_SEND
//Direct send
if (_send->info.type != SW_EVENT_SENDFILE)
{
if (!conn->direct_send)
{
goto buffer_send;
}
int n;
direct_send:
n = swConnection_send(conn, _send_data, _send_length, 0);
if (n == _send_length)
{
return SW_OK;
}
else if (n > 0)
{
// partial send: queue only the unsent tail
// (arithmetic on void * is a GNU C extension)
_send_data += n;
_send_length -= n;
goto buffer_send;
}
else if (errno == EINTR)
{
goto direct_send;
}
else
{
goto buffer_send;
}
}
#endif
//buffer send
// This `else` pairs with `if (_send->info.type != SW_EVENT_SENDFILE)` when
// SW_REACTOR_SYNC_SEND is defined, and with the SW_EVENT_CLOSE check above
// when it is not.
else
{
#ifdef SW_REACTOR_SYNC_SEND
buffer_send:
#endif
if (!conn->out_buffer)
{
conn->out_buffer = swBuffer_new(SW_BUFFER_SIZE);
if (conn->out_buffer == NULL)
{
return SW_ERR;
}
}
}
}
swBuffer_trunk *trunk;
//close connection
if (_send->info.type == SW_EVENT_CLOSE)
{
// data is still queued: append a close marker chunk behind it
// NOTE(review): the result of swBuffer_new_trunk() is used without a NULL
// check, unlike in swConnection_sendfile().
trunk = swBuffer_new_trunk(conn->out_buffer, SW_CHUNK_CLOSE, 0);
trunk->store.data.val1 = _send->info.type;
}
//sendfile to client
else if (_send->info.type == SW_EVENT_SENDFILE)
{
// the payload is a swSendFile_request; the return value of swConnection_sendfile() is not checked
swSendFile_request *req = (swSendFile_request *) _send_data;
swConnection_sendfile(conn, req->filename, req->offset, req->length);
}
//send data
else
{
//connection is closed
if (conn->removed)
{
swWarn("connection#%d is closed by client.", fd);
return SW_ERR;
}
//connection output buffer overflow
// the overflow is recorded, but the data is still appended below
if (conn->out_buffer->length >= conn->buffer_size)
{
if (serv->send_yield)
{
SwooleG.error = SW_ERROR_OUTPUT_BUFFER_OVERFLOW;
}
else
{
swoole_error_log(SW_LOG_WARNING, SW_ERROR_OUTPUT_BUFFER_OVERFLOW, "connection#%d output buffer overflow.", fd);
}
conn->overflow = 1;
// only onBufferEmpty is registered: raise the flag here, since the
// onBufferFull branch below will not run
if (serv->onBufferEmpty && serv->onBufferFull == NULL)
{
conn->high_watermark = 1;
}
}
int _length = _send_length;
void* _pos = _send_data;
int _n;
//buffer enQueue
// append the payload in pieces of at most SW_BUFFER_SIZE_BIG bytes
while (_length > 0)
{
_n = _length >= SW_BUFFER_SIZE_BIG ? SW_BUFFER_SIZE_BIG : _length;
swBuffer_append(conn->out_buffer, _pos, _n);
_pos += _n;
_length -= _n;
}
swListenPort *port = swServer_get_port(serv, fd);
// notify onBufferFull once, when the queue first reaches the port's high watermark
if (serv->onBufferFull && conn->high_watermark == 0 && conn->out_buffer->length >= port->buffer_high_watermark)
{
swServer_tcp_notify(serv, conn, SW_EVENT_BUFFER_FULL);
conn->high_watermark = 1;
}
}
//listen EPOLLOUT event
// if the fd is no longer valid for the reactor (EBADF / ENOENT), close the connection
if (reactor->set(reactor, fd, SW_EVENT_TCP | SW_EVENT_WRITE | SW_EVENT_READ) < 0
&& (errno == EBADF || errno == ENOENT))
{
goto close_fd;
}
return SW_OK;
}
swConnection_sendfile
傳送檔案
對於檔案的傳送,swoole
將檔案的資訊儲存在 swTask_sendfile
物件中,然後將其放入 conn->out_buffer
中。
/**
 * State of one queued file transfer; stored in a SW_CHUNK_SENDFILE chunk of
 * conn->out_buffer by swConnection_sendfile().
 */
typedef struct {
// heap copy of the file name (sw_strdup)
char *filename;
// NOTE(review): not assigned by swConnection_sendfile(); stays 0 after bzero()
uint16_t name_len;
// descriptor of the file, opened O_RDONLY
int fd;
// end position of the transfer: the file size when the caller passed length 0, otherwise length + offset
size_t length;
// next file position to send; advanced by swoole_sendfile()
off_t offset;
} swTask_sendfile;
/**
 * Queue a file transfer on `conn`.
 *
 * Opens `filename`, validates the requested range against the file size and
 * appends a SW_CHUNK_SENDFILE chunk holding a swTask_sendfile to
 * conn->out_buffer (created on demand). The chunk's destructor is
 * swConnection_sendfile_destructor, which is also used here to release the
 * task on the error paths after the file has been opened.
 *
 * @param offset start position inside the file; must not be negative
 * @param length number of bytes to send, 0 meaning "up to the end of the file"
 * @return SW_ERR on allocation, fstat or chunk-creation failure.
 *         SW_OK  on success, and also when the file cannot be opened or the
 *                range is invalid; those two cases are only logged, and this
 *                return value is kept unchanged for existing callers.
 */
int swConnection_sendfile(swConnection *conn, char *filename, off_t offset, size_t length)
{
    if (conn->out_buffer == NULL)
    {
        conn->out_buffer = swBuffer_new(SW_BUFFER_SIZE);
        if (conn->out_buffer == NULL)
        {
            return SW_ERR;
        }
    }

    /* stack chunk used only to pass `task` to the destructor on error paths */
    swBuffer_trunk error_chunk;
    swTask_sendfile *task = sw_malloc(sizeof(swTask_sendfile));
    if (task == NULL)
    {
        swWarn("malloc for swTask_sendfile failed.");
        return SW_ERR;
    }
    bzero(task, sizeof(swTask_sendfile));

    task->filename = sw_strdup(filename);
    if (task->filename == NULL)
    {
        swWarn("strdup for swTask_sendfile failed.");
        sw_free(task);
        return SW_ERR;
    }

    int file_fd = open(filename, O_RDONLY);
    if (file_fd < 0)
    {
        /* log first, so errno still belongs to open() when it is reported */
        swSysError("open(%s) failed.", filename);
        sw_free(task->filename);
        sw_free(task);
        return SW_OK;
    }
    task->fd = file_fd;
    task->offset = offset;

    struct stat file_stat;
    if (fstat(file_fd, &file_stat) < 0)
    {
        swSysError("fstat(%s) failed.", filename);
        error_chunk.store.ptr = task;
        swConnection_sendfile_destructor(&error_chunk);
        return SW_ERR;
    }

    /*
     * Range check written so that it cannot wrap: `length + offset` would
     * overflow for very large lengths and let an invalid range through.
     */
    if (offset < 0 || offset > file_stat.st_size || length > (size_t) (file_stat.st_size - offset))
    {
        swoole_error_log(SW_LOG_WARNING, SW_ERROR_INVALID_PARAMS, "length or offset is invalid.");
        error_chunk.store.ptr = task;
        swConnection_sendfile_destructor(&error_chunk);
        return SW_OK;
    }

    /* task->length is the END position of the transfer, not a byte count */
    if (length == 0)
    {
        task->length = file_stat.st_size;
    }
    else
    {
        task->length = length + offset;
    }

    swBuffer_trunk *chunk = swBuffer_new_trunk(conn->out_buffer, SW_CHUNK_SENDFILE, 0);
    if (chunk == NULL)
    {
        swWarn("get out_buffer trunk failed.");
        error_chunk.store.ptr = task;
        swConnection_sendfile_destructor(&error_chunk);
        return SW_ERR;
    }

    chunk->store.ptr = (void *) task;
    chunk->destroy = swConnection_sendfile_destructor;

    return SW_OK;
}
swConnection_onSendfile
向客戶端傳送檔案
-
HAVE_TCP_NOPUSH
是避免TCP
延遲接受的一種方法,為了避免Nagle
演算法造成的延遲,我們需要設定TCP_NODELAY
選項和TCP_CORK
選項來避免延遲接受和合並資料包(詳情可以看 Nagle 演算法與 TCP socket 選項 TCP_CORK) - 獲取到
sendn
後,就要呼叫swoole_sendfile
讀取檔案內容,傳送資料 - 傳送資料結束後,再將
TCP_CORK
設定為 0
/**
 * Set the TCP_CORK option of `sock` to `nopush`.
 *
 * @return the result of setsockopt(): 0 on success, -1 on failure.
 */
static sw_inline int swSocket_tcp_nopush(int sock, int nopush)
{
    const int flag = nopush;

    return setsockopt(sock, IPPROTO_TCP, TCP_CORK, &flag, sizeof(flag));
}
/**
 * Send the next piece of the file described by a SW_CHUNK_SENDFILE chunk.
 *
 * At most SW_SENDFILE_CHUNK_SIZE bytes are sent per call, starting at
 * task->offset. When task->offset reaches task->length the chunk is removed
 * from conn->out_buffer.
 *
 * With HAVE_TCP_NOPUSH defined, the socket is corked (and TCP_NODELAY
 * switched off) before the first piece and restored once the file is done.
 *
 * Returns SW_OK when a piece was sent, the file is finished, or the chunk
 * was dropped after an error; SW_ERR when the connection must be closed
 * (close_wait set) or the socket is not writable yet (send_wait set).
 */
int swConnection_onSendfile(swConnection *conn, swBuffer_trunk *chunk)
{
int ret;
swTask_sendfile *task = chunk->store.ptr;
#ifdef HAVE_TCP_NOPUSH
// only before the first piece (offset 0) and only when the socket is not already corked
if (task->offset == 0 && conn->tcp_nopush == 0)
{
/**
 * disable tcp_nodelay
 */
if (conn->tcp_nodelay)
{
int tcp_nodelay = 0;
if (setsockopt(conn->fd, IPPROTO_TCP, TCP_NODELAY, (const void *) &tcp_nodelay, sizeof(int)) == -1)
{
swWarn("setsockopt(TCP_NODELAY) failed. Error: %s[%d]", strerror(errno), errno);
}
}
/**
 * enable tcp_nopush
 */
if (swSocket_tcp_nopush(conn->fd, 1) == -1)
{
swWarn("swSocket_tcp_nopush() failed. Error: %s[%d]", strerror(errno), errno);
}
// the flag is set even when the setsockopt call above failed
conn->tcp_nopush = 1;
}
#endif
// bytes to send in this call: what is left of the file, capped at SW_SENDFILE_CHUNK_SIZE
int sendn = (task->length - task->offset > SW_SENDFILE_CHUNK_SIZE) ? SW_SENDFILE_CHUNK_SIZE : task->length - task->offset;
{
// swoole_sendfile() advances task->offset by the number of bytes written
ret = swoole_sendfile(conn->fd, task->fd, &task->offset, sendn);
}
swTrace("ret=%d|task->offset=%ld|sendn=%d|filesize=%ld", ret, (long)task->offset, sendn, task->length);
// NOTE(review): ret == 0 also enters this branch; errno may be stale then.
if (ret <= 0)
{
switch (swConnection_error(errno))
{
case SW_ERROR:
// give up on this file: drop the chunk and carry on with the rest of the queue
swSysError("sendfile(%s, %ld, %d) failed.", task->filename, (long)task->offset, sendn);
swBuffer_pop_trunk(conn->out_buffer, chunk);
return SW_OK;
case SW_CLOSE:
conn->close_wait = 1;
return SW_ERR;
case SW_WAIT:
conn->send_wait = 1;
return SW_ERR;
default:
break;
}
}
//sendfile finish
if (task->offset >= task->length)
{
swBuffer_pop_trunk(conn->out_buffer, chunk);
#ifdef HAVE_TCP_NOPUSH
/**
 * disable tcp_nopush
 */
if (swSocket_tcp_nopush(conn->fd, 0) == -1)
{
swWarn("swSocket_tcp_nopush() failed. Error: %s[%d]", strerror(errno), errno);
}
conn->tcp_nopush = 0;
/**
 * enable tcp_nodelay
 */
if (conn->tcp_nodelay)
{
int value = 1;
if (setsockopt(conn->fd, IPPROTO_TCP, TCP_NODELAY, (const void *) &value, sizeof(int)) == -1)
{
swWarn("setsockopt(TCP_NODELAY) failed. Error: %s[%d]", strerror(errno), errno);
}
}
#endif
}
return SW_OK;
}
/**
 * Portable sendfile replacement: copy up to `size` bytes (at most
 * SW_BUFFER_SIZE_BIG per call) from `in_fd`, starting at `*offset`, to
 * `out_fd`.
 *
 * `*offset` is advanced by the number of bytes actually written.
 *
 * @return the result of write() when data was read (negative on write
 *         failure), or SW_ERR when pread() returns no data.
 */
int swoole_sendfile(int out_fd, int in_fd, off_t *offset, size_t size)
{
    char chunk[SW_BUFFER_SIZE_BIG];
    int want = (size < sizeof(chunk)) ? size : sizeof(chunk);

    int got = pread(in_fd, chunk, want, *offset);
    if (got <= 0)
    {
        swSysError("pread() failed.");
        return SW_ERR;
    }

    int sent = write(out_fd, chunk, got);
    if (sent >= 0)
    {
        *offset += sent;
    }
    else
    {
        swSysError("write() failed.");
    }
    return sent;
}