Node.js Writable Stream的實現簡析

菠蘿小蘿蔔發表於2018-03-23

作者:肖磊

個人主頁:github

可寫流是對資料寫入“目的地”的一種抽象,可作為可讀流的一種消費者。資料來源可能多種多樣,如果使用可寫流來完成資料的消費,那麼就由可寫流的內部機制來控制資料在生產及消費過程中各狀態的流轉。

Node.js Writable Stream的實現簡析

首先來看下可寫流內部幾個比較關鍵的狀態:

/**
 * Bookkeeping record for the writable side of a stream.
 *
 * @param {Object} [options] - stream options; a missing value is treated as {}.
 * @param {Object} stream - the stream that owns this state. It is bound as the
 *   first argument of the internal onwrite callback.
 */
function WritableState(options, stream) {
  const opts = options || {};

  // A Duplex stream is readable and writable at once and both sides share a
  // single options object. Settings that must differ per side can be given
  // separately as readableXXX and writableXXX.
  const isDuplex = stream instanceof Stream.Duplex;

  // Object-stream flag: tells whether this stream carries buffers or objects.
  this.objectMode = Boolean(opts.objectMode);
  if (isDuplex && !this.objectMode) {
    this.objectMode = Boolean(opts.writableObjectMode);
  }

  // The threshold at which write() starts returning false.
  // 0 is a valid value: write() then always returns false unless the whole
  // buffer is flushed immediately.
  const hwm = opts.highWaterMark;
  const writableHwm = opts.writableHighWaterMark;
  const defaultHwm = this.objectMode ? 16 : 16 * 1024;
  const isProvided = (value) => Boolean(value) || value === 0;

  let mark = defaultHwm;
  if (isProvided(hwm)) {
    mark = hwm;
  } else if (isDuplex && isProvided(writableHwm)) {
    mark = writableHwm;
  }

  // Stored as an integer.
  this.highWaterMark = Math.floor(mark);

  // Lifecycle flags.
  Object.assign(this, {
    // set once _final has been called
    finalCalled: false,
    // a 'drain' event is owed to the producer
    needDrain: false,
    // set at the start of end()
    ending: false,
    // set when end() has been called and has returned
    ended: false,
    // set when 'finish' is emitted
    finished: false,
    // set when the stream has been destroyed
    destroyed: false,
  });

  // Should strings be decoded into buffers before being handed to _write?
  // Kept configurable so some node-core streams can optimize string handling
  // at a lower level. Only an explicit `false` turns decoding off.
  this.decodeStrings = opts.decodeStrings !== false;

  // Crypto historically defaults to 'binary', so the default encoding has to
  // be configurable; everything else uses 'utf8'.
  this.defaultEncoding = opts.defaultEncoding || 'utf8';

  // Write-progress bookkeeping.
  Object.assign(this, {
    // Not the length of a real buffer: a measure of how much data is still
    // waiting to be pushed to the underlying socket or file.
    length: 0,
    // true while a write is in progress
    writing: false,
    // while non-zero, all writes are buffered until .uncork() is called
    corked: 0,
    // Tells whether the onwrite callback is invoked immediately or on a later
    // tick. Starts out true because actions that must wait until "later"
    // should generally not happen before the first write call either.
    sync: true,
    // True while previously buffered items are being processed; those may
    // invoke the _write() callback in the same tick, and this flag prevents
    // overlapping onwrite handling.
    bufferProcessing: false,
  });

  // The callback handed to _write(chunk, cb): onwrite partially applied so
  // that `stream` is always its first argument.
  this.onwrite = onwrite.bind(undefined, stream);

  Object.assign(this, {
    // the callback the user supplied to write(chunk, encoding, cb)
    writecb: null,
    // the amount being written while _write is running
    writelen: 0,
    // head node of the buffered-request list
    bufferedRequest: null,
    // tail node of the buffered-request list
    lastBufferedRequest: null,
    // Number of pending user-supplied write callbacks; must reach 0 before
    // 'finish' can be emitted.
    pendingcb: 0,
    // Emit prefinish when only _write callbacks are outstanding; relevant for
    // synchronous Transform streams.
    prefinished: false,
    // true once the error has been emitted, so it is not thrown again
    errorEmitted: false,
    // number of buffered requests
    bufferedRequestCount: 0,
  });

  // Allocate the first CorkedRequest. One is always allocated and free to
  // use, and at most two are maintained.
  const firstCorkReq = { next: null, entry: null, finish: undefined };
  firstCorkReq.finish = onCorkedFinish.bind(undefined, firstCorkReq, this);
  this.corkedRequestsFree = firstCorkReq;
}
複製程式碼

在實現的可寫流當中必須要定義一個write方法,在可寫流內部,這個方法會被賦值給一個內部_write方法,主要是在資料被消費的時候呼叫:

const { Writable } = require('stream')

// Minimal Writable implementation. The `write` option is the consumer: it is
// invoked once per chunk and must report completion through `cb`.
const ws = new Writable({
  write (chunk, encoding, cb) {
    // chunk: the data to be consumed
    // encoding: the encoding of the chunk
    // cb: the stream's internal onwrite callback (see the state definition
    //     above). It has to be called by hand after each chunk is consumed so
    //     the stream can update its internal state; pass an Error to report a
    //     failed write.

    // ... consume the chunk here ...

    // Without this call the stream stays in its "writing" state forever: later
    // chunks only pile up in the buffer and no write callback ever fires.
    cb()
  }
})
複製程式碼

可寫流對開發者暴露了一個write方法,這個方法用於接收資料來源的資料,同時來完成資料向消費者的傳遞或者是將資料暫存於緩衝區當中。

讓我們來看下一個簡單的例子:

/**
 * Writes `data` to `writer` one million times while respecting backpressure.
 *
 * @param {Object} writer - object exposing write(data, encoding[, cb]) and
 *   once(event, listener).
 * @param {*} data - the chunk written on every iteration.
 * @param {string} encoding - encoding passed along with every write.
 * @param {Function} callback - passed only to the very last write.
 */
function writeOneMillionTimes(writer, data, encoding, callback) {
  let remaining = 1000000;
  pump();

  // Writes until the job is done or the writer asks us to pause.
  function pump() {
    for (;;) {
      remaining -= 1;

      if (remaining === 0) {
        // Final write: the only one that carries the callback.
        writer.write(data, encoding, callback);
        return;
      }

      // Not finished yet, so no callback here; the return value tells us
      // whether more data may be written right away.
      const accepted = writer.write(data, encoding);

      if (remaining <= 0) {
        // Counter is past the end: nothing more to write or to wait for.
        return;
      }

      if (!accepted) {
        // Had to stop early; resume once 'drain' fires.
        writer.once('drain', pump);
        return;
      }
    }
  }
}

const { Writable } = require('stream')
// Consumer used by the example. Each chunk is acknowledged on a later tick, so
// chunks written in the meantime are buffered, write() eventually returns
// false, and 'drain' is emitted once the buffer has been emptied.
const ws = new Writable({
  write (chunk, encoding, cb) {
    // do something to consume the chunk

    // Completion must be reported through cb. If it is never called, the
    // stream stays in its "writing" state, 'drain' is never emitted and the
    // callback of the last write below never runs.
    process.nextTick(cb)
  }
})

writeOneMillionTimes(ws, 'aaaaaa', 'utf8', function () {
  console.log('this is Writable')
})
複製程式碼

程式開始後,首先呼叫可寫流的writer.write方法,將資料data傳入到可寫流當中,然後由可寫流內部來判斷是將資料直接提供給資料消費者,還是暫時先存放到緩衝區。

/**
 * Accepts one chunk from the data source and either hands it to the consumer
 * or stores it in the buffer.
 *
 * @param {*} data - the chunk to write.
 * @param {string|Function} [encoding] - encoding of a string chunk; when a
 *   function is passed here it is used as the callback instead.
 * @param {Function} [callback] - called once this chunk has been handled.
 * @returns {boolean} the value returned by writeOrBuffer, or false when the
 *   chunk was not accepted; false tells the caller to stop writing for now.
 */
Writable.prototype.write = function (data, encoding, callback) {
  var state = this._writableState;
  // Working copies of the arguments. The body below reads and reassigns
  // `chunk` and `cb`, so they must be declared here; otherwise they would be
  // undeclared identifiers.
  var chunk = data;
  var cb = callback;
  // Whether more data may be written to the stream right away.
  var ret = false;
  var isBuf = !state.objectMode && Stream._isUint8Array(chunk);

  // Convert a plain Uint8Array into a Buffer.
  if (isBuf && Object.getPrototypeOf(chunk) !== Buffer.prototype) {
    chunk = Stream._uint8ArrayToBuffer(chunk);
  }

  // Optional arguments: write(chunk, cb) is allowed.
  if (typeof encoding === 'function') {
    cb = encoding;
    encoding = null;
  }

  // Pick the encoding.
  if (isBuf)
    encoding = 'buffer';
  else if (!encoding)
    encoding = state.defaultEncoding;

  if (typeof cb !== 'function')
    cb = nop;

  // end() has already been called: no more data goes to the consumer.
  if (state.ended)
    writeAfterEnd(this, cb);
  else if (isBuf || validChunk(this, state, chunk, cb)) {
    state.pendingcb++;
    // Hand the chunk to the consumer directly, or park it in the buffer.
    ret = writeOrBuffer(this, state, isBuf, chunk, encoding, cb);
  }

  return ret;
};

// Accounts for one incoming chunk and either appends it to the buffered
// request list or passes it to doWrite. Returns whether state.length is still
// below state.highWaterMark. (The article elides part of the body with "...".)
function writeOrBuffer (stream, state, isBuf, chunk, encoding, cb) {
  ...
  var len = state.objectMode ? 1 : chunk.length;

  state.length += len;

  var ret = state.length < state.highWaterMark;
  // we must ensure that previous needDrain will not be reset to false.
  // Once state.length reaches or exceeds highWaterMark, needDrain is set to
  // true: a 'drain' event has to be emitted later, and by listening for it the
  // developer can resume pulling data from the source.
  if (!ret)
    state.needDrain = true;

  // state.writing means the stream is currently handing data to the consumer.
  // In that case, or while the stream is corked, the chunk is stored in the
  // buffer instead. The writable buffer is a linked list as well.
  if (state.writing || state.corked) {
    var last = state.lastBufferedRequest;
    state.lastBufferedRequest = {
      chunk,
      encoding,
      isBuf,
      callback: cb,
      next: null
    };
    if (last) {
      last.next = state.lastBufferedRequest;
    } else {
      state.bufferedRequest = state.lastBufferedRequest;
    }
    state.bufferedRequestCount += 1;
  } else {
    // Write the chunk to the underlying resource, i.e. pass it to the consumer.
    doWrite(stream, state, false, len, chunk, encoding, cb);
  }

  return ret;
}


/**
 * Hands one chunk (or a batch of chunks when `writev` is truthy) to the
 * consumer and records the in-flight write on `state`.
 *
 * @param {Object} stream - the stream providing _write / _writev.
 * @param {Object} state - the stream's writable state.
 * @param {boolean} writev - when truthy, stream._writev is used.
 * @param {number} len - length of the data being written.
 * @param {*} chunk - the data passed to the consumer.
 * @param {string} encoding - encoding passed to _write.
 * @param {Function} cb - callback to run after the chunk has been consumed.
 */
function doWrite(stream, state, writev, len, chunk, encoding, cb) {
  Object.assign(state, {
    // length of the chunk being written
    writelen: len,
    // callback to invoke once the consumer has taken the chunk
    writecb: cb,
    // the stream is now handing data to the consumer
    writing: true,
    // marks the synchronous phase of the call below
    sync: true,
  });

  if (writev) {
    // Batch write, used when a writev implementation is provided.
    stream._writev(chunk, state.onwrite);
  } else {
    // Passes the data to the consumer together with the onwrite callback,
    // which must be called to report whether the write succeeded or failed.
    // These three arguments are the ones received by the `write` method of a
    // custom Writable. The hand-off itself is synchronous, but the consumer
    // may report back through the callback synchronously or asynchronously.
    stream._write(chunk, encoding, state.onwrite);
  }

  state.sync = false;
}
複製程式碼

doWrite方法中呼叫了開發者定義的write方法來完成資料的消費,即stream._write(),同時也提到了關於當資料被消費完了後需要呼叫state.onwrite這個方法來同步可寫流的狀態。接下來就來看下這個方法的內部實現:

// Resets the per-write fields of `state` after one _write call has completed.
function onwriteStateUpdate(state) {
  const justWritten = state.writelen;

  // The write is over and its callback slot is released.
  state.writing = false;
  state.writecb = null;

  // The written amount no longer counts as pending data.
  state.length -= justWritten;
  state.writelen = 0;
}

// Must be called once the data has been written to the underlying resource.
// `stream` is pre-bound as the first argument (see how state.onwrite is built
// in WritableState); `er` is the error reported by the consumer, if any.
function onwrite(stream, er) {
  const state = stream._writableState;
  const wasSync = state.sync;
  const userCb = state.writecb;

  // Bring the writable state up to date first.
  onwriteStateUpdate(state);

  if (er) {
    onwriteError(stream, state, wasSync, er, userCb);
    return;
  }

  // Check whether the stream is ready to finish, without emitting anything yet.
  const finished = needFinish(state);

  // Checked after every write: when the stream is not finishing, is not
  // corked, is not already processing the buffer and still holds buffered
  // requests, pass the buffered data on to the consumer.
  const hasBacklog = !finished &&
    !state.corked &&
    !state.bufferProcessing &&
    state.bufferedRequest;
  if (hasBacklog) {
    clearBuffer(stream, state);
  }

  // When _write called back synchronously, afterWrite is deferred to the next
  // tick; otherwise it is called directly.
  if (wasSync) {
    process.nextTick(afterWrite, stream, state, finished, userCb);
  } else {
    afterWrite(stream, state, finished, userCb);
  }
}

// Runs after a write has completed: possibly emits 'drain', settles the
// pending-callback count, invokes the write callback and then lets
// finishMaybe check the stream.
function afterWrite(stream, state, finished, cb) {
  const stillOpen = !finished;
  if (stillOpen) {
    onwriteDrain(stream, state);
  }

  state.pendingcb -= 1;
  cb();
  finishMaybe(stream, state);
}

// Tells whether the writable stream should finish now: it is ending, no data
// is pending, the buffer list is empty, it has not finished yet and no write
// is in progress.
function needFinish(state) {
  const { ending } = state;
  if (!ending) {
    // Not ending: hand back the falsy flag itself.
    return ending;
  }

  const drained = state.length === 0 && state.bufferedRequest === null;
  return drained && !state.finished && !state.writing;
}

// if there's something in the buffer waiting, then process it
// Passes buffered requests to the consumer by calling doWrite for each entry
// of the list, stopping as soon as a write is still in progress.
// (The article elides the _writev fast path with "...".)
function clearBuffer(stream, state) {
  // Marks that the buffer is currently being processed.
  state.bufferProcessing = true;
  var entry = state.bufferedRequest;

  // Only taken when a _writev method exists and more than one entry is
  // buffered: hands the data to the consumer in one batch.
  if (stream._writev && entry && entry.next) {
    // Fast case, write everything using _writev()
    ...
  } else {
    // Slow case, write chunks one-by-one
    // Hand the entries to the consumer one at a time.
    while (entry) {
      var chunk = entry.chunk;
      var encoding = entry.encoding;
      var cb = entry.callback;
      var len = state.objectMode ? 1 : chunk.length;

      // Keep feeding buffered data to the consumer.
      doWrite(stream, state, false, len, chunk, encoding, cb);
      entry = entry.next;
      state.bufferedRequestCount--;
      // if we didn't call the onwrite immediately, then
      // it means that we need to wait until it does.
      // also, that means that the chunk and cb are currently
      // being processed, so move the buffer counter past them.
      if (state.writing) {
        break;
      }
    }

    // The whole list was consumed, so there is no tail node any more.
    if (entry === null)
      state.lastBufferedRequest = null;
  }

  // Whatever is left (possibly nothing) becomes the new head of the list.
  state.bufferedRequest = entry;
  // Buffer processing is over.
  state.bufferProcessing = false;
}
複製程式碼

每次呼叫onwrite方法時,首先都會呼叫onwriteStateUpdate方法來更新這個可寫流的狀態,具體見上面的方法定義。接著需要對這個可寫流進行判斷,是否要結束這個可寫流;同時還要判斷緩衝區中是否還有可供消費者使用的資料,如果有,就呼叫clearBuffer方法將緩衝區的資料提供給消費者來使用。

背壓

當資料來源向可寫流提供資料的速度過快時,就有可能出現背壓的情況,這個時候資料來源應當暫停向可寫流提供資料。是否出現了背壓,可通過可寫流的write方法的返回值來進行判斷:如果返回的是false,那麼就出現了背壓。

參見這個例子:在實現的write方法中通過setTimeout來延遲一段時間再呼叫onwrite方法。這個時候每次資料消費者都拿到了資料,但可寫流的狀態被延遲更新,而資料來源仍然同步地向可寫流寫入資料,因此可寫流緩衝區中儲存的資料長度可能會達到甚至超過hwm。

writeOrBuffer方法中有關於可寫流緩衝區儲存的資料長度和hwm的比較:

var ret = state.length < state.highWaterMark;
// we must ensure that previous needDrain will not be reset to false.
// Once state.length reaches or exceeds highWaterMark, needDrain is set to
// true, so a 'drain' event may have to be emitted later.
if (!ret)
  state.needDrain = true;
複製程式碼

此時needDrain被置為true。出現背壓後,資料來源不再提供資料給可寫流,這個時候只有等可寫流將緩衝區的所有資料全部提供給消費者消耗,同時更新完可寫流的狀態後,才會觸發一個drain事件。

// Abridged repeat of onwrite: the parts replaced by "..." are shown in the
// full listing earlier in the article. Only the afterWrite dispatch is kept.
function onwrite(stream, er) {
  ...
  if (er)
    ...
  else {
    ...

    // When _write called back synchronously, afterWrite is deferred to the
    // next tick; otherwise it is called directly.
    if (sync) {
      process.nextTick(afterWrite, stream, state, finished, cb);
    } else {
      afterWrite(stream, state, finished, cb);
    }
  }
}

// Runs after a write has completed: possibly emits 'drain', settles the
// pending-callback count, invokes the write callback and then lets
// finishMaybe check the stream.
function afterWrite(stream, state, finished, cb) {
  const stillOpen = !finished;
  if (stillOpen) {
    onwriteDrain(stream, state);
  }

  state.pendingcb -= 1;
  cb();
  finishMaybe(stream, state);
}

// Emits 'drain' when no data is pending any more (state.length is 0) and a
// drain was requested earlier (needDrain is set); the flag is cleared first.
function onwriteDrain(stream, state) {
  const shouldDrain = state.length === 0 && state.needDrain;
  if (!shouldDrain) {
    return;
  }

  state.needDrain = false;
  stream.emit('drain');
}
複製程式碼

這個時候如果你的程式提前定義了監聽drain事件的方法,那麼可以在回撥裡面再次呼叫可寫流的write方法,讓資料來源繼續提供資料給可寫流。

相關文章