每日原始碼分析-Lodash(uniq.js)

breakinferno發表於2017-12-18

本系列使用lodash 4.17.4

前言

引用.internal目錄下的baseUniq.js

正文

import baseUniq from './.internal/baseUniq.js'

/**
 * Builds a new array that holds only the first occurrence of every value
 * found in `array`. Values are compared with
 * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero),
 * and the result keeps the order in which values first appear in the input.
 *
 * A nullish `array`, or one whose `length` is falsy, produces a fresh empty
 * array without delegating to `baseUniq`.
 *
 * @since 0.1.0
 * @category Array
 * @param {Array} array The array to inspect.
 * @returns {Array} Returns the new duplicate free array.
 * @see uniqBy, uniqWith
 * @example
 *
 * uniq([2, 1, 2])
 * // => [2, 1]
 */
function uniq(array) {
  // Guard clause: nothing to de-duplicate for nullish or empty input.
  if (array == null || !array.length) {
    return []
  }
  return baseUniq(array)
}

export default uniq
複製程式碼

可以看到完完全全是呼叫baseUniq函式,如果這樣就完的話感覺今天就比較水,所以我們來看看這個baseUniq函式

import SetCache from './SetCache.js'
import arrayIncludes from './arrayIncludes.js'
import arrayIncludesWith from './arrayIncludesWith.js'
import cacheHas from './cacheHas.js'
import createSet from './createSet.js'
import setToArray from './setToArray.js'

/** Used as the size to enable large array optimizations. */
const LARGE_ARRAY_SIZE = 200

/**
 * The base implementation of `uniqBy`.
 *
 * Walks `array` from first to last and collects each element whose comparison
 * key has not been seen yet. The key is `iteratee(value)` when an iteratee is
 * given, otherwise the element itself. Three lookup modes are chosen up front:
 * a comparator-driven lookup, a cache-based lookup for arrays of at least
 * `LARGE_ARRAY_SIZE` elements, and an inline linear scan for everything else.
 *
 * @private
 * @param {Array} array The array to inspect.
 * @param {Function} [iteratee] The iteratee invoked per element.
 * @param {Function} [comparator] The comparator invoked per element.
 * @returns {Array} Returns the new duplicate free array.
 */
function baseUniq(array, iteratee, comparator) {
  let index = -1
  // Lookup helper used whenever the inline `===` scan below cannot be used.
  let includes = arrayIncludes
  // `true` while the inline linear scan over `seen` is the active strategy.
  let isCommon = true

  const { length } = array
  const result = []
  // Keys encountered so far. Starts out aliased to `result`, so that without
  // an iteratee the kept values double as the "seen" list.
  let seen = result

  if (comparator) {
    // Custom equality: disable the inline scan and pass the comparator through
    // to `arrayIncludesWith`. `seen` stays aliased to `result` in this mode.
    isCommon = false
    includes = arrayIncludesWith
  }
  else if (length >= LARGE_ARRAY_SIZE) {
    // Fast path for large arrays without an iteratee: when `createSet` yields
    // a truthy value, its contents are returned directly.
    // NOTE(review): presumably `createSet` returns a falsy value when a native
    // Set cannot be used; confirm in createSet.js.
    const set = iteratee ? null : createSet(array)
    if (set) {
      return setToArray(set)
    }
    // Otherwise track seen keys in a `SetCache` and query it via `cacheHas`.
    isCommon = false
    includes = cacheHas
    seen = new SetCache
  }
  else {
    // With an iteratee the computed keys differ from the kept values, so they
    // need their own list; without one, `seen` keeps aliasing `result`.
    seen = iteratee ? [] : result
  }
  outer:
  while (++index < length) {
    let value = array[index]
    const computed = iteratee ? iteratee(value) : value

    // Without a comparator, a value equal to 0 (which includes -0) is replaced
    // by the literal 0, so -0 is stored as +0 in the result.
    value = (comparator || value !== 0) ? value : 0
    // `computed === computed` is false only for NaN; NaN keys therefore skip
    // the `===` scan (which could never match them) and use `includes`.
    if (isCommon && computed === computed) {
      let seenIndex = seen.length
      // Scan backwards through the keys seen so far; a hit means this element
      // is a duplicate, so jump straight to the next input element.
      while (seenIndex--) {
        if (seen[seenIndex] === computed) {
          continue outer
        }
      }
      // Only record the key separately when `seen` is a distinct list;
      // otherwise pushing to `result` below already records it.
      if (iteratee) {
        seen.push(computed)
      }
      result.push(value)
    }
    else if (!includes(seen, computed, comparator)) {
      // Avoid pushing twice when `seen` and `result` are the same array.
      if (seen !== result) {
        seen.push(computed)
      }
      result.push(value)
    }
  }
  return result
}
export default baseUniq
複製程式碼

由於這個函式還考慮了對資料的'處理器'和'比較器',如果有興趣的話可以再仔細看看整體的實現,今天我們就只看uniq函式呼叫時的情況(既沒有'處理器'也沒有'比較器')。我做了個簡化,程式碼如下:

import SetCache from './SetCache.js'
import arrayIncludes from './arrayIncludes.js'
import arrayIncludesWith from './arrayIncludesWith.js'
import cacheHas from './cacheHas.js'
import createSet from './createSet.js'
import setToArray from './setToArray.js'

/** Used as the size to enable large array optimizations. */
const LARGE_ARRAY_SIZE = 200

/**
 * Simplified `baseUniq` covering only the plain `uniq(array)` call, i.e. no
 * iteratee and no comparator.
 *
 * Values are compared with SameValueZero semantics: like `===`, except that
 * `NaN` is treated as equal to `NaN`. Only the first occurrence of each value
 * is kept, in input order, and `-0` is stored as `0`.
 *
 * @private
 * @param {Array} array The array to inspect.
 * @returns {Array} Returns the new duplicate free array.
 */
function baseUniq(array) {
  const { length } = array
  const result = []

  // Large arrays: return the contents of `createSet(array)` when it yields a
  // truthy value; otherwise fall back to a `SetCache` for membership checks.
  let cache = null
  if (length >= LARGE_ARRAY_SIZE) {
    const set = createSet(array)
    if (set) {
      return setToArray(set)
    }
    cache = new SetCache
  }

  let index = -1
  outer:
  while (++index < length) {
    let value = array[index]

    // A value equal to 0 (which includes -0) is replaced by the literal 0.
    value = value !== 0 ? value : 0

    if (cache) {
      if (!cacheHas(cache, value)) {
        cache.push(value)
        result.push(value)
      }
      continue
    }

    // Small arrays: linear scan over the values kept so far. `NaN !== NaN`,
    // so NaN needs an explicit check or every NaN would count as unique.
    const valueIsNaN = value !== value
    let seenIndex = result.length
    while (seenIndex--) {
      const kept = result[seenIndex]
      if (kept === value || (valueIsNaN && kept !== kept)) {
        continue outer
      }
    }
    result.push(value)
  }
  return result
}
複製程式碼

這樣一來就很明確了。如果陣列長度大於等於200(LARGE_ARRAY_SIZE),會先嘗試用createSet建立Set,成功的話直接用setToArray轉成陣列返回;否則使用cache那套來判斷cache裡有沒有對應的資料,沒有就新增進cache和結果陣列。如果是長度小於200的普通陣列,那麼就和我們平時寫的差不多了:遍歷目標陣列的每個值,並在結果陣列中查詢該值是否已經存在,不存在則存入結果陣列。

使用方式

  _.uniq([2, 1, 2])
  // => [2, 1]
複製程式碼

使用場景

該函式的作用是將一個陣列去重。由於內部判斷相等採用的是SameValueZero(與===基本一致,只是NaN被視為與自身相等),所以只能對基本型別的值或同一個引用去重;類似於[[2],[2]]這種內容相同但引用不同的物件不能去重。

  let t = [2]
  _.uniq([2, 1, 2, t, t])
  // => [2, 1, [2]]
複製程式碼

結語

個人感覺如果要簡單的進行類似的去重可以使用es6的set直接達到目的:Array.from(new Set(yourArray))或者[...new Set(yourArray)]就可以了。如果要對內容去重則需要進行遞迴操作。

補充 其實還可以使用正則來去重。比如

var str_arr = ["a", "b", "c", "a", "b", "c"]

/**
 * De-duplicates an array of strings using a regular expression.
 *
 * The elements are sorted, joined with a double-comma separator, and runs of
 * identical neighbours are collapsed by the regex before splitting again.
 *
 * Limitations of this approach:
 * - the result is in sorted order, not in the input order;
 * - every element comes back as a string, because the array is joined and
 *   split again;
 * - elements that themselves contain a comma are not supported, since the
 *   comma is used as the separator.
 *
 * @param {Array<string>} arr The array to inspect. It is not modified.
 * @returns {Array<string>} A new sorted array without duplicates.
 */
function unique(arr) {
    // "".split(",") yields [""], so an empty input must be handled up front.
    if (arr.length === 0) {
        return []
    }
    // `sort` works in place; operate on a copy so the caller's array keeps
    // its original order.
    return arr.slice().sort().join(",,")
        // Collapse each run "x,,x,,x" into a single "x".
        .replace(/(,|^)([^,]+)(,,\2)+(,|$)/g, "$1$2$4")
        // Shrink the remaining multi-comma separators to one comma.
        .replace(/,,+/g, ",")
        // Drop a trailing separator, if any.
        .replace(/,$/, "")
        .split(",")
}
console.log(unique(str_arr)) // ["a","b","c"]

複製程式碼

相關文章