HM 變換量化中的 Scaling 操作(解碼器)

师映川發表於2024-10-05

(1)xIntraRecBlk呼叫invTransformNxN處理TU塊

// Excerpt from xIntraRecBlk: the inverse quantisation / inverse transform is
// only invoked when getCbf() returns non-zero for this component at the TU's
// relative transform depth; otherwise the call is skipped entirely.
// NOTE(review): "Cbf" is presumably the coded block flag - confirm.
if (pcCU->getCbf(uiAbsPartIdx, compID, rTu.GetTransformDepthRel()) != 0)
  {
    m_pcTrQuant->invTransformNxN( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
  }

(2)invTransformNxN 用於執行逆量化與逆變換操作,將編碼時的變換係數(Transform Coefficients)轉換回重建的殘差值(Residuals)

/** Converts the coefficients of one TU component back into residual samples.
 *
 *  Depending on the CU/TU flags the coefficients are either copied straight
 *  through (transquant bypass), or de-quantised and then passed through the
 *  transform-skip path or the inverse transform. invRdpcmNxN() is applied to
 *  the residual at the end in every non-recursive case.
 *
 *  \param rTu        transform unit being processed
 *  \param compID     colour component of the TU
 *  \param pcResidual destination residual buffer
 *  \param uiStride   line stride of pcResidual, in samples
 *  \param pcCoeff    source coefficients, stored with a stride equal to the TU width
 *  \param cQP        quantisation parameters forwarded to xDeQuant()
 */
Void TComTrQuant::invTransformNxN(      TComTU        &rTu,
                                  const ComponentID    compID,
                                        Pel          *pcResidual,
                                  const UInt           uiStride,
                                        TCoeff       * pcCoeff,
                                  const QpParam       &cQP
                                        DEBUG_STRING_FN_DECLAREP(psDebug))
{
  TComDataCU* pcCU=rTu.getCU();
  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
  const TComRectangle &rect = rTu.getRect(compID);
  const UInt uiWidth = rect.width;
  const UInt uiHeight = rect.height;

  // A non-square TU is split vertically into sections and each section is processed recursively.
  if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
  {
    TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);

    do
    {
      // Sections are stacked vertically: advance both buffers by whole lines.
      const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;

      Pel    *subTUResidual     = pcResidual + (lineOffset * uiStride);
      TCoeff *subTUCoefficients = pcCoeff    + (lineOffset * subTURecurse.getRect(compID).width);

      invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
    }
    while (subTURecurse.nextSection(rTu));

    return;
  }

#if DEBUG_STRING
  if (psDebug)
  {
    std::stringstream ss(stringstream::out);
    printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
    DEBUG_STRING_APPEND((*psDebug), ss.str())
  }
#endif

  // Transquant bypass: the coefficients are copied directly into the residual.
  if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
  {
    const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
    const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;

    for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
    {
      for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
      {
        // When rotated, the coefficient array is read back-to-front.
        pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
      }
    }
  }
  else
  {
#if DEBUG_TRANSFORM_AND_QUANTISE
    std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
    printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
#endif

    // De-quantise the coefficients; the result is stored in m_plTempCoeff.
    xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);

#if DEBUG_TRANSFORM_AND_QUANTISE
    std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
    printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
#endif

#if DEBUG_STRING
    if (psDebug)
    {
      std::stringstream ss(stringstream::out);
      printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
      (*psDebug)+=ss.str();
    }
#endif

    // Transform-skip blocks go through xITransformSkip instead of the inverse transform.
    if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
    {
      xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );

#if DEBUG_STRING
      if (psDebug)
      {
        std::stringstream ss(stringstream::out);
        printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
        (*psDebug)+=ss.str();
        (*psDebug)+="(<- was a Transform-skipped block)\n";
      }
#endif
    }
    else
    {
#if O0043_BEST_EFFORT_DECODING
      const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID));
#else
      const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
#endif

      // Inverse transform of the de-quantised coefficients into the residual buffer.
      xIT( channelBitDepth, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) );

#if DEBUG_STRING
      if (psDebug)
      {
        std::stringstream ss(stringstream::out);
        printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
        (*psDebug)+=ss.str();
        (*psDebug)+="(<- was a Transformed block)\n";
      }
#endif
    }

#if DEBUG_TRANSFORM_AND_QUANTISE
    std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
    printBlock(pcResidual, uiWidth, uiHeight, uiStride);
    g_debugCounter++;
#endif
  }

  invRdpcmNxN( rTu, compID, pcResidual, uiStride );
}

(3)量化

(4)xIT 用於執行二維逆變換

/** Two-dimensional inverse transform of one block.
 *
 *  \param channelBitDepth       bit depth forwarded to the transform kernels
 *  \param useDST                forwarded to the kernels to select DST instead of DCT
 *  \param plCoef                input coefficients, iWidth * iHeight values stored contiguously
 *  \param pResidual             output residual buffer
 *  \param uiStride              line stride of pResidual, in samples
 *  \param iWidth                block width
 *  \param iHeight               block height
 *  \param maxLog2TrDynamicRange dynamic-range limit forwarded to the kernels
 */
Void TComTrQuant::xIT( const Int channelBitDepth, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight, const Int maxLog2TrDynamicRange )
{
#if MATRIX_MULT
  if( iWidth == iHeight )
  {
    // Square blocks use the NxN routine xITr when MATRIX_MULT is enabled.
    // NOTE(review): the article states MATRIX_MULT is off by default - confirm against the build configuration.
    xITr(channelBitDepth, plCoef, pResidual, uiStride, (UInt)iWidth, useDST, maxLog2TrDynamicRange);
    return;
  }
#endif

  TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
  TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];

  // Work on a local copy of the coefficients.
  memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));

  // General MxN path. With MATRIX_MULT disabled this is the only path, so it
  // handles square blocks as well as non-square ones.
  xITrMxN( channelBitDepth, coeff, block, iWidth, iHeight, useDST, maxLog2TrDynamicRange );

  // Copy the densely packed result into the strided residual buffer, converting to Pel.
  for (Int y = 0; y < iHeight; y++)
  {
    for (Int x = 0; x < iWidth; x++)
    {
      pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
    }
  }
}

(5)xITrMxN 實現 MxN 矩陣的二維逆變換。主要根據輸入矩陣的寬度 (iWidth) 和高度 (iHeight),應用逆變換演算法(如部分蝶形逆變換、DST)

注意函式中的:

 Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
 Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
在 HEVC 中需要進行六次會導致計算結果數量級增大的操作,分別為 2 次 DCT(一次二維 DCT 可以被分為兩次一維 DCT)、1 次量化、1 次反量化以及 2 次反 DCT。
在 HEVC 中同樣設定了六次對應位置的 Scaling 操作,其 Scaling 係數分別為 ST1,ST2,SQ,SIQ,SIT1,SIT2。
這裡的shift_1st,shift_2nd 分別對應 SIT1,SIT2。TRANSFORM_MATRIX_SHIFT預設為6。
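  其中 $B$ 為位元深度(bit depth),$M=\log_2 N$,$N$ 為變換塊的邊長(以下數值對應 maxLog2TrDynamicRange = 15):
  • $S_{T1} = 2^{-(B+M-9)}$
  • $S_{T2} = 2^{-(M+6)}$
  • $S_{Q} = 2^{-(29-M-B)}$
  • $S_{IT1} = 2^{-7}$
  • $S_{IT2} = 2^{-(20-B)}$
  • $S_{IQ} = 2^{-(M-5+B)}$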
/** Separable MxN inverse transform, performed as two one-dimensional passes.
 *
 *  The first pass is selected by iHeight and writes into a temporary buffer;
 *  the second pass is selected by iWidth and writes into block. A 4x4 block
 *  with useDST set goes through fastInverseDst in both passes; every other
 *  supported size (4, 8, 16, 32) uses the matching partialButterflyInverse
 *  kernel. Any other size asserts and terminates the process.
 *
 *  \param bitDepth              bit depth, used for the second-pass shift
 *  \param coeff                 input coefficients
 *  \param block                 output samples
 *  \param iWidth                block width
 *  \param iHeight               block height
 *  \param useDST                request the DST for 4x4 blocks
 *  \param maxLog2TrDynamicRange sets the first-pass clipping range and the second-pass shift
 */
Void xITrMxN(Int bitDepth, TCoeff *coeff, TCoeff *block, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange)
{
  const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];

  // One extra bit is shifted out in the first pass, at the expense of the second pass.
  const Int firstPassShift  = TRANSFORM_MATRIX_SHIFT + 1;
  const Int secondPassShift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;

  assert(firstPassShift >= 0);
  assert(secondPassShift >= 0);

  // Intermediate values are clipped to the transform dynamic range.
  const TCoeff intermediateMin = -(1 << maxLog2TrDynamicRange);
  const TCoeff intermediateMax =  (1 << maxLog2TrDynamicRange) - 1;

  // Clipping of the final output is not in the standard, but is used to protect the "Pel"
  // data type into which the inverse-transformed samples will be copied.
  const Pel outputMin = std::numeric_limits<Pel>::min();
  const Pel outputMax = std::numeric_limits<Pel>::max();

  // The DST is only ever applied to 4x4 blocks.
  const Bool use4x4Dst = (iWidth == 4) && (iHeight == 4) && useDST;

  TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];

  // ---- first pass: kernel chosen by block height ----
  if (use4x4Dst)
  {
    fastInverseDst( coeff, tmp, firstPassShift, intermediateMin, intermediateMax );
  }
  else
  {
    switch (iHeight)
    {
      case  4: partialButterflyInverse4 ( coeff, tmp, firstPassShift, iWidth, intermediateMin, intermediateMax ); break;
      case  8: partialButterflyInverse8 ( coeff, tmp, firstPassShift, iWidth, intermediateMin, intermediateMax ); break;
      case 16: partialButterflyInverse16( coeff, tmp, firstPassShift, iWidth, intermediateMin, intermediateMax ); break;
      case 32: partialButterflyInverse32( coeff, tmp, firstPassShift, iWidth, intermediateMin, intermediateMax ); break;

      default:
        assert(0);
        exit (1);
    }
  }

  // ---- second pass: kernel chosen by block width ----
  if (use4x4Dst)
  {
    fastInverseDst( tmp, block, secondPassShift, outputMin, outputMax );
  }
  else
  {
    switch (iWidth)
    {
      case  4: partialButterflyInverse4 ( tmp, block, secondPassShift, iHeight, outputMin, outputMax ); break;
      case  8: partialButterflyInverse8 ( tmp, block, secondPassShift, iHeight, outputMin, outputMax ); break;
      case 16: partialButterflyInverse16( tmp, block, secondPassShift, iHeight, outputMin, outputMax ); break;
      case 32: partialButterflyInverse32( tmp, block, secondPassShift, iHeight, outputMin, outputMax ); break;

      default:
        assert(0);
        exit (1);
    }
  }
}

再看看對應的變換函式:

其中 g_aucConvertToBit[] 是一個查找表(並非函式):以塊的邊長 size(寬或高)為索引,得到 log2(size)-2,因此 g_aucConvertToBit[size] + 2 即為 log2(size)

/** Separable MxN forward transform, performed as two one-dimensional passes.
 *
 *  The first pass is selected by iWidth and writes into a temporary buffer;
 *  the second pass is selected by iHeight and writes into coeff. A 4x4 block
 *  with useDST set goes through fastForwardDst in both passes; every other
 *  supported size (4, 8, 16, 32) uses the matching partialButterfly kernel.
 *  Any other size asserts and terminates the process.
 *
 *  \param bitDepth              bit depth, used for the first-pass shift
 *  \param block                 input samples
 *  \param coeff                 output coefficients
 *  \param iWidth                block width
 *  \param iHeight               block height
 *  \param useDST                request the DST for 4x4 blocks
 *  \param maxLog2TrDynamicRange dynamic-range limit, used for the first-pass shift
 */
Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange)
{
  const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];

  // g_aucConvertToBit[] is offset by 2 in both shift computations.
  const Int widthTerm  = g_aucConvertToBit[iWidth]  + 2;
  const Int heightTerm = g_aucConvertToBit[iHeight] + 2;

  const Int firstPassShift  = (widthTerm + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
  const Int secondPassShift = heightTerm + TRANSFORM_MATRIX_SHIFT;

  assert(firstPassShift >= 0);
  assert(secondPassShift >= 0);

  // The DST is only ever applied to 4x4 blocks.
  const Bool use4x4Dst = (iWidth == 4) && (iHeight == 4) && useDST;

  TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];

  // ---- first pass: kernel chosen by block width ----
  if (use4x4Dst)
  {
    fastForwardDst( block, tmp, firstPassShift );
  }
  else
  {
    switch (iWidth)
    {
      case  4: partialButterfly4 ( block, tmp, firstPassShift, iHeight ); break;
      case  8: partialButterfly8 ( block, tmp, firstPassShift, iHeight ); break;
      case 16: partialButterfly16( block, tmp, firstPassShift, iHeight ); break;
      case 32: partialButterfly32( block, tmp, firstPassShift, iHeight ); break;

      default:
        assert(0);
        exit (1);
    }
  }

  // ---- second pass: kernel chosen by block height ----
  if (use4x4Dst)
  {
    fastForwardDst( tmp, coeff, secondPassShift );
  }
  else
  {
    switch (iHeight)
    {
      case  4: partialButterfly4 ( tmp, coeff, secondPassShift, iWidth ); break;
      case  8: partialButterfly8 ( tmp, coeff, secondPassShift, iWidth ); break;
      case 16: partialButterfly16( tmp, coeff, secondPassShift, iWidth ); break;
      case 32: partialButterfly32( tmp, coeff, secondPassShift, iWidth ); break;

      default:
        assert(0);
        exit (1);
    }
  }
}

(6)對於4*4使用DST變換的塊:

/** One pass of the 4x4 inverse DST.
 *
 *  For each index i in 0..3 the four inputs tmp[i], tmp[4+i], tmp[8+i] and
 *  tmp[12+i] are multiplied by the inverse DST matrix, rounded, right-shifted
 *  by shift and clipped to [outputMinimum, outputMaximum]. The four results
 *  are written to block[4*i] .. block[4*i+3].
 *
 *  \param tmp           input, 16 values
 *  \param block         output, 16 values
 *  \param shift         right shift applied after the matrix multiplication
 *  \param outputMinimum lower clipping bound of the result
 *  \param outputMaximum upper clipping bound of the result
 */
Void fastInverseDst(TCoeff *tmp, TCoeff *block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum)  // input tmp, output block
{
  // Half of the divisor, added before shifting; no offset when nothing is shifted out.
  const TCoeff roundingOffset = (shift > 0) ? (1<<(shift-1)) : 0;

  for (Int line = 0; line < 4; line++)
  {
    // Gather the four inputs of this line (input stride is 4).
    const TCoeff input[4] = { tmp[line], tmp[4 + line], tmp[8 + line], tmp[12 + line] };

    TCoeff *outputRow = block + (line * 4);

    for (Int column = 0; column < 4; column++)
    {
      // Dot product with one column of the defined matrix, rather than hard-wired numbers.
      TCoeff sum = 0;
      for (Int row = 0; row < 4; row++)
      {
        sum += input[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column];
      }

      outputRow[column] = Clip3( outputMinimum, outputMaximum, rightShift((sum + roundingOffset), shift));
    }
  }
}

SIT1,SIT2

相關文章