最快的演算法alphablend彙編原始碼,Intel官方提供(轉)

post0發表於2007-08-12
最快的演算法alphablend彙編原始碼,Intel官方提供(轉)[@more@]

  Intel官方網站有一個ablend_565的快速彙編演算法,理論上是把一塊32bit RGBA渲染到16bit的buffer上,我的機器是PIII800,函式在system memory中進行,640*480的256級alpha blending,達到100fps,我想可以滿足絕大部分的要求了,在這裡,我提供了這個演算法的應用,希望可以對大家有所幫助。

  

  ablend_565函式,原始碼可以直接編譯使用,無需其他庫函式,感謝intel提供這麼好的東西。

  首先,我提供一些本人編寫的把32bit tga檔案讀入pRGBABuffer的函式

  檔案尺寸儲存在 width,height

  

  //-----------------------------------------------------------------------

  // Name: LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )

  // Desc: 讀取32bit tga檔案到DWORD緩衝裡,返回其尺寸

  // Time: 2002.06.22 00:36

  // Author: RealRender

  // Para:

  // Return:

  // Note: 這段程式碼來自directx 7.0 sample中的d3dtextr.cpp,我把他提取了出來

  // 方便使用

  //-----------------------------------------------------------------------

  BOOL LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )

  {

  FILE* file = fopen( strPathname, "rb" );

  if( NULL == file )

  return false;

  

  struct TargaHeader

  {

  BYTE IDLength;

  BYTE ColormapType;

  BYTE ImageType;

  BYTE ColormapSpecification[5];

  WORD XOrigin;

  WORD YOrigin;

  WORD ImageWidth;

  WORD ImageHeight;

  BYTE PixelDepth;

  BYTE ImageDescriptor;

  } tga;

  

  fread( &tga, sizeof(TargaHeader), 1, file );

  

  // Only true color, non-mapped images are supported

  if( ( 0 != tga.ColormapType ) ||

  ( tga.ImageType != 10 && tga.ImageType != 2 ) )

  {

  fclose( file );

  return false;

  }

  

  // Skip the ID field. The first byte of the header is the length of this field

  if( tga.IDLength )

  fseek( file, tga.IDLength, SEEK_CUR );

  

  DWORD m_dwWidth = tga.ImageWidth;

  DWORD m_dwHeight = tga.ImageHeight;

  DWORD m_dwBPP = tga.PixelDepth;

  DWORD *m_pRGBAData = new DWORD[m_dwWidth*m_dwHeight];

  

  if( m_pRGBAData == NULL )

  {

  fclose(file);

  return false;

  }

  

  for( DWORD y=0; y  {

  DWORD dwOffset = y*m_dwWidth;

  

  if( 0 == ( tga.ImageDescriptor & 0x0010 ) )

  dwOffset = (m_dwHeight-y-1)*m_dwWidth;

  

  for( DWORD x=0; x  {

  if( tga.ImageType == 10 )

  {

  BYTE PacketInfo = getc( file );

  WORD PacketType = 0x80 & PacketInfo;

  WORD PixelCount = ( 0x007f & PacketInfo ) + 1;

  

  if( PacketType )

  {

  DWORD b = getc( file );

  DWORD g = getc( file );

  DWORD r = getc( file );

  DWORD a = 0xff;

  if( m_dwBPP == 32 )

  a = getc( file );

  

  while( PixelCount-- )

  {

  m_pRGBAData[dwOffset+x] = (r<<24L)+(g<<16L)+(b<<8L)+(a);

  x++;

  }

  }

  else

  {

  while( PixelCount-- )

  {

  BYTE b = getc( file );

  BYTE g = getc( file );

  BYTE r = getc( file );

  BYTE a = 0xff;

  if( m_dwBPP == 32 )

  a = getc( file );

  

  m_pRGBAData[dwOffset+x] = (r<<24L)+(g<<16L)+(b<<8L)+(a);

  x++;

  }

  }

  }

  else

  {

  BYTE b = getc( file );

  BYTE g = getc( file );

  BYTE r = getc( file );

  BYTE a = 0xff;

  if( m_dwBPP == 32 )

  a = getc( file );

  

  m_pRGBAData[dwOffset+x] = (r<<24L)+(g<<16L)+(b<<8L)+(a);

  x++;

  }

  }

  }

  

  fclose( file );

  

  // Check for alpha content

  for( DWORD i=0; i  {

  if( m_pRGBAData[i] & 0x000000ff != 0xff )

  {

  //m_bHasAlpha = TRUE;

  break;

  }

  }

  

  *pRGBABuffer = m_pRGBAData;

  *width = m_dwWidth;

  *height = m_dwHeight;

  return true;

  }

  

  把32bit buffer分割為rgb和alpha的程式碼。

  注意,分割後的pBitmap一定要是8位元組對齊,這是最佳化的一個重要條件,所以,我的演算法中:

  

  BYTE* p = new BYTE[lSize*2+8];
  // Keep the pointer returned by new[]; this is the one to pass to delete[]
  BYTE* pOrig = p;
  // Round p up to the next 8-byte boundary. Adding p%8 itself only lands on a
  // boundary when p%8 is 0 or 4; the distance to the boundary is (8 - p%8) % 8.
  p += ( 8 - (size_t)p % 8 ) % 8;
  WORD* color = (WORD*)p;

  

  這是不規範的寫法,把指標強行改變為8位元組對齊,實際使用的時候,要記住釋放的原始指標不是p,而是pOrig,在這裡,我沒有釋放分配的記憶體,請諒解。

  //-----------------------------------------------------------------------
  // Name: SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
  // Desc: Splits a packed 32bit buffer into an 8bit alpha plane and a 16bit
  //       color plane
  // Time: 2002.06.22 00:36
  // Author: RealRender
  // Para: pRGBABuffer     - lWidth*lHeight pixels, each read as
  //                         (r<<24)|(g<<16)|(b<<8)|a
  //       pAlpha          - receives a new BYTE[] holding one alpha byte per pixel
  //       pBitmap         - receives an 8-byte aligned WORD array holding one
  //                         RGBTo16() result per pixel
  //       lWidth, lHeight - image dimensions in pixels
  // Return: none
  // Note: Builds a 16bit 565 buffer and an 8bit alpha channel from the 32bit
  //       buffer. The pointer stored in *pBitmap may be offset from the
  //       address returned by new[] (to reach 8-byte alignment), so it must
  //       not be passed to delete[]; this function does not hand back the
  //       original allocation pointer.
  //-----------------------------------------------------------------------
  void SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
  {
      long lSize = lWidth*lHeight;
      BYTE* alpha = new BYTE[lSize];
      // 8 spare bytes leave room for the alignment adjustment (at most 7)
      BYTE* p = new BYTE[lSize*2+8];

      // Round p up to the next 8-byte boundary. Adding p%8 itself only lands
      // on a boundary when p%8 is 0 or 4, so add the distance to the boundary.
      size_t misalign = (size_t)p % 8;
      if( misalign != 0 )
      {
          p += 8 - misalign;
      }
      WORD* color = (WORD*)p;

      for( long i = 0; i < lSize; i++ )
      {
          DWORD dwPixel = pRGBABuffer[i];
          DWORD r = ((dwPixel>>24)&0x000000ff);
          DWORD g = ((dwPixel>>16)&0x000000ff);
          DWORD b = ((dwPixel>> 8)&0x000000ff);
          DWORD a = ((dwPixel>> 0)&0x000000ff);

          alpha[i] = (BYTE)a;
          // Convert 888 to 565 (RGBTo16 is defined outside this block)
          color[i] = RGBTo16( r, g, b );
      }

      *pAlpha = alpha;
      *pBitmap = color;
  }

  

  //

  這個是intel官方提供的函式,函式的描述,用我的話來說就是把一個帶有256級alpha通道的565顏色資料繪製到16位目標頁面。

  

  函式說明:

  unsigned char *lpAlpha, // 256 級alpha通道

  unsigned int iAlpPitch, // alpha通道的pitch

  unsigned char *lpSrc, // 原色彩緩衝

  unsigned int iSrcX, //

  unsigned int iSrcY, // 原色彩位置

  unsigned int iSrcPitch, // 原色彩pitch

  unsigned char *lpDst, // 目標緩衝

  unsigned int iDstX,

  unsigned int iDstY, // 目標位置

  unsigned int iDstW,

  unsigned int iDstH, // 目標緩衝的尺寸

  unsigned int iDstPitch // 目標緩衝的pitch

  

  void ablend_565(unsigned char *lpAlpha,unsigned int iAlpPitch,

  unsigned char *lpSrc,unsigned int iSrcX, unsigned int iSrcY,

  unsigned int iSrcPitch, unsigned char *lpDst,

  unsigned int iDstX, unsigned int iDstY,

  unsigned int iDstW, unsigned int iDstH,

  unsigned int iDstPitch)

  {

  //Mask for isolating the red,green, and blue components

  

  static __int64 MASKB=0x001F001F001F001F;

  

  static __int64 MASKG=0x07E007E007E007E0;

  

  static __int64 MASKSHIFTG=0x03F003F003F003F0;

  

  static __int64 MASKR=0xF800F800F800F800;

  

  //constants used by the integer alpha blending equation

  

  static __int64 SIXTEEN=0x0010001000100010;

  

  static __int64 FIVETWELVE=0x0200020002000200;

  

  static __int64 SIXONES=0x003F003F003F003F;

  

  unsigned char *lpLinearDstBp=(iDstX<<1)+(iDstY*iDstPitch)+lpDst; //base pointer for linear destination

  

  unsigned char *lpLinearSrcBp=(iSrcX<<1)+(iSrcY*iSrcPitch)+lpSrc; //base pointer for linear source

  

  unsigned char *lpLinearAlpBp=iSrcX+(iSrcY*iAlpPitch)+lpAlpha; //base pointer for linear alpha

  

  _asm{

  

  mov esi,lpLinearSrcBp; //src

  

  mov edi,lpLinearDstBp; //dst

  

  mov eax,lpLinearAlpBp; //alpha

  

  mov ecx,iDstH; //ecx=number of lines to copy

  

  mov ebx,iDstW; //ebx=span width to copy

  

  test esi,6; //check if source address is qword aligned

  

  //since addr coming in is always word aligned(16bit)

  

  jnz done; //if not qword

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/8225414/viewspace-952079/,如需轉載,請註明出處,否則將追究法律責任。

相關文章