

  Intel官方網站有一個ablend_565的快速彙編演算法,理論上是把一塊32bit RGBA渲染到16bit的buffer上,我的機器是PIII800,函式在system memory中進行,640*480的256級alpha blending,達到100fps,我想可以滿足絕大部分的要求了,在這裡,我提供了這個演算法的應用,希望可以對大家有所幫助。



  首先,我提供一些本人編寫的把32bit tga檔案讀入pRGBABuffer的函式

  檔案尺寸儲存在 width,height



  // Name: ReadTgaPixel( FILE* file, DWORD dwBPP, DWORD* pPixel )
  // Desc: Reads one B,G,R[,A] pixel from the file and packs it as 0xRRGGBBAA.
  // Para: file   - stream positioned at the first byte of a pixel
  //       dwBPP  - pixel depth of the image; an alpha byte is read only
  //                when it is 32, otherwise alpha is 0xff
  //       pPixel - receives the packed pixel
  // Return: TRUE on success, FALSE when the stream ends or fails mid-pixel
  static BOOL ReadTgaPixel( FILE* file, DWORD dwBPP, DWORD* pPixel )
  {
      int b = getc( file );
      int g = getc( file );
      int r = getc( file );
      int a = 0xff;
      if( dwBPP == 32 )
          a = getc( file );

      if( EOF == b || EOF == g || EOF == r || EOF == a )
          return FALSE;

      // Widen to DWORD before shifting: shifting a promoted (signed) int
      // left by 24 overflows for channel values >= 0x80.
      *pPixel = ((DWORD)r<<24) | ((DWORD)g<<16) | ((DWORD)b<<8) | (DWORD)a;
      return TRUE;
  }

  // Name: LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )
  // Desc: Loads a true-color TGA file (image type 2 = uncompressed,
  //       type 10 = RLE; 24 or 32 bits per pixel) into a newly allocated
  //       DWORD buffer and returns the image size.
  // Time: 2002.06.22 00:36
  // Author: RealRender
  // Para: strPathname - path of the file to open
  //       pRGBABuffer - receives a buffer of width*height pixels allocated
  //                     with new[]; the caller releases it with delete[].
  //                     Each pixel is 0xRRGGBBAA (R in bits 31-24, A in
  //                     bits 7-0). Rows are stored top row first.
  //       width       - receives the image width in pixels
  //       height      - receives the image height in pixels
  // Return: TRUE on success. FALSE if an argument is NULL, the file cannot
  //         be opened, the format is unsupported, or the file is truncated;
  //         the output parameters are left untouched in that case.
  // Note: Derived from d3dtextr.cpp in the DirectX 7.0 samples, extracted
  //       into a standalone function for convenience.
  BOOL LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )
  {
      if( NULL == strPathname || NULL == pRGBABuffer || NULL == width || NULL == height )
          return FALSE;

      // NOTE(review): fopen() takes a char* path, so this only builds when
      // TCHAR is char (non-UNICODE builds) - confirm the project settings.
      FILE* file = fopen( strPathname, "rb" );
      if( NULL == file )
          return FALSE;

      // On-disk TGA header. The fields are naturally aligned, so the struct
      // is expected to be 18 bytes with no padding; multi-byte fields are
      // read in host byte order.
      struct TargaHeader
      {
          BYTE IDLength;
          BYTE ColormapType;
          BYTE ImageType;
          BYTE ColormapSpecification[5];
          WORD XOrigin;
          WORD YOrigin;
          WORD ImageWidth;
          WORD ImageHeight;
          BYTE PixelDepth;
          BYTE ImageDescriptor;
      } tga;

      if( 1 != fread( &tga, sizeof(TargaHeader), 1, file ) )
      {
          fclose( file );
          return FALSE;
      }

      // Only true color, non-mapped images are supported, and the pixel
      // reader below only understands 3- and 4-byte pixels.
      if( ( 0 != tga.ColormapType ) ||
          ( tga.ImageType != 10 && tga.ImageType != 2 ) ||
          ( tga.PixelDepth != 24 && tga.PixelDepth != 32 ) )
      {
          fclose( file );
          return FALSE;
      }

      // Skip the ID field. The first byte of the header is the length of this field
      if( tga.IDLength && 0 != fseek( file, tga.IDLength, SEEK_CUR ) )
      {
          fclose( file );
          return FALSE;
      }

      const DWORD dwWidth  = tga.ImageWidth;
      const DWORD dwHeight = tga.ImageHeight;
      const DWORD dwBPP    = tga.PixelDepth;
      const DWORD dwTotal  = dwWidth*dwHeight;   // at most 65535*65535, fits in 32 bits

      DWORD* pData = new DWORD[dwTotal];
      if( pData == NULL )
      {
          fclose( file );
          return FALSE;
      }

      // Image descriptor: bit 5 set = rows stored top-to-bottom,
      // bit 4 set = pixels stored right-to-left.
      const BOOL bTopDown     = ( 0 != ( tga.ImageDescriptor & 0x20 ) );
      const BOOL bRightToLeft = ( 0 != ( tga.ImageDescriptor & 0x10 ) );
      const BOOL bRLE         = ( tga.ImageType == 10 );

      // Pixels are decoded in file order and mapped to their destination
      // row/column, so an RLE packet that crosses a scanline boundary is
      // handled and no write can land outside the buffer.
      BOOL  bOk = TRUE;
      DWORD n   = 0;                             // pixels decoded so far
      while( bOk && n < dwTotal )
      {
          DWORD dwRun   = 1;                     // pixels produced by this step
          BOOL  bRepeat = FALSE;                 // TRUE: one pixel value repeated dwRun times

          if( bRLE )
          {
              int packet = getc( file );
              if( EOF == packet )
              {
                  bOk = FALSE;
                  break;
              }
              bRepeat = ( 0 != ( packet & 0x80 ) );
              dwRun   = (DWORD)( packet & 0x7f ) + 1;
              if( dwRun > dwTotal - n )          // ignore data beyond the image
                  dwRun = dwTotal - n;
          }

          DWORD dwPixel = 0;
          if( bRepeat )
              bOk = ReadTgaPixel( file, dwBPP, &dwPixel );

          for( DWORD k = 0; bOk && k < dwRun; k++ )
          {
              if( !bRepeat )
                  bOk = ReadTgaPixel( file, dwBPP, &dwPixel );

              if( bOk )
              {
                  DWORD row = n / dwWidth;       // dwWidth > 0 whenever n < dwTotal
                  DWORD col = n % dwWidth;
                  if( !bTopDown )
                      row = dwHeight - 1 - row;
                  if( bRightToLeft )
                      col = dwWidth - 1 - col;

                  pData[row*dwWidth + col] = dwPixel;
                  n++;
              }
          }
      }

      fclose( file );

      if( !bOk )
      {
          delete[] pData;                        // truncated or unreadable pixel data
          return FALSE;
      }

      *pRGBABuffer = pData;
      *width  = (long)dwWidth;
      *height = (long)dwHeight;
      return TRUE;
  }



  把32bit buffer分割為rgb和alpha的程式碼。



  // Allocate the 16-bit color buffer with 8 spare bytes so that its start
  // can be moved up to an 8-byte boundary.
  BYTE* p = new BYTE[lSize*2+8];

  // Keep the pointer returned by new[]; it is the value to pass to delete[].
  BYTE* pOrig = p;

  // Round p up to the next multiple of 8 (adds 0..7 bytes). Adding the
  // remainder itself (p % 8) would only align when the remainder is 0 or 4.
  p += ( 8 - (size_t)p % 8 ) % 8;

  WORD* color = (WORD*)p;




  // Name: SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
  // Desc: Splits a 32-bit buffer into a 16-bit color buffer and an 8-bit
  //       alpha channel.
  // Time: 2002.06.22 00:36
  // Author: RealRender
  // Para: pRGBABuffer - lWidth*lHeight source pixels, R in bits 31-24,
  //                     G in bits 23-16, B in bits 15-8, A in bits 7-0
  //       pAlpha      - receives a new[]-allocated array of lWidth*lHeight
  //                     alpha bytes
  //       pBitmap     - receives an 8-byte aligned array of lWidth*lHeight
  //                     16-bit pixels produced by RGBTo16()
  //       lWidth      - image width in pixels
  //       lHeight     - image height in pixels
  // Return: none
  // Note: The color buffer is allocated with 8 spare bytes and its start is
  //       rounded up to an 8-byte boundary, so *pBitmap is not necessarily
  //       the pointer returned by new[].
  //       NOTE(review): because of that, passing *pBitmap to delete[] is
  //       only valid when no adjustment was needed - confirm how callers
  //       release this buffer.
  void SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
  {
      const long lSize = lWidth*lHeight;

      BYTE* alpha = new BYTE[lSize];
      BYTE* p = new BYTE[lSize*2+8];

      // Force 8-byte alignment: round p up to the next multiple of 8.
      // (Adding p % 8 itself would only align when the remainder is 0 or 4.)
      p += ( 8 - (size_t)p % 8 ) % 8;

      WORD* color = (WORD*)p;

      for( long i = 0; i < lSize; i++ )
      {
          const DWORD dwPixel = pRGBABuffer[i];
          const DWORD r = ( dwPixel>>24 ) & 0x000000ff;
          const DWORD g = ( dwPixel>>16 ) & 0x000000ff;
          const DWORD b = ( dwPixel>> 8 ) & 0x000000ff;
          const DWORD a = ( dwPixel>> 0 ) & 0x000000ff;

          alpha[i] = (BYTE)a;

          // Convert 888 to 565 (RGBTo16 is defined elsewhere in this file).
          color[i] = RGBTo16( r, g, b );
      }

      *pAlpha = alpha;
      *pBitmap = color;
  }







  unsigned char *lpAlpha, // 256 級alpha通道

  unsigned int iAlpPitch, // alpha通道的pitch

  unsigned char *lpSrc, // 原色彩緩衝

  unsigned int iSrcX, //

  unsigned int iSrcY, // 原色彩位置

  unsigned int iSrcPitch, // 原色彩pitch

  unsigned char *lpDst, // 目標緩衝

  unsigned int iDstX,

  unsigned int iDstY, // 目標位置

  unsigned int iDstW,

  unsigned int iDstH, // 目標緩衝的尺寸

  unsigned int iDstPitch // 目標緩衝的pitch


  void ablend_565(unsigned char *lpAlpha,unsigned int iAlpPitch,

  unsigned char *lpSrc,unsigned int iSrcX, unsigned int iSrcY,

  unsigned int iSrcPitch, unsigned char *lpDst,

  unsigned int iDstX, unsigned int iDstY,

  unsigned int iDstW, unsigned int iDstH,

  unsigned int iDstPitch)


  //Mask for isolating the red,green, and blue components


  static __int64 MASKB=0x001F001F001F001F;


  static __int64 MASKG=0x07E007E007E007E0;


  static __int64 MASKSHIFTG=0x03F003F003F003F0;


  static __int64 MASKR=0xF800F800F800F800;


  //constants used by the integer alpha blending equation


  static __int64 SIXTEEN=0x0010001000100010;


  static __int64 FIVETWELVE=0x0200020002000200;


  static __int64 SIXONES=0x003F003F003F003F;


  unsigned char *lpLinearDstBp=(iDstX<<1)+(iDstY*iDstPitch)+lpDst; //base pointer for linear destination


  unsigned char *lpLinearSrcBp=(iSrcX<<1)+(iSrcY*iSrcPitch)+lpSrc; //base pointer for linear source


  unsigned char *lpLinearAlpBp=iSrcX+(iSrcY*iAlpPitch)+lpAlpha; //base pointer for linear alpha




  mov esi,lpLinearSrcBp; //src


  mov edi,lpLinearDstBp; //dst


  mov eax,lpLinearAlpBp; //alpha


  mov ecx,iDstH; //ecx=number of lines to copy


  mov ebx,iDstW; //ebx=span width to copy


  test esi,6; //check if source address is qword aligned


  //since addr coming in is always word aligned(16bit)


  jnz done; //if not qword

