bencoding編碼解析器

lightest發表於2005-01-09

BT客戶端開始一個下載首先要處理的就是torrent檔案.
而torrent檔案使用bencoding編碼.
所以實現bencoding編碼的解析器,就是第一步工作.

Bencoding is done as follows:
Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.
Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.
Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].
Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

下面是實現的bencoding解碼器的VC++原始碼:

// BEncode.h: interface for the CBEncode class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#pragma warning( disable : 4786 )
#pragma warning( disable : 4355 )
#include <climits>
#include <list>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Error states a parsed bencode object can carry.
enum BEncodeParserErrorCode
{
	enm_BEncodeErr_noerr     = 0, // no error
	enm_BEncodeErr_errString = 1, // malformed string
	enm_BEncodeErr_errInt    = 2, // malformed integer
	enm_BEncodeErr_errList   = 3, // malformed list
	enm_BEncodeErr_errDict   = 4, // malformed dictionary structure
	enm_BEncodeErr_End       = 5, // end of text
	enm_BEncodeErr_unknown   = 6  // unknown error
};
// Discriminator stored in every parsed object so callers can tell which
// concrete CBEncode* class a base pointer refers to.
enum BEncodeObjectType
{
	enum_BEncodeType_Objectbase = 0, // plain base object (no concrete kind)
	enum_BEncodeType_String     = 1, // byte string
	enum_BEncodeType_Int        = 2, // integer
	enum_BEncodeType_List       = 3, // list
	enum_BEncodeType_Dict       = 4  // dictionary
};
class CBEncodeObjectBase
{
public:
	CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase){m_type = type;clear();}
	virtual ~CBEncodeObjectBase(){};
	void clear(){szPos = NULL;m_error = enm_BEncodeErr_noerr;}
public:
	BEncodeObjectType m_type;	//物件型別
	char * szPos;	//物件在字串中的位置
	int ilen;//物件的資料長度
	BEncodeParserErrorCode m_error;//錯誤值	
};
// A parsed bencode integer.
class CBEncodeInt : public CBEncodeObjectBase
{
public:
	// m_iValue starts at 0 so that an object whose parse failed (and whose
	// value was therefore never assigned) can still be read safely.
	CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int), m_iValue(0) {}
	virtual ~CBEncodeInt(){}
public:
	int m_iValue;// value of the integer object
};
// A parsed bencode byte string. The bytes are not copied: m_szData points at
// them and m_ilen gives their count.
class CBEncodeString : public CBEncodeObjectBase
{
public:
	// Both fields are initialised so the object is well-defined even if the
	// parser never fills them in.
	CBEncodeString() : CBEncodeObjectBase(enum_BEncodeType_String), m_szData(NULL), m_ilen(0) {}
	virtual ~CBEncodeString(){}
public:
	// Copies the string bytes into strValue.
	// Returns true on success; returns false and leaves strValue untouched
	// when the object carries an error, has no data pointer, or has a
	// negative length.
	bool getstring(string & strValue) const
	{
		if(m_error == enm_BEncodeErr_noerr && m_szData && m_ilen >= 0)
		{
			// Length-based assign: the bytes may contain embedded NULs.
			strValue.assign(m_szData,m_ilen);
			return true;
		}
		return false;
	}
	char * m_szData;	// first byte of the string payload (not owned)
	int m_ilen;	// number of payload bytes
};
// A parsed bencode list. Owns its elements: every pointer stored in
// m_listObj is deleted by clear() and by the destructor.
class CBEncodeList : public CBEncodeObjectBase
{
public:
	CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}
	virtual ~CBEncodeList(){clear();}
	// Deletes every element and empties the list.
	// Note: this hides CBEncodeObjectBase::clear(); position and error state
	// are not reset here.
	void clear()
	{
		list<CBEncodeObjectBase *>::iterator it;
		for(it = m_listObj.begin();it!=m_listObj.end();++it)
			delete (*it);
		m_listObj.clear();
	}
public:
	list<CBEncodeObjectBase*> m_listObj;	// owned elements, in input order
private:
	// Copying is disabled (declared, never defined): a member-wise copy would
	// share the owned pointers and delete each of them twice.
	CBEncodeList(const CBEncodeList &);
	CBEncodeList & operator=(const CBEncodeList &);
};
// A parsed bencode dictionary. The objects themselves live in m_listObj
// (alternating key object, value object) and are owned by it; m_mapObj is
// only a lookup table from key text to the value pointer.
class CBEncodeDict : public CBEncodeObjectBase
{
public:
	CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}
	virtual ~CBEncodeDict(){clear();}
	// Looks up the value stored under szName.
	// Returns NULL when the key is absent or szName itself is NULL
	// (constructing a std::string from a NULL pointer is undefined).
	CBEncodeObjectBase* getvalue(const char * szName)
	{
		if(szName == NULL)
			return NULL;
		map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
		if(it != m_mapObj.end())
			return it->second;
		return NULL;
	}
	// Deletes every owned object and empties both containers.
	// Note: this hides CBEncodeObjectBase::clear(); position and error state
	// are not reset here.
	void clear()
	{
		list<CBEncodeObjectBase *>::iterator it;
		for(it = m_listObj.begin();it!=m_listObj.end();++it)
			delete (*it);
		m_listObj.clear();
		m_mapObj.clear();
	}
public:
	map<string,CBEncodeObjectBase*> m_mapObj;// key text -> value object (non-owning)
	list<CBEncodeObjectBase*> m_listObj;// the real storage: one name object followed by one value object; the map merely references these pointers
private:
	// Copying is disabled (declared, never defined): a member-wise copy would
	// share the owned pointers and delete each of them twice.
	CBEncodeDict(const CBEncodeDict &);
	CBEncodeDict & operator=(const CBEncodeDict &);
};
class CBEncode
{
public:
	bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
	bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);	
	bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);	
	bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);	
	bool parse(const char * szData);
	CBEncode();
	virtual ~CBEncode();
	void clear()
	{	
		list<CBEncodeObjectBase *>::iterator it;
		for(it = m_listObj.begin();it!=m_listObj.end();++it)
			delete (*it);
		m_listObj.clear();	
	}
public:
	list<CBEncodeObjectBase*> m_listObj;
	CBEncodeObjectBase* m_plastObj;//解析出來的最後一個物件
	char * m_szTxt;
};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
// BEncode.cpp: implementation of the CBEncode class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "BEncode.h"

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Builds an empty parser: nothing parsed yet and no input buffer attached.
CBEncode::CBEncode()
	: m_plastObj(NULL)
	, m_szTxt(NULL)
{
}

// Releases every object still held by the parser.
CBEncode::~CBEncode()
{
	this->clear();
}

// Parses a NUL-terminated bencoded buffer.
//
// Any previous result is discarded first. Top-level values are read one
// after another until the terminating NUL or the first error, and are
// appended to m_listObj. The buffer is not copied: the parsed objects keep
// pointers into szData, so it must outlive them.
//
// Returns true only if the whole buffer was consumed and the last object
// parsed carries no error; returns false for a NULL or empty buffer.
bool CBEncode::parse(const char *szData)
{
	// Compare the first character against '\0', not against the
	// null-pointer constant NULL.
	if(szData == NULL||*szData=='\0')
		return false;
	clear();
	// clear() deleted the object m_plastObj referred to; make sure the
	// pointer cannot dangle while the new parse is under way.
	m_plastObj = NULL;
	m_szTxt = const_cast<char*>(szData);
	char * szCurPos = m_szTxt;
	int iendpos = 0;
	bool bOk = true;
	while(bOk&&*szCurPos)
	{
		// The first character selects the value kind; anything that is not
		// an integer, list or dictionary marker is tried as a string.
		switch(*szCurPos)
		{
		case 'i':
			bOk = readint(szCurPos,iendpos,m_listObj);
			break;
		case 'l':
			bOk = readlist(szCurPos,iendpos,m_listObj);
			break;
		case 'd':
			bOk = readdict(szCurPos,iendpos,m_listObj);
			break;
		default:
			bOk = readstring(szCurPos,iendpos,m_listObj);
			break;
		}
		// Any error stops the whole parse; advance only past a good value.
		if(bOk)
			szCurPos+=iendpos;
	}
	return bOk&&m_plastObj!=NULL&&m_plastObj->m_error == enm_BEncodeErr_noerr;
}
//從當前位置讀取一個字串
// Reads one bencoded byte string ("<decimal length>:<bytes>") at szCurPos.
//
// A CBEncodeString is always appended to listObj and becomes m_plastObj; on
// failure its m_error is enm_BEncodeErr_errString. iendpos receives the
// number of characters advanced over (the whole encoded string on success).
// Returns true when the string parsed without error.
//
// The length is accumulated digit by digit with an overflow check instead of
// being copied into a fixed-size buffer, so an over-long run of digits is
// reported as an error rather than overrunning the stack. A zero length
// ("0:") is accepted as the empty string; a bare ":" with no digits is not.
//
// NOTE(review): the buffer's total size is not known here, so a length that
// reaches past the end of the input cannot be detected in this function.
bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
	char * szTmp = szCurPos;
	CBEncodeString * pNewString = new CBEncodeString;
	pNewString->szPos = szCurPos;

	int ilen = 0;
	int idigits = 0;
	bool bOverflow = false;
	while(*szTmp>='0'&&*szTmp<='9')
	{
		const int idigit = *szTmp-'0';
		// ilen*10+idigit would exceed INT_MAX exactly when this holds.
		if(ilen>(INT_MAX-idigit)/10)
			bOverflow = true;
		else
			ilen = ilen*10+idigit;
		++szTmp;
		++idigits;
	}
	if(idigits>0&&!bOverflow&&*szTmp==':')
	{
		pNewString->m_szData = ++szTmp;
		pNewString->m_ilen = ilen;
		szTmp+=ilen;
	}
	else
		pNewString->m_error = enm_BEncodeErr_errString;
	listObj.push_back(pNewString);
	iendpos = static_cast<int>(szTmp-szCurPos);
	m_plastObj = pNewString;
	m_plastObj->ilen = iendpos;
	return pNewString->m_error == enm_BEncodeErr_noerr;
}
//讀取一個整型資料
// Reads one bencoded integer ("i<optional '-'><decimal digits>e") at szCurPos.
//
// A CBEncodeInt is always appended to listObj and becomes m_plastObj; on
// failure its m_error is enm_BEncodeErr_errInt and its value is 0. iendpos
// receives the number of characters advanced over (the whole encoded integer
// on success). Returns true when the integer parsed without error.
//
// Rejected: a missing 'i' or 'e', no digits at all ("ie", "i-e"), "i-0e",
// and any value that does not fit in an int (m_iValue is an int, so such a
// value cannot be stored faithfully). Leading zeros on non-negative values
// are tolerated, as they were before.
bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
	char * szTmp = szCurPos;
	CBEncodeInt * pNewInt= new CBEncodeInt;
	pNewInt->szPos = szCurPos;
	pNewInt->m_iValue = 0;
	// Assume failure; cleared only when a complete, in-range integer is read.
	pNewInt->m_error = enm_BEncodeErr_errInt;

	if(*szTmp == 'i')
	{
		szTmp++;
		const bool bNegative = (*szTmp=='-');
		if(bNegative)
			szTmp++;

		// Accumulate the magnitude as unsigned so that INT_MIN, whose
		// magnitude is INT_MAX+1, can still be represented.
		const unsigned int uMaxPositive = static_cast<unsigned int>(INT_MAX);
		const unsigned int uLimit = bNegative ? uMaxPositive+1u : uMaxPositive;
		unsigned int uMagnitude = 0;
		int idigits = 0;
		bool bOverflow = false;
		while(*szTmp>='0'&&*szTmp<='9')
		{
			const unsigned int uDigit = static_cast<unsigned int>(*szTmp-'0');
			// uMagnitude*10+uDigit would exceed uLimit exactly when this holds.
			if(uMagnitude>(uLimit-uDigit)/10)
				bOverflow = true;
			else
				uMagnitude = uMagnitude*10+uDigit;
			++szTmp;
			++idigits;
		}

		const bool bNegativeZero = bNegative&&uMagnitude==0;
		if(idigits>0&&!bOverflow&&!bNegativeZero&&*szTmp=='e')
		{
			if(!bNegative)
				pNewInt->m_iValue = static_cast<int>(uMagnitude);
			else if(uMagnitude>uMaxPositive)
				pNewInt->m_iValue = INT_MIN;	// magnitude is exactly INT_MAX+1
			else
				pNewInt->m_iValue = -static_cast<int>(uMagnitude);
			pNewInt->m_error = enm_BEncodeErr_noerr;
			++szTmp;
		}
	}
	listObj.push_back(pNewInt);
	iendpos = static_cast<int>(szTmp-szCurPos);
	m_plastObj = pNewInt;
	m_plastObj->ilen = iendpos;
	return pNewInt->m_error == enm_BEncodeErr_noerr;
}
//讀取一個列表 bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeList * pNewList= new CBEncodeList; pNewList->szPos = szCurPos; if(*szTmp == 'l') { szTmp++; int ilistendpos; while(*szTmp!='e') { if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewList->m_listObj)) break;//遇到任何錯誤都終止整個解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewList->m_error = enm_BEncodeErr_errList; else szTmp++; } else pNewList->m_error = enm_BEncodeErr_errList; listObj.push_back(pNewList); iendpos = szTmp-szCurPos; m_plastObj = pNewList; m_plastObj->ilen = iendpos; return pNewList->m_error == enm_BEncodeErr_noerr?true:false; } //讀取一個字典 bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeDict * pNewDict= new CBEncodeDict; pNewDict->szPos = szCurPos; if(*szTmp == 'd') { szTmp++; int ilistendpos; string strname; while(*szTmp!='e') { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; if(m_plastObj->m_type !=enum_BEncodeType_String) break; strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen); szTmp+=ilistendpos; if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewDict->m_listObj)) break;//遇到任何錯誤都終止整個解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } 
pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj)); } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewDict->m_error = enm_BEncodeErr_errDict; else szTmp++; } else pNewDict->m_error = enm_BEncodeErr_errDict; listObj.push_back(pNewDict); iendpos = szTmp-szCurPos; m_plastObj = pNewDict; m_plastObj->ilen = iendpos; return pNewDict->m_error == enm_BEncodeErr_noerr?true:false; }


 

相關文章