bencoding編碼解析器
BT客戶端開始一個下載首先要處理的就是torrent檔案.
而torrent檔案使用bencoding編碼.
所以實現bencoding編碼的解析器,就是第一步工作.
Bencoding is done as follows:
Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.
Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.
Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].
Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).
下面是實現的bencoding解碼器的VC++原始碼:
// BEncode.h: interface for the CBEncode class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#pragma warning( disable : 4786 )
#pragma warning( disable : 4355 )

#include <cstddef>   // NULL
#include <list>
#include <map>
#include <string>
#include <vector>
using namespace std;

// Error state recorded on each parsed object.
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr = 0,   // no error
    enm_BEncodeErr_errString,   // malformed string
    enm_BEncodeErr_errInt,      // malformed integer
    enm_BEncodeErr_errList,     // malformed list
    enm_BEncodeErr_errDict,     // malformed dictionary
    enm_BEncodeErr_End,         // end of text
    enm_BEncodeErr_unknown      // unknown error
};

// Run-time type tag stored in CBEncodeObjectBase::m_type.
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0,
    enum_BEncodeType_String,
    enum_BEncodeType_Int,
    enum_BEncodeType_List,
    enum_BEncodeType_Dict
};

// Common base of every parsed bencode object.
class CBEncodeObjectBase
{
public:
    CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase)
        : m_type(type), szPos(NULL), ilen(0), m_error(enm_BEncodeErr_noerr)
    {
    }
    virtual ~CBEncodeObjectBase(){}

    // Resets position, length and error state; the type tag is kept.
    // Note: CBEncodeList and CBEncodeDict declare their own clear(), which
    // hides this one and releases their children instead.
    void clear()
    {
        szPos = NULL;
        ilen = 0;
        m_error = enm_BEncodeErr_noerr;
    }

public:
    BEncodeObjectType m_type;        // object type
    char * szPos;                    // position of the object inside the parsed text
    int ilen;                        // length of the object's encoded data
    BEncodeParserErrorCode m_error;  // error value
};

// A bencoded integer.
class CBEncodeInt : public CBEncodeObjectBase
{
public:
    CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int), m_iValue(0) {}
    virtual ~CBEncodeInt(){}

public:
    int m_iValue;   // value of the integer object
};

// A bencoded string. The bytes are not copied: m_szData is a pointer and
// m_ilen a byte count describing where the string lives.
class CBEncodeString : public CBEncodeObjectBase
{
public:
    CBEncodeString() : CBEncodeObjectBase(enum_BEncodeType_String), m_szData(NULL), m_ilen(0) {}
    virtual ~CBEncodeString(){}

public:
    // Copies the string bytes into strValue.
    // Returns false, leaving strValue untouched, if the object carries an
    // error or has no data pointer.
    bool getstring(string & strValue)
    {
        if(m_error != enm_BEncodeErr_noerr || m_szData == NULL)
            return false;
        strValue.assign(m_szData, m_ilen);
        return true;
    }

    char * m_szData;  // first byte of the string data
    int m_ilen;       // number of bytes in the string
};

// A bencoded list. Owns the element objects stored in m_listObj.
// NOTE(review): the compiler-generated copy operations copy the raw pointers,
// so a copied list would delete its elements twice; avoid copying.
class CBEncodeList : public CBEncodeObjectBase
{
public:
    CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}
    virtual ~CBEncodeList(){clear();}

    // Deletes every element and empties the list.
    void clear()
    {
        for(list<CBEncodeObjectBase *>::iterator it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
    }

public:
    list<CBEncodeObjectBase*> m_listObj;
};

// A bencoded dictionary.
// NOTE(review): as with CBEncodeList, copying an instance would lead to a
// double delete of the owned objects.
class CBEncodeDict : public CBEncodeObjectBase
{
public:
    CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}
    virtual ~CBEncodeDict(){clear();}

    // Looks up the value stored under szName; returns NULL when absent.
    CBEncodeObjectBase* getvalue(const char * szName)
    {
        map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
        if(it == m_mapObj.end())
            return NULL;
        return it->second;
    }

    // Deletes every owned object and empties both containers.
    void clear()
    {
        for(list<CBEncodeObjectBase *>::iterator it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
        m_mapObj.clear();
    }

public:
    // Lookup table only: it references pointers that m_listObj owns.
    map<string,CBEncodeObjectBase*> m_mapObj;
    // The objects are really stored here, as alternating name object /
    // value object entries.
    list<CBEncodeObjectBase*> m_listObj;
};

// The bencode parser. Owns the top-level objects stored in m_listObj.
class CBEncode
{
public:
    bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool parse(const char * szData);
    CBEncode();
    virtual ~CBEncode();

    // Deletes every top-level object and empties the list.
    void clear()
    {
        for(list<CBEncodeObjectBase *>::iterator it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
        // Reset the "last object" pointer too, so it cannot refer to an
        // object that was just deleted.
        m_plastObj = NULL;
    }

public:
    list<CBEncodeObjectBase*> m_listObj;
    CBEncodeObjectBase* m_plastObj;  // the last object that was parsed
    char * m_szTxt;
};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
// BEncode.cpp: implementation of the CBEncode class. // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "BEncode.h" ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// CBEncode::CBEncode() { m_plastObj = NULL; m_szTxt = NULL; } CBEncode::~CBEncode() { clear(); } bool CBEncode::parse(const char *szData) { if(szData == NULL||*szData==NULL) return false; clear(); m_szTxt = (char*)szData; char * szCurPos = (char*)szData; int iendpos; while(*szCurPos) { if(*szCurPos== 'i') { if(!readint(szCurPos,iendpos,m_listObj)) break;//遇到任何錯誤都終止整個解析 szCurPos+=iendpos; } else if(*szCurPos== 'l') { if(!readlist(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } else if(*szCurPos== 'd') { if(!readdict(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } else { if(!readstring(szCurPos,iendpos,m_listObj)) break; szCurPos+=iendpos; } } if(*szCurPos==0&&m_plastObj->m_error == enm_BEncodeErr_noerr) return true; return false; } //從當前位置讀取一個字串 bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeString * pNewString = new CBEncodeString; pNewString->szPos = szCurPos; char szLen[20]; int i = 0; while(*szTmp>='0'&&*szTmp<='9') szLen[i++]=*(szTmp++); szLen[i]=0; if(*szTmp==':') { int ilen = atoi(szLen); if(ilen>0) { pNewString->m_szData = ++szTmp; pNewString->m_ilen = ilen; szTmp+=ilen; } else pNewString->m_error = enm_BEncodeErr_errString; } else pNewString->m_error = enm_BEncodeErr_errString; listObj.push_back(pNewString); iendpos = szTmp-szCurPos; m_plastObj = pNewString; m_plastObj->ilen = iendpos; return pNewString->m_error == enm_BEncodeErr_noerr?true:false; } //讀取一個整型資料 bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeInt * pNewInt= new CBEncodeInt; pNewInt->szPos = 
szCurPos; if(*szTmp == 'i') { szTmp++; char szLen[20]; int i = 0; while(*szTmp>='0'&&*szTmp<='9') szLen[i++]=*(szTmp++); szLen[i]=0; if(*szTmp=='e') { pNewInt->m_iValue = atoi(szLen); ++szTmp; } else pNewInt->m_error = enm_BEncodeErr_errInt; } else pNewInt->m_error = enm_BEncodeErr_errInt; listObj.push_back(pNewInt); iendpos = szTmp-szCurPos; m_plastObj = pNewInt; m_plastObj->ilen = iendpos; return pNewInt->m_error == enm_BEncodeErr_noerr?true:false; }
//讀取一個列表 bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeList * pNewList= new CBEncodeList; pNewList->szPos = szCurPos; if(*szTmp == 'l') { szTmp++; int ilistendpos; while(*szTmp!='e') { if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewList->m_listObj)) break;//遇到任何錯誤都終止整個解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewList->m_listObj)) break; szTmp+=ilistendpos; } } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewList->m_error = enm_BEncodeErr_errList; else szTmp++; } else pNewList->m_error = enm_BEncodeErr_errList; listObj.push_back(pNewList); iendpos = szTmp-szCurPos; m_plastObj = pNewList; m_plastObj->ilen = iendpos; return pNewList->m_error == enm_BEncodeErr_noerr?true:false; } //讀取一個字典 bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj) { char * szTmp = szCurPos; CBEncodeDict * pNewDict= new CBEncodeDict; pNewDict->szPos = szCurPos; if(*szTmp == 'd') { szTmp++; int ilistendpos; string strname; while(*szTmp!='e') { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; if(m_plastObj->m_type !=enum_BEncodeType_String) break; strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen); szTmp+=ilistendpos; if(*szTmp== 'i') { if(!readint(szTmp,ilistendpos,pNewDict->m_listObj)) break;//遇到任何錯誤都終止整個解析 szTmp+=ilistendpos; } else if(*szTmp== 'l') { if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else if(*szTmp== 'd') { if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } else { if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj)) break; szTmp+=ilistendpos; } 
pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj)); } if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr) pNewDict->m_error = enm_BEncodeErr_errDict; else szTmp++; } else pNewDict->m_error = enm_BEncodeErr_errDict; listObj.push_back(pNewDict); iendpos = szTmp-szCurPos; m_plastObj = pNewDict; m_plastObj->ilen = iendpos; return pNewDict->m_error == enm_BEncodeErr_noerr?true:false; }
相關文章
- 關於使用Markdown解析器Parsedown應該注意的編碼...
- Vue原始碼模板編譯階段----HTML解析器腦圖Vue原始碼編譯HTML
- 二維碼解析器
- 簡易表示式解析器編寫
- 工程中的編譯原理 -- Mapfile解析器編譯原理
- 70 行 Python 程式碼編寫一個遞迴下降解析器Python遞迴
- (一) Mybatis原始碼分析-解析器模組MyBatis原始碼
- C語言編譯器開發之旅(二):解析器C語言編譯
- 為 man 手冊頁編寫解析器的備忘錄
- Gumbo:Google推出純C編寫的HTML5解析器GoHTML
- 解析-解析器
- 500 行 Python 程式碼做一個英文解析器Python
- DRF-Parser解析器元件原始碼分析和應用元件原始碼
- Mysql編碼, Mysql編碼流程, Mysql編碼順序, Mysql編碼原理, Mysql編碼修改依據MySql
- 解析-HTML 解析器HTML
- Day70 Spring MVC的響應方式,檢視解析器以及上傳下載和編碼過濾器SpringMVC過濾器
- MySQL核心原始碼解讀-SQL解析之解析器淺析MySql原始碼
- 使用Python語言編寫簡單的HTML5語法解析器PythonHTML
- 【字元編碼】字元編碼 && Base64編碼演算法字元演算法
- SQL解析器詳解SQL
- XML DOM 解析器概述XML
- 第15章:解析器
- IDEA如何設定編碼格式,字元編碼,全域性編碼和專案編碼格式Idea字元
- 六十行程式碼完成 四則運算 語法解析器行程
- 幾百行程式碼實現一個 JSON 解析器行程JSON
- 自定義Lua解析器管理器-------演化指令碼V0.5指令碼
- 【字元編碼】Java編碼格式探祕字元Java
- 熵編碼(四)-算術編碼(二)熵
- 手寫一個解析器
- .OBJ解析器的實現OBJ
- 配置多檢視解析器
- json 解析器哪家強?JSON
- Facebook面試題 | 迷你解析器面試題
- 【演算法】友誼與雪花的舞動,指令碼解析器原理演算法指令碼
- java安全編碼指南之:字串和編碼Java字串
- 字符集編碼(二):字元編碼模型字元模型
- 關於URL編碼/javascript/jsurl編碼JavaScriptJS
- 【字元編碼】徹底理解字元編碼字元