由pcap檔案提取IPv6的flow資料

shallowlearning發表於2015-08-19

一、目標:

現有兩個大學抓取的packet資料,分片成若干部分,需要從中抽取出流資訊( 源mac地址、目的mac地址、源ip地址、目的IP地址、源埠、目的埠、vlan、協議型別、流首包時間、流末包時間、流總大小),並按檔案中的packet順序,將結果輸出出來。

二、實現過程:

1、沒有溝通好需求是個硬傷,直接導致前面幾次提交結果不合格。
2、pcap是二進位制檔案,花了點時間研究pcap的檔案結構和二進位制檔案的讀寫方式。
3、資料量非常大,第一組資料packet數約一千萬,第二組有一億以上,對記憶體分配是個考驗。
4、一開始採用了順序查詢,速度非常慢,執行到後期,10秒只能遍歷100組資料。在老師提示下採用了hash表來儲存,速度果然不同凡響。
5、hash表關鍵字的選擇:
(1)初期:觀察了packet資料,發現源ip地址(SrcIP)不一樣的情況比較多,因此雜湊函式和關鍵字都直接用它了。衝突解決採用線性探測(發生衝突時位置加1)。
(2)中期:初期的衝突還是太多了,到後面速度異常慢,因此雜湊函式和關鍵字都改成了源mac地址、目的mac地址、源ip地址、目的IP地址、源埠、目的埠、vlan、協議型別的和(當然還要對hash表長取模),這麼一來衝突小了不少。然而執行至檔案末時速度仍然不夠。
(3)後期:導師提示了md5演算法,即Message Digest Algorithm MD5(訊息摘要演算法第五版),它的特點是:
①、壓縮性:任意長度的資料,算出的MD5值長度都是固定的。
②、容易計算:從原資料計算出MD5值很容易。
③、抗修改性:對原資料進行任何改動,哪怕只修改1個位元組,所得到的MD5值都有很大區別。
④、弱抗碰撞:已知原資料和其MD5值,想找到一個具有相同MD5值的資料(即偽造資料)是非常困難的。
⑤、強抗碰撞:想找到兩個不同的資料,使它們具有相同的MD5值,是非常困難的。
這就意味著用md5演算法作為hash函式可以大大減少衝突(實際情況是基本不出現衝突),畢竟不同關鍵字算出來的值差異太大了(但並沒有直接驗證,只是通過執行時間間接得出的結論)。
6、執行時間:一百萬個flow大概200s,這比順序查詢簡直快了四五個量級。

三、程式碼

(執行環境為Visual Studio 2012)

主程式碼如下
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "md5.h"
#include<math.h>
#include<time.h>
/* Upper bound on the number of packet records myfun() processes per input file. */
#define N 13000000LL
/* Number of slots in FlowTable and in the insertion-order list index[]. */
#define NUM_OF_MAX_FLOW 13000000LL
/* index[k] is the FlowTable slot of the k-th flow created (insertion order);
   FlowCnt is the number of entries in use. main() resets FlowCnt to 0 before
   each input file. */
static long long index[NUM_OF_MAX_FLOW]={0},FlowCnt=1;
/* Raw byte type used for all packet header fields. */
typedef unsigned char uint8;
/*
 * Key fields and size of one packet, filled by myfun() from one capture record.
 * The byte offsets below are relative to the start of the packet data and
 * follow from the fread/fseek sequence in myfun().
 * NOTE(review): those offsets look like an 802.1Q-tagged Ethernet frame
 * carrying IPv4 without options -- confirm against the actual captures.
 */
typedef struct{
    int TimeStart;      /* 1st 4-byte field of the record header; used as whole seconds */
    int MicroSec;       /* 2nd 4-byte field of the record header; sub-second part of the timestamp */
    int Caplen;         /* 3rd 4-byte field of the record header; used to skip to the next record */
    uint8 SrcMac[6];    /* packet bytes 6..11 */
    uint8 DstMac[6];    /* packet bytes 0..5 */
    uint8 SrcIP[4];     /* packet bytes 30..33 */
    uint8 DstIP[4];     /* packet bytes 34..37 */
    uint8 SrcPort[2];   /* packet bytes 38..39 */
    uint8 DstPort[2];   /* packet bytes 40..41 */
    uint8 VlanID[2];    /* packet bytes 14..15 */
    uint8 VlanType[2];  /* packet bytes 16..17 */
    uint8 Protocol;     /* packet byte 27 */
    int FlowBytes;      /* 4th 4-byte field of the record header; added to the flow's byte total */
}STREAM;
/* One slot of the flow hash table: the flow key plus its running statistics. */
typedef struct{
    uint8 flag;                 /* 0 = slot empty, 1 = slot holds a flow */
    double timeStart;           /* timestamp of the first packet of the flow */
    double timeEnd;             /* timestamp of the most recent packet added to the flow */
    uint8 SrcMac[6];
    uint8 DstMac[6];
    uint8 SrcIP[4];
    uint8 DstIP[4];
    uint8 SrcPort[2];
    uint8 DstPort[2];
    uint8 VlanID[2];
    uint8 VlanType[2];
    uint8 Protocol;
    unsigned long FlowBytes;    /* sum of STREAM.FlowBytes over the flow's packets */
    unsigned long PacketsNum;   /* number of packets added to the flow */
}FLOW;
/* Flow hash table; add2Hash() resolves collisions by stepping to the next slot (modulo the table size). */
static FLOW FlowTable[NUM_OF_MAX_FLOW] = { 0 };
/*
 * Write the n bytes starting at str to fw as lowercase hexadecimal,
 * two digits per byte (high nibble first), with no separators.
 */
void writeHex(FILE *fw, uint8 *str, int n)
{
    int pos = 0;

    while (pos < n) {
        const unsigned hi = str[pos] >> 4;
        const unsigned lo = str[pos] & 0x0F;

        fprintf(fw, "%x%x", hi, lo);
        pos++;
    }
}
/*
 * Compare the first n bytes of two buffers.
 * Returns 1 when they are all equal (also when n <= 0), 0 at the first difference.
 */
int isEqual(uint8 *old,uint8 *now,int n)
{
    int remaining = n;

    while (remaining-- > 0) {
        if (*old++ != *now++)
            return 0;
    }
    return 1;
}
/*
 * Return 1 when the packet and the stored flow have the same key
 * (both MACs, both IPs, both ports, both VLAN fields and the protocol byte),
 * 0 otherwise. Fields are compared in that order and the comparison stops
 * at the first mismatch.
 */
int Equal(STREAM data,FLOW FlowData)
{
    const int sameKey =
        isEqual(data.SrcMac,   FlowData.SrcMac,   6) &&
        isEqual(data.DstMac,   FlowData.DstMac,   6) &&
        isEqual(data.SrcIP,    FlowData.SrcIP,    4) &&
        isEqual(data.DstIP,    FlowData.DstIP,    4) &&
        isEqual(data.SrcPort,  FlowData.SrcPort,  2) &&
        isEqual(data.DstPort,  FlowData.DstPort,  2) &&
        isEqual(data.VlanID,   FlowData.VlanID,   2) &&
        isEqual(data.VlanType, FlowData.VlanType, 2) &&
        data.Protocol == FlowData.Protocol;

    return sameKey ? 1 : 0;
}
/*
 * Interpret the first four bytes at IP as a big-endian unsigned 32-bit number
 * (IP[0] is the most significant byte) and return it as a long long.
 */
long long string2num(uint8 *IP)
{
    long long value = 0;
    int k;

    for (k = 0; k < 4; k++)
        value = (value << 8) | (long long)IP[k];
    return value;
}
int add2Hash(STREAM data,FLOW *FlowTable,long long HashNum)
{
     int i,flag=0,cnt=0;
     double temp;
 loop:
     if(cnt++>100000) return 0;
     if(FlowTable[HashNum].flag==0){
           FlowTable[HashNum].flag=1;
           FlowTable[HashNum].timeStart=data.TimeStart+(int)data.MicroSec/1000+(data.MicroSec%1000)/1000.0;
           FlowTable[HashNum].timeEnd=FlowTable[HashNum].timeStart;
           for(i=0;i<6;i++) FlowTable[HashNum].SrcMac[i]=data.SrcMac[i];
           for(i=0;i<6;i++) FlowTable[HashNum].DstMac[i]=data.DstMac[i];
           for(i=0;i<4;i++) FlowTable[HashNum].SrcIP[i]=data.SrcIP[i];
           for(i=0;i<4;i++) FlowTable[HashNum].DstIP[i]=data.DstIP[i];
           for(i=0;i<2;i++) FlowTable[HashNum].SrcPort[i]=data.SrcPort[i];
           for(i=0;i<2;i++) FlowTable[HashNum].DstPort[i]=data.DstPort[i];
           for(i=0;i<2;i++) FlowTable[HashNum].VlanID[i]=data.VlanID[i];
           for(i=0;i<2;i++) FlowTable[HashNum].VlanType[i]=data.VlanType[i];
           FlowTable[HashNum].Protocol=data.Protocol;
           FlowTable[HashNum].FlowBytes=data.FlowBytes;
           FlowTable[HashNum].PacketsNum=1;
           index[FlowCnt++]=HashNum;
           return 1;
     }
     else{//???? 
           if(Equal(data,FlowTable[HashNum]))
           {//???????flow 
              temp=data.TimeStart+(int)data.MicroSec/1000+(data.MicroSec%1000)/1000.0;
              if (fabs(temp - FlowTable[HashNum].timeEnd)<5 && temp>FlowTable[HashNum].timeEnd){//?????????flow 
                 FlowTable[HashNum].timeEnd=temp;
                 FlowTable[HashNum].FlowBytes += data.FlowBytes;
                 FlowTable[HashNum].PacketsNum++;
                 return 1;
              }
              else{
                   /*HashNum=(HashNum*HashNum)%NUM_OF_MAX_FLOW;
                   add2Hash(data,FlowTable,HashNum);//????????flow?*/
                      HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;
                    while (FlowTable[HashNum].flag == 1){
                      if (Equal(data, FlowTable[HashNum])){
                          temp = data.TimeStart + (int)data.MicroSec / 1000 + (data.MicroSec % 1000) / 1000.0;
                          if (fabs(temp - FlowTable[HashNum].timeEnd) < 5 && temp>FlowTable[HashNum].timeEnd){//?????????flow 
                              FlowTable[HashNum].timeEnd = temp;
                              FlowTable[HashNum].FlowBytes += data.FlowBytes;
                              FlowTable[HashNum].PacketsNum++;
                              flag = 1;
                              return 1;
                          }
                          else
                              HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;
                      }
                      else
                          HashNum = (HashNum + 1) % NUM_OF_MAX_FLOW;
                    }
                    if (flag==0){//????? 
                      FlowTable[HashNum].flag = 1;
                      FlowTable[HashNum].timeStart = data.TimeStart + (int)data.MicroSec / 1000 + (data.MicroSec % 1000) / 1000.0;
                      FlowTable[HashNum].timeEnd = FlowTable[HashNum].timeStart;
                      for (i = 0; i<6; i++) FlowTable[HashNum].SrcMac[i] = data.SrcMac[i];
                      for (i = 0; i<6; i++) FlowTable[HashNum].DstMac[i] = data.DstMac[i];
                      for (i = 0; i<4; i++) FlowTable[HashNum].SrcIP[i] = data.SrcIP[i];
                      for (i = 0; i<4; i++) FlowTable[HashNum].DstIP[i] = data.DstIP[i];
                      for (i = 0; i<2; i++) FlowTable[HashNum].SrcPort[i] = data.SrcPort[i];
                      for (i = 0; i<2; i++) FlowTable[HashNum].DstPort[i] = data.DstPort[i];
                      for (i = 0; i<2; i++) FlowTable[HashNum].VlanID[i] = data.VlanID[i];
                      for (i = 0; i<2; i++) FlowTable[HashNum].VlanType[i] = data.VlanType[i];
                      FlowTable[HashNum].Protocol = data.Protocol;
                      FlowTable[HashNum].FlowBytes = data.FlowBytes;
                      FlowTable[HashNum].PacketsNum = 1;
                      index[FlowCnt++] = HashNum;
                      return 1;
                  }
              }
           }
           else 
           {
                   /*HashNum=(HashNum*HashNum)%NUM_OF_MAX_FLOW;
                   add2Hash(data,FlowTable,HashNum);//????????flow? */
               HashNum = (HashNum+1) % NUM_OF_MAX_FLOW;
                      goto loop;
           }
     }
     return 1;
}
/*
 * Write every flow recorded in index[0 .. FlowCnt-1] to fw, one line per flow,
 * in creation order. Columns: source MAC, destination MAC, source IP,
 * destination IP, source port, destination port, VlanType, VlanID, protocol,
 * first-packet time, last-packet time, duration, byte total, packet count.
 *
 * Output stops at the first flow whose start and end times both equal those
 * stored in the preceding table slot.
 * NOTE(review): this looks like a guard against the duplicate entries created
 * when the same stale packet is inserted repeatedly -- confirm the intent.
 *
 * Fixes over the original: the "preceding slot" of slot 0 is the last slot of
 * the table (the original read FlowTable[-1]), and the packet count is printed
 * with %lu to match its unsigned long type.
 */
void print2file(FILE *fw,FLOW *FlowTable)
{
     long long j;
     int k;

     for (j = 0; j < FlowCnt; j++) {
        const long long i = index[j];
        /* Probing in add2Hash() wraps around, so slot 0 follows the last slot. */
        const long long prev = (i > 0) ? i - 1 : NUM_OF_MAX_FLOW - 1;
        FLOW *flow = &FlowTable[i];

        if (flow->timeStart == FlowTable[prev].timeStart && flow->timeEnd == FlowTable[prev].timeEnd)
            break;

        writeHex(fw, flow->SrcMac, 6);
        fprintf(fw," ");
        writeHex(fw, flow->DstMac, 6);
        fprintf(fw,"\t");
        for (k = 0; k < 3; k++)
            fprintf(fw, "%d.", flow->SrcIP[k]);
        fprintf(fw, "%d\t", flow->SrcIP[3]);
        for (k = 0; k < 3; k++)
            fprintf(fw, "%d.", flow->DstIP[k]);
        fprintf(fw, "%d\t", flow->DstIP[3]);
        writeHex(fw, flow->SrcPort, 2);
        fprintf(fw," ");
        writeHex(fw, flow->DstPort, 2);
        fprintf(fw," ");
        writeHex(fw, flow->VlanType, 2);
        fprintf(fw," ");
        writeHex(fw, flow->VlanID, 2);
        fprintf(fw," ");
        fprintf(fw, "%d", flow->Protocol);
        fprintf(fw,"\t%.3lf",flow->timeStart);
        fprintf(fw,"\t%.3lf",flow->timeEnd);
        fprintf(fw, "\t%10.3lf", flow->timeEnd - flow->timeStart);
        fprintf(fw, " ");
        fprintf(fw, "%8lu", flow->FlowBytes);
        fprintf(fw, "\t%lu\n", flow->PacketsNum);
     }
}
void myfun(FILE *fp,FILE *fw)
{
    long long num = 1, HashNum=0,i;
    int flag=1;
    STREAM data;
    uint8 result[16] = { 0 },temp[4];
    fseek(fp, 24, SEEK_SET);//???????
    do{
    //////////////////////////////////////////
        fread(&data.TimeStart, sizeof(int), 1, fp);
        fread(&data.MicroSec, sizeof(int), 1, fp);
        fread(&data.Caplen, sizeof(int), 1, fp);
        fread(&data.FlowBytes, sizeof(int), 1, fp);
        fread(data.DstMac, sizeof(uint8), 6, fp);//?DstMac 
        fread(data.SrcMac, sizeof(uint8), 6, fp);
        fseek(fp, 2, SEEK_CUR);
        fread(data.VlanID, sizeof(uint8), 2, fp);
        fread(data.VlanType, sizeof(uint8), 2, fp);
        fseek(fp, 9, SEEK_CUR);
        fread(&data.Protocol, sizeof(uint8), 1, fp);
        fseek(fp, 2, SEEK_CUR);
        fread(data.SrcIP, sizeof(uint8), 4, fp);
        fread(data.DstIP, sizeof(uint8), 4, fp);
        fread(data.SrcPort, sizeof(uint8), 2, fp);
        fread(data.DstPort, sizeof(uint8), 2, fp);
        //////////////////////////////////////////
        for (i = 0; i < 4; i++){
            temp[i] = data.SrcMac[i] + data.DstMac[i];
        }
        for (i = 0; i < 4; i++){
            temp[i] += data.SrcIP[i] + data.DstIP[i];
        }
        for (i = 0; i < 2; i++){
            temp[i] += data.SrcPort[i] + data.DstPort[i] + data.VlanID[i] + data.VlanType[i] + data.Protocol;
        }
        ZEN_LIB::md5(temp, 4, result);
        HashNum = string2num(result)%NUM_OF_MAX_FLOW;
        /////////////////////
        flag=add2Hash(data,FlowTable,abs(HashNum));
        //index[num-1]=HashNum;
        ///////////////////
        fseek(fp, data.Caplen - 42, SEEK_CUR);
    } while (num++<N&&flag==1);
    print2file(fw,FlowTable);
    fclose(fp);
    fclose(fw);
}

/*
 * Process the capture files "univ2_pt3" .. "univ2_pt7", writing the flows of
 * each to "result3.txt" .. "result7.txt", then record the elapsed CPU time in
 * "time.txt" and ring the terminal bell twice.
 *
 * A file that cannot be opened is reported on stderr and skipped instead of
 * passing a NULL stream to myfun(). myfun() closes the streams it is given.
 *
 * NOTE(review): FlowCnt is reset for every file but FlowTable is not cleared,
 * so flows from earlier files stay in the table and can absorb packets of
 * later files -- confirm whether that carry-over is intended.
 */
int main()
{
    FILE *fp, *fw, *ftime;
    long long j;
    int i;
    double start, finish;
    char str[15]="univ2_pt2",temp[30]="result2.txt";

    start = clock();    /* start of the timed section */
    for (j = 0; j < NUM_OF_MAX_FLOW; j++) index[j] = 0;
    for (j = 0; j < NUM_OF_MAX_FLOW; j++) FlowTable[j].flag = 0;

    for (i = 3; i <= 7; i++) {
        FlowCnt = 0;
        str[8] = i + '0';       /* univ2_pt<i> */
        temp[6] = i + '0';      /* result<i>.txt */

        fp = fopen(str, "rb");
        if (fp == NULL) {
            fprintf(stderr, "cannot open input file %s\n", str);
            continue;
        }
        fw = fopen(temp, "w+");
        if (fw == NULL) {
            fprintf(stderr, "cannot open output file %s\n", temp);
            fclose(fp);
            continue;
        }
        myfun(fp, fw);
    }

    finish = clock();   /* end of the timed section */
    ftime = fopen("time.txt", "w+");
    if (ftime != NULL) {
        fprintf(ftime,"%f seconds\n", (finish - start) / CLOCKS_PER_SEC);
        fclose(ftime);
    }
    putchar('\a');
    putchar('\a');
    return 0;
}
其中,MD5函式如下(係轉載,很抱歉來源忘記了,若有知情者請告知我補上)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>

// Byte-order tags: little-endian vs. big-endian hosts
#define ZEN_LITTLE_ENDIAN  0x0123
#define ZEN_BIG_ENDIAN     0x3210

// Little-endian is assumed unless ZEN_BYTES_ORDER is defined before this point.
#ifndef ZEN_BYTES_ORDER
#define ZEN_BYTES_ORDER    ZEN_LITTLE_ENDIAN
#endif

// Byte-reversal macros for 16-, 32- and 64-bit values.
// Each macro evaluates its argument several times, so pass side-effect-free expressions.
#ifndef ZEN_SWAP_UINT16
#define ZEN_SWAP_UINT16(x)  ((((x) & 0xff00) >>  8) | (((x) & 0x00ff) <<  8))
#endif
#ifndef ZEN_SWAP_UINT32
#define ZEN_SWAP_UINT32(x)  ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
     (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
#endif
#ifndef ZEN_SWAP_UINT64
#define ZEN_SWAP_UINT64(x)  ((((x) & 0xff00000000000000) >> 56) | (((x) & 0x00ff000000000000) >>  40) | \
     (((x) & 0x0000ff0000000000) >> 24) | (((x) & 0x000000ff00000000) >>  8) | \
     (((x) & 0x00000000ff000000) << 8 ) | (((x) & 0x0000000000ff0000) <<  24) | \
     (((x) & 0x000000000000ff00) << 40 ) | (((x) & 0x00000000000000ff) <<  56))
#endif

/*
 * Copy `length` bytes from `from` to `to`, zero-pad the copy up to the next
 * multiple of 4 bytes, then reverse the byte order inside every 4-byte group.
 *
 * `to` must have room for `length` rounded up to a multiple of 4. The regions
 * must not overlap. Returns `to`.
 *
 * The bytes are swapped one at a time, so `to` does not have to be aligned
 * for uint32_t (the original cast `to` to uint32_t * and accessed it as words).
 */
void *swap_uint32_memcpy(void *to, const void *from, size_t length)
{
    unsigned char *bytes = (unsigned char *)to;

    memcpy(bytes, from, length);

    /* Length rounded up to a whole number of 4-byte groups. */
    size_t padded = (length + 3) & ~(size_t)3;

    /* Zero the padding bytes, if any. */
    for (size_t i = length; i < padded; ++i)
    {
        bytes[i] = 0;
    }

    /* Reverse each group of four bytes in place. */
    for (size_t i = 0; i < padded; i += 4)
    {
        unsigned char b0 = bytes[i];
        unsigned char b1 = bytes[i + 1];
        bytes[i]     = bytes[i + 3];
        bytes[i + 1] = bytes[i + 2];
        bytes[i + 2] = b1;
        bytes[i + 3] = b0;
    }

    return to;
}

///Length in bytes of an MD5 digest
static const size_t ZEN_MD5_HASH_SIZE = 16;
///Length in bytes of a SHA-1 digest
static const size_t ZEN_SHA1_HASH_SIZE = 20;



namespace ZEN_LIB
{


    /*!
    @brief      Compute the MD5 digest of a memory block.
    @return     unsigned char* the result buffer passed in
    @param[in]  buf    pointer to the data to hash
    @param[in]  size   length of the data in bytes
    @param[out] result receives the ZEN_MD5_HASH_SIZE digest bytes
    */
    unsigned char *md5(const unsigned char *buf,
        size_t size,
        unsigned char result[ZEN_MD5_HASH_SIZE]);


    /*!
    @brief      Compute the SHA-1 digest of a memory block.
                Only declared here; no definition is visible in this listing.
    @return     unsigned char* the result
    @param[in]  buf    pointer to the data to hash
    @param[in]  size   length of the data in bytes
    @param[out] result receives the ZEN_SHA1_HASH_SIZE digest bytes
    */
    unsigned char *sha1(const unsigned char *buf,
        size_t size,
        unsigned char result[ZEN_SHA1_HASH_SIZE]);
};


//================================================================================================
//MD5 algorithm

//Size in bytes of one block processed per step
static const size_t ZEN_MD5_BLOCK_SIZE = 64;

//MD5 context: holds the running state, intermediate data and result
typedef struct md5_ctx
{
    //total number of bytes passed to zen_md5_update so far
    uint64_t length_;
    //number of trailing bytes that zen_md5_update left unhashed
    uint64_t unprocessed_;
    //running hash state (intermediate value, final digest once finished)
    uint32_t  hash_[4];
} md5_ctx;


// Bit rotations of 32- and 64-bit values (L = left, R = right).
// The two shifted halves never overlap, so XOR combines them like OR would.
// n must be strictly between 0 and the width: n == 0 would shift by the full width.
#define ROTL32(dword, n) ((dword) << (n) ^ ((dword) >> (32 - (n))))
#define ROTR32(dword, n) ((dword) >> (n) ^ ((dword) << (32 - (n))))
#define ROTL64(qword, n) ((qword) << (n) ^ ((qword) >> (64 - (n))))
#define ROTR64(qword, n) ((qword) >> (n) ^ ((qword) << (64 - (n))))


/*!
@brief      Internal helper: reset an MD5 context to its starting state
            (both byte counters zero, hash words set to the initial constants).
@param      ctx context to initialise
*/
static void zen_md5_init(md5_ctx *ctx)
{
    /* starting values of the four hash words */
    static const uint32_t initial_state[4] =
    {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
    };

    for (int word = 0; word < 4; ++word)
    {
        ctx->hash_[word] = initial_state[word];
    }

    ctx->unprocessed_ = 0;
    ctx->length_ = 0;
}

/* First, define four auxiliary functions that each take as input
* three 32-bit words and returns a 32-bit word.*/

/* F(x,y,z) = ((y XOR z) AND x) XOR z - is faster then original version */
#define MD5_F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define MD5_G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define MD5_H(x, y, z) ((x) ^ (y) ^ (z))
#define MD5_I(x, y, z) ((y) ^ ((x) | (~z)))

/* transformations for rounds 1, 2, 3, and 4. */
/* Each step: a += f(b, c, d) + x + ac; a = rotate-left(a, s); a += b,
 * where f is MD5_F / MD5_G / MD5_H / MD5_I for rounds 1 / 2 / 3 / 4.
 * Note: MD5_G and MD5_I apply ~ to z without parentheses, so z must be a
 * simple operand (the round macros always pass a parenthesised name). */
#define MD5_ROUND1(a, b, c, d, x, s, ac) { \
         (a) += MD5_F((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     }
#define MD5_ROUND2(a, b, c, d, x, s, ac) { \
         (a) += MD5_G((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     }
#define MD5_ROUND3(a, b, c, d, x, s, ac) { \
         (a) += MD5_H((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     }
#define MD5_ROUND4(a, b, c, d, x, s, ac) { \
         (a) += MD5_I((b), (c), (d)) + (x) + (ac); \
         (a) = ROTL32((a), (s)); \
         (a) += (b); \
     }


/*!
@brief      Internal helper: digest one 64-byte block (16 uint32_t words) into the
            running hash state. The words are consumed in little-endian byte order.
            The block is run through 64 steps, 16 per MD5_ROUNDn macro, and the
            resulting a/b/c/d are added into state.
@param      state the four running hash words; updated in place
@param      block the block to process, 64 bytes as 16 uint32_t words
*/
static void zen_md5_process_block(uint32_t state[4], const uint32_t block[ZEN_MD5_BLOCK_SIZE / 4])
{
    register unsigned a, b, c, d;
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];

    const uint32_t *x = NULL;

    //MD5 works on little-endian words; on a big-endian build the block is byte-swapped into a local copy first
#if ZEN_BYTES_ORDER == ZEN_LITTLE_ENDIAN
    x = block;
#else
    uint32_t swap_block[ZEN_MD5_BLOCK_SIZE / 4];
    swap_uint32_memcpy(swap_block, block, 64);
    x = swap_block;
#endif


    MD5_ROUND1(a, b, c, d, x[0], 7, 0xd76aa478);
    MD5_ROUND1(d, a, b, c, x[1], 12, 0xe8c7b756);
    MD5_ROUND1(c, d, a, b, x[2], 17, 0x242070db);
    MD5_ROUND1(b, c, d, a, x[3], 22, 0xc1bdceee);
    MD5_ROUND1(a, b, c, d, x[4], 7, 0xf57c0faf);
    MD5_ROUND1(d, a, b, c, x[5], 12, 0x4787c62a);
    MD5_ROUND1(c, d, a, b, x[6], 17, 0xa8304613);
    MD5_ROUND1(b, c, d, a, x[7], 22, 0xfd469501);
    MD5_ROUND1(a, b, c, d, x[8], 7, 0x698098d8);
    MD5_ROUND1(d, a, b, c, x[9], 12, 0x8b44f7af);
    MD5_ROUND1(c, d, a, b, x[10], 17, 0xffff5bb1);
    MD5_ROUND1(b, c, d, a, x[11], 22, 0x895cd7be);
    MD5_ROUND1(a, b, c, d, x[12], 7, 0x6b901122);
    MD5_ROUND1(d, a, b, c, x[13], 12, 0xfd987193);
    MD5_ROUND1(c, d, a, b, x[14], 17, 0xa679438e);
    MD5_ROUND1(b, c, d, a, x[15], 22, 0x49b40821);

    MD5_ROUND2(a, b, c, d, x[1], 5, 0xf61e2562);
    MD5_ROUND2(d, a, b, c, x[6], 9, 0xc040b340);
    MD5_ROUND2(c, d, a, b, x[11], 14, 0x265e5a51);
    MD5_ROUND2(b, c, d, a, x[0], 20, 0xe9b6c7aa);
    MD5_ROUND2(a, b, c, d, x[5], 5, 0xd62f105d);
    MD5_ROUND2(d, a, b, c, x[10], 9, 0x2441453);
    MD5_ROUND2(c, d, a, b, x[15], 14, 0xd8a1e681);
    MD5_ROUND2(b, c, d, a, x[4], 20, 0xe7d3fbc8);
    MD5_ROUND2(a, b, c, d, x[9], 5, 0x21e1cde6);
    MD5_ROUND2(d, a, b, c, x[14], 9, 0xc33707d6);
    MD5_ROUND2(c, d, a, b, x[3], 14, 0xf4d50d87);
    MD5_ROUND2(b, c, d, a, x[8], 20, 0x455a14ed);
    MD5_ROUND2(a, b, c, d, x[13], 5, 0xa9e3e905);
    MD5_ROUND2(d, a, b, c, x[2], 9, 0xfcefa3f8);
    MD5_ROUND2(c, d, a, b, x[7], 14, 0x676f02d9);
    MD5_ROUND2(b, c, d, a, x[12], 20, 0x8d2a4c8a);

    MD5_ROUND3(a, b, c, d, x[5], 4, 0xfffa3942);
    MD5_ROUND3(d, a, b, c, x[8], 11, 0x8771f681);
    MD5_ROUND3(c, d, a, b, x[11], 16, 0x6d9d6122);
    MD5_ROUND3(b, c, d, a, x[14], 23, 0xfde5380c);
    MD5_ROUND3(a, b, c, d, x[1], 4, 0xa4beea44);
    MD5_ROUND3(d, a, b, c, x[4], 11, 0x4bdecfa9);
    MD5_ROUND3(c, d, a, b, x[7], 16, 0xf6bb4b60);
    MD5_ROUND3(b, c, d, a, x[10], 23, 0xbebfbc70);
    MD5_ROUND3(a, b, c, d, x[13], 4, 0x289b7ec6);
    MD5_ROUND3(d, a, b, c, x[0], 11, 0xeaa127fa);
    MD5_ROUND3(c, d, a, b, x[3], 16, 0xd4ef3085);
    MD5_ROUND3(b, c, d, a, x[6], 23, 0x4881d05);
    MD5_ROUND3(a, b, c, d, x[9], 4, 0xd9d4d039);
    MD5_ROUND3(d, a, b, c, x[12], 11, 0xe6db99e5);
    MD5_ROUND3(c, d, a, b, x[15], 16, 0x1fa27cf8);
    MD5_ROUND3(b, c, d, a, x[2], 23, 0xc4ac5665);

    MD5_ROUND4(a, b, c, d, x[0], 6, 0xf4292244);
    MD5_ROUND4(d, a, b, c, x[7], 10, 0x432aff97);
    MD5_ROUND4(c, d, a, b, x[14], 15, 0xab9423a7);
    MD5_ROUND4(b, c, d, a, x[5], 21, 0xfc93a039);
    MD5_ROUND4(a, b, c, d, x[12], 6, 0x655b59c3);
    MD5_ROUND4(d, a, b, c, x[3], 10, 0x8f0ccc92);
    MD5_ROUND4(c, d, a, b, x[10], 15, 0xffeff47d);
    MD5_ROUND4(b, c, d, a, x[1], 21, 0x85845dd1);
    MD5_ROUND4(a, b, c, d, x[8], 6, 0x6fa87e4f);
    MD5_ROUND4(d, a, b, c, x[15], 10, 0xfe2ce6e0);
    MD5_ROUND4(c, d, a, b, x[6], 15, 0xa3014314);
    MD5_ROUND4(b, c, d, a, x[13], 21, 0x4e0811a1);
    MD5_ROUND4(a, b, c, d, x[4], 6, 0xf7537e82);
    MD5_ROUND4(d, a, b, c, x[11], 10, 0xbd3af235);
    MD5_ROUND4(c, d, a, b, x[2], 15, 0x2ad7d2bb);
    MD5_ROUND4(b, c, d, a, x[9], 21, 0xeb86d391);

    // fold this block's result into the running state
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
}


/*!
@brief      Internal helper: hash every complete 64-byte block at the front of the
            data. The bytes left over (fewer than 64) are not consumed here; their
            count is stored in ctx->unprocessed_ for zen_md5_final to pick up.
@param[out] ctx  algorithm context; length_, unprocessed_ and hash_ are updated
@param[in]  buf  data to hash
@param[in]  size length of the data in bytes
*/
static void zen_md5_update(md5_ctx *ctx, const unsigned char *buf, size_t size)
{
    //+= rather than =: the function may be called several times, but only if
    //every earlier call left no unprocessed bytes behind
    ctx->length_ += size;

    //Each block is copied into a properly aligned word buffer first; the caller's
    //byte pointer carries no alignment guarantee, so it must not be read as uint32_t
    uint32_t block[ZEN_MD5_BLOCK_SIZE / 4];

    //every processed block is 64 bytes
    while (size >= ZEN_MD5_BLOCK_SIZE)
    {
        memcpy(block, buf, ZEN_MD5_BLOCK_SIZE);
        zen_md5_process_block(ctx->hash_, block);
        buf += ZEN_MD5_BLOCK_SIZE;
        size -= ZEN_MD5_BLOCK_SIZE;
    }

    ctx->unprocessed_ = size;
}


/*!
@brief      Internal helper: hash the tail of the data. Builds the last one (or two)
            blocks from the unprocessed bytes, the 0x80 marker, zero padding and the
            64-bit message length in bits, processes them and writes the digest.
@param[in]  ctx    algorithm context as left by zen_md5_update
@param[in]  buf    the same data buffer that was passed to zen_md5_update
@param[in]  size   length of that buffer in bytes
@param[out] result receives the ZEN_MD5_HASH_SIZE digest bytes
*/
static void zen_md5_final(md5_ctx *ctx, const unsigned char *buf, size_t size, unsigned char *result)
{
    //Zero-initialised so that no word is ever read before it has been written
    uint32_t message[ZEN_MD5_BLOCK_SIZE / 4] = { 0 };

    //Copy the leftover bytes; zen_md5_update guarantees there are fewer than 64 of them
    if (ctx->unprocessed_)
    {
        memcpy(message, buf + size - ctx->unprocessed_, static_cast<size_t>(ctx->unprocessed_));
    }

    //Word index and bit offset at which the 0x80 marker goes
    uint32_t word = ((uint32_t)ctx->length_ & 63) >> 2;
    uint32_t shift = ((uint32_t)ctx->length_ & 3) * 8;

    //Keep the data bytes of that word, clear the rest, then insert the marker.
    //0x80u keeps the shift unsigned: shift can be 24, which overflows a signed int.
    message[word] &= ~(0xFFFFFFFF << shift);
    message[word++] ^= 0x80u << shift;

    //If the two length words no longer fit in this block, pad it out, process it
    //and continue with a fresh block
    if (word > 14)
    {
        while (word < 16)
        {
            message[word++] = 0;
        }

        zen_md5_process_block(ctx->hash_, message);
        word = 0;
    }

    //zero padding up to the length field
    while (word < 14)
    {
        message[word++] = 0;
    }

    //The stored length is in bits, not bytes
    uint64_t data_len = (ctx->length_) << 3;

    //MD5 wants the 64-bit length in little-endian order; note the != below
#if ZEN_BYTES_ORDER != ZEN_LITTLE_ENDIAN
    data_len = ZEN_SWAP_UINT64(data_len);
#endif

    message[14] = (uint32_t)(data_len & 0x00000000FFFFFFFF);
    message[15] = (uint32_t)((data_len & 0xFFFFFFFF00000000ULL) >> 32);

    zen_md5_process_block(ctx->hash_, message);

    //The state words are little-endian; convert on a big-endian build
#if ZEN_BYTES_ORDER == ZEN_LITTLE_ENDIAN
    memcpy(result, &ctx->hash_, ZEN_MD5_HASH_SIZE);
#else
    swap_uint32_memcpy(result, &ctx->hash_, ZEN_MD5_HASH_SIZE);
#endif

}


//計算一個記憶體資料的MD5值
unsigned char *ZEN_LIB::md5(const unsigned char *buf,
    size_t size,
    unsigned char result[ZEN_MD5_HASH_SIZE])
{
    assert(result != NULL);

    md5_ctx ctx;
    zen_md5_init(&ctx);
    zen_md5_update(&ctx, buf, size);
    zen_md5_final(&ctx, buf, size, result);
    return result;
}

四、結果

最終提取出來的結果示例如下,其中,排列格式為(從左至右):
源mac地址、目的mac地址、源ip地址、目的IP地址、源埠、目的埠、vlan(型別與ID兩欄)、協議型別、流首包時間、流末包時間、流持續時間、流總大小、流的封包數

0030488b0be8 00000c07ac00   244.157.209.31  27.88.27.89 0022 e984 0038 4500 157 1264194698.490  1264194698.490       0.000       70 1
0009e9b6e18a 01005e000002   210.218.218.164 7.193.7.193 001c 6679 0030 45c0 157 1264194698.836  1264194698.836       0.000       62 1
0009e97fd60a 0030488b0be8   244.157.82.156  27.89.27.88 0022 a1ea 005d 4500 157 1264194698.845  1264194698.845       0.000      107 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 3a82 00ec 4500 157 1264194698.887  1264194698.887       0.000      250 1
0009e9b6e18a 01005e000002   210.218.218.164 7.193.7.193 001c 167a 0030 45c0 157 1264194698.936  1264194698.936       0.000       62 1
003048859c18 00000c07ac00   244.157.209.31  27.88.27.89 0022 e50a 005e 4500 157 1264194698.986  1264194698.986       0.000      108 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 ca8f 005d 4500 157 1264194699.335  1264194699.335       0.000      107 1
003048859c18 00000c07ac00   244.157.209.31  27.88.27.89 0022 0b70 0038 4500 157 1264194699.734  1264194699.734       0.000       70 1
0009e97fd60a 003048859c18   244.157.82.106  27.89.27.88 0022 cb4a 005d 4500 157 1264194700.084  1264194700.084       0.000      107 1
0009e97fd60a 0030488b0cf6   244.157.82.176  27.89.27.88 0022 d07f 0080 4500 157 1264194700.088  1264194700.088       0.000      142 1 

相關文章