awk分析日誌

redhouser發表於2011-05-26

awk非常適合於文字檔案處理,特別是利用內建陣列,可以實現多個資料行關聯處理。
比如,從應用日誌中分析各類請求數量、處理時間等。

1,待處理日誌檔案a.trc
-------------------------------------------------------------------------
request=100001,amt=30,time=20110407152900
request=100002,amt=30,time=20110407152900
response=100001,time=20110407152910
request=100003,amt=30,time=20110407152900
response=100002,time=20110407152920
request=100004,amt=30,time=20110407152900
response=100003,time=20110407152930
request=100005,amt=30,time=20110407152900
response=100004,time=20110407152940
request=100006,amt=30,time=20110407152900
response=100005,time=20110407152950
request=100007,amt=30,time=20110407152900
response=100006,time=20110407152950
request=100008,amt=30,time=20110407152900
response=100007,time=20110407152950
-------------------------------------------------------------------------

2,命令:
使用三種方法實現相同的邏輯,可以根據需要,透過比較記憶體佔用、處理效率與靈活性來選擇。指令碼中使用了 mktime(),它是 gawk 的擴充函式(不屬於 POSIX awk),請以 gawk 或其他支援 mktime 的 awk 執行。
-------------------------------------------------------------------------
awk -f a1.awk a.trc
awk -f a2.awk a.trc
awk -f a3.awk a.trc
-------------------------------------------------------------------------

3,處理結果(以下為 a1.awk 的輸出;a2.awk 與 a3.awk 在 END 中列出所有請求,因此還會多輸出一行尚未收到回應的請求 100008,其 end 為空、ela 為 0):
-------------------------------------------------------------------------
reqid:100001 amt:30 bgn:20110407152900 end:20110407152910 ela:10
reqid:100002 amt:30 bgn:20110407152900 end:20110407152920 ela:20
reqid:100003 amt:30 bgn:20110407152900 end:20110407152930 ela:30
reqid:100004 amt:30 bgn:20110407152900 end:20110407152940 ela:40
reqid:100005 amt:30 bgn:20110407152900 end:20110407152950 ela:50
reqid:100006 amt:30 bgn:20110407152900 end:20110407152950 ela:50
reqid:100007 amt:30 bgn:20110407152900 end:20110407152950 ela:50
resp:7 sumamt:240
-------------------------------------------------------------------------

4.1 a1.awk
-------------------------------------------------------------------------
# Convert a compact "YYYYMMDDHHMMSS" timestamp into the space-separated
# "YYYY MM DD HH MM SS" form that gawk's mktime() takes as its argument.
function datespec(ts){
      return substr(ts,1,4) " " substr(ts,5,2) " " substr(ts,7,2) " " substr(ts,9,2) " " substr(ts,11,2) " " substr(ts,13,2);
}

# Print one report line for a request/response pair.
#   v1 - request id
#   v2 - amount
#   v3 - request (begin) timestamp, YYYYMMDDHHMMSS; may be empty
#   v4 - response (end) timestamp,  YYYYMMDDHHMMSS; may be empty
# The elapsed time in seconds is printed as "ela"; it is 0 when either
# timestamp is missing.
# "ela" is listed after the real parameters so that it is local to the
# function; callers still pass exactly four arguments.
function myprintf(v1,v2,v3,v4,    ela){
      if(length(v3)>0 && length(v4)>0) {
            ela = mktime(datespec(v4)) - mktime(datespec(v3));
      } else {
            ela = 0;
      }
      printf("reqid:%s amt:%s bgn:%s end:%s ela:%d \n",v1,v2,v3,v4,ela);
}
# Split every record on "=" so that $2, $3, ... hold the text that follows
# each successive "=" sign in the log line.
BEGIN {
      FS = "="
}
# Request record, e.g. "request=100001,amt=30,time=20110407152900".
# With "=" as the field separator: $2 is "<id>,amt", $3 is "<amount>,time"
# and $4 is the request timestamp.
/request/ {
      reqp += 1

      # Strip the name of the following key to obtain the bare request id.
      id = $2
      sub(",amt", "", id)

      # Same for the amount.
      amt = $3
      sub(",time", "", amt)

      # Remember amount and begin time under the request id until the
      # matching response record shows up.
      reqa[id, 1] = amt
      reqa[id, 2] = $4

      # Running total of all requested amounts.
      sumamt += amt

      next
}

# Response record, e.g. "response=100001,time=20110407152910".
# With "=" as the field separator: $2 is "<id>,time" and $3 is the
# response timestamp.  Prints the report line for the request and then
# forgets the request so that memory use stays bounded.
/response/{
           # Strip the name of the following key to obtain the bare request id.
           vb=$2;sub(",time","",vb);
           myprintf(vb,reqa[vb,1],reqa[vb,2],$3);
           # reqa elements are stored under the two-part keys (id,1) and
           # (id,2); "delete reqa[vb]" names neither of them and therefore
           # removes nothing.  Delete both real elements instead.
           delete reqa[vb,1];
           delete reqa[vb,2];
           resp++;
           next;
}
# Summary line: number of response records seen and the total amount of
# all request records.
END {
      printf "resp:%d sumamt:%d \n", resp, sumamt
}

-------------------------------------------------------------------------

4.2 a2.awk
-------------------------------------------------------------------------
# Split every record on "=" so that $2, $3, ... hold the text that follows
# each successive "=" sign in the log line.
BEGIN {
      FS = "="
}
# Request record, e.g. "request=100001,amt=30,time=20110407152900".
# Each request gets the next sequence number (reqp); its id, amount and
# begin time are stored in reqa[seq,1..3], and reqida maps the request id
# back to that sequence number for the response rule.
/request/ {
      ++reqp

      # $2 is "<id>,amt": strip the name of the following key.
      id = $2
      sub(",amt", "", id)

      # $3 is "<amount>,time": strip the name of the following key.
      amt = $3
      sub(",time", "", amt)

      reqa[reqp, 1] = id
      reqa[reqp, 2] = amt
      reqa[reqp, 3] = $4
      reqida[id] = reqp

      # Running total of all requested amounts.
      sumamt += amt

      next
}

# Response record, e.g. "response=100001,time=20110407152910".
# Looks up the request's sequence number through reqida and, when one is
# found, stores the response timestamp in reqa[seq,4].  Every response
# record is counted in resp, matched or not.
/response/ {
      # $2 is "<id>,time": strip the name of the following key.
      rid = $2
      sub(",time", "", rid)

      resp += 1

      seq = reqida[rid]
      if (seq > 0)
            reqa[seq, 4] = $3

      next
}

# Convert a compact "YYYYMMDDHHMMSS" timestamp into the space-separated
# "YYYY MM DD HH MM SS" form passed to mktime().
function to_datespec(ts) {
      return sprintf("%s %s %s %s %s %s", substr(ts,1,4), substr(ts,5,2), substr(ts,7,2), substr(ts,9,2), substr(ts,11,2), substr(ts,13,2))
}

# Report every request in arrival order, then the summary line.
# A request without a recorded response time is printed with an empty
# "end" and an elapsed time of 0.
END {
      for (n = 1; n <= reqp; n++) {
            bgn = reqa[n, 3]
            fin = reqa[n, 4]

            # Elapsed seconds, only when both timestamps are present.
            secs = 0
            if (length(bgn) > 0 && length(fin) > 0)
                  secs = mktime(to_datespec(fin)) - mktime(to_datespec(bgn))

            printf("reqid:%s amt:%s bgn:%s end:%s ela:%d \n", reqa[n, 1], reqa[n, 2], bgn, fin, secs)
      }
      printf("resp:%d sumamt:%d \n", resp, sumamt)
}

-------------------------------------------------------------------------

4.3 a3.awk
-------------------------------------------------------------------------
# Split every record on "=" so that $2, $3, ... hold the text that follows
# each successive "=" sign in the log line.
BEGIN {
      FS = "="
}
# Request record, e.g. "request=100001,amt=30,time=20110407152900".
# Each request gets the next sequence number (reqp); its id, amount and
# begin time are stored in reqa[seq,1..3].
/request/ {
      ++reqp

      # $2 is "<id>,amt": strip the name of the following key.
      id = $2
      sub(",amt", "", id)

      # $3 is "<amount>,time": strip the name of the following key.
      amt = $3
      sub(",time", "", amt)

      reqa[reqp, 1] = id
      reqa[reqp, 2] = amt
      reqa[reqp, 3] = $4

      # Running total of all requested amounts.
      sumamt += amt

      next
}

# Response record, e.g. "response=100001,time=20110407152910".
# Scans the stored requests from the first one onward for a matching id;
# on the first match the response timestamp goes into reqa[seq,4] and the
# response is counted.  Responses with no matching request are ignored.
/response/ {
      # $2 is "<id>,time": strip the name of the following key.
      rid = $2
      sub(",time", "", rid)

      # Advance until the id matches or the stored requests are exhausted.
      k = 1
      while (k <= reqp && reqa[k, 1] != rid)
            k++

      if (k <= reqp) {
            reqa[k, 4] = $3
            resp++
      }

      next
}

# Convert a compact "YYYYMMDDHHMMSS" timestamp into the space-separated
# "YYYY MM DD HH MM SS" form passed to mktime().
function to_datespec(ts) {
      return sprintf("%s %s %s %s %s %s", substr(ts,1,4), substr(ts,5,2), substr(ts,7,2), substr(ts,9,2), substr(ts,11,2), substr(ts,13,2))
}

# Report every request in arrival order, then the summary line.
# A request without a recorded response time is printed with an empty
# "end" and an elapsed time of 0.
END {
      for (n = 1; n <= reqp; n++) {
            bgn = reqa[n, 3]
            fin = reqa[n, 4]

            # Elapsed seconds, only when both timestamps are present.
            secs = 0
            if (length(bgn) > 0 && length(fin) > 0)
                  secs = mktime(to_datespec(fin)) - mktime(to_datespec(bgn))

            printf("reqid:%s amt:%s bgn:%s end:%s ela:%d \n", reqa[n, 1], reqa[n, 2], bgn, fin, secs)
      }
      printf("resp:%d sumamt:%d \n", resp, sumamt)
}
-------------------------------------------------------------------------

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/18922393/viewspace-696345/,如需轉載,請註明出處,否則將追究法律責任。