python解析fiddler匯出txt流量資料為json格式

相交的直線發表於2020-10-29

fiddler代理獲取的資料,如何解析為流量json格式
txt 格式為
(此處原有示例圖片,展示 Fiddler 匯出的 txt 檔案格式:請求行、請求頭、空行、請求體、狀態行、響應頭、空行、響應體,各請求之間以長橫線分隔)

#! -*- coding:utf-8 -*-
import argparse
import copy
import os
import sys
import traceback
import urlparse

# Make the script's parent directory importable so the project-local
# ``public`` package resolves regardless of the current working directory.
path_cur = os.path.dirname(os.path.realpath(__file__))
path_parent = "%s/../" % path_cur
sys.path.append(path_parent)
from public.DDlog import DDlog

# Project-wide logger; DDlog is a project-local logging wrapper — its
# exact behavior is not visible from this file.
logger = DDlog(log_type='api')
# logger = log("HttpParseLog")


class HttpParseLog:
    '''
    Parse a Fiddler-exported txt traffic dump into a list of dicts.

    Each captured request/response pair in the dump is terminated by a
    long dashed separator line.  ``work`` walks the file once, groups the
    raw lines per transaction, then extracts request and response
    metadata from each group.
    '''

    def __init__(self):
        logger.info("HttpParseLog")
        # Separator line Fiddler writes between captured sessions.
        self.request_end = "------------------------------------------------------------------"

    def work(self, http_data_path):
        '''
        Parse the Fiddler dump at ``http_data_path``.

        :param http_data_path: path to the exported txt file
        :return: list of dicts (one per captured HTTP transaction), with
                 the keys laid out in the template below
        '''
        # Template for one parsed transaction; deep-copied per request so
        # the mutable list values are never shared between entries.
        request_json = {
            "uri": "",
            "method": "",
            "scheme": "",
            "host": "",
            "params": "",
            "request_header": [],
            "referer": "",
            # two blank lines follow the request header in the dump
            # (GET: blank+blank, POST: blank+post body)
            "request_body": "",
            "response_status": "",
            "response_header": [],
            "response_content_type": "",
            "discovery_time": "",
            # one blank line precedes the response body
            "response_body": "",
            "response_body_raw_length": 0
        }
        logger.info("work:%s" % http_data_path)
        http_models = []
        for request_data in self._split_requests(http_data_path):
            request_json_tmp = copy.deepcopy(request_json)
            self._parse_request(request_data, request_json_tmp)
            http_models.append(request_json_tmp)
        return http_models

    def _split_requests(self, http_data_path):
        # Group the raw file lines into one list per transaction,
        # splitting on the dashed separator and skipping blank lines that
        # precede a transaction.
        requests_data = []
        request_d = []
        request_flag = True
        # BUGFIX: the original iterated ``open(...)`` directly and leaked
        # the file handle; close it deterministically.
        f = open(http_data_path)
        try:
            for line in f:
                if line.startswith(self.request_end):
                    requests_data.append(request_d)
                    request_d = []
                    request_flag = True
                    continue
                if line and line != self.request_end:
                    line_data_tmp = line.replace("\r", '').replace("\n", '')
                    if request_flag and line_data_tmp == '':
                        logger.info(u"請求開始前的空格忽略")
                        request_flag = False
                        continue
                    request_flag = False
                    request_d.append(line)
        finally:
            f.close()
        # Robustness: keep a trailing transaction that is not followed by
        # a final separator line (the original silently dropped it).
        if request_d:
            requests_data.append(request_d)
        return requests_data

    def _parse_request_line(self, line, request_json_tmp):
        # First line of a transaction: "<METHOD> <URL> <HTTP/x.y>".
        url_property = line.split(" ")
        request_json_tmp['method'] = url_property[0]
        r = urlparse.urlparse(url_property[1])
        request_json_tmp['host'] = r.netloc
        # NOTE(review): the fragment is appended without a '#' separator,
        # matching the original behavior — confirm this is intended.
        request_json_tmp['uri'] = r.path + r.fragment
        request_json_tmp['scheme'] = r.scheme
        request_json_tmp['params'] = r.query

    def _parse_request(self, request_data, request_json_tmp):
        # Layout of one transaction:
        #   line 0: request line (method/url/version)
        #   request headers ... up to the first blank line
        #   request body right after the blank line (empty for GET)
        #   status line "HTTP/x.y <code> <reason>" on the next line
        #   response headers ... up to the second blank line
        #   everything after the second blank line is the response body
        null_line_num = 0
        request_header = []
        response_header = []
        response_body = []
        first_null_line_num = 0       # index of the request-body line
        response_status_line_num = 0  # index of the status line
        for i in range(len(request_data)):
            line = request_data[i]
            if i == 0:
                self._parse_request_line(line, request_json_tmp)
                continue
            line_data = line.replace("\r", '').replace("\n", '')
            if null_line_num == 1 and i == first_null_line_num:
                request_json_tmp['request_body'] = line_data
                logger.info("get request_body data:%s" % line_data)
                continue
            if null_line_num == 1 and i == response_status_line_num:
                logger.info("get response status data:%s" % line_data)
                response_status_data = line_data.split(" ")
                # Robustness: guard against a malformed status line.
                if len(response_status_data) > 1:
                    # BUGFIX: the original stored this under the wrong key
                    # ('status'); the template declares 'response_status'.
                    request_json_tmp['response_status'] = response_status_data[1]
                continue
            if line_data == '':
                null_line_num += 1
                if null_line_num == 1:
                    first_null_line_num = i + 1
                    response_status_line_num = i + 2
                    request_json_tmp['request_header'] = request_header
                if null_line_num == 2:
                    request_json_tmp['response_header'] = response_header
            elif null_line_num == 0:
                # Request-header section.
                if line_data.startswith("Referer"):
                    # BUGFIX: split on the first ':' only — the original
                    # split on every colon, truncating "http://..." URLs
                    # at "http".
                    request_json_tmp['referer'] = line_data.split(":", 1)[1].strip()
                    continue
                request_header.append(line_data)
            elif null_line_num == 1:
                # Response-header section; also capture Content-Type/Date.
                if line_data.startswith("Content-Type"):
                    # Split on the first ':' only for the same reason.
                    request_json_tmp['response_content_type'] = line_data.split(":", 1)[1].strip()
                    continue
                if line_data.startswith("Date"):
                    request_json_tmp['discovery_time'] = line_data[line_data.index(':') + 1:]
                    continue
                response_header.append(line_data)
            elif null_line_num == 2:
                response_body.append(line_data)
        # The body lines are joined with spaces; the recorded length is the
        # length of that joined string (matches the original behavior).
        request_json_tmp["response_body"] = " ".join(response_body)
        request_json_tmp["response_body_raw_length"] = len(request_json_tmp["response_body"])


if __name__ == '__main__':
    try:
        # CLI entry point: parse the Fiddler dump given on the command
        # line and print the extracted transactions.
        parser = argparse.ArgumentParser()
        parser.add_argument("path", type=str, help="path")
        args = parser.parse_args()
        http_parse_cli = HttpParseLog()
        meta = http_parse_cli.work(args.path)
        print(meta)
    # BUGFIX: "except Exception, ex" is Python-2-only syntax; the "as"
    # form works on Python 2.6+ and Python 3.
    except Exception as ex:
        logger.error("Error: %s" % ex)
        logger.error(traceback.format_exc())

相關文章