excel轉json操作

胡桃夾子zy發表於2021-01-01

工作中需要用到將從資料庫中下載的excel每行資料轉成json檔案,用於規則回溯,參考網上資料,通過以下程式碼可實現:

import pandas as pd
import numpy as np
import json
import datetime

# 匯入資料
# 由於phone2有缺失值,如果不加converters ={'phone2':str},導致讀入會變成float形式,導致有值的手機號碼後會加點0,如13812341234.0
data= pd.read_excel(r'C:\Users\Administrator\Desktop\20201229142002.xlsx',converters ={'phone2':str})

# 資料處理
#時間轉成datetime 後再轉成str,這樣匯出json後時間格式到時分秒,後面不會加.0
data.apply_submit_time = pd.to_datetime(data.apply_submit_time)
data.apply_submit_time = data.apply_submit_time.astype('str')
#將缺失值填充""空字元,即使nan轉json程式不會報錯,但是把轉好的json放在json格式校正中,會提示錯誤,所以都填充空字串。
data.fillna(value="",inplace=True)

# 拆分資料
# 由於匯出的資料帶有連續人資訊,每個聯絡人一行,如果提供多個連續人,會導致同一進件多條記錄,需要將資料做區分
# 將資料分成兩部分 聯絡人人及非聯絡人
# 第一步:非聯絡人部分去重,寫唯一值
# 第二步:聯絡人部分,迴圈寫入列表
# 用apply_id 或 transport_id 關聯

data_a = data.iloc[:,:52]
data_a.drop_duplicates(inplace=True)
data_contact = data[['old_transport_id','apply_id','contact_name','relation_me','relation_me_desc','telephone','mortgagor_contact_type','mortgagor_contact_type_desc']]

# 轉json用到的格式,可根據需求更改
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return str(obj)
        elif isinstance(obj, np.floating):
            return str(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)


# 具體轉換
starttime = datetime.datetime.now()

for i in data.old_transport_id.unique():
    l = data_a[data_a.old_transport_id == i].reset_index()
    transprot_id = l.old_transport_id[0]    # 當檔名
    test_dict = {}
# 層級一級,沒有巢狀
    test_dict['name']=l.cust_name[0]
    test_dict['idNumber']=l.certificate_num[0]
    test_dict['mobile']=l.phone1[0]
    test_dict['mobile2']=l.phone2[0]
    test_dict['intoTime']=l.apply_submit_time[0]
    test_dict['submitDeptName']=l.submit_dept_name[0]
    test_dict['submitDeptNo']=l.submit_dept_no[0]
    test_dict['submitDeptDescInfo']=l.submit_dept_desc_info[0]
    test_dict['transport_id']=l.old_transport_id[0]
    test_dict['productType']=l.apply_product[0]
    test_dict['applyId']=l.apply_id[0]
    test_dict['occupationalCode'] = l.POSITION_CODE[0]
 # 公司      
    company_dict = {}

    company_dict['name'] =l.org_name[0]
    company_dict['recruitmentDate'] =l.entry_date[0]
    # 地址
    company_adress_dict = {}
    company_adress_dict['province'] =l.org_province_desc[0]
    company_adress_dict['city'] =l.org_city_desc[0]
    company_adress_dict['district'] =l.org_county_desc[0]
    company_adress_dict['detail'] =l.org_addr[0]

    company_dict['address'] =company_adress_dict
        
    # 電話
    company_phone_dict = {}
    company_phone_dict['phoneNumber'] = l.org_phone[0]
    company_phone_dict['number'] = l.dept_phone[0]
    company_phone_dict['areaCode'] = ""

    company_dict['phoneNumber'] =company_phone_dict
    # 合併到大字典中
    test_dict['company'] = company_dict
# 居住地
    adress_dict={}
    adress_dict['province'] = l.house_province_desc[0]
    adress_dict['city'] = l.house_city_desc[0]
    adress_dict['district'] = l.house_county_desc[0]
    adress_dict['detail'] = l.house_addr[0]
    # 合併到大字典中
    test_dict['address'] = adress_dict
# 戶籍地
    domicile_dict={}
    domicile_dict['province'] =l.census_province_desc[0]
    domicile_dict['city'] =l.census_city_desc[0]
    domicile_dict['district'] =l.census_county_desc[0]
    domicile_dict['detail'] =l.census_addr[0]
    # 合併到大字典彙總
    test_dict['domicile'] = domicile_dict
#車輛資訊
    car_dict={}
    car_dict['vehicleChassisNumber'] = l.car_no[0]
    car_dict['carFactoryDate'] = l.car_factory_date[0]
    car_dict['carRegister'] = l.car_register[0]
    car_dict['recentlyReplacementDate'] = l.recently_replacement_date[0]
    car_dict['carBrandRemark'] = l.car_brand_remark[0]
    car_dict['carBrand'] = l.car_brand_desc[0]
    car_dict['carSeriesRemark'] = l.car_series_remark[0]
    car_dict['carSeries'] = l.car_series_desc[0]

    # 合併到大字典中
    test_dict['car'] = car_dict
# 聯絡人資訊
    df = data_contact[data_contact.old_transport_id == i]
    contact_list=[]
    for j in range(df.shape[0]): 
            s = df.iloc[j,:]
            contact_dict = {}
            contact_dict['borrowerRelation'] = s.relation_me
            contact_dict['name'] = s.contact_name
            contact_dict['mobile'] = s.telephone
            contact_dict['relationship'] = s.mortgagor_contact_type_desc
            contact_dict['borrowerRelationDesc'] = s.relation_me_desc
            contact_list.append(contact_dict)
    # 合併到大字典中
    test_dict['contacts'] = contact_list
    # 寫出
    b = json.dumps(test_dict,ensure_ascii=False,cls=NpEncoder)
    with open(r'C:\Users\Administrator\Desktop\json\{}.json'.format(transprot_id),'w',encoding='utf-8') as f_w:
        f_w.write(b)
        
endtime = datetime.datetime.now()  
print (endtime - starttime)

相關文章