(新手)使用pandas操作EXCEL

一個專注於學習的屌絲發表於2019-01-08
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
#path = r`C:Users slDesktop資料.xlsx`
#一列資料前面新增字串
def add_C(village_data):
village_data[`電話`] = [`C%s` % y for y in village_data[`電話`]]
return village_data[`電話`]
#讀取excel
data = pd.read_excel(r`C:Users slDesktop資料.xlsx`)
#判定某列中是否有null,如果有刪除null 行
if data[`電話`].isnull().any():
#將excel裡面空值修改
data[`電話`] = data[`電話`].fillna(`999`)
#得到999值的索引室號
data_index = data[data.電話==`999`].index.tolist()
#刪除
data = data.drop(data_index)
#以街道分組
group_by_name = data.groupby(`街道`)
#需要groups得出具體結果
for i in group_by_name.groups:
village_data = data.loc[data[`街道`] == i ]
add_C(village_data)
  #以樓棟分組
group_by_name_build = village_data.groupby(`樓棟`)
for build_name in group_by_name_build.groups:
build_data = village_data.loc[village_data[`樓棟`] == build_name]
build_data = build_data.drop_duplicates(subset=`室號`, keep=`first`)
      #更改列的資料型別
build_data[`室號`] = build_data[`室號`].astype(np.str)
row_index = 0
for house_num in build_data[`室號`]:
if `-` in house_num:
house_num = house_num.replace(`-`,`9`) #並沒有修改原記憶體地址的值,固需要賦值給原有記憶體地址
#選區某行某列並賦值
build_data.iat[row_index,2] = house_num
row_index +=1
del build_data[`街道`]
del build_data[`樓棟`]
#修改列名
build_data.columns = [`使用者編碼`,`指定開門`,`電話號碼`]
build_data[`指定開門`] = `雙門`
build_data_row_index = 0
for modify_num in build_data[`使用者編碼`]:
if len(modify_num) < 4:
modify_num = `0` + modify_num #並沒有修改原記憶體地址的值,固需要賦值給原有記憶體地址
build_data.iat[build_data_row_index,0] = modify_num
build_data_row_index +=1
#儲存/生成新的excle
DataFrame(build_data).to_excel(`C:\Users\tslDesktopinfo\%s.xls` % (build_name),sheet_name=`Sheet1`,index=False,header=True )

##最佳化後
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
#讀取excel
#path = r`C:Users slDesktop資料.xlsx`
def Build_data(build_data):
#一個有索引和行內容的迭代器
for data_line_index,data_line in build_data.iterrows():
if `-` in data_line[`使用者編碼`]:
data_line[`使用者編碼`] = data_line[`使用者編碼`].replace(`-`,`9`)
# house_num = house
if len(data_line[`使用者編碼`])<4:
data_line[`使用者編碼`] = `0`+ data_line[`使用者編碼`]
data_line[`電話號碼`] = data_line[`電話號碼`][:-2]
return build_data
def add_C(village_data):
village_data[`電話`] = [`C%s` % y for y in village_data[`電話`]]
return village_data[`電話`]
data = pd.read_excel(r`C:Users slDesktop資料.xlsx`)
data_index = 0
#判定列中是否有null,如果有刪除null 行
if data[`電話`].isnull().any():
#將excel裡面空值修改
data[`電話`] = data[`電話`].fillna(`999`)
#得到999值的索引室號
data_index_1 = data[data.電話==`999`].index.tolist()
#刪除
data = data.drop(data_index_1)
#已街道分組
group_by_name = data.groupby(`街道`)
#需要groups得出具體結果
for i in group_by_name.groups:
village_data = data.loc[data[`街道`] == i ]
add_C(village_data)
group_by_name_build = village_data.groupby(`樓棟`)
for build_name in group_by_name_build.groups:
build_data = village_data.loc[village_data[`樓棟`] == build_name]
build_data = build_data.drop_duplicates(subset=`室號`, keep=`first`)
del build_data[`街道`]
del build_data[`樓棟`]
#修改列名
build_data.columns = [`使用者編碼`,`指定開門`,`電話號碼`]
build_data[`指定開門`] = `雙門`
build_data = build_data.astype(np.str)
build_data = Build_data(build_data)
DataFrame(build_data).to_excel(`C:\Users\tslDesktopinfo\%s.xls` % (build_name),sheet_name=`Sheet1`,index=False,header=True )


相關文章