對一個20667行的xlsx檔案進行遍歷測試
import pandas as pd
# Decorator that reports how long the decorated function or method takes to run.
def print_execute_time(func):
    """Wrap *func* so that every call prints its execution time.

    Args:
        func: The function or method to be timed.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        *func*, prints ``<func name>() execute time: <seconds>s`` and then
        returns whatever *func* returned. If *func* raises, the exception
        propagates and nothing is printed.
    """
    from functools import wraps
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short intervals; time() can be coarse and may jump if the system clock
    # is adjusted.
    from time import perf_counter

    # wraps() keeps the decorated function's __name__, __doc__, etc. visible
    # on the wrapper instead of reporting everything as "wrapper".
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Sandwich the call between two clock readings and keep its result.
        start = perf_counter()
        func_return = func(*args, **kwargs)
        end = perf_counter()
        # Report the function name together with the elapsed seconds.
        print(f'{func.__name__}() execute time: {end - start}s')
        # Hand the original return value back to the caller.
        return func_return

    return wrapper
# Workbook that serves as the benchmark input.
file_path = r"D:\git\xxxx\dev\pd-xxx1.2\合併.xlsx"
data = pd.read_excel(
    file_path,
    sheet_name="xxxx",
    engine="openpyxl",
)
# Null handling: keep every non-null cell and pass None as the replacement
# for cells that are null.
df = data.where(data.notna(), None)
@print_execute_time
def iterrows():
    """Benchmark one full pass over ``df`` with ``DataFrame.iterrows()``.

    The loop body is deliberately empty so that only the cost of producing
    each ``(index label, row)`` pair is measured. Returns None.
    """
    for _label, _row_series in df.iterrows():
        # A cell could be read here by column name, e.g. _row_series[<column>].
        continue
@print_execute_time
def itertuples():
    """Benchmark one full pass over ``df`` with ``DataFrame.itertuples()``.

    The loop body is deliberately empty so that only the cost of producing
    each row tuple is measured. Returns None.
    """
    for _record in df.itertuples():
        # NOTE(review): itertuples() yields tuples, so a field would be read
        # by attribute or position rather than with a ['column'] lookup.
        continue
@print_execute_time
def iteritems():
    """Benchmark one full pass over ``df`` column by column.

    ``DataFrame.iteritems()`` was deprecated in pandas 1.5 and removed in
    pandas 2.0; ``DataFrame.items()`` is the equivalent call and is available
    in both old and new pandas versions, so it is used here.

    Note that this iterates over *columns*: each step yields a
    ``(column name, column Series)`` pair, so the loop runs once per column,
    not once per row. Its timing is therefore not directly comparable with
    the row-wise benchmarks. Returns None.
    """
    for _column_name, _column in df.items():
        # Empty on purpose: only the iteration overhead is being timed.
        pass
@print_execute_time
def index():
    """Benchmark one full pass over the labels in ``df.index``.

    Only the index labels are visited; no cell of the frame is read, so this
    measures the bare cost of walking the index. Returns None.
    """
    for _label in df.index:
        # A single cell could be fetched here with df[<column>].at[_label].
        continue
if __name__ == '__main__':
    print('begining ...')
    # Run the benchmarks in their original order. Each one prints its own
    # timing line and returns None, so the return values are not printed
    # (doing so would emit a stray "None None None None" line).
    for benchmark in (iterrows, itertuples, iteritems, index):
        benchmark()
    print('Done !')
測試結果
begining ...
iterrows() execute time: 2.003657817840576s
itertuples() execute time: 0.04618692398071289s
iteritems() execute time: 0.0009987354278564453s
index() execute time: 0.0029909610748291016s
Done !
iterrows() execute time: 2.2464449405670166s
itertuples() execute time: 0.08178043365478516s
iteritems() execute time: 0.000997781753540039s
index() execute time: 0.0059833526611328125s
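因此從效率上考慮,可優先採用iteritems或index來進行遍歷資料。
需要注意:iteritems(pandas 2.0 起已移除,請改用 items)是按欄(column)遍歷,迴圈次數等於欄數而非 20667 列(row),
所以它的耗時不能與 iterrows、itertuples 直接比較;若需要逐列(row)取值,itertuples 或以 index 搭配 at 存取都比 iterrows 快得多。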