import pandas as pd from typing import Union, List from copy import deepcopy
def deduct_by_sort(basedf: pd.DataFrame, sortby: List[str], ascending: List[bool], deductdf: pd.DataFrame, key: Union[str, List[str]], deductfield:str) -> pd.DataFrame: """ 按排序後扣減每行的數量. df = pd.DataFrame([ {'item': 'A', 'qty': 10}, {'item': 'A', 'qty': 20}, {'item': 'B', 'qty': 100}, {'item': 'B', 'qty': 200}, {'item': 'C', 'qty': 400}, {'item': 'D', 'qty': 400}, ]) df1 = pd.DataFrame([ {'item': 'A', 'qty': 14}, {'item': 'B', 'qty': 140}, {'item': 'C', 'qty': 30}, ]) # 結果 item qty 0 A 0 1 A 16 2 B 0 3 B 160 4 C 370 5 D 400 """ def validate(keys: list, df: pd.DataFrame): if not set(keys).issubset(set(list(df.columns))): raise KeyError('df必須包含所有的key') keylist = key if isinstance(key, list) else [key] for tmpdf in [basedf, deductdf]: validate(keylist, tmpdf) basedf = basedf.sort_values(by=sortby, ascending=ascending) basedf_not_in_deductdf = deepcopy(basedf) for k in keylist: basedf_not_in_deductdf = basedf_not_in_deductdf[~basedf_not_in_deductdf[k].isin(deductdf[k])] deductdf = deductdf.groupby(key, as_index=False)[[deductfield]].sum() r = pd.DataFrame() for _, row in deductdf.iterrows(): balance = row[deductfield] for _, hrow in basedf.iterrows(): a = True for k in keylist: if hrow[k] != row[k]: a = False break if a: if balance >= hrow[deductfield]: balance = balance - hrow[deductfield] hrow[deductfield] = 0 else: hrow[deductfield] = hrow[deductfield] - balance balance = 0 r = pd.concat([r, pd.DataFrame([hrow])], axis=0) r = pd.concat([r, basedf_not_in_deductdf], axis=0) return r