排序後扣減每行的數量

CrossPython發表於2024-08-13
import pandas as pd
from typing import Union, List
from copy import deepcopy

  

def deduct_by_sort(basedf: pd.DataFrame, sortby: List[str], ascending: List[bool],
                   deductdf: pd.DataFrame, key: Union[str, List[str]], deductfield:str) -> pd.DataFrame:
    """
    按排序後扣減每行的數量.
    df = pd.DataFrame([
        {'item': 'A', 'qty': 10},
        {'item': 'A', 'qty': 20},
        {'item': 'B', 'qty': 100},
        {'item': 'B', 'qty': 200},
        {'item': 'C', 'qty': 400},
        {'item': 'D', 'qty': 400},
    ])
    df1 = pd.DataFrame([
        {'item': 'A', 'qty': 14},
        {'item': 'B', 'qty': 140},
        {'item': 'C', 'qty': 30},
    ])
    # 結果
       item  qty
    0    A    0
    1    A   16
    2    B    0
    3    B  160
    4    C  370
    5    D  400
    """
    def validate(keys: list, df: pd.DataFrame):
        if not set(keys).issubset(set(list(df.columns))):
            raise KeyError('df必須包含所有的key')
    keylist = key if isinstance(key, list) else [key]
    for tmpdf in [basedf, deductdf]:
        validate(keylist, tmpdf)
    basedf = basedf.sort_values(by=sortby, ascending=ascending)
    basedf_not_in_deductdf = deepcopy(basedf)
    for k in keylist:
        basedf_not_in_deductdf = basedf_not_in_deductdf[~basedf_not_in_deductdf[k].isin(deductdf[k])]
    deductdf = deductdf.groupby(key, as_index=False)[[deductfield]].sum()
    r = pd.DataFrame()
    for _, row in deductdf.iterrows():
        balance = row[deductfield]
        for _, hrow in basedf.iterrows():
            a = True
            for k in keylist:
                if hrow[k] != row[k]:
                    a = False
                    break
            if a:
                if balance >= hrow[deductfield]:
                    balance = balance - hrow[deductfield]
                    hrow[deductfield] = 0
                else:
                    hrow[deductfield] = hrow[deductfield] - balance
                    balance = 0
                r = pd.concat([r, pd.DataFrame([hrow])], axis=0)
    r = pd.concat([r, basedf_not_in_deductdf], axis=0)
    return r

  

相關文章