源码仅供学习参考用,请勿用于其他用途
import pandas as pd
import numpy as np
import math
# ================= 1. 读取与合并 =================
df_sz = pd.read_excel('对账单.xlsx', sheet_name='深圳', skiprows=4, keep_default_na=False, dtype=str)
df_sz['发货地'] = '深圳'
df_sh = pd.read_excel('对账单.xlsx', sheet_name='上海', skiprows=4, keep_default_na=False, dtype=str)
df_sh['发货地'] = '上海'
df_all = pd.concat([df_sz, df_sh], ignore_index=True)
df_all = df_all[df_all['原单号'] != '']
# ================= 2. 数据类型提前统一清洗 =================
# 将所有参与计算的列一次性转化为浮点数,无法转换的直接变 0,省去后续无尽的报错检查
num_cols =['称重', '抛重', '长宽高之和', '计费重量', '欠费总金额']
for col in num_cols:
df_all[col] = pd.to_numeric(df_all[col], errors='coerce').fillna(0)
# ================= 3. 渠道清洗与标准化 =================
df_map = pd.read_excel('映射表.xlsx')
channel_map = dict(zip(df_map['账单中的渠道字段'], df_map['清洗后']))
df_all['渠道_清洗'] = df_all['渠道'].map(channel_map).fillna(df_all['渠道'])
conditions = [
df_all['渠道_清洗'].str.contains('佐川', na=False),
df_all['渠道_清洗'].str.contains('黑猫', na=False)
]
choices = [df_all['发货地'] + df_all['渠道_清洗'], '黑猫投函']
df_all['标准渠道名称'] = np.select(conditions, choices, default=df_all['渠道_清洗'])
# ================= 4. 处理计费重量 (5%免抛逻辑) =================
df_all['系统计费重量'] = 0.0
total_tickets = len(df_all)
max_exemptions = int(total_tickets * 0.05)
count_over_120 = len(df_all[df_all['长宽高之和'] >= 120])
mask_under_120 = df_all['长宽高之和'] < 120
mask_120_140 = (df_all['长宽高之和'] >= 120) & (df_all['长宽高之和'] < 140)
mask_over_140 = df_all['长宽高之和'] >= 140
# 规则1 & 规则3 (利用 Numpy 向量化取最大值)
df_all.loc[mask_under_120, '系统计费重量'] = df_all.loc[mask_under_120, '称重']
df_all.loc[mask_over_140, '系统计费重量'] = df_all.loc[mask_over_140, ['称重', '抛重']].max(axis=1)
# 规则2
idx_120_140 = df_all[mask_120_140].index.tolist()
if count_over_120 <= max_exemptions:
df_all.loc[idx_120_140, '系统计费重量'] = df_all.loc[idx_120_140, '称重']
else:
excess_count = count_over_120 - max_exemptions
if excess_count >= len(idx_120_140):
df_all.loc[idx_120_140, '系统计费重量'] = df_all.loc[idx_120_140,['称重', '抛重']].max(axis=1)
else:
exempt_idx = idx_120_140[:(len(idx_120_140) - excess_count)]
throw_idx = idx_120_140[(len(idx_120_140) - excess_count):]
df_all.loc[exempt_idx, '系统计费重量'] = df_all.loc[exempt_idx, '称重']
df_all.loc[throw_idx, '系统计费重量'] = df_all.loc[throw_idx, ['称重', '抛重']].max(axis=1)
df_all['重量'] = df_all['系统计费重量']
df_all = df_all[df_all['重量'] > 0]
# ================= 5. 计算基础运费 (提速优化) =================
df_rule = pd.read_excel('规则表.xlsx')
# 【优化】:按渠道名称将规则分组缓存为字典,避免 apply 循环中重复检索 DataFrame,提速极其明显!
rules_dict = {channel: group for channel, group in df_rule.groupby('渠道名称')}
def compute_fee_fast(row):
channel = row['标准渠道名称']
weight = row['重量']
sum_cm = row['长宽高之和'] # 前面已经转为float了,这里直接用
if channel not in rules_dict:
return 0.0
lookup_weight = max(weight, 10.01) if sum_cm >= 120 else weight
sub = rules_dict[channel] # 直接从字典取规则,瞬间完成
matched = sub[(sub['重量下限 (含)'] <= lookup_weight) & (lookup_weight < sub['重量上限 (不含)'])]
if matched.empty:
return 0.0
rule = matched.iloc[0]
if weight <= rule['首重 (kg)']:
return rule['首重费']
else:
extra_units = math.ceil(round((weight - rule['首重 (kg)']) / rule['续重单位 (kg)'], 5))
return rule['首重费'] + extra_units * rule['续重单价']
df_all['运费'] = df_all.apply(compute_fee_fast, axis=1)
# ================= 6. 其他附加费 (全面向量化优化) =================
df_all['检品费'] = 2.0
# 6.1 偏远费优化:使用 np.select,告别逐行扫描
is_remote_addr = df_all['收件地址'].str.contains('北海道|沖縄', na=False)
is_sagawa = df_all['标准渠道名称'].str.contains('佐川', na=False)
is_jhs = df_all['标准渠道名称'].str.contains('聚划算', na=False)
remote_conds =[
(~is_remote_addr), # 不偏远
(is_remote_addr & is_sagawa & (df_all['长宽高之和'] >= 120)), # 偏远+佐川+>=120
(is_remote_addr & is_sagawa & (df_all['长宽高之和'] < 120)), # 偏远+佐川+<120
(is_remote_addr & is_jhs) # 偏远+聚划算
]
remote_vals =[0, 70, 50, 20]
df_all['计算偏远费'] = np.select(remote_conds, remote_vals, default=0)
# 6.2 超长费优化:使用 pd.cut 区间映射神器
# 因为 >=240 都是 300 元,所以最后一段直接写 240 到 无穷大(float('inf'))
bins =[-1, 159.99, 180, 200, 220, 240, float('inf')]
labels =[0, 100, 120, 150, 200, 300]
# 直接映射
df_all['计算超长费'] = pd.cut(df_all['长宽高之和'], bins=bins, labels=labels).astype(float)
# ================= 7. 汇总与输出 =================
df_all['运费'] = df_all['运费'] + df_all['检品费'] + df_all['计算偏远费'] + df_all['计算超长费']
# 计算差异
df_all['运费差异'] = df_all['运费'] - df_all['欠费总金额']
df_all.to_excel('对账单_运费计算结果.xlsx', index=False)
print("✅ 对账计算完成!已输出文件。")