When we call model.fit() in Python, we rarely stop to think that this one simple line of code may be entrenching structural inequalities. The Amazon AI recruiting tool reported in 2018 is a textbook case: trained on historical hiring data, the model systematically scored female applicants lower. That was not a coding bug, but the inevitable result of bias hidden in the data being amplified by machine learning.
At its core, algorithmic bias reflects blind spots in the human decisions made during data collection, feature engineering, and model design. Take credit scoring: if residents of a region historically received fewer loans because of policy restrictions, the model will "rationally" perpetuate that pattern of discrimination. We typically screen for bias along three dimensions.
Note: p < 0.05 is only a threshold for statistical significance. In practice, even at p = 0.06, intervention may still be warranted if the between-group difference carries business or ethical impact.
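One way to act on this note (a minimal sketch; `should_intervene`, its thresholds, and the use of Cohen's d are illustrative assumptions, not part of the audit code below) is to pair the p-value with an effect size, so that practically large gaps get reviewed even when they narrowly miss significance:

```python
import numpy as np

def should_intervene(group_a, group_b, p_value, alpha=0.05, effect_threshold=0.2):
    """Flag a gap for review when it is statistically significant OR
    practically large (Cohen's d above an assumed 'small effect' cutoff)."""
    n_a, n_b = len(group_a), len(group_b)
    # Pooled variance across the two groups
    pooled_var = ((n_a - 1) * np.var(group_a, ddof=1) +
                  (n_b - 1) * np.var(group_b, ddof=1)) / (n_a + n_b - 2)
    cohens_d = (np.mean(group_a) - np.mean(group_b)) / np.sqrt(pooled_var)
    return (p_value < alpha) or (abs(cohens_d) >= effect_threshold)
```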
Before modeling, we need to audit the raw data for bias. Below is a complete workflow implemented with Pandas and SciPy:
```python
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def detect_feature_bias(df, target, sensitive_feature, threshold=0.05):
    """Detect bias at the feature level.

    Parameters:
        df: DataFrame containing the features and the target
        target: str, name of the target variable
        sensitive_feature: str, name of the sensitive feature (e.g. 'gender')
        threshold: float, significance threshold

    Returns:
        bias_report: dict with each group's mean difference and statistical significance
    """
    groups = df[sensitive_feature].unique()
    bias_report = {}
    for group in groups:
        group_data = df[df[sensitive_feature] == group][target]
        others_data = df[df[sensitive_feature] != group][target]
        # Mean difference between this group and everyone else
        mean_diff = group_data.mean() - others_data.mean()
        # Welch's t-test (independent samples, unequal variances)
        t_stat, p_val = ttest_ind(group_data, others_data, equal_var=False)
        bias_report[group] = {
            'mean_difference': mean_diff,
            'p_value': p_val,
            'is_biased': p_val < threshold
        }
    return bias_report

# Example usage
bias_report = detect_feature_bias(df, 'loan_approved', 'gender')
print(pd.DataFrame(bias_report).T)
```
Sample output:

```
   mean_difference  p_value  is_biased
F           -0.125   0.1234      False
M            0.125   0.1234      False
```
For production-grade systems, Fairlearn is recommended for multi-dimensional evaluation:
```python
from fairlearn.metrics import (
    MetricFrame,
    demographic_parity_ratio,
    equalized_odds_difference,
    false_negative_rate,
    false_positive_rate,
    make_derived_metric,
    selection_rate,
    true_positive_rate,
)

# Recent fairlearn versions do not export a ready-made TPR-difference metric,
# so we derive one from true_positive_rate
true_positive_rate_difference = make_derived_metric(
    metric=true_positive_rate, transform='difference'
)

def comprehensive_bias_audit(y_true, y_pred, sensitive_features):
    metrics = {
        'demographic_parity': demographic_parity_ratio,
        'equalized_odds': equalized_odds_difference,
        'tpr_diff': true_positive_rate_difference
    }
    results = {}
    for name, metric in metrics.items():
        try:
            results[name] = metric(y_true, y_pred,
                                   sensitive_features=sensitive_features)
        except Exception as e:
            results[name] = f"Error: {str(e)}"
    # Per-group breakdown of selection rate and error rates
    metric_frame = MetricFrame(
        metrics={
            'selection_rate': selection_rate,
            'fpr': false_positive_rate,
            'fnr': false_negative_rate
        },
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=sensitive_features
    )
    return {
        'summary_metrics': results,
        'detailed_breakdown': metric_frame.by_group
    }

# Example usage
audit_results = comprehensive_bias_audit(
    y_test,
    predictions,
    sensitive_features=s_test
)
print(audit_results['summary_metrics'])
print(audit_results['detailed_breakdown'])
```
How to read the key metrics: a demographic_parity_ratio close to 1 means selection rates are similar across groups, with values below 0.8 commonly treated as a red flag (the "four-fifths rule"); an equalized_odds_difference close to 0 means true- and false-positive rates are similar across groups; a tpr_diff far from 0 means the model misses qualified members of one group more often than another.
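As a rough illustration, the audit summary can be turned into a simple pass/fail check. This is a sketch: `flag_fairness_issues` and the 0.8 / 0.1 cutoffs are assumptions, not universal standards.

```python
def flag_fairness_issues(summary_metrics,
                         dp_ratio_floor=0.8,     # assumed four-fifths-rule cutoff
                         eo_diff_ceiling=0.1,    # assumed tolerance for equalized odds gap
                         tpr_diff_ceiling=0.1):  # assumed tolerance for TPR gap
    """Return a dict of metric -> bool indicating which thresholds are violated."""
    dp = summary_metrics.get('demographic_parity')
    eo = summary_metrics.get('equalized_odds')
    tpr = summary_metrics.get('tpr_diff')
    return {
        'demographic_parity': isinstance(dp, float) and dp < dp_ratio_floor,
        'equalized_odds': isinstance(eo, float) and abs(eo) > eo_diff_ceiling,
        'tpr_diff': isinstance(tpr, float) and abs(tpr) > tpr_diff_ceiling,
    }

print(flag_fairness_issues(audit_results['summary_metrics']))
```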
Intervening at the data level is the most fundamental fix. Below is an improved reweighting implementation:
```python
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import resample

class BiasMitigationSampler:
    def __init__(self, sensitive_feature, target, strategy='reweight'):
        self.sensitive_feature = sensitive_feature
        self.target = target
        self.strategy = strategy
        self.sample_weights_ = None

    def fit(self, X, y):
        df = X.copy()
        df[self.target] = y
        # Count each (group, label) cell and the global positive rate
        group_counts = df.groupby([self.sensitive_feature, self.target]).size()
        global_pos_rate = df[self.target].mean()
        # Compute the reweighting / undersampling coefficients
        weights = {}
        for (group, target_val), count in group_counts.items():
            group_pos_rate = df[df[self.sensitive_feature] == group][self.target].mean()
            if self.strategy == 'reweight':
                # Inversely proportional to how far the group's positive rate
                # deviates from the global rate
                weights[(group, target_val)] = global_pos_rate / group_pos_rate
            elif self.strategy == 'undersample':
                # Undersampling fraction
                weights[(group, target_val)] = min(1, group_pos_rate / global_pos_rate)
        self.sample_weights_ = weights
        return self

    def transform(self, X, y):
        if self.strategy == 'reweight':
            df = X.copy()
            df[self.target] = y
            df['weight'] = 1.0
            for (group, target_val), weight in self.sample_weights_.items():
                mask = (df[self.sensitive_feature] == group) & (df[self.target] == target_val)
                df.loc[mask, 'weight'] = weight
            return X, y, df['weight'].values
        elif self.strategy == 'undersample':
            df = X.copy()
            df[self.target] = y
            sampled_dfs = []
            for (group, target_val), weight in self.sample_weights_.items():
                group_df = df[(df[self.sensitive_feature] == group) &
                              (df[self.target] == target_val)]
                sampled_df = resample(group_df,
                                      replace=False,
                                      n_samples=int(len(group_df) * weight),
                                      random_state=42)
                sampled_dfs.append(sampled_df)
            sampled_df = pd.concat(sampled_dfs)
            return sampled_df.drop(columns=[self.target]), sampled_df[self.target], None

    def fit_transform(self, X, y):
        # Convenience wrapper so the sampler can be used in a single call
        return self.fit(X, y).transform(X, y)

# Example usage
sampler = BiasMitigationSampler('gender', 'loan_approved', strategy='reweight')
X_resampled, y_resampled, sample_weights = sampler.fit_transform(X_train, y_train)
# Pass the sample weights to the model at training time
model = RandomForestClassifier()
model.fit(X_resampled, y_resampled, sample_weight=sample_weights)
```
Fairlearn's reduction approach lets us add fairness constraints directly during training:
```python
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Base estimator
base_model = LogisticRegression(max_iter=1000)
# Fairness constraint: allow at most a 5% gap in selection rates
constraint = DemographicParity(difference_bound=0.05)
# Build the reduction-based optimizer
mitigator = ExponentiatedGradient(
    estimator=base_model,
    constraints=constraint,
    eps=0.01  # allowed constraint violation
)
# Train the constrained model
mitigator.fit(X_train, y_train, sensitive_features=s_train)
# Evaluate
fair_predictions = mitigator.predict(X_test)
print(classification_report(y_test, fair_predictions))
```
For a model that has already been trained, fairness can be achieved by adjusting the decision threshold:
```python
from fairlearn.postprocessing import ThresholdOptimizer

postprocessor = ThresholdOptimizer(
    estimator=model,                      # pre-trained base model
    constraints="equalized_odds",         # equalize error rates across groups
    objective="balanced_accuracy_score",  # optimization objective
    prefit=True
)
postprocessor.fit(X_train, y_train, sensitive_features=s_train)
fair_predictions = postprocessor.predict(X_test, sensitive_features=s_test)
```
Bias governance is not a one-off task; it requires a continuous monitoring mechanism:
```python
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics import *

def generate_fairness_report(reference_data, current_data, sensitive_features):
    column_mapping = ColumnMapping(
        target='loan_approved',
        prediction='prediction',
        numerical_features=['age', 'income'],
        categorical_features=['gender']
    )
    report = Report(metrics=[
        DataDriftTable(),
        ClassificationQualityMetric(),
        ClassificationClassBalance(),
        ClassificationConfusionMatrix(),
        ClassificationQualityByClass(),
        ClassificationQualityByFeatureTable(columns=sensitive_features)
    ])
    report.run(
        reference_data=reference_data,
        current_data=current_data,
        column_mapping=column_mapping
    )
    return report
```
Key monitoring checks: the snippets below cover input-data validation, learning fair feature representations, fairness metrics computed online at serving time, and a pre-release impact assessment.
Input-data validation with Great Expectations (the income checks below are a simplified stand-in for a full distribution test):

```python
import great_expectations as ge

# Wrap the DataFrame so expectations can be attached to it
validator = ge.from_pandas(df)
validator.expect_column_values_to_not_be_null('gender')
# Simplified stability checks on the income column
validator.expect_column_values_to_be_between('income', min_value=0)
validator.expect_column_values_to_not_be_null('income', mostly=0.99)
```
Learning fair feature representations with AIF360 (in the sklearn-compatible API, prot_attr refers to a level of the DataFrame index):

```python
from aif360.sklearn.preprocessing import LearnedFairRepresentations

transformer = LearnedFairRepresentations(
    prot_attr='gender',
    random_state=42
)
X_fair = transformer.fit_transform(X, y)
```
Fairness metrics computed online at serving time (a sketch around a Flask app and the previously trained model):

```python
import numpy as np
from flask import Flask, request, jsonify
from fairlearn.metrics import demographic_parity_ratio

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    pred = model.predict(np.asarray(data['features']))
    fairness_metrics = {
        'demographic_parity': float(demographic_parity_ratio(
            # Demographic parity ignores the true labels, but the API still
            # expects an array of matching length, so reuse the predictions
            y_true=pred,
            y_pred=pred,
            sensitive_features=data['gender']
        ))
    }
    return jsonify({
        'prediction': pred.tolist(),
        'fairness_metrics': fairness_metrics
    })
```
Pre-release impact assessment, comparing the demographic parity gap of the old and new models:

```python
from fairlearn.metrics import demographic_parity_difference

def impact_assessment(old_model, new_model, X_test, s_test):
    old_pred = old_model.predict(X_test)
    new_pred = new_model.predict(X_test)
    # Demographic parity ignores the true labels, so the predictions serve
    # as the required y_true placeholder
    old_dpd = demographic_parity_difference(old_pred, old_pred,
                                            sensitive_features=s_test)
    new_dpd = demographic_parity_difference(new_pred, new_pred,
                                            sensitive_features=s_test)
    return {
        'delta_dpd': new_dpd - old_dpd,
        'improvement': abs(new_dpd) < abs(old_dpd)
    }
```
In real projects, we have found that the most effective strategy is a combination: balance the data in pre-processing, add fairness constraints during training, and fine-tune with threshold optimization at the end. Just as important is establishing a cross-functional AI ethics review process and making fairness metrics part of the model's KPI system, rather than tracking accuracy or AUC alone.
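As a minimal sketch of that last point (the `release_gate` helper and its thresholds are assumptions, not an established standard), a release gate can require both predictive and fairness KPIs to pass before a model ships:

```python
from sklearn.metrics import roc_auc_score
from fairlearn.metrics import demographic_parity_difference

def release_gate(y_true, y_score, y_pred, sensitive_features,
                 min_auc=0.75, max_dpd=0.1):
    """The model ships only if AUC is high enough AND the demographic
    parity gap is small enough. Both thresholds are assumed values."""
    kpis = {
        'auc': roc_auc_score(y_true, y_score),
        'demographic_parity_difference': demographic_parity_difference(
            y_true, y_pred, sensitive_features=sensitive_features
        ),
    }
    passed = (kpis['auc'] >= min_auc and
              abs(kpis['demographic_parity_difference']) <= max_dpd)
    return passed, kpis

# Example: gate the threshold-optimized model from the previous section
# passed, kpis = release_gate(y_test, scores, fair_predictions, s_test)
```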