在日常开发中,我们经常需要将 Python 对象数据导出到 Excel 表格,或者从 Excel 导入数据到 Python 程序中。传统做法是直接操作单元格坐标,比如:
# Traditional approach: every value is bound to a hard-coded cell coordinate.
ws['A1'] = "姓名"
ws['B1'] = "年龄"
ws['A2'] = user.name
ws['B2'] = user.age
这种方式的缺点显而易见:单元格坐标被硬编码在代码里,字段与列的对应关系分散在各处;一旦新增、删除字段或调整列顺序,就必须逐一修改相关坐标,既繁琐又容易出错。
我在实际项目中遇到过这样一个案例:一个包含50多个字段的报表导出功能,因为新增了一个字段,导致开发人员需要手动修改几十处单元格引用,不仅耗时还容易遗漏。
openpyxl 提供了三种主要的工作模式:新建工作簿、加载已有工作簿,以及面向大数据量的只写(write_only)模式,依次示例如下:
from openpyxl import Workbook

wb = Workbook()  # create a new workbook
ws = wb.active  # grab the active worksheet
ws.title = "员工数据"  # rename the worksheet
from openpyxl import load_workbook

wb = load_workbook('existing_file.xlsx')  # load an existing file
ws = wb['Sheet1']  # look a worksheet up by name
wb = Workbook(write_only=True)  # enable write-only mode
ws = wb.create_sheet()  # a sheet must be created explicitly in this mode
重要提示:在只写模式下,无法读取已写入的数据,也无法使用某些高级功能,但内存占用极低。
高效读取数据:
# Read the header row
headers = [cell.value for cell in ws[1]]

# Read the data row by row (recommended)
for row in ws.iter_rows(min_row=2, values_only=True):
    print(row)  # e.g. ('张三', 28, '研发部')

# Read the data column by column (only for specific scenarios)
for col in ws.iter_cols(min_row=2, values_only=True):
    print(col)
批量写入数据:
data = [
    ['张三', 28, '研发部'],
    ['李四', 32, '市场部'],
]

# Option 1: append each record after the last used row (simplest).
for row in data:
    ws.append(row)

# Option 2: write to explicit coordinates; use this when the rows must land
# at a fixed position. start=1 targets row 1, so raise it if the sheet
# already has a header row there.
for row_idx, row_data in enumerate(data, start=1):
    for col_idx, value in enumerate(row_data, start=1):
        ws.cell(row=row_idx, column=col_idx, value=value)
让 Excel 报表看起来更专业的关键在于样式控制:
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill

# Reusable style objects
header_font = Font(bold=True, color="FFFFFF", size=12)
header_fill = PatternFill(
    start_color="4F81BD",
    end_color="4F81BD",
    fill_type="solid",
)
border = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin'),
)
# Built once and shared instead of creating a new Alignment per header cell.
header_alignment = Alignment(horizontal="center")

# Style the header row
for cell in ws[1]:
    cell.font = header_font
    cell.fill = header_fill
    cell.border = border
    cell.alignment = header_alignment

# Give every data cell a border
for row in ws.iter_rows(min_row=2):
    for cell in row:
        cell.border = border
使用 Python 的 dataclass 定义数据模型是最佳实践:
from dataclasses import dataclass, field
from typing import Optional, List
from datetime import date


@dataclass
class Employee:
    """Employee record used as the data model for the Excel mapping examples."""

    id: int
    name: str
    department: str
    salary: float
    # Defaults to the day the instance is created.
    join_date: date = field(default_factory=date.today)
    # default_factory gives every instance its own list.
    skills: List[str] = field(default_factory=list)
    notes: Optional[str] = None
我们扩展基础映射器,增加更多实用功能:
import inspect
from typing import Any, Dict, List, Optional, Type, get_origin

from openpyxl.utils import get_column_letter
class ExcelMapper:
def __init__(self, model_class: Type,
header_style: Dict[str, Any] = None,
data_style: Dict[str, Any] = None):
self.model_class = model_class
self.fields = inspect.get_annotations(model_class)
self.header_style = header_style or {}
self.data_style = data_style or {}
def write_header(self, worksheet, start_row=1):
"""写入表头并应用样式"""
for col_idx, (field_name, _) in enumerate(self.fields.items(), 1):
cell = worksheet.cell(row=start_row, column=col_idx, value=field_name)
for attr, value in self.header_style.items():
setattr(cell, attr, value)
def obj_to_row(self, obj, worksheet, row_idx):
"""将对象写入指定行"""
for col_idx, (field_name, field_type) in enumerate(self.fields.items(), 1):
value = getattr(obj, field_name)
# 特殊类型处理
if isinstance(value, (list, tuple)):
value = ", ".join(map(str, value))
elif isinstance(value, bool):
value = "是" if value else "否"
cell = worksheet.cell(row=row_idx, column=col_idx, value=value)
for attr, value in self.data_style.items():
setattr(cell, attr, value)
def rows_to_objs(self, worksheet, start_row=2):
"""从Excel读取数据并转换为对象列表"""
objs = []
for row in worksheet.iter_rows(
min_row=start_row,
max_col=len(self.fields),
values_only=True
):
if not any(row): # 跳过空行
continue
data = {}
for (field_name, field_type), value in zip(self.fields.items(), row):
# 类型转换逻辑
if field_type == bool and isinstance(value, str):
value = value.lower() in ('true', '是', 'yes', '1')
elif field_type == int and isinstance(value, float):
value = int(value)
elif field_type == float and isinstance(value, str):
value = float(value.replace(',', ''))
elif inspect.isclass(field_type) and issubclass(field_type, list):
value = [item.strip() for item in value.split(',')] if value else []
data[field_name] = value
objs.append(self.model_class(**data))
return objs
def auto_fit_columns(self, worksheet):
"""自动调整列宽"""
for col_idx, field_name in enumerate(self.fields.keys(), 1):
max_length = len(str(field_name))
for row in worksheet.iter_rows():
cell_value = row[col_idx-1].value
if cell_value:
max_length = max(max_length, len(str(cell_value)))
worksheet.column_dimensions[get_column_letter(col_idx)].width = max_length + 2
from datetime import date

from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, PatternFill

# 1. Prepare the data
employees = [
    Employee(
        id=1,
        name="张三",
        department="研发部",
        salary=15000,
        join_date=date(2020, 5, 10),
        skills=["Python", "SQL", "Docker"],
        notes="优秀员工",
    ),
    Employee(
        id=2,
        name="李四",
        department="市场部",
        salary=12000,
        join_date=date(2021, 3, 15),
        skills=["市场营销", "PPT"],
        notes="新员工",
    ),
]

# 2. Set up the workbook
wb = Workbook()
ws = wb.active
ws.title = "员工数据"

# 3. Configure the mapper
header_style = {
    'font': Font(bold=True, color="FFFFFF"),
    'fill': PatternFill(start_color="4F81BD", fill_type="solid"),
    'alignment': Alignment(horizontal="center"),
}
mapper = ExcelMapper(Employee, header_style=header_style)

# 4. Write the header, then one row per employee starting at row 2
mapper.write_header(ws)
for idx, emp in enumerate(employees, 2):
    mapper.obj_to_row(emp, ws, idx)

# 5. Auto-fit the column widths
mapper.auto_fit_columns(ws)

# 6. Save the file
wb.save("employees_with_style.xlsx")
实际业务中经常遇到多级表头的情况:
def write_complex_header(ws, headers):
    """Write a two-level header into rows 1 and 2 of ``ws``.

    Each entry of ``headers`` is a dict with a group ``name`` and a list of
    ``children`` column titles, for example::

        [
            {"name": "基本信息", "children": ["姓名", "年龄"]},
            {"name": "工作信息", "children": ["部门", "职位"]}
        ]

    The group name is written in row 1 above its children, merged across
    them when there is more than one. The children are written in row 2,
    left to right. Groups with no children occupy no columns and are skipped.
    """
    col_idx = 1
    for header in headers:
        children = header["children"]
        span = len(children)
        if span == 0:
            continue
        if span > 1:
            ws.merge_cells(
                start_row=1, end_row=1,
                start_column=col_idx, end_column=col_idx + span - 1,
            )
        # Anchor the group name on this group's own first column, so that
        # single-child groups are placed correctly as well.
        ws.cell(row=1, column=col_idx, value=header["name"])
        for offset, child in enumerate(children):
            ws.cell(row=2, column=col_idx + offset, value=child)
        col_idx += span
当处理超过10万行数据时,需要特殊优化:
import random
from datetime import date


def generate_large_data(count=100_000):
    """Yield ``count`` synthetic employee rows, one list per row.

    Each row is ``[id, name, department, salary, join_date]``. Ids run from
    1 to ``count``; department, salary and join date are drawn at random.
    Rows are produced lazily so the full data set is never held in memory.
    """
    departments = ["研发部", "市场部", "人事部", "财务部"]
    for i in range(1, count + 1):
        yield [
            i,
            f"员工_{i}",
            random.choice(departments),
            random.randint(5000, 30000),
            # The day is capped at 28 so it is valid in every month.
            date(2010 + random.randint(0, 10),
                 random.randint(1, 12),
                 random.randint(1, 28)),
        ]
# Stream the rows into a write-only workbook to keep memory usage low.
wb = Workbook(write_only=True)
ws = wb.create_sheet("大数据示例")

# The header row goes in first ...
header_row = ["ID", "姓名", "部门", "薪资", "入职日期"]
ws.append(header_row)

# ... followed by the generated records, appended one at a time.
for record in generate_large_data():
    ws.append(record)

wb.save("large_data.xlsx")
处理特殊数据类型如日期、枚举等:
class EnhancedExcelMapper(ExcelMapper):
    """ExcelMapper variant that also serialises dates and enum members."""

    def obj_to_row(self, obj, worksheet, row_idx):
        """Write ``obj`` into row ``row_idx``, one field per column.

        Dates are written as ``YYYY-MM-DD`` text and enum members by name.
        Sequences and booleans are rendered as in the base class, and
        ``data_style`` is applied to every written cell.
        """
        for col_idx, (field_name, field_type) in enumerate(self.fields.items(), 1):
            value = getattr(obj, field_name)
            if isinstance(value, date):
                # datetime is a subclass of date, so its time part is dropped.
                value = value.strftime('%Y-%m-%d')
            elif hasattr(field_type, '__members__'):  # annotation is an Enum class
                # Compare with None so falsy members (e.g. value 0) survive.
                value = value.name if value is not None else None
            elif isinstance(value, (list, tuple)):
                value = ", ".join(map(str, value))
            elif isinstance(value, bool):
                value = "是" if value else "否"
            cell = worksheet.cell(row=row_idx, column=col_idx, value=value)
            for attr, style in self.data_style.items():
                setattr(cell, attr, style)
使用 read_only 模式读取大文件
问题1:打开文件时报错 "File contains corrupted data"
问题2:写入的数字在Excel中显示为文本
from openpyxl.styles import numbers

# A number format only controls how a numeric value is displayed; a value
# written as str stays text. Convert it to a real number first.
if isinstance(cell.value, str):
    cell.value = float(cell.value.replace(",", ""))
cell.number_format = numbers.FORMAT_NUMBER
问题3:日期显示为数字
# Display the stored date value as a date instead of its serial number.
cell.number_format = 'YYYY-MM-DD'
我在实际项目中总结出一个经验:对于复杂的报表导出需求,最好先设计好数据模型和映射关系,然后再实现具体的导出逻辑。这样可以避免后期频繁调整代码结构。