Python办公自动化实战：从文件处理到邮件通知

sched yield

1. Python自动化办公的价值与准备

作为一名长期与办公自动化打交道的开发者，我深刻体会到Python在这个领域的强大能力。每天我们都在处理大量重复性工作：整理文件、合并表格、发送邮件、抓取数据...这些看似简单的任务，累积起来却消耗了大量宝贵时间。而Python正是解决这些痛点的利器。

1.1 为什么选择Python实现办公自动化

Python在自动化办公领域具有三大核心优势：

丰富的库生态系统：从文件操作(os, shutil)到数据处理(pandas, numpy)，从邮件处理(smtplib)到网页抓取(requests, BeautifulSoup)，几乎所有办公场景都有成熟的解决方案。
简洁高效的语法：相比其他语言，Python代码更接近自然语言，开发效率极高。一个几十行的Python脚本往往能替代数小时的手工操作。
跨平台兼容性：Python脚本可以在Windows、macOS和Linux系统上无缝运行，确保自动化流程在不同环境中都能稳定执行。

1.2 环境配置与工具准备

在开始编写自动化脚本前，我们需要搭建好开发环境。以下是经过我多年实践验证的最佳配置方案：

bash复制# 推荐使用Python 3.8+版本
python --version  # 检查Python版本

# 安装核心库
pip install pandas openpyxl python-docx pyautogui 
pip install PyPDF2 pillow requests beautifulsoup4
pip install schedule email-to  # smtplib 属于Python标准库，无需（也无法）通过pip安装

# 开发工具建议
- VS Code + Python插件：轻量级但功能强大
- Jupyter Notebook：适合数据分析和快速原型开发
- PyCharm Professional：大型项目首选

提示：建议使用虚拟环境管理项目依赖，避免库版本冲突：
bash复制python -m venv office_auto
source office_auto/bin/activate  # Linux/macOS
office_auto\Scripts\activate    # Windows

2. 文件处理自动化实战

2.1 智能文件批量重命名

文件重命名是最常见的重复性工作之一。下面这个增强版脚本不仅能添加前缀，还能根据文件类型、创建日期等属性进行智能命名：

python复制import os
from datetime import datetime

def _candidate_name(naming_rule, kind, stamp, ext, seq):
    """Build the file name a naming rule assigns for sequence number ``seq``."""
    if naming_rule == 'type_date':
        # seq 0 is the plain name; collisions get a numeric suffix (_1, _2, ...).
        suffix = f"_{seq}" if seq else ""
        return f"{kind}_{stamp}{suffix}{ext}"
    if naming_rule == 'type_seq':
        return f"{kind}_{seq:03d}{ext}"
    # Any other value is treated as 'date_seq'.
    return f"{stamp}_{seq}{ext}"


def smart_rename(folder_path, naming_rule='type_date'):
    """
    Batch-rename the regular files directly inside ``folder_path``.

    Every file receives a unique target name: a per-group sequence number is
    advanced until the candidate name is free on disk, so no file is ever
    overwritten. Files are processed in sorted name order so the result is
    deterministic, and a file that already carries its target name is left
    untouched (re-running is safe).

    :param folder_path: directory whose files are renamed (not recursive)
    :param naming_rule: 'type_date' -> ``<ext>_<YYYYMMDD>[_<n>]<.ext>``,
        'type_seq' -> ``<ext>_<NNN><.ext>``, anything else ('date_seq') ->
        ``<YYYYMMDD>_<n><.ext>``
    """
    files = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )
    next_seq = {}  # naming group -> next sequence number to try

    for filename in files:
        filepath = os.path.join(folder_path, filename)
        _, ext = os.path.splitext(filename)
        # Files without an extension would otherwise get an empty type prefix.
        kind = ext[1:] or 'file'
        # NOTE: st_ctime is the creation time on Windows but the last
        # metadata-change time on Unix.
        changed = datetime.fromtimestamp(os.stat(filepath).st_ctime)
        stamp = f"{changed:%Y%m%d}"

        if naming_rule == 'type_date':
            key, first = (ext, stamp), 0
        elif naming_rule == 'type_seq':
            key, first = (ext,), 1
        else:
            key, first = (stamp,), 1

        # Advance the group's counter until the name is free (or is this
        # file's current name, in which case nothing needs to be done).
        seq = next_seq.get(key, first)
        while True:
            new_name = _candidate_name(naming_rule, kind, stamp, ext, seq)
            seq += 1
            if new_name == filename:
                break
            if not os.path.exists(os.path.join(folder_path, new_name)):
                break
        next_seq[key] = seq

        if new_name == filename:
            continue

        os.rename(filepath, os.path.join(folder_path, new_name))
        print(f"Renamed: {filename} -> {new_name}")

# 使用示例
smart_rename("./documents", naming_rule='type_date')

关键改进点：

支持多种命名策略，适应不同场景需求
自动提取文件扩展名和创建时间
避免文件名冲突的计数器机制

2.2 多维度文件分类整理

传统的文件分类往往只考虑扩展名，而实际工作中我们可能需要更复杂的分类逻辑。下面这个脚本支持基于内容、大小、时间等多维度分类：

python复制import os
import shutil
import filetype  # pip install filetype

def advanced_file_organizer(source_dir, target_dir,
                            size_threshold=10 * 1024 * 1024,
                            date_cutoff='2022-01-01'):
    """
    Move the regular files of ``source_dir`` into category sub-folders.

    Categories are decided in this order: ``Large_Files`` (size above
    ``size_threshold``), ``Archive`` (modified before ``date_cutoff``),
    ``Images`` (content sniffed as an image by ``filetype``), otherwise
    ``Others``. A file is never overwritten: if the destination name is
    already taken, ``_1``, ``_2``, ... is appended to the stem.

    :param source_dir: directory to scan (not recursive)
    :param target_dir: directory under which category folders are created
    :param size_threshold: size in bytes above which a file is "large"
        (default 10 MB)
    :param date_cutoff: 'YYYY-MM-DD'; files modified before it are archived
    :raises ValueError: if ``date_cutoff`` is not in 'YYYY-MM-DD' format
    """
    os.makedirs(target_dir, exist_ok=True)

    # Parse the cutoff once instead of once per file.
    cutoff = datetime.strptime(date_cutoff, '%Y-%m-%d')

    for filename in os.listdir(source_dir):
        filepath = os.path.join(source_dir, filename)
        if not os.path.isfile(filepath):
            continue

        size = os.path.getsize(filepath)
        mtime = datetime.fromtimestamp(os.path.getmtime(filepath))

        if size > size_threshold:
            category = 'Large_Files'
        elif mtime < cutoff:
            category = 'Archive'
        else:
            # Content sniffing reads the file, so only do it when the
            # cheaper size/date rules have not already decided.
            kind = filetype.guess(filepath)
            if kind and kind.mime.split('/')[0] == 'image':
                category = 'Images'
            else:
                category = 'Others'

        category_dir = os.path.join(target_dir, category)
        os.makedirs(category_dir, exist_ok=True)

        # Pick a free destination name so an earlier file with the same
        # name is not silently replaced.
        stem, ext = os.path.splitext(filename)
        destination = os.path.join(category_dir, filename)
        serial = 1
        while os.path.exists(destination):
            destination = os.path.join(category_dir, f"{stem}_{serial}{ext}")
            serial += 1

        shutil.move(filepath, destination)
        
# 使用示例
advanced_file_organizer('./unorganized', './organized')

实用技巧：

使用filetype库比单纯检查扩展名更可靠
exist_ok=True避免重复创建目录时的错误
可根据实际需求调整分类规则和阈值

3. 数据处理自动化方案

3.1 智能Excel数据合并与清洗

处理多个Excel文件时，常规合并往往不能满足复杂需求。这个增强版合并脚本包含自动类型推断、数据校验和智能合并功能：

python复制import pandas as pd
import os
from datetime import datetime

def smart_excel_merger(input_dir, output_file):
    """
    Merge every Excel workbook in ``input_dir`` into one file.

    For each workbook the first sheet is read, column names are stripped and
    lower-cased, columns whose name contains 'date' are parsed as datetimes
    (unparseable values become NaT), and text columns that are entirely
    numeric are converted to numbers. A ``source_file`` column records the
    origin of each row. Rows that are identical apart from ``source_file``
    are dropped before saving.

    Files are processed in sorted order so the result is reproducible.
    Excel lock files (``~$...``) and the output file itself are skipped.
    A workbook that fails to load is reported and skipped.

    :param input_dir: directory containing the .xlsx/.xls files
    :param output_file: path of the merged workbook to write
    """
    dfs = []
    output_abs = os.path.abspath(output_file)

    for file in sorted(os.listdir(input_dir)):
        # '~$name.xlsx' is the lock file Excel creates while a workbook is open.
        if file.startswith('~$'):
            continue
        if not file.lower().endswith(('.xlsx', '.xls')):
            continue

        filepath = os.path.join(input_dir, file)
        # Do not merge a previous output back into itself.
        if os.path.abspath(filepath) == output_abs:
            continue

        try:
            df = pd.read_excel(filepath)

            # Normalise column names; astype(str) copes with numeric headers.
            df.columns = df.columns.astype(str).str.strip().str.lower()

            for col in df.columns:
                if 'date' in col:
                    df[col] = pd.to_datetime(df[col], errors='coerce')

                if df[col].dtype == 'object':
                    # Convert only when the whole column is numeric; otherwise
                    # leave it unchanged (what errors='ignore' used to do).
                    try:
                        df[col] = pd.to_numeric(df[col])
                    except (ValueError, TypeError):
                        pass

            df['source_file'] = file
            dfs.append(df)

        except Exception as e:
            # Best effort: one broken workbook must not abort the whole merge.
            print(f"Error processing {file}: {str(e)}")
            continue

    if dfs:
        merged = pd.concat(dfs, ignore_index=True)

        # Rows are duplicates when they match on everything but their origin.
        dup_cols = [col for col in merged.columns if col != 'source_file']
        merged = merged.drop_duplicates(subset=dup_cols)

        merged.to_excel(output_file, index=False)
        print(f"成功合并 {len(dfs)} 个文件，共 {len(merged)} 条记录")
    else:
        print("没有找到可合并的Excel文件")

# 使用示例
smart_excel_merger('./sales_reports', './merged_sales.xlsx')

核心优势：

自动标准化列名和数据类型
内置数据质量检查与修复
智能处理日期和数值格式
完善的错误处理机制

3.2 自动化数据质量报告生成

合并数据后，我们通常需要评估数据质量。这个脚本会自动生成包含关键指标的数据质量报告：

def generate_data_quality_report(data_file, report_file):
    """
    Write a plain-text/Markdown data quality report for an Excel file.

    The report starts with general information (date, file name, row count
    and, when a usable ``date`` column exists, the covered period) followed
    by one section per column: missing/unique counts plus mean and median
    for numeric columns, the three most frequent values for text columns,
    and the covered time range for datetime columns.

    :param data_file: path of the Excel file to analyse
    :param report_file: path of the UTF-8 report file to write
    """
    df = pd.read_excel(data_file)
    report = [
        f"数据质量报告 - {datetime.now():%Y-%m-%d}",
        f"数据集: {os.path.basename(data_file)}",
        f"总记录数: {len(df):,}",
    ]

    # The period line is only possible when a 'date' column exists and holds
    # at least one parseable date; otherwise it is omitted instead of raising.
    if 'date' in df.columns:
        dates = pd.to_datetime(df['date'], errors='coerce').dropna()
        if not dates.empty:
            report.append(
                f"时间段: {dates.min():%Y-%m-%d} 至 {dates.max():%Y-%m-%d}")

    types = pd.api.types
    for col in df.columns:
        series = df[col]
        report.append(f"\n## {col} 分析")
        report.append(f"- 数据类型: {series.dtype}")

        missing = series.isna().sum()

        # is_numeric_dtype covers every int/float width, not just the 64-bit
        # ones; booleans are excluded because mean/median are not meaningful.
        if types.is_numeric_dtype(series) and not types.is_bool_dtype(series):
            report.append(f"- 缺失值: {missing:,} ({series.isna().mean():.1%})")
            report.append(f"- 唯一值: {series.nunique():,}")
            report.append(
                f"- 统计量: 均值={series.mean():.2f}, 中位数={series.median():.2f}")

        elif types.is_object_dtype(series) or types.is_string_dtype(series):
            report.append(f"- 缺失值: {missing:,}")
            report.append(f"- 唯一值: {series.nunique():,}")
            report.append(
                f"- 高频值: {series.value_counts().nlargest(3).to_dict()}")

        elif types.is_datetime64_any_dtype(series):
            report.append(f"- 缺失值: {missing:,}")
            present = series.dropna()
            # An all-NaT column has no range to report.
            if not present.empty:
                report.append(
                    f"- 时间范围: {present.min():%Y-%m-%d} 至 {present.max():%Y-%m-%d}")

    with open(report_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report))

    print(f"数据质量报告已生成: {report_file}")

# 使用示例
generate_data_quality_report('./merged_sales.xlsx', './data_quality_report.md')

报告内容包含：

数据集基本信息概览
各列详细统计分析
缺失值和异常值检测
数据分布特征
时间范围分析

4. 邮件与通知自动化

4.1 增强版邮件发送系统

基础的邮件发送功能往往不能满足企业级需求。这个增强版邮件系统支持HTML模板、多附件、抄送密送等功能：

python复制import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.utils import formatdate
import os

class EmailSender:
    """Send e-mails with optional attachments over an SMTP-over-SSL connection."""

    def __init__(self, smtp_server, port, username, password):
        """
        :param smtp_server: SMTP host name
        :param port: SMTP SSL port
        :param username: login name, also used as the From address
        :param password: login password or app-specific authorisation code
        """
        self.smtp_server = smtp_server
        self.port = port
        self.username = username
        self.password = password

    @staticmethod
    def _as_list(addresses):
        """Return ``addresses`` as a new list (None -> [], str -> one item)."""
        if not addresses:
            return []
        if isinstance(addresses, str):
            return [addresses]
        return list(addresses)

    def _prepare(self, to, subject, content,
                 attachments=None, cc=None, bcc=None,
                 content_type='html'):
        """Build the MIME message and the full envelope recipient list."""
        to_list = self._as_list(to)
        cc_list = self._as_list(cc)
        bcc_list = self._as_list(bcc)

        msg = MIMEMultipart()
        msg['From'] = self.username
        msg['To'] = ', '.join(to_list)
        msg['Subject'] = subject
        msg['Date'] = formatdate(localtime=True)
        if cc_list:
            msg['Cc'] = ', '.join(cc_list)
        # Bcc addresses are deliberately NOT written to a header: a Bcc header
        # is part of the message text and would reveal the hidden recipients.
        # They only appear in the envelope recipient list below.

        msg.attach(MIMEText(content, content_type))

        for filepath in self._as_list(attachments):
            with open(filepath, 'rb') as f:
                part = MIMEApplication(f.read())
            part.add_header('Content-Disposition', 'attachment',
                            filename=os.path.basename(filepath))
            msg.attach(part)

        # New list: the caller's ``to``/``cc``/``bcc`` objects are never mutated.
        recipients = to_list + cc_list + bcc_list
        return msg, recipients

    def send(self, to, subject, content,
             attachments=None, cc=None, bcc=None,
             content_type='html'):
        """
        Send an e-mail.

        :param to: recipient address or list of addresses
        :param subject: subject line
        :param content: message body
        :param attachments: path or list of paths of files to attach
        :param cc: carbon-copy address or list of addresses
        :param bcc: blind-carbon-copy address or list of addresses
        :param content_type: MIME text subtype of the body ('html' or 'plain')
        :return: True when the message was handed to the server, False when
            connecting, logging in or sending failed (the error is printed)
        :raises OSError: if an attachment cannot be read
        """
        msg, recipients = self._prepare(
            to, subject, content,
            attachments=attachments, cc=cc, bcc=bcc,
            content_type=content_type,
        )

        try:
            with smtplib.SMTP_SSL(self.smtp_server, self.port) as server:
                server.login(self.username, self.password)
                server.sendmail(self.username, recipients, msg.as_string())
        except Exception as e:
            print(f"邮件发送失败: {str(e)}")
            return False

        print("邮件发送成功")
        return True

# 使用示例
sender = EmailSender('smtp.qq.com', 465, 'your_email@qq.com', 'your_password')
sender.send(
    to=['recipient1@qq.com', 'recipient2@qq.com'],
    cc='manager@qq.com',
    subject='月度销售报告',
    content='<h1>2023年5月销售报告</h1><p>详见附件...</p>',
    attachments=['./report.pdf', './data.xlsx']
)

企业级功能：

支持SMTP SSL加密连接
完善的收件人管理（收件人、抄送、密送）
多附件支持
HTML和纯文本内容可选
上下文管理器确保连接安全关闭

4.2 邮件自动化工作流

单一邮件发送还不够，我们通常需要实现完整的邮件工作流。这个类封装了常见邮件自动化场景：

class EmailWorkflow:
    """Template-based e-mail scenarios built on top of a sender object.

    ``sender`` must provide a ``send(to=..., subject=..., content=...,
    attachments=...)`` method. Templates are read from ``template_dir``
    as UTF-8 text.
    """

    def __init__(self, sender):
        self.sender = sender
        self.template_dir = './email_templates'

    def _load_template(self, name):
        """Return the text of template file ``name`` from ``template_dir``."""
        # Explicit UTF-8: the platform default encoding (e.g. GBK on Chinese
        # Windows) would fail on, or garble, UTF-8 templates.
        with open(f'{self.template_dir}/{name}', 'r', encoding='utf-8') as f:
            return f.read()

    def send_daily_report(self, recipients, data_file):
        """
        Send the daily report.

        The ``{{content}}`` placeholder of ``daily.html`` is replaced by a
        summary generated from ``data_file``, which is also attached.

        :param recipients: recipient address or list of addresses
        :param data_file: path of the Excel data file
        """
        report_content = self._generate_report_content(data_file)
        content = self._load_template('daily.html') \
            .replace('{{content}}', report_content)

        self.sender.send(
            to=recipients,
            subject=f"日报 {datetime.now():%Y-%m-%d}",
            content=content,
            attachments=[data_file]
        )

    def send_reminder(self, recipients, task, due_date):
        """
        Send a to-do reminder based on ``reminder.html``.

        :param recipients: recipient address or list of addresses
        :param task: task description, substituted for ``{{task}}``
        :param due_date: due date (string or date object), substituted for
            ``{{due_date}}``
        """
        # str() lets callers pass date/datetime objects as well as strings.
        content = self._load_template('reminder.html') \
            .replace('{{task}}', str(task)) \
            .replace('{{due_date}}', str(due_date))

        self.sender.send(
            to=recipients,
            subject=f"待办事项提醒: {task}",
            content=content
        )

    def _generate_report_content(self, data_file):
        """Return an HTML fragment with ``DataFrame.describe()`` of the file (simplified)."""
        df = pd.read_excel(data_file)
        summary = df.describe().to_html()
        return f"<h2>数据概览</h2>{summary}"

# 使用示例
workflow = EmailWorkflow(sender)
workflow.send_daily_report(['team@company.com'], './daily_data.xlsx')
workflow.send_reminder('john@company.com', '提交季度报告', '2023-06-30')

工作流特点：

模板化内容管理
预置常用邮件场景
数据驱动内容生成
易于扩展新场景

5. 高级自动化技巧

5.1 自动化任务调度与管理

简单的定时任务不能满足复杂调度需求。这个增强版调度系统支持错峰执行、任务依赖和失败重试：

python复制import schedule
import time
from threading import Thread
import logging

class AdvancedScheduler:
    """Thin layer over the ``schedule`` library adding retries, job
    dependencies and per-job status tracking.

    ``self.jobs`` maps a job name to a dict with the keys ``func``,
    ``retry``, ``depends_on``, ``last_run``, ``status`` and ``job``.
    """

    _WEEKDAYS = ('monday', 'tuesday', 'wednesday', 'thursday',
                 'friday', 'saturday', 'sunday')

    def __init__(self):
        self.jobs = {}
        self.logger = logging.getLogger('scheduler')

    @staticmethod
    def _as_names(depends_on):
        """Return ``depends_on`` as a list (None -> [], str -> one name)."""
        if not depends_on:
            return []
        if isinstance(depends_on, str):
            return [depends_on]
        return list(depends_on)

    def add_job(self, name, func, interval='daily', at=None,
               retry=0, depends_on=None, weekday='monday', day_of_month=1):
        """
        Register a scheduled job.

        :param name: job name (key in ``self.jobs``)
        :param func: zero-argument callable to run
        :param interval: 'daily', 'hourly', 'weekly' or 'monthly'
        :param at: time of day such as '10:30'; required for every interval
            except 'hourly'
        :param retry: number of additional attempts after a failure
        :param depends_on: name or list of names of jobs whose status must be
            'success' for this job to run
        :param weekday: day used for 'weekly' jobs ('monday' ... 'sunday')
        :param day_of_month: day used for 'monthly' jobs (1-31); months that
            do not have this day are skipped
        :raises ValueError: on an unsupported interval/at/weekday/day combination
        """
        job_config = {
            'func': func,
            'retry': retry,
            'depends_on': self._as_names(depends_on) or None,
            'last_run': None,
            'status': 'pending'
        }

        if interval == 'daily' and at:
            job = schedule.every().day.at(at).do(self._run_job, name)
        elif interval == 'hourly':
            job = schedule.every().hour.do(self._run_job, name)
        elif interval == 'weekly' and at:
            # schedule rejects ``every().week.at(...)``; a time of day is
            # only accepted for weekly jobs bound to a specific weekday.
            if weekday not in self._WEEKDAYS:
                raise ValueError(f"Invalid weekday: {weekday!r}")
            job = getattr(schedule.every(), weekday).at(at).do(self._run_job, name)
        elif interval == 'monthly' and at:
            # schedule has no month unit: check daily and run only on the
            # requested day of the month.
            if not 1 <= day_of_month <= 31:
                raise ValueError(f"Invalid day_of_month: {day_of_month!r}")
            job = schedule.every().day.at(at).do(
                self._run_monthly, name, day_of_month)
        else:
            raise ValueError("Invalid schedule configuration")

        job_config['job'] = job
        self.jobs[name] = job_config

    def _run_monthly(self, name, day_of_month):
        """Run job ``name`` only when today is ``day_of_month``."""
        if datetime.now().day == day_of_month:
            self._run_job(name)

    def _run_job(self, name):
        """Run job ``name`` once, honouring its dependencies and retry count."""
        job = self.jobs[name]

        # A dependency is met only when that job exists and last succeeded;
        # an unknown dependency name counts as unmet instead of raising.
        unmet = [
            dep for dep in self._as_names(job['depends_on'])
            if self.jobs.get(dep, {}).get('status') != 'success'
        ]
        if unmet:
            self.logger.warning(f"Job {name} skipped due to unmet dependencies")
            return

        job['status'] = 'running'
        job['last_run'] = datetime.now()

        for attempt in range(job['retry'] + 1):
            try:
                job['func']()
                job['status'] = 'success'
                self.logger.info(f"Job {name} completed successfully")
                break
            except Exception as e:
                if attempt == job['retry']:
                    job['status'] = 'failed'
                    self.logger.error(f"Job {name} failed after {attempt+1} attempts: {str(e)}")
                else:
                    # Linear backoff: wait 5s, 10s, 15s, ... between attempts.
                    # This blocks the scheduler loop while waiting.
                    time.sleep(5 * (attempt + 1))

    def start(self):
        """Run the scheduler loop in the calling thread (never returns)."""
        self.logger.info("Scheduler started")
        while True:
            schedule.run_pending()
            time.sleep(1)

    def start_in_thread(self):
        """Start the scheduler loop in a daemon thread and return the thread.

        Because the thread is a daemon, it ends as soon as the main thread
        exits; the caller must keep the process alive.
        """
        thread = Thread(target=self.start, daemon=True)
        thread.start()
        return thread

# 使用示例
def backup_task():
    print("执行备份...")

def report_task():
    print("生成报告...")

scheduler = AdvancedScheduler()
scheduler.add_job('backup', backup_task, interval='daily', at='23:00')
scheduler.add_job('report', report_task, interval='daily', at='09:00',
                 depends_on=['backup'], retry=2)
scheduler.start_in_thread()

高级功能：

多种调度频率配置
任务依赖管理
自动重试机制
线程安全执行
完善的状态跟踪

5.2 自动化异常处理框架

简单的try-except不能满足生产环境需求。这个框架提供了系统化的异常处理和恢复机制：

class AutomationErrorHandler:
    """Run callables with per-exception-type recovery handlers and retries."""

    def __init__(self):
        self.handlers = {}
        self.logger = logging.getLogger('error_handler')

    def register_handler(self, exception_type, handler, max_retries=0):
        """
        Register a recovery handler.

        :param exception_type: exception class to handle (subclasses included)
        :param handler: callable invoked with the exception before each retry
        :param max_retries: maximum number of retries for this handler
        """
        self.handlers[exception_type] = {
            'handler': handler,
            'max_retries': max_retries
        }

    def _find_handler(self, exception_type):
        """Return ``(registered_type, info)`` of the most specific handler.

        Walking the MRO picks the closest registered ancestor, so a handler
        for ``ConnectionError`` wins over one for ``Exception`` regardless of
        registration order. Returns ``(None, None)`` when nothing matches.
        """
        for cls in exception_type.__mro__:
            if cls in self.handlers:
                return cls, self.handlers[cls]
        return None, None

    def execute(self, func, *args, **kwargs):
        """
        Call ``func(*args, **kwargs)``, retrying after handled exceptions.

        :param func: callable to run
        :return: the result of ``func``, or None when an exception has no
            registered handler or the handler's retry budget is exhausted
        """
        # Retries are counted per registered handler, so different subclasses
        # served by one handler share that handler's budget.
        retries = {}

        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                exception_type = type(e)

                matched_type, handler_info = self._find_handler(exception_type)
                if not handler_info:
                    self.logger.error(f"Unhandled exception: {str(e)}")
                    return None

                retries[matched_type] = retries.get(matched_type, 0) + 1

                if retries[matched_type] > handler_info['max_retries']:
                    self.logger.error(f"Max retries exceeded for {exception_type.__name__}")
                    return None

                self.logger.warning(
                    f"Caught {exception_type.__name__}, "
                    f"retry {retries[matched_type]}/{handler_info['max_retries']}"
                )
                # Give the handler a chance to recover, then loop to retry.
                handler_info['handler'](e)

# 使用示例
def unstable_operation():
    import random
    if random.random() < 0.7:
        raise ConnectionError("模拟连接错误")
    return "成功"

handler = AutomationErrorHandler()

# 注册连接错误的处理器
def handle_connection_error(e):
    print(f"处理连接错误: {e}")
    time.sleep(1)  # 等待后重试

handler.register_handler(ConnectionError, handle_connection_error, max_retries=3)

# 安全执行
result = handler.execute(unstable_operation)
print(f"执行结果: {result}")

框架优势：

异常类型分级处理
可配置的重试策略
灵活的处理器注册
透明的执行流程
详细的日志记录

6. 自动化办公最佳实践

6.1 配置管理与环境隔离

经过多个项目的实践，我总结出以下配置管理经验：

多环境配置：使用不同的配置文件区分开发、测试和生产环境

python复制# config_dev.py
DATABASE = "sqlite:///dev.db"
LOG_LEVEL = "DEBUG"

# config_prod.py  
DATABASE = "postgresql://user:pass@prod-db:5432/app"
LOG_LEVEL = "INFO"

# 使用示例
import os
env = os.getenv("ENV", "dev")
config = __import__(f"config_{env}")

敏感信息处理：永远不要将密码等敏感信息硬编码在脚本中

python复制# 推荐方案1：环境变量
import os
db_password = os.getenv('DB_PASSWORD')

# 推荐方案2：加密配置文件
from cryptography.fernet import Fernet

# 生成密钥
key = Fernet.generate_key()
cipher = Fernet(key)

# 加密密码
encrypted = cipher.encrypt(b"my_password")  

# 存储加密后的密码和密钥(分开保存)

依赖隔离：为每个项目创建独立的虚拟环境

bash复制# 创建虚拟环境
python -m venv my_project_env

# 激活环境(Linux/macOS)
source my_project_env/bin/activate

# 激活环境(Windows)  
my_project_env\Scripts\activate

# 导出依赖
pip freeze > requirements.txt

# 安装依赖
pip install -r requirements.txt

6.2 日志记录与监控

完善的日志系统是自动化脚本稳定运行的保障。我推荐以下日志配置：

python复制import logging
from logging.handlers import RotatingFileHandler
import sys

def setup_logging(name):
    """
    Return the logger ``name`` configured with console and rotating file output.

    The console handler prints INFO and above to stdout; the file handler
    writes DEBUG and above to ``<name>.log`` (UTF-8, rotated at 10 MB,
    5 backups kept). Calling the function again for a logger that already
    has handlers returns it unchanged, so messages are not duplicated.

    :param name: logger name, also used as the log file's base name
    :return: the configured ``logging.Logger``
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # getLogger returns the same object for the same name; adding handlers
    # on every call would emit each record once per call.
    if logger.handlers:
        return logger

    # Console output
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console_formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(console_formatter)

    # File output with automatic rotation; explicit UTF-8 so non-ASCII
    # messages are written correctly regardless of the platform default.
    file = RotatingFileHandler(
        f'{name}.log', maxBytes=10*1024*1024, backupCount=5,
        encoding='utf-8')
    file.setLevel(logging.DEBUG)
    file_formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file.setFormatter(file_formatter)

    logger.addHandler(console)
    logger.addHandler(file)

    return logger

# 使用示例
logger = setup_logging('my_automation')
logger.info("系统启动")
try:
    # 业务逻辑
    logger.debug("详细调试信息")
except Exception as e:
    logger.error(f"发生错误: {str(e)}", exc_info=True)

日志系统特点：

多级别日志记录(DEBUG, INFO, WARNING, ERROR)
控制台和文件双输出
自动日志轮转(防止单个日志文件过大)
详细的错误堆栈记录
按模块区分的日志命名空间

6.3 性能优化技巧

处理大量数据时，性能优化至关重要。以下是我总结的Python自动化脚本优化技巧：

批量处理代替循环：尽可能使用Pandas等库的向量化操作

python复制# 不推荐：逐行处理
for index, row in df.iterrows():
    df.at[index, 'new_col'] = row['col1'] * 2
    
# 推荐：向量化操作
df['new_col'] = df['col1'] * 2

内存管理：处理大文件时使用分块读取

python复制# 分块读取大文件
chunk_size = 10000
for chunk in pd.read_csv('large_file.csv', chunksize=chunk_size):
    process(chunk)

并行处理：对I/O密集型任务（文件读写、网络请求）使用线程池并发执行；CPU密集型任务受GIL限制，应改用ProcessPoolExecutor才能真正利用多核CPU

python复制from concurrent.futures import ThreadPoolExecutor

def process_file(file):
    # 文件处理逻辑
    pass

files = ['file1.csv', 'file2.csv', 'file3.csv']
with ThreadPoolExecutor(max_workers=4) as executor:
    executor.map(process_file, files)

缓存中间结果：避免重复计算

python复制from functools import lru_cache

@lru_cache(maxsize=128)
def expensive_operation(param):
    # 耗时计算
    return result

选择合适的数据结构：根据场景使用最佳数据结构

python复制# 频繁成员检查使用集合
valid_items = {'item1', 'item2', 'item3'}
if new_item in valid_items:  # O(1)时间复杂度
    process(new_item)

7. 自动化办公扩展应用

7.1 与云服务集成

现代办公自动化往往需要与各种云服务集成。以下是几个常见场景的实现方法：

1. 云存储自动同步

python复制from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

def upload_to_drive(file_path, folder_id):
    """
    Upload a local file into a Google Drive folder.

    Authenticates with the service-account key file ``service_account.json``
    in the working directory, creates the file under ``folder_id`` using the
    local file's base name, and prints the ID of the created file.

    :param file_path: path of the local file to upload
    :param folder_id: ID of the destination Drive folder
    """
    credentials = service_account.Credentials.from_service_account_file(
        'service_account.json',
        scopes=['https://www.googleapis.com/auth/drive'],
    )
    drive = build('drive', 'v3', credentials=credentials)

    metadata = {'name': os.path.basename(file_path), 'parents': [folder_id]}
    upload = MediaFileUpload(file_path, resumable=True)

    # Only the 'id' field of the created file is requested back.
    request = drive.files().create(
        body=metadata, media_body=upload, fields='id')
    created = request.execute()

    print(f"上传成功，文件ID: {created.get('id')}")

# 使用示例
upload_to_drive('./report.pdf', 'your_folder_id')

2. 云数据库操作

python复制import psycopg2
from sqlalchemy import create_engine

def cloud_db_operations():
    """
    Demonstrate two ways of working with a cloud PostgreSQL database.

    First a query is run through a raw psycopg2 connection, then a table is
    read into a DataFrame through a SQLAlchemy engine and written back as
    ``processed_products`` (replacing the table if it exists). Both the
    connection and the engine are released even when a step fails.

    NOTE(review): host and credentials are hard-coded placeholders; real
    code should read them from environment variables or a secrets store.
    """
    # Direct connection with psycopg2
    conn = psycopg2.connect(
        host="your-rds-instance.rds.amazonaws.com",
        database="mydb",
        user="user",
        password="password"
    )
    try:
        with conn.cursor() as cur:
            # Parameterised query: the driver handles quoting of the value.
            cur.execute("SELECT * FROM sales WHERE date > %s", ('2023-01-01',))
            cur.fetchall()  # rows are fetched but not used in this example
    finally:
        # Close explicitly so the connection is not leaked if the query fails.
        conn.close()

    # SQLAlchemy engine used through pandas
    engine = create_engine(
        'postgresql://user:password@your-rds-instance.rds.amazonaws.com/mydb')
    try:
        df = pd.read_sql("SELECT * FROM products", engine)

        # Write the processed data back to the database
        df.to_sql('processed_products', engine, if_exists='replace')
    finally:
        # Release the engine's pooled connections.
        engine.dispose()

# 使用示例
cloud_db_operations()

7.2 办公聊天机器人集成

将自动化脚本与办公聊天工具集成，可以实现更友好的交互体验：

1. 企业微信机器人通知

python复制import requests
import json

def wechat_work_bot(message, webhook_url, mentioned_mobile_list=None, timeout=10):
    """
    Send a text message to a WeCom (企业微信) group robot webhook.

    :param message: text content of the message
    :param webhook_url: full webhook URL including the ``key`` parameter
    :param mentioned_mobile_list: mobile numbers of members to @-mention;
        defaults to the original placeholder number
    :param timeout: seconds to wait for the HTTP request before
        ``requests`` raises a timeout error
    :return: True when the API accepted the message, otherwise False
    """
    if mentioned_mobile_list is None:
        mentioned_mobile_list = ["13800001111"]  # placeholder number

    headers = {'Content-Type': 'application/json'}
    data = {
        "msgtype": "text",
        "text": {
            "content": message,
            "mentioned_mobile_list": list(mentioned_mobile_list)
        }
    }

    # Without a timeout a stalled connection would block the script forever.
    response = requests.post(
        webhook_url,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout
    )

    # The API reports failures (bad key, rate limit, ...) with HTTP 200 and a
    # non-zero "errcode" in the JSON body, so the status code is not enough.
    errcode = 0
    if response.status_code == 200:
        try:
            errcode = response.json().get('errcode', 0)
        except ValueError:
            errcode = 0  # non-JSON body: fall back to the status code alone

    if response.status_code == 200 and errcode == 0:
        print("消息发送成功")
        return True

    print(f"发送失败: {response.text}")
    return False

# 使用示例
webhook_url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=your_key"
wechat_work_bot("数据处理已完成，请查收附件", webhook_url)

2. Slack自动化通知

def slack_notification(message, channel, webhook_url, timeout=10):
    """
    Send a notification to Slack through an incoming webhook.

    :param message: message text
    :param channel: channel name placed in the payload
    :param webhook_url: incoming-webhook URL
    :param timeout: seconds to wait for the HTTP request before
        ``requests`` raises a timeout error
    :return: True when Slack answered with HTTP 200, otherwise False
    """
    payload = {
        "text": message,
        "channel": channel,
        "username": "Office Bot",
        "icon_emoji": ":robot_face:"
    }

    # Without a timeout a stalled connection would block the script forever.
    response = requests.post(
        webhook_url,
        json=payload,
        timeout=timeout
    )

    if response.status_code == 200:
        print("Slack通知发送成功")
        return True

    print(f"发送失败: {response.text}")
    return False

# 使用示例
slack_notification(
    "每日报表已生成: https://example.com/report",
    "#automation",
    "https://hooks.slack.com/services/your/webhook"
)

7.3 办公文档智能处理

1. Word文档自动化生成

python复制from docx import Document
from docx.shared import Pt, RGBColor

def generate_word_report(data, output_file):
    """
    Generate a Word report containing a three-column metrics table.

    :param data: list of dicts with the keys 'metric' and 'value' and an
        optional 'note'
    :param output_file: path of the .docx file to write
    """
    doc = Document()

    # Title
    doc.add_heading('业务分析报告', 0)

    # General information
    doc.add_paragraph(f"生成时间: {datetime.now():%Y-%m-%d %H:%M:%S}")
    doc.add_paragraph(f"数据记录数: {len(data):,}")

    # Table with a header row
    table = doc.add_table(rows=1, cols=3)
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = '指标'
    hdr_cells[1].text = '数值'
    hdr_cells[2].text = '备注'

    # One table row per item. Cell text must be a string, so every value is
    # converted and a missing or None note becomes an empty cell.
    for item in data:
        row_cells = table.add_row().cells
        row_cells[0].text = str(item['metric'])
        row_cells[1].text = str(item['value'])
        note = item.get('note')
        row_cells[2].text = '' if note is None else str(note)

    # Set every run of the body paragraphs to 12pt.
    # NOTE(review): doc.paragraphs also includes the title paragraph and
    # excludes table cells -- confirm that this is the intended scope.
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(12)

    doc.save(output_file)
    print(f"Word报告已生成: {output_file}")

# 使用示例
data = [
    {'metric': '销售额', 'value': 1500000, 'note': '同比增长20%'},
    {'metric': '客户数', 'value': 350, 'note': '新增客户45家'}
]
generate_word_report(data, './business_report.docx')

2. PDF高级处理

python复制from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from io import BytesIO

def add_watermark(input_pdf, output_pdf, watermark_text):
    """
    Stamp a text watermark onto every page of a PDF.

    A one-page watermark PDF is rendered in memory (light grey, 50pt
    Helvetica, rotated 45 degrees) and merged into each page of
    ``input_pdf``; the result is written to ``output_pdf``.

    :param input_pdf: path of the source PDF
    :param output_pdf: path of the watermarked PDF to write
    :param watermark_text: text to draw as the watermark
    """
    # Render the watermark onto an in-memory PDF page.
    buffer = BytesIO()
    stamp_canvas = canvas.Canvas(buffer)
    stamp_canvas.setFont("Helvetica", 50)
    stamp_canvas.setFillColorRGB(0.8, 0.8, 0.8, alpha=0.3)
    # The rotation must be applied before drawing so the text is slanted.
    stamp_canvas.rotate(45)
    stamp_canvas.drawString(100, 100, watermark_text)
    stamp_canvas.save()

    # Rewind the buffer so it can be read back as a PDF.
    buffer.seek(0)
    stamp_reader = PdfReader(buffer)
    stamp_page = stamp_reader.pages[0]

    source = PdfReader(input_pdf)
    writer = PdfWriter()

    # Merge the watermark page into every source page.
    for page in source.pages:
        page.merge_page(stamp_page)
        writer.add_page(page)

    with open(output_pdf, 'wb') as out:
        writer.write(out)

    print(f"水印添加完成: {output_pdf}")

# 使用示例
add_watermark('original.pdf', 'watermarked.pdf', 'CONFIDENTIAL')

8. 自动化办公安全实践

8.1 安全编码准则

在开发自动化脚本时，必须遵循以下安全准则：

最小权限原则：脚本只应拥有完成工作所需的最小权限

python复制# 不推荐：使用高权限账户
conn = psycopg2.connect(
    host="db.example.com",
    user="admin",
    password="SuperSecret123",
    database="production"
)

# 推荐：使用专用服务账户
conn = psycopg2.connect(
    host="db.example.com",
    user="report_reader",
    password="LimitedAccess456",
    database="reports"
)

敏感信息保护：永远不要硬编码密码或API密钥

python复制#

已经到底了哦

精选内容

1 基于MOPSO算法的冷热电联供系统多目标优化实践 2 AI如何重构网络安全：从规则匹配到智能防御 3 智慧医院建设方案：95页PPT详解医疗数字化转型 4 ETCD磁盘延迟监控与性能优化实践 5 Python实现EBSD数据到有限元分析的跨尺度转换 6 KKFileView与Docker-Compose实现高效文件在线预览 7 Python算法优化实战：从时间复杂度到性能提升 8 React Native登录页面开发实战与Expo优化技巧 9 n8n文件处理性能优化：队列调度与I/O调优实践 10 COMSOL激光熔覆仿真：多物理场建模与工艺优化