1. 为什么Python是自动化办公的首选工具
在当今快节奏的工作环境中,效率就是生命线。作为一名长期与代码打交道的开发者,我发现Python凭借其简洁的语法和丰富的生态库,成为了自动化办公的不二之选。相比其他编程语言,Python的学习曲线平缓,即使是非计算机专业背景的职场人士,经过短期学习也能掌握基础自动化技能。
Python的自动化优势主要体现在三个方面:首先,标准库提供了os、shutil等模块,可以轻松实现文件系统操作;其次,第三方库生态极其丰富,如pandas处理Excel、BeautifulSoup抓取网页、Pillow处理图像等;最后,Python代码可读性强,维护成本低,特别适合编写一次后长期使用的自动化脚本。
提示:对于完全没有编程基础的用户,建议从Anaconda发行版开始,它预装了数据分析常用的库,避免了环境配置的麻烦。
2. 文件管理自动化实战
2.1 批量文件重命名脚本深度解析
文件重命名是日常工作中最高频的重复操作之一。传统手动修改不仅效率低下,而且容易出错。下面这个增强版脚本增加了更多实用功能:
import os
from datetime import datetime
def smart_rename(folder_path, prefix="", suffix="",
                 replace_rules=None,
                 add_date=False,
                 case_convert=None):
    """Batch-rename every regular file directly inside ``folder_path``.

    The new name is ``prefix + name + suffix + date + ext`` where ``name`` is
    the original stem after the replacement rules and the case conversion
    have been applied. Sub-directories are not touched. When the target name
    is already taken by a *different* file, ``_1``, ``_2``, ... is appended
    until a free name is found. A file whose name would not change is left
    alone.

    :param folder_path: directory whose files are renamed (not recursive)
    :param prefix: text placed in front of the stem
    :param suffix: text placed after the stem (before the extension)
    :param replace_rules: mapping ``{old_substring: new_substring}`` applied
        to the stem in dictionary order
    :param add_date: append today's date as ``YYYYMMDD`` when true
    :param case_convert: ``'upper'``, ``'lower'`` or ``'title'``; any other
        value leaves the case unchanged
    :raises FileNotFoundError: if ``folder_path`` does not exist
    """
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"文件夹不存在: {folder_path}")

    date_str = datetime.now().strftime("%Y%m%d") if add_date else ""

    # os.listdir returns a snapshot, so files renamed below are not revisited.
    for filename in os.listdir(folder_path):
        old_path = os.path.join(folder_path, filename)
        if not os.path.isfile(old_path):
            continue

        name, ext = os.path.splitext(filename)

        # Apply the substring replacement rules.
        if replace_rules:
            for old, new in replace_rules.items():
                name = name.replace(old, new)

        # Apply the requested case conversion.
        if case_convert == 'upper':
            name = name.upper()
        elif case_convert == 'lower':
            name = name.lower()
        elif case_convert == 'title':
            name = name.title()

        new_name = f"{prefix}{name}{suffix}{date_str}{ext}"
        if new_name == filename:
            # Nothing to do; without this guard the file would collide with
            # itself below and be renamed to "<name>_1<ext>".
            continue
        new_path = os.path.join(folder_path, new_name)

        # Resolve collisions with *other* files. On case-insensitive file
        # systems a pure case change resolves to the file itself, which is
        # not a collision.
        counter = 1
        while os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
            new_name = f"{prefix}{name}{suffix}{date_str}_{counter}{ext}"
            new_path = os.path.join(folder_path, new_name)
            counter += 1

        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_name}")
# Usage example (guarded so that importing this module renames nothing)
if __name__ == "__main__":
    smart_rename(
        folder_path="./project_docs",
        prefix="FINAL_",
        replace_rules={'草案':'',' ':'_'},
        add_date=True,
        case_convert='title'
    )
这个增强版脚本具有以下特点:
- 支持多重替换规则,可以一次性清除文件中的多余字符
- 可选添加当前日期,方便版本管理
- 提供大小写转换功能,统一文件名风格
- 自动处理文件名冲突,避免覆盖原有文件
- 完善的错误处理机制
2.2 文件备份自动化方案
数据丢失是职场中最令人痛心的事故之一。我设计了这个智能备份方案,它不仅支持常规备份,还能实现增量备份和版本管理:
import shutil
import os
import hashlib
from datetime import datetime
class SmartBackup:
    """Create timestamped full or incremental copies of a directory tree.

    Each run creates ``backup_<YYYYmmdd_HHMMSS>_<type>`` inside
    ``backup_dir`` and then prunes all but the newest backups.
    """

    def __init__(self, source_dir, backup_dir):
        """Remember both directories and create ``backup_dir`` if needed."""
        self.source_dir = source_dir
        self.backup_dir = backup_dir
        os.makedirs(backup_dir, exist_ok=True)

    def _get_file_hash(self, filepath):
        """Return the MD5 hex digest of a file (change detection only)."""
        hasher = hashlib.md5()
        with open(filepath, 'rb') as f:
            # Read in small chunks so large files are never fully in memory.
            for chunk in iter(lambda: f.read(4096), b''):
                hasher.update(chunk)
        return hasher.hexdigest()

    def _list_backups(self):
        """Return the names of existing backup entries, newest first.

        The timestamp format sorts lexicographically in chronological order.
        """
        return sorted(
            (f for f in os.listdir(self.backup_dir) if f.startswith('backup_')),
            reverse=True
        )

    def _clean_old_backups(self, max_versions=5):
        """Delete old backups, keeping the newest ``max_versions`` entries."""
        for old_backup in self._list_backups()[max_versions:]:
            old_path = os.path.join(self.backup_dir, old_backup)
            if os.path.isdir(old_path):
                shutil.rmtree(old_path)
            else:
                os.remove(old_path)
            print(f"Cleaned old backup: {old_backup}")

    def _find_latest_copy(self, rel_path, previous_backups):
        """Return the newest backed-up copy of ``rel_path`` or ``None``."""
        for backup_name in previous_backups:
            candidate = os.path.join(self.backup_dir, backup_name, rel_path)
            if os.path.isfile(candidate):
                return candidate
        return None

    def _copy_changed_files(self, backup_path, previous_backups):
        """Copy files that are new or differ from their newest backed-up copy.

        :return: number of files copied
        """
        backup_root = os.path.abspath(self.backup_dir)
        copied = 0
        for root, dirs, files in os.walk(self.source_dir):
            # Never descend into the backup directory itself if it happens
            # to live inside the source tree.
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != backup_root
            ]
            rel_dir = os.path.relpath(root, self.source_dir)
            for fname in files:
                src = os.path.join(root, fname)
                rel_path = os.path.normpath(os.path.join(rel_dir, fname))
                latest = self._find_latest_copy(rel_path, previous_backups)
                if (latest is not None
                        and self._get_file_hash(latest) == self._get_file_hash(src)):
                    continue
                dest = os.path.join(backup_path, rel_path)
                os.makedirs(os.path.dirname(dest), exist_ok=True)
                shutil.copy2(src, dest)
                copied += 1
        return copied

    def run_backup(self, backup_type='full'):
        """Run one backup and prune old ones.

        :param backup_type: ``'full'`` copies the whole source tree;
            ``'incremental'`` copies only files that are new or whose content
            differs from the newest copy found in earlier backups
        :raises ValueError: for any other ``backup_type``
        """
        if backup_type not in ('full', 'incremental'):
            raise ValueError(f"Unknown backup_type: {backup_type!r}")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_name = f"backup_{timestamp}_{backup_type}"
        backup_path = os.path.join(self.backup_dir, backup_name)

        if backup_type == 'full':
            shutil.copytree(self.source_dir, backup_path)
        else:
            # Snapshot the earlier backups before creating the new directory.
            previous_backups = [
                name for name in self._list_backups()
                if os.path.isdir(os.path.join(self.backup_dir, name))
            ]
            os.makedirs(backup_path)
            self._copy_changed_files(backup_path, previous_backups)

        print(f"Backup completed: {backup_name}")
        self._clean_old_backups()
# Usage example (guarded so that importing this module copies nothing)
if __name__ == "__main__":
    backup = SmartBackup(
        source_dir="./important_documents",
        backup_dir="./backups"
    )
    backup.run_backup()
重要提示:定期测试备份文件的恢复流程,确保在真正需要时能够顺利恢复数据。建议将备份文件存储在物理隔离的位置,如外部硬盘或云存储。
3. 数据处理自动化技巧
3.1 Excel数据合并的进阶方案
处理多个结构相似的Excel文件时,这个增强版合并脚本提供了更多实用功能:
import pandas as pd
import os
def advanced_merge_excels(folder_path, output_file="merged.xlsx",
                          sheet_name="Data",
                          skip_rows=0,
                          columns_mapping=None,
                          data_cleaners=None):
    """Merge one sheet from every Excel file in a folder into one workbook.

    Each input frame gets a ``_source_file`` column holding the file name.
    Files that fail to load or clean are reported and skipped; they do not
    abort the merge. The result is written to sheet ``Merged`` with column
    widths fitted to the content.

    :param folder_path: folder containing the ``.xlsx`` / ``.xls`` files
    :param output_file: path of the merged workbook
    :param sheet_name: name of the sheet to read from each file
    :param skip_rows: number of leading rows to skip before the header
    :param columns_mapping: column renames ``{original_name: new_name}``
    :param data_cleaners: per-column cleaners ``{column_name: callable}``,
        looked up *after* renaming
    """
    dfs = []
    error_files = []
    output_abs = os.path.abspath(output_file)

    # Sorted so that the row order of the result is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        # "~$" files are Excel lock files of currently opened workbooks.
        if filename.startswith('~$'):
            continue
        if not filename.lower().endswith(('.xlsx', '.xls')):
            continue
        file_path = os.path.join(folder_path, filename)
        if os.path.abspath(file_path) == output_abs:
            # Never feed a previous merge result back into the merge.
            continue
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=skip_rows)
            # Apply the column renames.
            if columns_mapping:
                df = df.rename(columns=columns_mapping)
            # Remember which file every row came from.
            df['_source_file'] = filename
            # Apply the per-column cleaners.
            if data_cleaners:
                for col, cleaner in data_cleaners.items():
                    if col in df.columns:
                        df[col] = df[col].apply(cleaner)
            dfs.append(df)
            print(f"Processed: {filename}")
        except Exception as e:
            # Best effort: one broken workbook must not stop the whole merge.
            error_files.append((filename, str(e)))
            print(f"Error processing {filename}: {e}")

    if dfs:
        merged_df = pd.concat(dfs, ignore_index=True)
        # The context manager closes the workbook even if writing fails.
        with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
            merged_df.to_excel(writer, index=False, sheet_name='Merged')
            worksheet = writer.sheets['Merged']
            # Fit every column to its longest cell or header, plus padding.
            for i, col in enumerate(merged_df.columns):
                longest_cell = merged_df[col].astype(str).map(len).max()
                if pd.isna(longest_cell):
                    # An empty column has no longest cell.
                    longest_cell = 0
                width = max(int(longest_cell), len(str(col))) + 2
                worksheet.set_column(i, i, width)
        print(f"Merged {len(dfs)} files with {len(merged_df)} rows")
    else:
        print("No valid files processed")

    if error_files:
        print("\nError summary:")
        for file, error in error_files:
            print(f"{file}: {error}")
# Usage example: a cleaner that normalizes date values
def clean_date(date_str):
    """Return ``date_str`` formatted as ``YYYY-MM-DD``, or ``None`` if it cannot be parsed."""
    import pandas as pd
    try:
        return pd.to_datetime(date_str).strftime("%Y-%m-%d")
    except Exception:
        # Unparseable or missing values become None. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return None
# Guarded so that importing this module does not start a merge
if __name__ == "__main__":
    advanced_merge_excels(
        folder_path="./sales_reports",
        output_file="combined_sales.xlsx",
        skip_rows=1,
        columns_mapping={
            "销售日期": "date",
            "金额": "amount",
            "客户名称": "customer"
        },
        data_cleaners={
            "date": clean_date,
            # Strip thousands separators from amounts stored as text.
            "amount": lambda x: float(x.replace(",", "")) if isinstance(x, str) else x
        }
    )
这个脚本的特色功能包括:
- 支持跳过指定行数,处理非标准表头的Excel文件
- 提供列名映射功能,统一不同文件中的列名
- 内置数据清洗功能,可以自定义各种清洗规则
- 自动记录数据来源,方便追踪问题
- 智能调整输出Excel的列宽
- 完善的错误处理和报告机制
3.2 PDF文本提取的实用技巧
PDF文档中的文本提取经常遇到格式混乱的问题,这个增强版脚本可以更好地保持原文结构:
import PyPDF2
import re
from typing import List, Dict
class PDFExtractor:
    """Extract paragraph-grouped text and annotation notes from a PDF.

    Text fragments and their positions are collected through the
    ``visitor_text`` hook of PyPDF2's ``page.extract_text``.
    """

    def __init__(self):
        # Fraction of the page height treated as header / footer band.
        self._header_footer_threshold = 0.1
        # Page-number strings that were removed during extraction.
        self._page_numbers = set()

    def _clean_text(self, text: str) -> str:
        """Collapse whitespace and re-join words split by a hyphen and a break."""
        # Collapse every whitespace run (including newlines) to one space.
        text = re.sub(r'\s+', ' ', text).strip()
        # "exam- ple" -> "example"
        text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)
        return text

    def _is_header_footer(self, text: str, page_height: float, y_pos: float) -> bool:
        """Return True if ``y_pos`` lies in the top or bottom band of the page."""
        if not page_height:
            # Unknown or zero height: nothing can be classified.
            return False
        relative_pos = y_pos / page_height
        return (relative_pos < self._header_footer_threshold
                or relative_pos > (1 - self._header_footer_threshold))

    def _extract_annotations(self, page) -> List[str]:
        """Return ``[NOTE]: ...`` lines for annotations that carry text."""
        notes = []
        if '/Annots' in page:
            for annot in page['/Annots']:
                annot_obj = annot.get_object()
                if '/Contents' in annot_obj:
                    notes.append(f"[NOTE]: {annot_obj['/Contents']}")
        return notes

    def _collect_text_objects(self, page) -> List[tuple]:
        """Return ``(text, x, y)`` for every text fragment on the page."""
        fragments = []

        def visitor(text, cm, tm, font_dict, font_size):
            # tm is the text matrix; entries 4 and 5 are its x / y translation.
            if text:
                fragments.append((text, float(tm[4]), float(tm[5])))

        page.extract_text(visitor_text=visitor)
        return fragments

    def _group_paragraphs(self, text_objects: List[tuple], page_height: float,
                          skip_header_footer: bool,
                          remove_page_numbers: bool) -> List[str]:
        """Filter fragments and join them into paragraphs, top to bottom.

        A vertical gap of more than 15 units between consecutive kept
        fragments starts a new paragraph.
        """
        paragraphs = []
        current_block = []
        last_y = None
        # Larger y is higher on the page; the sort is stable, so fragments
        # on the same line keep their stream order.
        for text, _x, y in sorted(text_objects, key=lambda item: -item[2]):
            text = self._clean_text(text)
            if not text:
                continue
            if skip_header_footer and self._is_header_footer(text, page_height, y):
                continue
            if remove_page_numbers and text.isdigit():
                self._page_numbers.add(text)
                continue
            if last_y is not None and abs(y - last_y) > 15:
                if current_block:
                    paragraphs.append(' '.join(current_block))
                    current_block = []
            current_block.append(text)
            last_y = y
        if current_block:
            paragraphs.append(' '.join(current_block))
        return paragraphs

    def extract_text(self, pdf_path: str,
                     skip_header_footer: bool = True,
                     remove_page_numbers: bool = True) -> Dict[int, List[str]]:
        """Extract the text of every page of a PDF.

        :param pdf_path: path of the PDF file
        :param skip_header_footer: drop fragments in the top / bottom band
        :param remove_page_numbers: drop fragments consisting only of digits
        :return: ``{1-based page number: [annotation notes..., paragraphs...]}``
        """
        result = {}
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            for page_num, page in enumerate(reader.pages, start=1):
                page_text = self._extract_annotations(page)
                page_text.extend(self._group_paragraphs(
                    self._collect_text_objects(page),
                    float(page.mediabox[3]),
                    skip_header_footer,
                    remove_page_numbers,
                ))
                result[page_num] = page_text
        return result
# Usage example (guarded so that importing this module reads no files)
if __name__ == "__main__":
    extractor = PDFExtractor()
    text_data = extractor.extract_text("./report.pdf")
    # Save the extracted text, one section per page
    with open("./extracted_text.txt", "w", encoding="utf-8") as f:
        for page, content in text_data.items():
            f.write(f"=== Page {page} ===\n")
            f.write("\n".join(content))
            f.write("\n\n")
这个PDF提取器具有以下优势:
- 保持原文的逻辑段落结构
- 智能识别并跳过页眉页脚
- 自动移除页码等干扰元素
- 支持提取注释内容
- 处理特殊编码和排版问题
- 输出结构化的文本内容
4. 网络自动化操作
4.1 智能网页抓取框架
简单的网页抓取经常遇到反爬虫限制,这个增强版框架包含了多种反反爬策略:
import requests
from bs4 import BeautifulSoup
import random
import time
from urllib.parse import urljoin
import csv
class SmartCrawler:
    """Small polite crawler: random delays, one session, CSV output.

    Pages are visited breadth-first starting at ``base_url``; only URLs that
    start with ``base_url`` are followed.
    """

    def __init__(self, base_url, output_file="data.csv", timeout=10):
        """
        :param base_url: start URL; also the prefix every followed link must have
        :param output_file: CSV file the collected records are written to
        :param timeout: per-request timeout in seconds
        """
        self.base_url = base_url
        self.output_file = output_file
        self.timeout = timeout
        self.session = requests.Session()
        self._setup_headers()
        self.visited_urls = set()
        self.data = []

    def _setup_headers(self):
        """Pick a random User-Agent for the session."""
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1"
        ]
        # Accept-Encoding is deliberately left at the session default:
        # advertising "br" by hand makes servers send Brotli even when no
        # Brotli decoder is installed, which yields undecodable page text.
        self.session.headers.update({
            "User-Agent": random.choice(user_agents),
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive"
        })

    def _random_delay(self, min=1, max=3):
        """Sleep for a random time between ``min`` and ``max`` seconds."""
        time.sleep(random.uniform(min, max))

    def _get_page(self, url):
        """Fetch ``url`` and return its text, or ``None`` on any failure."""
        try:
            self._random_delay()
            # getattr keeps instances created without __init__ usable.
            response = self.session.get(url, timeout=getattr(self, "timeout", 10))
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        # Treat a redirect to a verification page or a captcha as a failure.
        if "verify" in response.url.lower() or "captcha" in response.text.lower():
            print(f"Error fetching {url}: Triggered anti-bot verification")
            return None
        return response.text

    def _extract_data(self, soup):
        """Collect product records from a page; adapt the selectors per site."""
        for item in soup.select(".product-item"):
            try:
                name = item.select_one(".product-name").text.strip()
                price = item.select_one(".price").text.strip()
            except AttributeError as e:
                # select_one returned None: the item lacks an expected element.
                print(f"Error extracting data: {e}")
                continue
            self.data.append({
                "name": name,
                "price": price,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            })

    def _find_links(self, soup, page_url=None):
        """Return the set of unvisited in-scope URLs linked from a page.

        :param soup: parsed page
        :param page_url: URL of the page, used to resolve relative links;
            defaults to ``base_url``
        """
        resolve_against = page_url or self.base_url
        links = set()
        for a in soup.select("a[href]"):
            # Drop the fragment so "#section" variants count as one page.
            full_url = urljoin(resolve_against, a["href"]).split("#", 1)[0]
            if full_url.startswith(self.base_url) and full_url not in self.visited_urls:
                links.add(full_url)
        return links

    def crawl(self, max_pages=10):
        """Crawl breadth-first until ``max_pages`` pages were fetched successfully."""
        from collections import deque

        queue = deque([self.base_url])
        queued = {self.base_url}  # everything ever enqueued, to avoid repeats
        page_count = 0
        while queue and page_count < max_pages:
            url = queue.popleft()
            self.visited_urls.add(url)
            print(f"Crawling: {url}")
            html = self._get_page(url)
            if not html:
                continue
            soup = BeautifulSoup(html, "html.parser")
            self._extract_data(soup)
            # Sorted so that the crawl order is reproducible.
            for link in sorted(self._find_links(soup, url)):
                if link not in queued:
                    queued.add(link)
                    queue.append(link)
            page_count += 1
            # Checkpoint the data every 5 pages.
            if page_count % 5 == 0:
                self._save_data()
        self._save_data()

    def _save_data(self):
        """Write all collected records to the CSV file (full rewrite)."""
        if not self.data:
            return
        keys = self.data[0].keys()
        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(self.data)
        print(f"Saved {len(self.data)} records to {self.output_file}")
# Usage example (guarded so that importing this module starts no crawl)
if __name__ == "__main__":
    crawler = SmartCrawler(
        base_url="https://example.com/products",
        output_file="products.csv"
    )
    crawler.crawl(max_pages=20)
这个爬虫框架包含以下高级功能:
- 随机User-Agent和请求延迟,降低被封风险
- 自动检测验证码和反爬机制
- 广度优先的URL发现策略
- 增量式数据保存
- 完善的错误处理和日志记录
- 可扩展的数据提取逻辑
4.2 邮件自动化的最佳实践
批量发送邮件时需要考虑很多细节问题,这个增强版邮件发送器解决了常见痛点:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
import os
from typing import List, Dict
import logging
from jinja2 import Template
class EmailSender:
    """Send personalized HTML e-mails in bulk over one SMTP-over-SSL connection."""

    def __init__(self, smtp_server: str, smtp_port: int,
                 sender_email: str, sender_password: str):
        self.smtp_server = smtp_server
        self.smtp_port = smtp_port
        self.sender_email = sender_email
        self.sender_password = sender_password
        self.logger = self._setup_logger()

    def _setup_logger(self):
        """Return the shared "EmailSender" logger (console + email_sender.log)."""
        logger = logging.getLogger("EmailSender")
        logger.setLevel(logging.INFO)
        if logger.handlers:
            # getLogger returns the same object every time; adding handlers
            # again would duplicate every log line per EmailSender instance.
            return logger
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        # Console output
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        logger.addHandler(ch)
        # File output
        fh = logging.FileHandler("email_sender.log")
        fh.setFormatter(formatter)
        logger.addHandler(fh)
        return logger

    def _render_template(self, template_file: str, context: Dict) -> str:
        """Render the Jinja2 template stored in ``template_file`` with ``context``."""
        with open(template_file, "r", encoding="utf-8") as f:
            template = Template(f.read())
        return template.render(**context)

    def _load_attachments(self, attachments) -> list:
        """Read every attachment once; return ``[(file_name, bytes), ...]``.

        Unreadable files are logged and skipped so the mails still go out.
        """
        loaded = []
        for file_path in attachments or []:
            try:
                with open(file_path, "rb") as f:
                    loaded.append((os.path.basename(file_path), f.read()))
            except OSError as e:
                self.logger.warning(f"Failed to attach {file_path}: {e}")
        return loaded

    def send_emails(self, recipients: List[Dict],
                    subject: str,
                    content_template: str,
                    attachments: List[str] = None,
                    cc_recipients: List[str] = None,
                    bcc_recipients: List[str] = None,
                    max_retries: int = 3) -> Dict[str, List[str]]:
        """Send one personalized mail per recipient.

        :param recipients: recipient records, e.g.
            ``[{"email": "a@b.com", "name": "John", ...}]``; each record is
            available in the template as ``recipient``
        :param subject: mail subject
        :param content_template: path of the HTML template file
        :param attachments: paths of files attached to every mail
        :param cc_recipients: addresses put in the Cc header
        :param bcc_recipients: addresses that receive the mail without
            appearing in any header
        :param max_retries: send attempts per recipient (at least one is made)
        :return: ``{"success": [addresses], "failed": [addresses]}``
        """
        # Local import: the surrounding imports do not provide ``time``.
        import time

        results = {"success": [], "failed": []}
        loaded_attachments = self._load_attachments(attachments)
        attempts = max(1, max_retries)

        with smtplib.SMTP_SSL(self.smtp_server, self.smtp_port) as server:
            server.login(self.sender_email, self.sender_password)
            for recipient in recipients:
                msg = MIMEMultipart()
                msg["From"] = self.sender_email
                msg["Subject"] = subject

                # Personalized body
                try:
                    html_content = self._render_template(
                        content_template,
                        {"recipient": recipient}
                    )
                    msg.attach(MIMEText(html_content, "html", "utf-8"))
                except Exception as e:
                    self.logger.error(f"Template error for {recipient['email']}: {e}")
                    results["failed"].append(recipient["email"])
                    continue

                # Attachments (bytes were read once, before the loop)
                for file_name, payload in loaded_attachments:
                    part = MIMEApplication(payload, Name=file_name)
                    part["Content-Disposition"] = f'attachment; filename="{file_name}"'
                    msg.attach(part)

                # Headers show To and Cc only; Bcc addresses are added to
                # the envelope alone.
                to_email = recipient["email"]
                msg["To"] = to_email
                if cc_recipients:
                    msg["Cc"] = ", ".join(cc_recipients)
                all_recipients = [to_email]
                if cc_recipients:
                    all_recipients.extend(cc_recipients)
                if bcc_recipients:
                    all_recipients.extend(bcc_recipients)

                # Send with exponential backoff between attempts.
                for attempt in range(attempts):
                    try:
                        server.sendmail(
                            self.sender_email,
                            all_recipients,
                            msg.as_string()
                        )
                        self.logger.info(f"Email sent to {to_email}")
                        results["success"].append(to_email)
                        break
                    except Exception as e:
                        if attempt == attempts - 1:
                            self.logger.error(f"Failed to send to {to_email}: {e}")
                            results["failed"].append(to_email)
                        else:
                            time.sleep(2 ** attempt)
        return results
# Usage example (guarded so that importing this module sends no mail)
if __name__ == "__main__":
    sender = EmailSender(
        smtp_server="smtp.example.com",
        smtp_port=465,
        sender_email="your_email@example.com",
        sender_password="your_password"
    )
    results = sender.send_emails(
        recipients=[
            {"email": "user1@example.com", "name": "Alice"},
            {"email": "user2@example.com", "name": "Bob"}
        ],
        subject="Monthly Report",
        content_template="./email_template.html",
        attachments=["./report.pdf"],
        cc_recipients=["manager@example.com"]
    )
    print(f"Sent successfully: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")
这个邮件发送器解决了以下常见问题:
- 支持HTML模板和个性化内容
- 完善的错误处理和重试机制
- 详细的日志记录
- 支持抄送和密送
- 大附件处理能力
- 批量发送结果统计
5. 系统监控与维护自动化
5.1 增强型系统监控工具
基础的系统监控往往不能满足实际需求,这个增强版监控工具提供了更多实用功能:
import psutil
import time
import socket
import logging
from datetime import datetime
import json
class SystemMonitor:
    """Poll system metrics with psutil and raise throttled threshold alerts."""

    def __init__(self, config_file="monitor_config.json"):
        self.config = self._load_config(config_file)
        self.logger = self._setup_logger()
        # Maps alert name -> time.time() of the last alert that was sent.
        self.alert_counters = {}

    def _load_config(self, config_file):
        """Return the defaults overlaid with the JSON in ``config_file``.

        Nested sections such as ``thresholds`` are merged key by key, so a
        user file that sets only one threshold keeps the other defaults.
        A missing or invalid file yields the defaults unchanged.
        """
        default_config = {
            "thresholds": {
                "cpu": 85,
                "memory": 85,
                "disk": 90,
                "temperature": 80,
                "network_down": 1024,  # KB/s
                "network_up": 512
            },
            "check_interval": 60,
            "alert_intervals": {
                "cpu": 300,
                "memory": 300,
                "disk": 600
            },
            "log_file": "system_monitor.log",
            "max_log_size": 1048576  # 1MB
        }
        try:
            with open(config_file) as f:
                user_config = json.load(f)
        except FileNotFoundError:
            print("Config file not found, using defaults")
            return default_config
        except json.JSONDecodeError:
            print("Invalid config file, using defaults")
            return default_config
        if not isinstance(user_config, dict):
            print("Invalid config file, using defaults")
            return default_config
        for key, value in user_config.items():
            if isinstance(value, dict) and isinstance(default_config.get(key), dict):
                default_config[key].update(value)
            else:
                default_config[key] = value
        return default_config

    def _setup_logger(self):
        """Return the shared "SystemMonitor" logger (console + rotating file)."""
        # ``import logging`` alone does not load the handlers submodule.
        from logging.handlers import RotatingFileHandler

        logger = logging.getLogger("SystemMonitor")
        logger.setLevel(logging.INFO)
        if logger.handlers:
            # Already configured by an earlier instance; adding handlers
            # again would duplicate every log line.
            return logger
        formatter = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
        )
        # Console output
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        # File output with rotation
        file_handler = RotatingFileHandler(
            self.config["log_file"],
            maxBytes=self.config["max_log_size"],
            backupCount=3
        )
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        return logger

    def _check_alert_interval(self, metric_name, interval_key=None):
        """Return True (and record the time) if an alert may be sent now.

        :param metric_name: name under which the last alert time is tracked
        :param interval_key: key in ``alert_intervals`` to read the interval
            from; defaults to ``metric_name``. Unknown keys use 300 seconds.
        """
        now = time.time()
        last_alert = self.alert_counters.get(metric_name, 0)
        interval = self.config["alert_intervals"].get(interval_key or metric_name, 300)
        if now - last_alert >= interval:
            self.alert_counters[metric_name] = now
            return True
        return False

    def _send_alert(self, message):
        """Emit an alert; extend here for e-mail, SMS or webhook delivery."""
        self.logger.error(f"ALERT: {message}")
        print(f"\033[91mALERT: {message}\033[0m")  # red terminal output

    def _get_network_traffic(self):
        """Return the cumulative sent / received byte counters."""
        net_io = psutil.net_io_counters()
        return {
            "bytes_sent": net_io.bytes_sent,
            "bytes_recv": net_io.bytes_recv
        }

    def collect_metrics(self):
        """Sample all metrics once; takes about two seconds of wall time."""
        metrics = {
            "timestamp": datetime.now().isoformat(),
            "cpu": psutil.cpu_percent(interval=1),
            "memory": psutil.virtual_memory().percent,
            "disk": {},
            "network": {},
            "temperature": None,
            "processes": len(psutil.pids()),
            "hostname": socket.gethostname()
        }
        # Disk usage per mounted partition
        for part in psutil.disk_partitions():
            try:
                usage = psutil.disk_usage(part.mountpoint)
                metrics["disk"][part.mountpoint] = {
                    "total": usage.total,
                    "used": usage.used,
                    "free": usage.free,
                    "percent": usage.percent
                }
            except Exception as e:
                self.logger.warning(f"Failed to check disk {part.mountpoint}: {e}")
        # Network throughput: counter difference over one second
        net_start = self._get_network_traffic()
        time.sleep(1)
        net_end = self._get_network_traffic()
        metrics["network"] = {
            "sent_kbps": (net_end["bytes_sent"] - net_start["bytes_sent"]) / 1024,
            "recv_kbps": (net_end["bytes_recv"] - net_start["bytes_recv"]) / 1024
        }
        # Temperatures, where the platform provides them
        try:
            temps = psutil.sensors_temperatures()
            if temps:
                metrics["temperature"] = {
                    name: max(chip.current for chip in chips)
                    for name, chips in temps.items()
                    if chips  # max() of an empty sensor list would raise
                }
        except AttributeError:
            pass  # sensors_temperatures is not available on this platform
        return metrics

    def check_thresholds(self, metrics):
        """Compare ``metrics`` with the thresholds and send throttled alerts."""
        thresholds = self.config["thresholds"]
        # CPU
        if metrics["cpu"] > thresholds["cpu"]:
            if self._check_alert_interval("cpu"):
                self._send_alert(
                    f"CPU usage {metrics['cpu']}% exceeds threshold {thresholds['cpu']}%"
                )
        # Memory
        if metrics["memory"] > thresholds["memory"]:
            if self._check_alert_interval("memory"):
                self._send_alert(
                    f"Memory usage {metrics['memory']}% exceeds threshold {thresholds['memory']}%"
                )
        # Disks: tracked per mount point, all using the "disk" interval
        for mount, usage in metrics["disk"].items():
            if usage["percent"] > thresholds["disk"]:
                if self._check_alert_interval(f"disk_{mount}", "disk"):
                    self._send_alert(
                        f"Disk {mount} usage {usage['percent']}% exceeds threshold {thresholds['disk']}%"
                    )
        # Network
        if metrics["network"]["sent_kbps"] > thresholds["network_up"]:
            if self._check_alert_interval("network_up"):
                self._send_alert(
                    f"Network upload traffic {metrics['network']['sent_kbps']:.2f}KB/s "
                    f"exceeds threshold {thresholds['network_up']}KB/s"
                )
        if metrics["network"]["recv_kbps"] > thresholds["network_down"]:
            if self._check_alert_interval("network_down"):
                self._send_alert(
                    f"Network download traffic {metrics['network']['recv_kbps']:.2f}KB/s "
                    f"exceeds threshold {thresholds['network_down']}KB/s"
                )
        # Temperature: tracked per sensor group
        if metrics["temperature"]:
            for name, temp in metrics["temperature"].items():
                if temp > thresholds["temperature"]:
                    if self._check_alert_interval(f"temperature_{name}", "temperature"):
                        self._send_alert(
                            f"Temperature {name} {temp}°C exceeds threshold {thresholds['temperature']}°C"
                        )

    def run(self):
        """Monitor until interrupted with Ctrl+C."""
        self.logger.info("Starting system monitor")
        try:
            while True:
                metrics = self.collect_metrics()
                self.check_thresholds(metrics)
                # Log a one-line summary; the disk figure is the fullest one.
                self.logger.info(
                    f"CPU: {metrics['cpu']}% | "
                    f"Memory: {metrics['memory']}% | "
                    f"Disk: {max(u['percent'] for u in metrics['disk'].values()) if metrics['disk'] else 0}% | "
                    f"Network: ↑{metrics['network']['sent_kbps']:.1f} ↓{metrics['network']['recv_kbps']:.1f} KB/s"
                )
                time.sleep(self.config["check_interval"])
        except KeyboardInterrupt:
            self.logger.info("Monitor stopped by user")
        except Exception as e:
            self.logger.error(f"Monitor crashed: {e}")
            raise
# Usage example: start monitoring only when executed as a script
if __name__ == "__main__":
    SystemMonitor().run()
这个系统监控工具提供了以下增强功能:
- 可配置的监控阈值和告警间隔
- 全面的系统指标收集(CPU、内存、磁盘、网络、温度等)
- 智能告警抑制,避免告警风暴
- 详细的日志记录和轮转
- 支持多种告警通知方式(需自行实现)
- 易于扩展的架构设计
6. 图像处理自动化
6.1 专业级图片批量处理工具
简单的图片压缩往往不能满足专业需求,这个增强版工具提供了完整的图片处理流水线:
from PIL import Image, ImageOps, ImageEnhance, ImageFilter
import os
from concurrent.futures import ThreadPoolExecutor
import time
from typing import List, Dict
class ImageProcessor:
def __init__(self, config: Dict = None):
self.default_config = {
"output_format": "JPEG",
"quality": 85,
"max_width": None,
"max_height": None,
"auto_orient": True,
"enhance": {
"sharpness": 1.2,
"contrast": 1.1,
"brightness": 1.05
},
"watermark": None,
"output_folder": "./processed",
"concurrency": 4
}
self.config = {**self.default_config, **(config or {})}
os.makedirs(self.config["output_folder"], exist_ok=True)
def _process_single_image(self, input_path: str):
"""处理单个图片文件"""
try:
with Image.open(input_path) as img:
# 自动旋转
if self.config["auto_orient"]:
img = ImageOps.exif_transpose(img)
# 调整尺寸
if self.config["max_width"] or self.config["max_height"]:
img = self._resize_image(img)
# 应用增强
img = self._enhance_image(img)
# 添加水印
if self.config["watermark"]:
img = self._add_watermark(img)
# 准备输出路径
filename = os.path.basename(input_path)
name, ext = os.path.splitext(filename)
output_ext = f".{self.config