在日常文件管理中,我们经常会遇到需要批量修改文件扩展名的场景。比如摄影师需要将.cr2格式的RAW照片批量转换为.dng格式,或者开发者需要把.txt日志文件统一改为.log格式。手动一个个重命名不仅效率低下,还容易出错。
Python作为一门强大的脚本语言,特别适合处理这类重复性文件操作任务。通过编写简单的脚本,我们可以实现:
注意:文件扩展名修改本质上只是改变了文件的标识符,并不会改变文件的实际内容。但对于某些专业软件(如Photoshop、CAD等),错误的扩展名可能导致文件无法正常打开。
实现这个功能需要以下几个关键组件:
Python的标准库已经提供了我们所需的所有工具:
os模块:处理文件路径和目录操作;os.path:路径相关工具函数;glob:文件模式匹配;logging:操作日志记录。
选择这些内置模块而非第三方库的原因:
import os
import glob
import logging
def batch_rename_files(root_dir, old_exts, new_ext):
    """
    Recursively rename files under ``root_dir`` by swapping their extension.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param old_exts: extensions to replace (e.g. ['.txt', '.doc']); matched
        case-insensitively against each file's extension
    :param new_ext: replacement extension (e.g. '.md')
    :return: None; each rename or failure is reported through ``logging``
    """
    # Log to a file in the current working directory. basicConfig() is a
    # no-op when the root logger already has handlers.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename='file_rename.log'
    )
    # File extensions are lower-cased before the comparison, so the wanted
    # set has to be lower-cased too; otherwise '.TXT' would never match.
    wanted_exts = {ext.lower() for ext in old_exts}
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            stem, file_ext = os.path.splitext(filename)
            if file_ext.lower() not in wanted_exts:
                continue
            file_path = os.path.join(foldername, filename)
            new_path = os.path.join(foldername, stem + new_ext)
            try:
                os.rename(file_path, new_path)
            except OSError as e:
                # One failing file must not abort the rest of the batch.
                logging.error(f"Error renaming {file_path}: {str(e)}")
            else:
                logging.info(f"Renamed: {file_path} -> {new_path}")
if __name__ == "__main__":
    # Example usage: turn every .txt and .doc file under the directory
    # into a .md file. Replace the placeholder path before running.
    batch_rename_files(
        root_dir="/path/to/your/files",
        old_exts=['.txt', '.doc'],
        new_ext='.md'
    )
os.walk()的使用:
扩展名处理技巧:
os.path.splitext()将文件名拆分为(名称,扩展名)。
错误处理机制:
在实际操作前,可以先预览将要执行的操作而不实际修改文件:
def batch_rename_files(root_dir, old_exts, new_ext, dry_run=False):
    """
    Recursively swap file extensions under ``root_dir``, with a preview mode.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param old_exts: extensions to replace (e.g. ['.txt', '.doc']); matched
        case-insensitively
    :param new_ext: replacement extension (e.g. '.md')
    :param dry_run: when True, only log what would be renamed and leave
        every file untouched
    :return: None; results are reported through ``logging``
    """
    # Same logging setup as the basic version: a log file in the current
    # working directory (ignored if logging is already configured).
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename='file_rename.log'
    )
    # Lower-case the wanted extensions so they compare equal to the
    # lower-cased extension of each file.
    wanted_exts = {ext.lower() for ext in old_exts}
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            stem, file_ext = os.path.splitext(filename)
            if file_ext.lower() not in wanted_exts:
                continue
            file_path = os.path.join(foldername, filename)
            new_path = os.path.join(foldername, stem + new_ext)
            if dry_run:
                # Preview only: report the planned rename and move on.
                logging.info(f"[Dry Run] Would rename: {file_path} -> {new_path}")
                continue
            try:
                os.rename(file_path, new_path)
            except OSError as e:
                logging.error(f"Error renaming {file_path}: {str(e)}")
            else:
                logging.info(f"Renamed: {file_path} -> {new_path}")
对于更复杂的匹配需求,可以引入re模块:
import re
def batch_rename_files(root_dir, pattern, new_ext, dry_run=False):
    """
    Recursively rename files whose name matches a regular expression.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param pattern: regular expression applied with ``match`` semantics
        (anchored at the start of the file name), case-insensitively;
        e.g. r'.*\\.(txt|doc)$'
    :param new_ext: extension that replaces the matched file's extension
    :param dry_run: when True, only log what would be renamed
    :return: None; results are reported through ``logging``
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename='file_rename.log'
    )
    # Compile once, outside the loops, instead of on every file name.
    name_matcher = re.compile(pattern, re.IGNORECASE)
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if not name_matcher.match(filename):
                continue
            file_path = os.path.join(foldername, filename)
            new_path = os.path.join(
                foldername, os.path.splitext(filename)[0] + new_ext
            )
            if dry_run:
                logging.info(f"[Dry Run] Would rename: {file_path} -> {new_path}")
                continue
            try:
                os.rename(file_path, new_path)
            except OSError as e:
                logging.error(f"Error renaming {file_path}: {str(e)}")
            else:
                logging.info(f"Renamed: {file_path} -> {new_path}")
示例pattern可以是:r'.*\.(txt|doc)$'(匹配.txt或.doc结尾的文件)
对于大量文件操作,添加进度反馈很有必要:
from tqdm import tqdm
def batch_rename_files(root_dir, old_exts, new_ext):
    """
    Recursively swap file extensions under ``root_dir`` with a progress bar.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param old_exts: extensions to replace (e.g. ['.txt', '.doc']); matched
        case-insensitively
    :param new_ext: replacement extension (e.g. '.md')
    :return: None; results are reported through ``logging``
    """
    wanted_exts = {ext.lower() for ext in old_exts}
    # Walk the tree once and remember the work to do. This yields the total
    # for the progress bar and guarantees that the files counted are exactly
    # the files renamed.
    pending = []
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            stem, file_ext = os.path.splitext(filename)
            if file_ext.lower() in wanted_exts:
                pending.append((
                    os.path.join(foldername, filename),
                    os.path.join(foldername, stem + new_ext),
                ))
    with tqdm(total=len(pending), desc="Processing files") as pbar:
        for old_path, new_path in pending:
            try:
                os.rename(old_path, new_path)
            except OSError as e:
                logging.error(f"Error renaming {old_path}: {str(e)}")
            else:
                logging.info(f"Renamed: {old_path} -> {new_path}")
            finally:
                # Advance on failure too, so the bar always reaches 100%.
                pbar.update(1)
当目标文件名已存在时,当前脚本在类Unix系统上会直接覆盖已有文件(在Windows上os.rename会抛出FileExistsError,该文件会被记录为重命名失败)。更安全的做法是:
实现示例:
def get_unique_filename(path):
    """
    Return a path that does not collide with an existing directory entry.

    If ``path`` is free it is returned unchanged; otherwise "(1)", "(2)", ...
    is inserted before the extension until a free name is found.

    :param path: desired file path
    :return: ``path`` itself, or the first free "name(N).ext" variant
    """
    # lexists() also reports dangling symlinks, which exists() treats as
    # absent even though the name is already taken in the directory.
    if not os.path.lexists(path):
        return path
    base, ext = os.path.splitext(path)
    counter = 1
    while os.path.lexists(f"{base}({counter}){ext}"):
        counter += 1
    return f"{base}({counter}){ext}"
权限问题:
os.access(file_path, os.W_OK)
特殊字符处理:
使用os.path处理路径而非字符串拼接
处理大量文件时:
示例优化代码:
from concurrent.futures import ThreadPoolExecutor
def batch_rename_files(root_dir, old_exts, new_ext, max_workers=4):
    """
    Recursively swap file extensions under ``root_dir`` using a thread pool.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param old_exts: extensions to replace (e.g. ['.txt', '.doc']); matched
        case-insensitively
    :param new_ext: replacement extension (e.g. '.md')
    :param max_workers: number of worker threads performing the renames
    :return: None; results are reported through ``logging``
    """
    wanted_exts = {ext.lower() for ext in old_exts}
    # Collect every (old, new) pair first so the directory tree is not
    # modified while os.walk() is still iterating over it.
    file_pairs = []
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            stem, file_ext = os.path.splitext(filename)
            if file_ext.lower() in wanted_exts:
                file_pairs.append((
                    os.path.join(foldername, filename),
                    os.path.join(foldername, stem + new_ext),
                ))

    def rename_file(pair):
        """Rename one (old_path, new_path) pair and log the outcome."""
        old_path, new_path = pair
        try:
            os.rename(old_path, new_path)
        except OSError as e:
            logging.error(f"Error renaming {old_path}: {str(e)}")
        else:
            logging.info(f"Renamed: {old_path} -> {new_path}")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Drain the result iterator so that an unexpected exception raised
        # in a worker is re-raised here instead of being dropped silently.
        list(executor.map(rename_file, file_pairs))
现象:修改扩展名后,专业软件(如Photoshop)打不开文件
原因:某些文件格式有特殊要求:
解决方案:
现象:处理中文文件名时出现乱码或错误
原因:不同系统默认编码不同
解决方案:
import sys
import io

# Force UTF-8 on standard output so non-ASCII file names (e.g. Chinese)
# print correctly whatever the platform's default encoding is.
if hasattr(sys.stdout, "reconfigure"):
    # Python 3.7+: change the encoding of the existing stream in place
    # rather than stacking a second text wrapper on the same buffer.
    sys.stdout.reconfigure(encoding='utf-8')
else:
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
os.fsencode()/os.fsdecode()
现象:脚本修改了符号链接而非原始文件
解决方案:
# This guard belongs at the top of the per-file loop, before any rename,
# so that symbolic links are reported and left untouched.
for filename in filenames:
    file_path = os.path.join(foldername, filename)
    if os.path.islink(file_path):
        logging.warning(f"Skipping symlink: {file_path}")
        continue
结合所有优化后的完整实现:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import re
import logging
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
def setup_logging(log_file='file_rename.log'):
    """
    Configure the root logger to write INFO-and-above records to a file
    and to a stream handler.

    :param log_file: path of the UTF-8 encoded log file
    :return: None
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII file names are logged intact.
            logging.FileHandler(log_file, encoding='utf-8'),
            logging.StreamHandler()
        ]
    )
def get_unique_filename(path):
    """
    Return a path that does not collide with an existing directory entry.

    If ``path`` is free it is returned unchanged; otherwise "(1)", "(2)", ...
    is inserted before the extension until a free name is found.

    :param path: desired file path
    :return: ``path`` itself, or the first free "name(N).ext" variant
    """
    # lexists() also reports dangling symlinks, which exists() treats as
    # absent even though the name is already taken in the directory.
    if not os.path.lexists(path):
        return path
    base, ext = os.path.splitext(path)
    counter = 1
    while os.path.lexists(f"{base}({counter}){ext}"):
        counter += 1
    return f"{base}({counter}){ext}"
def batch_rename_files(root_dir, old_exts, new_ext, dry_run=False, max_workers=1):
    """
    Enhanced batch tool that swaps file extensions under ``root_dir``.

    :param root_dir: root directory to scan; all sub-directories are visited
    :param old_exts: extensions to replace (e.g. ['.txt', '.doc']); the
        leading dot is optional and matching is case-insensitive
    :param new_ext: replacement extension (e.g. '.md'); leading dot optional
    :param dry_run: when True, only log what would be renamed
    :param max_workers: number of worker threads; values above 1 enable
        the thread pool
    :return: None; results are reported through ``logging``
    """
    # Normalise extensions: ensure a leading dot, and lower-case the old
    # ones so they compare equal to the lower-cased extension of each file.
    wanted_exts = {
        (ext if ext.startswith('.') else f'.{ext}').lower() for ext in old_exts
    }
    new_ext = new_ext if new_ext.startswith('.') else f'.{new_ext}'

    # Plan every rename before touching the disk.
    file_pairs = []
    reserved = set()
    for foldername, _, filenames in os.walk(root_dir):
        for filename in filenames:
            stem, file_ext = os.path.splitext(filename)
            if file_ext.lower() not in wanted_exts:
                continue
            old_path = os.path.join(foldername, filename)
            desired = os.path.join(foldername, stem + new_ext)
            if desired == old_path:
                # Already carries the target name; nothing to do.
                continue
            new_path = _pick_free_target(desired, reserved)
            reserved.add(new_path)
            file_pairs.append((old_path, new_path))

    with tqdm(total=len(file_pairs), desc="Processing files") as pbar:
        def rename_file(pair):
            """Rename (or preview) one pair, log it, advance the bar."""
            old_path, new_path = pair
            try:
                if dry_run:
                    logging.info(f"[Dry Run] Would rename: {old_path} -> {new_path}")
                else:
                    os.rename(old_path, new_path)
                    logging.info(f"Renamed: {old_path} -> {new_path}")
            except OSError as e:
                logging.error(f"Error renaming {old_path}: {str(e)}")
            finally:
                pbar.update(1)

        if max_workers > 1:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Drain the iterator so unexpected worker exceptions are
                # re-raised here instead of being dropped silently.
                list(executor.map(rename_file, file_pairs))
        else:
            for pair in file_pairs:
                rename_file(pair)


def _pick_free_target(desired, reserved):
    """Return ``desired`` or its first "name(N).ext" variant that is neither
    present on disk nor already claimed by another planned rename."""
    # Checking the disk alone is not enough: 'a.txt' and 'a.doc' would both
    # be planned as 'a.md' and the second rename would clobber the first.
    base, ext = os.path.splitext(desired)
    candidate = desired
    counter = 0
    while os.path.lexists(candidate) or candidate in reserved:
        counter += 1
        candidate = f"{base}({counter}){ext}"
    return candidate
if __name__ == "__main__":
    # Example: change .txt and .doc files under /path/to/files to .md
    # using 4 worker threads. Replace the placeholder path before running.
    setup_logging()
    batch_rename_files(
        root_dir="/path/to/files",
        old_exts=['txt', '.doc'],
        new_ext='md',
        dry_run=False,
        max_workers=4
    )
这个增强版脚本包含了我们讨论的所有最佳实践:
在实际使用中,建议先使用dry_run=True参数测试,确认无误后再执行实际修改。对于特别重要的文件,操作前最好先进行完整备份。