1. Python与MySQL交互基础
PyMySQL是Python中用于连接MySQL数据库的纯Python实现库,它完全遵循Python DB-API 2.0规范。相比MySQLdb,PyMySQL的优势在于:
- 纯Python实现,无需编译安装
- 兼容Python 3.x
- 支持最新的MySQL协议和特性
- 活跃的社区维护
在实际项目中,PyMySQL常用于:
- Web应用的后端数据存储
- 数据分析前的数据提取
- 自动化脚本的数据持久化
- 数据库迁移和管理工具开发
提示:虽然PyMySQL性能略低于C实现的MySQLdb,但对于大多数应用场景已经完全够用。只有在极端性能要求的场景下才需要考虑使用MySQLdb或mysqlclient。
2. 环境准备与安装
2.1 安装PyMySQL
安装PyMySQL非常简单,使用pip即可完成:
```bash
pip install pymysql
```
对于生产环境,建议固定版本号安装:
```bash
pip install pymysql==1.0.2
```
2.2 MySQL服务准备
在使用PyMySQL前,确保你已经:
- 安装并运行了MySQL服务(5.7或8.0版本)
- 创建了具有适当权限的数据库用户
- 知道数据库的连接参数(主机、端口、用户名、密码等)
可以通过以下命令检查MySQL服务状态:
```bash
# Linux系统
systemctl status mysql
# macOS
brew services list | grep mysql
```
3. 数据库连接管理
3.1 基础连接配置
以下是标准的数据库连接方式:
python复制import pymysql
# 创建连接
connection = pymysql.connect(
host='localhost', # 数据库服务器地址
user='db_user', # 用户名
password='db_password', # 密码
database='test_db', # 数据库名
port=3306, # 端口(默认3306)
charset='utf8mb4', # 字符编码
cursorclass=pymysql.cursors.DictCursor # 返回字典格式结果
)
关键参数说明:
- charset:必须设置为utf8mb4以支持完整的Unicode字符(包括emoji)
- cursorclass:DictCursor使返回结果为字典,默认是元组
- autocommit:默认为False,需要手动提交事务
3.2 连接池实现
对于高并发应用,建议使用连接池管理数据库连接。可以使用DBUtils库:
python复制from dbutils.pooled_db import PooledDB
pool = PooledDB(
creator=pymysql,
maxconnections=10, # 连接池最大连接数
mincached=2, # 初始化时创建的连接数
host='localhost',
user='db_user',
password='db_password',
database='test_db',
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor
)
# 从连接池获取连接
connection = pool.connection()
3.3 连接最佳实践
- 使用上下文管理器确保连接正确关闭:
python复制with pymysql.connect(...) as conn:
with conn.cursor() as cursor:
cursor.execute("SELECT * FROM users")
result = cursor.fetchall()
# 退出with块后连接自动关闭
- 连接超时设置:
python复制connection = pymysql.connect(
...,
connect_timeout=10, # 连接超时时间(秒)
read_timeout=30, # 读取超时时间
write_timeout=30 # 写入超时时间
)
- SSL连接(生产环境推荐):
python复制connection = pymysql.connect(
...,
ssl={
'ca': '/path/to/ca.pem',
'cert': '/path/to/client-cert.pem',
'key': '/path/to/client-key.pem'
}
)
4. 数据库与表操作
4.1 数据库管理
def _quote_db_name(db_name):
    """Validate *db_name* and return it wrapped in backticks.

    Identifiers cannot be passed as query parameters, so the name is
    restricted to ASCII letters, digits and underscores (1-64 characters)
    before it is interpolated into a statement.

    Raises:
        ValueError: If *db_name* is not such an identifier.
    """
    if (
        not isinstance(db_name, str)
        or not 0 < len(db_name) <= 64
        or not db_name.isascii()
        or not db_name.replace("_", "").isalnum()
    ):
        raise ValueError(f"Invalid database name: {db_name!r}")
    return f"`{db_name}`"


def create_database(conn, db_name):
    """Create the database *db_name* if it does not already exist.

    The database is created with the utf8mb4 character set and the
    utf8mb4_unicode_ci collation.

    Args:
        conn: Open database connection.
        db_name: Name of the database to create.

    Raises:
        ValueError: If *db_name* is not a plain identifier.
    """
    sql = (
        f"CREATE DATABASE IF NOT EXISTS {_quote_db_name(db_name)} "
        "CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"
    )
    with conn.cursor() as cursor:
        cursor.execute(sql)
    conn.commit()


def drop_database(conn, db_name):
    """Drop the database *db_name* if it exists.

    Args:
        conn: Open database connection.
        db_name: Name of the database to drop.

    Raises:
        ValueError: If *db_name* is not a plain identifier.
    """
    sql = f"DROP DATABASE IF EXISTS {_quote_db_name(db_name)}"
    with conn.cursor() as cursor:
        cursor.execute(sql)
    conn.commit()
注意:直接拼接SQL语句存在安全风险,此处仅作演示。实际应用中,数据库名应通过白名单验证。
4.2 表设计与管理
4.2.1 创建表
def create_user_table(conn):
    """Create the ``users`` table if it does not already exist.

    Args:
        conn: Open database connection.
    """
    # `username` and `email` are declared UNIQUE, which already creates an
    # index on each of them, so no separate secondary index is declared for
    # email. Only created_at needs an explicit index.
    sql = """
        CREATE TABLE IF NOT EXISTS users (
            id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT,
            username VARCHAR(50) NOT NULL UNIQUE,
            password CHAR(60) NOT NULL COMMENT '存储bcrypt哈希值',
            email VARCHAR(100) UNIQUE,
            age TINYINT UNSIGNED,
            is_active BOOLEAN DEFAULT TRUE,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
            INDEX idx_created_at (created_at)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='系统用户表'
    """
    with conn.cursor() as cursor:
        cursor.execute(sql)
    conn.commit()
表设计要点:
- 使用 `UNSIGNED` 表示无符号数,扩大正数范围
- 密码字段应足够长以存储哈希值
- 为常用查询字段添加索引
- 使用 `ON UPDATE CURRENT_TIMESTAMP` 自动更新修改时间
4.2.2 修改表结构
def add_column_to_table(conn, table_name, column_definition):
    """Add a column to *table_name* with ``ALTER TABLE ... ADD COLUMN``.

    Args:
        conn: Open database connection.
        table_name: Target table. Must consist of ASCII letters, digits and
            underscores only; it is backtick-quoted before use.
        column_definition: Raw DDL fragment describing the new column. It is
            interpolated verbatim because DDL cannot be parameterized, so it
            must come from trusted code, never from user input.

    Raises:
        ValueError: If *table_name* is not a plain identifier.
    """
    if (
        not isinstance(table_name, str)
        or not 0 < len(table_name) <= 64
        or not table_name.isascii()
        or not table_name.replace("_", "").isalnum()
    ):
        raise ValueError(f"Invalid table name: {table_name!r}")
    sql = f"ALTER TABLE `{table_name}` ADD COLUMN {column_definition}"
    with conn.cursor() as cursor:
        cursor.execute(sql)
    conn.commit()
# 示例:添加手机号字段
add_column_to_table(
conn,
"users",
"phone VARCHAR(20) NULL COMMENT '用户手机号' AFTER email"
)
5. CRUD操作详解
5.1 插入数据
5.1.1 单条插入
def insert_user(conn, user_data):
    """Insert one user row and return its auto-increment id.

    Args:
        conn: Open database connection.
        user_data: Mapping with the keys ``username``, ``password``,
            ``email`` and ``age``; the values are bound as named parameters.

    Returns:
        The cursor's ``lastrowid`` after the insert.

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    sql = """
        INSERT INTO users (username, password, email, age)
        VALUES (%(username)s, %(password)s, %(email)s, %(age)s)
    """
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, user_data)
            user_id = cursor.lastrowid
        conn.commit()
    except Exception:
        # Do not leave a half-finished transaction open on the connection.
        conn.rollback()
        raise
    return user_id
# 使用示例
user_id = insert_user(conn, {
'username': 'john_doe',
'password': 'hashed_password',
'email': 'john@example.com',
'age': 30
})
5.1.2 批量插入
def batch_insert_users(conn, users):
    """Insert several users with a single ``executemany`` call.

    Args:
        conn: Open database connection.
        users: Iterable of mappings, each with the keys ``username``,
            ``password``, ``email`` and ``age``.

    Returns:
        The cursor's ``rowcount`` after the batch.

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    sql = """
        INSERT INTO users (username, password, email, age)
        VALUES (%s, %s, %s, %s)
    """
    rows = [
        (u['username'], u['password'], u['email'], u['age'])
        for u in users
    ]
    try:
        with conn.cursor() as cursor:
            cursor.executemany(sql, rows)
            inserted = cursor.rowcount
        conn.commit()
    except Exception:
        # Keep the batch atomic: either every row is committed or none is.
        conn.rollback()
        raise
    return inserted


# Performance note: executemany is reported to be roughly 5-10x faster than
# calling execute in a loop.
5.2 查询数据
5.2.1 基础查询
def get_user_by_id(conn, user_id):
    """Fetch a single user row by primary key.

    Args:
        conn: Open database connection.
        user_id: Value matched against ``users.id``.

    Returns:
        Whatever ``cursor.fetchone()`` yields for the query, i.e. the first
        matching row in the cursor's row format.
    """
    sql = "SELECT * FROM users WHERE id = %s"
    with conn.cursor() as cursor:
        cursor.execute(sql, (user_id,))
        return cursor.fetchone()
def get_active_users(conn):
    """Return id, username and email of every user flagged as active.

    Args:
        conn: Open database connection.

    Returns:
        The result of ``cursor.fetchall()`` for the query.
    """
    sql = "SELECT id, username, email FROM users WHERE is_active = TRUE"
    with conn.cursor() as cursor:
        cursor.execute(sql)
        return cursor.fetchall()
5.2.2 分页查询
def get_users_paginated(conn, page=1, per_page=10):
    """Return one page of users, newest first.

    Args:
        conn: Open database connection.
        page: 1-based page number.
        per_page: Number of rows per page.

    Returns:
        The result of ``cursor.fetchall()`` for the requested page.

    Raises:
        ValueError: If *page* or *per_page* is smaller than 1 (a smaller
            page number would produce a negative OFFSET).
    """
    if page < 1 or per_page < 1:
        raise ValueError("page and per_page must be positive integers")
    offset = (page - 1) * per_page
    # `id` breaks ties between rows sharing the same created_at, so rows are
    # neither repeated nor skipped across consecutive pages.
    sql = """
        SELECT id, username, email, created_at
        FROM users
        ORDER BY created_at DESC, id DESC
        LIMIT %s OFFSET %s
    """
    with conn.cursor() as cursor:
        cursor.execute(sql, (per_page, offset))
        return cursor.fetchall()
5.2.3 复杂查询
def search_users(conn, filters):
    """Search users by any combination of optional filters.

    Args:
        conn: Open database connection.
        filters: Mapping that may contain ``username`` (substring match),
            ``min_age`` (inclusive lower bound) and ``is_active``. Keys that
            are absent add no condition.

    Returns:
        The result of ``cursor.fetchall()``, ordered by created_at
        descending.
    """
    conditions = []
    params = []
    if 'username' in filters:
        # Escape LIKE metacharacters so the search term is matched literally
        # rather than letting '%' / '_' in the input act as wildcards.
        term = (
            str(filters['username'])
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        conditions.append("username LIKE %s")
        params.append(f"%{term}%")
    if 'min_age' in filters:
        conditions.append("age >= %s")
        params.append(filters['min_age'])
    if 'is_active' in filters:
        conditions.append("is_active = %s")
        params.append(filters['is_active'])

    # Only fixed condition fragments are interpolated; every value is bound.
    where_clause = " AND ".join(conditions) if conditions else "1=1"
    sql = f"""
        SELECT id, username, email, age, created_at
        FROM users
        WHERE {where_clause}
        ORDER BY created_at DESC
    """
    with conn.cursor() as cursor:
        cursor.execute(sql, params)
        return cursor.fetchall()
5.3 更新数据
def update_user(conn, user_id, update_data):
    """Update the given columns of one user.

    Args:
        conn: Open database connection.
        user_id: Value matched against ``users.id``.
        update_data: Mapping of column name to new value. Column names must
            consist of ASCII letters, digits and underscores; values are
            bound as query parameters.

    Returns:
        The cursor's ``rowcount``, or 0 without touching the database when
        *update_data* is empty.

    Raises:
        ValueError: If a column name is not a plain identifier.
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    if not update_data:
        # An empty SET clause would be a syntax error; nothing to do.
        return 0

    assignments = []
    for column in update_data:
        # Column names cannot be bound as parameters, so validate and quote.
        if not (
            isinstance(column, str)
            and column.isascii()
            and column.replace("_", "").isalnum()
        ):
            raise ValueError(f"Invalid column name: {column!r}")
        assignments.append(f"`{column}` = %s")
    set_clause = ", ".join(assignments)

    sql = f"""
        UPDATE users
        SET {set_clause}
        WHERE id = %s
    """
    params = [*update_data.values(), user_id]
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, params)
            affected = cursor.rowcount
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    return affected
# 使用示例
update_user(conn, 1, {
'email': 'new_email@example.com',
'age': 31
})
5.4 删除数据
def delete_user(conn, user_id):
    """Soft-delete a user by setting ``is_active`` to FALSE.

    Args:
        conn: Open database connection.
        user_id: Value matched against ``users.id``.

    Returns:
        The cursor's ``rowcount`` for the UPDATE.

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    sql = "UPDATE users SET is_active = FALSE WHERE id = %s"
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, (user_id,))
            affected = cursor.rowcount
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    return affected
def hard_delete_user(conn, user_id):
    """Physically delete a user row.

    Args:
        conn: Open database connection.
        user_id: Value matched against ``users.id``.

    Returns:
        The cursor's ``rowcount`` for the DELETE.

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    sql = "DELETE FROM users WHERE id = %s"
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, (user_id,))
            affected = cursor.rowcount
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    return affected
最佳实践:生产环境尽量使用软删除(标记删除),避免数据永久丢失。
6. 事务处理
6.1 基础事务
def transfer_funds(conn, from_account, to_account, amount):
    """Move *amount* from one account to another inside one transaction.

    The source row is locked with ``SELECT ... FOR UPDATE`` before its
    balance is checked. The balance row is read by key (``row['balance']``),
    so the connection must use a dict-style cursor.

    Args:
        conn: Open database connection.
        from_account: Id of the account to debit.
        to_account: Id of the account to credit.
        amount: Amount to transfer; must be greater than zero.

    Returns:
        True if the transfer was committed. False if anything failed, in
        which case the transaction is rolled back and the reason is printed.
    """
    try:
        # A zero or negative amount would turn the transfer into a no-op or
        # a withdrawal from the destination account.
        if amount <= 0:
            raise ValueError("Transfer amount must be positive")

        with conn.cursor() as cursor:
            # Lock the source row and check its balance.
            cursor.execute(
                "SELECT balance FROM accounts WHERE id = %s FOR UPDATE",
                (from_account,)
            )
            row = cursor.fetchone()
            if row is None:
                raise ValueError(f"Account {from_account} not found")
            if row['balance'] < amount:
                raise ValueError("Insufficient funds")

            # Debit the source account.
            cursor.execute(
                "UPDATE accounts SET balance = balance - %s WHERE id = %s",
                (amount, from_account)
            )

            # Credit the destination account.
            cursor.execute(
                "UPDATE accounts SET balance = balance + %s WHERE id = %s",
                (amount, to_account)
            )
            # amount > 0 always changes the row, so anything other than one
            # affected row means the destination does not exist; without
            # this check the debited money would silently disappear.
            if cursor.rowcount != 1:
                raise ValueError(f"Account {to_account} not found")

        conn.commit()
        return True
    except Exception as e:
        conn.rollback()
        print(f"Transfer failed: {e}")
        return False
6.2 保存点(Savepoint)
def complex_operation(conn):
    """Run a multi-step transaction that uses a savepoint.

    The SQL statements are placeholders from the tutorial. The flow is:
    main insert, savepoint, an optional step whose failure only rolls back
    to the savepoint, a final insert, then commit.

    Args:
        conn: Open database connection.

    Raises:
        Exception: Any failure outside the optional step is re-raised after
            the whole transaction has been rolled back, so the caller can
            tell that nothing was committed.
    """
    try:
        with conn.cursor() as cursor:
            # Main operation.
            cursor.execute("INSERT INTO table1 VALUES (...)")

            # Mark the point to return to if the optional step fails.
            cursor.execute("SAVEPOINT sp1")

            try:
                # Optional step that is allowed to fail.
                cursor.execute("UPDATE table2 SET ...")
            except Exception:
                # Undo only the optional step; the main insert is kept.
                cursor.execute("ROLLBACK TO SAVEPOINT sp1")

            # Continue with the remaining work.
            cursor.execute("INSERT INTO table3 VALUES (...)")

        conn.commit()
    except Exception:
        conn.rollback()
        raise
7. 高级特性
7.1 存储过程调用
def call_stored_procedure(conn, user_id):
    """Call the ``get_user_details`` stored procedure and return its rows.

    Args:
        conn: Open database connection.
        user_id: Single argument passed to the procedure.

    Returns:
        The result of ``cursor.fetchall()`` after the call.
    """
    with conn.cursor() as cursor:
        cursor.callproc('get_user_details', (user_id,))
        return cursor.fetchall()
7.2 批量操作优化
def _format_tsv_row(row):
    """Render one row as a tab-separated line for LOAD DATA.

    None is written as ``\\N`` (SQL NULL); backslashes, tabs and newlines
    inside values are backslash-escaped so they cannot break the
    field/line structure.
    """
    fields = []
    for value in row:
        if value is None:
            fields.append("\\N")
            continue
        text = str(value)
        text = text.replace("\\", "\\\\").replace("\t", "\\t").replace("\n", "\\n")
        fields.append(text)
    return "\t".join(fields) + "\n"


def bulk_insert_performance(conn, data, use_load_data=False):
    """Bulk-insert *data* using exactly one of two strategies.

    Args:
        conn: Open database connection.
        data: Iterable of two-item rows.
        use_load_data: When False (default) the rows are sent with
            ``executemany``. When True they are written to a temporary
            TSV file and loaded with ``LOAD DATA LOCAL INFILE``, which
            presumably requires local-infile to be enabled on both the
            connection and the server - verify before relying on it.
    """
    rows = list(data)
    with conn.cursor() as cursor:
        if not use_load_data:
            # `table` is a reserved word, so it has to be backtick-quoted.
            cursor.executemany("INSERT INTO `table` VALUES (%s, %s)", rows)
        else:
            import tempfile

            with tempfile.NamedTemporaryFile(
                mode='w', encoding='utf-8', newline='', suffix='.tsv'
            ) as f:
                f.writelines(_format_tsv_row(row) for row in rows)
                f.flush()  # make sure the data is on disk before loading
                # The file name is bound as a parameter so that quotes or
                # backslashes in the path are escaped correctly.
                cursor.execute(
                    "LOAD DATA LOCAL INFILE %s "
                    "INTO TABLE `table` "
                    "CHARACTER SET utf8mb4 "
                    "FIELDS TERMINATED BY '\\t' ESCAPED BY '\\\\' "
                    "LINES TERMINATED BY '\\n'",
                    (f.name,),
                )
    conn.commit()
7.3 连接池与多线程
from concurrent.futures import ThreadPoolExecutor


def multi_thread_query(pool, query, params_list):
    """Run *query* once per parameter set on a pool of five threads.

    Each worker takes its own connection from *pool* via
    ``pool.connection()``, so no connection is shared between threads.

    Args:
        pool: Object whose ``connection()`` returns a context-managed
            connection.
        query: SQL text executed for every parameter set.
        params_list: Iterable of parameter sequences, one per execution.

    Returns:
        A flat list with the rows of every execution, in the order of
        *params_list*.
    """
    def worker(params):
        with pool.connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()

    results = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() yields results in submission order, not completion order.
        for rows in executor.map(worker, params_list):
            results.extend(rows)
    return results
8. 性能优化与安全
8.1 查询优化技巧
- 使用索引提示:
python复制cursor.execute("""
SELECT /*+ INDEX(users idx_username) */ *
FROM users
WHERE username LIKE %s
""", ('john%',))
- 避免 `SELECT *`:
python复制# 不好
cursor.execute("SELECT * FROM users")
# 好 - 只查询需要的列
cursor.execute("SELECT id, username, email FROM users")
- 使用流式游标(SSCursor)逐行读取大结果集,避免一次性将全部数据加载到内存:
python复制# 使用SSCursor流式获取大量数据
with conn.cursor(pymysql.cursors.SSCursor) as cursor:
cursor.execute("SELECT * FROM large_table")
for row in cursor:
process_row(row)
8.2 安全最佳实践
- 永远使用参数化查询:
python复制# 危险 - SQL注入风险
cursor.execute(f"SELECT * FROM users WHERE username = '{username}'")
# 安全
cursor.execute("SELECT * FROM users WHERE username = %s", (username,))
- 最小权限原则:
python复制# 创建专用数据库用户
"""
CREATE USER 'app_user'@'%' IDENTIFIED BY 'strong_password';
GRANT SELECT, INSERT, UPDATE ON app_db.* TO 'app_user'@'%';
"""
- 敏感数据加密:
from cryptography.fernet import Fernet


def encrypt_data(key, data):
    """Encrypt the string *data* with Fernet symmetric encryption.

    Args:
        key: Fernet key.
        data: Text to encrypt; it is encoded to bytes before encryption.

    Returns:
        The encrypted token produced by ``Fernet.encrypt``.
    """
    return Fernet(key).encrypt(data.encode())


# Encrypt reversible sensitive fields before storing them.
# Passwords are deliberately NOT encrypted this way: they must be stored as
# one-way hashes (the users.password column is CHAR(60), sized for a bcrypt
# hash), never as ciphertext that can be decrypted again.
user_data['id_card_no'] = encrypt_data(secret_key, raw_id_card_no)
9. 常见问题与解决方案
9.1 连接问题
问题1:连接超时
python复制try:
conn = pymysql.connect(..., connect_timeout=10)
except pymysql.OperationalError as e:
if "timeout" in str(e).lower():
print("连接超时,请检查网络或增加超时时间")
问题2:连接过多
python复制# 查看当前连接数
cursor.execute("SHOW STATUS LIKE 'Threads_connected'")
print(cursor.fetchone())
# 解决方案:使用连接池或增加max_connections
9.2 数据一致性问题
问题:部分更新成功
python复制try:
with conn.cursor() as cursor:
cursor.execute("UPDATE table1 SET ...")
cursor.execute("UPDATE table2 SET ...") # 可能失败
conn.commit()
except Exception:
conn.rollback()
print("已回滚所有更改")
9.3 性能问题
问题:慢查询
# Enable the slow query log. Each statement is sent on its own because a
# single execute() call does not accept several ';'-separated statements
# unless multi-statement support is explicitly enabled on the connection.
for statement in (
    "SET GLOBAL slow_query_log = 'ON'",
    "SET GLOBAL long_query_time = 1",
    "SET GLOBAL slow_query_log_file = '/var/log/mysql/mysql-slow.log'",
):
    cursor.execute(statement)

# List the ten statement digests with the highest total execution time.
cursor.execute("""
    SELECT * FROM performance_schema.events_statements_summary_by_digest
    ORDER BY SUM_TIMER_WAIT DESC LIMIT 10
""")
10. 实际应用案例
10.1 Web应用集成
# Flask example
from flask import Flask, g

app = Flask(__name__)
app.config['DB_CONFIG'] = {
    'host': 'localhost',
    'user': 'web_user',
    'password': 'web_password',
    'database': 'web_db'
}


def get_db():
    """Return the connection cached on ``g``, opening it on first use."""
    if 'db' not in g:
        g.db = pymysql.connect(**app.config['DB_CONFIG'])
    return g.db


@app.teardown_appcontext
def close_db(e=None):
    """Close the connection cached on ``g``, if one was opened."""
    db = g.pop('db', None)
    if db is not None:
        db.close()


@app.route('/users')
def list_users():
    """Return id and username of up to 100 users."""
    db = get_db()
    with db.cursor() as cursor:
        cursor.execute("SELECT id, username FROM users LIMIT 100")
        users = cursor.fetchall()
    return {'users': users}
10.2 数据分析管道
def export_to_pandas(conn, query, params=None):
    """Run *query* and return its result set as a pandas DataFrame.

    Args:
        conn: Open database connection.
        query: SQL text to execute.
        params: Optional query parameters; an empty tuple is used when
            omitted.

    Returns:
        A DataFrame whose columns are named after ``cursor.description``.
    """
    import pandas as pd

    with conn.cursor() as cursor:
        cursor.execute(query, params or ())
        columns = [col[0] for col in cursor.description]
        # Normalise to a list so the constructor receives the same container
        # type whether the driver hands back a tuple or a list of rows.
        rows = list(cursor.fetchall())
    return pd.DataFrame(rows, columns=columns)
# 使用示例
df = export_to_pandas(conn, "SELECT * FROM sales WHERE date > %s", ('2023-01-01',))
print(df.describe())
10.3 数据库迁移脚本
def migrate_data(source_conn, target_conn, batch_size=1000):
    """Copy every row of ``source_table`` into ``target_table`` in batches.

    The source is read through an unbuffered ``SSCursor`` and each batch is
    committed on the target as soon as it has been written.

    Args:
        source_conn: Connection to read from.
        target_conn: Connection to write to.
        batch_size: Number of rows fetched and inserted per batch.

    Returns:
        The total number of rows read from the source and sent to the
        target.
    """
    insert_sql = None
    total = 0
    with source_conn.cursor(pymysql.cursors.SSCursor) as src_cursor, \
            target_conn.cursor() as tgt_cursor:
        src_cursor.execute("SELECT * FROM source_table")
        while True:
            batch = src_cursor.fetchmany(batch_size)
            if not batch:
                break
            if insert_sql is None:
                # One placeholder per selected column instead of a fixed
                # count, so the copy works for any table width.
                placeholders = ", ".join(["%s"] * len(src_cursor.description))
                insert_sql = f"INSERT INTO target_table VALUES ({placeholders})"
            tgt_cursor.executemany(insert_sql, batch)
            target_conn.commit()
            total += len(batch)
    return total
11. 监控与维护
11.1 数据库监控
def check_database_health(conn):
    """Collect a few basic health metrics.

    Rows are read by key and merged with ``dict.update``, so the connection
    must use a dict-style cursor.

    Args:
        conn: Open database connection.

    Returns:
        A dict with ``connections`` (the ``Threads_connected`` status value)
        plus ``queries``, ``avg_time`` and ``avg_lock_time`` aggregated over
        all statement digests.
    """
    metrics = {}
    with conn.cursor() as cursor:
        # Number of currently open client connections.
        cursor.execute("SHOW STATUS LIKE 'Threads_connected'")
        metrics['connections'] = cursor.fetchone()['Value']

        # Aggregate statement statistics. The digest table exposes
        # COUNT_STAR / SUM_TIMER_WAIT / SUM_LOCK_TIME; its timers are in
        # picoseconds, hence the division by 1e12 to report seconds.
        # NULLIF avoids a division by zero when no statement was recorded.
        cursor.execute("""
            SELECT
                SUM(COUNT_STAR) AS queries,
                SUM(SUM_TIMER_WAIT) / NULLIF(SUM(COUNT_STAR), 0) / 1e12 AS avg_time,
                SUM(SUM_LOCK_TIME) / NULLIF(SUM(COUNT_STAR), 0) / 1e12 AS avg_lock_time
            FROM performance_schema.events_statements_summary_by_digest
        """)
        metrics.update(cursor.fetchone())
    return metrics
11.2 定期维护任务
def run_maintenance(conn):
    """Run the periodic maintenance tasks.

    Steps, in order: optimize the ``users`` and ``accounts`` tables, copy
    the active users into a dated backup table, delete ``session_logs``
    rows older than 90 days, then commit.

    Args:
        conn: Open database connection.
    """
    from datetime import date

    # Date-stamped name plus IF NOT EXISTS keeps the task repeatable; a
    # fixed table name makes every run after the first one fail.
    backup_table = f"users_backup_{date.today():%Y%m%d}"

    with conn.cursor() as cursor:
        # Optimize tables.
        cursor.execute("OPTIMIZE TABLE users, accounts")

        # Back up key data.
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {backup_table} AS
            SELECT * FROM users WHERE is_active = TRUE
        """)

        # Purge expired data.
        cursor.execute("""
            DELETE FROM session_logs
            WHERE created_at < DATE_SUB(NOW(), INTERVAL 90 DAY)
        """)
    conn.commit()
12. 替代方案与比较
12.1 PyMySQL vs MySQLdb
| 特性 | PyMySQL | MySQLdb |
|---|---|---|
| 实现语言 | 纯Python | C扩展 |
| Python 3支持 | 是 | 需要mysqlclient |
| 性能 | 中等 | 高 |
| 安装难度 | 简单 | 需要编译 |
| 异步支持 | 通过aiomysql | 无 |
12.2 ORM替代方案
对于复杂应用,可以考虑使用ORM工具:
- SQLAlchemy:功能最全的Python ORM
- Django ORM:Django框架内置的ORM
- Peewee:轻量级ORM,学习曲线低
# SQLAlchemy example
from sqlalchemy import create_engine, text

engine = create_engine('mysql+pymysql://user:password@localhost/db')
# Engine.execute() was removed in SQLAlchemy 2.0: statements are executed on
# an explicit connection, and raw SQL strings are wrapped in text().
with engine.connect() as connection:
    results = connection.execute(text("SELECT * FROM users"))
    for row in results:
        print(row)
13. 最佳实践总结
- 连接管理:
  - 使用连接池管理数据库连接
  - 确保连接在使用后正确关闭
  - 设置合理的连接超时参数
- 查询优化:
  - 只查询需要的列
  - 合理使用索引
  - 对大结果集使用流式获取
- 事务控制:
  - 保持事务尽可能短小
  - 明确处理提交和回滚
  - 考虑使用保存点处理复杂事务
- 安全防护:
  - 永远使用参数化查询
  - 遵循最小权限原则
  - 加密敏感数据
- 错误处理:
  - 捕获并妥善处理数据库异常
  - 记录详细的错误日志
  - 实现重试机制处理临时性故障
在实际项目中,我通常会创建一个单独的数据库模块来封装所有数据库操作,这样既保证了代码复用性,又能集中管理数据库相关的配置和错误处理。对于Web应用,建议结合框架的生命周期管理数据库连接,确保每个请求都能获取到可用的连接而不会泄漏。