作为一名长期使用Python进行数据处理和科学计算的开发者,我深刻体会到NumPy在Python科学计算生态中的核心地位。NumPy不仅提供了高效的多维数组对象,还包含了大量数学函数库,是几乎所有Python科学计算工具的基础。今天,我将分享如何从零开始掌握NumPy的核心用法。
说明:本文标题和开头提到的是NumPy,但正文实际讲解的是SQLAlchemy ORM。以下内容均围绕SQLAlchemy ORM的安装、建模、查询与事务处理展开。
在Python生态中,数据库操作主要有两种方式:原始SQL和ORM。SQLAlchemy ORM的优势在于:
我曾在多个项目中使用SQLAlchemy ORM,包括用户量百万级的Web应用,其稳定性和性能表现都非常出色。
```bash
pip install sqlalchemy
```
根据您的数据库类型选择对应的驱动:
```bash
# PostgreSQL
pip install psycopg2-binary
# MySQL
pip install mysql-connector-python
# SQLite (Python内置支持,无需额外安装驱动)
```
注意:生产环境建议使用完整版的psycopg2而非binary版本,后者可能存在兼容性问题。
python复制import sqlalchemy
print(sqlalchemy.__version__) # 应输出1.4.x或更高版本
SQLAlchemy ORM建立在几个核心组件之上:
```mermaid
graph TD
    A[Engine] --> B[Session]
    B --> C[Model]
    C --> D[Query]
```
python复制from sqlalchemy import create_engine
# SQLite configuration (recommended for development).
# NOTE: pool_size / max_overflow are deliberately NOT passed here. On
# SQLAlchemy 1.4 a file-based SQLite engine uses NullPool, which rejects
# those arguments with a TypeError; they only apply to QueuePool-style pools.
engine = create_engine(
    'sqlite:///app.db',
    echo=True,  # log every emitted SQL statement
)

# PostgreSQL production configuration example (QueuePool is the default
# there, so the pool sizing options are valid):
# engine = create_engine(
#     'postgresql://user:pass@localhost:5432/mydb',
#     pool_size=5,         # connections kept open in the pool
#     max_overflow=10,     # extra connections allowed beyond pool_size
#     pool_pre_ping=True,  # check that a connection is alive before use
#     pool_recycle=3600,   # recycle connections after one hour
# )
python复制from sqlalchemy.orm import sessionmaker
# Session factory bound to the engine. Both automatic modes are switched off,
# so flushing and committing only happen when the code asks for them.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
# Generator that hands out a session and guarantees it is closed afterwards.
def get_db():
    """Yield a new database session and close it when the caller is done.

    The session is produced at the ``yield``; it is closed when the generator
    is resumed or closed, including when the caller raised an exception.

    Yields:
        A session created by ``SessionLocal``.
    """
    # A Session is its own context manager (SQLAlchemy 1.4+): leaving the
    # ``with`` block calls close() on it.
    with SessionLocal() as session:
        yield session
python复制from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
python复制from sqlalchemy import Column, Integer, String, DateTime
from datetime import datetime
class User(Base):
    """ORM model mapped to the ``users`` table."""

    __tablename__ = 'users'

    # Auto-incrementing surrogate primary key.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Login name: required and unique.
    username = Column(String(50), nullable=False, unique=True)
    # Unique when present; NULL is allowed.
    email = Column(String(120), unique=True)
    # Set on INSERT; the callable is invoked for each new row.
    created_at = Column(DateTime, default=datetime.utcnow)
    # Set on INSERT and refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    def __repr__(self):
        """Return a short debugging representation (id and username)."""
        return f"<User(id={self.id}, username='{self.username}')>"
python复制from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship
class Post(Base):
    """ORM model mapped to the ``posts`` table."""

    __tablename__ = 'posts'

    id = Column(Integer, primary_key=True)
    title = Column(String(100))
    content = Column(String)
    user_id = Column(Integer, ForeignKey('users.id'))
    # Creation time. The grouped statistics query further down aggregates on
    # Post.created_at, so the column has to exist on the model.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Many-to-one: the user who wrote this post (paired with User.posts).
    author = relationship("User", back_populates="posts")
    # Many-to-many: tags attached through the ``post_tags`` association table.
    tags = relationship("Tag", secondary="post_tags", back_populates="posts")


class Tag(Base):
    """ORM model mapped to the ``tags`` table.

    Post.tags and the ``post_tags.tag_id`` foreign key both refer to this
    model; without it the mappers cannot be configured.
    """

    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    # NOTE(review): minimal assumed schema, extend with the real tag columns.
    name = Column(String(50), unique=True, nullable=False)

    # Reverse side of Post.tags.
    posts = relationship("Post", secondary="post_tags", back_populates="tags")
# Attach the reverse side of Post.author to the User class. It is assigned
# here, after both classes exist, and is paired with Post.author through
# back_populates.
User.posts = relationship("Post", back_populates="author")
# 关联表模型
class PostTag(Base):
    """Association model mapped to ``post_tags``, linking posts and tags."""

    __tablename__ = 'post_tags'

    # The two foreign keys together form the composite primary key.
    post_id = Column(
        Integer,
        ForeignKey('posts.id'),
        primary_key=True,
    )
    tag_id = Column(
        Integer,
        ForeignKey('tags.id'),
        primary_key=True,
    )
    # When the link between the post and the tag was created.
    created_at = Column(DateTime, default=datetime.utcnow)
python复制Base.metadata.create_all(bind=engine)
```bash
pip install alembic
alembic init migrations
```
编辑alembic.ini配置数据库URL,然后在env.py中设置target_metadata:
python复制from models import Base
target_metadata = Base.metadata
生成迁移脚本:
```bash
alembic revision --autogenerate -m "Initial migration"
alembic upgrade head
```
python复制# 单个对象
new_user = User(username='johndoe', email='john@example.com')
db.add(new_user)
db.commit()

# Insert several objects at once.
# add_all() keeps the objects attached to the session, so their primary keys
# are available after the commit. The legacy bulk_save_objects() API leaves
# the objects detached and does not populate primary keys by default.
users = [
    User(username='alice', email='alice@example.com'),
    User(username='bob', email='bob@example.com'),
]
db.add_all(users)
db.commit()
python复制# 获取全部用户
users = db.query(User).all()

# Simple equality filter; first() returns None when nothing matches.
user = db.query(User).filter_by(username='johndoe').first()

# Compound filter: username starts with "j" OR email contains "example",
# newest first, at most ten rows.
from sqlalchemy import or_

active_users = (
    db.query(User)
    .filter(
        or_(
            User.username.like('j%'),
            User.email.contains('example'),
        )
    )
    .order_by(User.created_at.desc())
    .limit(10)
    .all()
)
# Update a single object loaded by primary key.
# Session.get() returns None when the row does not exist, so guard before
# touching attributes.
user = db.get(User, 1)
if user is not None:
    user.email = 'newemail@example.com'
    db.commit()

# Bulk update.
# The new value must be a SQL expression evaluated by the database for each
# row; a Python lambda is not a valid UPDATE value. Column + string builds a
# SQL string concatenation.
db.query(User).filter(User.username.like('j%')).update(
    {User.email: User.username + '@company.com'},
    synchronize_session=False,  # objects already in the session are not refreshed
)
db.commit()
# Delete a single object loaded by primary key.
# Session.get() returns None for a missing row, and delete(None) would raise.
user = db.get(User, 1)
if user is not None:
    db.delete(user)
    db.commit()

# Bulk delete.
# synchronize_session=False skips matching the criteria against objects in
# the session; the in-Python "evaluate" strategy may not support LIKE
# filters and can fail with an InvalidRequestError.
db.query(User).filter(User.username.like('test%')).delete(
    synchronize_session=False
)
db.commit()
python复制# 预加载关联对象避免N+1问题
from sqlalchemy.orm import joinedload
# Load every post together with its author and tags through joined eager
# loading, so accessing post.author / post.tags later issues no extra queries.
posts = (
    db.query(Post)
    .options(joinedload(Post.author), joinedload(Post.tags))
    .all()
)
# Per-user post counts computed in a subquery.
from sqlalchemy import func

# Count a real column: func.count('*') would send the string '*' as a bound
# literal instead of rendering COUNT(*).
subq = db.query(
    Post.user_id,
    func.count(Post.id).label('post_count'),
).group_by(Post.user_id).subquery()

# The LEFT OUTER JOIN keeps users that have no posts; coalesce() turns their
# missing count (NULL) into 0 so callers always receive a number.
user_post_counts = db.query(
    User.username,
    func.coalesce(subq.c.post_count, 0).label('post_count'),
).outerjoin(subq, User.id == subq.c.user_id).all()
python复制from sqlalchemy import func
# Simple aggregates; scalar() returns the first column of the first row.
user_count = db.query(func.count(User.id)).scalar()
avg_id = db.query(func.avg(User.id)).scalar()

# Grouped statistics: number of posts and most recent post time per username.
post_stats = (
    db.query(
        User.username,
        func.count(Post.id).label('post_count'),
        func.max(Post.created_at).label('latest_post'),
    )
    .join(Post)
    .group_by(User.username)
    .all()
)
# Nested transaction (SAVEPOINT) inside the session's outer transaction.
try:
    with db.begin_nested():
        # Step 1: create the user.
        user = User(username='nested', email='nested@example.com')
        db.add(user)
        # Step 2: create the post. Link it through the relationship instead
        # of user_id=user.id: the user has not been flushed yet, so user.id
        # is still None at this point.
        post = Post(title='Nested Transaction', author=user)
        db.add(post)
    # Commit the outer transaction.
    db.commit()
except Exception:
    # Undo everything, then let the caller see the failure instead of
    # silently swallowing it.
    db.rollback()
    raise
python复制# 创建保存点
# Entering the block opens the SAVEPOINT. Leaving it normally commits
# (releases) the savepoint; an exception rolls back to it and is re-raised.
with db.begin_nested() as savepoint:
    user = User(username='savepoint', email='savepoint@example.com')
    db.add(user)
python复制# 批量插入示例
# Build 1000 plain dicts (no ORM objects) and insert them in one bulk call.
users_data = [
    {'username': f'user{i}', 'email': f'user{i}@example.com'}
    for i in range(1000)
]
db.bulk_insert_mappings(User, users_data)
db.commit()
python复制# 乐观锁实现示例
class Product(Base):
    """ORM model mapped to ``products``, with a version counter column."""

    __tablename__ = 'products'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    stock = Column(Integer)
    # Row version; registered with the mapper below.
    version_id = Column(Integer, nullable=False)

    # Optimistic locking: with version_id_col set, the ORM manages this
    # column as the row's version and uses it to detect concurrent updates.
    __mapper_args__ = {'version_id_col': version_id}
在电商系统开发中,我们使用SQLAlchemy ORM处理了日均百万级的订单数据。以下是关键经验:
python复制# 分表示例
class MonthlyData(Base):
    """Abstract template for per-month shard tables in the ``archive`` schema.

    The class itself is not mapped to a table. Call ``get_table(month)`` to
    obtain the mapped class for one month's table.
    """

    __abstract__ = True
    # Name template for the shard tables; ``{month}`` is filled by get_table().
    __tablename__ = 'data_{month}'
    __table_args__ = {'schema': 'archive'}

    # Every mapped class needs a primary key.
    # NOTE(review): placeholder column, replace with the real shard schema.
    id = Column(Integer, primary_key=True)

    # Cache of generated shard classes, keyed by table name, so each table is
    # mapped exactly once.
    _shard_models = {}

    @classmethod
    def get_table(cls, month):
        """Return the mapped class for the shard table of ``month``.

        A dedicated subclass is created per month instead of renaming one
        shared Table in place, which would change the table for every holder
        of the class at once.

        Args:
            month: Value interpolated into the ``data_{month}`` table name.

        Returns:
            A subclass of MonthlyData mapped to ``archive.data_<month>``.
        """
        table_name = MonthlyData.__tablename__.format(month=month)
        model = MonthlyData._shard_models.get(table_name)
        if model is None:
            model = type(
                f'MonthlyData_{month}',
                (MonthlyData,),
                {'__tablename__': table_name},
            )
            MonthlyData._shard_models[table_name] = model
        return model
经过多年实践,我认为SQLAlchemy ORM最强大的特性是其灵活性。对于初学者,建议:
对于高级用户,可以探索:
SQLAlchemy的官方文档非常完善,遇到问题时首先查阅文档往往能快速找到解决方案。