Zero-downtime upgrades with SQLAlchemy + Alembic

Zero-downtime upgrades with Alembic A look at some of the
power features of Alembic, the database migration tool PyCon IE 2022 @stephenﬁn

Stephen Finucane (@stephenﬁn) Senior Software Engineer Red Hat

Alembic +

Background

class User(Base): __tablename__ = "user_account" id = Column(Integer, primary_key=True) first_name
= Column(String(30)) last_name = Column(String(30)) addresses = relationship( "Address", back_populates="user", cascade="all, delete-orphan" ) def __repr__(self): return f"User(id={self.id!r})" models.py

❯ alembic revision --autogenerate ...

# revision identifiers, used by Alembic. revision = '6cb93d555e2b' down_revision
= None branch_labels = None depends_on = None def upgrade() -> None: op.create_table( 'user_account', sa.Column('id', sa.Integer(), nullable=False), sa.Column('first_name', sa.String(length=30), nullable=True), sa.Column('last_name', sa.String(length=30), nullable=True), sa.PrimaryKeyConstraint('id'), ) 6cb93d555e2b_initial_migration.py

❯ alembic upgrade head

. ├── alembic_expand_contract_demo │ ├── __init__.py │ ├── migrations │
│ ├── env.py │ │ ├── README │ │ ├── script.py.mako │ │ └── versions │ │ └── 6cb93d555e2b_initial_migration.py │ ├── models.py ├── alembic.ini ├── README.md └── requirements.txt

The Problem

class User(Base): __tablename__ = "user_account" id = Column(Integer, primary_key=True) first_name
= Column(String(30)) last_name = Column(String(30)) addresses = relationship( "Address", back_populates="user", cascade="all, delete-orphan" ) def __repr__(self): return f"User(id={self.id!r})" models.py

class User(Base): __tablename__ = "user_account" id = Column(Integer, primary_key=True) name
= Column(String) addresses = relationship( "Address", back_populates="user", cascade="all, delete-orphan" ) def __repr__(self): return f"User(id={self.id!r})" models.py

def upgrade() -> None: # schema migration - add new
columns op.add_column( 'user_account', sa.Column('name', sa.String(), nullable=True), ) # ...some (manually added) data migrations... # schema migration - drop old columns op.drop_column('user_account', 'first_name') op.drop_column('user_account', 'last_name') 0585005489f0_merge_user_names.py

To do the schema and data migrations in one go,
we’d need to shut down our instances…

To do the schema and data migrations in one go,
we’d need to shut down our instances… How do we avoid downtime? 🤔

A Solution

Expand - Contract

Expand - Migrate - Contract

first_name last_name first_name last_name name name expand migrate contract

Let’s split up those migrations (manually, for now)

revision = '9c36df1b3f62' down_revision = '6cb93d555e2b' branch_labels = None depends_on
= None def upgrade() -> None: # schema migration - add new columns op.add_column( 'user_account', sa.Column('name', sa.String(), nullable=True), ) 9c36df1b3f62_merge_user_names_expand.py

revision = 'e629a4ea0677' down_revision = '9c36df1b3f62' branch_labels = None depends_on
= None def upgrade() -> None: # schema migrations - contract op.drop_column('user_account', 'first_name') op.drop_column('user_account', 'last_name') 9c36df1b3f62_merge_user_names_contract.py

❯ alembic upgrade 9c36df1b3f62 # expand hash ❯ alembic upgrade
e629a4ea0677 # contract hash

That’s kind of ugly, no? 🤮

A Better Solution

“A branch describes a point in a migration stream when
two or more versions refer to the same parent migration as their ancestor.” Alembic Documentation

revision = '9c36df1b3f62' down_revision = '6cb93d555e2b' branch_labels = None depends_on
= None def upgrade() -> None: # schema migration - add new columns op.add_column( 'user_account', sa.Column('name', sa.String(), nullable=True), ) 9c36df1b3f62_merge_user_names_expand.py (before)

revision = '9c36df1b3f62' down_revision = '6cb93d555e2b' branch_labels = ('expand',) depends_on
= None def upgrade() -> None: # schema migration - add new columns op.add_column( 'user_account', sa.Column('name', sa.String(), nullable=True), ) 9c36df1b3f62_merge_user_names_expand.py (after)

revision = 'e629a4ea0677' down_revision = '9c36df1b3f62' branch_labels = None depends_on
= None def upgrade() -> None: # schema migrations - contract op.drop_column('user_account', 'first_name') op.drop_column('user_account', 'last_name') 9c36df1b3f62_merge_user_names_contract.py (before)

revision = 'e629a4ea0677' down_revision = '9c36df1b3f62' branch_labels = ('contract',) depends_on
= None def upgrade() -> None: # schema migrations - contract op.drop_column('user_account', 'first_name') op.drop_column('user_account', 'last_name') 9c36df1b3f62_merge_user_names_contract.py (after)

❯ alembic upgrade expand ❯ alembic upgrade contract

Much better! .

Much better! But we can do more…

The “Best” Solution

def run_migrations_online() -> None: connectable = engine_from_config( config.get_section(config.config_ini_section), prefix="sqlalchemy.", poolclass=pool.NullPool,
) with connectable.connect() as connection: context.configure( connection=connection, target_metadata=target_metadata, ) with context.begin_transaction(): context.run_migrations() env.py

) with connectable.connect() as connection: context.configure( connection=connection, target_metadata=target_metadata, process_revision_directives=autogen.process_revision_directives, ) with context.begin_transaction(): context.run_migrations() env.py

process_revision_directives - a callable function that will be passed a
structure representing the end result of an autogenerate or plain “revision” operation, which can be manipulated to affect how the alembic revision command ultimately outputs new revision scripts. Alembic Documentation

) with connectable.connect() as connection: context.configure( connection=connection, target_metadata=target_metadata, process_revision_directives=autogen.process_revision_directives, ) with context.begin_transaction(): context.run_migrations() env.py

_ec_dispatcher = Dispatcher() def process_revision_directives(context, revision, directives): directives[:] = list(_assign_directives(context,
directives)) def _assign_directives(context, directives, phase=None): for directive in directives: decider = _ec_dispatcher.dispatch(directive) if phase is None: phases = ('expand', 'contract') else: phases = (phase,) for phase in phases: decided = decider(context, directive, phase) if decided: yield decided autogen.py (part 1)

@_ec_dispatcher.dispatch_for(ops.MigrationScript) def _migration_script_ops(context, directive, phase): op = ops.MigrationScript( new_rev_id(), ops.UpgradeOps(
ops=list( _assign_directives(context, directive.upgrade_ops.ops, phase) ) ), ops.DowngradeOps(ops=[]), message=directive.message, head=f'{phase}@head', ) if not op.upgrade_ops.is_empty(): return op autogen.py (part 2)

@_ec_dispatcher.dispatch_for(ops.AddConstraintOp) @_ec_dispatcher.dispatch_for(ops.CreateIndexOp) @_ec_dispatcher.dispatch_for(ops.CreateTableOp) @_ec_dispatcher.dispatch_for(ops.AddColumnOp) def _expands(context, directive, phase): if phase
== 'expand': return directive else: return None autogen.py (part 3)

@_ec_dispatcher.dispatch_for(ops.DropConstraintOp) @_ec_dispatcher.dispatch_for(ops.DropIndexOp) @_ec_dispatcher.dispatch_for(ops.DropTableOp) @_ec_dispatcher.dispatch_for(ops.DropColumnOp) def _contracts(context, directive, phase): if phase
== 'contract': return directive else: return None autogen.py (part 4)

@_ec_dispatcher.dispatch_for(ops.AlterColumnOp) def _alter_column(context, directive, phase): is_expand = phase == 'expand'
if is_expand and directive.modify_nullable is True: return directive elif not is_expand and directive.modify_nullable is False: return directive else: raise NotImplementedError( "Don't know if operation is an expand or contract at the moment: " "%s" % directive ) autogen.py (part 5)

@_ec_dispatcher.dispatch_for(ops.ModifyTableOps) def _modify_table_ops(context, directive, phase): op = ops.ModifyTableOps( directive.table_name, ops=list(_assign_directives(context,
directive.ops, phase)), schema=directive.schema, ) if not op.is_empty(): return op autogen.py (part 6)

# revision identifiers, used by Alembic. revision = '1630422c66fe' down_revision
= '9c36df1b3f62' branch_labels = None depends_on = None def upgrade() -> None: op.add_column( 'user_account', sa.Column('name', sa.String(), nullable=True) ) 1630422c66fe_merge_user_names.py

# revision identifiers, used by Alembic. revision = 'a142d030afc5' down_revision
= 'e629a4ea0677' branch_labels = None depends_on = None def upgrade() -> None: op.drop_column('user_account', 'last_name') op.drop_column('user_account', 'first_name') a142d030afc5_merge_user_names.py

Next Steps & Wrap Up

We can improve this further…

We can improve this further… Distinguish expand/contract migrations by ﬁlename

❯ ls migrations/version 1630422c66fe_merge_user_names.py 6cb93d555e2b_initial_models.py 9c36df1b3f62_initial_expand.py a142d030afc5_merge_user_names.py e629a4ea0677_initial_contract.py

Add checkpoints (e.g. version 1.0, 1.1, …)

❯ neutron-db-manage --expand zed ❯ neutron-db-manage --contract zed

Add checkpoints (e.g. version 1.0, 1.1, …) Add tests …

that.guru/blog/zero-downtime-upgrades-with-alembic-and-sqlalchemy/

Credits Cover photo by Yunus Tuğ on Unsplash Source code
based OpenStack Neutron code, licensed under Apache 2.0

Zero-downtime upgrades with SQLAlchemy + Alembic

Zero-downtime upgrades with SQLAlchemy + Alembic

More Decks by Stephen Finucane

Other Decks in Technology

Featured

Transcript