Upgrade to Pro — share decks privately, control downloads, hide ads and more …

marshmallow for data validation and serialization

marshmallow for data validation and serialization

A talk about marshmallow, a Python library for data validation, serialization, and deserialization.

Steven Loria

April 27, 2016
Tweet

More Decks by Steven Loria

Other Decks in Programming

Transcript

  1. ------- marshmallow | Model | == Schema ==> {"id": 4189861,

    ------- "title": 'Nas @ UB Stadium' "datetime": "2011-04-29T19:00:00", "ticket_status": "available", "on_sale_datetime": "2011-03-08T10:00:00", "facebook_rsvp_url": "...", "description": "2011 Block Party...", ...}
  2. "Sometimes we only need a few fields..." def event_to_dict(model): return

    { 'id': model.pk, 'title': model.title, 'datetime': model.datetime.isoformat(), 'ticket_status': model.ticket_status, 'on_sale_datetime': model.on_sale_datetime.isoformat(), } def event_to_dict_summary(model): return {'id': model.pk, 'title': model.title}
  3. class Event(Model): datetime = DateTimeField() @property def formatted_date(self): return self.datetime.strftime('%A

    %B %m, %Y') def event_to_dict(model): return { 'id': model.pk, 'title': model.title, 'datetime': model.datetime.isoformat(), 'formatted_datetime': model.formatted_date, 'ticket_status': model.ticket_status, 'on_sale_datetime': model.on_sale_datetime.isoformat(), }
  4. def event_to_dict(model): return { 'id': model.pk, 'title': model.title, 'datetime': model.datetime.isoformat(),

    'ticket_status': model.ticket_status, 'on_sale_datetime': model.on_sale_datetime.isoformat(), 'formatted_datetime': model.formatted_date, 'ticket_status': model.ticket_status, 'on_sale_datetime': model.on_sale_datetime.isoformat(), } def event_to_dict_summary(model): return {'id': model.pk, 'title': model.title} def venue_to_dict(model): return { 'name': model.name, 'city': model.city, } def event_to_dict_with_venue(model): ret = event_to_dict(model) venue_dict = venue_to_dict(model.venue) ret['venue'] = { 'name': venue_dict['name'] } return ret
  5. Limitations • Can't change output format at runtime • Formatting

    complex values get repetitive (e.g. time-related fields)
  6. Separate Business Logic from Presentation ------- marshmallow | Model |

    == Schema ==> {'id': 4189861, ------- 'title': 'Nas @ UB Stadium' "datetime": "2011-04-29T19:00:00", "ticket_status": "available", "on_sale_datetime": "2011-03-08T10:00:00", "facebook_rsvp_url": "...", "description": "2011 Block Party...", ...}
  7. Separate Data Processing from Data Storage ------- marshmallow | Model

    | <== Schema == { ------- 'title': 'Nas @ UB Stadium' "datetime": "2011-04-29T19:00:00", "on_sale_datetime": "2011-03-08T10:00:00", "facebook_rsvp_url": "...", "description": "2011 Block Party...", ...}
  8. Does ~~one~~ three things well • Serialize objects to dicts

    • Validate data • Deserialize dicts to objects
  9. ------- marshmallow | Model | <== Schema ==> {'id': 4189861,

    ------- 'title': 'Nas @ UB Stadium' "datetime": "2011-04-29T19:00:00", "ticket_status": "available", "on_sale_datetime": "2011-03-08T10:00:00", "facebook_rsvp_url": "...", "description": "2011 Block Party...", ...}
  10. BandsInTown API v2 [1] { "id": 4189861, "title": "Nas @ UB Stadium

    in Buffalo, NY", "datetime": "2011-04-29T19:00:00", "ticket_url": "http://www.bandsintown.com/event/4189861/buy_tickets?artist=Nas", "ticket_type": "Tickets", "ticket_status": "available", ... } [1] http://www.bandsintown.com/api/overview
  11. from marshmallow import Schema, fields class EventSchema(Schema): id = fields.Int()

    title = fields.Str(required=True) datetime = fields.DateTime() ticket_url = fields.URL() ticket_type = fields.Str() ticket_status = fields.Str()
  12. from myorm import * class Venue(Model): name = CharField() city

    = CharField() #... class Artist(Model): name = CharField() # ... class Event(Model): title = CharField() datetime = DateTimeField() ticket_url = CharField() # ... venue = ForeignKeyField(Venue) artists = ManyToMany(Artist)
  13. from .models import Event from .schemas import EventSchema schema =

    EventSchema() event = Event.get(Event.id == 4189861) result = schema.dump(event) result.data # { # "id": 4189861, # "title": "Nas @ UB Stadium in Buffalo, NY", # "datetime": "2011-04-29T19:00:00", # "ticket_url": "http://www.bandsintown.com/event/41..." # "ticket_type": "Tickets", # "ticket_status": "available", # }
  14. Ordering (Optional) from marshmallow import Schema, fields class EventSchema(Schema): id

    = fields.Int() title = fields.Str(required=True) datetime = fields.DateTime() ticket_url = fields.URL() ticket_type = fields.Str() ticket_status = fields.Str() class Meta: ordered = True
  15. from marshmallow import Schema, fields class EventSchema(Schema): id = fields.Int()

    title = fields.Str(required=True) datetime = fields.DateTime() ticket_url = fields.URL() ticket_type = fields.Str() ticket_status = fields.Str()
  16. from marshmallow import Schema, fields class EventSchema(Schema): class Meta: fields

    = ('id', 'title', 'datetime', 'ticket_url', 'ticket_type', 'ticket_status')
  17. /events/?exclude=ticket_type,ticket_status schema = EventSchema(exclude=('ticket_type', 'ticket_status')) result = schema.dump(event) result.data #

    { # "id": 4189861, # "title": "Nas @ UB Stadium in Buffalo, NY", # "datetime": "2011-04-29T19:00:00", # "ticket_url": "http://www.bandsintown.com/event/41..." # }
  18. from .schemas import EventSchema event_data = { 'title': 'Miike Snow

    @ Weenie Roast in Irvine, CA', 'datetime' : '2016-05-14T19:00:00' } schema = EventSchema() result = schema.load(event_data) result.data # { # 'title': 'Miike Snow @ Weenie Roast in Irvine, CA', # 'datetime' : datetime.datetime(2016, 5, 14, 19, 0) # }
  19. Deserializing to an object from marshmallow import Schema, fields, post_load

    from .models import Event class EventSchema(Schema): id = fields.Int() title = fields.Str(required=True) # ... @post_load def make_event(self, data): return Event(**data)
  20. Deserializing to an object event_data = { 'title': 'Miike Snow

    @ Weenie Roast in Irvine, CA', 'datetime' : '2016-05-14T19:00:00' } schema = EventSchema() result = schema.load(event_data) result.data # => <Event(title='Miike Snow...')>
  21. (De)serializing collections events = Events.select_all_for_artist('Young Thug') schema = EventSchema(many=True, only=('id',

    'title')) schema.dump(events).data # [ # {'id': 11708299, # 'title': 'Young Thug @ in Philadelphia, PA'}, # {'id': 11712280, # 'title': 'Young Thug @ in New York, NY'}, # ]
  22. from marshmallow import Schema, fields from marshmallow.validate import Length, OneOf

    class EventSchema(Schema): id = fields.Int() title = fields.Str( required=True, validate=Length(min=7, max=500) ) # ... ticket_status = fields.Str( validate=OneOf(['available', 'soldout']) )
  23. Validation invalid = { 'ticket_status': 'punkd' } schema = EventSchema()

    result = schema.load(invalid) result.errors # { # 'title': ['Missing data for required field.'], # 'ticket_status': ['Not a valid choice.'] # }
  24. Validating collections event_data = [ {'title': 'wat', 'ticket_status': 'soldout'}, {'title':

    'Valid name', 'ticket_status': 'lol'} ] schema = EventSchema(many=True) result = schema.load(event_data) result.errors # {0: {'title': ['Length must be between 7 and 500.']}, # 1: {'ticket_status': ['Not a valid choice.']}}
  25. Validator methods from marshmallow import (Schema, fields, validates, ValidationError) class

    EventSchema(Schema): id = fields.Int() @validates('id') def validate_id(self, value): if not Event.get(value): raise ValidationError('Event does not exist')
  26. from myorm import * class Venue(Model): name = CharField() city

    = CharField() #... class Artist(Model): name = CharField() # ... class Event(Model): title = CharField() datetime = DateTimeField() ticket_url = CharField() # ... venue = ForeignKeyField(Venue) artists = ManyToMany(Artist)
  27. from marshmallow import Schema, fields class VenueSchema(Schema): class Meta: fields

    = ('name', 'city', ...) class ArtistSchema(Schema): class Meta: fields = ('name', ...)
  28. from marshmallow import Schema, fields class EventSchema(Schema): id = fields.Int()

    title = fields.Str() # ... venue = fields.Nested(VenueSchema()) artists = fields.Nested(ArtistSchema(many=True))
  29. class EventSchema(Schema): id = fields.Int() title = fields.Str() # ...

    venue = fields.Nested(VenueSchema()) artists = fields.Nested(ArtistSchema(many=True)) #... schema.dump(event).data # { # "id": 4189861, # "title": "Nas @ UB Stadium in Buffalo, NY", # ... # "artists": [{ # "name": "Nas", # }], # "venue": { # "name": "UB Stadium", # "city": "Buffalo", # } # }
  30. from marshmallow import Schema, fields class VenueSchema(Schema): events = fields.Nested('EventSchema',

    many=True, exclude=('venue', )) class Meta: fields = ('name', 'city', ...) class EventSchema(Schema): #... venue = fields.Nested(VenueSchema())
  31. from marshmallow import Schema, fields class ArtistSchema(Schema): mbid = fields.Str()

    name = fields.Str() image_url = fields.URL() facebook_tour_dates_url = fields.URL() collaborators = fields.Nested('self', many=True, only=('mbid', 'name'))
  32. from marshmallow import Schema, fields class VenueSchema(Schema): datetime = fields.DateTime()

    latitude = fields.Function(lambda v: v.location['lat']) longitude = fields.Method('get_longitude') def get_longitude(self, venue): return venue.location['long']
  33. from marshmallow import ( Schema, pre_load, post_load, post_dump ) class

    ArtistSchema(Schema): class Meta: fields = ('mbid', 'name', ...) @pre_load def unwrap_envelope(self, in_data): return in_data['data'] @post_load def make_artist(self, in_data): return Artist(**in_data) @post_dump def wrap_with_envelope(self, out_data): return {'data': out_data}
  34. event_schema.dump(event).data # { # "data": { # "id": "11708299", #

    "type": "events" # "attributes": { # "title": "Young Thug @ in Philadelphia, PA" # }, # "relationships": { # "venues": { # "links": { # "related": "/venues/9" # } # }, # "artists": { # "links": { # "related": "/events/11708299/artists/" # } # "data": [ # {"id": 5, "type": "artists"}, # {"id": 12, "type": "artists"} # ], # } # }, # } # }
  35. Lessons Learned • More stability = More work • Don't

    reinvent the wheel, but be critical • We don't need more framework-specific libs • Backwards-compat is important • Users make for better software