Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Storm: the Hadoop of Realtime Stream Processing

Storm: the Hadoop of Realtime Stream Processing

Twitter's new scalable, fault-tolerant, and simple(ish) stream programming system... with Python!

Gabriel Grant

March 25, 2012
Tweet

More Decks by Gabriel Grant

Other Decks in Programming

Transcript

  1. JOY

  2. WOW

  3. I'VE GOT YOU COVERED class LogParserBolt(AutoAckBolt): class Default(Stream): fields =

    'ip_address' def execute(self, input): ip_address = parse_log(input.message) self.emit(ip_address)
  4. I'VE GOT YOU COVERED class GeolocatorBolt(AutoAckBolt): class Default(Stream): fields =

    'lat', 'long' def __init__(self, *args, **kwargs): self.geoip = pygeoip.GeoIP('GeoLiteCity.dat') super(GeolocatorBolt, self) \ .__init__(*args, **kwargs) def execute(self, input): record = self.geoip.record_by_addr(input.ip_address) lat = record['latitude'] long_ = record['longitude'] self.emit((lat, long_))
  5. I'VE GOT YOU COVERED class WSPusherBolt(Bolt): def __init__(self, *args, **kwargs):

    self.batcher = TimeBatcher() self.wspusher = zerorpc.Client(timeout=None) url = os.environ['WSPUSHER_ZERORPC_URL'] self.wspusher.connect(url) super(WSPusherBolt, self).__init__(*args, **kwargs) def execute(self, input): t = time() batch = self.pop_batch(t) if batch: self.wspusher.push_list(batch) data = input.lat, input.long self.batcher.push_item(t, data)
  6. I'VE GOT YOU COVERED class GeocoderTopology(Topology): # components redis =

    RedisSpout(1) parser = LogParserBolt(3) geolocator = GeolocatorBolt(2) pusher = WSPusherBolt(4) # plumbing parser.inputs.append(ShuffleGrouping(redis)) geolocator.inputs.append(ShuffleGrouping(parser)) pusher.inputs.append( FieldsGrouping(geolocator, 'lat', 'long'))
  7. HOW