Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Data streams processing with PHP and STORM

Data streams processing with PHP and STORM

34be88398f623c109b61d23e8215bd23?s=128

Mariusz Gil

April 20, 2013
Tweet

Transcript

  1. PROCESSING the php way of... STORM DATA STREAMS Mariusz

    Gil
  2. about me

  3. #php #scalability #nosql #performance #hadoop #hive #pig #bigdata #mahout #datamining

    #storm https://music.twitter.com/_login/background.jpg
  4. batch #1 batch #2 batch #3 the Problem
  5. the Story

  6. STORM DISTRIBUTED REALTIME COMPUTATION SYSTEM

  7. scalable no data lost fault tolerant extremely robust language agnostic

    efficient messaging local or distributed
  8. terms and architecture

  9. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    unbounded sequence of tuples tuple tuple tuple tuple tuple tuple tuple
  10. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    source of streams tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple
  11. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    process input streams and produce new streams tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple
  12. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    network of spouts and bolts TextSpout SplitSentenceBolt WordCountBolt [sentence] [word] [word, count]
  13. None
  14. storm-kestrel storm-kafka storm-amqp-spout storm-jms storm-pubsub storm-beanstalkd mapr-spout

  15. shuffle grouping fields grouping all grouping global grouping direct grouping

    local or shuffle grouping
  16. ZooKeepers Supervisors Nimbus

  17. fast CLUSTER STATE IS STORED LOCALLY OR IN ZOOKEEPERS fail

  18. code examples

  19. https://github.com/nathanmarz/storm

  20. https://github.com/maltoe/storm-install

  21. https://github.com/nathanmarz/storm-starter/

  22. https://github.com/lazyshot/storm-php

  23. public class DoubleAndTripleBolt extends BaseRichBolt { private OutputCollectorBase _collector; @Override

    public void prepare(Map conf, TopologyContext context, OutputCollectorBase collector) { _collector = collector; } @Override public void execute(Tuple input) { int val = input.getInteger(0); _collector.emit(input, new Values(val*2, val*3)); _collector.ack(input); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("double", "triple")); } } Java example / bolt
  24. public static class ExclamationBolt implements IRichBolt { OutputCollector _collector; public

    void prepare(Map conf, TopologyContext context, OutputCollector collector) { _collector = collector; } public void execute(Tuple tuple) { _collector.emit(tuple, new Values(tuple.getString(0) + "!!!")); _collector.ack(tuple); } public void cleanup() { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } public Map getComponentConfiguration() { return null; } } Java example / bolt
  25. TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("words", new TestWordSpout(), 10); builder.setBolt("exclaim1",

    new ExclamationBolt(), 3) .shuffleGrouping("words"); builder.setBolt("exclaim2", new ExclamationBolt(), 2) .shuffleGrouping("exclaim1"); Java example / topology ... words exclaim1 exclaim2
  26. zkServer.sh start bin/storm nimbus bin/storm supervisor bin/storm ui #optional storm

    jar all-my-code.jar backtype.storm.MyTopology arg1 arg2 Java example / run
  27. PHP example / spout PHP example / spout require_once('storm.php'); class

    RandomSentenceSpout extends ShellSpout { protected $sentences = array( "the cow jumped over the moon", "an apple a day keeps the doctor away", "four score and seven years ago", "snow white and the seven dwarfs", ); protected function nextTuple() { sleep(.1); $sentence = $this->sentences[ rand(0, count($this->sentences) -1)]; $this->emit(array($sentence)); } protected function ack($tuple_id) { return; } protected function fail($tuple_id) { return; } } $SentenceSpout = new RandomSentenceSpout(); $SentenceSpout->run();
  28. PHP example / bolt require_once('storm.php'); class SplitSentenceBolt extends BasicBolt {

    public function process(Tuple $tuple) { $words = explode(" ", $tuple->values[0]); foreach($words as $word) { $this->emit(array($word)); } } } $splitsentence = new SplitSentenceBolt(); $splitsentence->run();
  29. /** * This topology demonstrates Storm's stream groupings and multilang

    capabilities. */ public class WordCountPHPTopology { public static class SplitSentence extends ShellBolt implements IRichBolt { public SplitSentence() { super("php", "splitsentence.php"); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } @Override public Map<String, Object> getComponentConfiguration() { return null; } } // ... } MultiLang example / Topology, Bolt
  30. {"command": "next"} {"command": "ack", "id": "1231231"} {"command": "fail", "id": "1231231"}

    NonJVMSpout NonJVMBolt {"command": "sync"} { "command": "emit", "id": "1231231", "stream": "1", "task": 9, "tuple": ["field1", 2, 3] } { "id": "-6955786537413359385", "comp": "1", "stream": "1", "task": 9, "tuple": ["snow white and dwarfs", "field2", 3] } { "command": "emit", "anchors": ["1231231", "-234234234"], "stream": "1", "task": 9, "tuple": ["field1", 2, 3] } https://github.com/nathanmarz/storm/wiki/Multilang-protocol
  31. demo

  32. use cases

  33. stream processing

  34. continuous query computation

  35. RPC distributed arguments results [request-id, arguments] [request-id, results]

  36. realtime analytics personalization search revenue optimization monitoring

  37. content search realtime analytics generating feeds integrated with elastic search,

    Hbase,hadoop and hdfs
  38. realtime scoring moments generation integration with kafka queues and hdfs

    storage
  39. thanks! feel free to contact me email: mariusz@mariuszgil.pl twitter:

    @mariuszgil