Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Data streams processing with PHP and STORM

Data streams processing with PHP and STORM

Mariusz Gil

April 20, 2013
Tweet

More Decks by Mariusz Gil

Other Decks in Programming

Transcript

  1. #php #scalability #nosql #performance #hadoop #hive #pig #bigdata #mahout #datamining

    #storm https://music.twitter.com/_login/background.jpg
  2. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    unbounded sequence of tuples tuple tuple tuple tuple tuple tuple tuple
  3. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    source of streams tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple
  4. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    process input streams and produce new streams tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple tuple
  5. Spouts Bolts Stream Topologies (val1, val2) (val3, val4) (val5, val6)

    network of spouts and bolts TextSpout SplitSentenceBolt WordCountBolt [sentence] [word] [word, count]
  6. public class DoubleAndTripleBolt extends BaseRichBolt { private OutputCollectorBase _collector; @Override

    public void prepare(Map conf, TopologyContext context, OutputCollectorBase collector) { _collector = collector; } @Override public void execute(Tuple input) { int val = input.getInteger(0); _collector.emit(input, new Values(val*2, val*3)); _collector.ack(input); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("double", "triple")); } } Java example / bolt
  7. public static class ExclamationBolt implements IRichBolt { OutputCollector _collector; public

    void prepare(Map conf, TopologyContext context, OutputCollector collector) { _collector = collector; } public void execute(Tuple tuple) { _collector.emit(tuple, new Values(tuple.getString(0) + "!!!")); _collector.ack(tuple); } public void cleanup() { } public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } public Map getComponentConfiguration() { return null; } } Java example / bolt
  8. TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("words", new TestWordSpout(), 10); builder.setBolt("exclaim1",

    new ExclamationBolt(), 3) .shuffleGrouping("words"); builder.setBolt("exclaim2", new ExclamationBolt(), 2) .shuffleGrouping("exclaim1"); Java example / topology ... words exclaim1 exclaim2
  9. zkServer.sh start bin/storm nimbus bin/storm supervisor bin/storm ui #optional storm

    jar all-my-code.jar backtype.storm.MyTopology arg1 arg2 Java example / run
  10. PHP example / spout require_once('storm.php'); class

    RandomSentenceSpout extends ShellSpout { protected $sentences = array( "the cow jumped over the moon", "an apple a day keeps the doctor away", "four score and seven years ago", "snow white and the seven dwarfs", ); protected function nextTuple() { sleep(.1); $sentence = $this->sentences[ rand(0, count($this->sentences) -1)]; $this->emit(array($sentence)); } protected function ack($tuple_id) { return; } protected function fail($tuple_id) { return; } } $SentenceSpout = new RandomSentenceSpout(); $SentenceSpout->run();
  11. PHP example / bolt require_once('storm.php'); class SplitSentenceBolt extends BasicBolt {

    public function process(Tuple $tuple) { $words = explode(" ", $tuple->values[0]); foreach($words as $word) { $this->emit(array($word)); } } } $splitsentence = new SplitSentenceBolt(); $splitsentence->run();
  12. /** * This topology demonstrates Storm's stream groupings and multilang

    capabilities. */ public class WordCountPHPTopology { public static class SplitSentence extends ShellBolt implements IRichBolt { public SplitSentence() { super("php", "splitsentence.php"); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } @Override public Map<String, Object> getComponentConfiguration() { return null; } } // ... } MultiLang example / Topology, Bolt
  13. {"command": "next"} {"command": "ack", "id": "1231231"} {"command": "fail", "id": "1231231"}

    NonJVMSpout NonJVMBolt {"command": "sync"} { "command": "emit", "id": "1231231", "stream": "1", "task": 9, "tuple": ["field1", 2, 3] } { "id": "-6955786537413359385", "comp": "1", "stream": "1", "task": 9, "tuple": ["snow white and dwarfs", "field2", 3] } { "command": "emit", "anchors": ["1231231", "-234234234"], "stream": "1", "task": 9, "tuple": ["field1", 2, 3] } https://github.com/nathanmarz/storm/wiki/Multilang-protocol