Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Building a Firehose

Building a Firehose

My talk about dealing with streaming data in PHP, from PHP Tek 12.

Ian Barber

May 24, 2012
Tweet

More Decks by Ian Barber

Other Decks in Technology

Transcript

  1. Data Source Data Source Data Source Output Messaging Batch HTTP

    Logs HTTP Chunked Websockets Batched POST
  2. APACHE PHP APACHE PHP HTTP POST function sendPos() { navigator.geolocation.getCurrentPosition(

    function(pos) { $.ajax({ type: 'POST', url:'http://firehose.com/input.php', data: {lat: pos.coords.latitude, lon: pos.coords.longitude}}); }); setTimeout(sendPos, 60000); } sendPos(); location.php
  3. APACHE PHP APACHE PHP PUSH ZEROMQ WEBSOCKETS $ctx = new

    ZMQContext(); $sock = $ctx->getSocket(ZMQ::SOCKET_PUSH); $sock->connect("tcp://localhost:5566"); $data = array( 'id' => get_next_msg_id(), 'uid' => $_COOKIE['uid'], 'lat' => $_POST['lat'], 'lon' => $_POST['lon'] ); $sock->send(json_encode($data)); input.php
  4. APACHE PHP APACHE PHP NODE.JS ZEROMQ PULL WEBSOCKETS app=require('http').createServer(handler), io

    = require('socket.io').listen(app), zmq = require('zmq'), sock = zmq.socket('pull'); app.listen(8080); sock.bind('tcp://*:5566'); sock.on('message', function (msg) { var data = JSON.parse(msg); // send to all clients io.sockets.emit("position", event); }); output.js
  5. PHP DAEMON PHP DAEMON NODE.JS PUSH ZEROMQ PULL HTTP STREAM

    WEBSOCKETS $fh = fopen("https://".$user.":". $pass."@stream.twitter.com/1/statuses/ filter.json?track=".$search, "r"); while(!feof($fh)) { $d = fgets($fh); if(strlen($d) > 4) { $sock->send($d); } } twitter.php
  6. SOURCE PHP ZEROMQ PUB SUB REDIS $ctx = new ZMQContext();

    $sub = $ctx->getSocket(ZMQ::SOCKET_SUB); $sub->setSockOpt(ZMQ::SOCKOPT_SUBSCRIBE,""); $sub->connect("tcp://localhost:5577"); while( $dat = $sub->recv() ) { $aug = augment(json_decode($dat,true),$obj); $redis->lpush($dat['id'],json_encode($aug)); } augmentor.php
  7. $mongo = new Mongo(); $collection = $m->starbucks->locations; function augment($data, $collection)

    { $loc = array((float) $data['lon'], (float) $data['lat']); $res = $collection->findOne(array( 'loc' => array('$near' => $loc))); return array('name' => 'starbucks', 'val' => $res['street']); } SOURCE PHP REDIS starbucks.php DB
  8. $ld = new Text_LanguageDetect(); $ld->setNameMode(2); function augment($data, $ld) { /*

    ["en"]=> float(0.24702222222222) */ $names = $ld->detect($data['text'], 1); return array('name' => 'lang', 'val' => key($names)); } SOURCE PHP REDIS langdetect.php
  9. $zk->create( $path . "/" . uniqid(), NULL, array( array( "perms"

    => Zookeeper::PERM_ALL, "scheme" => "world", "id" => "anyone")), Zookeeper::EPHEMERAL); PHP ZOOKEEPER augmentor.php
  10. REASSEMBLE while($dat = $sub->recv()){ do { $start = microtime(true); $aug

    = $redis->brpop($dat['id'],$time)); if(count($aug)) $dat['aug'][] = $aug; $time -= microtime(true) - $start; } while($time > 0 && count($dat['aug']) != $servs); $out->send(json_encode($dat)); //forward } COUNT reassemble.php
  11. FILTER ELASTIC SEARCH QUERY - NAME QUERY - NAME MSG

    ? ? MSG ZEROMQ SUB ZEROMQ PUB TOPIC MSG TOPIC MSG TOPIC MSG ZEROMQ PULL QUERY - NAME MSG HTTP / REST
  12. ELASTIC SEARCH QUERY - NAME QUERY - NAME MSG MSG

    HTTP / REST function escall($server, $path, $param) { $context = stream_context_create( array('http' => $http)); $result = file_get_contents( $serv.'/'.$path, NULL, $context); return json_decode( $result ); } elasticsearch.php
  13. ELASTIC SEARCH QUERY - NAME QUERY - NAME MSG MSG

    HTTP / REST function percolate($host, $path, $tweet) { $path = "/twitter/tweet/_percolate"; $tweet = array('doc' => array( 'tweet' => $tweet['text'])); $match = escall($host, $path, array('content' => json_encode($tweet))); return $match['matches']; } elasticsearch.php
  14. // snip... creating in, ctl, out ZMQ socks $poll =

    new ZMQPoll(); $poll->add($in, ZMQ::POLL_IN); $poll->add($ctl, ZMQ::POLL_IN); $read = $write = array(); FILTER MSG ZEROMQ PULL QUERY - NAME ZEROMQ SUB elasticsearch.php
  15. while(true) { $ev = $poll->poll($read, $write, -1); if($read[0] === $in)

    { $msg = json_decode( $in->recv() ); $matches = percolate($host, $msg); foreach($matches as $match) { $out->sendMulti(array($match, $msg)); } } else if($read[0] === $ctl) { $q = json_decode($ctl->recv()); $name = $q['name']; $query = $q['query']; add_query($host, $name,$query); } } elasticsearch.php
  16. STORE PHP KAFKA TOPIC TOPIC 1 2 3 4 1

    2 3 4 SUB APACHE PHP CLIENT HTTP GET TOPIC - OFFSET
  17. PHP KAFKA TOPIC TOPIC 1 2 3 4 1 2

    3 4 SUB $k = new Kafka_Producer("localhost", 9092); while ($data = $in->recvMulti()) { $topic = $data[0]; $msg = $data[1]; $bytes = $k->send(array($msg), $topic); } kafkastore.php
  18. $consumer = new Kafka_SimpleConsumer( 'localhost', 9092, 1, $max); do {

    $msgs = $consumer->fetch( new Kafka_FetchRequest($top,0,$os,$max) ); foreach($msgs as $msg) echo $msg->payload(), "\n"; $offset += $msgs->validBytes(); } while($msgs->validBytes() > 0); echo json_encode(array("offset"=>$offset)); kafkaconsume.php KAFKA TOPIC TOPIC 1 2 3 4 1 2 3 4 APACHE PHP CLIENT GET
  19. OPS

  20. JSON & MSGPACK $data = array('id'=>1,'a'=>'a','b'=>'xyz', 'c' => array(1, 2,

    "abcdefg", array(5, 7, 8))); $enc = json_encode($data); var_dump( json_decode($enc) ); $enc = msgpack_pack($data); var_dump( msgpack_unpack($enc) ); JSON MSGPACK MSGPACK JSON