Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Building A Firehose - PHPNW

Ian Barber
October 06, 2012

Building A Firehose - PHPNW

#phpnw12 version of my talk on building firehose style streaming data systems

Ian Barber

October 06, 2012
Tweet

More Decks by Ian Barber

Other Decks in Technology

Transcript

  1. +Ian Barber - [email protected] - @ianbarber
    https://github.com/ianbarber/Firehose-PHP-Talk
    BUILDING A FIREHOSE

    View Slide

  2. FILTERABLE
    REAL TIME
    STREAMING
    DATA

    View Slide

  3. SELLING
    DATA
    ANALYSIS
    & DECISIONS
    USER
    TOOLS
    $£¥ ☑☒

    View Slide

  4. DATA SOURCES
    COMPOSE
    latency
    AUGMENT
    STORE
    FILTER
    STREAM

    View Slide

  5. EVENT
    SAMPLE
    order
    tweet
    temperature
    snapshot

    View Slide

  6. Data Source Data Source Data Source
    Output

    View Slide

  7. Data Source Data Source Data Source
    Output Output

    View Slide

  8. Data Source Data Source Data Source
    Output
    Messaging
    Batch HTTP Logs
    HTTP Chunked Websockets
    Batched POST

    View Slide

  9. APACHE
    PHP
    APACHE
    PHP
    NODE.JS
    PUSH
    ZEROMQ
    PULL
    HTTP POST
    WEBSOCKETS

    View Slide

  10. APACHE
    PHP
    APACHE
    PHP
    HTTP POST
    // Report the browser's current position to the firehose, then schedule
    // the next report for 60 seconds later.
    function sendPos() {
      var postPosition = function (pos) {
        var payload = {lat: pos.coords.latitude, lon: pos.coords.longitude};
        $.ajax({type: 'POST', url: 'http://firehose.com/input.php', data: payload});
      };
      navigator.geolocation.getCurrentPosition(postPosition);
      setTimeout(sendPos, 60000);
    }
    sendPos();
    location.php

    View Slide

  11. APACHE
    PHP
    APACHE
    PHP
    PUSH
    ZEROMQ
    WEBSOCKETS
    // Push the position POSTed by the browser onto the ZeroMQ pipeline.
    $ctx = new ZMQContext();
    $sock = $ctx->getSocket(ZMQ::SOCKET_PUSH);
    $sock->connect("tcp://localhost:5566");

    // One JSON message per request: message id, user cookie, coordinates.
    $data = array();
    $data['id'] = get_next_msg_id();
    $data['uid'] = $_COOKIE['uid'];
    $data['lat'] = $_POST['lat'];
    $data['lon'] = $_POST['lon'];
    $sock->send(json_encode($data));
    input.php

    View Slide

  12. APACHE
    PHP
    APACHE
    PHP
    NODE.JS
    ZEROMQ PULL
    WEBSOCKETS
    // Bridge: pull JSON events from ZeroMQ and broadcast each one to every
    // connected socket.io client. `handler` is the HTTP request handler and
    // is defined outside this snippet.
    var app = require('http').createServer(handler),
        io = require('socket.io').listen(app),
        zmq = require('zmq'),
        sock = zmq.socket('pull');
    app.listen(8080);
    sock.bind('tcp://*:5566');
    sock.on('message', function (msg) {
      var data;
      try {
        data = JSON.parse(msg);
      } catch (err) {
        // A malformed message must not take the whole server down.
        console.error('dropping unparseable message: ' + err.message);
        return;
      }
      // send to all clients (the parsed message, not the undefined `event`)
      io.sockets.emit("position", data);
    });
    output.js

    View Slide

  13. PHP
    DAEMON
    PHP
    DAEMON
    NODE.JS
    PUSH
    ZEROMQ
    PULL
    HTTP STREAM
    WEBSOCKETS
    // Stream tweets matching $search from Twitter's filter endpoint and push
    // each line longer than four bytes onto the ZeroMQ socket.
    // NOTE(review): $user, $pass and $search are interpolated as-is; confirm
    // that the caller URL-encodes them.
    $url = "https://" . $user . ":" . $pass
        . "@stream.twitter.com/1/statuses/filter.json?track=" . $search;
    $fh = fopen($url, "r");
    if ($fh === false) {
        // Without a valid handle the read loop below cannot work.
        die("Unable to open Twitter stream\n");
    }
    while (!feof($fh)) {
        $d = fgets($fh);
        // fgets() returns false on error/EOF; very short lines are skipped
        // (presumably keep-alive newlines - TODO confirm).
        if ($d !== false && strlen($d) > 4) {
            $sock->send($d);
        }
    }
    fclose($fh);
    twitter.php

    View Slide

  14. Data Source Data Source
    Output
    Assemble
    Process Process

    View Slide

  15. SOURCE
    ASSEMBLE
    PHP PHP
    ZEROMQ PUB
    SUB
    SUB SUB
    REDIS
    ZEROMQ
    PUSH

    View Slide

  16. SOURCE PHP
    ZEROMQ
    PUB SUB
    REDIS
    // Subscribe to every message from the source, run this service's
    // augmentation on it, and park the result in Redis under the message id.
    $ctx = new ZMQContext();
    $sub = $ctx->getSocket(ZMQ::SOCKET_SUB);
    $sub->setSockOpt(ZMQ::SOCKOPT_SUBSCRIBE, ""); // empty prefix: no filtering
    $sub->connect("tcp://localhost:5577");
    while (($dat = $sub->recv()) !== false) {
        $msg = json_decode($dat, true);
        if (!is_array($msg) || !isset($msg['id'])) {
            continue; // not a JSON object with an id: nothing to key on
        }
        $aug = augment($msg, $obj);
        // Key on the decoded id; $dat is still the raw JSON string here.
        $redis->lpush($msg['id'], json_encode($aug));
    }
    augmentor.php

    View Slide

  17. $mongo = new Mongo();
    // Select the locations collection of the starbucks database from the
    // $mongo connection created on the previous line.
    $collection = $mongo->starbucks->locations;
    /**
     * Augment a position event with the street of the nearest Starbucks.
     *
     * @param array $data       Decoded event; 'lon' and 'lat' are read.
     * @param mixed $collection Collection object queried via findOne().
     * @return array array('name' => 'starbucks', 'val' => street or null).
     */
    function augment($data, $collection) {
        // Longitude first, then latitude, both forced to floats.
        $loc = array((float) $data['lon'], (float) $data['lat']);
        $res = $collection->findOne(array(
            'loc' => array('$near' => $loc)));
        // When nothing matches there is no document to index into; report
        // null instead of reading an offset of a non-array.
        $street = isset($res['street']) ? $res['street'] : null;
        return array('name' => 'starbucks', 'val' => $street);
    }
    SOURCE PHP REDIS
    starbucks.php
    DB

    View Slide

  18. $ld = new Text_LanguageDetect();
    $ld->setNameMode(2);
    /**
     * Augment a message with the detected language of its text.
     *
     * @param array $data Decoded message; 'text' is read.
     * @param mixed $ld   Detector object exposing detect($text, $limit).
     * @return array array('name' => 'lang', 'val' => first key of the result).
     */
    function augment($data, $ld) {
        // Ask for a single candidate, e.g. ["en"] => float(0.24702222222222)
        $detected = $ld->detect($data['text'], 1);
        $lang = key($detected);
        return array('name' => 'lang', 'val' => $lang);
    }
    SOURCE PHP REDIS
    langdetect.php

    View Slide

  19. $zk = new Zookeeper();
    $zk->connect("localhost:2181");
    SOURCE
    ASSEMBLE PHP
    PHP
    REDIS PHP
    ZOOKEEPER
    COUNT OF SERVICES

    View Slide

  20. $zk->create(
    $path . "/" . uniqid(), NULL,
    array( array(
    "perms" => Zookeeper::PERM_ALL,
    "scheme" => "world",
    "id" => "anyone")),
    Zookeeper::EPHEMERAL);
    PHP ZOOKEEPER
    augmentor.php

    View Slide

  21. REASSEMBLE
    SOURCE
    REDIS
    ZOOKEEPER
    // Budget for collecting augmentations; the unit is not shown here -
    // presumably seconds, TODO confirm against the brpop call that uses it.
    define("TIMEOUT", 5);
    // List the child nodes currently registered under /services ...
    $ch = $zk->getChildren("/services");
    // ... and keep how many there are as the expected number of services.
    $servs = count($ch);
    COUNT

    View Slide

  22. REASSEMBLE
    // For every source message, collect augmentations from Redis until each
    // registered service has answered or the TIMEOUT budget is spent, then
    // forward the combined message.
    while (($raw = $sub->recv()) !== false) {
        $dat = json_decode($raw, true);
        if (!is_array($dat) || !isset($dat['id'])) {
            continue; // cannot look up augmentations without an id
        }
        $dat['aug'] = array();
        $time = TIMEOUT; // fresh budget for each message
        do {
            $start = microtime(true);
            // BRPOP treats a timeout of 0 as "block forever", so round the
            // remaining budget up to at least one whole second.
            $aug = $redis->brpop($dat['id'], max(1, (int) ceil($time)));
            if (!empty($aug)) $dat['aug'][] = $aug;
            $time -= microtime(true) - $start;
        } while ($time > 0 &&
            count($dat['aug']) != $servs);
        $out->send(json_encode($dat)); //forward
    }
    COUNT
    reassemble.php

    View Slide

  23. Data Source Data Source
    Output
    Assemble
    Process Process
    Filter Filter Filter

    View Slide

  24. FILTER
    ELASTIC SEARCH
    QUERY - NAME
    QUERY - NAME
    MSG
    ?
    ?
    MSG
    ZEROMQ SUB
    ZEROMQ PUB
    TOPIC
    MSG
    TOPIC
    MSG
    TOPIC
    MSG
    ZEROMQ PULL
    QUERY - NAME
    MSG
    HTTP / REST

    View Slide

  25. ELASTIC SEARCH
    QUERY - NAME
    QUERY - NAME
    MSG
    MSG
    HTTP / REST
    /**
     * Issue an HTTP request to Elasticsearch and decode the JSON reply.
     *
     * @param string $server Base URL of the server.
     * @param string $path   Request path; a leading slash is optional.
     * @param array  $param  Options for PHP's 'http' stream context
     *                       (e.g. 'content' for the request body).
     * @return mixed Decoded JSON (objects as stdClass), or null on failure.
     */
    function escall($server, $path, $param) {
        // $param supplies the 'http' context options (previously an
        // undefined $http was used and $param was ignored).
        $context = stream_context_create(
            array('http' => $param));
        // Join with exactly one slash so "/a/b" and "a/b" both work.
        $result = file_get_contents(
            rtrim($server, '/') . '/' . ltrim($path, '/'), false, $context);
        if ($result === false) {
            return null; // request failed
        }
        return json_decode($result);
    }
    elasticsearch.php

    View Slide

  26. ELASTIC SEARCH
    QUERY - NAME
    QUERY - NAME
    MSG
    MSG
    HTTP / REST
    /**
     * Run a tweet through the Elasticsearch percolator.
     *
     * @param string $host  Elasticsearch base URL.
     * @param string $path  Ignored: the percolate endpoint is fixed below
     *                      (parameter kept for signature compatibility).
     * @param array  $tweet Decoded tweet; only 'text' is sent.
     * @return array Value of 'matches' in the reply, or an empty array.
     */
    function percolate($host, $path, $tweet) {
        $path = "/twitter/tweet/_percolate";
        $doc = array('doc' => array(
            'tweet' => $tweet['text']));
        $match = escall($host, $path,
            array('content' => json_encode($doc)));
        // escall() decodes without the assoc flag, so the reply is an
        // object; an array is accepted too in case that ever changes.
        if (is_object($match) && isset($match->matches)) {
            return $match->matches;
        }
        if (is_array($match) && isset($match['matches'])) {
            return $match['matches'];
        }
        return array();
    }
    elasticsearch.php

    View Slide

  27. // snip... creating in, ctl, out ZMQ socks
    // Poll set that watches both the $in and $ctl sockets for readability.
    $poll = new ZMQPoll();
    $poll->add($in, ZMQ::POLL_IN);
    $poll->add($ctl, ZMQ::POLL_IN);
    // Arrays passed to poll() by the main loop.
    $read = $write = array();
    FILTER
    MSG
    ZEROMQ PULL
    QUERY - NAME
    ZEROMQ SUB
    elasticsearch.php

    View Slide

  28. while(true) {
    $ev = $poll->poll($read, $write, -1);
    // Service every readable socket, not only the first one reported.
    foreach ($read as $ready) {
        if ($ready === $in) {
            // Keep the raw JSON for forwarding; decode to an array because
            // percolate() reads $tweet['text'].
            $raw = $in->recv();
            $msg = json_decode($raw, true);
            if (!is_array($msg)) {
                continue;
            }
            // percolate() takes ($host, $path, $tweet).
            $matches = percolate($host, "/twitter/tweet/_percolate", $msg);
            if (!is_array($matches)) {
                continue;
            }
            foreach ($matches as $match) {
                // First frame: matched query name; second: the message.
                $out->sendMulti(array($match, $raw));
            }
        } else if ($ready === $ctl) {
            // Decode as an array: the fields are read with [] below.
            $q = json_decode($ctl->recv(), true);
            if (!isset($q['name'], $q['query'])) {
                continue;
            }
            $name = $q['name'];
            $query = $q['query'];
            add_query($host, $name, $query);
        }
    }
    }
    elasticsearch.php

    View Slide

  29. Data Source
    Output
    Queue Process
    Filter
    Data
    Store
    Data
    Store

    View Slide

  30. STORE
    PHP
    KAFKA
    TOPIC
    TOPIC
    1 2 3 4
    1 2 3 4
    SUB
    APACHE
    PHP
    CLIENT
    HTTP GET
    TOPIC - OFFSET

    View Slide

  31. PHP
    KAFKA
    TOPIC
    TOPIC
    1 2 3 4
    1 2 3 4
    SUB
    // Store each (topic, message) pair arriving on $in in Kafka.
    $k = new Kafka_Producer("localhost", 9092);
    while ($data = $in->recvMulti()) {
        // First frame is the topic, second the message body.
        list($topic, $msg) = $data;
        $bytes = $k->send(array($msg), $topic);
    }
    kafkastore.php

    View Slide

  32. $consumer = new Kafka_SimpleConsumer(
    'localhost', 9092, 1, $max);
    // Read the topic starting at $os, echoing one payload per line, then
    // report the offset reached.
    $offset = $os;
    do {
        $msgs = $consumer->fetch(
            new Kafka_FetchRequest($top, 0, $offset, $max)
        );
        foreach ($msgs as $msg) {
            echo $msg->payload(), "\n";
        }
        // Advance the offset that the next fetch actually uses; requesting
        // $os every time would re-read the same messages.
        $offset += $msgs->validBytes();
    } while ($msgs->validBytes() > 0);
    echo json_encode(array("offset" => $offset));
    kafkaconsume.php
    KAFKA
    TOPIC
    TOPIC
    1 2 3 4
    1 2 3 4
    APACHE
    PHP
    CLIENT
    GET

    View Slide

  33. OPS

    View Slide

  34. JSON & MSGPACK
    // Round-trip the same nested structure through JSON and MessagePack and
    // dump what comes back from each.
    $data = array(
        'id' => 1,
        'a'  => 'a',
        'b'  => 'xyz',
        'c'  => array(1, 2, "abcdefg", array(5, 7, 8)),
    );
    $enc = json_encode($data);
    var_dump(json_decode($enc));
    $enc = msgpack_pack($data);
    var_dump(msgpack_unpack($enc));
    JSON
    MSGPACK
    MSGPACK
    JSON

    View Slide

  35. Data Source
    Output
    Queue Process
    Filter
    Data
    Store
    Tap
    Trace
    Trace
    Trace

    View Slide

  36. Data Source
    Output
    See Also: http://slidesha.re/JaWE78

    View Slide

  37. +Ian Barber - [email protected] - @ianbarber
    https://github.com/ianbarber/Firehose-PHP-Talk
    THANKS!

    View Slide