Upgrade to Pro — share decks privately, control downloads, hide ads and more …

BigData PHP

BigData PHP

Mariusz Gil

October 27, 2013
Tweet

More Decks by Mariusz Gil

Other Decks in Programming

Transcript

  1. PHP and memcached, advanced use-cases / PHPCon PL 2010 Aspect

    oriented programming in PHP / PHPCon PL 2010 3
  2. php is good php is simply php is popular php

    is good php is simply php is popular php, 1 is, 1 simply, 1 php, 1 is, 1 popular, 1 php, 1 is, 1 good, 1 php, 3 is, 3 good, 1 simply, 1 popular, 1 good, 1 is, 1 is, 1 is, 1 php, 1 php, 1 php, 1 simply, 1 popular, 1 php, 3 is, 3 good, 1 simply, 1 popular, 1
  3. NodeManager YARNChild MapTask ReduceTask node manager node <?php while (($line

    = fgets(STDIN)) !== false) { $words = explode(' ', trim($line)); foreach ($words as $word) { echo $word . ' ' . 1 . PHP_EOL; } }
  4. $mongo = new MongoClient(); $app['mongo'] = $mongo->selectDB('db'); $map = new

    MongoCode('function() { emit(this.key, this.value); }'); $reduce = new MongoCode('function(key, values) { return Array.sum(values); }'); $result = $app['mongo']->command(array( 'mapreduce' => 'collection', 'map' => $map, 'reduce' => $reduce, 'out' => array( 'inline' => 1, ), ));
  5. Apache Zookeeper Apache HBase Apache Hive Apache Oozie Apache Pig

    Apache Avro Apache Ambari Apache Chukwa Apache Flume Apache Scribe Apache Whirr Apache Mahout Apache Sqoop Apache Zookeeper Apache HBase Apache Hive Apache Pig Apache Avro
  6. $socket = new TSocket('localhost', 9090); $socket->setSendTimeout(2000); $socket->setRecvTimeout(4000); $transport = new

    TBufferedTransport($socket); $protocol = new TBinaryProtocol($transport); $client = new HbaseClient($protocol); $transport->open(); $table = 'test'; $descriptors = $client->getColumnDescriptors($table); $result = $client->getRow($table, "php"); foreach ($descriptors as $col) { echo ("Column: {$col->name}, maxVer: {$col->maxVersions}" . PHP_EOL); } $transport->close();
  7. CREATE TABLE page_views ( user_id INT, page_id INT, date DATE, user_agent

    STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; SELECT page_views.* FROM page_views WHERE page_views.date >= '2008-03-01' AND page_views.date <= '2008-03-31'; SELECT page_views.* FROM page_views JOIN dim_users ON (page_views.user_id = dim_users.id) WHERE page_views.date >= '2008-03-01' AND page_views.date <= '2008-03-31'; SELECT col1 FROM t1 GROUP BY col1 HAVING SUM(col2) > 10;
  8. CREATE TABLE www_logs ( ip STRING, method STRING, url STRING,

    http_code SMALLINT, referrer STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; add FILE www_logs_mapper.php; INSERT OVERWRITE TABLE www_logs_raw SELECT TRANSFORM (line) USING 'php www_logs_mapper.php' AS (ip, method, url, http_code, referrer) FROM www_logs; SELECT user_agent, COUNT(*) FROM www_logs GROUP BY user_agent; CREATE TABLE www_logs_raw ( line STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
  9. class RandomSentenceSpout extends ShellSpout { protected $sentences = array( "the

    cow jumped over the moon", "an apple a day keeps the doctor away", ); protected function nextTuple() { sleep(.1); $sentence = $this->sentences[ rand(0, count($this->sentences) - 1)]; $this->emit(array($sentence)); } protected function ack($tuple_id) { return; } protected function fail($tuple_id) { return; } } $SentenceSpout = new RandomSentenceSpout(); $SentenceSpout->run();
  10. class SplitSentenceBolt extends BasicBolt { public function process(Tuple $tuple) {

    $words = explode(" ", $tuple->values[0]); foreach($words as $word) { $this->emit(array($word)); } } } $splitsentence = new SplitSentenceBolt(); $splitsentence->run();