Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Harnessing Elasticsearch with PHP

Harnessing Elasticsearch with PHP

My talk on Elasticsearch given at the PHP Cape Town Conference 2014 (http://www.phpsouthafrica.com/).

Rudolf Vavruch

October 03, 2014
Tweet

More Decks by Rudolf Vavruch

Other Decks in Programming

Transcript

  1. What is Elasticsearch? Elasticsearch is a powerful open-source, distributed, real-time

    search and analytics engine that makes data easy to explore. 2 40 /
  2. What is Elasticsearch? > Scalable > REST-based > Speaks JSON

    > Fast, powerful search & analytics engine 3 40 /
  3. Prepare repository, install Java & Elasticsearch # echo "deb http://packages.elasticsearch.org/elasticsearch/1.3/debian

    \ stable main" > /etc/apt/sources.list.d/elasticsearch.list # wget -O - http://packages.elasticsearch.org/GPG-KEY-elasticsearch | \ apt-key add - # aptitude update && aptitude -y install elasticsearch openjdk-7-jre-headless # update-rc.d elasticsearch defaults 95 10 http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/setup-repositories.html http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/setup-service.html 14 40 /
  4. Configure # vim /etc/elasticsearch/elasticsearch.yml node.name: "Lake Silencio" bootstrap.mlockall: true path.logs:

    /var/log/elasticsearch path.data: /var/data/elasticsearch # mkdir -p /var/data/elasticsearch # chown -R elasticsearch:elasticsearch /var/data/elasticsearch http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/setup-configuration.html 15 40 /
  5. Start & test # service elasticsearch start # curl localhost:9200/_nodes/process?pretty

    { "cluster_name" : "elasticsearch", "nodes" : { "EBPb3dI5SbydDNIPLcRVFA" : { "name" : "Lake Silencio", "transport_address" : "inet[/10.0.2.15:9300]", "host" : "debian-7", "ip" : "127.0.1.1", "version" : "1.2.1", "build" : "6c95b75", "http_address" : "inet[/10.0.2.15:9200]", "process" : { "refresh_interval_in_millis" : 1000, "id" : 2049, "max_file_descriptors" : 65535, "mlockall" : true } } } } 16 40 /
  6. PHP Install # aptitude update && aptitude -y install php5-curl

    # curl -s http://getcomposer.org/installer | php # vim composer.json { "require": { "elasticsearch/elasticsearch": "~1.0.x" } } # php composer.phar install http://www.elasticsearch.org/guide/en/elasticsearch/client/php-api/current/_installation_2.html 18 40 /
  7. Add an index http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-create-index.html require 'vendor/autoload.php'; $params = array(); $params['index']

    = 'ducks'; $client = new Elasticsearch\Client(); $result = $client->indices()->create($params); # curl -XPUT 'http://localhost:9200/ducks/' http://www.elasticsearch.org/guide/en/elasticsearch/client/php-api/current/_index_operations.html 19 40 /
  8. $mapping = array( '_source' => array( 'enabled' => true ),

    'properties' => array( 'fullname' => array( 'type' => 'string', 'index' => 'no', 'store' => true, ), 'gender' => array('type' => 'string'), 'plumage' => array( 'type' => 'object', 'properties' => array( 'h' => array('type' => 'integer'), 's' => array('type' => 'integer'), 'l' => array('type' => 'integer'), ), ), 'location' => array('type' => 'geo_point'), 'glasses' => array('type' => 'boolean'), 'birthday' => array( 'type' => 'date', 'format' => 'yyyy-MM-dd', ), 'height' => array('type' => 'float'), ) ); $params = array(); $params['index'] = 'ducks'; $params['type'] = 'duck'; $params['body']['duck'] = $mapping; $client = new Elasticsearch\Client(); $result = $client->indices()->putMapping($params); # curl -XPUT 'http://localhost:9200/twitter/tweet/_mapping' -d ' { "tweet" : { "properties" : { "message" : { "type" : "string", "store" : true } } } } ' Map a type 20 40 /
  9. Index a document $data = array( 'fullname' => 'Rory The

    Duck', 'gender' => 'male', 'plumage' => array( 'h' => 50, 's' => 100, 'l' => 50), 'location' => "-34.086508, 18.45499", 'glasses' => true, 'birthday' => '2012-08-13', 'height' => 30.56, ); $params = array(); $params['body'] = $data; $params['index'] = 'ducks'; $params['type'] = 'duck'; //$params['id'] = 'ducky_id'; $client = new Elasticsearch\Client(); $result = $client->index($params); http://www.elasticsearch.org/guide/en/elasticsearch/client/php-api/current/_index_operations.html array(5) { ["_index"]=> string(5) "ducks" ["_type"]=> string(4) "duck" ["_id"]=> string(22) "OdcAUa75TbaRa3DhRxOS-A" ["_version"]=> int(1) ["created"]=> bool(true) } 22 40 /
  10. Update a document $data = array( 'fullname' => 'Rory The

    Duck', 'gender' => 'male', 'plumage' => array( 'h' => 50, 's' => 100, 'l' => 50), 'location' => "-34.086508, 18.45499", 'glasses' => false, 'birthday' => '2012-08-13', 'height' => 30.56, ); $params = array(); $params['body'] = $data; $params['index'] = 'ducks'; $params['type'] = 'duck'; $params['id'] = 'OdcAUa75TbaRa3DhRxOS-A'; $client = new Elasticsearch\Client(); $result = $client->index($params); array(5) { ["_index"]=> string(5) "ducks" ["_type"]=> string(4) "duck" ["_id"]=> string(22) "OdcAUa75TbaRa3DhRxOS-A" ["_version"]=> int(2) ["created"]=> bool(false) } 23 40 /
  11. Perform a simple query $params = array(); $params['index'] = 'ducks';

    $params['type'] = 'duck'; $range = array(); $range['gte'] = 30; $range['lte'] = 60; $params['body']['query']['range']['height'] = $range; $client = new Elasticsearch\Client(); $result = $client->search($params); array(4) { ["took"]=> int(122) ... ["hits"]=> array(3) { ... ["hits"]=> array(10) { [0]=> array(5) { ["_index"]=> string(5) "ducks" ["_type"]=> string(4) "duck" ["_id"]=> string(22) "5XH1fhPjRxiHUo7-cVFT-g" ["_score"]=> float(1) ["_source"]=> array(7) { ["fullname"]=> string(14) "Peter The Duck" ... ["height"]=> float(50.14) } [1]=> ... } } } } 26 40 /
  12. Perform a complex query - Queries $params['index'] = 'ducks'; $params['type']

    = 'duck'; $params['body']['size'] = 1000; //$params['body']['from'] = 0; $queries = array(); $query = array(); $query['term'] = array(); $query['term']['gender'] = 'female'; $queries[] = $query; $query = array(); $query['range']['plumage.h']['gte'] = 10; $query['range']['plumage.h']['lte'] = 20; $query['range']['plumage.s']['gte'] = 70; $query['range']['plumage.s']['lte'] = 80; $query['range']['plumage.l']['gte'] = 40; $query['range']['plumage.l']['lte'] = 50; $queries[] = $query; $query = array(); $query['range']['birthday']['gte'] = '2011-01-01'; $query['range']['birthday']['lte'] = '2014-01-01'; $queries[] = $query; $query = array(); $query['range']['height']['gte'] = 30; $query['range']['height']['lte'] = 60; $queries[] = $query; $params['body']['query']['filtered']['query']['bool']['must'] = $queries; 32 40 /
  13. Perform a complex query - Filters $filters = array(); $filter

    = array(); $filter['term']['glasses'] = false; $filters[] = $filter; $filter = array(); $filter['geo_distance']['distance'] = '2km'; $filter['geo_distance']['location'] = "-34.086, 18.454"; $filters[] = $filter; $params['body']['query']['filtered']['filter']['and'] = $filters; $client = new Elasticsearch\Client(); $result = $client->search($params); http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html 33 40 /
  14. A simple aggregation $params = []; $params['index'] = 'funnel'; $params['type']

    = 'tracking'; $params['search_type'] = 'count'; $params['body'] = <<<JSON { "aggs" : { "steps" : { "terms" : { "field" : "step" } } } } JSON; $client = new Elasticsearch\Client(); $ret = $client->search($params); 37 40 /
  15. A complex aggregation { "aggs": { "colors": { "terms": {

    "field": "color" }, "aggs": { "avg_price": { "avg": { "field": "price" } }, "make" : { "terms" : { "field" : "make" }, "aggs" : { "min_price" : { "min": { "field": "price"} }, "max_price" : { "max": { "field": "price"} } } } } } } } 39 40 /