Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Elasticsearch Intro

Elasticsearch Intro

Brief Elasticsearch introduction on OSDC.tw 2013

Kang-min Liu

April 20, 2013
Tweet

More Decks by Kang-min Liu

Other Decks in Technology

Transcript

  1. ELASTICSEARCH Distributed (Near) Real Time Search Engine RESTful 風,Lucene 骨,NoSQL

    系 elasticsearch.org https://github.com/elasticsearch/elasticsearch elasticsearch.com 13年4月20日星期六
  2. 名詞對照 Relational DB ElasticSearch database index table type row document

    column field schema mapping index (全部) SQL query DSL 13年4月20日星期六
  3. CREATE curl -XPOST http://localhost:9200/social/tweet/1 -d '{ content: "大家好" user: {

    name: "gugod", id: 385782393, }, tag: ["osdctw", "demo", "moedict"] }' 13年4月20日星期六
  4. SEARCH $ curl -XGET 'http://localhost:9200/twitter/tweet/_search' -d '{ "query": { "filtered"

    : { "query" : { "query_string" : { "query" : "some query string here" } }, "filter" : { "term" : { "user" : "kimchy" } } } } } ' query DSL 13年4月20日星期六
  5. TOKENIZATION 延 展 性 物 質 具 延 長 及

    ల ։ త ੑ ࣭ ᜝ ҝ Ԇ ల ੑ ҝ େ ଟ Ꮠ ۚ ሱ ೭ ಛ ੑ Ԇ ฏ ܊ Ԧ ໌ Ṅ ੒ ޭ త ෧ ᥒ Ԇ ೥ ӹ ᆹ Ԇ ௕ ᆹ ໋ ଟ ҝ ᰌ ॕ ਓ ௕ ᆹ త ༻ ࢺ Ԇ ੑ ෺ ࣭ Մ Ԇ ௕ ҝ ࡉ ើ త ੑ ࣭ ᜝ ҝ Ԇ ੑ ࡐ ྉ ࡏ ഁ ᆦ ೭ લ ఄ ݱ ઼ ੑ Ꮣ ܗ త ఔ ౓ Ԇ ੑ Մ ༻ ፮ ྗ ࢼ ᱛ த త ৳ ௕ ཰ ٴ Ꮧ ໘ ॖ ཰ ද ࣔ ೭ Ԇ ؇ Ԇ ᬌ Ԇ ޙ 13年4月20日星期六
  6. TOKENIZATION 延展性: 物質 質具 具延 延長 長及 及展 展開 開的

    తੑ ੑ࣭ ᜝ҝ Ԇల లੑ ҝେ େଟ ଟᏐ Ꮠۚ ۚሱ ሱ೭ ೭ಛ ಛੑ Ԇฏ܊Ԧ: ໌Ṅ Ṅ੒ ੒ޭ ޭత త෧ ෧ᥒ Ԇ೥ӹᆹ: Ԇ௕ ௕ᆹ ᆹ໋ ଟҝ ҝᰌ ᰌॕ ॕਓ ਓ௕ ௕ᆹ ᆹత త༻ ༻ࢺ Ԇੑ: ෺࣭ ࣭Մ ՄԆ Ԇ௕ ௕ҝ ҝࡉ ࡉើ ើత తੑ ੑ࣭ ᜝ҝ Ԇੑ ࡐྉ ྉࡏ ࡏ ഁ ഁᆦ ᆦ೭ ೭લ ఄݱ ݱ઼ ઼ੑ ੑᏓ Ꮣܗ ܗత తఔ ఔ౓ Ԇੑ ੑՄ Մ༻ ༻፮ ፮ྗ ྗࢼ ࢼᱛ ᱛத தత త৳ ৳௕ ௕཰ ཰ٴ ٴᏗ Ꮧ໘ ໘ॖ ॖ཰ ཰ද දࣔ ࣔ೭ Ԇ؇: Ԇᬌ Ԇޙ 13年4月20日星期六
  7. 搜尋:延長 延展性: 物質 質具 具延 延長 長及 及展 展開 開的

    తੑ ੑ࣭ ᜝ҝ Ԇల లੑ ҝେ େଟ ଟᏐ Ꮠۚ ۚሱ ሱ೭ ೭ಛ ಛੑ Ԇฏ܊Ԧ: ໌Ṅ Ṅ੒ ੒ޭ ޭత త෧ ෧ᥒ Ԇ೥ӹᆹ: Ԇ௕ ௕ᆹ ᆹ໋ ଟҝ ҝᰌ ᰌॕ ॕਓ ਓ௕ ௕ᆹ ᆹత త༻ ༻ࢺ Ԇੑ: ෺࣭ ࣭Մ ՄԆ Ԇ௕ ௕ҝ ҝࡉ ࡉើ ើత తੑ ੑ࣭ ᜝ҝ Ԇੑ ࡐྉ ྉࡏ ࡏ ഁ ഁᆦ ᆦ೭ ೭લ ఄݱ ݱ઼ ઼ੑ ੑᏓ Ꮣܗ ܗత తఔ ఔ౓ Ԇੑ ੑՄ Մ༻ ༻፮ ፮ྗ ྗࢼ ࢼᱛ ᱛத தత త৳ ৳௕ ௕཰ ཰ٴ ٴᏗ Ꮧ໘ ໘ॖ ॖ཰ ཰ද දࣔ ࣔ೭ Ԇ؇: Ԇᬌ Ԇޙ 13年4月20日星期六
  8. 給分 延展性: 物質 質具 具延 延長 長及 及展 展開 開的

    తੑ ੑ࣭ ᜝ҝ Ԇల లੑ ҝେ େଟ ଟᏐ Ꮠۚ ۚሱ ሱ೭ ೭ಛ ಛੑ Ԇฏ܊Ԧ: ໌Ṅ Ṅ੒ ੒ޭ ޭత త෧ ෧ᥒ Ԇ೥ӹᆹ: Ԇ௕ ௕ᆹ ᆹ໋ ଟҝ ҝᰌ ᰌॕ ॕਓ ਓ௕ ௕ᆹ ᆹత త༻ ༻ࢺ Ԇੑ: ෺࣭ ࣭Մ ՄԆ Ԇ௕ ௕ҝ ҝࡉ ࡉើ ើత తੑ ੑ࣭ ᜝ҝ Ԇੑ ࡐྉ ྉࡏ ࡏ ഁ ഁᆦ ᆦ೭ ೭લ ఄݱ ݱ઼ ઼ੑ ੑᏓ Ꮣܗ ܗత తఔ ఔ౓ Ԇੑ ੑՄ Մ༻ ༻፮ ፮ྗ ྗࢼ ࢼᱛ ᱛத தత త৳ ৳௕ ௕཰ ཰ٴ ٴᏗ Ꮧ໘ ໘ॖ ॖ཰ ཰ද දࣔ ࣔ೭ Ԇ؇: Ԇᬌ Ԇޙ 0.5 0.7 0.3 13年4月20日星期六
  9. { "query": { "term": { "user": "ingy" } } }

    TERM 13年4月20日星期六
  10. { "query": { "text": { "content": "這樣那樣" } } }

    TEXT 這樣 樣那 那樣 13年4月20日星期六
  11. { "query": { "range" : { "age" : { "from"

    : 10, "to" : 20, "include_lower" : true, "include_upper": false, "boost" : 2.0 } } } } RANGE 13年4月20日星期六
  12. { "query": { "query_string" : { "query" : "這樣 AND

    那樣 OR 怎樣" } } } QUERY_STRING 13年4月20日星期六
  13. { "query": { "wildcard" : { "user" : "ki*y" }

    } } WILDCARD 13年4月20日星期六
  14. { "query": { "more_like_this" : { "like_text" : "這樣那樣", "min_term_freq"

    : 1, "max_query_terms" : 12 } } } MLT(MORE LIKE THIS) 13年4月20日星期六
  15. 文件結構 { "heteronyms": [ { "bopomofo": "ㄧㄢˊ ㄓㄢˇ ㄒㄧㄥˋ", "bopomofo2":

    "yán jǎn shìng", "definitions": [ { "def": "物質具延長及展開的性質,稱為「延展性」。為大多數金屬之特性。" } ], "pinyin": "yán zhǎn xìng" } ], "title": "延展性" }, 13年4月20日星期六
  16. 正查 # curl http://localhost:9200/moedict/revised/$(uri_escape 搜)'?pretty=1' { "_index" : "moedict", "_type"

    : "revised", "_id" : "፺", "_version" : 1, "exists" : true, "_source" : {"non_radical_stroke_count":10,"stroke_count": 13,"heteronyms":[{"bopomofo":"ㄙㄡ","pinyin":"sōu","bopomofo2":"sōu","definitions": [{"quote":["׽ॻɽრ࿡ɽ෢ఇلɿʮळɼด৓໳େ፺ɼᚙᨶፅಮޒݪɻʯ","ᡅᜊࢤҟɽრҰɽޅ ՇঁɿʮቮࣕओਓᏑञ۩ɼগҰऋɼ້፺ෆಘɻʯ"],"def":"ፙਘɺਘٻɻ","type":"ಈ"}, {"example":["೗ɿʮ፺਎ʯɻ"],"quote":["ݩɽԦመำɽ੢ኈهɽୈࡾຊɽୈೋંɿʮෆߠ፺ࣗ ݾڰҝɼ୞଴ཁ᧯ผਓഁ୼ɻʯ"],"def":"ᒾ查、檢 點。","type":"ಈ"}]}],"title":"፺","radical":"ख"} } 13年4月20日星期六
  17. 反查(由義查詞) # curl --silent http://localhost:9200/moedict/revised/_search\ '?fields=&pretty=1&q='$(uri_escape 找尋) | grep _id

    "_id" : "ਘࠜ", "_id" : "ਘፙ", "_id" : "๚ٻ", "_id" : "୳ᛝ", "_id" : "ਘٻ", "_id" : "ፙਘ", "_id" : "ፙػ။", "_id" : "ࣗፙ", "_id" : "ਘᒜ", "_id" : "୳ࡧ", 13年4月20日星期六
  18. 用例句查 # curl --silent -XPOST http://localhost:9200/moedict/revised/_search'?pretty'\ -d '{query:{text:{example: "紅樓夢" }}}'

    { "took" : 8, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "failed" : 0 }, "hits" : { "total" : 485, "max_score" : 5.116848, "hits" : [ { "_index" : "moedict", "_type" : "revised", "_id" : "ঞຊ", "_score" : 5.116848, "_source" : {"heteronyms":[{"bopomofo":"ㄔㄠ ㄅㄣ ˇ","pinyin":"chāo běn","bopomofo2":"chāu běn","definitions":[{"link":["ຠ᜝ҝʮሜຊʯɺʮᭁ ຊʯɻ"],"example":["೗ɿʮঞຊߚᒭເʯɻ"],"synonyms":"खຊ","def":"खঞతॻ ੶ɻ"}]}],"title":"ঞຊ"} }, { "_index" : "moedict", "_type" : "revised", "_id" : "Ұ໊", "_score" : 4.27241, "_source" : {"heteronyms":[{"bopomofo":"ㄧ ㄇㄧㄥˊ","pinyin":"yī míng","bopomofo2":"yī míng","definitions":[{"quote":["ߚᒭເɽୈ࢛ेീճɿʮ೿Լᤴᦛ೭ೕ෕ 13年4月20日星期六
  19. 用引言查 # curl --silent -XPOST http://localhost:9200/moedict/revised/_search'?pretty'\ -d '{query:{text:{quote: "紅樓夢"}}}' {

    "took" : 18, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "failed" : 0 }, "hits" : { "total" : 11858, "max_score" : 1.7719736, "hits" : [ { "_index" : "moedict", "_type" : "revised", "_id" : "ݪߘ", "_score" : 1.7719736, "_source" : {"heteronyms":[{"bopomofo":"ㄩㄢˊ ㄍㄠˇ","pinyin":"yuán gǎo","bopomofo2":"yuán gǎu","definitions":[{"synonyms":"ఈߘ,ߘຊ,ॳߘ,૲ߘ","quote":["ߚᒭເɽୈޒ ճɿʮ㘸ඟɼճ಄໋খᱺऔྃʰߚᒭເʱݪߘိɼᬇᢛሞۄɻʯ"],"def":"࡞඼࠷ॳతखߘɻ"}]}],"title":"ݪ ߘ"} }, { "_index" : "moedict", "_type" : "revised", "_id" : "Ұ໘೭㐊", "_score" : 1.6941531, "_source" : {"heteronyms":[{"bopomofo":"ㄧ ㄇㄧㄢˋ ㄓ ㄩㄢ 13年4月20日星期六
  20. 用注音查 # curl --silent -XPOST http://localhost:9200/moedict/revised/_search'?pretty' -d '{query:{text_phrase:{bopomofo:"ㄏㄨㄣˋ ㄏㄨ

    ㄣˋ"}}}' | grep _id "_id" : "䓬䓬", "_id" : "ࠞࠞ", "_id" : "ࠞࠞಱಱ", "_id" : "ᕒᕒ", 13年4月20日星期六
  21. 出現頻率最高的注音 # curl --silent -XPOST http://localhost:9200/moedict/revised/_search'?pretty' -d ' { "query"

    : { "match_all" : {} }, "facets" : { "bpmf" : { "terms" : { "field" : "bopomofo" } } } }' 13年4月20日星期六
  22. { "facets" : { "bpmf" : { "_type" : "terms",

    "missing" : 1485, "total" : 430736, "other" : 401706, "terms" : [ { "term" : "ㄅㄨˋ", "count" : 5400 }, { "term" : "ㄕˋ", "count" : 4438 }, { "term" : "ㄧˋ", "count" : 3218 }, { "term" : "ㄧ", "count" : 2939 }, { "term" : "ㄖㄣˊ", "count" : 2754 }, { "term" : "ㄓ", "count" : 2625 }, { "term" : "ㄌㄧˋ", "count" : 2580 }, { "term" : "ㄗ", "count" : 2044 }, { "term" : "ㄕˊ", "count" : 1541 }, { "term" : "ㄒㄧㄣ", "count" : 1491 } ] } } 13年4月20日星期六
  23. curl -XPOST http://localhost:9200/lyg0v/interp_body/_search'?pretty=1' -d ' { "query": { "match_all": {}

    }, "facets": { "top": { "terms": { "field": "content" } } } }' 13年4月20日星期六
  24. curl -XPOST http://localhost:9200/lyg0v/interp_body/_search'?pretty=1' -d ' { "query": { "match_all": {}

    }, "facets": { "top": { "terms": { "field": "speaker" } } } }' 13年4月20日星期六
  25. CLUSTERING more shard faster indexing / scaling more replica faster

    searching / failover 13年4月20日星期六
  26. SEE ALSO Cool, Bonsai Cool - An introduction to ElasticSearch

    http://bit.ly/112xtsk The Road to a Distributed Search Engine http://bit.ly/ZqBBUt elasticsearch, Big Data, Search & Analytics http://bit.ly/11tmbyK 13年4月20日星期六