Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Apache Solr

Apache Solr

An introduction to Apache Solr

Abdulaziz AlShetwi

December 19, 2011
Tweet

Other Decks in Technology

Transcript

  1. Scalable Flexible Ease of Use Fast Accept any data Open

    Source Code-less Extendable Configurable
  2. 5

  3. Lightweight, No Dependencies Java Library for indexing and searching Inverted

    Index 53% of companies use it. [IDC] Apache Lucene
  4. <dataConfig> <dataSource driver="org.hsqldb.jdbcDriver" url="jdbc:hsqldb:/temp/example/ex" user="sa" /> <document> <entity name="item"

    query="select * from item"> <entity name="feature" query="select description as features from feature where item_id='${item.ID}'"/> <entity name="item_category" query="select CATEGORY_ID from item_category where item_id='${item.ID}'"> <entity name="category" query="select description as cat from category where id = '${item_category.CATEGORY_ID}'"/> </entity> </entity> </document> </dataConfig> Database
  5. <dataConfig> <dataSource type="FileDataSource" encoding="UTF-8" /> <document> <entity name="page" processor="XPathEntityProcessor" stream="true"

    forEach="/mediawiki/page/" url="/data/enwiki-20080724-pages-articles.xml" transformer="RegexTransformer,DateFormatTransformer"> <field column="id" xpath="/mediawiki/page/id" /> <field column="title" xpath="/mediawiki/page/title" /> <field column="revision" xpath="/mediawiki/page/revision/id" /> <field column="user" xpath="/mediawiki/page/revision/contributor/username" /> <field column="userId" xpath="/mediawiki/page/revision/contributor/id" /> <field column="text" xpath="/mediawiki/page/revision/text" /> <field column="timestamp" xpath="/mediawiki/page/revision/timestamp" dateTimeFormat="yyyy-MM-dd'T'hh:mm:ss'Z'" /> <field column="$skipDoc" regex="^#REDIRECT .*" replaceWith="true" sourceColName="text"/> </entity> </document> </dataConfig> XML
  6. HTTP POST to /update <add> <doc boost="2"> <field name="article">05991</field> <field

    name="title">Apache Solr</field> <field name="subject">An intro...</field> <field name="category">search</field> <field name="category">lucene</field> <field name="body">Solr is a full...</field> </doc> </add>
  7. HTTP POST to /update Delete by Id <delete>

    <id>05591</id> </delete> Delete by Query (multiple documents) <delete> <query>manufacturer:microsoft</query> </delete>
  8. <response> <responseHeader> <status>0</status> <QTime>1</QTime> </responseHeader> <result numFound="16173" start="0"> <doc> <str

    name="name">Apple 60 GB iPod with Video</str> <float name="price">399.0</float> </doc> <doc> <str name="name">ASUS Extreme N7800GTX/2DHTV</str> <float name="price">479.95</float> </doc> </result> </response>
  9. <field name="id" type="string" indexed="true" stored="true" required="true" /> <field name="dedupe" type="string"

    indexed="true" stored="true" /> <field name="filetype" type="string" indexed="true" stored="true" /> <field name="hostname" type="string" indexed="true" stored="true" /> <field name="filename" type="string" indexed="true" stored="true" /> <field name="category" type="string" indexed="true" stored="false" multiValued="true" /> <field name="popularity" type="double" indexed="true" stored="true" /> <field name="title" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" default="No Title Found"/> <field name="text" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true"/> <field name="timestamp" type="date" indexed="true" stored="true" default="NOW/SECOND" multiValued="false"/> <field name="content_type" type="string" indexed="true" stored="true" multiValued="false"/> <field name="last_modified_date" type="date" indexed="true" stored="true" multiValued="false"/>
  10. <analyzer type="query"> <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"

    ignoreCase="true" expand="true"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English"/> </analyzer>