Upgrade to Pro — share decks privately, control downloads, hide ads and more …

SQLiteで高速全文検索〜日本語編〜

shoby
August 20, 2014

 SQLiteで高速全文検索〜日本語編〜

【第8回】potatotips (iOS/Android開発Tips共有会)

shoby

August 20, 2014
Tweet

More Decks by shoby

Other Decks in Technology

Transcript

  1. 基本的なQuery SELECT * FROM article WHERE body MATCH 'iPhone5s'; term

    match SELECT * FROM article WHERE body MATCH 'iPhone*'; term-prefix match
  2. phrase match SELECT * FROM article WHERE body MATCH 'have

    iPhone*'; ◦I have iPhone5s ×I have Android and iPhone5s
  3. 特殊なQuery SELECT * FROM article WHERE article MATCH 'iPhone*'; all

    columns SELECT * FROM article WHERE article MATCH 'title:Apple iPhone*'; specified column
  4. CREATE TABLE article (id, title, body); CREATE VIRTUAL TABLE articleTokens

    USING fts4 (articleId, titleTokens, bodyTokens); 元データを入れるテーブル FTSテーブル
  5. INSERTしたいデータ INSERT INTO article(id, title, body) VALUES (1, 'アップル', '私はアイフォーン4とアイフォーン5sを持っていま

    す'); ! INSERT INTO articleTokens(articleId, titleTokens, bodyTokens) VALUES (1, 'アップル', '私は アイフォーン4 と アイフォーン5s を 持っ ています');
  6. 投げたいSELECT SELECT * FROM article JOIN ( SELECT articleId FROM

    articleTokens WHERE bodyTokens MATCH 'アイフォーン*' LIMIT 100 ) AS result ON article.id = result.articleId ORDER BY article.id;
  7. /// Splits `string` into word tokens with CFStringTokenizer, using the "ja" locale.
     ///
     /// @param string The text to tokenize. nil or empty input yields an empty array.
     /// @return The tokens, in the order the tokenizer reports them.
     - (NSArray *)tokenArrayWithString:(NSString *)string {
         NSMutableArray *tokenArray = [NSMutableArray array];

         // CFStringGetLength(NULL) crashes, so bail out before touching CoreFoundation.
         if (string.length == 0) {
             return tokenArray;
         }

         NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:@"ja"];

         // __bridge: no ownership transfer; ARC keeps managing `string` and `locale`.
         CFStringRef cfString = (__bridge CFStringRef)string;
         CFRange range = CFRangeMake(0, CFStringGetLength(cfString));
         CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
                                                                  cfString,
                                                                  range,
                                                                  kCFStringTokenizerUnitWordBoundary,
                                                                  (__bridge CFLocaleRef)locale);
         // CFRelease(NULL) crashes, so do not fall through with a failed creation.
         if (tokenizer == NULL) {
             return tokenArray;
         }

         while (CFStringTokenizerAdvanceToNextToken(tokenizer) != kCFStringTokenizerTokenNone) {
             CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
             // Validate the token's own range; the whole-string `range` always starts at 0.
             if (tokenRange.location != kCFNotFound) {
                 NSString *token = [string substringWithRange:NSMakeRange(tokenRange.location, tokenRange.length)];
                 [tokenArray addObject:token];
             }
         }

         // The tokenizer came from a Create function, so this code owns and must release it.
         CFRelease(tokenizer);
         return tokenArray;
     }
  8. 各Tokenに * を追加し 半角スペースで連結して Queryを組み立てる NSMutableArray *searchTokens = [NSMutableArray arrayWithCapacity:tokenArray.count];

    for (NSString *token in tokenArray) { [searchTokens addObject:[token stringByAppendingString:@"*"]]; } [searchTokens componentsJoinedByString:@" "];
  9. 投げたいSELECT SELECT * FROM article JOIN ( SELECT articleId, rank(matchinfo(articleTokens,

    'pcnalx')) AS rank FROM articleTokens WHERE bodyTokens MATCH 'アイフォーン*' LIMIT 100 ) AS result ON article.id = result.articleId ORDER BY rank DESC;