Slide 39
Slide 39 text
2. LambdaεΫϦϓτ࡞
• termextract.py
• 1. ༻͢ΔࢺIDͷఆٛ
import re
# Part-of-speech (POS) ID tables for term extraction.
#
# NOTE(review): the numeric IDs look like MeCab IPAdic pos-id values; confirm
# against the dictionary the tokenizer actually loads.
# NOTE(review): the Japanese surfaces below were restored from mis-encoded
# text (font glyph indices that had been emitted as Unicode code points).
# Verify them against the original termextract.py before relying on them.

# POS IDs that are used on their own.
APPLY_IDS = [
    30, 36, 38, 40, 41, 42, 43, 44, 45, 46,
    47, 48, 49, 50, 52, 53, 54, 56, 57,
]

# POS IDs that are candidates for being joined (concatenated).
APPLY_JOIN_IDS = [10, 13, 18, 20, 24, 25, 31, 32, 33, 51, 58]

# [POS ID, surface] pairs; going by the name, these must not end a word.
NOT_ENDOFWORDS = [
    [13, "から"],
    [18, "が"],
    # The damaged text decoded to "のの"; the leading "も" is presumed to have
    # been dropped by the extraction -- TODO confirm.
    [18, "ものの"],
    [18, "と"],
    [24, "の"],
]

# [POS ID, surface] pairs: a POS ID combined with a specific attached word.
# NOTE(review): whether the "EX" prefix means "extra" or "excluded" cannot be
# determined from this snippet -- check the code that consumes this table.
EX_APPLY_IDS = [
    [18, "て"],
    [18, "で"],
    [25, "た"],
    [25, "ず"],
    [25, "ない"],
    [25, "ます"],
    [25, "ん"],
    [25, "まし"],
    [25, "ませ"],
    [31, "い"],
    [54, "そう"],
]

# Surfaces that are ignored regardless of their POS ID.
IGNORE_WORDS = [u"そう", u"した", u"あと", u"何", u"何か"]
࿈݁ରͷࢺID
ࢺIDͱଐޠͷηοτΛΈ߹Θͤ