Slide 32
Slide 32 text
Arabic Stemmer
// stem decodes input into runes, removes at most one leading entry from
// prefixes and then any trailing entries from suffixes, and returns the
// remaining runes as a term built by analysis.BuildTermFromRunes.
func stem(input []byte) []byte {
	word := bytes.Runes(input)

	// Only a single prefix is ever removed: the scan stops at the first
	// table entry that canStemPrefix accepts.
	for i := range prefixes {
		prefix := prefixes[i]
		if !canStemPrefix(word, prefix) {
			continue
		}
		word = word[len(prefix):]
		break
	}

	// Suffix removal does not stop at the first hit: every table entry is
	// tried once, in table order, against the progressively shortened word.
	for i := range suffixes {
		suffix := suffixes[i]
		if !canStemSuffix(word, suffix) {
			continue
		}
		word = word[:len(word)-len(suffix)]
	}

	return analysis.BuildTermFromRunes(word)
}
// prefixes lists the leading rune sequences that stem may remove from a
// word. Order is significant: stem walks the table from the top and stops
// at the first entry that matches.
//
// Entries are written as \u escapes of base Arabic letters (U+0600..U+06FF)
// in logical (reading) order, so that bidirectional text handling in
// editors or document converters cannot reorder them or substitute
// presentation forms.
var prefixes = [][]rune{
	[]rune("\u0627\u0644"),       // alef, lam
	[]rune("\u0648\u0627\u0644"), // waw, alef, lam
	[]rune("\u0628\u0627\u0644"), // beh, alef, lam
	[]rune("\u0643\u0627\u0644"), // kaf, alef, lam
	[]rune("\u0641\u0627\u0644"), // feh, alef, lam
	[]rune("\u0644\u0644"),       // lam, lam
	[]rune("\u0648"),             // waw
}
// suffixes lists the trailing rune sequences that stem may remove from a
// word. Order is significant: stem tries every entry once, from the top,
// and each match shortens the word seen by the entries after it.
//
// Entries are written as \u escapes of base Arabic letters (U+0600..U+06FF)
// in logical (reading) order, so that bidirectional text handling in
// editors or document converters cannot reorder them or substitute
// presentation forms.
var suffixes = [][]rune{
	[]rune("\u0647\u0627"), // heh, alef
	[]rune("\u0627\u0646"), // alef, noon
	[]rune("\u0627\u062A"), // alef, teh
	[]rune("\u0648\u0646"), // waw, noon
	[]rune("\u064A\u0646"), // yeh, noon
	[]rune("\u064A\u0647"), // yeh, heh
	[]rune("\u064A\u0629"), // yeh, teh marbuta
	[]rune("\u0647"),       // heh
	[]rune("\u0629"),       // teh marbuta
	[]rune("\u064A"),       // yeh
}