Slide 25
Slide 25 text
Hierarchical softmax
from gensim.models import Word2Vec
# Train a tiny Word2Vec model on a single six-word sentence.
#   min_count=1 : keep words that occur only once (every word here does)
#   seed=1      : fixed seed so repeated runs are comparable
#   hs=1        : train with hierarchical softmax
sentences = [["he", "is", "a", "very", "kind", "man"]]
model = Word2Vec(sentences, min_count=1, seed=1, hs=1)

# Dump, for every vocabulary word, the three attributes in which the
# hierarchical-softmax structure is stored: index, code and point.
# NOTE(review): `model.vocab` is the attribute this snippet was written
# against; newer gensim releases appear to expose the vocabulary elsewhere
# (e.g. under `model.wv`) - confirm against the installed version.
for word, entry in model.vocab.items():
    print("word:", word)
    print("index", entry.index)
    print("code", entry.code)
    print("point", entry.point)
    print("-------------")
('word:', 'a')
('index', 0)
('code', array([1, 0, 0], dtype=uint8))
('point', array([4, 3, 1], dtype=uint32))
-------------
('word:', 'kind')
('index', 1)
('code', array([1, 0, 1], dtype=uint8))
('point', array([4, 3, 1], dtype=uint32))
-------------
('word:', 'very')
('index', 2)
('code', array([1, 1, 1], dtype=uint8))
('point', array([4, 3, 0], dtype=uint32))
-------------
('word:', 'is')
('index', 3)
('code', array([0, 1], dtype=uint8))
('point', array([4, 2], dtype=uint32))
-------------
('word:', 'he')
('index', 4)
('code', array([0, 0], dtype=uint8))
('point', array([4, 2], dtype=uint32))
-------------
('word:', 'man')
('index', 5)
('code', array([1, 1, 0], dtype=uint8))
('point', array([4, 3, 0], dtype=uint32))
-------------
Gensimは、Hierarchical softmaxのデータ構造をindex, code, pointという形で保持している。