Slide 23
Slide 23 text
itertools.groupby
[('"AS-IS".', ['"AS-IS".']),
('"Defect"', ['"Defect"']),
('"Pro-', ['"Pro-']),
('"Project', ['"Project']),
('"Right', ['"Right']),
('"Small', ['"Small', '"Small']),
('"small', ['"small']),
('#1787]', ['#1787]']),
("&c.'", ["&c.'"]),
("''Tis", ["''Tis"]),
("'A", ["'A", "'A", "'A", "'A", "'A"]),
...
]
keywords = groupby(sorted(hamlet.split()))
[('the', 970),
('and', 708),
('of', 666),
('to', 632),
('I', 521),
('a', 466),
('my', 444),
('in', 391),
('you', 383),
('Ham.', 358),
('is', 318),
('his', 284),
('it', 274),
('not', 260),
...
]