from html.parser import HTMLParser
from itertools import chain


class AttendanceParser(HTMLParser):
    """HTML parser that collects integers found in flagged text nodes.

    Only ``handle_data`` is shown in this excerpt. The code that initialises
    ``self.nums`` and sets ``self.match`` is elided (presumably an
    ``__init__`` and a start-tag handler -- confirm against the full class).
    """

    ...  # remainder of the class is elided in this excerpt

    def handle_data(self, data):
        """Record the integer in *data* when a match is pending.

        Text that is empty after stripping whitespace is ignored and leaves
        ``self.match`` untouched, so the flag stays armed until real content
        arrives.

        Args:
            data: Raw text content passed in by ``HTMLParser``.

        Raises:
            ValueError: If a match is pending and the stripped text is not
                a valid integer literal.
        """
        num = data.strip()
        if self.match and num:
            # ``chain`` builds a one-shot iterator: values a caller has
            # already consumed from ``self.nums`` are not produced again
            # when the next number is appended.
            self.nums = chain(self.nums, [int(num)])
            self.match = False
def extract_attendance():
    """Yield the integers collected by ``AttendanceParser`` from pages 0-4.

    Each page URL is built from ``BASE_URL`` and ``BASE``, fetched with
    ``urlopen`` and fed to a single shared parser line by line. Numbers are
    yielded as soon as the parser has collected them.

    Yields:
        int: Each number found, in page order and then document order.

    Example (requires network access)::

        >>> len(list(extract_attendance()))  # doctest: +SKIP
        25
    """
    parser = AttendanceParser()
    # Outer loop to extract each page
    for page in range(5):
        url = BASE_URL.format(base=BASE, page=page)
        # The context manager closes the HTTP response even if the consumer
        # abandons the generator early.
        with urlopen(url) as f:
            # get_content_charset() returns None when the Content-Type
            # header carries no charset; fall back to UTF-8 so that
            # decode() is never called with None.
            encoding = f.info().get_content_charset() or "utf-8"
            # Inner loop to parse each line
            for line in f:
                parser.feed(line.decode(encoding))
                # parser.nums is a one-shot iterator, so this yields only
                # the numbers found since the previous drain.
                yield from parser.nums
>>> printable = [
...:     chr(x) for x in range(150)
...:     if chr(x).isprintable()]
>>> len(printable)
95
>>> [
...:     (x, chr(x)) for x in range(150)
...:     if chr(x).isprintable()]
[(32, ' '), (33, '!'), (34, '"'), (35, '#')...]
>>> ''.join(printable[num] for num in attendance)
'asR`N_WMH.153F24682579(?7'
Decrypted message

Your shortcake is at a cafe in Nairobi that shares an object with a snake in this flickr group: https://www.flickr.com/groups/1329313@N21/
Find the first photo taken by the most prolific group member in 2017.