Slide 14
Slide 14 text
>>> from pprint import pprint
>>> from Bio import SeqIO
>>> from collections import defaultdict
>>> from grph import find_kmers
>>> k = 3
>>> start, end = defaultdict(list), defaultdict(list)
>>> for rec in SeqIO.parse('tests/inputs/1.fa', 'fasta'):
... if kmers := find_kmers(str(rec.seq), k):
... start[kmers[0]].append(rec.id)
... end[kmers[-1]].append(rec.id)
...
>>> pprint(start)
{'AAA': ['Rosalind_0498', 'Rosalind_2391', 'Rosalind_0442'], 'GGG':
['Rosalind_5013'], 'TTT': ['Rosalind_2323']}
>>> pprint(end)
{'AAA': ['Rosalind_0498'], 'CCC': ['Rosalind_2323', 'Rosalind_0442'],
'GGG': ['Rosalind_5013'],