Slide 20
Slide 20 text
Frameworks y módulos en Python para computación distribuida
# Sample (key, value) pairs; note that the key 'a' appears twice.
listaTuplas = [('a',1), ('z',3), ('b',4), ('c',3), ('a',4)]
# Turn the local list into a pair RDD.
# NOTE(review): assumes `sc` is a SparkContext created outside this snippet -- confirm.
rddTuplas= sc.parallelize(listaTuplas)
# keys() / values() each produce a new RDD with only that half of every pair.
# The lists in the trailing comments are the contents a collect() would show,
# not the objects returned by these calls.
claves = rddTuplas.keys() # ['a', 'z', 'b', 'c', 'a']
valores = rddTuplas.values() # [1, 3, 4, 3, 4]
# mapValues applies the function to the value only and keeps the key as is,
# so each value x becomes the tuple (x, 2*x): one output pair per input pair.
rddMapValues = rddTuplas.mapValues(lambda x: (x,x*2))
# [('a', (1, 2)), ('z', (3, 6)), ('b', (4, 8)), ('c', (3, 6)), ('a', (4, 8))]
# flatMapValues uses the same function, but the returned tuple is flattened:
# every element of (x, 2*x) is emitted as its own (key, element) pair, so each
# input pair yields two output pairs (see the listing that follows).
rddFMV = rddTuplas.flatMapValues(lambda x: (x,x*2))
# [('a', 1),
# ('a', 2),
# ('z', 3),
# ('z', 6),
# ('b', 4),