lambda x: x.replace(',', ' ')
           .replace('.',' ')
           .replace('-',' ')
           .lower()) \
.flatMap(lambda x: x.split()) \
.map(lambda x: (x, 1)) \
.reduceByKey(lambda x,y:x+y)

input:            ['So wise so young, they say, do never live long.']
map (normalize):  ['so wise so young  they say  do never live long ']
                  (each ',' and '.' becomes a space, so runs of spaces remain; split() with no arguments discards them in the next step)
flatMap:          ['so', 'wise', 'so', 'young', 'they', 'say', 'do', 'never', 'live', 'long']
map (pair):       [('so', 1), ('wise', 1), ('so', 1), ('young', 1), ('they', 1), …, ('long', 1)]
output:           [('so', 2), ('wise', 1), ('young', 1), ('they', 1), …, ('long', 1)]
                  (the order of the pairs returned by reduceByKey is not guaranteed)