# Welcome, guest | Sign In | My Account | Store | Cart
def map_reduce(data, mapper, reducer=None):
    """Group mapped values by key, optionally collapsing each group.

    Each item of *data* is passed to *mapper*, which must return a
    ``(key, value)`` pair.  Values that share a key are collected into a
    list in the order they are encountered.  When *reducer* is supplied it
    is called once per key with that list, and its result replaces the list.

    Returns a dict mapping every key to its list of values, or to the
    reducer's result for that list.
    """
    grouped = {}
    for item in data:
        key, value = mapper(item)
        if key in grouped:
            grouped[key].append(value)
        else:
            grouped[key] = [value]
    if reducer is None:
        return grouped
    # Keys keep their first-seen order; only the values are replaced.
    return {key: reducer(values) for key, values in grouped.items()}

if __name__ == '__main__':
    # Demonstration: group a small sample dataset in several ways and
    # pretty-print each result.

    from collections import namedtuple
    from math import fsum       # needed by avg(); was missing, causing NameError
    from pprint import pprint

    Person = namedtuple('Person', ['name', 'gender', 'age', 'height'])

    persons = [
        Person('mary', 'fem', 20, 60.2),
        Person('suzy', 'fem', 30, 50.1),
        Person('jane', 'fem', 20, 58.1),
        Person('jill', 'fem', 20, 49.1),
        Person('bess', 'fem', 40, 56.6),
        Person('john', 'mal', 20, 50.8),
        Person('jack', 'mal', 40, 59.1),
        Person('jase', 'mal', 50, 60.3),
        Person('zack', 'mal', 40, 53.7),
        Person('ambr', 'fem', 20, 57.0),
        Person('bill', 'mal', 20, 62.1)
    ]

    def height_by_gender_and_agegroup(p):
        'Mapper: key is (gender, age // 10), value is the height.'
        key = p.gender, p.age // 10
        val = p.height
        return key, val

    def avg(s):
        'Reducer: mean of a non-empty sequence, summed with math.fsum.'
        return fsum(s) / len(s)

    pprint(persons)                                                      # input dataset
    pprint(map_reduce(persons, lambda p: ((p.gender, p.age), p), None))  # grouped people
    pprint(map_reduce(persons, height_by_gender_and_agegroup, None))     # grouped heights
    pprint(map_reduce(persons, height_by_gender_and_agegroup, len))      # size of each group
    pprint(map_reduce(persons, height_by_gender_and_agegroup, max))      # maximum height by group
    pprint(map_reduce(persons, height_by_gender_and_agegroup, avg))      # average height by group

# History