import urllib2
import re
# Scrape the ActiveState recipe user listing page by page and report how many
# users contributed each number of recipes.

# NOTE(review): the HTML markup that anchored this pattern appears to have been
# lost from the source -- the literal was split across two lines, leaving only
# a newline in front of the first group. The surviving text is preserved
# unchanged; restore the real markup before trusting the results.
pattern = r'\n(.*)\s*\((.*) recipe[s]?\)'
entry_re = re.compile(pattern)  # compiled once rather than once per page

# NOTE(review): the text that identifies the last page is empty in the source
# (presumably stripped markup as well). An empty string is found in every
# page, so as written the loop stops after the first page; restore the marker.
last_page_marker = ''

page = 1
# Each element of contrib is a tuple consisting of the name of the user and
# the number of submitted recipes (both as strings, straight from the regex).
contrib = []
while True:  # loop over pages
    print("Processing page %s" % page)
    url = "http://code.activestate.com/recipes/users/?page=%s" % page
    f = urllib2.urlopen(url)
    try:
        html = f.read()
    finally:
        # Release the connection even when reading the response fails.
        f.close()
    res = entry_re.findall(html)
    contrib.extend(res)
    # Stop on the last page. Also stop when a page lists nobody, so that a
    # marker which never shows up cannot keep the loop requesting pages forever.
    if not res or last_page_marker in html:
        break
    page += 1

# Print users and number of recipes on screen
#for p in contrib:
#    print p[0], p[1]

# Number of recipes as a list:
nrecipes = [int(p[1]) for p in contrib]

# Print the distribution. A single-pass tally replaces the repeated
# max()/list.count() scans and copes with an empty result, where max() would
# raise ValueError.
tally = {}
for n in nrecipes:
    tally[n] = tally.get(n, 0) + 1
for n in sorted(tally):
    if n >= 1:  # the report starts at one recipe, as it always has
        print("%s people contribute %s recipes each" % (tally[n], n))