![]()
#!/usr/bin/python
''' Counts all posts to a pipermail mailing-list archive (asterisk-users by default) by author'''
# -*- coding: latin-1 -*-
from datetime import date, timedelta
import operator, urllib2
from BeautifulSoup import BeautifulSoup
# Monthly "messages sorted by author" page of the pipermail archive to scrape.
# %s is replaced by 'YYYY-MonthName' (e.g. '2008-January').
# For the Python-tutor list use instead:
#     'http://mail.python.org/pipermail/tutor/%s/author.html'
ARCHIVE_URL = 'http://lists.digium.com/pipermail/asterisk-users/%s/author.html'


def _fetch(url):
    '''Return the raw body of url, closing the connection even on error.'''
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _count_month(url, counts):
    '''Add 1 to counts[author] for every message listed on the page at url.

    counts is updated in place; it maps author name -> number of posts.
    '''
    soup = BeautifulSoup(_fetch(url))
    # The first two and last two <li> elements are dropped, exactly as the
    # original script did (presumably page navigation rather than messages --
    # TODO confirm against the archive markup).
    for item in soup.findAll('li')[2:-2]:
        author = item.i
        # Skip entries with no <i> tag or no plain-text author instead of
        # aborting the whole run with an AttributeError.
        if author is None or author.string is None:
            continue
        name = author.string.strip()
        counts[name] = counts.get(name, 0) + 1


def _merge_case_variants(counts):
    '''Return a new dict in which names differing only by case are merged.

    Each group is keyed by its most frequent spelling and carries the summed
    count of all spellings.  The less popular spellings are dropped, so a
    poster is never reported (or counted) twice.
    '''
    preferred = {}  # lower-case name -> most popular spelling
    merged = {}
    # Visit the most frequent spellings first so they become the group's key.
    for name, count in sorted(counts.items(),
                              key=operator.itemgetter(1), reverse=True):
        spelling = preferred.setdefault(name.lower(), name)
        merged[spelling] = merged.get(spelling, 0) + count
    return merged


today = date.today()
for year in [2008]:
    counts = {}
    # Collect all the counts for a year by scraping the monthly author
    # archive pages.  Iterate over real month starts: stepping by a fixed
    # 31 days drifts into the month and can skip a month already under way.
    for month in range(1, 13):
        month_start = date(year, month, 1)
        if month_start >= today:
            break  # this month and all later ones have not started yet
        _count_month(ARCHIVE_URL % month_start.strftime('%Y-%B'), counts)

    # Consolidate names that vary by case under the most popular spelling
    counts = _merge_case_variants(counts)

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function; print('') is a blank line.
    print('')
    print(year)
    print('====')
    top_posters = sorted(counts.items(),
                         key=operator.itemgetter(1), reverse=True)[:20]
    for name, count in top_posters:
        print('%s %d' % (name.encode('latin-1', 'xmlcharrefreplace'), count))
    print('')
David Abbott - david at linuxcrazy dot com