Find out how often and by whom a particular file is being requested. Prints the requesting addresses, hostnames, access times, and hit counts.
from operator import itemgetter
from socket import gethostbyaddr, herror
import time
# Log file read by track() when the caller does not supply one.
accessLog = "/var/log/apache2/access_log"


def track(filename, logFile=accessLog):
    """Retrieve request statistics for a specific file in an access log.

    Each log line is split on whitespace; field 0 is taken as the requesting
    address, field 3 as the timestamp (``[dd/Mon/YYYY:HH:MM:SS``) and field 6
    as the requested path. Lines with fewer than seven fields are ignored.

    A summary line and one line per requesting address are printed to
    standard output.

    Args:
        filename: Requested path to look for; a leading '/' is optional.
        logFile: Path of the access log to scan.

    Returns:
        A list of ``(address, (lastAccess, hits))`` tuples sorted by last
        access time, where ``lastAccess`` is a ``time.struct_time`` and
        ``hits`` is the number of matching requests from that address.
        The list is empty when the file was never requested.

    Raises:
        IOError/OSError: If ``logFile`` cannot be opened.
        ValueError: If a matching line carries a timestamp that does not
            fit the expected format.
    """
    filename = '/' + filename.lstrip('/')
    access = {}
    # The context manager guarantees the log is closed even if parsing fails.
    with open(logFile) as log:
        for line in log:
            parts = line.split()
            # Blank or truncated lines have no request-path field; skip them
            # instead of failing with IndexError.
            if len(parts) < 7 or parts[6] != filename:
                continue
            accessTime = time.strptime(parts[3], "[%d/%b/%Y:%H:%M:%S")
            address = parts[0]
            # Later lines overwrite the stored time, so the entry ends up
            # holding the most recent access seen for this address.
            hits = access.get(address, (None, 0))[1] + 1
            access[address] = (accessTime, hits)

    print('%s has been requested by %d people and hit %d times.' %
          (filename, len(access), sum(hits for _, hits in access.values())))

    accessTuple = sorted(access.items(), key=itemgetter(1))
    if not accessTuple:
        # Nothing matched: there is no per-address detail to print, and
        # max() below would fail on an empty sequence.
        return accessTuple

    # Right-align every hit count to the width of the largest one.
    hitWidth = len(str(max(hits for _, (_, hits) in accessTuple)))
    for user, (lastAccess, hits) in accessTuple:
        print('User: %s Last Access: %s Hits: %s' %
              ((user + ",").ljust(16),
               time.strftime("%a %d-%b-%Y %I:%M:%S %p,", lastAccess),
               str(hits).rjust(hitWidth)))
    return accessTuple
def resolve(*args):
    """Resolve a sequence of IP addresses to their hostnames, if possible.

    Addresses may be passed as separate arguments, as one iterable of
    addresses, or as a single address string. Each item may be an address
    string or a tuple whose first element is the address. One
    ``Addr: ... Hostname: ...`` line is printed per address; when the
    lookup fails the address itself is printed as the hostname.

    Args:
        *args: Address strings, or a single iterable of addresses/tuples.

    Returns:
        None. Results are written to standard output only.
    """
    # Imported locally: the module-level import only brings in herror.
    from socket import gaierror

    # A lone non-string argument is treated as the collection of addresses.
    # A lone string is one address (iterating it would look up each
    # character), and no arguments at all simply means nothing to resolve.
    if len(args) == 1 and not isinstance(args[0], str):
        addrList = args[0]
    else:
        addrList = args

    for addr in addrList:
        if isinstance(addr, tuple):
            addr = addr[0]
        try:
            host = gethostbyaddr(addr)[0]
        except (herror, gaierror, IndexError):
            # herror: no reverse record; gaierror: malformed address.
            # Either way, fall back to showing the raw address.
            host = addr
        print('Addr: %s Hostname: %s' % ((addr + ',').ljust(16), host))
# Command-line entry point: print hit statistics for the requested file,
# then the hostnames of every address that requested it.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        # sys.exit() with a string writes it to stderr and exits with a
        # non-zero status, so shell callers can detect the usage error.
        # The access log argument is optional, hence the brackets.
        sys.exit("Usage: python %s filename [access_log]" % sys.argv[0])
    if len(sys.argv) > 2:
        # An explicit log path overrides the module default.
        accessLog = sys.argv[2]
    resolve(track(sys.argv[1], accessLog))
 | 
track() returns a list of tuples including access statistics for each IP address. To print simple hit statistics, call track like so:
track('index.html', '/var/log/apache2/access_log')
To also resolve hostnames:
resolve(track('index.html', '/var/log/apache2/access_log'))
Here's an example of how it is used from the command line:
$ python track_file.py beaches.png /var/log/lighttpd/access.log
/beaches.png has been requested by 17 people and hit 29 times.
User: 129.22.9.207,    Last Access: Tue 02-Aug-2005 12:01:07 AM, Hits: 2
User: 129.22.151.187,  Last Access: Tue 02-Aug-2005 12:38:05 AM, Hits: 1
...
Addr: 129.22.9.207,    Hostname: h-129-22-9-207.ins.cwru.edu
Addr: 129.22.151.187,  Hostname: thomaspaine.STUDENT.CWRU.Edu
...
Download
Copy to clipboard