# Author: David Decotigny 2008 Oct 3
# @brief Routines to determine which new objects are reachable
# between 2 points in the code
import gc
import sys
import traceback
import weakref

try:
    import cPickle as pickle  # C-accelerated pickle where it exists
except ImportError:
    import pickle  # no separate cPickle module: use the standard one
#
# Method 1: use weak ref to track new live objects
# Advantages: we have live pointers to the new live objects. And fast
# Drawbacks: doesn't track many types (such as list, dict, etc.) but
#            generally this is not a problem because, if they contain
#            sub-objects, those sub-objects are most probably trackable
#
class RefTracker(object):
    """
    Report the objects that appeared between two calls to scan().

    The scan() method will apply the given callback to the list of
    new objects created since last call to scan() (or since the
    construction, for the 1st time). Only objects that accept weak
    references are tracked; the type names of the others are
    collected in not_tracked_types.
    """

    def __init__(self):
        # str(type(obj)) of every object that refused a weak reference
        self._not_tracked_types = set()
        # id(obj) -> weakref.ref(obj) for every object recorded so far
        self._current_refs = dict()
        # Baseline scan without callback: whatever is alive right now
        # is recorded and will not be reported later.
        self.scan()

    def _get_objects(self):
        """Return the candidate objects to examine (subclass hook)."""
        return gc.get_objects()

    def _scan(self, callback_new_object):
        """
        Record the objects not seen before and hand each one to
        callback_new_object (skipped when the callback is false-y).

        This is NOT MT-safe and will not work for most builtin types
        """
        objs = self._get_objects()

        # First: remove the objects that are not available anymore, so
        # that a recycled id() is not mistaken for the old object.
        dead_ids = [oid for oid, ref in self._current_refs.items()
                    if ref() is None]
        for oid in dead_ids:
            del self._current_refs[oid]
        del dead_ids

        # Then: find the objects that are brand new.
        for obj in objs:
            known_ref = self._current_refs.get(id(obj))
            if known_ref is not None:
                # The object was already recorded last time. Dead
                # entries were purged by the previous loop, so the weak
                # ref must designate this very object. Sanity check by
                # identity: "==" would run arbitrary user __eq__ code,
                # which may raise or return a non-boolean.
                assert known_ref() is obj
                continue

            # This is a new object. Try to make a weak-ref out of it:
            try:
                wref = weakref.ref(obj)
            except TypeError:
                # Track only weak-ref-friendly objects, remember
                # the types of the objects we couldn't weak-reference:
                self._not_tracked_types.add(str(type(obj)))
                continue

            # Ok, good, we have a weak ref. Record it:
            self._current_refs[id(obj)] = wref

            # We also want to know that it's a new thing. A failing
            # callback is reported and must not abort the scan, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            try:
                if callback_new_object:
                    callback_new_object(obj)
                del obj
            except Exception:
                traceback.print_exc()

    def scan(self, callback_new_object = None):
        """Call the callback on each new object"""
        # We need this in order to free the refs still held
        # by _scan due to the callback (approx explanation...)
        gc.collect()
        self._scan(callback_new_object)
        gc.collect()

    @property
    def not_tracked_types(self):
        """Return the set of type names (str(type(obj))) of the objects
        that could not be tracked"""
        return self._not_tracked_types

    @staticmethod
    def _print_new_obj(obj):
        """Callback used by scan_and_print_new_objs"""
        print("New obj: %s" % repr(obj))

    def scan_and_print_new_objs(self, msg = None):
        """Scan and print the repr() of each new object, framed by a
        header built from msg (default "New objects") and a footer."""
        # Print list of new objs, making sure that the list is
        # correctly garbage-collected by the GC
        print("\n# -- %s:" % (msg or "New objects"))
        self.scan(self._print_new_obj)
        print("# ---------------\n")
#
# Method 2: Keep track of the garbage list
# Advantages: we have live pointers to the new live objects. And fast
# Drawbacks: will only show the objects /after/ the GC has tried to
#            reclaim them, not as soon as they have been
#            created. Still useful to debug leaks... But: are we sure
#            that lost objects are only found in cycles ??? Same
#            type restrictions as for method 1 ???
#
class GarbageTracker(RefTracker):
    """RefTracker variant whose scans examine gc.garbage instead of
    gc.get_objects()."""

    def _get_objects(self):
        """Return gc.garbage itself (the live list, not a copy)."""
        uncollectable = gc.garbage
        return uncollectable
#
# Method 3: approximate method storing signatures of objects to a file
#           and comparing the signatures. A signature consists of a pair
#           object id / type name (the class __name__, or str(type(obj))
#           when that is not available)
# Advantages: all object types can potentially be tracked. Can allow
# basic offline analysis
# Drawbacks: might not see some new objects if they are at the same address
# as previous ones having the same signature. Slow
#
# True until make_gc_snapShot() has run once: the very first snapshot
# is preceded by a garbage collection, the following ones are not.
first_time = True


def make_gc_snapShot(filename, name):
    """Append the signatures to a file, giving them the given
    'name'. A signature is a pair object_id / type_name

    filename -- path of the snapshot file; one pickled record
                (name, [(id, type_name), ...]) is appended per call
    name     -- label stored with this set of signatures
    """
    global first_time
    if first_time:
        gc.collect()
        first_time = False

    contents = []
    for o in gc.get_objects():
        try:
            tname = o.__class__.__name__
        except (AttributeError, ReferenceError):
            # ReferenceError: a weakref.proxy whose referent is gone
            # raises it on any attribute access, __class__ included.
            tname = str(type(o))
        contents.append((id(o), tname))

    # Pickle streams are binary data: open the file in binary mode, and
    # close it even when the dump fails.
    f = open(filename, 'ab')
    try:
        pickle.dump((name, contents), f)
    finally:
        f.close()
class GCSnapshot(object):
    """Used to read a set of signatures from the file"""

    def __init__(self, stream):
        """Load the next pickled (name, signatures) record from
        'stream' (a file-like object open for binary reading).

        Whatever pickle.load() raises (e.g. EOFError once the stream
        is exhausted) is propagated to the caller.
        """
        # NOTE(security): pickle.load() can execute arbitrary code
        # embedded in the data. Only read snapshot files that this
        # module wrote and that nobody else could have modified.
        self.name, contents = pickle.load(stream)
        self._contents = set(contents)

    def __sub__(self, other):
        """Give the differences between 2 sets of
        signatures. Return a set of pairs object_id /
        type_name"""
        return self._contents - other._contents

    def reach(self, ids):
        """
        ids -- iterable of object id, as returned by x[0],
               with x in the result of (snapshot2 - snapshot1)

        Return a dict id -> object with that id currently known.
        The objects recorded with these id might have been
        replaced by new ones... so we might end-up seeing objects
        that don't correspond to the original ones. This is
        especially true after a gc.collect()
        """
        # Materialise the ids once: membership tests become O(1) and a
        # one-shot iterator is not exhausted by the first lookup.
        wanted = set(ids)
        result = dict()
        if not wanted:
            return result
        for obj in gc.get_objects():
            oid = id(obj)
            if oid in wanted:
                result[oid] = obj
        return result
def read_snapshots(filename):
    """Sequentially reads the sets of signatures from a file. For
    each set of signatures, a GCSnapshot is created with the
    stored name. return the dict set name -> GCSnapshot object

    When several records share a name, the last one read wins.
    Reading stops at the first EOFError or pickle.UnpicklingError.
    NOTE(security): the file is unpickled; only read trusted files.
    """
    result = dict()
    # Pickle streams are binary data: open in binary mode, and close
    # the file even when an unexpected exception escapes the loop.
    f = open(filename, 'rb')
    try:
        while True:
            try:
                snap = GCSnapshot(f)
            except (EOFError, pickle.UnpicklingError):
                break
            result[snap.name] = snap
    finally:
        f.close()
    return result
#### BEGIN: ONLY FOR THE TESTS
class Dummy:
    """Test fixture that announces its construction and destruction."""

    def __init__(self):
        print("%s %s" % ("INFO: ctor", self))

    def __del__(self):
        print("%s %s" % ("INFO: dtor", self))
# A pair of mutually-referencing objects with __del__ methods
# See http://docs.python.org/library/gc.html#gc.garbage
# for an explanation why they are not automatically reclaimable
class ObjectReferencer:
    """Test fixture that keeps a reference to the object it is given
    and announces its construction and destruction."""

    def __init__( self, obj ):
        print("%s %s" % ("INFO: ctor", self))
        self.reference = obj

    def __del__(self):
        print("%s %s" % ("INFO: dtor", self))
class ReferencerCreator:
    """Test fixture that builds a reference cycle: it owns an
    ObjectReferencer which in turn refers back to it."""

    def __init__( self ):
        print("%s %s" % ("INFO: ctor", self))
        self.attribute = ObjectReferencer( self )

    def __del__(self):
        print("%s %s" % ("INFO: dtor", self))

    def break_cycle(self):
        """Drop the ObjectReferencer so that the cycle disappears."""
        # Necessary to break the cycle that prevents the GC from
        # doing its job
        print("%s %s" % ("INFO: break_cycle", self))
        self.attribute = None
def _run_tracker_scenario(tracker_class, title, label):
    """Run the scenario shared by _test1 and _test2.

    tracker_class -- RefTracker or a subclass; instantiated after the
                     title has been printed
    title         -- text printed between "*** " and " ***"
    label         -- text printed in the final "End of test ..." line
    """
    print("*** %s ***" % title)
    r = tracker_class()

    d = Dummy()
    print("del dummy now...")
    del d
    r.scan_and_print_new_objs("After creation/del of Dummy()")

    # Contains a cycle: will not be freed by GC...
    o = ReferencerCreator()
    print("del obj now...")
    del o
    r.scan_and_print_new_objs("After creation/del of ReferencerCreator")

    # The same, but we break the cycle
    o = ReferencerCreator()
    print("break_cycle now...")
    o.break_cycle()
    print("del obj now...")
    del o
    r.scan_and_print_new_objs("After creation/break_cycle/del of ReferencerCreator")

    print("Types not tracked:")
    for typ in r.not_tracked_types:
        print(" %s" % typ)
    print("End of test %s." % label)


def _test1():
    """Tests for method 1 (RefTracker)"""
    _run_tracker_scenario(RefTracker, "Method 1 (RefTracker)", "method 1")


def _test2():
    """Tests for method 2 (GarbageTracker)"""
    _run_tracker_scenario(GarbageTracker, "Method 2 (GarbageTracker)",
                          "method 2")
def _test3():
    """Tests for method 3 (compare signatures)"""
    import os
    import tempfile

    print("*** Method 3 (compare signatures) ***")

    # Use a private, uniquely named scratch file: a predictable name
    # under /tmp could be pre-created by someone else (and the file is
    # unpickled below), and os.environ["USER"] is not always defined.
    fd, fname = tempfile.mkstemp(prefix="gc-", suffix="-snapshot")
    os.close(fd)
    try:
        make_gc_snapShot(fname, "0")
        make_gc_snapShot(fname, "1")
        cyclic = list()
        cyclic.append(cyclic)
        make_gc_snapShot(fname, "2")
        cyclic.append(42)
        # Never read again: bound to a name only so that the object is
        # still alive when snapshot "3" is taken and analyzed.
        t = ReferencerCreator()
        make_gc_snapShot(fname, "3")

        # Now analyzing
        snaps = read_snapshots(fname)
    finally:
        # Remove the scratch file even when a step above failed
        os.remove(fname)

    indent = " "
    for newer, older in (("2", "1"), ("3", "2"), ("3", "1")):
        print("Between %s and %s, diff is:" % (newer, older))
        diff = snaps[newer] - snaps[older]
        for d in diff:
            print("%s %s" % (indent, d))
        print("Between %s and %s, diff as live objects is:" % (newer, older))
        # reach() looks the ids up among the objects alive right now,
        # whichever snapshot it is called on.
        for obj in snaps["3"].reach([d[0] for d in diff]).values():
            print("%s %s" % (indent, obj))
    print("End of test method 3.")
#### END: ONLY FOR THE TESTS
if __name__ == "__main__":
    # Run the demonstration of each of the three methods, in order
    for demo in (_test1, _test2, _test3):
        demo()
    print("Bye.")