# A regular expression that matches Python string literals.
# Tripple-quoted, unicode, and raw strings are supported. This
# regular expression should be compiled with the re.VERBOSE flag.
PY_STRING_LITERAL_RE = (r"""
[uU]?[rR]?
(?: # Single-quote (') strings
'''(?: # Tripple-quoted can contain...
[^'] | # a non-quote
\\' | # a backslashed quote
'{1,2}(?!') # one or two quotes
)*''' |
'(?: # Non-tripple quoted can contain...
[^'] | # a non-quote
\\' # a backslashded quote
)*'(?!') | """+
r''' # Double-quote (") strings
"""(?: # Tripple-quoted can contain...
[^"] | # a non-quote
\\" | # a backslashed single
"{1,2}(?!") # one or two quotes
)*""" |
"(?: # Non-tripple quoted can contain...
[^"] | # a non-quote
\\" # a backslashded quote
)*"(?!")
)''')
# Example use case:
def replace_identifier(s, old, new):
"""
Replace any occurance of the Python identifier `old` with `new` in
the given string `s` -- but do *not* modify any occurances of
`old` that occur inside of string literals or comments. This
could be used, e.g., for variable renaming.
"""
# A regexp that matches comments, strings, and `old`.
comment_re = r'\#.*'
regexp = re.compile(r'(?x)%s|%s|(?P<old>\b%s\b)' %
(comment_re, PY_STRING_LITERAL_RE, re.escape(old)))
# A callback used to find the replacement value for each match.
def repl(match):
if match.group('old'):
# We matched `old`:
return new
else:
# We matched a comment or string literal:
return match.group()
# Find an regexp matches, and use `repl()` to find the replacement
# value for each. Since re.sub only replaces leftmost
# non-overlapping occurances, occurances of `old` inside strings
# or comments will be matched as part of that string or comment,
# and so won't be changed.
return regexp.sub(repl, s)