Added functions for creating context diffs and unified diffs.

Documentation update and NEWS item are forthcoming.

Added functions for creating context diffs and unified diffs.
Documentation update and NEWS item are forthcoming.
f0b1a1fc · Raymond Hettinger · f177bf22 · f0b1a1fc
Kaydet (Commit) f0b1a1fc authored Haz 08, 2003 tarafından Raymond Hettinger
Hide whitespace changes
Inline Side-by-side

Showing with 200 additions and 1 deletion

difflib.py Lib/difflib.py +200 -1

No files found.
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -6,12 +6,18 @@ Module difflib -- helpers for computing deltas between objects.
 Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
    Use SequenceMatcher to return list of the best "good enough" matches.

+Function context_diff(a, b):
+    For two lists of strings, return a delta in context diff format.
+
 Function ndiff(a, b):
    Return a delta: the difference between `a` and `b` (lists of strings).

 Function restore(delta, which):
    Return one of the two sequences that generated an ndiff delta.

+Function unified_diff(a, b):
+    For two lists of strings, return a delta in unified diff format.
+
 Class SequenceMatcher:
    A flexible class for comparing pairs of sequences of any type.

@@ -20,7 +26,8 @@ Class Differ:
 """

 __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
-           'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK']
+           'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
+           'unified_diff']

 class SequenceMatcher:

@@ -532,6 +539,54 @@ class SequenceMatcher:
                answer.append( ('equal', ai, i, bj, j) )
        return answer

+    def get_grouped_opcodes(self, n=3):
+        """ Isolate change clusters by eliminating ranges with no changes.
+
+        Return a generator of groups with upto n lines of context.
+        Each group is in the same format as returned by get_opcodes().
+
+        >>> from pprint import pprint
+        >>> a = map(str, range(1,40))
+        >>> b = a[:]
+        >>> b[8:8] = ['i']     # Make an insertion
+        >>> b[20] += 'x'       # Make a replacement
+        >>> b[23:28] = []      # Make a deletion
+        >>> b[30] += 'y'       # Make another replacement
+        >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
+        [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
+         [('equal', 16, 19, 17, 20),
+          ('replace', 19, 20, 20, 21),
+          ('equal', 20, 22, 21, 23),
+          ('delete', 22, 27, 23, 23),
+          ('equal', 27, 30, 23, 26)],
+         [('equal', 31, 34, 27, 30),
+          ('replace', 34, 35, 30, 31),
+          ('equal', 35, 38, 31, 34)]]
+        """
+
+        codes = self.get_opcodes()
+        # Fixup leading and trailing groups if they show no changes.
+        if codes[0][0] == 'equal':
+            tag, i1, i2, j1, j2 = codes[0]
+            codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
+        if codes[-1][0] == 'equal':
+            tag, i1, i2, j1, j2 = codes[-1]
+            codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
+
+        nn = n + n
+        group = []
+        for tag, i1, i2, j1, j2 in codes:
+            # End the current group and start a new one whenever
+            # there is a large range with no changes.
+            if tag == 'equal' and i2-i1 > nn:
+                group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
+                yield group
+                group = []
+                i1, j1 = max(i1, i2-n), max(j1, j2-n)
+            group.append((tag, i1, i2, j1 ,j2))
+        if group and not (len(group)==1 and group[0][0] == 'equal'):
+            yield group
+
    def ratio(self):
        """Return a measure of the sequences' similarity (float in [0,1]).

@@ -1042,6 +1097,150 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"):

 del re

+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+                 tofiledate='', n=3, lineterm='\n'):
+    r"""
+    Compare two sequences of lines; generate the delta as a unified diff.
+
+    Unified diffs are a compact way of showing line changes and a few
+    lines of context.  The number of context lines is set by 'n' which
+    defaults to three.
+
+    By default, the diff control lines (those with *** or ---) are
+    created with a trailing newline.  This is helpful so that inputs
+    created from file.readlines() result in diffs that are suitable for
+    file.writelines() since both the inputs and outputs have trailing
+    newlines.
+
+    For inputs that do not have trailing newlines, set the lineterm
+    argument to "" so that the output will be uniformly newline free.
+
+    The unidiff format normally has a header for filenames and modification
+    times.  Any or all of these may be specified using strings for
+    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
+    times are normally expressed in the format returned by time.ctime().
+
+    Example:
+
+    >>> for line in unified_diff('one two three four'.split(),
+    ...             'zero one tree four'.split(), 'Original', 'Current',
+    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
+    ...             lineterm=''):
+    ...     print line
+    --- Original Sat Jan 26 23:30:50 1991
+    +++ Current Fri Jun 06 10:20:52 2003
+    @@ -1,4 +1,4 @@
+    +zero
+     one
+    -two
+    -three
+    +tree
+     four
+    """
+
+    started = False
+    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+        if not started:
+            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
+            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
+            started = True
+        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
+        for tag, i1, i2, j1, j2 in group:
+            if tag == 'equal':
+                for line in a[i1:i2]:
+                    yield ' ' + line
+                continue
+            if tag == 'replace' or tag == 'delete':
+                for line in a[i1:i2]:
+                    yield '-' + line
+            if tag == 'replace' or tag == 'insert':
+                for line in b[j1:j2]:
+                    yield '+' + line
+
+# See http://www.unix.org/single_unix_specification/
+def context_diff(a, b, fromfile='', tofile='',
+                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+    r"""
+    Compare two sequences of lines; generate the delta as a context diff.
+
+    Context diffs are a compact way of showing line changes and a few
+    lines of context.  The number of context lines is set by 'n' which
+    defaults to three.
+
+    By default, the diff control lines (those with *** or ---) are
+    created with a trailing newline.  This is helpful so that inputs
+    created from file.readlines() result in diffs that are suitable for
+    file.writelines() since both the inputs and outputs have trailing
+    newlines.
+
+    For inputs that do not have trailing newlines, set the lineterm
+    argument to "" so that the output will be uniformly newline free.
+
+    The context diff format normally has a header for filenames and
+    modification times.  Any or all of these may be specified using
+    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+    The modification times are normally expressed in the format returned
+    by time.ctime().  If not specified, the strings default to blanks.
+
+    Example:
+
+    >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
+    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
+    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
+    *** Original Sat Jan 26 23:30:50 1991
+    --- Current Fri Jun 06 10:22:46 2003
+    ***************
+    *** 1,4 ****
+      one
+    ! two
+    ! three
+      four
+    --- 1,4 ----
+    + zero
+      one
+    ! tree
+      four
+    """
+
+    started = False
+    prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
+    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+        if not started:
+            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
+            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
+            started = True
+        yield '***************%s' % (lineterm,)
+        if group[-1][2] - group[0][1] >= 2:
+            yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
+        else:
+            yield '*** %d ****%s' % (group[-1][2], lineterm)
+        empty = True
+        for tag, i1, i2, j1, j2 in group:
+            if tag == 'replace' or tag == 'delete':
+                empty = False
+                break
+        if not empty:
+            for tag, i1, i2, j1, j2 in group:
+                if tag != 'insert':
+                    for line in a[i1:i2]:
+                        yield prefixmap[tag] + line
+        if group[-1][4] - group[0][3] >= 2:
+            yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
+        else:
+            yield '--- %d ----%s' % (group[-1][4], lineterm)
+        empty = True
+        for tag, i1, i2, j1, j2 in group:
+            if tag == 'replace' or tag == 'insert':
+                empty = False
+                break
+        if not empty:
+            for tag, i1, i2, j1, j2 in group:
+                if tag != 'delete':
+                    for line in b[j1:j2]:
+                        yield prefixmap[tag] + line
+
 def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
    r"""
    Compare `a` and `b` (lists of strings); return a `Differ`-style delta.