From b900cea6cf6054b7f5a5d7f1c68a2d1f2b76ef80 Mon Sep 17 00:00:00 2001
From: LeoHsiao <leohsiao@foxmail.com>
Date: Mon, 24 Aug 2020 22:25:35 +0800
Subject: [PATCH] Rewrite diff() to simulate the output of GNU diff

---
 tests/bash_tests/testcases.py |  9 ++---
 tests/bash_tests/utils.py     | 64 +++++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/tests/bash_tests/testcases.py b/tests/bash_tests/testcases.py
index 0a3315b7..1923904b 100644
--- a/tests/bash_tests/testcases.py
+++ b/tests/bash_tests/testcases.py
@@ -278,11 +278,6 @@ set Exif.Photo.DateTimeDigitized 2020:05:26 07:31:42'''
 
 
     def test_exiv2(self):
-        if BT.Conf.system_name in ['SunOS', 'FreeBSD', 'NetBSD']:
-            diffargs = '-w'
-        else:
-            diffargs = '-w --text'
-
         # Add each image to the following three lists.
         # The image basename in the second and third lists
         # is the Exif timestamp adjusted by -12:01:01.
@@ -388,7 +383,7 @@ set Exif.Photo.DateTimeDigitized 2020:05:26 07:31:42'''
         out += ''
 
         out += 'Compare image data and extracted data ------------------------------------'
-        out += BT.excute('diff {diffargs} iii jjj', vars(), expected_returncodes=[1])
+        out += BT.diff('iii', 'jjj')
         out += ''
 
         out += 'Delete Thumbnail ---------------------------------------------------------'
@@ -409,7 +404,7 @@ set Exif.Photo.DateTimeDigitized 2020:05:26 07:31:42'''
         out += ''
 
         out += 'Compare original and inserted image data ---------------------------------'
-        out += BT.excute('diff {diffargs} iii kkk', vars(), expected_returncodes=[1])
+        out += BT.diff('iii', 'kkk')
 
         BT.reportTest('exiv2-test', out)
 
diff --git a/tests/bash_tests/utils.py b/tests/bash_tests/utils.py
index f1b6a820..fd5c7a46 100644
--- a/tests/bash_tests/utils.py
+++ b/tests/bash_tests/utils.py
@@ -1,3 +1,4 @@
+import difflib
 import hashlib
 import multiprocessing
 import os
@@ -141,9 +142,56 @@ def save(content: (bytes, str, tuple, list), filename, encoding=None):
         raise ValueError('Expect content of type (bytes, str, tuple, list), but get {}'.format(type(content).__name__))
 
 
-def diff(file1, file2):
-    list1            = cat(file1).split('\n')
-    list2            = cat(file2).split('\n')
+def diff(file1, file2, encoding=None):
+    """
+    Simulates the output of GNU diff.
+    You can use `diff(f1, f2)` to simulate `diff -w f1 f2`
+    """
+    encoding     = encoding or Conf.encoding
+    texts        = []
+    for f in [file1, file2]:
+        text     = cat(f, encoding=encoding)
+        # Ignore whitespace characters
+        for i in '\t\r\v\f':
+            text = text.replace(i, ' ')
+        texts   += [text.split('\n')]
+    text1, text2 = texts
+
+    output       = []
+    new_part     = True
+    i            = 0
+    for line in difflib.unified_diff(text1, text2, fromfile=file1, tofile=file2, lineterm=''):
+        i       += 1
+        if i     < 3:
+            continue
+
+        flag             = line[0]
+        if flag         == '-':   # line unique to sequence 1
+            new_flag     = '< '
+        elif flag       == '+':   # line unique to sequence 2
+            new_flag     = '> '
+            if new_part:
+                new_part = False
+                output  += ['---']
+        elif flag       == ' ':   # line common to both sequences
+            new_flag     = ' '
+        elif flag       == '?':   # line not present in either input sequence
+            new_flag     = '? '
+        elif flag       == '@':
+            output      += [re.sub(r'@@ -([^ ]+) \+([^ ]+) @@', r'\1c\2', line)]
+            new_part     = True
+            continue
+        else:
+            new_flag     = flag
+        output          += [new_flag + line[1:]]
+
+    return '\n'.join(output)
+
+
+def simply_diff(file1, file2, encoding=None):
+    encoding         = encoding or Conf.encoding
+    list1            = cat(file1, encoding=encoding).split('\n')
+    list2            = cat(file2, encoding=encoding).split('\n')
     if list1        == list2:
         return
     report           = []
@@ -201,8 +249,9 @@ def excute(cmd: str, vars_dict=dict(),
     output          = [i or b'' for i in output]
     output          = [i.rstrip(b'\n') for i in output]
 
+    encoding        = encoding or Conf.encoding
     if not return_in_bytes:
-        output      = [i.decode(encoding or Conf.encoding) for i in output]
+        output      = [i.decode(encoding) for i in output]
         output      = [i.replace('\r\n', '\n') for i in output]   # fix dos line-endings
         output      = [i.replace('\\', r'/') for i in output]     # fix dos path separators
     if expected_returncodes and p.returncode not in expected_returncodes:
@@ -219,15 +268,16 @@ def excute(cmd: str, vars_dict=dict(),
 def reportTest(testname, output: str, encoding=None):
     """ If the output of the test case is correct, this function returns None. Otherwise print its error. """
     output               = str(output) + '\n'
+    encoding             = encoding or Conf.encoding
     reference_file       = os.path.join(Conf.data_dir, '{}.out'.format(testname))
-    reference_output     = cat(reference_file, encoding=encoding or Conf.encoding)
+    reference_output     = cat(reference_file, encoding=encoding)
     if reference_output == output:
         return
     log.error('The output of the testcase mismatch the reference')
     output_file = os.path.join(Conf.tmp_dir, '{}.out'.format(testname))
-    save(output, output_file, encoding=encoding or Conf.encoding)
+    save(output, output_file, encoding=encoding)
     log.info('The output has been saved to file {}'.format(output_file))
-    log.info('diff:\n' + str(diff(reference_file, output_file)))
+    log.info('diff:\n' + str(simply_diff(reference_file, output_file, encoding=encoding)))
     raise RuntimeError('\n' + log.to_str())