From 35dd929651c1a610a767e6f6249da514a44eb653 Mon Sep 17 00:00:00 2001
From: Todd Gamblin <tgamblin@llnl.gov>
Date: Sun, 23 Dec 2018 23:59:58 -0800
Subject: [PATCH] bugfix: handle unicode properly in spack.util.executable
 (#10186)

- When returning string output, always decode it as UTF-8 and return
  `text_type`, instead of returning an undecoded `str` in Python 2

- This properly handles unicode, whereas before we would pass bad strings
  to colify in `spack blame` when reading git output

- Add a test that round-trips some unicode through an Executable object
---
 lib/spack/spack/test/util/executable.py | 34 +++++++++++++++++++++++++
 lib/spack/spack/util/executable.py      | 21 +++------------
 2 files changed, 37 insertions(+), 18 deletions(-)
 create mode 100644 lib/spack/spack/test/util/executable.py

diff --git a/lib/spack/spack/test/util/executable.py b/lib/spack/spack/test/util/executable.py
new file mode 100644
index 0000000000..6b4fd2288a
--- /dev/null
+++ b/lib/spack/spack/test/util/executable.py
@@ -0,0 +1,34 @@
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import sys
+
+import llnl.util.filesystem as fs
+
+import spack.util.executable as ex
+
+
+def test_read_unicode(tmpdir):
+    script_name = 'print_unicode.py'
+
+    with tmpdir.as_cwd():
+
+        # make a script that prints some unicode
+        with open(script_name, 'w') as f:
+            f.write('''#!{0}
+from __future__ import print_function
+import sys
+if sys.version_info < (3, 0, 0):
+    reload(sys)
+    sys.setdefaultencoding('utf8')
+print(u'\\xc3')
+'''.format(sys.executable))
+
+        # make it executable
+        fs.set_executable(script_name)
+
+        # read the unicode back in and see whether things work
+        script = ex.Executable('./%s' % script_name)
+        assert u'\xc3' == script(output=str).strip()
diff --git a/lib/spack/spack/util/executable.py b/lib/spack/spack/util/executable.py
index e1d7b49016..3b544f9206 100644
--- a/lib/spack/spack/util/executable.py
+++ b/lib/spack/spack/util/executable.py
@@ -6,8 +6,7 @@
 import os
 import re
 import subprocess
-from six import string_types
-import sys
+from six import string_types, text_type
 
 import llnl.util.tty as tty
 
@@ -171,9 +170,9 @@ def streamify(arg, mode):
             if output is str or error is str:
                 result = ''
                 if output is str:
-                    result += to_str(out)
+                    result += text_type(out.decode('utf-8'))
                 if error is str:
-                    result += to_str(err)
+                    result += text_type(err.decode('utf-8'))
 
             rc = self.returncode = proc.returncode
             if fail_on_error and rc != 0 and (rc not in ignore_errors):
@@ -224,20 +223,6 @@ def __str__(self):
         return ' '.join(self.exe)
 
 
-def to_str(content):
-    """Produce a str type from the content of a process stream obtained with
-       Popen.communicate.
-    """
-    # Prior to python3, Popen.communicate returns a str type. For python3 it
-    # returns a bytes type. In the case of python3 we decode the
-    # byte string to produce a str type. This will generate junk if the
-    # encoding is not UTF-8 (which includes ASCII).
-    if sys.version_info < (3, 0, 0):
-        return content
-    else:
-        return content.decode('utf-8')
-
-
 def which(*args, **kwargs):
     """Finds an executable in the path like command-line which.
 
-- 
GitLab