From 1cea47186a6acf14ee9cdd3a96aee057a76c47ca Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Tue, 2 Feb 2021 21:52:48 +0100 Subject: [PATCH 1/9] add decode_output func to decode popen output fixes #309 by trying to decode popen output with utf8 first and on error tries other encodings provided by the systems preferences. --- nox/popen.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/nox/popen.py b/nox/popen.py index 48fc9726..865bbd94 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -12,11 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. +import locale import subprocess import sys from typing import IO, Mapping, Sequence, Tuple, Union +def decode_output(output: bytes) -> str: + """Try to decode the given bytes with encodings from the system. + + :param output: output to decode + :raises UnicodeDecodeError: if all encodings fail + :return: decoded string + """ + decoded_output = "" + encodings = { + "utf8", + sys.stdout.encoding or "utf8", + sys.getdefaultencoding() or "utf8", + locale.getpreferredencoding() or "utf8", + } + + for idx, encoding in enumerate(encodings): + try: + print(encoding) + decoded_output = output.decode(encoding) + except UnicodeDecodeError as exc: + if idx + 1 < len(encodings): + continue + exc.encoding = str(encodings).replace("'", "") + raise + else: + break + + return decoded_output + + def popen( args: Sequence[str], env: Mapping[str, str] = None, @@ -45,4 +76,4 @@ def popen( return_code = proc.wait() - return return_code, out.decode("utf-8") if out else "" + return return_code, decode_output(out) if out else "" From 62f3272828e7e6c0feb8059db8eed16d7c2906f9 Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 10:00:55 +0100 Subject: [PATCH 2/9] simplified decode_output function --- nox/popen.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/nox/popen.py 
b/nox/popen.py index 865bbd94..a583d8c7 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -26,24 +26,20 @@ def decode_output(output: bytes) -> str: :return: decoded string """ decoded_output = "" - encodings = { - "utf8", - sys.stdout.encoding or "utf8", - sys.getdefaultencoding() or "utf8", - locale.getpreferredencoding() or "utf8", - } - for idx, encoding in enumerate(encodings): + try: + decoded_output = output.decode("utf8") + except UnicodeDecodeError: + nd_enc = locale.getpreferredencoding() + if nd_enc.casefold() in ("utf8", "utf-8"): + raise + try: - print(encoding) - decoded_output = output.decode(encoding) + decoded_output = output.decode("utf8") except UnicodeDecodeError as exc: - if idx + 1 < len(encodings): - continue - exc.encoding = str(encodings).replace("'", "") + exc.encoding = f"[utf-8, {nd_enc}]" raise - else: - break + return decoded_output From 2dfed804883ee28e85a04a838d686a55f284452b Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 10:15:32 +0100 Subject: [PATCH 3/9] fix linting issues --- nox/popen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nox/popen.py b/nox/popen.py index a583d8c7..6e252d3f 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -39,7 +39,6 @@ def decode_output(output: bytes) -> str: except UnicodeDecodeError as exc: exc.encoding = f"[utf-8, {nd_enc}]" raise - return decoded_output From 4f87b9317c853524aa50e9a4451b10db2f3091d2 Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 10:23:54 +0100 Subject: [PATCH 4/9] fix double hard coded utf8 --- nox/popen.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nox/popen.py b/nox/popen.py index 6e252d3f..0bb46442 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -30,14 +30,14 @@ def decode_output(output: bytes) -> str: try: decoded_output = output.decode("utf8") except UnicodeDecodeError: - nd_enc = locale.getpreferredencoding() - if nd_enc.casefold() in ("utf8", "utf-8"): + second_encoding = 
locale.getpreferredencoding() + if second_encoding.casefold() in ("utf8", "utf-8"): raise try: - decoded_output = output.decode("utf8") + decoded_output = output.decode(second_encoding) except UnicodeDecodeError as exc: - exc.encoding = f"[utf-8, {nd_enc}]" + exc.encoding = f"[utf-8, {second_encoding}]" raise return decoded_output From 65aad218ec534c7b95ff53539263210b950c066c Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 11:14:19 +0100 Subject: [PATCH 5/9] change utf8 to utf-8 --- nox/popen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nox/popen.py b/nox/popen.py index 0bb46442..8abf9e69 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -28,7 +28,7 @@ def decode_output(output: bytes) -> str: decoded_output = "" try: - decoded_output = output.decode("utf8") + decoded_output = output.decode("utf-8") except UnicodeDecodeError: second_encoding = locale.getpreferredencoding() if second_encoding.casefold() in ("utf8", "utf-8"): From b89bade3e3f7e2cce438cd5bc0c3fd2b66afd5be Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 11:22:58 +0100 Subject: [PATCH 6/9] add tests for decode_output --- tests/test_command.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_command.py b/tests/test_command.py index 45919ddb..3dcb66f3 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -18,6 +18,7 @@ from unittest import mock import nox.command +import nox.popen import pytest PYTHON = sys.executable @@ -294,3 +295,41 @@ def test_custom_stderr_failed_command(capsys, tmpdir): tempfile_contents = stderr.read().decode("utf-8") assert "out" not in tempfile_contents assert "err" in tempfile_contents + + +def test_output_decoding() -> None: + result = nox.popen.decode_output(b"abc") + + assert result == "abc" + + +def test_output_decoding_non_ascii() -> None: + result = nox.popen.decode_output("ü".encode("utf-8")) + + assert result == "ü" + + +def 
test_output_decoding_utf8_only_fail(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "utf8") + + with pytest.raises(UnicodeDecodeError) as exc: + nox.popen.decode_output(b"\x95") + + assert exc.value.encoding == "utf-8" + + +def test_output_decoding_utf8_fail_cp1252_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "cp1252") + + result = nox.popen.decode_output(b"\x95") + + assert result == "•" # U+2022 + + +def test_output_decoding_both_fail(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "ascii") + + with pytest.raises(UnicodeDecodeError) as exc: + nox.popen.decode_output(b"\x95") + + assert exc.value.encoding == "[utf-8, ascii]" From 1502b09f91882287ef43098a6ad62b5f18175978 Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Wed, 3 Feb 2021 12:02:03 +0100 Subject: [PATCH 7/9] fix linting issues --- tests/test_command.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_command.py b/tests/test_command.py index 3dcb66f3..3c519b62 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -314,15 +314,17 @@ def test_output_decoding_utf8_only_fail(monkeypatch: pytest.MonkeyPatch) -> None with pytest.raises(UnicodeDecodeError) as exc: nox.popen.decode_output(b"\x95") - + assert exc.value.encoding == "utf-8" -def test_output_decoding_utf8_fail_cp1252_success(monkeypatch: pytest.MonkeyPatch) -> None: +def test_output_decoding_utf8_fail_cp1252_success( + monkeypatch: pytest.MonkeyPatch +) -> None: monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "cp1252") result = nox.popen.decode_output(b"\x95") - + assert result == "•" # U+2022 @@ -330,6 +332,6 @@ def test_output_decoding_both_fail(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "ascii") 
with pytest.raises(UnicodeDecodeError) as exc: - nox.popen.decode_output(b"\x95") - + nox.popen.decode_output(b"\x95") + assert exc.value.encoding == "[utf-8, ascii]" From 7d7ffcf13919341445953ab0ca8ffe67d4f9bb8f Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Thu, 11 Feb 2021 12:28:14 +0100 Subject: [PATCH 8/9] simplify nested try...except block and fix nested exception msg Co-authored-by: Claudio Jolowicz --- nox/popen.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/nox/popen.py b/nox/popen.py index 8abf9e69..c152f7d0 100644 --- a/nox/popen.py +++ b/nox/popen.py @@ -25,22 +25,14 @@ def decode_output(output: bytes) -> str: :raises UnicodeDecodeError: if all encodings fail :return: decoded string """ - decoded_output = "" - try: - decoded_output = output.decode("utf-8") + return output.decode("utf-8") except UnicodeDecodeError: second_encoding = locale.getpreferredencoding() if second_encoding.casefold() in ("utf8", "utf-8"): raise - try: - decoded_output = output.decode(second_encoding) - except UnicodeDecodeError as exc: - exc.encoding = f"[utf-8, {second_encoding}]" - raise - - return decoded_output + return output.decode(second_encoding) def popen( From 2cc976d63e8130b2664a6b9c7fb011cf206b724b Mon Sep 17 00:00:00 2001 From: Christian Riedel Date: Thu, 11 Feb 2021 14:04:40 +0100 Subject: [PATCH 9/9] fix tests for simplified decode_output --- tests/test_command.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_command.py b/tests/test_command.py index 3c519b62..01fb06e4 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -334,4 +334,4 @@ def test_output_decoding_both_fail(monkeypatch: pytest.MonkeyPatch) -> None: with pytest.raises(UnicodeDecodeError) as exc: nox.popen.decode_output(b"\x95") - assert exc.value.encoding == "[utf-8, ascii]" + assert exc.value.encoding == "ascii"