Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decode popen output using the system locale if UTF-8 decoding fails. #380

Merged
merged 11 commits into from
Feb 11, 2021
28 changes: 27 additions & 1 deletion nox/popen.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import locale
import subprocess
import sys
from typing import IO, Mapping, Sequence, Tuple, Union


def decode_output(output: bytes) -> str:
    """Decode the given bytes, falling back to the system locale encoding.

    UTF-8 is tried first. If that fails and the locale's preferred encoding
    is a different codec, decoding is retried with that encoding.

    :param output: output to decode
    :raises UnicodeDecodeError: if all encodings fail; when both encodings
        were tried, the exception's ``encoding`` attribute names both
    :return: decoded string
    """
    import codecs

    try:
        return output.decode("utf-8")
    except UnicodeDecodeError:
        second_encoding = locale.getpreferredencoding()
        # Compare canonical codec names so that every UTF-8 alias
        # ("utf8", "UTF-8", "utf_8", "U8", ...) is recognised; retrying
        # with the same codec could only fail again.
        if codecs.lookup(second_encoding).name == "utf-8":
            raise

        try:
            return output.decode(second_encoding)
        except UnicodeDecodeError as exc:
            # Report both attempted encodings on the re-raised error.
            exc.encoding = f"[utf-8, {second_encoding}]"
            raise
Cielquan marked this conversation as resolved.
Show resolved Hide resolved


def popen(
args: Sequence[str],
env: Mapping[str, str] = None,
Expand Down Expand Up @@ -45,4 +71,4 @@ def popen(

return_code = proc.wait()

return return_code, out.decode("utf-8") if out else ""
return return_code, decode_output(out) if out else ""
41 changes: 41 additions & 0 deletions tests/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from unittest import mock

import nox.command
import nox.popen
import pytest

PYTHON = sys.executable
Expand Down Expand Up @@ -294,3 +295,43 @@ def test_custom_stderr_failed_command(capsys, tmpdir):
tempfile_contents = stderr.read().decode("utf-8")
assert "out" not in tempfile_contents
assert "err" in tempfile_contents


def test_output_decoding() -> None:
    """Plain ASCII bytes decode to the equivalent string."""
    assert nox.popen.decode_output(b"abc") == "abc"


def test_output_decoding_non_ascii() -> None:
    """UTF-8 encoded non-ASCII text is decoded back to the same text."""
    encoded = "ü".encode("utf-8")

    assert nox.popen.decode_output(encoded) == "ü"


def test_output_decoding_utf8_only_fail(monkeypatch: pytest.MonkeyPatch) -> None:
    """With a UTF-8 locale there is no fallback, so the UTF-8 error surfaces."""
    monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "utf8")

    with pytest.raises(UnicodeDecodeError) as excinfo:
        nox.popen.decode_output(b"\x95")

    raised = excinfo.value
    assert raised.encoding == "utf-8"


def test_output_decoding_utf8_fail_cp1252_success(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Bytes invalid in UTF-8 are decoded with the cp1252 locale encoding."""
    monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "cp1252")

    decoded = nox.popen.decode_output(b"\x95")

    # 0x95 in cp1252 is the bullet character, U+2022.
    assert decoded == "•"


def test_output_decoding_both_fail(monkeypatch: pytest.MonkeyPatch) -> None:
    """When UTF-8 and the locale encoding both fail, the error names both."""
    monkeypatch.setattr(nox.popen.locale, "getpreferredencoding", lambda: "ascii")

    with pytest.raises(UnicodeDecodeError) as excinfo:
        nox.popen.decode_output(b"\x95")

    raised = excinfo.value
    assert raised.encoding == "[utf-8, ascii]"