Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions commitizen/out.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,34 @@

from termcolor import colored

# Legacy Windows-only guard: when stdout is a regular text wrapper, switch it
# to UTF-8. The version check is there because ``TextIOWrapper.reconfigure``
# only exists on Python 3.7+.
if (
    sys.platform == "win32"
    and sys.version_info >= (3, 7)
    and isinstance(sys.stdout, io.TextIOWrapper)
):
    sys.stdout.reconfigure(encoding="utf-8")

def _ensure_utf8_stdout(stream: object) -> None:
    """Switch ``stream`` to UTF-8 unless it already encodes as UTF-8.

    commitizen emits characters that most legacy code pages cannot represent
    (for example the rocket emoji U+1F680 and the typographic apostrophe
    U+2019). Without this, ``print`` raises ``UnicodeEncodeError`` mid-output
    on:

    * Windows ``cmd.exe`` defaulting to ``cp1252`` (the historical case),
    * Linux/macOS terminals with a non-UTF-8 ``LANG`` such as
      ``de_CH.ISO8859-1`` (#956).

    The stream is reconfigured with ``errors="replace"`` so that the few
    things UTF-8 itself cannot encode (lone surrogates) are written as ``?``
    instead of raising.

    Args:
        stream: Candidate stream, normally ``sys.stdout``. Anything that is
            not an ``io.TextIOWrapper`` is left untouched.
    """
    # Local import: only this helper needs ``codecs``.
    import codecs

    if not isinstance(stream, io.TextIOWrapper):
        return

    # Resolve the declared encoding through the codec registry so that every
    # UTF-8 alias ("UTF8", "utf_8", "U8", "cp65001", ...) is recognised, not
    # just the spellings a manual string normalisation happens to cover.
    try:
        codec_name = codecs.lookup(stream.encoding or "").name
    except LookupError:
        # Unknown or empty encoding name: treat it as "not UTF-8".
        codec_name = ""
    if codec_name == "utf-8":
        return

    try:
        stream.reconfigure(encoding="utf-8", errors="replace")
    except (AttributeError, ValueError):  # pragma: no cover - safety net
        # Best effort only: a stream that refuses reconfiguration keeps its
        # current encoding rather than aborting the import of this module.
        pass


# Applied to the process's stdout once, when this module is imported.
_ensure_utf8_stdout(sys.stdout)


def write(value: object, *args: object) -> None:
Expand Down
Binary file modified docs/images/cli_interactive/bump.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/cli_interactive/commit.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/cli_interactive/init.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/cli_interactive/shortcut_custom.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/cli_interactive/shortcut_default.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
85 changes: 85 additions & 0 deletions tests/test_out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""Tests for ``commitizen.out``.

Mostly focused on the stdout-encoding helper introduced for #956: the
function must reconfigure non-UTF-8 streams to UTF-8 with a permissive
``errors="replace"`` strategy so commitizen output (emoji, typographic
quotes) doesn't crash with ``UnicodeEncodeError`` on terminals using
locale-dependent encodings such as ``cp1252`` (Windows) or
``ISO8859-1`` (Linux/macOS).
"""

from __future__ import annotations

import io
from typing import Any

from commitizen.out import _ensure_utf8_stdout


class _StubStream(io.TextIOWrapper):
    """In-memory ``TextIOWrapper`` that logs every ``reconfigure`` call.

    Because it is a genuine ``TextIOWrapper`` subclass, it passes the
    ``isinstance`` check in ``_ensure_utf8_stdout``, so tests do not need to
    monkey-patch ``sys.stdout``.
    """

    # Keyword arguments of each ``reconfigure`` call, oldest first.
    reconfigure_calls: list[dict[str, Any]]

    def __init__(self, encoding: str) -> None:
        backing = io.BytesIO()
        super().__init__(backing, encoding=encoding)
        self.reconfigure_calls = []

    def reconfigure(self, **options: Any) -> None:
        # Log first so the attempt is recorded even if the real call raises.
        self.reconfigure_calls.append(options)
        super().reconfigure(**options)


def test_ensure_utf8_stdout_noop_when_already_utf8():
    """A stream that already declares ``utf-8`` must not be reconfigured."""
    utf8_stream = _StubStream(encoding="utf-8")

    _ensure_utf8_stdout(utf8_stream)

    recorded = utf8_stream.reconfigure_calls
    assert recorded == []


def test_ensure_utf8_stdout_noop_for_dashless_utf8_alias():
    """The upper-case, dash-less spelling ``UTF8`` also counts as UTF-8."""
    aliased_stream = _StubStream(encoding="UTF8")

    _ensure_utf8_stdout(aliased_stream)

    recorded = aliased_stream.reconfigure_calls
    assert recorded == []


def test_ensure_utf8_stdout_reconfigures_iso8859_1_terminal():
    """Regression test for #956 (Linux/macOS ``LANG=de_CH.ISO8859-1``)."""
    expected_call = {"encoding": "utf-8", "errors": "replace"}
    latin1_stream = _StubStream(encoding="latin-1")

    _ensure_utf8_stdout(latin1_stream)

    # Exactly one reconfiguration, carrying both keyword arguments.
    assert latin1_stream.reconfigure_calls == [expected_call]


def test_ensure_utf8_stdout_reconfigures_windows_cp1252():
    """Regression test for the historical Windows ``cmd.exe`` case."""
    expected_call = {"encoding": "utf-8", "errors": "replace"}
    cp1252_stream = _StubStream(encoding="cp1252")

    _ensure_utf8_stdout(cp1252_stream)

    # Exactly one reconfiguration, carrying both keyword arguments.
    assert cp1252_stream.reconfigure_calls == [expected_call]


def test_ensure_utf8_stdout_skips_non_textio_streams():
    """Objects that are not ``io.TextIOWrapper`` instances are left alone."""

    class _DuckStream:
        # Looks like a non-UTF-8 stream, but is not a ``TextIOWrapper``.
        encoding = "latin-1"
        reconfigure_calls: list[dict[str, Any]] = []

        def reconfigure(self, **options: Any) -> None:  # pragma: no cover - unused
            self.reconfigure_calls.append(options)

    duck = _DuckStream()

    _ensure_utf8_stdout(duck)

    recorded = duck.reconfigure_calls
    assert recorded == []


def test_ensure_utf8_stdout_after_reconfigure_can_emit_emoji():
    """End-to-end: after reconfiguration, writing an emoji must not raise."""
    stream = _StubStream(encoding="latin-1")
    _ensure_utf8_stdout(stream)

    message = "Configuration complete \U0001f680"

    # latin-1 cannot encode the rocket emoji, so this write only succeeds
    # because the stream has been switched to UTF-8.
    stream.write(message)
    stream.flush()

    # The emoji must reach the underlying buffer as real UTF-8 bytes, not as
    # a ``?`` placeholder.
    assert stream.buffer.getvalue() == message.encode("utf-8")
Loading