Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 1 addition & 28 deletions Lib/_pyrepl/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations
import builtins
import functools
import keyword
import re
import token as T
Expand All @@ -11,12 +10,12 @@
from collections import deque
from io import StringIO
from tokenize import TokenInfo as TI
from traceback import _str_width as str_width, _wlen as wlen
from typing import Iterable, Iterator, Match, NamedTuple, Self

from .types import CharBuffer, CharWidths
from .trace import trace

ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
IDENTIFIERS_AFTER = {"def", "class"}
Expand Down Expand Up @@ -59,32 +58,6 @@ class ColorSpan(NamedTuple):
tag: str


@functools.cache
def str_width(c: str) -> int:
    """Return the display width of the single character *c*: 0, 1 or 2.

    ASCII characters always count as one column.  Combining characters and
    format ("Cf") characters other than the soft hyphen count as zero.
    Every other character counts as one column when its East Asian width
    is "N", "Na", "H" or "A", and as two columns otherwise.
    """
    if ord(c) < 128:
        return 1
    # gh-139246 for zero-width joiner and combining characters
    zero_width_format = unicodedata.category(c) == "Cf" and c != "\u00ad"
    if unicodedata.combining(c) or zero_width_format:
        return 0
    narrow = unicodedata.east_asian_width(c) in ("N", "Na", "H", "A")
    return 1 if narrow else 2


def wlen(s: str) -> int:
    """Return the display width of *s*.

    The width is the sum of str_width() over every character, minus the
    length of each ANSI escape sequence found in *s*, plus one extra column
    for every "\\x1a" character.
    """
    if len(s) == 1 and s != "\x1a":
        return str_width(s)
    raw_width = sum(map(str_width, s))
    # remove lengths of any escape sequences
    escape_chars = sum(map(len, ANSI_ESCAPE_SEQUENCE.findall(s)))
    return raw_width - escape_chars + s.count("\x1a")


def unbracket(s: str, including_content: bool = False) -> str:
r"""Return `s` with \001 and \002 characters removed.

Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_pyrepl/support.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from code import InteractiveConsole
from functools import partial
from traceback import ANSI_ESCAPE_SEQUENCE
from typing import Iterable
from unittest.mock import MagicMock

from _pyrepl.console import Console, Event
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
from _pyrepl.simple_interact import _strip_final_indent
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
from _pyrepl.utils import unbracket


class ScreenEqualMixin:
Expand Down
45 changes: 1 addition & 44 deletions Lib/test/test_pyrepl/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,9 @@
from unittest import TestCase

from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
from _pyrepl.utils import prev_next_window, gen_colors


class TestUtils(TestCase):
def test_str_width(self):
    # Characters are grouped by the width str_width() is expected to
    # report.  Every check runs in its own subTest so that a failure names
    # the offending character and does not hide the checks after it.
    single_width = [
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    ]
    zero_width = [
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    ]
    double_width = [chr(99989), chr(99999)]

    for expected, characters in (
        (1, single_width),
        (0, zero_width),
        (2, double_width),
    ):
        for c in characters:
            with self.subTest(character=c):
                self.assertEqual(str_width(c), expected)

def test_wlen(self):
    # (text, expected width) pairs, asserted in the order listed.
    cases = [(c, 1) for c in ['a', 'b', '1', '!', '_']]
    cases += [
        ('\x1a', 2),
        (chr(3800), 1),  # East Asian width "N"
        (chr(4352), 2),  # East Asian width "W"
        ('hello', 5),
        ('hello' + '\x1a', 7),
        ('e\N{COMBINING ACUTE ACCENT}', 1),
        ('a\N{ZERO WIDTH JOINER}b', 2),
    ]
    for text, expected in cases:
        self.assertEqual(wlen(text), expected)

def test_prev_next_window(self):
def gen_normal():
yield 1
Expand Down
71 changes: 71 additions & 0 deletions Lib/test/test_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import traceback
from functools import partial
from pathlib import Path
from traceback import _str_width, _wlen
import _colorize

MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
Expand Down Expand Up @@ -1787,6 +1788,50 @@ def f():
]
self.assertEqual(result_lines, expected)

def test_str_width(self):
    # Characters are grouped by the width _str_width() is expected to
    # report.  Every check runs in its own subTest so that a failure names
    # the offending character and does not hide the checks after it.
    single_width = [
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    ]
    zero_width = [
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    ]
    double_width = [chr(99989), chr(99999)]

    for expected, characters in (
        (1, single_width),
        (0, zero_width),
        (2, double_width),
    ):
        for c in characters:
            with self.subTest(character=c):
                self.assertEqual(_str_width(c), expected)

def test_wlen(self):
    # (text, expected width) pairs, asserted in the order listed.
    cases = [(c, 1) for c in ['a', 'b', '1', '!', '_']]
    cases += [
        ('\x1a', 2),
        (chr(3800), 1),  # East Asian width "N"
        (chr(4352), 2),  # East Asian width "W"
        ('hello', 5),
        ('hello' + '\x1a', 7),
        ('e\N{COMBINING ACUTE ACCENT}', 1),
        ('a\N{ZERO WIDTH JOINER}b', 2),
    ]
    for text, expected in cases:
        self.assertEqual(_wlen(text), expected)


class TestKeywordTypoSuggestions(unittest.TestCase):
TYPO_CASES = [
("with block ad something:\n pass", "and"),
Expand Down Expand Up @@ -5321,6 +5366,32 @@ def expected(t, m, fn, l, f, E, e, z):
]
self.assertEqual(actual, expected(**colors))

def test_colorized_traceback_unicode(self):
# Checks the colorized source line and marker line that
# TracebackException.format(colorize=True) produces when the failing
# source line contains non-ASCII characters.
#
# Case 1: a division by zero on a line that uses the identifier 啊哈.
# The trailing "####" comment is part of the source line and is expected
# to be echoed back unhighlighted.  Do not reformat that line: its exact
# text is what the assertion below compares against.
try:
啊哈=1; 啊哈/0####
except Exception as e:
exc = traceback.TracebackException.from_exception(e)

actual = "".join(exc.format(colorize=True)).splitlines()
# Only the e, E and z arguments are used to build the expected lines.
# NOTE(review): `colors` is defined elsewhere in this module; presumably it
# maps these names to the theme's escape codes - confirm.
def expected(t, m, fn, l, f, E, e, z):
return [
f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
# Four "~" for the two-character name, one "^" for "/", one "~" for "0".
f" {e}~~~~{z}{E}^{z}{e}~{z}",
]
# Only output lines 2 and 3 (the echoed source line and its marker line)
# are compared.
self.assertEqual(actual[2:4], expected(**colors))

# Case 2: ééééé is never assigned in this test, so evaluating it is
# presumably what raises here (NameError) - confirm.  Five "^" are
# expected under the five-character name.
try:
ééééé/0
except Exception as e:
exc = traceback.TracebackException.from_exception(e)

actual = "".join(exc.format(colorize=True)).splitlines()
def expected(t, m, fn, l, f, E, e, z):
return [
f" {E}ééééé{z}/0",
f" {E}^^^^^{z}",
]
self.assertEqual(actual[2:4], expected(**colors))

class TestLazyImportSuggestions(unittest.TestCase):
"""Test that lazy imports are not reified when computing AttributeError suggestions."""
Expand Down
57 changes: 46 additions & 11 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Extract, format and print information about Python stack traces."""

import collections.abc
import functools
import itertools
import linecache
import re
import sys
import textwrap
import types
Expand Down Expand Up @@ -681,12 +683,12 @@ def output_line(lineno):
colorized_line_parts = []
colorized_carets_parts = []

for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
caret_group = list(group)
if color == "^":
if "^" in color:
colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
elif color == "~":
elif "~" in color:
colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
else:
Expand Down Expand Up @@ -968,7 +970,46 @@ def setup_positions(expr, force_valid=True):

return None

_WIDE_CHAR_SPECIFIERS = "WF"

def _zip_display_width(line, carets):
    """Yield ``(text, markers)`` pairs for each grapheme of *line*.

    *text* is the string form of one item produced by
    unicodedata.iter_graphemes(line); *markers* is the run of characters
    taken from *carets* whose length is that text's _display_width()
    (shorter, possibly empty, once *carets* is exhausted).
    """
    import unicodedata

    remaining = iter(carets)
    for grapheme in unicodedata.iter_graphemes(line):
        text = str(grapheme)
        # Consume as many marker characters as this grapheme is wide.
        markers = itertools.islice(remaining, _display_width(text))
        yield text, "".join(markers)


@functools.cache
def _str_width(c: str) -> int:
import unicodedata
if ord(c) < 128:
return 1
# gh-139246 for zero-width joiner and combining characters
if unicodedata.combining(c):
return 0
category = unicodedata.category(c)
if category == "Cf" and c != "\u00ad":
return 0
w = unicodedata.east_asian_width(c)
if w in ("N", "Na", "H", "A"):
return 1
return 2


# Matches one ANSI escape sequence: ESC "[", any characters in the range
# " " through "@", then one final character in the range "A" through "~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")


def _wlen(s: str) -> int:
    """Return the display width of *s*.

    The width is the sum of _str_width() over every character, minus the
    length of each ANSI escape sequence found in *s*, plus one extra column
    for every "\\x1a" character.
    """
    if len(s) == 1 and s != "\x1a":
        return _str_width(s)
    raw_width = sum(map(_str_width, s))
    # remove lengths of any escape sequences
    escape_chars = sum(map(len, ANSI_ESCAPE_SEQUENCE.findall(s)))
    return raw_width - escape_chars + s.count("\x1a")



def _display_width(line, offset=None):
"""Calculate the extra amount of width space the given source
Expand All @@ -982,13 +1023,7 @@ def _display_width(line, offset=None):
if line.isascii():
return offset

import unicodedata

return sum(
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
for char in line[:offset]
)

return _wlen(line[:offset])


class _ExceptionPrintContext:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix traceback color output with Unicode characters.
Loading