From c8392bb99d86ca799d057c9fcc91fa53d880af23 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Mon, 16 Jan 2017 20:39:20 +0100 Subject: initial utf8 support Derive a UTF8Terminal class from Terminal that handles utf8 byte sequences. It only works on Python 3.x, because feed_utf8 relies on addch handling unicode str objects. The whole utf8 support is also conditional on Python 3.x, because the locale module was not safe to use in Python 2.x. A terminal is considered utf8 if the LC_* or LANG variables indicate that. This branch still has known crashes. Writing a non-ASCII letter at the end of a line or letting it scroll from one column to another causes crashes. More problems may hide elsewhere. --- tcvt.py | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/tcvt.py b/tcvt.py index de62116..64c2ff5 100755 --- a/tcvt.py +++ b/tcvt.py @@ -29,6 +29,7 @@ # those of the authors and should not be interpreted as representing official # policies, either expressed or implied, of Helmut Grohne. 
+import locale import pty import sys import os @@ -299,9 +300,11 @@ def compose_dicts(dct1, dct2): pass return result -simple_characters = bytearray( +simple_low_characters = bytearray( b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + - b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}' + + b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}') + +simple_characters = simple_low_characters + bytearray( b'\xb4\xb6\xb7\xc3\xc4\xd6\xdc\xe4\xe9\xfc\xf6') class Terminal: @@ -691,6 +694,32 @@ class Terminal: else: raise ValueError("feed esc ] %r %r" % (prev, char)) +class UTF8Terminal(Terminal): + def feed_simple(self, char): + func = self.feed_simple_table.get(char) + if func: + func(self) + elif char & 0b11000000 == 0b11000000: + self.mode = (self.feed_utf8, bytearray((char,))) + elif char in simple_low_characters: + self.addch(char) + elif char == 0x1b: + self.mode = (self.feed_esc,) + else: + raise ValueError("feed %r" % char) + + def feed_utf8(self, char, s): + if char & 0b11000000 != 0b10000000: + raise ValueError("invalid utf8 sequence") + s += bytearray((char,)) + l = 8 - ((s[0] | 0b11) ^ 0b11111100).bit_length() + if len(s) >= l: + utf8char = s.decode("utf8") + assert len(utf8char) == 1 + self.addch(utf8char) + self.feed_reset() + else: + self.mode = (self.feed_utf8, s) symbolic_keymapping = { ord(b"\n"): "cr", @@ -811,10 +840,19 @@ def main(): def screenfactory(realscreen): return Columns(realscreen, options.columns, reverse=options.reverse) + # 1. Do not use locale.getpreferredencoding on Python 2.x, because + # http://bugs.python.org/issue11022. + # 2. Do not use an UTF8Terminal on Python 2.x, because + # http://bugs.python.org/issue18118. 
+ if sys.version_info.major >= 3 and locale.getpreferredencoding() == 'UTF-8': + t = UTF8Terminal(acsc, screenfactory) + else: + t = Terminal(acsc, screenfactory) + process = ForkPty(args or [os.environ["SHELL"]], dict(TERM="ansi")) try: with process as masterfd: - with Terminal(acsc, screenfactory) as t: + with t: t.resizepty(masterfd) process.start() while True: -- cgit v1.2.3 From aba64222f1ec884d97f67fb6ae43f421d525ca65 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Fri, 3 Nov 2017 21:22:38 +0100 Subject: fix writing of utf8 chars in the last column Using insch is nice, because it does not update the cursor position, but it does not work with utf8 chars and outputs garbage instead. The problem can be avoided by using addch, but it creates a new problem: addch moves the cursor. For all but the very last character on the screen that's fine, but adding the last character on the screen means scrolling. Thus we follow the advice from http://stackoverflow.com/a/41923640/1626632 and disable scrolling and catch the exception. --- tcvt.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tcvt.py b/tcvt.py index 64c2ff5..e97da3c 100755 --- a/tcvt.py +++ b/tcvt.py @@ -168,11 +168,20 @@ class Columns: def addch(self, char): if self.xpos == self.columnwidth - 1: - self.curwin.insch(self.curypos, self.curxpos, char, self.attrs) if self.ypos + 1 == self.numcolumns * self.height: + self.curwin.scrollok(0) # disable scrolling for the addch call + try: + self.curwin.addch(self.curypos, self.curxpos, char, + self.attrs) + except curses.error: + # It errors out, but still draws the character. + # http://stackoverflow.com/a/41923640/1626632 + pass + self.curwin.scrollok(1) self.scroll() self.move(self.ypos, 0) else: + self.curwin.addch(self.curypos, self.curxpos, char, self.attrs) self.move(self.ypos + 1, 0) else: self.curwin.addch(self.curypos, self.curxpos, char, self.attrs) -- cgit v1.2.3