diff options
author | Helmut Grohne <helmut@subdivi.de> | 2017-01-16 20:39:20 +0100 |
---|---|---|
committer | Helmut Grohne <helmut@subdivi.de> | 2017-01-16 20:39:20 +0100 |
commit | c8392bb99d86ca799d057c9fcc91fa53d880af23 (patch) | |
tree | 1bb834a4d88f3857ed1da54f14feddd6e8964fb1 | |
parent | f0109cd5051e4b32e038d0dc156e5a8e4fd08caf (diff) | |
download | tcvt-c8392bb99d86ca799d057c9fcc91fa53d880af23.tar.gz |
initial utf8 support
Derive a UTF8Terminal class from Terminal that handles utf8 byte
sequences. It only works on Python 3.x, because feed_utf8 relies on
addch handling unicode str objects. The whole utf8 support is also
conditional on Python 3.x, because the locale module was not safe to use
in Python 2.x.
A terminal is considered utf8 if the LC_* or LANG variables indicate
that.
This branch still has known crashes. Writing a non-ASCII letter at the end
of a line or letting it scroll from one column to another causes
crashes. More problems may hide elsewhere.
-rwxr-xr-x | tcvt.py | 44 |
1 files changed, 41 insertions, 3 deletions
```diff
@@ -29,6 +29,7 @@
 # those of the authors and should not be interpreted as representing official
 # policies, either expressed or implied, of Helmut Grohne.
 
+import locale
 import pty
 import sys
 import os
@@ -299,9 +300,11 @@ def compose_dicts(dct1, dct2):
             pass
     return result
 
-simple_characters = bytearray(
+simple_low_characters = bytearray(
     b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +
-    b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}' +
+    b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}')
+
+simple_characters = simple_low_characters + bytearray(
     b'\xb4\xb6\xb7\xc3\xc4\xd6\xdc\xe4\xe9\xfc\xf6')
 
 class Terminal:
@@ -691,6 +694,32 @@ class Terminal:
         else:
             raise ValueError("feed esc ] %r %r" % (prev, char))
 
+class UTF8Terminal(Terminal):
+    def feed_simple(self, char):
+        func = self.feed_simple_table.get(char)
+        if func:
+            func(self)
+        elif char & 0b11000000 == 0b11000000:
+            self.mode = (self.feed_utf8, bytearray((char,)))
+        elif char in simple_low_characters:
+            self.addch(char)
+        elif char == 0x1b:
+            self.mode = (self.feed_esc,)
+        else:
+            raise ValueError("feed %r" % char)
+
+    def feed_utf8(self, char, s):
+        if char & 0b11000000 != 0b10000000:
+            raise ValueError("invalid utf8 sequence")
+        s += bytearray((char,))
+        l = 8 - ((s[0] | 0b11) ^ 0b11111100).bit_length()
+        if len(s) >= l:
+            utf8char = s.decode("utf8")
+            assert len(utf8char) == 1
+            self.addch(utf8char)
+            self.feed_reset()
+        else:
+            self.mode = (self.feed_utf8, s)
 
 symbolic_keymapping = {
     ord(b"\n"): "cr",
@@ -811,10 +840,19 @@ def main():
     def screenfactory(realscreen):
         return Columns(realscreen, options.columns, reverse=options.reverse)
 
+    # 1. Do not use locale.getpreferredencoding on Python 2.x, because
+    # http://bugs.python.org/issue11022.
+    # 2. Do not use an UTF8Terminal on Python 2.x, because
+    # http://bugs.python.org/issue18118.
+    if sys.version_info.major >= 3 and locale.getpreferredencoding() == 'UTF-8':
+        t = UTF8Terminal(acsc, screenfactory)
+    else:
+        t = Terminal(acsc, screenfactory)
+
     process = ForkPty(args or [os.environ["SHELL"]], dict(TERM="ansi"))
     try:
         with process as masterfd:
-            with Terminal(acsc, screenfactory) as t:
+            with t:
                 t.resizepty(masterfd)
                 process.start()
                 while True:
```