summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Helmut Grohne <helmut@subdivi.de> 2017-01-16 20:39:20 +0100
committer Helmut Grohne <helmut@subdivi.de> 2017-01-16 20:39:20 +0100
commit c8392bb99d86ca799d057c9fcc91fa53d880af23 (patch)
tree 1bb834a4d88f3857ed1da54f14feddd6e8964fb1
parent f0109cd5051e4b32e038d0dc156e5a8e4fd08caf (diff)
download tcvt-c8392bb99d86ca799d057c9fcc91fa53d880af23.tar.gz
initial utf8 support
Derive a UTF8Terminal class from Terminal that handles utf8 byte sequences. It only works on Python 3.x, because feed_utf8 relies on addch handling unicode str objects. The whole utf8 support is also conditional on Python 3.x, because the locale module was not safe to use in Python 2.x. A terminal is considered utf8 if the LC_* or LANG variables indicate that. This branch still has known crashes: writing a non-ASCII letter at the end of a line, or letting it scroll from one column to another, causes a crash. More problems may hide elsewhere.
-rwxr-xr-x tcvt.py 44
1 files changed, 41 insertions, 3 deletions
diff --git a/tcvt.py b/tcvt.py
index de62116..64c2ff5 100755
--- a/tcvt.py
+++ b/tcvt.py
@@ -29,6 +29,7 @@
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Helmut Grohne.
+import locale
import pty
import sys
import os
@@ -299,9 +300,11 @@ def compose_dicts(dct1, dct2):
pass
return result
-simple_characters = bytearray(
+simple_low_characters = bytearray(
b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +
- b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}' +
+ b'0123456789@:~$ .#!/_(),[]=-+*\'"|<>%&\\?;`^{}')
+
+simple_characters = simple_low_characters + bytearray(
b'\xb4\xb6\xb7\xc3\xc4\xd6\xdc\xe4\xe9\xfc\xf6')
class Terminal:
@@ -691,6 +694,32 @@ class Terminal:
else:
raise ValueError("feed esc ] %r %r" % (prev, char))
+class UTF8Terminal(Terminal):
+ def feed_simple(self, char):
+ func = self.feed_simple_table.get(char)
+ if func:
+ func(self)
+ elif char & 0b11000000 == 0b11000000:
+ self.mode = (self.feed_utf8, bytearray((char,)))
+ elif char in simple_low_characters:
+ self.addch(char)
+ elif char == 0x1b:
+ self.mode = (self.feed_esc,)
+ else:
+ raise ValueError("feed %r" % char)
+
+ def feed_utf8(self, char, s):
+ if char & 0b11000000 != 0b10000000:
+ raise ValueError("invalid utf8 sequence")
+ s += bytearray((char,))
+ l = 8 - ((s[0] | 0b11) ^ 0b11111100).bit_length()
+ if len(s) >= l:
+ utf8char = s.decode("utf8")
+ assert len(utf8char) == 1
+ self.addch(utf8char)
+ self.feed_reset()
+ else:
+ self.mode = (self.feed_utf8, s)
symbolic_keymapping = {
ord(b"\n"): "cr",
@@ -811,10 +840,19 @@ def main():
def screenfactory(realscreen):
return Columns(realscreen, options.columns, reverse=options.reverse)
+ # 1. Do not use locale.getpreferredencoding on Python 2.x, because
+ # http://bugs.python.org/issue11022.
+ # 2. Do not use an UTF8Terminal on Python 2.x, because
+ # http://bugs.python.org/issue18118.
+ if sys.version_info.major >= 3 and locale.getpreferredencoding() == 'UTF-8':
+ t = UTF8Terminal(acsc, screenfactory)
+ else:
+ t = Terminal(acsc, screenfactory)
+
process = ForkPty(args or [os.environ["SHELL"]], dict(TERM="ansi"))
try:
with process as masterfd:
- with Terminal(acsc, screenfactory) as t:
+ with t:
t.resizepty(masterfd)
process.start()
while True: