Skip to content

Commit 3634ce2

Browse files
committed
[WIP] unicode support
1 parent 4d788a9 commit 3634ce2

File tree

2 files changed

+65
-35
lines changed

2 files changed

+65
-35
lines changed

pyrepl/reader.py

+26-14
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,24 @@
2020
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
2121

2222
from __future__ import unicode_literals
23+
import sys
2324
import unicodedata
2425
from pyrepl import commands
2526
from pyrepl import input
2627
try:
    unicode
except NameError:
    # Python 3: supply the Python 2 text-type names used by this module.
    unicode = str
    unichr = chr
    basestring = bytes, str


def decode(x, enc=None):
    """Return *x* as a text (unicode) string.

    Byte strings are decoded with *enc*; when *enc* is None the encoding
    of ``sys.stdout`` is used, falling back to UTF-8 if stdout reports no
    encoding (e.g. when it is not attached to a terminal).

    Anything else is converted with ``unicode(x)`` so that text passes
    through unchanged and arbitrary prompt objects are stringified once,
    instead of failing on already-decoded text or leaking a non-string
    object to the caller.
    """
    if isinstance(x, bytes):
        if enc is None:
            enc = getattr(sys.stdout, "encoding", None) or "utf-8"
        return x.decode(enc)
    return unicode(x)
35+
36+
37+
def width(c):
    """Return the number of display columns taken by the character *c*.

    Characters whose East Asian Width property is Fullwidth ("F") or
    Wide ("W") count as 2 columns; every other character counts as 1.
    *c* must be a single character.
    """
    return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1
39+
def wlen(s):
    """Return the total display width of *s*.

    This is the sum of ``width(ch)`` over every character of *s*, so an
    empty string has width 0.
    """
    return sum(width(ch) for ch in s)
3241

3342

3443
def _make_unctrl_map():
@@ -39,8 +48,8 @@ def _make_unctrl_map():
3948
for i in range(32):
4049
c = unichr(i)
4150
uc_map[c] = '^' + unichr(ord('A') + i - 1)
42-
uc_map[b'\t'] = ' ' # display TABs as 4 characters
43-
uc_map[b'\177'] = unicode('^?')
51+
uc_map['\t'] = ' ' # display TABs as 4 characters
52+
uc_map['\177'] = unicode('^?')
4453
for i in range(256):
4554
c = unichr(i)
4655
if c not in uc_map:
@@ -53,7 +62,7 @@ def _my_unctrl(c, u=_make_unctrl_map()):
5362
return u[c]
5463
else:
5564
if unicodedata.category(c).startswith('C'):
56-
return br'\u%04x' % ord(c)
65+
return '\\u%04x' % ord(c)
5766
else:
5867
return c
5968

@@ -75,7 +84,7 @@ def disp_str(buffer, join=''.join, uc=_my_unctrl):
7584
s = [uc(x) for x in buffer]
7685
b = [] # XXX: bytearray
7786
for x in s:
78-
b.append(1)
87+
b.append(width(x[0]))
7988
b.extend([0] * (len(x) - 1))
8089
return join(s), b
8190

@@ -280,7 +289,7 @@ def calc_screen(self):
280289
for mline in self.msg.split("\n"):
281290
screen.append(mline)
282291
screeninfo.append((0, []))
283-
self.lxy = p, ln
292+
# self.lxy = p, ln
284293
prompt = self.get_prompt(ln, ll >= p >= 0)
285294
while '\n' in prompt:
286295
pre_prompt, _, prompt = prompt.partition('\n')
@@ -289,8 +298,8 @@ def calc_screen(self):
289298
p -= ll + 1
290299
prompt, lp = self.process_prompt(prompt)
291300
l, l2 = disp_str(line)
292-
wrapcount = (len(l) + lp) // w
293-
if wrapcount == 0:
301+
wrapcount = (wlen(l) + lp) // w
302+
if 1 or wrapcount == 0: # FIXME
294303
screen.append(prompt + l)
295304
screeninfo.append((lp, l2 + [1]))
296305
else:
@@ -318,7 +327,7 @@ def process_prompt(self, prompt):
318327
is returned with these control characters removed. """
319328

320329
out_prompt = ''
321-
l = len(prompt)
330+
l = wlen(prompt)
322331
pos = 0
323332
while True:
324333
s = prompt.find('\x01', pos)
@@ -420,7 +429,7 @@ def get_prompt(self, lineno, cursor_on_line):
420429
# the object on which str() was called. This ensures that even if the
421430
# same object is used e.g. for ps1 and ps2, str() is called only once.
422431
if res not in self._pscache:
423-
self._pscache[res] = str(res)
432+
self._pscache[res] = decode(res)
424433
return self._pscache[res]
425434

426435
def push_input_trans(self, itrans):
@@ -438,23 +447,26 @@ def pos2xy(self, pos):
438447
if pos == len(self.buffer):
439448
y = len(self.screeninfo) - 1
440449
p, l2 = self.screeninfo[y]
441-
return p + len(l2) - 1, y
450+
return p + sum(l2) + l2.count(0) - 1, y
442451
else:
443452
for p, l2 in self.screeninfo:
444-
l = l2.count(1)
453+
l = len(l2) - l2.count(0)
445454
if l > pos:
446455
break
447456
else:
448457
pos -= l
449458
y += 1
450459
c = 0
451460
i = 0
452-
while c < pos:
453-
c += l2[i]
461+
j = 0
462+
while j < pos:
463+
j += 1 if l2[i] else 0
464+
c += l2[i] or 1
454465
i += 1
455466
while l2[i] == 0:
467+
c += 1
456468
i += 1
457-
return p + i, y
469+
return p + c, y
458470

459471
def insert(self, text):
460472
"""Insert 'text' at the insertion point."""

pyrepl/unix_console.py

+39-21
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import re
2929
import time
3030
import sys
31+
import unicodedata
3132
from fcntl import ioctl
3233
from . import curses
3334
from .fancy_termios import tcgetattr, tcsetattr
@@ -44,6 +45,13 @@ class InvalidTerminal(RuntimeError):
4445
except NameError:
4546
unicode = str
4647

48+
49+
def width(c):
    """Return the number of display columns taken by the character *c*.

    Characters whose East Asian Width property is Fullwidth ("F") or
    Wide ("W") count as 2 columns; every other character counts as 1.
    *c* must be a single character.
    """
    return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1
51+
def wlen(s):
    """Return the total display width of *s*.

    This is the sum of ``width(ch)`` over every character of *s*, so an
    empty string has width 0.
    """
    return sum(width(ch) for ch in s)
53+
54+
4755
_error = (termios.error, curses.error, InvalidTerminal)
4856

4957
# there are arguments for changing this to "refresh"
@@ -247,46 +255,56 @@ def __write_changed_line(self, y, oldline, newline, px):
247255
# structuring this function are equally painful (I'm trying to
248256
# avoid writing code generators these days...)
249257
x = 0
250-
minlen = min(len(oldline), len(newline))
258+
i = 0
259+
minlen = min(wlen(oldline), wlen(newline))
260+
pi = 0
261+
xx = 0
262+
for c in oldline:
263+
xx += width(c)
264+
pi += 1
265+
if xx >= px: break
251266
#
252267
# reuse the oldline as much as possible, but stop as soon as we
253268
# encounter an ESCAPE, because it might be the start of an escape
254269
# sequence
255-
#XXX unicode check!
256-
while x < minlen and oldline[x] == newline[x] and newline[x] != '\x1b':
257-
x += 1
258-
if oldline[x:] == newline[x+1:] and self.ich1:
270+
while x < minlen and oldline[i] == newline[i] and newline[i] != '\x1b':
271+
x += width(newline[i])
272+
i += 1
273+
if oldline[i:] == newline[i+1:] and self.ich1:
259274
if (y == self.__posxy[1] and x > self.__posxy[0] and
260-
oldline[px:x] == newline[px+1:x+1]):
275+
oldline[pi:i] == newline[pi+1:i+1]):
276+
i = pi
261277
x = px
262278
self.__move(x, y)
263-
self.__write_code(self.ich1)
264-
self.__write(newline[x])
265-
self.__posxy = x + 1, y
266-
elif x < minlen and oldline[x + 1:] == newline[x + 1:]:
279+
cw = width(newline[i])
280+
self.__write_code(cw*self.ich1)
281+
self.__write(newline[i])
282+
self.__posxy = x + cw, y
283+
elif (x < minlen and oldline[i + 1:] == newline[i + 1:]
284+
and width(oldline[i]) == width(newline[i])):
267285
self.__move(x, y)
268-
self.__write(newline[x])
269-
self.__posxy = x + 1, y
270-
elif (self.dch1 and self.ich1 and len(newline) == self.width
271-
and x < len(newline) - 2
272-
and newline[x+1:-1] == oldline[x:-2]):
286+
self.__write(newline[i])
287+
self.__posxy = x + width(newline[i]), y
288+
elif (self.dch1 and self.ich1 and wlen(newline) == self.width
289+
and x < wlen(newline) - 2
290+
and newline[i+1:-1] == oldline[i:-2]):
291+
raise NotImplementedError() # FIXME
273292
self.__hide_cursor()
274293
self.__move(self.width - 2, y)
275294
self.__posxy = self.width - 2, y
276295
self.__write_code(self.dch1)
277296
self.__move(x, y)
278297
self.__write_code(self.ich1)
279-
self.__write(newline[x])
280-
self.__posxy = x + 1, y
298+
self.__write(newline[i])
299+
self.__posxy = x + width(newline[i]), y
281300
else:
282301
self.__hide_cursor()
283302
self.__move(x, y)
284-
if len(oldline) > len(newline):
303+
if wlen(oldline) > wlen(newline):
285304
self.__write_code(self._el)
286-
self.__write(newline[x:])
287-
self.__posxy = len(newline), y
305+
self.__write(newline[i:])
306+
self.__posxy = wlen(newline), y
288307

289-
#XXX: check for unicode mess
290308
if '\x1b' in newline:
291309
# ANSI escape characters are present, so we can't assume
292310
# anything about the position of the cursor. Moving the cursor

0 commit comments

Comments
 (0)