@@ -255,6 +255,33 @@ def test_refresh_control(self):
255255 self .assertIs (win .is_wintouched (), syncok )
256256 self .assertIs (stdscr .is_wintouched (), syncok )
257257
258+ # Many tests below use a common set of non-ASCII cases, each applied only
259+ # when the window encoding can represent it -- so the whole suite is meant to
260+ # be run under several locales (e.g. ISO-8859-1, ISO-8859-15, KOI8-U):
261+ # 'A'/'a' ASCII
262+ # 'é' common to the Latin encodings
263+ # '¤'/'€'/'є' byte 0xA4 in ISO-8859-1 / ISO-8859-15 / KOI8-U
264+ # Precomposed characters are used so a round-trip does not depend on the Unicode normalization form.
265+
266+ def _encodable (self , s ):
267+ # Return True if s can be encoded in stdscr's encoding, else False: wide characters are only supported in a locale that can encode them.
268+ try :
269+ s .encode (self .stdscr .encoding )
270+ except UnicodeEncodeError :
271+ return False
272+ return True
273+
274+ def _read_char (self , y , x ):
275+ # Return the character in cell (y, x) of stdscr as a str, for output checks.
276+ # inch() is unusable here: on a wide build it returns the low 8 bits of the
277+ # character's code point rather than its locale-encoded byte, mangling
278+ # anything outside Latin-1. in_wch(), when available, reads the wide cell
279+ # directly; otherwise the instr() bytes for the cell are decoded with stdscr.encoding.
280+ stdscr = self .stdscr
281+ if hasattr (stdscr , 'in_wch' ):
282+ return str (stdscr .in_wch (y , x ))
283+ return stdscr .instr (y , x , 1 ).decode (stdscr .encoding )
284+
258285 def test_output_character (self ):
259286 stdscr = self .stdscr
260287 encoding = stdscr .encoding
@@ -264,32 +291,98 @@ def test_output_character(self):
264291 stdscr .addch ('A' )
265292 stdscr .addch (b'A' )
266293 stdscr .addch (65 )
267- c = '\u20ac '
268- try :
269- stdscr .addch (c )
270- except UnicodeEncodeError :
271- self .assertRaises (UnicodeEncodeError , c .encode , encoding )
272- except OverflowError :
273- encoded = c .encode (encoding )
274- self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
294+ # See _encodable for the character set. Each is either written (mapped
295+ # to a single byte), or raises UnicodeEncodeError (not in the encoding)
296+ # or OverflowError (a multibyte sequence, e.g. in UTF-8).
297+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
298+ try :
299+ stdscr .addch (c )
300+ except UnicodeEncodeError :
301+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
302+ except OverflowError :
303+ encoded = c .encode (encoding )
304+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
275305 stdscr .addch ('A' , curses .A_BOLD )
276306 stdscr .addch (1 , 2 , 'A' )
277307 stdscr .addch (2 , 3 , 'A' , curses .A_BOLD )
278308 self .assertIs (stdscr .is_wintouched (), True )
279309
310+ # The same characters supplied as an int chtype (a byte > 127). The
311+ # cell is read back with _read_char(), not inch(): on a wide build the
312+ # int is stored through the locale as a wide character that inch()
313+ # cannot represent for a character outside Latin-1.
314+ for c in ('é' , '¤' , '€' , 'є' ):
315+ try :
316+ b = c .encode (encoding )
317+ except UnicodeEncodeError :
318+ continue
319+ if len (b ) != 1 :
320+ continue
321+ # A wide build stores a character outside Latin-1 as a wide cell,
322+ # not as its encoded byte, so it cannot round-trip here.
323+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
324+ continue
325+ v = b [0 ]
326+ with self .subTest (c = c ):
327+ stdscr .addch (0 , 0 , v )
328+ self .assertEqual (self ._read_char (0 , 0 ), c )
329+ stdscr .addch (0 , 1 , v , curses .A_BOLD )
330+ self .assertEqual (self ._read_char (0 , 1 ), c )
331+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
332+ stdscr .move (2 , 0 )
333+ stdscr .echochar (v )
334+ self .assertEqual (self ._read_char (2 , 0 ), c )
335+ # insch() round-trips a byte only where its code point equals
336+ # the byte value (Latin-1): on a wide build ncurses winsch
337+ # stores a printable byte directly as a code point instead of
338+ # decoding it through the locale.
339+ if ord (c ) < 0x100 :
340+ stdscr .insch (1 , 0 , v )
341+ self .assertEqual (self ._read_char (1 , 0 ), c )
342+
343+ # The same characters supplied as a str. Unlike the int path above, a
344+ # str is stored as a wide-character cell on a wide build, so every
345+ # encodable character round-trips, insch() included. A multibyte
346+ # character does not fit a cell on a narrow build and is skipped.
347+ wide = hasattr (stdscr , 'in_wch' )
348+ for c in ('é' , '¤' , '€' , 'є' ):
349+ if not self ._encodable (c ):
350+ continue
351+ if not wide and len (c .encode (encoding )) != 1 :
352+ continue
353+ # A wide build stores a character outside Latin-1 as a wide cell,
354+ # not as its encoded byte, so it cannot round-trip here.
355+ if ord (c ) > 0xff and hasattr (stdscr , 'get_wch' ):
356+ continue
357+ with self .subTest (c = c ):
358+ stdscr .addch (0 , 0 , c )
359+ self .assertEqual (self ._read_char (0 , 0 ), c )
360+ stdscr .addch (0 , 1 , c , curses .A_BOLD )
361+ self .assertEqual (self ._read_char (0 , 1 ), c )
362+ self .assertTrue (stdscr .inch (0 , 1 ) & curses .A_BOLD )
363+ stdscr .insch (1 , 0 , c )
364+ self .assertEqual (self ._read_char (1 , 0 ), c )
365+ stdscr .move (2 , 0 )
366+ stdscr .echochar (c )
367+ self .assertEqual (self ._read_char (2 , 0 ), c )
368+
280369 # echochar()
281370 stdscr .refresh ()
282371 stdscr .move (0 , 0 )
283372 stdscr .echochar ('A' )
284373 stdscr .echochar (b'A' )
285374 stdscr .echochar (65 )
286- with self .assertRaises ((UnicodeEncodeError , OverflowError )):
287- # Unicode is not fully supported yet, but at least it does
288- # not crash.
289- # It is supposed to fail because either the character is
290- # not encodable with the current encoding, or it is encoded to
291- # a multibyte sequence.
292- stdscr .echochar ('\u0114 ' )
375+ # See _encodable for the character set; as in the addch() loop above.
376+ for c in ('A' , '\u00e9 ' , '\u00a4 ' , '\u20ac ' , '\u0454 ' ):
377+ try :
378+ stdscr .echochar (c )
379+ except UnicodeEncodeError :
380+ # The character is not encodable with the current encoding.
381+ self .assertRaises (UnicodeEncodeError , c .encode , encoding )
382+ except OverflowError :
383+ # The character is encoded to a multibyte sequence.
384+ encoded = c .encode (encoding )
385+ self .assertNotEqual (len (encoded ), 1 , repr (encoded ))
293386 stdscr .echochar ('A' , curses .A_BOLD )
294387 self .assertIs (stdscr .is_wintouched (), False )
295388
@@ -299,14 +392,18 @@ def test_output_string(self):
299392 # addstr()/insstr()
300393 for func in [stdscr .addstr , stdscr .insstr ]:
301394 with self .subTest (func .__qualname__ ):
302- stdscr .move (0 , 0 )
303395 func ('abcd' )
304396 func (b'abcd' )
305- s = 'àßçđ'
306- try :
307- func (s )
308- except UnicodeEncodeError :
309- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
397+ # Common and encoding-distinctive strings (see _encodable for the
398+ # 0xA4 set); 'àßçđ' is UTF-8-only. Each is written if the
399+ # encoding allows, else raises UnicodeEncodeError.
400+ for s in ('soupçon' , 'àßçđ' , 'soupçon ¤' , 'soupçon €' , 'дякую' ):
401+ stdscr .move (0 , 0 )
402+ try :
403+ func (s )
404+ except UnicodeEncodeError :
405+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
406+ stdscr .move (0 , 0 )
310407 func ('abcd' , curses .A_BOLD )
311408 func (1 , 2 , 'abcd' )
312409 func (2 , 3 , 'abcd' , curses .A_BOLD )
@@ -317,11 +414,14 @@ def test_output_string(self):
317414 stdscr .move (0 , 0 )
318415 func ('1234' , 3 )
319416 func (b'1234' , 3 )
320- s = '\u0661 \u0662 \u0663 \u0664 '
321- try :
322- func (s , 3 )
323- except UnicodeEncodeError :
324- self .assertRaises (UnicodeEncodeError , s .encode , encoding )
417+ # As above (see _encodable); Arabic-Indic digits are UTF-8-only.
418+ for s in ('caf\u00e9 ' , '\u0661 \u0662 \u0663 \u0664 ' , 'caf\u00e9 \u00a4 ' , 'caf\u00e9 \u20ac ' , '\u0434 \u044f \u043a \u0443 \u044e ' ):
419+ stdscr .move (0 , 0 )
420+ try :
421+ func (s , 3 )
422+ except UnicodeEncodeError :
423+ self .assertRaises (UnicodeEncodeError , s .encode , encoding )
424+ stdscr .move (0 , 0 )
325425 func ('1234' , 5 )
326426 func ('1234' , 3 , curses .A_BOLD )
327427 func (1 , 2 , '1234' , 3 )
@@ -411,6 +511,24 @@ def test_read_from_window(self):
411511 self .assertEqual (stdscr .instr (0 , 2 , 4 ), b'BCD ' )
412512 self .assertRaises (ValueError , stdscr .instr , - 2 )
413513 self .assertRaises (ValueError , stdscr .instr , 0 , 2 , - 2 )
514+ # A non-ASCII character of an 8-bit locale reads back as its encoded
515+ # byte (see _encodable for the set). instr() returns the locale bytes
516+ # for any single-byte character; inch() packs the text into a chtype, so
517+ # on a wide build it only round-trips a Latin-1 codepoint (byte ==
518+ # codepoint).
519+ encoding = stdscr .encoding
520+ for ch in ('A' , 'é' , '¤' , '€' , 'є' ):
521+ try :
522+ b = ch .encode (encoding )
523+ except UnicodeEncodeError :
524+ continue
525+ if len (b ) != 1 :
526+ continue
527+ with self .subTest (ch = ch ):
528+ stdscr .addstr (2 , 0 , ch )
529+ self .assertEqual (stdscr .instr (2 , 0 , 1 ), b )
530+ if ord (ch ) < 0x100 :
531+ self .assertEqual (stdscr .inch (2 , 0 ) & curses .A_CHARTEXT , b [0 ])
414532
415533 def test_coordinate_errors (self ):
416534 # Addressing a cell outside the window raises curses.error.
@@ -447,6 +565,10 @@ def test_getch(self):
447565 self .assertEqual (win .getch (), b'm' [0 ])
448566 self .assertEqual (win .getch (), b'\n ' [0 ])
449567
568+ # A key value > 127 is delivered unchanged (it is not locale text).
569+ curses .ungetch (0xE9 )
570+ self .assertEqual (win .getch (), 0xE9 )
571+
450572 def test_getstr (self ):
451573 win = curses .newwin (5 , 12 , 5 , 2 )
452574 curses .echo ()
@@ -619,6 +741,33 @@ def test_background(self):
619741 self .assertEqual (win .inch (0 , 0 ), b'L' [0 ] | curses .A_REVERSE )
620742 self .assertEqual (win .inch (0 , 5 ), b'#' [0 ] | curses .A_REVERSE )
621743
744+ # A non-ASCII background character of an 8-bit locale reads back as its
745+ # encoded byte. See _encodable for the character set.
746+ win .bkgd (' ' )
747+ encoding = win .encoding
748+ for ch in ('é' , '¤' , '€' , 'є' ):
749+ try :
750+ b = ch .encode (encoding )
751+ except UnicodeEncodeError :
752+ continue
753+ if len (b ) != 1 :
754+ continue
755+ # A wide build stores a character outside Latin-1 as a wide cell,
756+ # not as its encoded byte, so it cannot round-trip here.
757+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
758+ continue
759+ with self .subTest (ch = ch ):
760+ win .bkgd (ch )
761+ self .assertEqual (win .getbkgd (), b [0 ])
762+ if ord (ch ) < 0x100 :
763+ # The same byte given as an int. A wide build stores it
764+ # through the locale, so only a Latin-1 byte round-trips.
765+ win .bkgd (' ' )
766+ win .bkgdset (b [0 ])
767+ self .assertEqual (win .getbkgd (), b [0 ])
768+ win .bkgd (b [0 ])
769+ self .assertEqual (win .getbkgd (), b [0 ])
770+
622771 def test_overlay (self ):
623772 srcwin = curses .newwin (5 , 18 , 3 , 4 )
624773 lorem_ipsum (srcwin )
@@ -711,6 +860,16 @@ def test_borders_and_lines(self):
711860 win .border (65 , 66 )
712861 win .border (65 )
713862 win .border ()
863+ # With no arguments, border() fills the edges with ACS line and corner
864+ # characters.
865+ chartext = curses .A_CHARTEXT
866+ maxy , maxx = win .getmaxyx ()
867+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
868+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
869+ self .assertEqual (win .inch (maxy - 1 , 0 ) & chartext , curses .ACS_LLCORNER & chartext )
870+ self .assertEqual (win .inch (maxy - 1 , maxx - 1 ) & chartext , curses .ACS_LRCORNER & chartext )
871+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
872+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
714873
715874 win .box (':' , '~' )
716875 self .assertEqual (win .instr (0 , 1 , 8 ), b'~~~~~~~~' )
@@ -721,6 +880,11 @@ def test_borders_and_lines(self):
721880 self .assertRaises (TypeError , win .box , 65 , 66 , 67 )
722881 self .assertRaises (TypeError , win .box , 65 )
723882 win .box ()
883+ # With no arguments, box() likewise draws ACS corners and lines.
884+ self .assertEqual (win .inch (0 , 0 ) & chartext , curses .ACS_ULCORNER & chartext )
885+ self .assertEqual (win .inch (0 , maxx - 1 ) & chartext , curses .ACS_URCORNER & chartext )
886+ self .assertEqual (win .inch (0 , 1 ) & chartext , curses .ACS_HLINE & chartext )
887+ self .assertEqual (win .inch (1 , 0 ) & chartext , curses .ACS_VLINE & chartext )
724888
725889 win .move (1 , 2 )
726890 win .hline ('-' , 5 )
@@ -742,6 +906,43 @@ def test_borders_and_lines(self):
742906 self .assertEqual (win .inch (2 , 1 ), b';' [0 ] | curses .A_STANDOUT )
743907 self .assertEqual (win .inch (3 , 1 ), b'a' [0 ])
744908
909+ # A border or line character of an 8-bit locale round-trips as its
910+ # encoded byte. See _encodable for the character set.
911+ encoding = win .encoding
912+ for ch in ('é' , '¤' , '€' , 'є' ):
913+ try :
914+ b = ch .encode (encoding )
915+ except UnicodeEncodeError :
916+ continue
917+ if len (b ) != 1 :
918+ continue
919+ # A wide build stores a character outside Latin-1 as a wide cell,
920+ # not as its encoded byte, so it cannot round-trip here.
921+ if ord (ch ) > 0xff and hasattr (win , 'get_wch' ):
922+ continue
923+ with self .subTest (ch = ch ):
924+ win .erase ()
925+ win .hline (2 , 0 , ch , 5 )
926+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
927+ win .vline (0 , 0 , ch , 3 )
928+ self .assertEqual (win .instr (0 , 0 , 1 ), b )
929+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
930+ win .border (ch , ch , ch , ch , ch , ch , ch , ch )
931+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
932+ if ord (ch ) < 0x100 :
933+ # The same byte given as an int. A wide build stores it
934+ # through the locale, so only a Latin-1 byte round-trips.
935+ v = b [0 ]
936+ win .erase ()
937+ win .hline (2 , 0 , v , 5 )
938+ self .assertEqual (win .instr (2 , 0 , 5 ), b * 5 )
939+ win .vline (0 , 0 , v , 3 )
940+ self .assertEqual (win .instr (1 , 0 , 1 ), b )
941+ win .border (v , v , v , v , v , v , v , v )
942+ self .assertEqual (win .instr (0 , 0 ), b * maxx )
943+ win .box (v , v )
944+ self .assertEqual (win .instr (0 , 1 , 1 ), b )
945+
745946 def test_unctrl (self ):
746947 # TODO: wunctrl()
747948 self .assertEqual (curses .unctrl (b'A' ), b'A' )
@@ -750,6 +951,19 @@ def test_unctrl(self):
750951 self .assertEqual (curses .unctrl (b'\n ' ), b'^J' )
751952 self .assertEqual (curses .unctrl ('\n ' ), b'^J' )
752953 self .assertEqual (curses .unctrl (10 ), b'^J' )
954+ # A printable non-ASCII byte of an 8-bit locale is returned unchanged.
955+ # See _encodable for the character set.
956+ encoding = self .stdscr .encoding
957+ for ch in ('é' , '¤' , '€' , 'є' ):
958+ try :
959+ b = ch .encode (encoding )
960+ except UnicodeEncodeError :
961+ continue
962+ if len (b ) != 1 :
963+ continue
964+ with self .subTest (ch = ch ):
965+ self .assertEqual (curses .unctrl (ch ), b )
966+ self .assertEqual (curses .unctrl (b [0 ]), b ) # the byte as an int
753967 self .assertRaises (TypeError , curses .unctrl , b'' )
754968 self .assertRaises (TypeError , curses .unctrl , b'AB' )
755969 self .assertRaises (TypeError , curses .unctrl , '' )
@@ -1449,7 +1663,8 @@ def test_issue6243(self):
14491663 def test_unget_wch (self ):
14501664 stdscr = self .stdscr
14511665 encoding = stdscr .encoding
1452- for ch in ('a' , '\xe9 ' , '\u20ac ' , '\U0010FFFF ' ):
1666+ # See _encodable for the character set, plus a non-BMP character.
1667+ for ch in ('a' , '\xe9 ' , '\xa4 ' , '\u20ac ' , '\u0454 ' , '\U0010FFFF ' ):
14531668 try :
14541669 ch .encode (encoding )
14551670 except UnicodeEncodeError :
0 commit comments