|
1 | 1 | (DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10 FORMAT XCCS) |
2 | | -(FILECREATED " 3-Jul-2021 13:37:33" |
3 | | -{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;175 66483 |
| 2 | +(FILECREATED " 6-Aug-2021 10:30:15" |
| 3 | +{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;185 64537 |
4 | 4 |
|
5 | | - changes to%: (FNS READ-UNICODE-MAPPING MAKE-UNICODE-FORMATS) |
| 5 | + changes to%: (FNS \UTF16.BACKCCODEFN) |
6 | 6 |
|
7 | | - previous date%: " 3-Jul-2021 11:41:05" |
8 | | -{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;173) |
| 7 | + previous date%: " 5-Aug-2021 22:34:22" |
| 8 | +{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;184) |
9 | 9 |
|
10 | 10 |
|
11 | 11 | (PRETTYCOMPRINT UNICODECOMS) |
|
14 | 14 | [(COMS |
15 | 15 | (* ;; "External formats") |
16 | 16 |
|
17 | | - (FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCHARFN) |
18 | | - (FNS UTF16BE.OUTCHARFN UTF16BE.INCCODEFN UTF16BE.PEEKCCODEFN \UTF16.BACKCHARFN) |
| 17 | + (FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN) |
| 18 | + (FNS UTF16BE.OUTCHARFN UTF16BE.INCCODEFN UTF16BE.PEEKCCODEFN \UTF16.BACKCCODEFN) |
19 | 19 | (INITVARS (EXTERNALEOL 'LF)) |
20 | 20 | (FNS MAKE-UNICODE-FORMATS) |
21 | 21 | (P (MAKE-UNICODE-FORMATS EXTERNALEOL)) |
|
78 | 78 | (DEFINEQ |
79 | 79 |
|
80 | 80 | (UTF8.OUTCHARFN |
81 | | - [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 1-Feb-2021 15:50 by rmk:") |
| 81 | + [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 5-Aug-2021 22:34 by rmk:") |
82 | 82 | (* ; "Edited 17-Aug-2020 08:45 by rmk:") |
83 | 83 | (* ; "Edited 30-Jan-2020 23:08 by rmk:") |
84 | 84 |
|
85 | 85 | (* ;; "Perhaps the translation table should already do the mapping for EOL to LF, but that seems to be a separate property of the stream") |
86 | 86 |
|
87 | | - (* ;; "PRINT UTF8 sequence for CHARCODE. Do not do XCCS to Unicode translation if RAW.") |
| 87 | + (* ;; "Print UTF8 sequence for CHARCODE. Do not do XCCS to Unicode translation if RAW.") |
88 | 88 |
|
89 | 89 | (IF (EQ CHARCODE (CHARCODE EOL)) |
90 | | - THEN (REPLACE (STREAM CHARPOSITION) OF STREAM WITH 0) |
91 | | - (\BOUT STREAM (SELECTC (FETCH (STREAM EOLCONVENTION) OF STREAM) |
92 | | - (LF.EOLC (CHARCODE LF)) |
93 | | - (CR.EOLC (CHARCODE CR)) |
94 | | - (CRLF.EOLC (\BOUT STREAM (CHARCODE CR)) |
95 | | - (CHARCODE LF)) |
96 | | - (SHOULDNT))) |
| 90 | + THEN (\BOUTEOL STREAM) |
97 | 91 | ELSE (CHANGE (FETCH (STREAM CHARPOSITION) OF STREAM) |
98 | 92 | (IPLUS DATUM 1)) (* ; "Avoid overflow") |
99 | 93 | (FOR C INSIDE (CL:IF RAW |
|
131 | 125 |
|
132 | 126 | (UTF8.PEEKCCODEFN [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 14-Jun-2021 22:53 by rmk:") (* ;; "Modeled this after \EUCPEEK on LLREAD. In the multi-byte (non-ASCII) case, backs the file pointer to the beginning by the proper number of \BACKFILEPTRs, and returns a count of 0. Returns NIL if NOERROR and either invalid UTF8 or end of file.") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") (* ;; "Do not do UNICODE to XCCS translation if RAW") (PROG (BYTE1 BYTE2 BYTE3 BYTE4 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) (* ;; "Distinguish on header bytex") (CL:UNLESS BYTE1 (RETURN NIL)) [IF (ILESSP BYTE1 128) THEN (* ;; "Test first: Ascii is the common case. No need to back up, since we peeked.") (SETQ CODE BYTE1) ELSEIF (IGEQ BYTE1 (LLSH 15 4)) THEN (* ; "4 bytes") (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE2 128)) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2))) (RETURN CODE)) (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE3 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE3 128)) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3))) (RETURN CODE)) (\BIN STREAM) (SETQ BYTE4 (\PEEKBIN STREAM NOERROR)) (* ; "PEEK the last, no need to back it up") (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (IF (AND BYTE4 (IGEQ BYTE4 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 3) 18) (LLSH (LOADBYTE BYTE2 0 6) 12) (LLSH (LOADBYTE BYTE3 0 6) 6) (LOADBYTE BYTE4 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3 BYTE4))) ELSEIF (IGEQ BYTE1 (LLSH 7 5)) THEN (* ; "3 bytes") (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE2 128)) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2))) (RETURN CODE)) (\BIN STREAM) (SETQ BYTE3 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (IF (AND BYTE3 (IGEQ BYTE3 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 4) 12) (LLSH (LOADBYTE BYTE2 0 6) 6) (LOADBYTE BYTE3 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3))) ELSE (* ; "Must be 2 bytes") (\BIN STREAM) (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (IF (AND BYTE2 (IGEQ BYTE2 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 5) 6) (LOADBYTE BYTE2 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2] (CL:WHEN (AND CODE (NOT RAW)) (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) (RETURN CODE]) |
133 | 127 |
|
134 | | -(\UTF8.BACKCHARFN [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:38 by rmk:") (* ;; "\BACKFILEPTR is NIL at beginning of FILE, just return COUNT") (BIND (C _ 0) WHILE (CL:WHEN (\BACKFILEPTR STREAM) (ADD C 1) (EQ 2 (LRSH (\PEEKBIN STREAM) 6))) REPEATUNTIL (EQ C 4) FINALLY (CL:WHEN BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL C)))]) |
| 128 | +(\UTF8.BACKCCODEFN |
| 129 | + [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:38 by rmk:") |
| 130 | + |
| 131 | + (* ;; "\BACKFILEPTR is NIL at beginning of FILE, just return COUNT") |
| 132 | + |
| 133 | + (BIND (C _ 0) WHILE (CL:WHEN (\BACKFILEPTR STREAM) |
| 134 | + (ADD C 1) |
| 135 | + (EQ 2 (LRSH (\PEEKBIN STREAM) |
| 136 | + 6))) REPEATUNTIL (EQ C 4) |
| 137 | + FINALLY (CL:WHEN BYTECOUNTVAR |
| 138 | + (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL C)))]) |
135 | 139 | ) |
136 | 140 | (DEFINEQ |
137 | 141 |
|
|
155 | 159 |
|
156 | 160 | (UTF16BE.PEEKCCODEFN [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 14-Jun-2021 22:58 by rmk:") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") (* ;; "Do not do UNICODE to XCCS translation if RAW") (LET (BYTE1 BYTE2 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) (IF BYTE1 THEN (\BIN STREAM) (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (IF BYTE2 THEN (SETQ CODE (LOGOR (LLSH BYTE1 8) BYTE2)) (CL:IF RAW CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) ELSEIF NOERROR THEN NIL) ELSEIF NOERROR THEN NIL ELSE (ERROR "INVALID UTF16 CHARACTER" (LIST BYTE1 BYTE2]) |
157 | 161 |
|
158 | | -(\UTF16.BACKCHARFN [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:35 by rmk:") (* ;; "\BACKFILEPTR is NIL at beginning of FILE, do nothing.") (* ;; "Common for big-ending and little-ending") (IF (NOT (\BACKFILEPTR STREAM)) ELSEIF (\BACKFILEPTR STREAM) THEN (AND BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL 2))) ELSE (AND BYTECOUNTVAR (SET BYTECOUNTVAR (ADD1 BYTECOUNTVAL]) |
| 162 | +(\UTF16.BACKCCODEFN |
| 163 | + [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 6-Aug-2021 10:15 by rmk:") |
| 164 | + |
| 165 | + (* ;; "\BACKFILEPTR is NIL at beginning of FILE, do nothing.") |
| 166 | + |
| 167 | + (* ;; "Common for big-ending and little-ending") |
| 168 | + |
| 169 | + (CL:WHEN (\BACKFILEPTR STREAM) |
| 170 | + [IF (\BACKFILEPTR STREAM) |
| 171 | + THEN (AND BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL 2))) |
| 172 | + ELSE (AND BYTECOUNTVAR (SET BYTECOUNTVAR (ADD1 BYTECOUNTVAL])]) |
159 | 173 | ) |
160 | 174 |
|
161 | 175 | (RPAQ? EXTERNALEOL 'LF) |
162 | 176 | (DEFINEQ |
163 | 177 |
|
164 | 178 | (MAKE-UNICODE-FORMATS |
165 | | - [LAMBDA (EXTERNALEOL) (* ; "Edited 3-Jul-2021 13:17 by rmk:") |
| 179 | + [LAMBDA (EXTERNALEOL) (* ; "Edited 1-Aug-2021 23:18 by rmk:") |
166 | 180 |
|
167 | 181 | (* ;; "RAW formats do not do XCCS/Unicode translation, just deal with the byte encoding.") |
168 | 182 |
|
169 | 183 | (* ;; "The EXTERNALEOL specifies the EOLCONVENTION of the stream, particularly to produce output files with the desired convention. On input the macro \CHECKEOLC (LLREAD) coerces only that coding to the internal EOL, which is a mistake.") |
170 | 184 |
|
171 | | - (SETQ EXTERNALEOL (SELECTQ EXTERNALEOL |
172 | | - (LF LF.EOLC) |
173 | | - (CR CR.EOLC) |
174 | | - (CRLF CRLF.EOLC) |
175 | | - (SHOULDNT))) |
176 | | - (\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT |
177 | | - NAME _ :UTF16BE |
178 | | - EOL _ EXTERNALEOL |
179 | | - INCCODEFN _ (FUNCTION UTF16BE.INCCODEFN) |
180 | | - PEEKCCODEFN _ (FUNCTION UTF16BE.PEEKCCODEFN) |
181 | | - BACKCCODEFN _ (FUNCTION \UTF16.BACKCHARFN) |
182 | | - OUTCHARFN _ (FUNCTION UTF16BE.OUTCHARFN))) |
183 | | - [\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT |
184 | | - NAME _ :UTF16BE-RAW |
185 | | - EOL _ EXTERNALEOL |
186 | | - INCCODEFN _ [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) |
187 | | - (UTF16BE.INCCODEFN STREAM |
188 | | - BYTECOUNTVAR BYTECOUNTVAL T] |
189 | | - PEEKCCODEFN _ [FUNCTION (LAMBDA (STREAM NOERROR) |
190 | | - (UTF16BE.PEEKCCODEFN STREAM NOERROR |
191 | | - T] |
192 | | - BACKCCODEFN _ (FUNCTION \UTF16.BACKCHARFN) |
193 | | - OUTCHARFN _ (FUNCTION (LAMBDA (STREAM CHARCODE) |
194 | | - (UTF16BE.OUTCHARFN STREAM CHARCODE T] |
195 | | - [\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT |
196 | | - NAME _ :UTF-8-RAW |
197 | | - EOL _ EXTERNALEOL |
198 | | - INCCODEFN _ [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) |
199 | | - (UTF8.INCCODEFN STREAM BYTECOUNTVAR |
200 | | - BYTECOUNTVAL T] |
201 | | - PEEKCCODEFN _ [FUNCTION (LAMBDA (STREAM NOERROR) |
202 | | - (UTF8.PEEKCCODEFN STREAM NOERROR T] |
203 | | - BACKCCODEFN _ (FUNCTION \UTF8.BACKCHARFN) |
204 | | - OUTCHARFN _ (FUNCTION (LAMBDA (STREAM CHARCODE) |
205 | | - (UTF8.OUTCHARFN STREAM CHARCODE T] |
206 | | - (\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT |
207 | | - NAME _ :UTF-8 |
208 | | - EOL _ EXTERNALEOL |
209 | | - INCCODEFN _ (FUNCTION UTF8.INCCODEFN) |
210 | | - PEEKCCODEFN _ (FUNCTION UTF8.PEEKCCODEFN) |
211 | | - BACKCCODEFN _ (FUNCTION \UTF8.BACKCHARFN) |
212 | | - OUTCHARFN _ (FUNCTION UTF8.OUTCHARFN]) |
| 185 | + (MAKE-EXTERNALFORMAT :UTF-8 (FUNCTION UTF8.INCCODEFN) |
| 186 | + (FUNCTION UTF8.PEEKCCODEFN) |
| 187 | + (FUNCTION \UTF8.BACKCCODEFN) |
| 188 | + (FUNCTION UTF8.OUTCHARFN) |
| 189 | + NIL EXTERNALEOL) |
| 190 | + (MAKE-EXTERNALFORMAT :UTF-8-RAW [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) |
| 191 | + (UTF8.INCCODEFN STREAM BYTECOUNTVAR BYTECOUNTVAL |
| 192 | + T] |
| 193 | + [FUNCTION (LAMBDA (STREAM NOERROR) |
| 194 | + (UTF8.PEEKCCODEFN STREAM NOERROR T] |
| 195 | + (FUNCTION \UTF8.BACKCCODEFN) |
| 196 | + [FUNCTION (LAMBDA (STREAM CHARCODE) |
| 197 | + (UTF8.OUTCHARFN STREAM CHARCODE T] |
| 198 | + NIL EXTERNALEOL) |
| 199 | + (MAKE-EXTERNALFORMAT :UTF-16BE (FUNCTION UTF16BE.INCCODEFN) |
| 200 | + (FUNCTION UTF16BE.PEEKCCODEFN) |
| 201 | + (FUNCTION \UTF16.BACKCCODEFN) |
| 202 | + (FUNCTION UTF16BE.OUTCHARFN) |
| 203 | + NIL EXTERNALEOL) |
| 204 | + (MAKE-EXTERNALFORMAT :UTF-16BE-RAW [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) |
| 205 | + (UTF16BE.INCCODEFN STREAM BYTECOUNTVAR |
| 206 | + BYTECOUNTVAL T] |
| 207 | + [FUNCTION (LAMBDA (STREAM NOERROR) |
| 208 | + (UTF16BE.PEEKCCODEFN STREAM NOERROR T] |
| 209 | + (FUNCTION \UTF16.BACKCCODEFN) |
| 210 | + [FUNCTION (LAMBDA (STREAM CHARCODE) |
| 211 | + (UTF16BE.OUTCHARFN STREAM CHARCODE T] |
| 212 | + NIL EXTERNALEOL]) |
213 | 213 | ) |
214 | 214 |
|
215 | 215 | (MAKE-UNICODE-FORMATS EXTERNALEOL) |
|
993 | 993 | ) |
994 | 994 | ) |
995 | 995 | (DECLARE%: DONTCOPY |
996 | | - (FILEMAP (NIL (4126 17801 (UTF8.OUTCHARFN 4136 . 7332) (UTF8.INCCODEFN 7334 . 12450) (UTF8.PEEKCCODEFN |
997 | | - 12452 . 17226) (\UTF8.BACKCHARFN 17228 . 17799)) (17802 20994 (UTF16BE.OUTCHARFN 17812 . 18545) ( |
998 | | -UTF16BE.INCCODEFN 18547 . 19430) (UTF16BE.PEEKCCODEFN 19432 . 20503) (\UTF16.BACKCHARFN 20505 . 20992) |
999 | | -) (21024 24537 (MAKE-UNICODE-FORMATS 21034 . 24535)) (24633 25939 (UNICODE.UNMAPPED 24643 . 25937)) ( |
1000 | | -25940 26476 (XCCS-UTF8-AFTER-OPEN 25950 . 26474)) (27546 27895 (XTOUCODE 27556 . 27724) (UTOXCODE |
1001 | | -27726 . 27893)) (27935 44118 (READ-UNICODE-MAPPING-FILENAMES 27945 . 29107) (READ-UNICODE-MAPPING |
1002 | | -29109 . 32407) (WRITE-UNICODE-MAPPING 32409 . 36626) (WRITE-UNICODE-INCLUDED 36628 . 41350) ( |
1003 | | -WRITE-UNICODE-MAPPING-HEADER 41352 . 42584) (WRITE-UNICODE-MAPPING-FILENAME 42586 . 44116)) (47455 |
1004 | | -55928 (MAKE-UNICODE-TRANSLATION-TABLES 47465 . 55926)) (56349 64253 (HEXSTRING 56359 . 57520) ( |
1005 | | -UTF8HEXSTRING 57522 . 59727) (NUTF8CODEBYTES 59729 . 60392) (NUTF8STRINGBYTES 60394 . 60875) ( |
1006 | | -XTOUSTRING 60877 . 63888) (XCCSSTRING 63890 . 64251)) (64254 65723 (SHOWCHARS 64264 . 65721))))) |
| 996 | + (FILEMAP (NIL (4105 17365 (UTF8.OUTCHARFN 4115 . 6895) (UTF8.INCCODEFN 6897 . 12013) (UTF8.PEEKCCODEFN |
| 997 | + 12015 . 16789) (\UTF8.BACKCCODEFN 16791 . 17363)) (17366 20563 (UTF16BE.OUTCHARFN 17376 . 18109) ( |
| 998 | +UTF16BE.INCCODEFN 18111 . 18994) (UTF16BE.PEEKCCODEFN 18996 . 20067) (\UTF16.BACKCCODEFN 20069 . 20561 |
| 999 | +)) (20593 22591 (MAKE-UNICODE-FORMATS 20603 . 22589)) (22687 23993 (UNICODE.UNMAPPED 22697 . 23991)) ( |
| 1000 | +23994 24530 (XCCS-UTF8-AFTER-OPEN 24004 . 24528)) (25600 25949 (XTOUCODE 25610 . 25778) (UTOXCODE |
| 1001 | +25780 . 25947)) (25989 42172 (READ-UNICODE-MAPPING-FILENAMES 25999 . 27161) (READ-UNICODE-MAPPING |
| 1002 | +27163 . 30461) (WRITE-UNICODE-MAPPING 30463 . 34680) (WRITE-UNICODE-INCLUDED 34682 . 39404) ( |
| 1003 | +WRITE-UNICODE-MAPPING-HEADER 39406 . 40638) (WRITE-UNICODE-MAPPING-FILENAME 40640 . 42170)) (45509 |
| 1004 | +53982 (MAKE-UNICODE-TRANSLATION-TABLES 45519 . 53980)) (54403 62307 (HEXSTRING 54413 . 55574) ( |
| 1005 | +UTF8HEXSTRING 55576 . 57781) (NUTF8CODEBYTES 57783 . 58446) (NUTF8STRINGBYTES 58448 . 58929) ( |
| 1006 | +XTOUSTRING 58931 . 61942) (XCCSSTRING 61944 . 62305)) (62308 63777 (SHOWCHARS 62318 . 63775))))) |
1007 | 1007 | STOP |
|
0 commit comments