Skip to content

Commit 9ab24c0

Browse files
committed
Merge branch 'Miscellaneous-format-stuff'
2 parents 565f599 + 3de2ebb commit 9ab24c0

File tree

6 files changed

+1028
-1035
lines changed

6 files changed

+1028
-1035
lines changed

library/UNICODE

Lines changed: 72 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
(DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10 FORMAT XCCS)
2-
(FILECREATED " 3-Jul-2021 13:37:33" 
3-
{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;175 66483
2+
(FILECREATED " 6-Aug-2021 10:30:15" 
3+
{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;185 64537
44

5-
changes to%: (FNS READ-UNICODE-MAPPING MAKE-UNICODE-FORMATS)
5+
changes to%: (FNS \UTF16.BACKCCODEFN)
66

7-
previous date%: " 3-Jul-2021 11:41:05"
8-
{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;173)
7+
previous date%: " 5-Aug-2021 22:34:22"
8+
{DSK}<Users>kaplan>Local>medley3.5>git-medley>library>UNICODE.;184)
99

1010

1111
(PRETTYCOMPRINT UNICODECOMS)
@@ -14,8 +14,8 @@
1414
[(COMS
1515
(* ;; "External formats")
1616

17-
(FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCHARFN)
18-
(FNS UTF16BE.OUTCHARFN UTF16BE.INCCODEFN UTF16BE.PEEKCCODEFN \UTF16.BACKCHARFN)
17+
(FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN)
18+
(FNS UTF16BE.OUTCHARFN UTF16BE.INCCODEFN UTF16BE.PEEKCCODEFN \UTF16.BACKCCODEFN)
1919
(INITVARS (EXTERNALEOL 'LF))
2020
(FNS MAKE-UNICODE-FORMATS)
2121
(P (MAKE-UNICODE-FORMATS EXTERNALEOL))
@@ -78,22 +78,16 @@
7878
(DEFINEQ
7979

8080
(UTF8.OUTCHARFN
81-
[LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 1-Feb-2021 15:50 by rmk:")
81+
[LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 5-Aug-2021 22:34 by rmk:")
8282
(* ; "Edited 17-Aug-2020 08:45 by rmk:")
8383
(* ; "Edited 30-Jan-2020 23:08 by rmk:")
8484

8585
(* ;; "Perhaps the translation table should already do the mapping for EOL to LF, but that seems to be a separate property of the stream")
8686

87-
(* ;; "PRINT UTF8 sequence for CHARCODE. Do not do XCCS to Unicode translation if RAW.")
87+
(* ;; "Print UTF8 sequence for CHARCODE. Do not do XCCS to Unicode translation if RAW.")
8888

8989
(IF (EQ CHARCODE (CHARCODE EOL))
90-
THEN (REPLACE (STREAM CHARPOSITION) OF STREAM WITH 0)
91-
(\BOUT STREAM (SELECTC (FETCH (STREAM EOLCONVENTION) OF STREAM)
92-
(LF.EOLC (CHARCODE LF))
93-
(CR.EOLC (CHARCODE CR))
94-
(CRLF.EOLC (\BOUT STREAM (CHARCODE CR))
95-
(CHARCODE LF))
96-
(SHOULDNT)))
90+
THEN (\BOUTEOL STREAM)
9791
ELSE (CHANGE (FETCH (STREAM CHARPOSITION) OF STREAM)
9892
(IPLUS DATUM 1)) (* ; "Avoid overflow")
9993
(FOR C INSIDE (CL:IF RAW
@@ -131,7 +125,17 @@
131125

132126
(UTF8.PEEKCCODEFN [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 14-Jun-2021 22:53 by rmk:") (* ;; "Modeled this after \EUCPEEK on LLREAD. In the multi-byte (non-ASCII) case, backs the file pointer to the beginning by the proper number of \BACKFILEPTRs, and returns a count of 0. Returns NIL if NOERROR and either invalid UTF8 or end of file.") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") (* ;; "Do not do UNICODE to XCCS translation if RAW") (PROG (BYTE1 BYTE2 BYTE3 BYTE4 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) (* ;; "Distinguish on header bytex") (CL:UNLESS BYTE1 (RETURN NIL)) [IF (ILESSP BYTE1 128) THEN (* ;;  "Test first: Ascii is the common case. No need to back up, since we peeked.") (SETQ CODE BYTE1) ELSEIF (IGEQ BYTE1 (LLSH 15 4)) THEN (* ; "4 bytes") (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE2 128)) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2))) (RETURN CODE)) (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE3 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE3 128)) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3))) (RETURN CODE)) (\BIN STREAM) (SETQ BYTE4 (\PEEKBIN STREAM NOERROR)) (* ;  "PEEK the last, no need to back it up") (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (IF (AND BYTE4 (IGEQ BYTE4 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 3) 18) (LLSH (LOADBYTE BYTE2 0 6) 12) (LLSH (LOADBYTE BYTE3 0 6) 6) (LOADBYTE BYTE4 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3 BYTE4))) ELSEIF (IGEQ BYTE1 (LLSH 7 5)) THEN (* ; "3 bytes") (\BIN STREAM) (CL:UNLESS (AND (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (IGEQ BYTE2 128)) (\BACKFILEPTR STREAM) (OR NOERROR (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2))) (RETURN CODE)) (\BIN STREAM) (SETQ BYTE3 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (\BACKFILEPTR STREAM) (IF (AND BYTE3 (IGEQ BYTE3 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 4) 12) (LLSH (LOADBYTE BYTE2 0 6) 6) (LOADBYTE BYTE3 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3))) ELSE (* ; "Must be 2 bytes") (\BIN STREAM) (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (IF (AND BYTE2 (IGEQ BYTE2 128)) THEN (SETQ CODE (LOGOR (LLSH (LOADBYTE BYTE1 0 5) 6) (LOADBYTE BYTE2 0 6))) ELSEIF NOERROR ELSE (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2] (CL:WHEN (AND CODE (NOT RAW)) (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) (RETURN CODE])
133127

134-
(\UTF8.BACKCHARFN [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:38 by rmk:") (* ;; "\BACKFILEPTR is NIL at beginning of FILE, just return COUNT") (BIND (C _ 0) WHILE (CL:WHEN (\BACKFILEPTR STREAM) (ADD C 1) (EQ 2 (LRSH (\PEEKBIN STREAM) 6))) REPEATUNTIL (EQ C 4) FINALLY (CL:WHEN BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL C)))])
128+
(\UTF8.BACKCCODEFN
129+
[LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:38 by rmk:")
130+
131+
(* ;; "\BACKFILEPTR is NIL at beginning of FILE, just return COUNT")
132+
133+
(BIND (C _ 0) WHILE (CL:WHEN (\BACKFILEPTR STREAM)
134+
(ADD C 1)
135+
(EQ 2 (LRSH (\PEEKBIN STREAM)
136+
6))) REPEATUNTIL (EQ C 4)
137+
FINALLY (CL:WHEN BYTECOUNTVAR
138+
(SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL C)))])
135139
)
136140
(DEFINEQ
137141

@@ -155,61 +159,57 @@
155159

156160
(UTF16BE.PEEKCCODEFN [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 14-Jun-2021 22:58 by rmk:") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") (* ;; "Do not do UNICODE to XCCS translation if RAW") (LET (BYTE1 BYTE2 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) (IF BYTE1 THEN (\BIN STREAM) (SETQ BYTE2 (\PEEKBIN STREAM NOERROR)) (\BACKFILEPTR STREAM) (IF BYTE2 THEN (SETQ CODE (LOGOR (LLSH BYTE1 8) BYTE2)) (CL:IF RAW CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) ELSEIF NOERROR THEN NIL) ELSEIF NOERROR THEN NIL ELSE (ERROR "INVALID UTF16 CHARACTER" (LIST BYTE1 BYTE2])
157161

158-
(\UTF16.BACKCHARFN [LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 15-Jun-2021 13:35 by rmk:") (* ;; "\BACKFILEPTR is NIL at beginning of FILE, do nothing.") (* ;; "Common for big-ending and little-ending") (IF (NOT (\BACKFILEPTR STREAM)) ELSEIF (\BACKFILEPTR STREAM) THEN (AND BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL 2))) ELSE (AND BYTECOUNTVAR (SET BYTECOUNTVAR (ADD1 BYTECOUNTVAL])
162+
(\UTF16.BACKCCODEFN
163+
[LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL) (* ; "Edited 6-Aug-2021 10:15 by rmk:")
164+
165+
(* ;; "\BACKFILEPTR is NIL at beginning of FILE, do nothing.")
166+
167+
(* ;; "Common for big-ending and little-ending")
168+
169+
(CL:WHEN (\BACKFILEPTR STREAM)
170+
[IF (\BACKFILEPTR STREAM)
171+
THEN (AND BYTECOUNTVAR (SET BYTECOUNTVAR (IPLUS BYTECOUNTVAL 2)))
172+
ELSE (AND BYTECOUNTVAR (SET BYTECOUNTVAR (ADD1 BYTECOUNTVAL])])
159173
)
160174

161175
(RPAQ? EXTERNALEOL 'LF)
162176
(DEFINEQ
163177

164178
(MAKE-UNICODE-FORMATS
165-
[LAMBDA (EXTERNALEOL) (* ; "Edited 3-Jul-2021 13:17 by rmk:")
179+
[LAMBDA (EXTERNALEOL) (* ; "Edited 1-Aug-2021 23:18 by rmk:")
166180

167181
(* ;; "RAW formats do not do XCCS/Unicode translation, just deal with the byte encoding.")
168182

169183
(* ;; "The EXTERNALEOL specifies the EOLCONVENTION of the stream, particularly to produce output files with the desired convention. On input the macro \CHECKEOLC (LLREAD) coerces only that coding to the internal EOL, which is a mistake.")
170184

171-
(SETQ EXTERNALEOL (SELECTQ EXTERNALEOL
172-
(LF LF.EOLC)
173-
(CR CR.EOLC)
174-
(CRLF CRLF.EOLC)
175-
(SHOULDNT)))
176-
(\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT
177-
NAME _ :UTF16BE
178-
EOL _ EXTERNALEOL
179-
INCCODEFN _ (FUNCTION UTF16BE.INCCODEFN)
180-
PEEKCCODEFN _ (FUNCTION UTF16BE.PEEKCCODEFN)
181-
BACKCCODEFN _ (FUNCTION \UTF16.BACKCHARFN)
182-
OUTCHARFN _ (FUNCTION UTF16BE.OUTCHARFN)))
183-
[\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT
184-
NAME _ :UTF16BE-RAW
185-
EOL _ EXTERNALEOL
186-
INCCODEFN _ [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL)
187-
(UTF16BE.INCCODEFN STREAM
188-
BYTECOUNTVAR BYTECOUNTVAL T]
189-
PEEKCCODEFN _ [FUNCTION (LAMBDA (STREAM NOERROR)
190-
(UTF16BE.PEEKCCODEFN STREAM NOERROR
191-
T]
192-
BACKCCODEFN _ (FUNCTION \UTF16.BACKCHARFN)
193-
OUTCHARFN _ (FUNCTION (LAMBDA (STREAM CHARCODE)
194-
(UTF16BE.OUTCHARFN STREAM CHARCODE T]
195-
[\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT
196-
NAME _ :UTF-8-RAW
197-
EOL _ EXTERNALEOL
198-
INCCODEFN _ [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL)
199-
(UTF8.INCCODEFN STREAM BYTECOUNTVAR
200-
BYTECOUNTVAL T]
201-
PEEKCCODEFN _ [FUNCTION (LAMBDA (STREAM NOERROR)
202-
(UTF8.PEEKCCODEFN STREAM NOERROR T]
203-
BACKCCODEFN _ (FUNCTION \UTF8.BACKCHARFN)
204-
OUTCHARFN _ (FUNCTION (LAMBDA (STREAM CHARCODE)
205-
(UTF8.OUTCHARFN STREAM CHARCODE T]
206-
(\INSTALL.EXTERNALFORMAT (CREATE EXTERNALFORMAT
207-
NAME _ :UTF-8
208-
EOL _ EXTERNALEOL
209-
INCCODEFN _ (FUNCTION UTF8.INCCODEFN)
210-
PEEKCCODEFN _ (FUNCTION UTF8.PEEKCCODEFN)
211-
BACKCCODEFN _ (FUNCTION \UTF8.BACKCHARFN)
212-
OUTCHARFN _ (FUNCTION UTF8.OUTCHARFN])
185+
(MAKE-EXTERNALFORMAT :UTF-8 (FUNCTION UTF8.INCCODEFN)
186+
(FUNCTION UTF8.PEEKCCODEFN)
187+
(FUNCTION \UTF8.BACKCCODEFN)
188+
(FUNCTION UTF8.OUTCHARFN)
189+
NIL EXTERNALEOL)
190+
(MAKE-EXTERNALFORMAT :UTF-8-RAW [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL)
191+
(UTF8.INCCODEFN STREAM BYTECOUNTVAR BYTECOUNTVAL
192+
T]
193+
[FUNCTION (LAMBDA (STREAM NOERROR)
194+
(UTF8.PEEKCCODEFN STREAM NOERROR T]
195+
(FUNCTION \UTF8.BACKCCODEFN)
196+
[FUNCTION (LAMBDA (STREAM CHARCODE)
197+
(UTF8.OUTCHARFN STREAM CHARCODE T]
198+
NIL EXTERNALEOL)
199+
(MAKE-EXTERNALFORMAT :UTF-16BE (FUNCTION UTF16BE.INCCODEFN)
200+
(FUNCTION UTF16BE.PEEKCCODEFN)
201+
(FUNCTION \UTF16.BACKCCODEFN)
202+
(FUNCTION UTF16BE.OUTCHARFN)
203+
NIL EXTERNALEOL)
204+
(MAKE-EXTERNALFORMAT :UTF-16BE-RAW [FUNCTION (LAMBDA (STREAM BYTECOUNTVAR BYTECOUNTVAL)
205+
(UTF16BE.INCCODEFN STREAM BYTECOUNTVAR
206+
BYTECOUNTVAL T]
207+
[FUNCTION (LAMBDA (STREAM NOERROR)
208+
(UTF16BE.PEEKCCODEFN STREAM NOERROR T]
209+
(FUNCTION \UTF16.BACKCCODEFN)
210+
[FUNCTION (LAMBDA (STREAM CHARCODE)
211+
(UTF16BE.OUTCHARFN STREAM CHARCODE T]
212+
NIL EXTERNALEOL])
213213
)
214214

215215
(MAKE-UNICODE-FORMATS EXTERNALEOL)
@@ -993,15 +993,15 @@
993993
)
994994
)
995995
(DECLARE%: DONTCOPY
996-
(FILEMAP (NIL (4126 17801 (UTF8.OUTCHARFN 4136 . 7332) (UTF8.INCCODEFN 7334 . 12450) (UTF8.PEEKCCODEFN
997-
12452 . 17226) (\UTF8.BACKCHARFN 17228 . 17799)) (17802 20994 (UTF16BE.OUTCHARFN 17812 . 18545) (
998-
UTF16BE.INCCODEFN 18547 . 19430) (UTF16BE.PEEKCCODEFN 19432 . 20503) (\UTF16.BACKCHARFN 20505 . 20992)
999-
) (21024 24537 (MAKE-UNICODE-FORMATS 21034 . 24535)) (24633 25939 (UNICODE.UNMAPPED 24643 . 25937)) (
1000-
25940 26476 (XCCS-UTF8-AFTER-OPEN 25950 . 26474)) (27546 27895 (XTOUCODE 27556 . 27724) (UTOXCODE
1001-
27726 . 27893)) (27935 44118 (READ-UNICODE-MAPPING-FILENAMES 27945 . 29107) (READ-UNICODE-MAPPING
1002-
29109 . 32407) (WRITE-UNICODE-MAPPING 32409 . 36626) (WRITE-UNICODE-INCLUDED 36628 . 41350) (
1003-
WRITE-UNICODE-MAPPING-HEADER 41352 . 42584) (WRITE-UNICODE-MAPPING-FILENAME 42586 . 44116)) (47455
1004-
55928 (MAKE-UNICODE-TRANSLATION-TABLES 47465 . 55926)) (56349 64253 (HEXSTRING 56359 . 57520) (
1005-
UTF8HEXSTRING 57522 . 59727) (NUTF8CODEBYTES 59729 . 60392) (NUTF8STRINGBYTES 60394 . 60875) (
1006-
XTOUSTRING 60877 . 63888) (XCCSSTRING 63890 . 64251)) (64254 65723 (SHOWCHARS 64264 . 65721)))))
996+
(FILEMAP (NIL (4105 17365 (UTF8.OUTCHARFN 4115 . 6895) (UTF8.INCCODEFN 6897 . 12013) (UTF8.PEEKCCODEFN
997+
12015 . 16789) (\UTF8.BACKCCODEFN 16791 . 17363)) (17366 20563 (UTF16BE.OUTCHARFN 17376 . 18109) (
998+
UTF16BE.INCCODEFN 18111 . 18994) (UTF16BE.PEEKCCODEFN 18996 . 20067) (\UTF16.BACKCCODEFN 20069 . 20561
999+
)) (20593 22591 (MAKE-UNICODE-FORMATS 20603 . 22589)) (22687 23993 (UNICODE.UNMAPPED 22697 . 23991)) (
1000+
23994 24530 (XCCS-UTF8-AFTER-OPEN 24004 . 24528)) (25600 25949 (XTOUCODE 25610 . 25778) (UTOXCODE
1001+
25780 . 25947)) (25989 42172 (READ-UNICODE-MAPPING-FILENAMES 25999 . 27161) (READ-UNICODE-MAPPING
1002+
27163 . 30461) (WRITE-UNICODE-MAPPING 30463 . 34680) (WRITE-UNICODE-INCLUDED 34682 . 39404) (
1003+
WRITE-UNICODE-MAPPING-HEADER 39406 . 40638) (WRITE-UNICODE-MAPPING-FILENAME 40640 . 42170)) (45509
1004+
53982 (MAKE-UNICODE-TRANSLATION-TABLES 45519 . 53980)) (54403 62307 (HEXSTRING 54413 . 55574) (
1005+
UTF8HEXSTRING 55576 . 57781) (NUTF8CODEBYTES 57783 . 58446) (NUTF8STRINGBYTES 58448 . 58929) (
1006+
XTOUSTRING 58931 . 61942) (XCCSSTRING 61944 . 62305)) (62308 63777 (SHOWCHARS 62318 . 63775)))))
10071007
STOP

library/UNICODE.LCOM

-445 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)