-
Notifications
You must be signed in to change notification settings - Fork 0
/
source.red
414 lines (378 loc) · 13.4 KB
/
source.red
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
Red [
title: "Rich-content editor basis and source format codec"
author: @hiiamboris
license: BSD-3
description: {
This provides an internal efficient format for holding rich-content /data:
[item1 attr1 item2 attr2 ...]
itemN can be a char! value or space! object
attrN is an integer index in global attribute catalog
All rich-content edits work with this /data.
For convenience there's also a high-level 'source' format, which is used by VID/S to populate /data.
Export from /data into source format is also available (as rich/source/serialize).
Source format design principles:
- Markdown & co are very complex formats for humans, that a machine can understand but with a lot of work.
- GML and it's successors (HTML, XML) are source formats for machines that
neither a human cannot read anymore, nor machine parse efficiently.
- Source format used here is a Red block-level data representation for machines,
that should be simple enough for a human to read.
Source syntax summary:
- `name` word opens an attribute with a `true` value
- `name: value` opens an attribute with a given value (sameness is not guaranteed to persist!)
if it's already open, just attribute value is replaced
value cannot be false or none
- `/name` closes a previously opened attribute
- `"string"` represents text fragment to be assigned a currently opened attribute set
- `object!` represents a space! object, but attributes for it have no effect for now (could be extended later)
Supported attributes are:
- [bold italic underline strike] as flag attributes
- [color backdrop size font] as value-bearing attributes:
color & backdrop accept tuples
size (font size) accepts integer
font (font face) accepts string
}
notes: {
Not sure whether to use /same or /case comparison for attributes.
- /same good for blocks like code
- /case good for strings
Since I'm using copy/deep sometimes, and decided against /command attribute, /case makes most sense.
}
]
;@@ should there be a single items/attributes array for the whole document composed from many paragraphs sources?
;; this context proceeds from lowest level (ranges) to highest level (source) below
rich: context [ ;@@ what would be a better name?
~: self
;; catalog holds every attribute combo ever encountered
;; format: [hash [attr value ...] ...] - hash is used to look up by attribute combo (binary is hashed by hash! type)
;; but normally attributes are indexed by integer (half offset, zero-based), since it's more readable on mold
;; attrs are in a block because there usually aren't many anyway, no reason to use a map
;; attr names must always all be lowercase or hashing would have to be slowed down with auto-lowecasing
;@@ test lowercase in high-level funcs
catalog: make hash! 1024
hash-attrs: function [attrs [block!]] [
attrs: sort/skip append clear [] attrs 2 ;-- sort to guarantee uniqueness of the combo
checksum (native-mold/all/flat/only attrs) 'sha1 ;-- native mold is much faster than save into redbin
]
store-attrs: function [attrs [block!]] [
#assert [any [empty? attrs all extract next attrs 2]] ;-- only truthy values are allowed for attrs
hash: hash-attrs attrs
unless pos: find catalog hash [
pos: tail catalog
repend catalog [hash copy/deep attrs]
]
half skip? pos
]
index->attrs: function [index [integer!]] [
copy/deep pick catalog index + 1 * 2 ;-- copy ensures attrs are never modified in place
]
attrs->index: function [attrs [block!]] [
hash: hash-attrs attrs
if pos: find catalog hash [half skip? pos]
]
store-attrs [] ;-- empty attribute set is always present and has zero index
#assert [ ;-- other attributes are added for testing purposes
1 = store-attrs [bold #(true)]
2 = store-attrs [bold #(true) underline #(true)]
3 = store-attrs [size 8]
4 = store-attrs [size 12]
4 = store-attrs [size 12]
3 = attrs->index [size 8]
[bold #(true)] = index->attrs 1
]
ranges: context [
to-rtd-pair: function [
"Convert source range into RTD range"
range [pair!]
][
range/1 + 1 thru span? range
]
from-rtd-pair: function [
"Convert RTD range into source range"
range [pair!]
][
0 thru range/2 + range/1 - 1
]
#assert [2x5 = to-rtd-pair 1x6]
#assert [1x6 = from-rtd-pair 2x5]
]
extract-ranges: function [data [block!] (even? length? data)] [
ranges: clear []
range-code: 0
offset: 0
flush: [
if range-code > 0 [
range: range-start thru offset
attrs: index->attrs range-code
repend ranges [range attrs]
]
]
foreach [item code] data [ ;@@ use for-each
if code <> range-code [
do flush
range-start: offset
range-code: code
]
offset: offset + 1
]
do flush
copy ranges
]
#assert [ [0x3 [bold #(true)]] = extract-ranges [_ 1 _ 1 _ 1] ]
;; external context allows me to use /copy word without shadowing the global one
attributes: context [
to-rtd-flag: make-rtd-flags: change: mark: clear: pick: exclude: compatible?: none
]
attributes/to-rtd-flag: function [attr [word!] value [tuple! logic! string! integer!]] [
switch attr [
bold italic underline strike [attr]
color size font [value]
backdrop [compose [backdrop (value)]]
]
]
#assert [[backdrop 10.20.30] = attributes/to-rtd-flag 'backdrop 10.20.30]
value-types!: make typeset! [tuple! logic! string! integer!]
rtd-attrs: make hash! [bold italic underline strike color backdrop size font]
attributes/make-rtd-flags: function [
"Make an RTD flags block out of data attributes"
data [block!] limits [pair!] "segment to extract" ;-- segment used in to-spaces to create individual paragraphs
][
limits: clip limits 0 half length? data
data: append/part clear [] (skip data limits/1 * 2) (2 * span? limits)
result: clear []
ranges: extract-ranges data
foreach [range attrs] ranges [
flags: clear []
pair: ~/ranges/to-rtd-pair range
foreach [attr value] attrs [ ;@@ use map-each
unless find value-types! type? :value [
ERROR "rich-content attribute value cannot be (type? :value) = (mold/flat/part :value 60)"
]
attr: attributes/to-rtd-flag to word! attr value
if attr [append append flags pair attr] ;-- only collects attributes supported by RTD
]
append result flags
]
copy result
]
#assert [
[1x3 bold] = attributes/make-rtd-flags [_ 0 _ 0 _ 1 _ 1 _ 1 _ 0 _ 0] 2x5
[1x1 bold] = attributes/make-rtd-flags [_ 0 _ 0 _ 1 _ 1 _ 1 _ 0 _ 0] 4x5
[3x3 bold] = attributes/make-rtd-flags [_ 0 _ 0 _ 1 _ 1 _ 1 _ 0 _ 0] 0x5
[3x3 bold] = attributes/make-rtd-flags [_ 0 _ 0 _ 1 _ 1 _ 1 _ 0 _ 0] 0x10
]
attributes/change: function [
attrs [block!] "modified"
attr [word!]
value
][
pos: find/skip attrs attr 2
either :value [
change/only change any [pos tail attrs] attr :value
][
remove/part pos 2
]
attrs
]
attributes/mark: function [ ;@@ maybe rename to set! ?
data [block!] "modified" (parse data [end | 1 3 [skip integer!] to end])
range [word! ('all = range) pair!]
attr [word!]
value
][
if range = 'all [range: 0 thru 2e9]
range: clip range 0 half length? data ;-- avoid runaway repeat if range is infinite
repeat i span? range [ ;@@ use for-each!
code: pick data i2: range/1 + i * 2
either last-code = code [ ;-- streaming optimization
code: new-code
][
attrs: copy index->attrs last-code: code
attributes/change attrs attr :value
new-code: code: store-attrs attrs
]
data/:i2: code
]
data
]
#assert [
[] = attributes/mark [] 4x12 'x 1
[_ 0 _ 1 _ 1 _ 1 _ 0] = attributes/mark [_ 0 _ 0 _ 1 _ 0 _ 0] 1x4 'bold on
[_ 0 _ 0 _ 0 _ 0 _ 0] = attributes/mark [_ 0 _ 1 _ 0 _ 1 _ 0] 1x4 'bold off
[_ 0 _ 0 _ 0 _ 0 _ 0] = attributes/mark [_ 0 _ 1 _ 1 _ 1 _ 0] 1x4 'bold off
]
;; unlike /mark, clears all attributes in the range
attributes/clear: function [
data [block!] "modified" (parse data [end | 1 3 [skip integer!] to end])
range [word! ('all = range) pair!]
][
if range = 'all [range: 0 thru 2e9]
range: clip range 0 half length? data ;-- avoid runaway repeat if range is infinite
repeat i span? range [poke data (range/1 + i * 2) 0] ;@@ use map-each/self!
data
]
attributes/pick: function [attrs [integer! block!] attr [word!]] [
if integer? attrs [attrs: index->attrs attrs]
select/skip attrs attr 2
]
#assert [
on = attributes/pick 1 'bold
none? attributes/pick 0 'bold
none? attributes/pick 3 'bold
8 = attributes/pick 3 'size
]
attributes/exclude: function [set1 [block!] set2 [block!]] [
result: copy set1
remove-each [name value] result [
:value == select/case/skip set2 name 2
]
result
]
#assert [
[a 1] = attributes/exclude [a 1 b 2] [b 2]
[a 1] = attributes/exclude [a 1 b 2] [b 2 a 3]
[a 3] = attributes/exclude [b 2 a 3] [a 1 b 2]
]
;; used to split text on font size change, to ease alignment
attributes/compatible?: function [index1 [integer!] index2 [integer!]] [
to logic! any [
index1 =? index2
all [
attrs1: index->attrs index1
attrs2: index->attrs index2
size1: select/skip attrs1 'size 2
size2: select/skip attrs2 'size 2
size1 =? size2
font1: select/skip attrs1 'font 2
font2: select/skip attrs2 'font 2
font1 = font2
]
]
]
source: context [deserialize: serialize: format: to-spaces: none]
source/format: function [
"Convert decoded source into plain text"
data [block!] "[item attr ...] block" (even? length? data)
/local format: {} ;-- used when item has no /format in the kit
][
result: make {} half length? data
foreach [item attr] data [ ;@@ use map-each
case [
char? :item [append result item]
space? :item [append result batch item [format]]
]
]
#debug clipboard [#print " rich/source/format: (mold/part result 120)"]
result
]
#assert ["abc" = source/format [#"a" 1 #"b" 0 #"c" 1]]
;@@ leverage prototypes for this
source/to-spaces: function [
"Transform decoded source into a list of spaces (for use in rich-content)"
data [block!] "[item attr ...] block" (even? length? data)
; return: [block!] "[content ranges]"
/local char
][
content: clear []
ranges: clear [] ;-- range spans of items that caret can dive into
;@@ or trim linefeed? or silently split into multiple paragraphs (hard)?
#assert [not find data #"^/" "line breaks are not allowed inside paragraph text"]
buf: clear {}
parse data [any [
[ s: [set char char! set attr1 integer! (append buf char)]
any [
set char char! set attr2 integer!
if (attributes/compatible? attr1 attr2) (append buf char)
] e: (
append content obj: make-space 'text []
append obj/text buf
clear buf
range: half as-pair skip? s skip? e
obj/flags: attributes/make-rtd-flags data range
)
| set obj [object! integer!] ( ;@@ apply attribute to the object?
append content obj
range: 0x1 + half skip? s
)
] (repend ranges [obj range])
| end
| (ERROR "Unsupported data in the source: (mold/part s 40)")
]]
reduce [copy content make hash! ranges]
]
source/deserialize: function [
"Split source into decoded block of [item attr ...]"
source [block!]
/local attr value item
][
result: clear [] ;@@ should items be just chars and objects? other types support, e.g. image?
attrs: clear []
parse source [any [
set attr [
word! (value: on)
| set-word! p: (value: do/next p 'p) :p ;-- reduce words (color names) to their values
]
(attributes/change attrs to word! attr :value)
| set attr refinement! ;-- attributes work stack-like and do not close automatically
(attributes/change attrs to word! attr none)
| set item string! ( ;@@ make it a module
code: store-attrs attrs
zip/into explode item code result
)
| set item skip (
code: store-attrs attrs
repend result [item code]
)
]]
copy result
]
#assert [
[#"1" 0 #"2" 0 #"3" 1 #"4" 1 #"5" 1 #"6" 1 #"7" 0] = source/deserialize ["12" bold "3456" /bold "7"]
]
source/serialize: function [
"Create a source block out of decoded data"
data [block!] "[item attr ...] block" (even? length? data)
][
result: clear []
string: clear {}
last-attrs: clear []
last-code: 0
flush-string: [
unless empty? string [
append result copy string
clear string
]
]
foreach [item code] data [
if last-code <> code [
do flush-string
attrs: index->attrs last-code: code
opened: attributes/exclude attrs last-attrs 2 ;-- native 'exclude' is useless since it ignores value slot
closed: attributes/exclude last-attrs attrs 2
last-attrs: attrs
foreach [name value] closed [ ;@@ use map-each
append result to refinement! name
]
foreach [name value] opened [ ;@@ use map-each
repend result either true = :value [[name]] [[to set-word! name :value]]
]
]
either char? :item [
append string item
][
do flush-string
append/only result :item
]
]
do flush-string
;@@ no reason to auto-close opened attributes?
copy result
]
#assert [
["x"] = source/serialize [#"x" 0]
["12" bold "3456" /bold "7"] = source/serialize [#"1" 0 #"2" 0 #"3" 1 #"4" 1 #"5" 1 #"6" 1 #"7" 0]
; ["12" bold underline "3" /bold /underline] = source/serialize [#"1" 0 #"2" 0 #"3" 2]
["12" bold underline "3"] = source/serialize [#"1" 0 #"2" 0 #"3" 2]
["12" bold underline "3" /bold /underline "4"] = source/serialize [#"1" 0 #"2" 0 #"3" 2 #"4" 0]
["1" size: 8 "2" /size size: 12 "3" /size "4"] = source/serialize [#"1" 0 #"2" 3 #"3" 4 #"4" 0]
]
]