From 4d5472cd2f12fc6dc081eb6bbcf396f192aa2225 Mon Sep 17 00:00:00 2001 From: gilch Date: Sun, 24 Dec 2023 14:12:53 -0700 Subject: [PATCH 1/3] Add raw symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Common Lisp–style escapes. --- src/hissp/reader.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/hissp/reader.py b/src/hissp/reader.py index cd1254b90..ff6f85904 100644 --- a/src/hissp/reader.py +++ b/src/hissp/reader.py @@ -85,8 +85,16 @@ )* # Zero or more times. " # Close quote. ) + |(?P + [|] # open + (?:[^|\\] # Any non-magic character. + |\\(?:.|\n) # Backslash only if paired, including with newline. + )* + [|] # close + ) |(?P [#]?" # String not closed. + |[|] # Injection not closed. |;.* # Comment may need another line. ) |(?P(?:[^\\ \n"();]|\\.)+) # Let Python deal with it. @@ -259,6 +267,7 @@ def _parse(self) -> Iterator: elif k == "close": return self._close() elif k == "macro": yield from self._macro(v) elif k == "string": yield self._string(v) + elif k == "injection":yield self._injection(v) elif k == "continue": raise self._continue() elif k == "atom": yield self.atom(v) else: raise self._error(k) @@ -498,6 +507,9 @@ def _string(v): val = v[1:-1] # Only remove quotes. return v if (v := pformat(val)).startswith("(") else f"({v})" + def _injection(self, v): + return re.sub(r"(?s)\\(.)", R'\1', v[1:-1]) + def _continue(self): return SoftSyntaxError("Incomplete string token.", self.position()) From b8872e940f79166b6b0031ac766a94e42c12e975 Mon Sep 17 00:00:00 2001 From: gilch Date: Sun, 24 Dec 2023 15:19:13 -0700 Subject: [PATCH 2/3] Raw symbols with doubled (Smalltalk-style) escapes and no newlines. 
--- src/hissp/reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hissp/reader.py b/src/hissp/reader.py index ff6f85904..321fe894c 100644 --- a/src/hissp/reader.py +++ b/src/hissp/reader.py @@ -87,8 +87,8 @@ ) |(?P [|] # open - (?:[^|\\] # Any non-magic character. - |\\(?:.|\n) # Backslash only if paired, including with newline. + (?:[^|\n] # No newlines or unpaired |. + |[|][|] # | only if paired. )* [|] # close ) @@ -508,7 +508,7 @@ def _string(v): return v if (v := pformat(val)).startswith("(") else f"({v})" def _injection(self, v): - return re.sub(r"(?s)\\(.)", R'\1', v[1:-1]) + return v[1:-1].replace("||", "|") def _continue(self): return SoftSyntaxError("Incomplete string token.", self.position()) From 6c7374ef4385a30375be7292652fc05d91b5fdad Mon Sep 17 00:00:00 2001 From: gilch Date: Sun, 24 Dec 2023 15:27:09 -0700 Subject: [PATCH 3/3] Eliminate raw/hash string distinction || tokens are now called "fragments". Primer considers strings, symbols/control words to be special cases of fragments with shorthand. 
--- docs/lissp_lexer.py | 2 + docs/lissp_whirlwind_tour.rst | 24 +--- docs/primer.rst | 235 +++++++++++++++++----------------- docs/style_guide.rst | 2 +- src/hissp/macros.lissp | 35 ++--- src/hissp/reader.py | 21 ++- tests/test_cmd.py | 15 ++- tests/test_macros.lissp | 4 +- 8 files changed, 164 insertions(+), 174 deletions(-) diff --git a/docs/lissp_lexer.py b/docs/lissp_lexer.py index 4010bb55b..b5272de6d 100644 --- a/docs/lissp_lexer.py +++ b/docs/lissp_lexer.py @@ -68,7 +68,9 @@ def preprocess_atom(lexer, match, ctx=None): pt.Punctuation, # close pt.Operator, # macro pt.String, + pt.String.Symbol, pt.Error, # continue + pt.Error, # unclosed using(AtomSubLexer), pt.Error, ), diff --git a/docs/lissp_whirlwind_tour.rst b/docs/lissp_whirlwind_tour.rst index 3f985bd56..80de8dea8 100644 --- a/docs/lissp_whirlwind_tour.rst +++ b/docs/lissp_whirlwind_tour.rst @@ -357,22 +357,18 @@ Lissp Whirlwind Tour ;;;; 6.3 String Literals - #> "raw string" - >>> ('raw string') - 'raw string' + #> "a string" + >>> ('a string') + 'a string' #> 'not-string' ;symbol >>> 'notQz_stringQzAPOS_' 'notQz_stringQzAPOS_' - #> #"Say \"Cheese!\" \u263a" ;Hash strings process Python escapes. + #> "Say \"Cheese!\" \u263a" ;Python escape sequences. >>> ('Say "Cheese!" ☺') 'Say "Cheese!" ☺' - #> "Say \"Cheese!\" \u263a" ;Raw strings don't. - >>> ('Say \\"Cheese!\\" \\u263a') - 'Say \\"Cheese!\\" \\u263a' - #> "string #..with @@ -382,12 +378,6 @@ Lissp Whirlwind Tour 'string\nwith\nnewlines\n' - #> "one\" - #..string\\" ;Tokenizer expects paired \'s, even raw. - >>> ('one\\"\nstring\\\\') - 'one\\"\nstring\\\\' - - ;;;; 7 Advanced Calls #> (dict :) ;Left paren before function! Notice the :. @@ -443,7 +433,7 @@ Lissp Whirlwind Tour #.. :* "xyz" ;:* is a repeatable positional target. #.. :** (dict : sep "-") ;Target :** to unpack mapping. #.. flush True ;Kwargs still allowed after :**. - #.. :** (dict : end #"!?\n")) ;Multiple :** allowed too. + #.. :** (dict : end "!?\n")) ;Multiple :** allowed too. 
>>> print( ... (1), ... *('abc'), @@ -1629,14 +1619,14 @@ Lissp Whirlwind Tour ;; Finds spam.lissp & eggs.lissp in the current package & compile to spam.py & eggs.py #> (.write_text (pathlib..Path "eggs.lissp") - #.. #"(print \"Hello World!\")") + #.. "(print \"Hello World!\")") >>> __import__('pathlib').Path( ... ('eggs.lissp')).write_text( ... ('(print "Hello World!")')) 22 #> (.write_text (pathlib..Path "spam.lissp") - #.. #"(print \"Hello from spam!\") + #.. "(print \"Hello from spam!\") #..(.update (globals) : x 42)") >>> __import__('pathlib').Path( ... ('spam.lissp')).write_text( diff --git a/docs/primer.rst b/docs/primer.rst index 1e81f7997..4f0fc3f54 100644 --- a/docs/primer.rst +++ b/docs/primer.rst @@ -1,12 +1,12 @@ .. Copyright 2019, 2020, 2021, 2022, 2023 Matthew Egan Odendahl SPDX-License-Identifier: CC-BY-SA-4.0 -.. Hidden doctest adds bundled macros for REPL-consistent behavior. - #> (.update (globals) : _macro_ (types..SimpleNamespace : :** (vars hissp.._macro_))) - >>> globals().update( - ... _macro_=__import__('types').SimpleNamespace( - ... **vars( - ... __import__('hissp')._macro_))) +.. Hidden doctest adds bundled macros for REPL-consistent behavior. + #> (.update (globals) : _macro_ (types..SimpleNamespace : :** (vars hissp.._macro_))) + >>> globals().update( + ... _macro_=__import__('types').SimpleNamespace( + ... **vars( + ... __import__('hissp')._macro_))) Hissp Primer ############ @@ -296,6 +296,36 @@ Hissp has special behaviors for Python's `tuple` and `str` types. Everything else is just data, and Hissp does its best to compile it that way. +In Lissp, the Hissp `tuple` and `str` elements +are written with ``()`` and ``||``, respectively. +The `str`\ s represent text fragments, +so the ``||`` tokens in Lissp are called "fragments". + +Lissp has full generality with just these two elements, +although some things would be awkward. +Here's our first Hissp program again written that way: + +.. 
code-block:: REPL + + #> (|lambda| (|name|) + #.. (|print| (|quote| |Hello|) |name|)) + >>> (lambda name: + ... print( + ... 'Hello', + ... name)) + at 0x...> + + #> (|_| (|quote| |World|)) + >>> _( + ... 'World') + Hello World + +Notice that the fragments are interpreted in different ways depending on the context. +``|lambda|`` is a special instruction to the Hissp compiler. +``|print|`` is a fragment of Python code, an identifier in this case, +but basically any Python expression works. +``|Hello|`` is a string. + In addition to the special behaviors from the Hissp level for tuple and string lexical elements, the Lissp level has special behavior for *reader macros*. @@ -306,7 +336,7 @@ which is passed through to the Hissp level with minimal processing. Basic Atoms +++++++++++ -Most literals work just like Python: +Most data literals work just like Python: .. code-block:: REPL @@ -349,130 +379,93 @@ and do not appear in the output. >>> -Raw Strings -+++++++++++ - -Hash strings and raw strings represent text data, -but are lexically distinct from the other atoms, -and have somewhat different behavior. +Strings ++++++++ -*Raw strings* in Lissp are double-quoted and read backslashes and newlines literally, -which makes them similar to triple-quoted r-strings in Python. -In other words, escape sequences are not processed. +You've already seen how to make strings from fragments: you quote them. .. code-block:: REPL - #> "Two - #..lines\ntotal" - >>> ('Two\nlines\\ntotal') - 'Two\nlines\\ntotal' + #> (|quote| |Hello|) + >>> 'Hello' + 'Hello' - #> (print _) - >>> print( - ... _) - Two - lines\ntotal - -Do note, however, that the `tokenizer ` still expects backslashes to be paired with another character. +We've already seen that the reader has a shorthand for quotation. .. code-block:: REPL - #> "\" - #..\\" ; One string, not two! - >>> ('\\"\n\\\\') - '\\"\n\\\\' + #> '|Hello| + >>> 'Hello' + 'Hello' - #> (print _) - >>> print( - ... 
_) - \" - \\ +If that particular fragment weren't quoted in this context, +it would be interpreted as a Python identifier instead. -The second double-quote character didn't end the raw string, -but the backslash "escaping" it was still read literally. -The third double quote did end the string despite being adjacent to a backslash, -because that was already paired with another backslash. -Again, this is the `same as Python's r-strings `. +Fragment text is raw; you can't use Python's escape sequences for special characters. +(Although you can escape a ``|`` by doubling it.) -Recall that the Hissp-level `str` type is used to represent Python identifiers in the compiled output, -and must be quoted with the ``quote`` special form to represent text data instead. +.. code-block:: REPL ->>> readerless( -... ('print' # str containing identifier -... ,('quote','hi'),) # string as data -... ) -"print(\n 'hi')" ->>> eval(_) -hi + #> '|Say "Cheese!"\n\u263a| + >>> 'Say "Cheese!"\\n\\u263a' + 'Say "Cheese!"\\n\\u263a' -Hissp-level strings can represent almost any Python code to include in the compiled output, -not just identifiers. -So another way to represent text data in Hissp -is a Hissp-level string that contains the Python code for a string literal. +The solution, of course, is to put a Python string literal in the fragment, +and then not quote it. +This is another way to make strings from fragments. ->>> readerless( -... ('print' # str containing identifier -... ,'"hi"',) # str containing a string literal -... ) -'print(\n "hi")' ->>> eval(_) -hi +.. code-block:: REPL -Quoting our entire example shows us how that Lissp would get translated to Hissp. -(When quoted, it's just data.) + #> |"Say \"Cheese!\"\n\u263a"| ; There is a reason we used double quotes. + >>> "Say \"Cheese!\"\n\u263a" + 'Say "Cheese!"\n☺' -.. code-block:: REPL + #> (|print| |_|) + >>> print( + ... _) + Say "Cheese!" + ☺ - #> (quote - #.. (lambda (name) - #.. (print "Hello" name))) - >>> ('lambda', - ... 
('name',), - ... ('print', - ... "('Hello')", - ... 'name',),) - ('lambda', ('name',), ('print', "('Hello')", 'name')) +And, in fact, the reader has a shorthand for this already. +If you've got a fragment surrounded by double quotes (``"``), you can drop the ``||``. +(This doesn't work on single quotes, since those are reserved for the reader's quotation shorthand.) -This tuple is data, but it's also valid Hissp code. -You could pass it to `readerless()` to get working Python code: +.. code-block:: REPL ->>> readerless(('lambda', ('name',), ('print', "('Hello')", 'name'))) -"(lambda name:\n print(\n ('Hello'),\n name))" ->>> print(_) -(lambda name: - print( - ('Hello'), - name)) + #> "Say \"Cheese!\" + #..\u263a" ; Notice it includes parentheses. + >>> ('Say "Cheese!"\n☺') + 'Say "Cheese!"\n☺' -Notice the raw string reader syntax -``"Hello"`` produced a string in the Hissp output containing -``('Hello')``, a Python string literal, -which saved us a ``quote`` form. +Also notice that you're allowed a literal newline +(although the ``\n`` escape sequence also works), +like in Python's triple-quoted strings. +This is a convenience not currently allowed in the ``||``-delimited tokens. -Hash Strings -++++++++++++ +These are not direct representations like the other atoms! +They're reader shorthand for a fragment *containing* a string literal. +If you expect them to represent themselves in the Hissp when you quote them, +you will be confused. +``'"foo"`` is a shorthand for ``|('foo')|``. Try it. -You can enable the processing of Python's backslash escape sequences -by prefixing the raw string syntax with a hash ``#``. -These are called *hash strings*. +This also applies to double-quoted (``""``) tokens quoted indirectly through a tuple. +See the difference? .. code-block:: REPL - #> #"Three - #..lines\ntotal" - >>> ('Three\nlines\ntotal') - 'Three\nlines\ntotal' - - #> (print _) - >>> print( - ... _) - Three - lines - total + #> '("foo" |bar|) + >>> ("('foo')", + ... 
'bar',) + ("('foo')", 'bar') Symbols +++++++ +Symbols are meant for variable names and the like. +They're another reader shorthand. +If you have a fragment containing a valid Python identifier, +you can drop the ``||``. + In our basic example: .. code-block:: Lissp @@ -483,8 +476,7 @@ In our basic example: ``lambda``, ``name``, ``print``, ``Hello``, and ``name`` are *symbols*. -Symbols are meant for variable names and the like. -Quoting our example again to see how Lissp would get read as Hissp, +Quoting our example to see how Lissp would get read as Hissp, .. code-block:: REPL @@ -500,22 +492,21 @@ Quoting our example again to see how Lissp would get read as Hissp, ('lambda', ('name',), ('print', ('quote', 'Hello'), 'name')) we see that there are *no symbol objects* at the Hissp level. -The Lissp symbols are read in as strings. +The Lissp symbols are read in as strings, just like fragments. In other Lisps, symbols are a data type in their own right, but symbols only exist as a *reader syntax* in Lissp, where they represent the subset of Hissp-level strings that can act as identifiers. +Python has no built in symbol type +and instead uses strings pervasively whenever it has to represent identifiers. Symbols in Lissp become strings in Hissp which become identifiers in Python, -unless they're quoted, like ``('quote','Hello',)``, -in which case they become string literals in Python. - -Experiment with this process in the REPL. +unless they're quoted, in which case they become string literals in Python. Attributes ---------- -Symbols can have internal ``.``'s to access attributes. +Symbols can have internal ``.``\ s to access attributes. .. code-block:: REPL @@ -561,7 +552,7 @@ but used in another. Munging ------- -Symbols have another important difference from raw strings: +Symbols have another important difference from other fragments. .. 
code-block:: REPL @@ -649,14 +640,20 @@ Notice that only the first digit had to be munged to make it a valid Python iden >>> 'QzDIGITxONE_o8' 'QzDIGITxONE_o8' +By the way, since module handles count as symbols, +special characters in them also get munged. +They will then attempt to import modules with funny names, +which only works if you have modules with said names to import. Just saying. + Control Words ------------- -Atoms that begin with a colon are called *control words* [#key]_. +Symbols that begin with a colon are called *control words* [#key]_. +(They don't need the ``||``\ s either, but they're allowed.) These are mainly used to give internal structure to macro invocations—you want a word distinguishable from a string at compile time, but it's not meant to be a Python identifier. -Thus, they do not get munged: +Thus, they do not get munged like normal symbols would: .. code-block:: REPL @@ -674,7 +671,7 @@ but you can: >>> ':foo->bar?' ':foo->bar?' -Note that you can do nearly the same thing with a raw string: +Note that you can do nearly the same thing with a ``""`` token: .. code-block:: REPL @@ -880,7 +877,7 @@ the rest are pairs, implied by position. .. code-block:: REPL - #> (print : :? 1 :? 2 :? 3 sep ":" end #"\n.") + #> (print : :? 1 :? 2 :? 3 sep ":" end "\n.") >>> print( ... (1), ... (2), @@ -908,7 +905,7 @@ For example: .. code-block:: REPL - #> (print 1 2 3 : sep ":" end #"\n.") + #> (print 1 2 3 : sep ":" end "\n.") >>> print( ... (1), ... (2), @@ -961,7 +958,7 @@ Use the control words ``:*`` for iterable unpacking, .. code-block:: REPL - #> (print : :* '(1 2) :? 3 :* '(4) :** (dict : sep : end #"\n.")) + #> (print : :* '(1 2) :? 3 :* '(4) :** (dict : sep : end "\n.")) >>> print( ... *((1), ... (2),), @@ -1100,7 +1097,7 @@ Python injection: .. code-block:: REPL - #> .##"{(1, 2): \"\"\"buckle my shoe\"\"\"} # This is Python!" + #> .#"{(1, 2): \"\"\"buckle my shoe\"\"\"} # This is Python!" >>> {(1, 2): """buckle my shoe"""} # This is Python! 
{(1, 2): 'buckle my shoe'} @@ -1291,7 +1288,7 @@ If you tried to run .. code-block:: Python - readerless((, 1, 2, 3, ':', 'sep', ':')) + readerless((, 1, 2, 3, ':', 'sep', ':')) then you'd get a syntax error. Try it, if you'd like. diff --git a/docs/style_guide.rst b/docs/style_guide.rst index af2daa9df..afd2f5146 100644 --- a/docs/style_guide.rst +++ b/docs/style_guide.rst @@ -656,7 +656,7 @@ this can be done at read time instead: .. code-block:: REPL - #> (print (.upper '.#(textwrap..dedent #"\ + #> (print (.upper '.#(textwrap..dedent "\ #.. These lines #.. Don't interrupt #.. the flow."))) diff --git a/src/hissp/macros.lissp b/src/hissp/macros.lissp index 0b6221f26..f7f2c5722 100644 --- a/src/hissp/macros.lissp +++ b/src/hissp/macros.lissp @@ -324,7 +324,7 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. 1::2::3 See also: - `<\<# `, `attach`, + `<\\<# `, `attach`, `lambda `. " (let ($fn `$#fn) @@ -865,16 +865,16 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. ;; >>> __import__('hissp')._macro_.bQzHASH_ ;; ;; - ;; #> (H:#b\# "b# macro at compile time") + ;; #> (H:#b\# |b# macro at compile time|) ;; >>> # hissp.._macro_.bQzHASH_ ;; ... b'b# macro at compile time' ;; b'b# macro at compile time' ;; - ;; #> hissp.._macro_.b#"Fully-qualified b# macro at read time." + ;; #> hissp.._macro_.b#|Fully-qualified b# macro at read time.| ;; >>> b'Fully-qualified b# macro at read time.' ;; b'Fully-qualified b# macro at read time.' ;; - ;; #> H:##b"Read-time b# via alias." + ;; #> H:##b|Read-time b# via alias.| ;; >>> b'Read-time b# via alias.' ;; b'Read-time b# via alias.' ;; @@ -1259,7 +1259,7 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. ([{'a'}, 'bc'], 'de') See also: - `-\<>>`, `X#`, `get#`. + `-\\<>>`, `X#`, `get#`. " (functools..reduce XY#.#"(Y[0],X,*Y[1:],)" (map X#.#"X if type(X) is tuple else (X,)" forms) @@ -1315,7 +1315,7 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. 
(define _TAO (lambda s (-> (.join " " (re..findall "(?m)^# (.*)~$" (s hissp.))) - (.replace ":" #"\n")))) + (.replace ":" "\n")))) ;;;; Control Flow @@ -1778,15 +1778,18 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. ;; ;; .. code-block:: REPL ;; - ;; #> b#"bytes + ;; #> b#|\xff'\n'||foo| + ;; >>> b"\xff'\n'|foo" + ;; b"\xff'\n'|foo" + ;; + ;; #> b#.#"bytes ;; #..with\nnewlines" ;; >>> b'bytes\nwith\nnewlines' ;; b'bytes\nwith\nnewlines' ;; (-> raw - ast..literal_eval - (.replace "'" "\'") - (.replace #"\n" "\n") + (.replace "'" '|\'|) + (.replace "\n" '|\n|) (-<>> (.format "b'{}'")) ast..literal_eval)) @@ -2485,7 +2488,7 @@ Hidden doctest adds bundled macros for REPL-consistent behavior. ;; ... ;; Exception ;; - `(exec ',(.format #"\ + `(exec ',(.format "\ from functools import partial,reduce from itertools import *;from operator import * def engarde(xs,h,f,/,*a,**kw): @@ -2505,9 +2508,9 @@ _macro_=__import__('types').SimpleNamespace() try:exec('from {}._macro_ import *',vars(_macro_)) except ModuleNotFoundError:pass" __name__) - ,ns))(.##"\144efma\143ro" import(: :* args) - `(.##"p\162int"(.##"\143ode\143s..en\143ode" - (_TAO .##"in\163pe\143\164..ge\164\163our\143e")','.##"ro\16413"))) + ,ns))(.#"\144efma\143ro" import(: :* args) + `(.#"p\162int"(.#"\143ode\143s..en\143ode" + (_TAO .#"in\163pe\143\164..ge\164\163our\143e")','.#"ro\16413"))) ;;;; Advanced @@ -2631,7 +2634,7 @@ except ModuleNotFoundError:pass" ``:`` NOP (no depth) Has no effect. A separator when no other magic applies. - They can be escaped with a backtick (:literal:`\``). + They can be escaped with a backtick (:literal:`\\``). Other terms are either callables or data, and read as Lissp. 
@@ -2735,7 +2738,7 @@ except ModuleNotFoundError:pass" (if-else (ands (op#is_ str (type sym)) (re..search ".[:^]" (hissp..demunge sym))) (._rewrite _macro_ - (re..findall "([/&@<>*:]|(?:[^,^`/&@<>*:]|`[,^/&@<>*:])+)(,?\^*)" + (re..findall <<#;([/&@<>*:]|(?:[^,^`/&@<>*:]|`[,^/&@<>*:])+)(,?\^*) (hissp..demunge sym)) : :* (map X#(.^*\# _macro_ X) args)) `(,@(map X#(.^*\# _macro_ X) e)))) diff --git a/src/hissp/reader.py b/src/hissp/reader.py index 321fe894c..9eaf1f902 100644 --- a/src/hissp/reader.py +++ b/src/hissp/reader.py @@ -75,17 +75,16 @@ |['`,] |[.][#] # Any atom that ends in ``#``, but not ``.#`` or ``\#``. - |(?:[^\\ \n"();#]|\\.)*(?:[^.\\ \n"();#]|\\.)[#]+ + |(?:[^\\ \n"|();#]|\\.)*(?:[^.\\ \n"|();#]|\\.)[#]+ ) |(?P - [#]? # raw? " # Open quote. (?:[^"\\] # Any non-magic character. |\\(?:.|\n) # Backslash only if paired, including with newline. )* # Zero or more times. " # Close quote. ) - |(?P + |(?P [|] # open (?:[^|\n] # No newlines or unpaired |. |[|][|] # | only if paired. @@ -94,10 +93,10 @@ ) |(?P [#]?" # String not closed. - |[|] # Injection not closed. |;.* # Comment may need another line. ) - |(?P(?:[^\\ \n"();]|\\.)+) # Let Python deal with it. + |(?P[|]) + |(?P(?:[^\\ \n"|();]|\\.)+) # Let Python deal with it. |(?P.) """ ) @@ -267,8 +266,9 @@ def _parse(self) -> Iterator: elif k == "close": return self._close() elif k == "macro": yield from self._macro(v) elif k == "string": yield self._string(v) - elif k == "injection":yield self._injection(v) + elif k == "fragment": yield self._fragment(v) elif k == "continue": raise self._continue() + elif k == "unclosed": raise SyntaxError("Unpaired |", self.position()) elif k == "atom": yield self.atom(v) else: raise self._error(k) # fmt: on @@ -500,14 +500,11 @@ def escape(atom): @staticmethod def _string(v): - if v[0] == "#": # Let Python process escapes. - v = v.replace("\\\n", "").replace("\n", R"\n") - val = ast.literal_eval(v[1:]) - else: # raw - val = v[1:-1] # Only remove quotes. 
+ v = v.replace("\\\n", "").replace("\n", R"\n") + val = ast.literal_eval(v) return v if (v := pformat(val)).startswith("(") else f"({v})" - def _injection(self, v): + def _fragment(self, v): return v[1:-1].replace("||", "|") def _continue(self): diff --git a/tests/test_cmd.py b/tests/test_cmd.py index 762810b8c..d89271c65 100644 --- a/tests/test_cmd.py +++ b/tests/test_cmd.py @@ -198,17 +198,18 @@ def test_repl_str_continue(): x " - b#"" - b#"foo bar" - b#" + b#.#"" + b#.#"foo bar" + b#.#" " - b#" + b#.#" x" - (.decode b#"\\xff - foo" : errors 'ignore) + (.decode b#.#<<#;\\xff + ;; foo + : errors 'ignore) """, out="""\ """ @@ -220,7 +221,7 @@ def test_repl_str_continue(): #> b'foo bar' #> #..#..#..b'\n\n\n' #> #..#..b'\n\nx' - #> #..'\nfoo' + #> #..#..'\nfoo' #> """, err="""\ """ diff --git a/tests/test_macros.lissp b/tests/test_macros.lissp index 896b61d93..cdd769bee 100644 --- a/tests/test_macros.lissp +++ b/tests/test_macros.lissp @@ -228,7 +228,7 @@ test_string_newline (lambda (self) - (self.assertEqual #"\ + (self.assertEqual "\ foo\ bar\nbaz" "foobar @@ -238,7 +238,7 @@ baz") foo bar " - #"\n\nfoo\nbar\n")) + "\n\nfoo\nbar\n")) test_string_reader_macro (lambda (self)