From 5ca1ef7829c74cb0b0716e7c1b87772b13b6344a Mon Sep 17 00:00:00 2001 From: Mitchell Rosen Date: Fri, 21 Jun 2024 16:24:41 -0400 Subject: [PATCH] delete old doc support from the lexer and parser --- .../src/Unison/Syntax/TermParser.hs | 366 ------------------ unison-syntax/src/Unison/Syntax/Lexer.hs | 30 -- 2 files changed, 396 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 8c91633700..02b6442939 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -19,11 +19,8 @@ import Data.List qualified as List import Data.List.Extra qualified as List.Extra import Data.List.NonEmpty (NonEmpty ((:|))) import Data.List.NonEmpty qualified as NonEmpty -import Data.Maybe qualified as Maybe -import Data.Sequence qualified as Sequence import Data.Set qualified as Set import Data.Text qualified as Text -import Data.Tuple.Extra qualified as TupleE import Text.Megaparsec qualified as P import U.Core.ABT qualified as ABT import Unison.ABT qualified as ABT @@ -56,7 +53,6 @@ import Unison.Type (Type) import Unison.Type qualified as Type import Unison.Typechecker.Components qualified as Components import Unison.Util.Bytes qualified as Bytes -import Unison.Util.List (intercalateMapWith, quenchRuns) import Unison.Var (Var) import Unison.Var qualified as Var import Prelude hiding (and, or, seq) @@ -129,15 +125,6 @@ termLink' = do | Set.size s == 1 -> pure $ const (Set.findMin s) <$> id | otherwise -> customFailure $ UnknownTerm id s -link' :: (Monad m, Var v) => P v m (Either (L.Token Reference) (L.Token Referent)) -link' = do - id <- hqPrefixId - ns <- asks names - case (Names.lookupHQTerm Names.IncludeSuffixes (L.payload id) ns, Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns) of - (s, s2) | Set.size s == 1 && Set.null s2 -> pure . Right $ const (Set.findMin s) <$> id - (s, s2) | Set.size s2 == 1 && Set.null s -> pure . Left $ const (Set.findMin s2) <$> id - (s, s2) -> customFailure $ UnknownId id s s2 - link :: (Monad m, Var v) => TermP v m link = termLink <|> typeLink where @@ -452,7 +439,6 @@ termLeaf = delayQuote, (snd <$> delayBlock), bang, - docBlock, doc2Block <&> \(spanAnn, trm) -> trm {ABT.annotation = ABT.annotation trm <> spanAnn} ] @@ -610,66 +596,6 @@ doc2Block = do pure $ (spanAnn, Term.apps' f [addDelay tm]) _ -> regular -docBlock :: (Monad m, Var v) => TermP v m -docBlock = do - openTok <- openBlockWith "[:" - segs <- many segment - closeTok <- closeBlock - let a = ann openTok <> ann closeTok - pure . docNormalize $ Term.app a (Term.constructor a (ConstructorReference DD.docRef DD.docJoinId)) (Term.list a segs) - where - segment = blob <|> linky - blob = do - s <- string - pure $ - Term.app - (ann s) - (Term.constructor (ann s) (ConstructorReference DD.docRef DD.docBlobId)) - (Term.text (ann s) (L.payload s)) - linky = asum [include, signature, evaluate, source, link] - include = do - _ <- P.try (reserved "include") - hashQualifiedPrefixTerm - signature = do - _ <- P.try (reserved "signature") - tok <- termLink' - pure $ - Term.app - (ann tok) - (Term.constructor (ann tok) (ConstructorReference DD.docRef DD.docSignatureId)) - (Term.termLink (ann tok) (L.payload tok)) - evaluate = do - _ <- P.try (reserved "evaluate") - tok <- termLink' - pure $ - Term.app - (ann tok) - (Term.constructor (ann tok) (ConstructorReference DD.docRef DD.docEvaluateId)) - (Term.termLink (ann tok) (L.payload tok)) - source = do - _ <- P.try (reserved "source") - l <- link'' - pure $ - Term.app - (ann l) - (Term.constructor (ann l) (ConstructorReference DD.docRef DD.docSourceId)) - l - link'' = either ty t <$> link' - where - t tok = - Term.app - (ann tok) - (Term.constructor (ann tok) (ConstructorReference DD.linkRef DD.linkTermId)) - (Term.termLink (ann tok) (L.payload tok)) - ty tok = - Term.app - (ann tok) - (Term.constructor (ann tok) (ConstructorReference DD.linkRef DD.linkTypeId)) - (Term.typeLink (ann tok) (L.payload tok)) - link = d <$> link'' - where - d tm = Term.app (ann tm) (Term.constructor (ann tm) (ConstructorReference DD.docRef DD.docLinkId)) tm - -- Used by unbreakParas within docNormalize. Doc literals are a joined sequence -- segments. This type describes a property of a segment. data UnbreakCase @@ -682,298 +608,6 @@ data UnbreakCase StartsUnindented deriving (Eq, Show) --- Doc literal normalization --- --- This normalization allows the pretty-printer and doc display code to do --- indenting, and to do line-wrap of paragraphs, but without the inserted --- newlines being then frozen into the text for ever more over subsequent --- edit/update cycles. --- --- The alternative would be to stop line-wrapping docs on view/display by adding --- newlines in the pretty-printer, and instead leave wrapping to the --- terminal/editor. Might be worth considering if this code ends up being --- too buggy and fragile to maintain. Maybe display could add newlines, --- and view could refrain from doing so. --- --- Operates on the text of the Blobs within a doc (as parsed by docBlock): --- - reduces the whitespace after all newlines so that at least one of the --- non-initial lines has zero indent (important because the pretty-printer adds --- indenting when displaying doc literals) --- - removes trailing whitespace from each line --- - removes newlines between any sequence of non-empty zero-indent lines --- (i.e. undo line-breaking within paragraphs). --- --- Should be understood in tandem with Util.Pretty.paragraphyText, which --- outputs doc text for display/edit/view. --- See also unison-src/transcripts/doc-formatting.md. --- --- There is some heuristic/approximate logic in here - see the comment flagged --- with ** below. --- --- This function is a bit painful - it's trying to act on a sequence of lines, --- but that sequence is split up between the various blobs in the doc, which --- are separated by the elements tracking things like @[source] etc. It --- would be simplified if the doc representation was something like --- [Either Char EnrichedElement]. --- --- This function has some tracing which you can enable by deleting some calls to --- 'const id' below. -docNormalize :: (Ord v, Show v) => Term v a -> Term v a -docNormalize tm = case tm of - -- This pattern is just `DD.DocJoin seqs`, but exploded in order to grab - -- the annotations. The aim is just to map `normalize` over it. - a@(Term.App' c@(Term.Constructor' (ConstructorReference DD.DocRef DD.DocJoinId)) s@(Term.List' seqs)) -> - join - (ABT.annotation a) - (ABT.annotation c) - (ABT.annotation s) - (normalize seqs) - where - - _ -> error $ "unexpected doc structure: " ++ show tm - where - normalize = - Sequence.fromList - . (map TupleE.fst3) - . (tracing "after unbreakParas") - . unbreakParas - . (tracing "after full preprocess") - . preProcess - . (tracing "after unindent") - . unIndent - . (tracing "initial parse") - . miniPreProcess - preProcess xs = - zip3 - seqs - (lineStarteds $ Sequence.fromList seqs) - (followingLines $ Sequence.fromList seqs) - where - seqs = map fst xs - miniPreProcess seqs = zip (toList seqs) (lineStarteds seqs) - unIndent :: - (Ord v) => - [(Term v a, UnbreakCase)] -> - [(Term v a, UnbreakCase)] - unIndent tms = map go tms - where - go (b, previous) = - ((mapBlob $ (reduceIndent includeFirst minIndent)) b, previous) - where - -- Since previous was calculated before unindenting, it will often be wrongly - -- StartsIndented instead of StartsUnindented - but that's OK just for the test - -- below. And we'll recalculate it later in preProcess. - includeFirst = previous == LineEnds - concatenatedBlobs :: Text - concatenatedBlobs = mconcat (toList (fmap (getBlob . fst) tms)) - getBlob (DD.DocBlob txt) = txt - getBlob _ = "." - -- Note we exclude the first line when calculating the minimum indent - the lexer - -- already stripped leading spaces from it, and anyway it would have been sharing - -- its line with the [: and maybe other stuff. - nonInitialNonEmptyLines = - filter (not . Text.null) $ - map Text.stripEnd $ - drop 1 $ - Text.lines - concatenatedBlobs - minIndent = - minimumOrZero $ - map - (Text.length . (Text.takeWhile Char.isSpace)) - nonInitialNonEmptyLines - minimumOrZero xs = if length xs == 0 then 0 else minimum xs - reduceIndent :: Bool -> Int -> Text -> Text - reduceIndent includeFirst n t = - fixup $ - Text.unlines $ - mapExceptFirst reduceLineIndent onFirst $ - Text.lines t - where - onFirst = if includeFirst then reduceLineIndent else id - reduceLineIndent l = result - where - currentIndent = Text.length $ (Text.takeWhile Char.isSpace) l - remainder = (Text.dropWhile Char.isSpace) l - newIndent = maximum [0, currentIndent - n] - result = Text.replicate newIndent " " `mappend` remainder - -- unlines . lines adds a trailing newline if one was not present: undo that. - fixup = if Text.takeEnd 1 t == "\n" then id else Text.dropEnd 1 - -- Remove newlines between any sequence of non-empty zero-indent lines. - -- This is made more complicated by Doc elements (e.g. links) which break up a - -- blob but don't break a line of output text**. We sometimes need to refer back to the - -- previous blob to see whether a newline is between two zero-indented lines. - -- For example... - -- "This link to @foo makes it harder to see\n - -- that the newline should be removed." - -- Whether an element does this (breaks a blob but not a line of output text) really - -- depends on some things we don't know here: does an @[include] target doc occupy - -- just one line or several; whether this doc is going to be viewed or displayed. - -- So we'll get it wrong sometimes. The impact of this is that we may sometimes - -- misjudge whether a newline is separating two non-indented lines, and should therefore - -- be removed. - unbreakParas :: - (Show v, Ord v) => - [(Term v a, UnbreakCase, Bool)] -> - [(Term v a, UnbreakCase, Bool)] - unbreakParas = map go - where - -- 'candidate' means 'candidate to be joined with an adjacent line as part of a - -- paragraph'. - go (b, previous, nextIsCandidate) = - (mapBlob go b, previous, nextIsCandidate) - where - go txt = if Text.null txt then txt else tr result' - where - tr = - const id $ - trace $ - "\nprocessElement on blob " - ++ (show txt) - ++ ", result' = " - ++ (show result') - ++ ", lines: " - ++ (show ls) - ++ ", candidates = " - ++ (show candidates) - ++ ", previous = " - ++ (show previous) - ++ ", firstIsCandidate = " - ++ (show firstIsCandidate) - ++ "\n\n" - -- remove trailing whitespace - -- ls is non-empty thanks to the Text.null check above - -- Don't cut the last line's trailing whitespace - there's an assumption here - -- that it's followed by something which will put more text on the same line. - ls = mapExceptLast Text.stripEnd id $ Text.lines txt - -- Work out which lines are candidates to be joined as part of a paragraph, i.e. - -- are not indented. - candidate l = case Text.uncons l of - Just (initial, _) -> not . Char.isSpace $ initial - Nothing -> False -- empty line - -- The segment of this blob that runs up to the first newline may not itself - -- be the start of a line of the doc - for example if it's preceded by a link. - -- So work out whether the line of which it is a part is a candidate. - firstIsCandidate = case previous of - LineEnds -> candidate (head ls) - StartsIndented -> False - StartsUnindented -> True - candidates = firstIsCandidate : (tail (map candidate ls)) - result = mconcat $ intercalateMapWith sep fst (zip ls candidates) - sep (_, candidate1) (_, candidate2) = - if candidate1 && candidate2 then " " else "\n" - -- Text.lines forgets whether there was a trailing newline. - -- If there was one, then either add it back or convert it to a space. - result' = - if (Text.takeEnd 1 txt) == "\n" - then - if (last candidates) && nextIsCandidate - then result `Text.append` " " - else result `Text.append` "\n" - else result - -- A list whose entries match those of tms. `Nothing` is used for elements - -- which just continue a line, and so need to be ignored when looking back - -- for how the last line started. Otherwise describes whether the last - -- line of this entry is indented (or maybe terminated by a newline.) - -- A value of `Nothing` protects ensuing text from having its leading - -- whitespace removed by `unindent`. - -- Note that some elements render over multiple lines when displayed. - -- See test2 in transcript doc-formatting.md for an example of how - -- this looks when there is whitespace immediately following @[source] - -- or @[evaluate]. - lastLines :: (Show v) => Sequence.Seq (Term v a) -> [Maybe UnbreakCase] - lastLines tms = (flip fmap) (toList tms) $ \case - DD.DocBlob txt -> unbreakCase txt - DD.DocLink _ -> Nothing - DD.DocSource _ -> Nothing - DD.DocSignature _ -> Nothing - DD.DocEvaluate _ -> Nothing - Term.Var' _ -> Nothing -- @[include] - e@_ -> error ("unexpected doc element: " ++ show e) - -- Work out whether the last line of this blob is indented (or maybe - -- terminated by a newline.) - unbreakCase :: Text -> Maybe UnbreakCase - unbreakCase txt = - let (startAndNewline, afterNewline) = Text.breakOnEnd "\n" txt - in if Text.null startAndNewline - then Nothing - else - if Text.null afterNewline - then Just LineEnds - else - if Char.isSpace (Text.head afterNewline) - then Just StartsIndented - else Just StartsUnindented - -- A list whose entries match those of tms. Describes how the current - -- line started (the line including the start of this entry) - or LineEnds - -- if this entry is starting a line itself. - -- Calculated as the UnbreakCase of the previous entry that included a newline. - -- Really there's a function of type (a -> Bool) -> a -> [a] -> [a] in here - -- fighting to break free - overwriting elements that are 'shadowed' by - -- a preceding element for which the predicate is true, with a copy of - -- that element. - lineStarteds :: (Show v) => Sequence.Seq (Term v a) -> [UnbreakCase] - lineStarteds tms = tr $ quenchRuns LineEnds StartsUnindented $ xs'' - where - tr = - const id $ - trace $ - "lineStarteds: xs = " - ++ (show xs) - ++ ", xss = " - ++ (show xss) - ++ ", xs' = " - ++ (show xs') - ++ ", xs'' = " - ++ (show xs'') - ++ "\n\n" - -- Make sure there's a Just at the start of the list so we always find - -- one when searching back. - -- Example: xs = [J1,N2,J3] - xs :: [Maybe UnbreakCase] - xs = Just LineEnds : (lastLines tms) - -- Example: xss = [[J1],[J1,N2],[J1,N2,J3]] - xss :: [[Maybe UnbreakCase]] - xss = drop 1 $ List.inits xs - -- Example: after each step of the map... - -- [[J1],[N2,J1],[J3,N2,J1]] -- after reverse - -- [Just J1, Just J1, Just J3] -- after find - -- ... - -- result = [1,1,3] - xs' = - map (Maybe.fromJust . Maybe.fromJust . (List.find isJust) . reverse) xss - xs'' = List.Extra.dropEnd 1 xs' - -- For each element, can it be a line-continuation of a preceding blob? - continuesLine :: Sequence.Seq (Term v a) -> [Bool] - continuesLine tms = (flip fmap) (toList tms) \case - DD.DocBlob _ -> False -- value doesn't matter - you don't get adjacent blobs - DD.DocLink _ -> True - DD.DocSource _ -> False - DD.DocSignature _ -> False - DD.DocEvaluate _ -> False - Term.Var' _ -> False -- @[include] - _ -> error ("unexpected doc element" ++ show tm) - -- A list whose entries match those of tms. Can the subsequent entry by a - -- line continuation of this one? - followingLines tms = drop 1 ((continuesLine tms) ++ [False]) - mapExceptFirst :: (a -> b) -> (a -> b) -> [a] -> [b] - mapExceptFirst fRest fFirst = \case - [] -> [] - x : rest -> (fFirst x) : (map fRest rest) - mapExceptLast fRest fLast = reverse . (mapExceptFirst fRest fLast) . reverse - tracing :: (Show a) => [Char] -> a -> a - tracing when x = - (const id $ trace ("at " ++ when ++ ": " ++ (show x) ++ "\n")) x - blob aa ac at txt = - Term.app aa (Term.constructor ac (ConstructorReference DD.docRef DD.docBlobId)) (Term.text at txt) - join aa ac as segs = - Term.app aa (Term.constructor ac (ConstructorReference DD.docRef DD.docJoinId)) (Term.list' as segs) - mapBlob :: (Ord v) => (Text -> Text) -> Term v a -> Term v a - -- this pattern is just `DD.DocBlob txt` but exploded to capture the annotations as well - mapBlob f (aa@(Term.App' ac@(Term.Constructor' (ConstructorReference DD.DocRef DD.DocBlobId)) at@(Term.Text' txt))) = - blob (ABT.annotation aa) (ABT.annotation ac) (ABT.annotation at) (f txt) - mapBlob _ t = t - delayQuote :: (Monad m, Var v) => TermP v m delayQuote = P.label "quote" do start <- reserved "'" diff --git a/unison-syntax/src/Unison/Syntax/Lexer.hs b/unison-syntax/src/Unison/Syntax/Lexer.hs index e17074b519..1c85ee2519 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer.hs @@ -406,7 +406,6 @@ lexemes' eof = toks :: P [Token Lexeme] toks = doc2 - <|> doc <|> token numeric <|> token character <|> reserved @@ -835,35 +834,6 @@ lexemes' eof = where final = last ts - doc :: P [Token Lexeme] - doc = open <+> (CP.space *> fmap fixup body) <+> (close <* space) - where - open = token'' (\t _ _ -> t) $ tok (Open <$> lit "[:") - close = tok (Close <$ lit ":]") - at = lit "@" - -- this removes some trailing whitespace from final textual segment - fixup [] = [] - fixup (Token (Textual (reverse -> txt)) start stop : []) = - [Token (Textual txt') start stop] - where - txt' = reverse (dropWhile (\c -> isSpace c && not (c == '\n')) txt) - fixup (h : t) = h : fixup t - - body :: P [Token Lexeme] - body = txt <+> (atk <|> pure []) - where - ch = (":]" <$ lit "\\:]") <|> ("@" <$ lit "\\@") <|> (pure <$> P.anySingle) - txt = tok (Textual . join <$> P.manyTill ch (P.lookAhead sep)) - sep = void at <|> void close - ref = at *> (tok identifierLexemeP <|> docTyp) - atk = (ref <|> docTyp) <+> body - docTyp = do - _ <- lit "[" - typ <- tok (P.manyTill P.anySingle (P.lookAhead (lit "]"))) - _ <- lit "]" *> CP.space - t <- tok identifierLexemeP - pure $ (fmap Reserved <$> typ) <> t - blank = separated wordySep do _ <- char '_'