From 9e24f8ae701c35ce2708e840ba7a92c79b993df7 Mon Sep 17 00:00:00 2001 From: Matt Kantor Date: Sat, 29 Mar 2025 13:48:02 -0400 Subject: [PATCH 1/3] Automatically anchor regular expressions --- src/constructors.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/constructors.ts b/src/constructors.ts index dba2e8f..8ba9c40 100644 --- a/src/constructors.ts +++ b/src/constructors.ts @@ -36,11 +36,13 @@ export const nothing: ParserWhichAlwaysSucceeds = input => output: undefined, }) -export const regularExpression = - (pattern: RegExp): Parser => - input => { - const match = pattern.exec(input) - return match === null || match.index !== 0 +export const regularExpression = (pattern: RegExp): Parser => { + const patternAnchoredToStartOfString = pattern.source.startsWith('^') + ? pattern + : new RegExp(`^${pattern.source}`, pattern.flags) + return input => { + const match = patternAnchoredToStartOfString.exec(input) + return match === null ? either.makeLeft({ input, message: 'input did not match regular expression', @@ -50,3 +52,4 @@ export const regularExpression = output: match[0], }) } +} From e148a0feb3c582cebad202e5be836546572c48b0 Mon Sep 17 00:00:00 2001 From: Matt Kantor Date: Sat, 29 Mar 2025 15:01:12 -0400 Subject: [PATCH 2/3] Add `ParserResult` type constructor --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 780cff8..92a9e26 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -6,14 +6,14 @@ export type InvalidInputError = { readonly message: string } -export type Parser = ( - input: string, -) => Either> +export type Parser = (input: string) => ParserResult export type ParserWhichAlwaysSucceeds = ( input: string, ) => Right> +export type ParserResult = Either> + export type Success = { readonly remainingInput: string readonly output: Output From f3c2f004003a683b0021b9b100b36dff0ca77fc8 Mon Sep 17 00:00:00 2001 From: Matt Kantor Date: Sat, 29 Mar 2025 15:02:35 -0400 Subject: [PATCH 3/3] Try harder to frontload computations for parsers This has noticeable performance gains while informally benchmarking. --- src/combinators.ts | 215 ++++++++++++++++++++++---------------------- src/constructors.ts | 9 +- 2 files changed, 112 insertions(+), 112 deletions(-) diff --git a/src/combinators.ts b/src/combinators.ts index 87736a3..3c09f15 100644 --- a/src/combinators.ts +++ b/src/combinators.ts @@ -1,9 +1,10 @@ -import type { Either } from '@matt.kantor/either' +import type { Either, Right } from '@matt.kantor/either' import * as either from '@matt.kantor/either' import { nothing } from './constructors.js' import type { InvalidInputError, Parser, + ParserResult, ParserWhichAlwaysSucceeds, Success, } from './parser.js' @@ -11,16 +12,16 @@ import type { /** * Substitute the output of a successful parse. */ -export const as = - ( - parser: Parser, - newOutput: NewOutput, - ): Parser => - input => - either.map(parser(input), success => ({ - output: newOutput, - remainingInput: success.remainingInput, - })) +export const as = ( + parser: Parser, + newOutput: NewOutput, +): Parser => { + const replaceOutput = (success: Success) => ({ + output: newOutput, + remainingInput: success.remainingInput, + }) + return input => either.map(parser(input), replaceOutput) +} /** * Attempt to parse input with `parser`. If successful, ensure the same input @@ -31,39 +32,40 @@ export const as = * butNot(anySingleCharacter, literal('a'), 'the letter a') // parses any character besides 'a' * ``` */ -export const butNot = - ( - parser: Parser, - not: Parser, - notName: string, - ): Parser => - input => +export const butNot = ( + parser: Parser, + not: Parser, + notName: string, +): Parser => { + const errorMessage = `input was unexpectedly ${notName}` + return input => either.flatMap(parser(input), success => { const notResult = not(input) if (!either.isLeft(notResult)) { return either.makeLeft({ input, - message: `input was unexpectedly ${notName}`, + message: errorMessage, }) } else { return either.makeRight(success) } }) +} /** * Map the output of `parser` to another `Parser` which is then applied to the * remaining input, flattening the parse results. */ -export const flatMap = - ( - parser: Parser, - f: (output: Output) => Parser, - ): Parser => - input => - either.flatMap(parser(input), success => { - const nextParser = f(success.output) - return nextParser(success.remainingInput) - }) +export const flatMap = ( + parser: Parser, + f: (output: Output) => Parser, +): Parser => { + const applyF = (success: Success) => { + const nextParser = f(success.output) + return nextParser(success.remainingInput) + } + return input => either.flatMap(parser(input), applyF) +} /** * Create a `Parser` from a thunk. This can be useful for recursive parsers. @@ -82,60 +84,61 @@ export const lazy = * lookaheadNot(anySingleCharacter, literal('a'), 'the letter a') // parses the first character of 'ab', but not 'aa' * ``` */ -export const lookaheadNot = - ( - parser: Parser, - notFollowedBy: Parser, - followedByName: string, - ): Parser => - input => +export const lookaheadNot = ( + parser: Parser, + notFollowedBy: Parser, + followedByName: string, +): Parser => { + const errorMessage = `input was unexpectedly followed by ${followedByName}` + return input => either.flatMap(parser(input), success => either.match(notFollowedBy(success.remainingInput), { left: _ => either.makeRight(success), right: _ => either.makeLeft({ input, - message: `input was unexpectedly followed by ${followedByName}`, + message: errorMessage, }), }), ) +} /** * Map the output of `parser` to new output. */ -export const map = - ( - parser: Parser, - f: (output: Output) => NewOutput, - ): Parser => - input => - either.map(parser(input), success => ({ - output: f(success.output), - remainingInput: success.remainingInput, - })) +export const map = ( + parser: Parser, + f: (output: Output) => NewOutput, +): Parser => { + const applyF = (success: Success) => ({ + output: f(success.output), + remainingInput: success.remainingInput, + }) + return input => either.map(parser(input), applyF) +} /** * Apply the given `parsers` to the same input until one succeeds or all fail. */ -export const oneOf = - < - Parsers extends readonly [ - Parser, - Parser, - ...(readonly Parser[]), - ], - >( - parsers: Parsers, - ): Parser> => - input => - parsers.reduce( +export const oneOf = < + Parsers extends readonly [ + Parser, + Parser, + ...(readonly Parser[]), + ], +>( + parsers: Parsers, +): Parser> => { + const [firstParser, ...otherParsers] = parsers + return input => { + const firstResult = firstParser(input) + return otherParsers.reduce( (result: ReturnType>>, parser) => - either.match(result, { - right: either.makeRight, - left: _ => parser(input), - }), - either.makeLeft({ input, message: '' }), // `parsers` is non-empty so this is never returned + either.isLeft(result) ? parser(input) : result, + firstResult, ) + } +} type OneOfOutput[]> = { [Index in keyof Parsers]: OutputOf }[number] @@ -162,32 +165,24 @@ export const sequence = >( parsers: Parsers, ): Parser> => - input => - either.map( - parsers.reduce( - ( - results: ReturnType< - Parser[number][]> - >, - parser, - ) => - either.match(results, { - right: successes => - either.map(parser(successes.remainingInput), newSuccess => ({ - remainingInput: newSuccess.remainingInput, - output: [...successes.output, newSuccess.output], - })), - left: either.makeLeft, - }), - either.makeRight({ remainingInput: input, output: [] }), // `parsers` is non-empty so this is never returned - ), - ({ output, remainingInput }) => ({ - // The above `reduce` callback constructs `output` such that its - // elements align with `Parsers`, but TypeScript doesn't know that. - output: output as SequenceOutput, - remainingInput, - }), + input => { + const parseResult = parsers.reduce( + ( + results: ReturnType[number][]>>, + parser, + ) => + either.isRight(results) + ? either.map(parser(results.value.remainingInput), newSuccess => ({ + remainingInput: newSuccess.remainingInput, + output: [...results.value.output, newSuccess.output], + })) + : results, + either.makeRight({ remainingInput: input, output: [] }), // `parsers` is non-empty so this is never returned ) + // The above `reduce` callback constructs `output` such that its + // elements align with `Parsers`, but TypeScript doesn't know that. + return parseResult as ParserResult> + } type SequenceOutput[]> = { [Index in keyof Parsers]: OutputOf } @@ -195,29 +190,30 @@ type SequenceOutput[]> = { /** * Refine/transform the output of `parser` via a function which may fail. */ -export const transformOutput = - ( - parser: Parser, - f: (output: Output) => Either, - ): Parser => - input => - either.flatMap(parser(input), success => - either.map(f(success.output), output => ({ - output, - remainingInput: success.remainingInput, - })), - ) +export const transformOutput = ( + parser: Parser, + f: (output: Output) => Either, +): Parser => { + const transformation = (success: Success) => + either.map(f(success.output), output => ({ + output, + remainingInput: success.remainingInput, + })) + return input => either.flatMap(parser(input), transformation) +} /** * Repeatedly apply `parser` to the input as long as it keeps succeeding. * Outputs are collected in an array. */ -export const zeroOrMore = - ( - parser: Parser, - ): ParserWhichAlwaysSucceeds => - input => { - const result = oneOf([parser, nothing])(input) +export const zeroOrMore = ( + parser: Parser, +): ParserWhichAlwaysSucceeds => { + const parserOrNothing = oneOf([parser, nothing]) + + // Give this a name so it can be recursively referenced. + const thisParser = (input: string): Right> => { + const result = parserOrNothing(input) const success = either.match(result, { left: _ => ({ output: [], @@ -230,7 +226,7 @@ export const zeroOrMore = remainingInput: lastSuccess.remainingInput, } } else { - const nextResult = zeroOrMore(parser)(lastSuccess.remainingInput) + const nextResult = thisParser(lastSuccess.remainingInput) return { output: [lastSuccess.output, ...nextResult.value.output], remainingInput: nextResult.value.remainingInput, @@ -241,6 +237,9 @@ export const zeroOrMore = return either.makeRight(success) } + return thisParser +} + type OutputOf> = Extract< ReturnType['value'], Success diff --git a/src/constructors.ts b/src/constructors.ts index 8ba9c40..6099860 100644 --- a/src/constructors.ts +++ b/src/constructors.ts @@ -17,9 +17,9 @@ export const anySingleCharacter: Parser = input => { } } -export const literal = - (text: Text): Parser => - input => +export const literal = (text: Text): Parser => { + const errorMessage = `input did not begin with "${text}"` + return input => input.startsWith(text) ? either.makeRight({ remainingInput: input.slice(text.length), @@ -27,8 +27,9 @@ export const literal = }) : either.makeLeft({ input, - message: `input did not begin with "${text}"`, + message: errorMessage, }) +} export const nothing: ParserWhichAlwaysSucceeds = input => either.makeRight({