diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4efb5e10..9fd85bce5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -222,7 +222,7 @@ jobs: - name: Run LinkChecker (local links only) if: github.event_name == 'push' || github.event_name == 'pull_request' run: | - linkchecker './_out/html-multi/' + linkchecker --config=.linkchecker/linkcheckerrc './_out/html-multi/' # This saved number is used by a workflow_run trigger. It # manages labels that indicate the status of the built HTML. diff --git a/.linkchecker/linkcheckerrc b/.linkchecker/linkcheckerrc new file mode 100644 index 000000000..0cd632270 --- /dev/null +++ b/.linkchecker/linkcheckerrc @@ -0,0 +1,2 @@ +[filtering] +ignorewarnings=url-content-too-large,url-too-long diff --git a/Manual.lean b/Manual.lean index 9363e4be2..0ecc1a3f2 100644 --- a/Manual.lean +++ b/Manual.lean @@ -20,6 +20,7 @@ import Manual.Simp import Manual.Grind import Manual.VCGen import Manual.BasicTypes +import Manual.Iterators import Manual.BasicProps import Manual.NotationsMacros import Manual.IO @@ -118,6 +119,7 @@ draft := true {docstring Dynamic.get?} +{include 0 Manual.Iterators} # Standard Library %%% @@ -345,6 +347,32 @@ Std.ExtHashSet Std.TreeMap Std.DTreeMap Std.TreeSet +Std.Iterators +Std.Iterators.Iter +Std.Iterators.Iter.Equiv +Std.Iterators.Iter.TerminationMeasures +Std.Iterators.IterM +Std.Iterators.IterM.Equiv +Std.Iterators.IterM.TerminationMeasures +Std.Iterators.Iterator +Std.Iterators.IteratorAccess +Std.Iterators.IteratorLoop +Std.Iterators.IteratorLoopPartial +Std.Iterators.Finite +Std.Iterators.Productive +Std.Iterators.PostconditionT +Std.Iterators.HetT +Std.PRange +Std.PRange.UpwardEnumerable +Std.Rco +Std.Rcc +Std.Rci +Std.Roo +Std.Roc +Std.Roi +Std.Rio +Std.Ric +Std.Rii ``` ```exceptions @@ -836,6 +864,17 @@ Prod.lex Prod.Lex ``` +```exceptions +Std.Iterators.Iter.instForIn' +Std.Iterators.Iter.step_filter +Std.Iterators.Iter.val_step_filter +``` + +```exceptions +Std.Iterators.IterM.instForIn' +Std.Iterators.IterM.toListRev.go +``` + ```exceptions Lean.Elab.Tactic.elabSetOption diff --git a/Manual/BasicTypes.lean b/Manual/BasicTypes.lean index 1c4590698..70ead1819 100644 --- a/Manual/BasicTypes.lean +++ b/Manual/BasicTypes.lean @@ -25,6 +25,7 @@ import Manual.BasicTypes.List import Manual.BasicTypes.Maps import Manual.BasicTypes.Subtype import Manual.BasicTypes.Thunk +import Manual.BasicTypes.Range open Manual.FFIDocType @@ -319,6 +320,8 @@ Most comparisons on Booleans should be performed using the {inst}`DecidableEq Bo {include 0 Manual.BasicTypes.ByteArray} +{include 0 Manual.BasicTypes.Range} + {include 0 Manual.BasicTypes.Maps} {include 0 Manual.BasicTypes.Subtype} diff --git a/Manual/BasicTypes/Array.lean b/Manual/BasicTypes/Array.lean index 5c922ceb6..4db659a6e 100644 --- a/Manual/BasicTypes/Array.lean +++ b/Manual/BasicTypes/Array.lean @@ -336,6 +336,14 @@ tag := "array-api" ## Iteration +{docstring Array.iter} + +{docstring Array.iterFromIdx} + +{docstring Array.iterM} + +{docstring Array.iterFromIdxM} + {docstring Array.foldr} {docstring Array.foldrM} diff --git a/Manual/BasicTypes/List.lean b/Manual/BasicTypes/List.lean index 7de1efc40..079cc24c7 100644 --- a/Manual/BasicTypes/List.lean +++ b/Manual/BasicTypes/List.lean @@ -254,6 +254,10 @@ tag := "list-api-reference" ## Iteration +{docstring List.iter} + +{docstring List.iterM} + {docstring List.forA} {docstring List.forM} diff --git a/Manual/BasicTypes/Maps.lean b/Manual/BasicTypes/Maps.lean index 7c763767f..099a158f9 100644 --- a/Manual/BasicTypes/Maps.lean +++ b/Manual/BasicTypes/Maps.lean @@ -449,6 +449,12 @@ $_ ~m $_ ## Iteration +{docstring Std.HashMap.iter} + +{docstring Std.HashMap.keysIter} + +{docstring Std.HashMap.valuesIter} + {docstring Std.HashMap.map} {docstring Std.HashMap.fold} @@ -571,6 +577,12 @@ $_ ~m $_ ## Iteration +{docstring Std.DHashMap.iter} + +{docstring Std.DHashMap.keysIter} + +{docstring Std.DHashMap.valuesIter} + {docstring Std.DHashMap.map} {docstring Std.DHashMap.fold} @@ -812,6 +824,8 @@ $_ ~m $_ ## Iteration +{docstring Std.HashSet.iter} + {docstring Std.HashSet.all} {docstring Std.HashSet.any} @@ -972,6 +986,12 @@ The declarations in this section should be imported using `import Std.DTreeMap`. ## Iteration +{docstring Std.DTreeMap.iter} + +{docstring Std.DTreeMap.keysIter} + +{docstring Std.DTreeMap.valuesIter} + {docstring Std.DTreeMap.map} {docstring Std.DTreeMap.foldl} diff --git a/Manual/BasicTypes/Maps/TreeMap.lean b/Manual/BasicTypes/Maps/TreeMap.lean index 99de615a7..2cb7ce19e 100644 --- a/Manual/BasicTypes/Maps/TreeMap.lean +++ b/Manual/BasicTypes/Maps/TreeMap.lean @@ -219,6 +219,12 @@ The declarations in this section should be imported using `import Std.TreeMap`. # Iteration +{docstring Std.TreeMap.iter} + +{docstring Std.TreeMap.keysIter} + +{docstring Std.TreeMap.valuesIter} + {docstring Std.TreeMap.map} {docstring Std.TreeMap.all} diff --git a/Manual/BasicTypes/Maps/TreeSet.lean b/Manual/BasicTypes/Maps/TreeSet.lean index 7e70c0183..077fca594 100644 --- a/Manual/BasicTypes/Maps/TreeSet.lean +++ b/Manual/BasicTypes/Maps/TreeSet.lean @@ -127,6 +127,8 @@ tag := "TreeSet" # Iteration +{docstring Std.TreeSet.iter} + {docstring Std.TreeSet.all} {docstring Std.TreeSet.any} diff --git a/Manual/BasicTypes/Range.lean b/Manual/BasicTypes/Range.lean new file mode 100644 index 000000000..731778bfe --- /dev/null +++ b/Manual/BasicTypes/Range.lean @@ -0,0 +1,701 @@ +/- +Copyright (c) 2025 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual + +import Manual.Meta +import Manual.Interaction.FormatRepr + +open Lean.MessageSeverity + +open Verso.Genre Manual +open Verso.Genre.Manual.InlineLean + +set_option pp.rawOnError true +set_option format.width 60 + +#doc (Manual) "Ranges" => +%%% +tag := "ranges" +%%% + +A {deftech}_range_ represents a series of consecutive elements of some type, from a lower bound to an upper bound. +The bounds may be open, in which case the bound value is not part of the range, or closed, in which case the bound value is part of the range. +Either bound may be omitted, in which case the range extends infinitely in the corresponding direction. + +Ranges have dedicated syntax that consists of a starting point, {keyword}`...`, and an ending point. +The starting point may be either `*`, which denotes a range that continues infinitely downwards, or a term, which denotes a range with a specific starting value. +By default, ranges are left-closed: they contain their starting points. +A trailing `<` indicates that the range is left-open and does not contain its starting point. +The ending point may be `*`, in which case the range continues infinitely upwards, or a term, which denotes a range with a specific ending value. +By default, ranges are right-open: they do not contain their ending points. +The ending point may be prefixed with `<` to indicate that it is right-open; this is the default and does not change the meaning, but may be easier to read. +It may also be prefixed with `=` to indicate that the range is right-closed and contains its ending point. + + +:::example "Ranges of Natural Numbers" +The range that contains the numbers {lean}`3` through {lean}`6` can be written in a variety of ways: +```lean (name := rng1) +#eval (3...7).toList +``` +```leanOutput rng1 +[3, 4, 5, 6] +``` +```lean (name := rng2) +#eval (3...=6).toList +``` +```leanOutput rng2 +[3, 4, 5, 6] +``` +```lean (name := rng3) +#eval (2<...=6).toList +``` +```leanOutput rng3 +[3, 4, 5, 6] +``` +::: + +:::example "Finite and Infinite Ranges" +This range cannot be converted to a list, because it is infinite: +```lean (name := rng4) +error +#eval (3...*).toList +``` +Finiteness of a left-closed, right-unbounded range is indicated by the presence of an instance of {name}`Std.Rxi.IsAlwaysFinite`, which does not exist for {name}`Nat`. +{name}`Std.Rco` is the type of these ranges, and the name {name}`Std.Rxi.IsAlwaysFinite` indicates that it determines finiteness for all right-unbounded ranges. +```leanOutput rng4 +failed to synthesize + Std.Rxi.IsAlwaysFinite Nat + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` + +Attempting to enumerate the negative integers leads to a similar error, this time because there is no way to determine the least element: +```lean (name := intrange) +error +#eval (*...(0 : Int)).toList +``` +```leanOutput intrange +failed to synthesize + Std.PRange.Least? Int + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` + +Unbounded ranges in finite types indicate that the range extends to the greatest element of the type. +Because {name}`UInt8` has 256 elements, this range contains 253 elements: +```lean (name := uintrange) +#eval ((3 : UInt8)...*).toArray.size +``` +```leanOutput uintrange +253 +``` + +::: + + + +:::syntax term (title := "Range Syntax") + +This range is left-closed, right-open, and indicates {name}`Std.Rco`: +```grammar +$a...$b +``` + +This range is left-closed, right-open, and indicates {name}`Std.Rco`: +```grammar +$a...<$b +``` + +This range is left-closed, right-closed, and indicates {name}`Std.Rcc`: +```grammar +$a...=$b +``` + +This range is left-closed, right-infinite, and indicates {name}`Std.Rci`: +```grammar +$a...* +``` + +This range is left-open, right-open, and indicates {name}`Std.Roo`: +```grammar +$a<...$b +``` + +This range is left-open, right-open, and indicates {name}`Std.Roo`: +```grammar +$a<...<$b +``` + +This range is left-open, right-closed, and indicates {name}`Std.Roc`: +```grammar +$a<...=$b +``` +This range is left-open, right-infinite, and indicates {name}`Std.Roi`: +```grammar +$a<...* +``` +This range is left-infinite, right-open, and indicates {name}`Std.Rio`: +```grammar +*...$b +``` + +This range is left-infinite, right-open, and indicates {name}`Std.Ric`: +```grammar +*...<$b +``` + +This range is left-infinite, right-closed, and indicates {name}`Std.Ric`: +```grammar +*...=$b +``` + +This range is infinite on both sides, and indicates {name}`Std.Rii`: +```grammar +*...* +``` +::: + +# Range Types + +{docstring Std.Rco +allowMissing} + +{docstring Std.Rco.iter} + +{docstring Std.Rco.toArray} + +{docstring Std.Rco.toList} + +{docstring Std.Rco.size} + +{docstring Std.Rco.isEmpty} + +{docstring Std.Rcc +allowMissing} + +{docstring Std.Rcc.iter} + +{docstring Std.Rcc.toArray} + +{docstring Std.Rcc.toList} + +{docstring Std.Rcc.size} + +{docstring Std.Rcc.isEmpty} + +{docstring Std.Rci +allowMissing} + +{docstring Std.Rci.iter} + +{docstring Std.Rci.toArray} + +{docstring Std.Rci.toList} + +{docstring Std.Rci.size} + +{docstring Std.Rci.isEmpty} + +{docstring Std.Roo +allowMissing} + +{docstring Std.Roo.iter} + +{docstring Std.Roo.toArray} + +{docstring Std.Roo.toList} + +{docstring Std.Roo.size} + +{docstring Std.Roo.isEmpty} + +{docstring Std.Roc +allowMissing} + +{docstring Std.Roc.iter} + +{docstring Std.Roc.toArray} + +{docstring Std.Roc.toList} + +{docstring Std.Roc.size} + +{docstring Std.Roc.isEmpty} + +{docstring Std.Roi +allowMissing} + +{docstring Std.Roi.iter} + +{docstring Std.Roi.toArray} + +{docstring Std.Roi.toList} + +{docstring Std.Roi.size} + +{docstring Std.Roi.isEmpty} + +{docstring Std.Rio +allowMissing} + +{docstring Std.Rio.iter} + +{docstring Std.Rio.toArray} + +{docstring Std.Rio.toList} + +{docstring Std.Rio.size} + +{docstring Std.Rio.isEmpty} + +{docstring Std.Ric +allowMissing} + +{docstring Std.Ric.iter} + +{docstring Std.Ric.toArray} + +{docstring Std.Ric.toList} + +{docstring Std.Ric.size} + +{docstring Std.Ric.isEmpty} + +{docstring Std.Rii} + +{docstring Std.Rii.iter} + +{docstring Std.Rii.toArray} + +{docstring Std.Rii.toList} + +{docstring Std.Rii.size} + +{docstring Std.Rii.isEmpty} + +# Range-Related Type Classes + +{docstring Std.PRange.UpwardEnumerable} + +{docstring Std.PRange.UpwardEnumerable.LE} + +{docstring Std.PRange.UpwardEnumerable.LT} + +{docstring Std.PRange.LawfulUpwardEnumerable} + +{docstring Std.PRange.Least?} + +{docstring Std.PRange.InfinitelyUpwardEnumerable +allowMissing} + +{docstring Std.PRange.LinearlyUpwardEnumerable +allowMissing} + +{docstring Std.Rxi.IsAlwaysFinite +allowMissing} + +{docstring Std.Rxi.HasSize} + +{docstring Std.Rxc.IsAlwaysFinite +allowMissing} + +{docstring Std.Rxc.HasSize} + +# Implementing Ranges + +The built-in range types may be used with any type, but their usefulness depends on the presence of certain type class instances. +Generally speaking, ranges are either checked for membership, enumerated or iterated over. +To check whether an value is contained in a range, {name}`DecidableLT` and {name}`DecidableLE` instances are used to compare the value to the range's respective open and closed endpoints. +To get an iterator for a range, instances of {name}`Std.PRange.UpwardEnumerable` and {name}`Std.PRange.LawfulUpwardEnumerable` are all that's needed. +To iterate directly over it in a {keywordOf Lean.Parser.Term.doFor}`for` loop, {name}`Std.PRange.LawfulUpwardEnumerableLE` and {name}`Std.PRange.LawfulUpwardEnumerableLT` are required as well. +To enumerate a range (e.g. by calling {name Std.Rco.toList}`toList`), it must be proven finite. +This is done by supplying instances of {name}`Std.Rxi.IsAlwaysFinite`, {name}`Std.Rxc.IsAlwaysFinite`, or {name}`Std.Rxo.IsAlwaysFinite`. + +::::example "Implementing Ranges" (open := true) +The enumeration type {name}`Day` represents the days of the week: +```lean +inductive Day where + | mo | tu | we | th | fr | sa | su +deriving Repr +``` + +:::paragraph +```imports -show +import Std.Data.Iterators +``` + +While it's already possible to use this type in ranges, they're not particularly useful. +There's no membership instance: +```lean +error (name := noMem) +#eval Day.we ∈ (Day.mo...=Day.fr) +``` +```leanOutput noMem +failed to synthesize + Membership Day (Std.Rcc Day) + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` +Ranges can't be iterated over: +```lean +error (name := noIter) +#eval show IO Unit from + for d in Day.mo...=Day.fr do + IO.println s!"It's {repr d}" +``` +```leanOutput noIter +failed to synthesize instance for 'for_in%' notation + ForIn (EIO IO.Error) (Std.Rcc Day) ?m.11 +``` +Nor can they be enumerated, even though the type is finite: +```lean +error (name := noEnum) +#eval (Day.sa...*).toList +``` +```leanOutput noEnum +failed to synthesize + Std.PRange.UpwardEnumerable Day + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` +::: + +:::paragraph +Membership tests require {name}`DecidableLT` and {name}`DecidableLE` instances. +An easy way to get these is to number each day, and compare the numbers: +```lean +def Day.toNat : Day → Nat + | mo => 0 + | tu => 1 + | we => 2 + | th => 3 + | fr => 4 + | sa => 5 + | su => 6 + +instance : LT Day where + lt d1 d2 := d1.toNat < d2.toNat + +instance : LE Day where + le d1 d2 := d1.toNat ≤ d2.toNat + +instance : DecidableLT Day := + fun d1 d2 => inferInstanceAs (Decidable (d1.toNat < d2.toNat)) + +instance : DecidableLE Day := + fun d1 d2 => inferInstanceAs (Decidable (d1.toNat ≤ d2.toNat)) +``` +::: + +:::paragraph +With these instances available, membership tests work as expected: +```lean +def Day.isWeekday (d : Day) : Bool := d ∈ Day.mo...Day.sa +``` +```lean (name := thursday) +#eval Day.th.isWeekday +``` +```leanOutput thursday +true +``` +```lean (name := saturday) +#eval Day.sa.isWeekday +``` +```leanOutput saturday +false +``` +::: + +:::paragraph +Iteration and enumeration are both variants on repeatedly applying a successor function until either the upper bound of the range or the largest element of the type is reached. +This successor function is {name}`Std.PRange.UpwardEnumerable.succ?`. +It's also convenient to have a definition of the function in {name}`Day`'s namespace for use with generalized field notation: +```lean +def Day.succ? : Day → Option Day + | mo => some tu + | tu => some we + | we => some th + | th => some fr + | fr => some sa + | sa => some su + | su => none + +instance : Std.PRange.UpwardEnumerable Day where + succ? := Day.succ? +``` +::: + +Iteration also requires a proof that the implementation of {name Std.PRange.UpwardEnumerable.succ?}`succ?` is sensible. +Its properties are expressed in terms of {name}`Std.PRange.UpwardEnumerable.succMany?`, which iterates the application of {name Std.PRange.UpwardEnumerable.succ?}`succ?` a certain number of times and has a default implementation in terms of {name}`Nat.repeat` and {name Std.PRange.UpwardEnumerable.succ?}`succ?`. +In particular, an instance of {name Std.PRange.LawfulUpwardEnumerable}`LawfulUpwardEnumerable` requires proofs that {name}`Std.PRange.UpwardEnumerable.succMany?` corresponds to the default implementation along with a proof that repeatedly applying the successor never yields the same element again. + +:::paragraph +The first step is to write two helper lemmas for the two proofs about {name Std.PRange.UpwardEnumerable.succMany?}`succMany?`. +While they could be written inline in the instance declaration, it's convenient for them to have the {attrs}`@[simp]` attribute. +```lean +@[simp] +theorem Day.succMany?_zero (d : Day) : + Std.PRange.succMany? 0 d = some d := by + simp [Std.PRange.succMany?, Nat.repeat] + +@[simp] +theorem Day.succMany?_add_one (n : Nat) (d : Day) : + Std.PRange.succMany? (n + 1) d = + (Std.PRange.succMany? n d).bind Std.PRange.succ? := by + simp [Std.PRange.succMany?, Nat.repeat, Std.PRange.succ?] +``` + +Proving that there are no cycles in successor uses a convenient helper lemma that calculates the number of successor steps between any two days. +It is marked {attrs}`@[grind →]` because when assumptions that match its premises are present, it adds a great deal of new information: +```lean +@[grind →] +theorem Day.succMany?_steps {d d' : Day} {steps} : + Std.PRange.succMany? steps d = some d' → + if d ≤ d' then steps = d'.toNat - d.toNat + else False := by + intro h + match steps with + | 0 | 1 | 2 | 3 | 4 | 5 | 6 => + cases d <;> cases d' <;> + simp_all +decide [Std.PRange.succMany?, Nat.repeat, Day.succ?] + | n + 7 => + simp at h + cases h' : (Std.PRange.succMany? n d) with + | none => + simp_all + | some d'' => + rw [h'] at h + cases d'' <;> contradiction +``` +With that helper, the proof is quite short: +```lean +instance : Std.PRange.LawfulUpwardEnumerable Day where + ne_of_lt d1 d2 h := by grind [Std.PRange.UpwardEnumerable.LT] + succMany?_zero := Day.succMany?_zero + succMany?_add_one := Day.succMany?_add_one +``` +::: + +:::paragraph +Proving the three kinds of enumerable ranges to be finite makes it possible to enumerate ranges of days: +```lean +instance : Std.Rxo.IsAlwaysFinite Day where + finite init hi := ⟨7, by cases init <;> simp [Std.PRange.succ?, Day.succ?]⟩ + +instance : Std.Rxc.IsAlwaysFinite Day where + finite init hi := ⟨7, by cases init <;> simp [Std.PRange.succ?, Day.succ?]⟩ + +instance : Std.Rxi.IsAlwaysFinite Day where + finite init := ⟨7, by cases init <;> rfl⟩ +``` +```lean (name := allWeekdays) +def allWeekdays : List Day := (Day.mo...Day.sa).toList +#eval allWeekdays +``` +```leanOutput allWeekdays +[Day.mo, Day.tu, Day.we, Day.th, Day.fr] +``` +Adding a {name}`Std.PRange.Least?` instance allows enumeration of left-unbounded ranges: +```lean (name := allWeekdays') +instance : Std.PRange.Least? Day where + least? := some .mo + +def allWeekdays' : List Day := (*...Day.sa).toList +#eval allWeekdays' +``` +```leanOutput allWeekdays' +[Day.mo, Day.tu, Day.we, Day.th, Day.fr] +``` +It's also possible to create an iterator that can be enumerated, but it can't yet be used with {keywordOf Lean.Parser.Term.doFor}`for`: +```lean (name := iterEnum) +#eval (Day.we...Day.fr).iter.toList +``` +```leanOutput iterEnum +[Day.we, Day.th] +``` +```lean (name := iterForNo) +error +#eval show IO Unit from do + for d in (Day.mo...Day.th).iter do + IO.println s!"It's {repr d}." +``` +```leanOutput iterForNo +failed to synthesize instance for 'for_in%' notation + ForIn (EIO IO.Error) (Std.Iter Day) ?m.12 +``` + +::: + +:::paragraph +The last step to enable iteration, thus making ranges of days fully-featured, is to prove that the less-than and less-than-or-equal-to relations on {name}`Day` correspond to the notions of inequality that are derived from iterating the successor function. +This is captured in the classes {name}`Std.PRange.LawfulUpwardEnumerableLT` and {name}`Std.PRange.LawfulUpwardEnumerableLE`, which require that the two notions are logically equivalent: +```lean +instance : Std.PRange.LawfulUpwardEnumerableLT Day where + lt_iff d1 d2 := by + constructor + . intro lt + simp only [Std.PRange.UpwardEnumerable.LT, Day.succMany?_add_one] + exists d2.toNat - d1.toNat.succ + cases d1 <;> cases d2 <;> + simp_all [Day.toNat, Std.PRange.succ?, Day.succ?] <;> + contradiction + . intro ⟨steps, eq⟩ + have := Day.succMany?_steps eq + cases d1 <;> cases d2 <;> + simp only [if_false_right] at this <;> + cases this <;> first | decide | contradiction + +instance : Std.PRange.LawfulUpwardEnumerableLE Day where + le_iff d1 d2 := by + constructor + . intro le + simp only [Std.PRange.UpwardEnumerable.LE] + exists d2.toNat - d1.toNat + cases d1 <;> cases d2 <;> + simp_all [Day.toNat, Std.PRange.succ?, Day.succ?] <;> + contradiction + . intro ⟨steps, eq⟩ + have := Day.succMany?_steps eq + cases d1 <;> cases d2 <;> + simp only [if_false_right] at this <;> + cases this <;> grind +``` +::: + +:::paragraph +It is now possible to iterate over ranges of days: +```lean (name := done) +#eval show IO Unit from do + for x in (Day.mo...Day.th).iter do + IO.println s!"It's {repr x}" +``` +```leanOutput done +It's Day.mo +It's Day.tu +It's Day.we +``` +::: + +:::: + +# Ranges and Slices + +Range syntax can be used with data structures that support slicing to select a slice of the structure. + +:::example "Slicing Lists" +Lists may be sliced with any of the interval types: +```lean +def groceries := + ["apples", "bananas", "coffee", "dates", "endive", "fennel"] +``` + +```lean (name := rco) +#eval groceries[1...4] |>.toList +``` +```leanOutput rco +["bananas", "coffee", "dates"] +``` +```lean (name := rcc) +#eval groceries[1...=4] |>.toList +``` +```leanOutput rcc +["bananas", "coffee", "dates", "endive"] +``` +```lean (name := rci) +#eval groceries[1...*] |>.toList +``` +```leanOutput rci +["bananas", "coffee", "dates", "endive", "fennel"] +``` +```lean (name := roo) +#eval groceries[1<...4] |>.toList +``` +```leanOutput roo +["coffee", "dates"] +``` +```lean (name := roc) +#eval groceries[1<...=4] |>.toList +``` +```leanOutput roc +["coffee", "dates", "endive"] +``` +```lean (name := ric) +#eval groceries[*...=4] |>.toList +``` +```leanOutput ric +["apples", "bananas", "coffee", "dates", "endive"] +``` +```lean (name := rio) +#eval groceries[*...4] |>.toList +``` +```leanOutput rio +["apples", "bananas", "coffee", "dates"] +``` +```lean (name := rii) +#eval groceries[*...*] |>.toList +``` +```leanOutput rii +["apples", "bananas", "coffee", "dates", "endive", "fennel"] +``` + + +::: + +:::example "Custom Slices" +A {name}`Triple` contains three values of the same type: +```lean +structure Triple (α : Type u) where + fst : α + snd : α + thd : α +deriving Repr +``` +Positions in a triple may be any of the fields, or just after {name Triple.thd}`thd`: +```lean +inductive TriplePos where + | fst | snd | thd | done +deriving Repr +``` +A slice of a triple consists of a triple, a starting position, and a stopping position. +The starting position is inclusive, and the stopping position exclusive: +```lean +structure TripleSlice (α : Type u) where + triple : Triple α + start : TriplePos + stop : TriplePos +deriving Repr +``` +Ranges of {name}`TriplePos` can be used to select a slice from a triple by implementing instances of each supported range type's {name Std.Rco.Sliceable}`Sliceable` class. +For example, {name}`Std.Rco.Sliceable` allows left-closed, right-open ranges to be used to slice {name}`Triple`s: +```lean +instance : Std.Rco.Sliceable (Triple α) TriplePos (TripleSlice α) where + mkSlice triple range := + { triple, start := range.lower, stop := range.upper } +``` +```lean (name := slice) +def abc : Triple Char := ⟨'a', 'b', 'c'⟩ + +open TriplePos in +#eval abc[snd...thd] +``` +```leanOutput slice +{ triple := { fst := 'a', snd := 'b', thd := 'c' }, start := TriplePos.snd, stop := TriplePos.thd } +``` +Infinite ranges have only a lower bound: +```lean (name := slice2) +instance : Std.Rci.Sliceable (Triple α) TriplePos (TripleSlice α) where + mkSlice triple range := + { triple, start := range.lower, stop := .done } + +open TriplePos in +#eval abc[snd...*] +``` +```leanOutput slice2 +{ triple := { fst := 'a', snd := 'b', thd := 'c' }, start := TriplePos.snd, stop := TriplePos.done } +``` + +::: + +{docstring Std.Rco.Sliceable +allowMissing} + +{docstring Std.Rcc.Sliceable +allowMissing} + +{docstring Std.Rci.Sliceable +allowMissing} + +{docstring Std.Roo.Sliceable +allowMissing} + +{docstring Std.Roc.Sliceable +allowMissing} + +{docstring Std.Roi.Sliceable +allowMissing} + +{docstring Std.Rio.Sliceable +allowMissing} + +{docstring Std.Ric.Sliceable +allowMissing} + +{docstring Std.Rii.Sliceable +allowMissing} diff --git a/Manual/Iterators.lean b/Manual/Iterators.lean new file mode 100644 index 000000000..f12b18ba0 --- /dev/null +++ b/Manual/Iterators.lean @@ -0,0 +1,1168 @@ +/- +Copyright (c) 2025 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual +import Std.Data.Iterators +import Std.Data.TreeMap + +import Manual.Meta +import Manual.Interaction.FormatRepr + +open Lean.MessageSeverity + +open Verso.Genre Manual +open Verso.Genre.Manual.InlineLean + +set_option pp.rawOnError true + +open Std.Iterators +open Std (TreeMap) + +#doc (Manual) "Iterators" => +%%% +tag := "iterators" +%%% + +An {deftech}_iterator_ provides sequential access to each element of some source of data. +Typical iterators allow the elements in a collection, such as a list, array, or {name Std.TreeMap}`TreeMap` to be accessed one by one, but they can also provide access to data by carrying out some {tech (key := "monad")}[monadic] effect, such as reading files. +Iterators provide a common interface to all of these operations. +Code that is written to the iterator API can be agnostic as to the source of the data. + +Each iterator maintains an internal state that enables it to determine the next value. +Because Lean is a pure functional language, consuming an iterator does not invalidate it, but instead copies it with an updated state. +As usual, {tech (key := "reference count")}[reference counting] is used to optimize programs that use values only once into programs that destructively modify values. + +To use iterators, import {module}`Std.Data.Iterators`. + +:::example "Mixing Collections" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +Combining a list and an array using {name}`List.zip` or {name}`Array.zip` would ordinarily require converting one of them into the other collection. +Using iterators, they can be processed without conversion: +```lean (name := zip) +def colors : Array String := #["purple", "gray", "blue"] +def codes : List String := ["aa27d1", "a0a0a0", "0000c5"] + +#eval colors.iter.zip codes.iter |>.toArray +``` +```leanOutput zip +#[("purple", "aa27d1"), ("gray", "a0a0a0"), ("blue", "0000c5")] +``` +::: + +::::example "Avoiding Intermediate Structures" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +:::paragraph +In this example, an array of colors and a list of color codes are combined. +The program separates three intermediate stages: +1. The names and codes are combined into pairs. +2. The pairs are transformed into readable strings. +3. The strings are combined with newlines. +```lean (name := intermediate) +def colors : Array String := #["purple", "gray", "blue"] + +def codes : List String := ["aa27d1", "a0a0a0", "0000c5"] + +def go : IO Unit := do + let colorCodes := colors.iter.zip codes.iter + let colorCodes := colorCodes.map fun (name, code) => + s!"{name} ↦ #{code}" + let colorCodes := colorCodes.fold (init := "") fun x y => + if x.isEmpty then y else x ++ "\n" ++ y + IO.println colorCodes + +#eval go +``` +```leanOutput intermediate +purple ↦ #aa27d1 +gray ↦ #a0a0a0 +blue ↦ #0000c5 +``` +::: + +The intermediate stages of the computation do not allocate new data structures. +Instead, all the steps of the transformation are fused into a single loop, with {name}`Iter.fold` carrying out one step at a time. +In each step, a single color and color code are combined into a pair, rewritten to a string, and added to the result string. +:::: + +The Lean standard library provides three kinds of iterator operations. +{deftech}_Producers_ create a new iterator from some source of data. +They determine which data is to be returned by an iterator, and how this data is to be computed, but they are not in control of _when_ the computations occur. +{deftech}_Consumers_ use the data in an iterator for some purpose. +Consumers request the iterator's data, and the iterator computes only enough data to satisfy a consumer's requests. +{deftech (key := "iterator combinator")}_Combinators_ are both consumers and producers: they create new iterators from existing iterators. +Examples include {name}`Iter.map` and {name}`Iter.filter`. +The resulting iterators produce data by consuming their underlying iterators, and do not actually iterate over the underlying collection until they themselves are consumed. + + +:::keepEnv +```lean -show +/-- A collection type. -/ +structure Coll : Type u where +/-- The elements of the collection `Coll`. -/ +structure Elem : Type u where +/-- Returns an iterator for `c`. -/ +def Coll.iter (c : Coll) := (#[].iter : Iter Elem) +``` +Each built-in collection for which it makes sense to do so can be iterated over. +In other words, the collection libraries include iterator {tech}[producers]. +By convention, a collection type {name}`Coll` provides a function {name}`Coll.iter` that returns an iterator over the elements of a collection. +Examples include {name}`List.iter`, {name}`Array.iter`, and {name}`TreeMap.iter`. +Additionally, other built-in types such as ranges support iteration using the same convention. +::: + +# Run-Time Considerations + +For many use cases, using iterators can give a performance benefit by avoiding allocating intermediate data structures. +Without iterators, zipping a list with an array requires first converting one of them to the other type, allocating an intermediate structure, and then using the appropriate {name List.zip}`zip` function. +Using iterators, the intermediate structure can be avoided. + +When an iterator is consumed, the resulting computation should be thought of as a single loop, even if the iterator itself is built using combinators from a number of underlying iterators. +One step of the loop may carry out multiple steps from the underlying iterators. +In many cases, the Lean compiler can optimize iterator computations, removing the intermediate overhead, but this is not guaranteed. +When profiling shows that significant time is taken by a tight loop that involves multiple sources of data, it can be necessary to inspect the compiler's IR to see whether the iterators' operations were fused. +In particular, if the IR contains many pattern matches over steps, then it can be a sign of a failure to inline or specialize. +If this is the case, it may be necessary to write a tail-recursive function by hand rather than using the higher-level API. + +# Iterator Definitions + +Iterators may be either monadic or pure, and they may be finite, productive, or potentially infinite. +{deftech (key:="monadic iterator")}_Monadic_ iterators use side effects in some {tech}[monad] to emit each value, and must therefore be used in the monad, while {deftech (key:="pure iterator")}_pure_ iterators do not require side effects. +For example, iterating over all files in a directory requires the {name}`IO` monad. +Pure iterators have type {name}`Iter`, while monadic iterators are represented by {name}`IterM`. + +{docstring Iter} + +{docstring IterM} + +The types {name}`Iter` and {name}`IterM` are merely wrappers around an internal state. +This inner state type is the implicit parameter to the iterator types. +For basic producer iterators, like the one that results from {name}`List.iter`, this type is fairly simple; however, iterators that result from {tech (key := "iterator combinator")}[combinators] use polymorphic state types that can grow large. +Because Lean elaborates the specified return type of a function before elaborating its body, it may not be possible to automatically determine the internal state type of an iterator type returned by a function. +In these cases, it can be helpful to omit the return type from the signature and instead place a type annotation on the definition's body, which allows the specific iterator combinators invoked from the body to be used to determine the state type. + +:::example "Iterator State Types" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` + +Writing the internal state type explicitly for list and array iterators is feasible: +```lean +def reds := ["red", "crimson"] + +example : @Iter (ListIterator String) String := reds.iter + +example : @Iter (ArrayIterator String) String := reds.toArray.iter +``` +However, the internal state type of a use of the {name}`Iter.map` combinator is quite complicated: +```lean +example : + @Iter + (Map (ListIterator String) Id Id @id fun x : String => + pure x.length) + Nat := + reds.iter.map String.length +``` +Omitting the state type leads to an error: +```lean +error (name := noStateType) +example : Iter Nat := reds.iter.map String.length +``` +```leanOutput noStateType +don't know how to synthesize implicit argument `α` + @Iter ?m.1 Nat +context: +⊢ Type + +Note: Because this declaration's type has been explicitly provided, all parameter types and holes (e.g., `_`) in its header are resolved before its body is processed; information from the declaration body cannot be used to infer what these values should be +``` +Rather than writing the state type by hand, it can be convenient to omit the return type and instead provide the annotation around the term: +```lean +example := (reds.iter.map String.length : Iter Nat) + +example := + show Iter Nat from + reds.iter.map String.length +``` +::: + +The actual process of iteration consists of producing a sequence of iteration steps when requested. +Each step returns an updated iterator with a new internal state along with either a data value (in {name}`IterStep.yield`), an indicator that the caller should request a data value again ({name}`IterStep.skip`), or an indication that iteration is finished ({name}`IterStep.done`). +Without the ability to {name IterStep.skip}`skip`, it would be much more difficult to work with iterator combinators such as {name}`Iter.filter` that do not yield values for all of those yielded by the underlying iterator. +With {name IterStep.skip}`skip`, the implementation of {name Iter.filter}`filter` doesn't need to worry about whether the underlying iterator is {tech (key:="finite iterator")}[finite] in order to be a well-defined function, and reasoning about its finiteness can be carried out in separate proofs. +Additionally, {name Iter.filter}`filter` would require an inner loop, which is much more difficult for the compiler to inline. + +{docstring IterStep} + +Steps taken by {name}`Iter` and {name}`IterM` are respectively represented by the types {name}`Iter.Step` and {name}`IterM.Step`. +Both types of step are wrappers around {name}`IterStep` that include {ref "iterator-plausibility"}[additional proofs] that are used to track termination behavior. + +{docstring Iter.Step} + +{docstring IterM.Step} + +Steps are produced from iterators using {name}`Iterator.step`, which is a method of the {name}`Iterator` type class. +{name}`Iterator` is used for both pure and monadic iterators; pure iterators can be completely polymorphic in the choice of monad, which allows callers to instantiate it with {name}`Id`. + +{docstring Iterator +allowMissing} + +## Plausibility +%%% +tag := "iterator-plausibility" +%%% + +In addition to the step function, instances of {name}`Iterator` include a relation {name}`Iterator.IsPlausibleStep`. +This relation exists because most iterators both maintain invariants over their internal state and yield values in a predictable manner. +For example, array iterators track both an array and a current index into it. +Stepping an array iterator results in an iterator over the same underlying array; it yields a value when the index is small enough, or is done otherwise. +The {deftech}_plausible steps_ from an iterator state are those which are related to it via the iterator's implementation of {name Iterator.IsPlausibleStep}`IsPlausibleStep`. +Tracking plausibility at the logical level makes it feasible to reason about termination behavior for monadic iterators. + +Both {name}`Iter.Step` and {name}`IterM.Step` are defined in terms of {name}`PlausibleIterStep`; thus, both types can be used with {tech}[leading dot notation] for its namespace. +An {name}`Iter.Step` or {name}`IterM.Step` can be analyzed using the three {ref "match_pattern-functions"}[match pattern functions] {name}`PlausibleIterStep.yield`, {name}`PlausibleIterStep.skip`, and {name}`PlausibleIterStep.done`. +These functions pair the information in the underlying {name}`IterStep` with the surrounding proof object. + +{docstring PlausibleIterStep} + +{docstring PlausibleIterStep.yield} + +{docstring PlausibleIterStep.skip} + +{docstring PlausibleIterStep.done} + +## Finite and Productive Iterators + +:::paragraph +Not all iterators are guaranteed to return a finite number of results; it is perfectly sensible to iterate over all of the natural numbers. +Similarly, not all iterators are guaranteed to either return a single result or terminate; iterators may be defined using arbitrary programs. +Thus, Lean divides iterators into three termination classes: +* {deftech (key:="finite iterator")}_Finite_ iterators are guaranteed to finish iterating after a finite number of steps. These iterators have a {name}`Finite` instance. +* {deftech (key:="productive iterator")}_Productive_ iterators are guaranteed to yield a value or terminate in finitely many steps, but they may yield infinitely many values. These iterators have a {name}`Productive` instance. +* All other iterators, whose termination behavior is unknown. These iterators have neither instance. + +All finite iterators are necessarily productive. +::: + +{docstring Finite +allowMissing} + +{docstring Productive +allowMissing} + +Sometimes, a needed {name}`Finite` instance is not available because an iterator has not yet been proved finite. +In these cases, {name}`Iter.allowNontermination` can be used to bypass a finiteness requirement. + +{docstring Iter.allowNontermination} + +{docstring IterM.allowNontermination} + +::::example "Iterating Over `Nat`" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +:::paragraph +To write an iterator that yields each natural number in turn, the first step is to implement its internal state. +This iterator only needs to remember the next natural number: +```lean +structure Nats where + next : Nat +``` +::: +:::paragraph +This iterator will only ever yield the next natural number. +Thus, its step function will never return {name IterStep.skip}`skip` or {name IterStep.done}`done`. +Whenever it yields a value, the value will be the internal state's {name Nats.next}`next` field, and the successor iterator's {name Nats.next}`next` field will be one greater. +The {tactic}`grind` tactic suffices to show that the step is indeed plausible: +```lean +instance [Pure m] : Iterator Nats m Nat where + IsPlausibleStep it + | .yield it' n => + n = it.internalState.next ∧ + it'.internalState.next = n + 1 + | _ => False + step it := + let n := it.internalState.next + pure <| .deflate <| + .yield { it with internalState.next := n + 1 } n (by grind) +``` +::: + +:::paragraph +```lean -show +section +variable [Pure m] [inst : Iterator Nats m Nat] (it it' : IterM (α := Nats) m Nat) +``` +This {name Iterator.step}`step` function is productive because it never returns {name IterStep.skip}`skip`. +Thus, the proof that each chain of {name IterStep.skip}`skip`s has finite length can rely on the fact that when {lean}`it` is a {name}`Nats` iterator, {lean}`Iterator.IsPlausibleStep it (.skip it') = False`: +```lean -show +end +``` +```lean +instance [Pure m] : Productive Nats m where + wf := .intro <| fun _ => .intro _ nofun +``` +Because there are infinitely many {name}`Nat`s, the iterator is not finite. +::: + + +:::paragraph +A {name}`Nats` iterator can be created using this function: +```lean +def Nats.iter : Iter (α := Nats) Nat := + toIterM { next := 0 } Id Nat |>.toIter +``` +::: + +:::paragraph +This iterator is useful with combinators such as {name}`Iter.zip`: +```lean (name := natzip) +#eval show IO Unit from do + let xs : List String := ["cat", "dog", "pachycephalosaurus"] + for (x, y) in Nats.iter.zip xs.iter do + IO.println s!"{x}: {y}" +``` +```leanOutput natzip +0: cat +1: dog +2: pachycephalosaurus +``` +::: +:::: + +::::example "Iterating Over Triples" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +The type {name}`Triple` contains three values of the same type: +```lean +structure Triple α where + fst : α + snd : α + thd : α +``` + +The internal state of an iterator over {name}`Triple` can consist of a triple paired with a current position. +This position may either be one of the fields or an indication that iteration is finished. +```lean +inductive TriplePos where + | fst | snd | thd | done +``` + +Positions can be used to look up elements: + +```lean +def Triple.get? (xs : Triple α) (pos : TriplePos) : Option α := + match pos with + | .fst => some xs.fst + | .snd => some xs.snd + | .thd => some xs.thd + | _ => none +``` + +Each field's position has a successor position: +```lean +@[grind, grind cases] +inductive TriplePos.Succ : TriplePos → TriplePos → Prop where + | fst : Succ .fst .snd + | snd : Succ .snd .thd + | thd : Succ .thd .done +``` + +The iterator itself pairs a triple with the position of the next element: +```lean +structure TripleIterator α where + triple : Triple α + pos : TriplePos +``` + +Iteration begins at {name TriplePos.fst}`fst`: +```lean +def Triple.iter (xs : Triple α) : Iter (α := TripleIterator α) α := + toIterM {triple := xs, pos := .fst : TripleIterator α} Id α |>.toIter +``` + +There are two plausible steps: either the iterator's position has a successor, in which case the next iterator is one that points at the same triple with the successor position, or it does not, in which case iteration is complete. +```lean +@[grind] +inductive TripleIterator.IsPlausibleStep : + @IterM (TripleIterator α) m α → + IterStep (@IterM (TripleIterator α) m α) α → + Prop where + | yield : + it.internalState.triple = it'.internalState.triple → + it.internalState.pos.Succ it'.internalState.pos → + it.internalState.triple.get? it.internalState.pos = some out → + IsPlausibleStep it (.yield it' out) + | done : + it.internalState.pos = .done → + IsPlausibleStep it .done +``` + +The corresponding step function yields the iterator and value describe by the relation: +```lean +instance [Pure m] : Iterator (TripleIterator α) m α where + IsPlausibleStep := TripleIterator.IsPlausibleStep + step + | ⟨xs, pos⟩ => + pure <| .deflate <| + match pos with + | .fst => .yield ⟨xs, .snd⟩ xs.fst ?_ + | .snd => .yield ⟨xs, .thd⟩ xs.snd ?_ + | .thd => .yield ⟨xs, .done⟩ xs.thd ?_ + | .done => .done <| ?_ +where finally + all_goals grind [Triple.get?] +``` + +This iterator cannot yet be converted to an array, because it is missing a {name}`Finite` instance and an {name}`IteratorCollect` instance: +```lean +def abc : Triple Char := ⟨'a', 'b', 'c'⟩ +``` +```lean (name := noAbc) +error +#eval abc.iter.toArray +``` +```leanOutput noAbc +failed to synthesize + Finite (TripleIterator Char) Id + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` + +To prove finiteness, it's easiest to start at {name}`TriplePos.done` and work backwards toward {name}`TriplePos.fst`, showing that each position in turn has a finite chain of successors: + +```lean +@[grind! .] +theorem acc_done [Pure m] : + Acc (IterM.IsPlausibleSuccessorOf (m := m)) + ⟨{ triple, pos := .done : TripleIterator α}⟩ := + Acc.intro _ fun + | _, ⟨_, ⟨_, h⟩⟩ => by + cases h <;> grind [IterStep.successor_done] + +@[grind! .] +theorem acc_thd [Pure m] : + Acc (IterM.IsPlausibleSuccessorOf (m := m)) + ⟨{ triple, pos := .thd : TripleIterator α}⟩ := + Acc.intro _ fun + | ⟨{ triple, pos }⟩, ⟨h, h', h''⟩ => by + cases h'' <;> grind [IterStep.successor_yield] + +@[grind! .] +theorem acc_snd [Pure m] : + Acc (IterM.IsPlausibleSuccessorOf (m := m)) + ⟨{ triple, pos := .snd : TripleIterator α}⟩ := + Acc.intro _ fun + | ⟨{ triple, pos }⟩, ⟨h, h', h''⟩ => by + cases h'' <;> grind [IterStep.successor_yield] + +@[grind! .] +theorem acc_fst [Pure m] : + Acc (IterM.IsPlausibleSuccessorOf (m := m)) + ⟨{ triple, pos := .fst : TripleIterator α}⟩ := + Acc.intro _ fun + | ⟨{ triple, pos }⟩, ⟨h, h', h''⟩ => by + cases h'' <;> grind [IterStep.successor_yield] + +instance [Pure m] : Finite (TripleIterator α) m where + wf := .intro <| fun + | { internalState := { triple, pos } } => by + cases pos <;> grind +``` + +With the {name}`Finite` instance in place, the default implementation of {name}`IteratorCollect` can be used: + +```lean +instance [Iterator (TripleIterator α) m α] [Monad n] : + IteratorCollect (TripleIterator α) m n := + IteratorCollect.defaultImplementation +``` + +{name}`Iter.toArray` now works: +```lean (name := abcToArray) +#eval abc.iter.toArray +``` +```leanOutput abcToArray +#['a', 'b', 'c'] +``` + +To enable the iterator in {keywordOf Lean.Parser.Term.doFor}`for` loops, instances of {name}`IteratorLoopPartial` and {name}`IteratorLoop` are needed: +```lean +instance [Monad m] [Monad n] : + IteratorLoopPartial (TripleIterator α) m n := + .defaultImplementation + +instance [Monad m] [Monad n] : + IteratorLoop (TripleIterator α) m n := + .defaultImplementation +``` +```lean (name := abc) +#eval show IO Unit from do + for x in abc.iter do + IO.println x +``` +```leanOutput abc +a +b +c +``` +:::: + +::::example "Iterators and Effects" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +One way to iterate over the contents of a file is to read a specified number of bytes from a {name IO.FS.Stream}`Stream` at each step. +When EOF is reached, the iterator can close the file by letting its reference count drop to zero: +```lean +structure FileIterator where + stream? : Option IO.FS.Stream + count : USize := 8192 +``` + +An iterator can be created by opening a file and converting its handle to a stream: +```lean +def iterFile + (path : System.FilePath) + (count : USize := 8192) : + IO (IterM (α := FileIterator) IO ByteArray) := do + let h ← IO.FS.Handle.mk path .read + let stream? := some (IO.FS.Stream.ofHandle h) + return toIterM { stream?, count } IO ByteArray +``` + +For this iterator, a {name IterStep.yield}`yield` is plausible when the file is still open, and {name IterStep.done}`done` is plausible when the file is closed. +The actual step function performs a read and closes the file if no bytes were returned: +```lean +instance : Iterator FileIterator IO ByteArray where + IsPlausibleStep it + | .yield .. => + it.internalState.stream?.isSome + | .skip .. => False + | .done => it.internalState.stream?.isNone + step it := do + let { stream?, count } := it.internalState + match stream? with + | none => return .deflate <| .done rfl + | some stream => + let bytes ← stream.read count + let it' := + { it with internalState.stream? := + if bytes.size == 0 then none else some stream + } + return .deflate <| .yield it' bytes (by grind) +``` + +To use it in loops, {name}`IteratorLoop` and {name}`IteratorLoopPartial` instances will be necessary. +In practice, the latter is most important: because file streams may be infinite, the iterator itself may be infinite. +```lean +instance [Monad n] : IteratorLoop FileIterator IO n := + .defaultImplementation + +instance [Monad n] : IteratorLoopPartial FileIterator IO n := + .defaultImplementation +``` + +This is enough support code to use the iterator to calculate file sizes: +```lean +def fileSize (name : System.FilePath) : IO Nat := do + let mut size := 0 + let f := (← iterFile name).allowNontermination + for bytes in f do + size := size + bytes.size + return size +``` + +:::: + +## Accessing Elements + +Some iterators support efficient random access. +For example, an array iterator can skip any number of elements in constant time by incrementing the index that it maintains into the array. + +{docstring IteratorAccess +allowMissing} + +{docstring IterM.nextAtIdx?} + +## Loops + +{docstring IteratorLoop +allowMissing} + +{docstring IteratorLoop.defaultImplementation} + +{docstring LawfulIteratorLoop +allowMissing} + +{docstring IteratorLoopPartial +allowMissing} + +{docstring IteratorLoopPartial.defaultImplementation} + +## Universe Levels + +To make the {tech}[universe levels] of iterators more flexible, a wrapper type {name Std.Shrink}`Shrink` is applied around the result of {name}`Iterator.step`. +This type is presently a placeholder. +It is present to reduce the scope of the breaking change when the full implementation is available. + +{docstring Std.Shrink} + +{docstring Std.Shrink.inflate} + +{docstring Std.Shrink.deflate} + + +## Basic Iterators + +In addition to the iterators provided by collection types, there are two basic iterators that are not connected to any underlying data structure. +{name}`Iter.empty` finishes iteration immediately after yielding no data, and {name}`Iter.repeat` yields the same element forever. +These iterators are primarily useful as parts of larger iterators built with combinators. + +{docstring Iter.empty} + +{docstring IterM.empty} + +{docstring Iter.repeat} + + +# Consuming Iterators + +:::paragraph +There are three primary ways to consume an iterator: + +: Converting it to a sequential data structure + + The functions {name}`Iter.toList`, {name}`Iter.toArray`, and their monadic equivalents {name}`IterM.toList` and {name}`IterM.toArray`, construct a lists or arrays that contain the values from the iterator, in order. + Only {tech}[finite iterators] can be converted to sequential data structures. + +: {keywordOf Lean.Parser.Term.doFor}`for` loops + + A {keywordOf Lean.Parser.Term.doFor}`for` loop can consume an iterator, making each value available in its body. + This requires that the iterator have either an instance of {name}`IteratorLoop` or {name}`IteratorLoopPartial` for the loop's monad. + +: Stepping through iterators + + Iterators can provide their values one-by-one, with client code explicitly requesting each new value in turn. + When stepped through, iterators perform only enough computation to yield the requested value. +::: + + +:::example "Converting Iterators to Lists" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +In {name}`countdown`, an iterator over a range is transformed into an iterator over strings using {name}`Iter.map`. +This call to {name}`Iter.map` does not result in any iteration over the range until {name}`Iter.toList` is called, at which point each element of the range is produced and transformed into a string. +```lean (name := toListEx) +def countdown : String := + let steps : Iter String := (0...10).iter.map (s!"{10 - ·}!\n") + String.join steps.toList + +#eval IO.println countdown +``` +```leanOutput toListEx +10! +9! +8! +7! +6! +5! +4! +3! +2! +1! +``` +::: + +:::example "Converting Infinite Iterators to Lists" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +Attempting to construct a list of all the natural numbers from an iterator fails: +```lean (name := toListInf) +error -keep +def allNats : List Nat := + let steps : Iter Nat := (0...*).iter + steps.toList +``` +The resulting error message states that there is no {name}`Finite` instance: +```leanOutput toListInf +failed to synthesize + Finite (Std.Rxi.Iterator Nat) Id + +Hint: Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` +If the failure to synthesize the instance is due to a missing proof, or if an infinite loop is desirable for an application, then the fact that consuming the iterator may not terminate can be hidden using {name}`Iter.allowNontermination`: +```lean +def allNats : List Nat := + let steps : Iter Nat := (0...*).iter + steps.allowNontermination.toList +``` + +::: + +:::example "Consuming Iterators in Loops" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +This program creates an iterator of strings from a range, and then consumes the strings in a {keywordOf Lean.Parser.Term.doFor}`for` loop: +```lean (name := iterFor) +def countdown (n : Nat) : IO Unit := do + let steps : Iter String := (0...n).iter.map (s!"{n - ·}!") + for i in steps do + IO.println i + IO.println "Blastoff!" + +#eval countdown 5 +``` +```leanOutput iterFor +5! +4! +3! +2! +1! +Blastoff! +``` +::: + +:::example "Consuming Iterators Directly" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +The function {name}`countdown` calls the range iterator's {name Iter.step}`step` function directly, handling each of the three possible cases. +```lean +def countdown (n : Nat) : IO Unit := do + let steps : Iter Nat := (0...n).iter + go steps +where + go iter := do + match iter.step with + | .done _ => pure () + | .skip iter' _ => go iter' + | .yield iter' i _ => do + IO.println s!"{i}!" + if i == 2 then + IO.println s!"Almost there..." + go iter' + termination_by iter.finitelyManySteps +``` +::: + +## Stepping Iterators + +Iterators are manually stepped using {name}`Iter.step` or {name}`IterM.step`. + +{docstring Std.Iterators.Iter.step} + +{docstring Std.Iterators.IterM.step} + +### Termination + +When manually stepping an finite iterator, the termination measures {name Iter.finitelyManySteps}`finitelyManySteps` and {name Iter.finitelyManySkips}`finitelyManySkips` can be used to express that each step brings iteration closer to the end. +The proof automation for {ref "well-founded-recursion"}[well-founded recursion] is pre-configured to prove that recursive calls after steps reduce these measures. + +:::example "Finitely Many Skips" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +This function returns the first element of an iterator, if there is one, or {name}`none` otherwise. +Because the iterator must be productive, it is guaranteed to return an element after at most a finite number of {name PlausibleIterStep.skip}`skip`s. +This function terminates even for infinite iterators. +```lean +def getFirst [Iterator α Id β] [Productive α Id] + (it : @Iter α β) : Option β := + match it.step with + | .done .. => none + | .skip it' .. => getFirst it' + | .yield _ x .. => pure x +termination_by it.finitelyManySkips +``` +::: + +{docstring Iter.finitelyManySteps} + +{docstring IterM.finitelyManySteps} + +{docstring IterM.TerminationMeasures.Finite +allowMissing} + +{docstring Iter.finitelyManySkips} + +{docstring IterM.finitelyManySkips} + +{docstring IterM.TerminationMeasures.Productive +allowMissing} + +## Consuming Pure Iterators + +{docstring Iter.fold} + +{docstring Iter.foldM} + +{docstring Iter.count} + +{docstring Iter.any} + +{docstring Iter.anyM} + +{docstring Iter.all} + +{docstring Iter.allM} + +{docstring Iter.find? +allowMissing} + +{docstring Iter.findM? +allowMissing} + +{docstring Iter.findSome? +allowMissing} + +{docstring Iter.findSomeM? +allowMissing} + +{docstring Iter.atIdx?} + +{docstring Iter.atIdxSlow?} + +## Consuming Monadic Iterators + +{docstring IterM.drain} + +{docstring IterM.fold} + +{docstring IterM.foldM} + +{docstring IterM.count} + +{docstring IterM.any} + +{docstring IterM.anyM} + +{docstring IterM.all} + +{docstring IterM.allM} + +{docstring IterM.find? +allowMissing} + +{docstring IterM.findM? +allowMissing} + +{docstring IterM.findSome? +allowMissing} + +{docstring IterM.findSomeM? +allowMissing} + +{docstring IterM.atIdx?} + +## Collectors + +Collectors consume an iterator, returning all of its data in a list or array. +To be collected, an iterator must be finite and have an {name}`IteratorCollect` or {name}`IteratorCollectPartial` instance. + +{docstring Iter.toArray} + +{docstring IterM.toArray} + +{docstring Iter.toList} + +{docstring IterM.toList} + +{docstring Iter.toListRev} + +{docstring IterM.toListRev} + +{docstring IteratorCollect} + +{docstring IteratorCollect.defaultImplementation} + +{docstring LawfulIteratorCollect +allowMissing} + +{docstring IteratorCollectPartial} + +{docstring IteratorCollectPartial.defaultImplementation} + + +# Iterator Combinators + +The documentation for iterator combinators often includes {deftech}_marble diagrams_ that show the relationship between the elements returned by the underlying iterators and the elements returned by the combinator's iterator. +Marble diagrams provide examples, not full specifications. +These diagrams consist of a number of rows. +Each row shows an example of an iterator's output, where `-` indicates a {name PlausibleIterStep.skip}`skip`, a term indicates a value returned with {name PlausibleIterStep.yield}`yield`, and `⊥` indicates the end of iteration. +Spaces indicate that iteration did not occur. +Unbound identifiers in the marble diagram stand for arbitrary values of the iterator's element type. + + +Vertical alignment in the marble diagram indicates a causal relationship: when two elements are aligned, it means that consuming the iterator in the lower row results in the upper rows being consumed. +In particular, consuming up to the $`n`th column of the lower iterator results in the consumption of the first $`n` columns from the upper iterator. + +:::paragraph +A marble diagram for an identity iterator combinator that returns each element from the underlying iterator looks like this: +``` +it ---a-----b---c----d⊥ +it.id ---a-----b---c----d⊥ +``` +::: +:::paragraph +A marble diagram for an iterator combinator that duplicates each element of the underlying iterator looks like this: +``` +it ---a ---b ---c ---d⊥ +it.double ---a-a---b-b---c-c---d-d⊥ +``` +::: +:::paragraph +The marble diagram for {name}`Iter.filter` shows how some elements of the underlying iterator do not occur in the filtered iterator, but also that stepping the filtered iterator results in a {name PlausibleIterStep.skip}`skip` when the underlying iterator returns a value that doesn't satisfy the predicate: +``` +it ---a--b--c--d-e--⊥ +it.filter ---a-----c-------⊥ +``` +The diagram requires an explanatory note: +> (given that `f a = f c = true` and `f b = f d = d e = false`) +::: +:::paragraph +The diagram for {name}`Iter.zip` shows how consuming the combined iterator consumes the underlying iterators: +``` +left --a ---b --c +right --x --y --⊥ +left.zip right -----(a, x)------(b, y)-----⊥ +``` +The zipped iterator emits {name PlausibleIterStep.skip}`skip`s so long as `left` does. +When `left` emits `a`, the zipped iterator emits one more {name PlausibleIterStep.skip}`skip`. +After this, the zipped iterator switches to consuming `right`, and it emits {name PlausibleIterStep.skip}`skip`s so long as `right` does. +When `right` emits `x`, the zipped iterator emits the pair `(a, x)`. +This interleaving of `left` and `right` continues until one of them stops, at which point the zipped iterator stops. +Blank spaces in the upper rows of the marble diagram indicate that the iterator is not being consumed at that step. +::: + + +## Pure Combinators + +{docstring Std.Iterators.toIterM} + +{docstring Std.Iterators.Iter.toIterM} + +{docstring Std.Iterators.Iter.take} + +{docstring Std.Iterators.Iter.takeWhile} + +{docstring Std.Iterators.Iter.toTake} + +{docstring Std.Iterators.Iter.drop} + +{docstring Std.Iterators.Iter.dropWhile} + +{docstring Std.Iterators.Iter.stepSize} + +{docstring Std.Iterators.Iter.map} + +{docstring Std.Iterators.Iter.mapM} + +{docstring Std.Iterators.Iter.mapWithPostcondition} + +{docstring Std.Iterators.Iter.uLift} + +{docstring Std.Iterators.Iter.flatMap} + +{docstring Std.Iterators.Iter.flatMapM} + +{docstring Std.Iterators.Iter.flatMapAfter} + +{docstring Std.Iterators.Iter.flatMapAfterM} + +{docstring Std.Iterators.Iter.filter} + +{docstring Std.Iterators.Iter.filterM} + +{docstring Std.Iterators.Iter.filterWithPostcondition} + +{docstring Std.Iterators.Iter.filterMap} + +{docstring Std.Iterators.Iter.filterMapM} + +{docstring Std.Iterators.Iter.filterMapWithPostcondition} + +{docstring Std.Iterators.Iter.zip} + +{docstring Iter.attachWith} + + +## Monadic Combinators + +{docstring Std.Iterators.IterM.toIter} + +{docstring Std.Iterators.IterM.take} + +{docstring Std.Iterators.IterM.takeWhile} + +{docstring Std.Iterators.IterM.takeWhileM} + +{docstring Std.Iterators.IterM.takeWhileWithPostcondition} + +{docstring Std.Iterators.IterM.toTake} + +{docstring Std.Iterators.IterM.drop} + +{docstring Std.Iterators.IterM.dropWhile} + +{docstring Std.Iterators.IterM.dropWhileM} + +{docstring Std.Iterators.IterM.dropWhileWithPostcondition} + +{docstring Std.Iterators.IterM.stepSize} + +{docstring Std.Iterators.IterM.map} + +{docstring Std.Iterators.IterM.mapM} + +{docstring Std.Iterators.IterM.mapWithPostcondition} + +{docstring Std.Iterators.IterM.uLift} + +{docstring Std.Iterators.IterM.flatMap} + +{docstring Std.Iterators.IterM.flatMapM} + +{docstring Std.Iterators.IterM.flatMapAfter} + +{docstring Std.Iterators.IterM.flatMapAfterM} + +{docstring Std.Iterators.IterM.filter} + +{docstring Std.Iterators.IterM.filterM} + +{docstring Std.Iterators.IterM.filterWithPostcondition} + +{docstring Std.Iterators.IterM.filterMap} + +{docstring Std.Iterators.IterM.filterMapM} + +{docstring Std.Iterators.IterM.filterMapWithPostcondition} + +{docstring Std.Iterators.IterM.zip} + +{docstring IterM.attachWith} + +# Reasoning About Iterators + +## Reasoning About Consumers + +The iterator library provides a large number of useful lemmas. +Most theorems about finite iterators can be proven by rewriting the statement to one about lists, using the fact that the correspondence between iterator combinators and corresponding list operations has already been proved. +In practice, many of these theorems are already registered as {tactic}`simp` lemmas. + +:::paragraph +The lemmas have a very predictable naming system, and many are in the {tech}[default simp set]. +Some of the most important include: + + * Consumer lemmas such as {name}`Iter.all_toList`, {name}`Iter.any_toList`, and {name}`Iter.foldl_toList` that introduce lists as a model. + + * Simplification lemmas such as {name}`Iter.toList_map` that {name}`Iter.toList_filter` push the list model “inwards” in the goal. + + * Producer lemmas such as {name}`List.toList_iter` and {name}`Array.toList_iter` that replace a producer with a list model, removing iterators from the goal entirely. + +The latter two categories are typically automatic with {tactic}`simp`. +::: + +:::example "Reasoning via Lists" +```imports -show +import Std.Data.Iterators +``` +```lean -show +open Std.Iterators +``` +Every element returned by an iterator that multiplies the numbers consumed some other iterator by two is even. +To prove this statement, {name}`Iter.all_toList`, {name}`Iter.toList_map`, and {name}`Array.toList_iter` are used to replace the statement about iterators with one about lists, after which {tactic}`simp` discharges the goal: +```lean +example (l : Array Nat) : + (l.iter.map (· * 2)).all (· % 2 = 0) := by + rw [← Iter.all_toList] + rw [Iter.toList_map] + rw [Array.toList_iter] + simp +``` + +In fact, because most of the needed lemmas are in the {tech}[default simp set], the proof can be quite short: +```lean +example (l : Array Nat) : + (l.iter.map (· * 2)).all (· % 2 = 0) := by + simp [← Iter.all_toList] +``` +::: + +## Stepwise Reasoning + +When there are not enough lemmas to prove a property by rewriting to a list model, it can be necessary to prove things about iterators by reasoning directly about their step functions. +The induction principles in this section are useful for stepwise reasoning. + +{docstring Iter.inductSkips} + +{docstring IterM.inductSkips} + +{docstring Iter.inductSteps} + +{docstring IterM.inductSteps} + +The standard library also includes lemmas for the stepwise behavior of all the producers and combinators. +Examples include {name}`List.step_iter_nil`, {name}`List.step_iter_cons`, {name}`IterM.step_map`. + +## Monads for Reasoning + +{docstring Std.Iterators.PostconditionT} + +{docstring Std.Iterators.PostconditionT.run} + +{docstring Std.Iterators.PostconditionT.lift} + +{docstring Std.Iterators.PostconditionT.liftWithProperty} + +{docstring Iter.IsPlausibleIndirectOutput +allowMissing} + +{docstring HetT} + +{docstring IterM.stepAsHetT} + +{docstring HetT.lift} + +{docstring HetT.prun} + +{docstring HetT.pure} + +{docstring HetT.map} + +{docstring HetT.pmap} + +{docstring HetT.bind} + +{docstring HetT.pbind} + +## Equivalence + +Iterator equivalence is defined in terms of the observable behavior of iterators, rather than their implementations. +In particular, the internal state is ignored. + +{docstring Iter.Equiv} + +{docstring IterM.Equiv} diff --git a/Manual/Meta/Example.lean b/Manual/Meta/Example.lean index 882346526..ce74b7e28 100644 --- a/Manual/Meta/Example.lean +++ b/Manual/Meta/Example.lean @@ -108,11 +108,9 @@ def renderExampleContent (exampleBlocks : List String) : String := /-- A domain for named examples -/ def examples : Domain := {} -@[directive_expander «example»] -def «example» : DirectiveExpander - | args, contents => do - let cfg ← parseThe ExampleConfig args - +@[directive] +def «example» : DirectiveExpanderOf ExampleConfig + | cfg, contents => do let description ← DocElabM.withFileMap cfg.description.1 <| cfg.description.2.mapM elabInline @@ -136,10 +134,11 @@ def «example» : DirectiveExpander if cfg.keep then exampleCode else withoutModifyingEnv <| exampleCode let liveLinkContent := if acc = [] then none else some (renderExampleContent acc) + -- Examples are represented using the first block to hold the description. Storing it in the JSON -- entails repeated (de)serialization. - pure #[← ``(Block.other (Block.example $(quote descriptionString) $(quote cfg.tag) (opened := $(quote cfg.opened)) $(quote liveLinkContent)) - #[Block.para #[$description,*], $blocks,*])] + ``(Block.other (Block.example $(quote descriptionString) $(quote cfg.tag) (opened := $(quote cfg.opened)) $(quote liveLinkContent)) + #[Block.para #[$description,*], $blocks,*]) @[block_extension «example»] def example.descr : BlockDescr where diff --git a/Manual/Simp.lean b/Manual/Simp.lean index 50e665d01..9d4638c6a 100644 --- a/Manual/Simp.lean +++ b/Manual/Simp.lean @@ -331,7 +331,7 @@ tag := "simp-sets" %%% A collection of rules used by the simplifier is called a {deftech}_simp set_. -A simp set is specified in terms of modifications from a _default simp set_. +A simp set is specified in terms of modifications from a {deftech}_default simp set_. These modifications can include adding rules, removing rules, or adding a set of rules. The `only` modifier to the {tactic}`simp` tactic causes it to start with an empty simp set, rather than the default one. Rules are added to the default simp set using the {attr}`simp` attribute. diff --git a/Manual/Terms.lean b/Manual/Terms.lean index a7a8dda80..a44560cd1 100644 --- a/Manual/Terms.lean +++ b/Manual/Terms.lean @@ -232,7 +232,7 @@ open D ## Leading `.` When an identifier begins with a dot (`.`), the type that the elaborator expects for the expression is used to resolve it, rather than the current namespace and set of open namespaces. -{tech}[Generalized field notation] is related: leading dot notation uses the expected type of the identifier to resolve it to a name, while field notation uses the inferred type of the term immediately prior to the dot. +{tech}[Generalized field notation] is related: this {deftech}_leading dot notation_ uses the expected type of the identifier to resolve it to a name, while field notation uses the inferred type of the term immediately prior to the dot. Identifiers with a leading `.` are to be looked up in the {deftech}_expected type's namespace_. If the type expected for a term is a constant applied to zero or more arguments, then its namespace is the constant's name. diff --git a/lake-manifest.json b/lake-manifest.json index 2ecda3806..680981665 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "c5f95b7296ee0d6ecfadd346c457e75cb838e853", + "rev": "795bc01672b6f616aeaa24a7c7c4b32b20fe9c04", "name": "verso", "manifestFile": "lake-manifest.json", "inputRev": "main",