From e6e7de4972731d42a3f12206859371d6c74b8e2c Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 14:36:16 -0700
Subject: [PATCH 1/6] Update compiler_test.go

---
 compiler/compiler_test.go | 270 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)

diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go
index a97081165..301eef1d3 100644
--- a/compiler/compiler_test.go
+++ b/compiler/compiler_test.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/inspirer/textmapper/grammar/grammar"
 	"github.com/inspirer/textmapper/parsers/parsertest"
 	"github.com/inspirer/textmapper/parsers/tm"
 	"github.com/inspirer/textmapper/parsers/tm/ast"
@@ -36,6 +37,7 @@ var testFiles = []string{
 	"inject.tmerr",
 	"flexmode.tmerr",
 	"max_la.tmerr",
+	"disabled_syntax.tmerr",
 }
 
 func TestErrors(t *testing.T) {
@@ -191,3 +193,271 @@ func TestDebugInfo(t *testing.T) {
 		}
 	}
 }
+
+// TestArgRef checks the argument references recorded for the commands of each rule.
+func TestArgRef(t *testing.T) {
+	header := `
+		language medium(cc);
+
+		namespace = "medium"
+
+		:: lexer
+
+		KW_A: /a/
+		KW_B: /b/
+		KW_C: /c/
+		KW_D: /d/
+		',': /,/
+
+		:: parser
+
+		%input Z;
+
+		a {std::string}: KW_A;
+		b {int}: KW_B;
+		c {int*}: KW_C;
+		d {double}: KW_D;
+	`
+
+	testCases := []struct {
+		input string
+		// If not provided, default to the start symbol "Z".
+		symbol    string
+		want      [][]string
+		wantMulti [][][]string
+	}{
+		// Section: Optional.
+		{
+			input: "Z: a? {};",
+			want:  [][]string{{`$1[a]?`}},
+		},
+		{
+			input: `Z: c b?;`,
+			// No arg refs are collected because there are no semantic actions.
+			want: [][]string{},
+		},
+		{
+			input: `Z: (a b)? {};`,
+			want:  [][]string{{`$1[a]?`, `$2[b]?`}},
+		},
+		// Mid rule.
+		{
+			input: `Z: b? {} c;`,
+			// Only b is collected because c is after the mid rule.
+			want: [][]string{{`$1[b]?`}},
+		},
+		// Mid rule and semantic action.
+		{
+			input: `Z: b? {} c {};`,
+			// For the mid rule, only b is collected; for the semantic action, both b and c are collected.
+			want: [][]string{{`$1[b]?`}, {`$1[b]?`, `$2[c]`}},
+		},
+		// Duplicate symbol names.
+		{
+			input: `Z: (a a)? {};`,
+			want:  [][]string{{`$1[a]?`, `$2[a]?`}},
+		},
+		// Duplicate symbol names.
+		{
+			input: `Z: a? a {};`,
+			want:  [][]string{{`$1[a]?`, `$2[a]`}},
+		},
+		// Optional terminal.
+		{
+			input: `Z: KW_A? {};`,
+			want:  [][]string{{`$1[KW_A]?`}},
+		},
+		// With state marker.
+		{
+			input: `Z: a? .my_state b {};`,
+			want:  [][]string{{`$1[a]?`, `$2[b]`}},
+		},
+		// Section: List
+		{
+			input: `Z: a+ {};`,
+			want:  [][]string{{`$1[a_list]`}},
+		},
+		{
+			input: `Z: a* {};`,
+			want:  [][]string{{`$1[a_optlist]`}},
+		},
+		// List with separator.
+		{
+			input: `Z: (a separator ',')+ {};`,
+			want:  [][]string{{`$1[a_list_Comma_separated]`}},
+		},
+		// List with separator.
+		{
+			input: `Z: (a separator ',')* {};`,
+			want:  [][]string{{`$1[a_list_Comma_separatedopt]`}},
+		},
+		// List of terminals.
+		{
+			input: `Z: KW_A+ {};`,
+			want:  [][]string{{`$1[KWA_list]`}},
+		},
+		// Semantic action inside list.
+		{
+			input:  `Z: ( a {} )+ {};`,
+			symbol: "a_list",
+			want:   [][]string{{`$1[a]`}},
+		},
+		// Section: Alternating group.
+		{
+			input: `Z: (a | b) {};`,
+			want:  [][]string{{`$1[a]?`, `$2[b]?`}},
+		},
+		{
+			// Commands in alternating groups.
+			// cmd1 only has access to b, and cmd2 only has access to c. cmd3 has access to all of a,
+			// b, and c.
+			input: `Z: a ( b {cmd1} | c {cmd2} ) {cmd3};`,
+			wantMulti: [][][]string{
+				// cmd1
+				{{`$2[b]?`}, {`$1[a]`, `$2[b]?`, `$3[c]?`}},
+				// cmd2
+				{{`$3[c]?`}, {`$1[a]`, `$2[b]?`, `$3[c]?`}},
+			},
+		},
+		// Terminals in alternating group.
+		{
+			input: `Z: (KW_A | KW_B) {};`,
+			want:  [][]string{{`$1[KW_A]?`, `$2[KW_B]?`}},
+		},
+		// Section: Nested syntax extensions.
+		{
+			input: `Z: (a? | b) {};`,
+			want:  [][]string{{`$1[a]?`, `$2[b]?`}},
+		},
+		{
+			input: `Z: (a+ | b*) {};`,
+			want:  [][]string{{`$1[a_list]?`, `$2[b_optlist]?`}},
+		},
+		{
+			// Nested choice inside a list
+			input: `Z: ( a {cmd1} | b {cmd2} )+ {cmd3};`,
+			want:  [][]string{{`$1[Z$1]`}},
+		},
+		{
+			// List inside nested choice
+			input: `Z: (a+ {cmd1} | (b{cmd2})* ) {};`,
+			wantMulti: [][][]string{
+				// (a+ {cmd1}) {};
+				{{`$1[a_list]?`}, {`$1[a_list]?`, `$2[b_optlist]?`}},
+				// b{cmd2}* {};
+				{{`$1[a_list]?`, `$2[b_optlist]?`}},
+			},
+		},
+		{
+			input: `Z: (a? b)+ {};`,
+			want:  [][]string{{`$1[Z$1]`}},
+		},
+		{
+			input: `Z: (a | b)* {};`,
+			want:  [][]string{{`$1[Z$1]`}},
+		},
+		// Section: Set.
+		{
+			input: `Z: set(KW_A | KW_B) {};`,
+			want:  [][]string{{`$1[setof_KW_A_or_KW_B]`}},
+		},
+	}
+
+	for _, tc := range testCases {
+		input := header + tc.input
+		parsed, err := parseToGrammar(input)
+		if err != nil {
+			t.Fatalf("cannot parse %q: %v", input, err)
+		}
+		// Inspect the requested nonterminal, defaulting to the start symbol.
+		sym := tc.symbol
+		if sym == "" {
+			sym = "Z"
+		}
+		nt := getNt(parsed.Parser.Nonterms, sym)
+		if nt == nil {
+			t.Fatalf("cannot find the symbol %q for input %q", sym, tc.input)
+		}
+		rules := []*syntax.Expr{nt.Value}
+		if nt.Value.Kind == syntax.Choice {
+			rules = nt.Value.Sub
+		}
+		// wantMulti is indexed by rule below; fail readably instead of panicking.
+		if tc.wantMulti != nil && len(tc.wantMulti) < len(rules) {
+			t.Fatalf("have %v rules but only %v wantMulti entries for input %q", len(rules), len(tc.wantMulti), tc.input)
+		}
+
+		for i, rule := range rules {
+			got := gotArgRefs(rule, parsed)
+			var want string
+			if tc.wantMulti != nil {
+				want = fmt.Sprintf("%+v", tc.wantMulti[i])
+			} else {
+				want = fmt.Sprintf("%+v", tc.want)
+			}
+			if got != want {
+				t.Errorf("got %v, want %v for input %q", got, want, tc.input)
+			}
+		}
+	}
+}
+
+// parseToGrammar parses a grammar string and returns the corresponding compiled model.
+func parseToGrammar(content string) (*grammar.Grammar, error) {
+	ctx := context.Background()
+	filename := "test.tm"
+	_, err := ast.Parse(ctx, filename, content, tm.StopOnFirstError)
+	if err != nil {
+		return nil, fmt.Errorf("%v: parsing failed with %w", filename, err)
+	}
+
+	return Compile(ctx, filename, content, Params{DebugTables: true})
+}
+
+func getNt(nts []*syntax.Nonterm, name string) *syntax.Nonterm {
+	for i := range nts { // linear scan; the first nonterminal with a matching name wins
+		if candidate := nts[i]; candidate.Name == name {
+			return candidate
+		}
+	}
+	return nil // no nonterminal is called `name`
+}
+
+func serializeArgRef(ref syntax.ArgRef, grammar *grammar.Grammar) string {
+	suffix := "" // becomes "?" for references flagged as Optional
+	if ref.Optional {
+		suffix = "?"
+	}
+	return fmt.Sprintf("$%v[%v]%v", ref.Pos, grammar.Syms[ref.Symbol].Name, suffix)
+}
+
+func serializeArgRefs(refs map[int]syntax.ArgRef, grammar *grammar.Grammar) string {
+	// Map iteration order is random, so put the positions in ascending order first.
+	positions := make([]int, 0, len(refs))
+	for pos := range refs {
+		positions = append(positions, pos)
+	}
+	sort.Ints(positions)
+	parts := make([]string, len(positions))
+	for i, pos := range positions {
+		parts[i] = serializeArgRef(refs[pos], grammar)
+	}
+	return "[" + strings.Join(parts, " ") + "]"
+}
+
+// gotArgRefs renders the argument references attached to the nodes reached from e as
+// "[group group ...]". A group is the serializeArgRefs output for one node; only nodes that
+// have a non-nil CmdArgs with non-nil ArgRefs contribute, and groups appear in the order in
+// which ForEach visits those nodes.
+func gotArgRefs(e *syntax.Expr, grammar *grammar.Grammar) string {
+	var groups []string
+	// NOTE(review): -1 presumably makes ForEach visit nodes of every kind; confirm in syntax.
+	e.ForEach(-1, func(node *syntax.Expr) {
+		args := node.CmdArgs
+		if args == nil || args.ArgRefs == nil {
+			return
+		}
+		groups = append(groups, serializeArgRefs(args.ArgRefs, grammar))
+	})
+	return "[" + strings.Join(groups, " ") + "]"
+}

From 5893d5778275ef4e371d7adadd263f40ecaa5e6d Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 15:16:07 -0700
Subject: [PATCH 2/6] Upstream changes to enable C++ semantic actions to
 co-exist with optional, choice, and list syntaxes for ZetaSQL

---
 compiler/compiler.go               |  80 ++++++---
 compiler/options.go                |   2 +
 compiler/syntax.go                 | 253 +++++++++++++++++++++++------
 gen/funcs.go                       | 185 ++++++++++++++-------
 gen/funcs_test.go                  |  46 +++---
 gen/templates/cc_parser_cc.go.tmpl |  29 ++--
 gen/templates/cc_parser_h.go.tmpl  |  19 ++-
 grammar/grammar.go                 |  85 ++++++++--
 syntax/expand.go                   | 253 +++++++++++++++++++++++++++--
 syntax/set_test.go                 |   2 +-
 syntax/syntax.go                   |  68 +++++++-
 syntax/syntax_test.go              |  28 ++--
 12 files changed, 820 insertions(+), 230 deletions(-)

diff --git a/compiler/compiler.go b/compiler/compiler.go
index c94031d99..605d164dd 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -211,14 +211,42 @@ func checkLookaheads(m *syntax.Model, maxSize int) error {
 	return s.Err()
 }
 
+// checkSyntaxes reports an error for every use of a construct listed in opts.DisableSyntax.
+func checkSyntaxes(m *syntax.Model, opts *grammar.Options) error {
+	var s status.Status
+	disabled := make(map[string]bool, len(opts.DisableSyntax))
+	for _, name := range opts.DisableSyntax {
+		disabled[name] = true
+	}
+	var visit func(e *syntax.Expr, top bool)
+	visit = func(e *syntax.Expr, top bool) {
+		if !top && e.Kind == syntax.Choice && disabled["NestedChoice"] {
+			s.Errorf(e.Origin, "parenthesized Choice operator is not supported")
+		}
+		if kind := e.Kind.GoString(); disabled[kind] {
+			s.Errorf(e.Origin, "syntax %v is not supported", kind)
+		}
+		for _, child := range e.Sub {
+			visit(child, false)
+		}
+	}
+	for _, nt := range m.Nonterms {
+		if disabled["Templates"] && len(nt.Params) > 0 {
+			s.Errorf(nt.Origin, "templates are not supported")
+		}
+		visit(nt.Value, true)
+	}
+	return s.Err()
+}
+
 func (c *compiler) compileParser(file ast.File) {
 	p, ok := file.Parser()
 	if !ok || !c.out.Options.GenParser {
 		// Lexer-only grammar.
 		return
 	}
-
-	loader := newSyntaxLoader(c.resolver, c.out.Options, c.Status)
+	target, _ := file.Header().Target()
+	loader := newSyntaxLoader(c.resolver, target.Text(), c.out.Options, c.Status)
 	loader.load(p, file.Header())
 	if c.Err() != nil {
 		// Parsing errors cause inconsistencies inside c.source. Aborting.
@@ -228,6 +256,15 @@ func (c *compiler) compileParser(file ast.File) {
 	c.out.Parser.Prec = loader.prec
 
 	source := loader.out
+
+	if len(c.out.Options.DisableSyntax) > 0 {
+		err := checkSyntaxes(source, c.out.Options)
+		if err != nil {
+			c.AddError(err)
+			return
+		}
+	}
+
 	if err := syntax.PropagateLookaheads(source); err != nil {
 		c.AddError(err)
 		return
@@ -273,7 +310,7 @@ func (c *compiler) compileParser(file ast.File) {
 		}
 	}
 
-	if err := syntax.Expand(source); err != nil {
+	if err := syntax.Expand(source, loader.expandOpts); err != nil {
 		c.AddError(err)
 		return
 	}
@@ -487,15 +524,11 @@ func generateTables(source *syntax.Model, out *grammar.Grammar, opts genOptions,
 							for _, r := range rule.RHS {
 								if r.IsStateMarker() {
 									s.Errorf(origin, "mixing mid-rule actions with state markers is not supported")
-									continue
-								}
-								if int(r) < len(out.Syms) {
-									vars.Types = append(vars.Types, out.Syms[r].Type)
 								} else {
-									// No types for extracted commands.
-									vars.Types = append(vars.Types, "")
+									vars.SymRefCount++
 								}
 							}
+							addTypes(vars, out.Syms)
 						}
 						cmdNT := midrule.extract(nt, command, vars, cmdOrigin)
 						rule.RHS = append(rule.RHS, cmdNT)
@@ -549,16 +582,11 @@ func generateTables(source *syntax.Model, out *grammar.Grammar, opts genOptions,
 				if args != nil {
 					act.Vars = &grammar.ActionVars{CmdArgs: *args, Remap: actualPos}
 					for _, r := range rule.RHS {
-						if r.IsStateMarker() {
-							continue
-						}
-						if int(r) < len(out.Syms) {
-							act.Vars.Types = append(act.Vars.Types, out.Syms[r].Type)
-						} else {
-							// No types for extracted commands.
-							act.Vars.Types = append(act.Vars.Types, "")
+						if !r.IsStateMarker() {
+							act.Vars.SymRefCount++
 						}
 					}
+					addTypes(act.Vars, out.Syms)
 					act.Vars.LHSType = out.Syms[rule.LHS].Type
 				}
 				rule.Action = len(parser.Actions)
@@ -584,6 +612,20 @@ func generateTables(source *syntax.Model, out *grammar.Grammar, opts genOptions,
 	return err
 }
 
+// addTypes rebuilds vars.Types: for every entry of vars.CmdArgs.ArgRefs it records, keyed by the
+// reference position, the type of the referenced symbol in `syms`.
+func addTypes(vars *grammar.ActionVars, syms []grammar.Symbol) {
+	vars.Types = make(map[int]string, len(vars.CmdArgs.ArgRefs))
+	for _, ref := range vars.CmdArgs.ArgRefs {
+		if ref.Symbol >= 0 && ref.Symbol < len(syms) {
+			vars.Types[ref.Pos] = syms[ref.Symbol].Type
+		} else {
+			// No types for extracted commands or for unresolved (-1) symbols.
+			vars.Types[ref.Pos] = ""
+		}
+	}
+}
+
 type commandExtractor struct {
 	baseSyms  int
 	takenName map[string]bool
@@ -606,7 +648,7 @@ type commandKey struct {
 func newCommandExtractor(m *syntax.Model, baseSyms int) *commandExtractor {
 	taken := make(map[string]bool)
 	for _, t := range m.Terminals {
-		taken[t] = true
+		taken[t.Name] = true
 	}
 	for _, p := range m.Params {
 		taken[p.Name] = true
@@ -645,7 +687,7 @@ func (e *commandExtractor) extract(n *syntax.Nonterm, command string, vars *gram
 
 		// Give a hint to the code generator that this rule's rhs starts
 		// earlier in the stack.
-		args.Delta = -len(vars.Types)
+		args.Delta = -vars.SymRefCount
 
 		// Make a copy.
 		copy := *vars
diff --git a/compiler/options.go b/compiler/options.go
index 7645458a2..5e0f5f66e 100644
--- a/compiler/options.go
+++ b/compiler/options.go
@@ -103,6 +103,8 @@ func (p *optionsParser) parseFrom(file ast.File) {
 			opts.NoEmptyRules = p.parseExpr(opt.Value(), opts.NoEmptyRules).(bool)
 		case "maxLookahead":
 			opts.MaxLookahead = p.parseExpr(opt.Value(), opts.MaxLookahead).(int)
+		case "disableSyntax":
+			opts.DisableSyntax = p.parseExpr(opt.Value(), opts.DisableSyntax).([]string)
 		case "eventFields":
 			p.validLangs(opt.Key(), "go")
 			opts.EventFields = p.parseExpr(opt.Value(), opts.EventFields).(bool)
diff --git a/compiler/syntax.go b/compiler/syntax.go
index 061078d8b..ca82bd939 100644
--- a/compiler/syntax.go
+++ b/compiler/syntax.go
@@ -36,21 +36,31 @@ type syntaxLoader struct {
 	nonterms  map[string]int // -> index in source.Nonterms
 	cats      map[string]int // -> index in source.Cats
 	paramPerm []int          // for parameter permutations
-	rhsPos    int            // Counter for positional index of a reference in the current rule.
-	rhsNames  map[string]int
+	ruleStack []*rhsRule
+
+	expandOpts *syntax.ExpandOptions
 }
 
-func newSyntaxLoader(resolver *resolver, opts *grammar.Options, s *status.Status) *syntaxLoader {
+func newSyntaxLoader(resolver *resolver, targetLang string, opts *grammar.Options, s *status.Status) *syntaxLoader {
+	var expandOpts *syntax.ExpandOptions
+	switch targetLang {
+	case "cc":
+		expandOpts = syntax.CcExpandOptions()
+	default:
+		expandOpts = &syntax.ExpandOptions{}
+	}
+
 	return &syntaxLoader{
 		resolver:     resolver,
 		noEmptyRules: opts.NoEmptyRules,
 		optSuffix:    opts.OptInstantiationSuffix,
 		Status:       s,
 
-		namedSets: make(map[string]int),
-		params:    make(map[string]int),
-		nonterms:  make(map[string]int),
-		cats:      make(map[string]int),
+		namedSets:  make(map[string]int),
+		params:     make(map[string]int),
+		nonterms:   make(map[string]int),
+		cats:       make(map[string]int),
+		expandOpts: expandOpts,
 	}
 }
 
@@ -497,13 +507,24 @@ func (c *syntaxLoader) instantiateOpt(name string, origin ast.Symref) (int, bool
 
 	var ref *syntax.Expr
 	target := strings.TrimSuffix(name, c.optSuffix)
+	var sym int
+	var symType string
 	if index, ok := c.resolver.syms[target]; ok {
-		nt.Type = c.resolver.Syms[index].Type
-		ref = &syntax.Expr{Kind: syntax.Reference, Symbol: index, Origin: origin, Model: c.out}
+		// Opt-terminal is also supported, e.g. KW_Aopt for KW_A.
+		sym = index
+		symType = c.resolver.Syms[sym].Type
+		if c.expandOpts.OptionalType != nil {
+			nt.Type = c.expandOpts.OptionalType(symType)
+		}
+		ref = &syntax.Expr{Kind: syntax.Reference, Symbol: sym, Origin: origin, Model: c.out, Pos: 1}
 	} else if nonterm, ok := c.nonterms[target]; ok {
-		nt.Type = c.out.Nonterms[nonterm].Type
+		sym = c.resolver.NumTokens + nonterm
+		symType = c.out.Nonterms[nonterm].Type
+		if c.expandOpts.OptionalType != nil {
+			nt.Type = c.expandOpts.OptionalType(symType)
+		}
 		nt.Params = c.out.Nonterms[nonterm].Params
-		ref = &syntax.Expr{Kind: syntax.Reference, Symbol: c.resolver.NumTokens + nonterm, Origin: origin, Model: c.out}
+		ref = &syntax.Expr{Kind: syntax.Reference, Symbol: sym, Origin: origin, Model: c.out, Pos: 1}
 		for _, param := range nt.Params {
 			ref.Args = append(ref.Args, syntax.Arg{Param: param, TakeFrom: param})
 		}
@@ -513,6 +534,15 @@ func (c *syntaxLoader) instantiateOpt(name string, origin ast.Symref) (int, bool
 	}
 	nt.Value = &syntax.Expr{Kind: syntax.Optional, Sub: []*syntax.Expr{ref}, Origin: origin}
 
+	if nt.Type != "" && c.expandOpts.OptionalCmd != nil {
+		refs := map[int]syntax.ArgRef{
+			1: syntax.ArgRef{Pos: 1, Kind: "reference", Optional: true, Symbol: sym},
+		}
+		cmdArgs := &syntax.CmdArgs{MaxPos: 2, Names: map[string]int{target: 1}, ArgRefs: refs}
+		cmd := &syntax.Expr{Kind: syntax.Command, Name: c.expandOpts.OptionalCmd(symType), CmdArgs: cmdArgs, Origin: origin}
+		nt.Value = &syntax.Expr{Kind: syntax.Sequence, Sub: []*syntax.Expr{nt.Value, cmd}, Origin: origin}
+	}
+
 	c.nonterms[name] = len(c.out.Nonterms)
 	index := c.resolver.NumTokens + len(c.out.Nonterms)
 	c.out.Nonterms = append(c.out.Nonterms, nt)
@@ -727,60 +757,79 @@ func (c *syntaxLoader) convertSeparator(sep ast.ListSeparator) *syntax.Expr {
 	}
 }
 
-func (c *syntaxLoader) allocatePos() int {
-	ret := c.rhsPos
-	c.rhsPos++
-	return ret
+func (c *syntaxLoader) allocatePos(underOpts bool, kind string, sym int) int {
+	rule := c.currentRule()
+	pos := rule.nextPos()
+	rule.incPos()
+	ref := syntax.ArgRef{Pos: pos, Kind: kind, Optional: underOpts, Symbol: sym}
+	rule.argRefs = append(rule.argRefs, ref)
+	return pos
 }
 
 func (c *syntaxLoader) pushName(name string, pos int) {
-	if c.rhsNames == nil {
-		c.rhsNames = make(map[string]int)
+	rule := c.currentRule()
+	// Names need to be unique across the top-level rule.
+	var topNames map[string]int
+	names := rule.names
+	if rule.top == nil {
+		topNames = rule.names
+	} else {
+		topNames = rule.top.names
 	}
 	var index int
-	if _, ok := c.rhsNames[name+"#0"]; ok {
+	if _, ok := topNames[name+"#0"]; ok {
 		for {
 			index++
-			if _, ok := c.rhsNames[fmt.Sprintf("%v#%v", name, index)]; !ok {
+			if _, ok := topNames[fmt.Sprintf("%v#%v", name, index)]; !ok {
 				break
 			}
 		}
-	} else if val, ok := c.rhsNames[name]; ok {
-		c.rhsNames[name+"#0"] = val
-		delete(c.rhsNames, name)
+	} else if val, ok := topNames[name]; ok {
+		topNames[name+"#0"] = val
+		names[name+"#0"] = val
+		delete(topNames, name)
+		delete(names, name)
 		index = 1
 	}
 	if index > 0 {
 		name = fmt.Sprintf("%v#%v", name, index)
 	}
-	c.rhsNames[name] = pos
+	topNames[name] = pos
+	names[name] = pos
 }
 
-func (c *syntaxLoader) convertPart(p ast.RhsPart, nonterm *syntax.Nonterm) *syntax.Expr {
+func (c *syntaxLoader) convertPart(p ast.RhsPart, nonterm *syntax.Nonterm, underOpts bool) *syntax.Expr {
+	rhs := c.currentRule()
 	switch p := p.(type) {
 	case *ast.Command:
-		args := &syntax.CmdArgs{MaxPos: c.rhsPos}
-		if len(c.rhsNames) > 0 {
+		args := &syntax.CmdArgs{MaxPos: rhs.nextPos()}
+		if len(rhs.names) > 0 {
 			// Only names and references preceding the command are available to its code.
 			// Note: the list below can include entities from a different alternative but
 			// they'll be automatically filtered later on.
 			args.Names = make(map[string]int)
-			for k, v := range c.rhsNames {
+			for k, v := range rhs.names {
 				args.Names[k] = v
 			}
 		}
+		if len(rhs.argRefs) > 0 {
+			args.ArgRefs = make(map[int]syntax.ArgRef)
+			for _, argRef := range rhs.argRefs {
+				args.ArgRefs[argRef.Pos] = argRef
+			}
+		}
 		text := p.Text()
 		return &syntax.Expr{Kind: syntax.Command, Name: text, CmdArgs: args, Origin: p}
 	case *ast.RhsAssignment:
-		inner := c.convertPart(p.Inner(), nonterm)
+		inner := c.convertPart(p.Inner(), nonterm, underOpts)
 		name := p.Id().Text()
 		subs := []*syntax.Expr{inner}
 		return &syntax.Expr{Kind: syntax.Assign, Name: name, Sub: subs, Origin: p}
 	case *ast.RhsPlusAssignment:
-		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm)}
+		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm, underOpts)}
 		return &syntax.Expr{Kind: syntax.Append, Name: p.Id().Text(), Sub: subs, Origin: p}
 	case *ast.RhsAlias:
-		ret := c.convertPart(p.Inner(), nonterm)
+		ret := c.convertPart(p.Inner(), nonterm, underOpts)
 
 		name := p.Name().Text()
 		if ret.Pos > 0 {
@@ -808,39 +857,50 @@ func (c *syntaxLoader) convertPart(p ast.RhsPart, nonterm *syntax.Nonterm) *synt
 		}
 		return &syntax.Expr{Kind: syntax.Lookahead, Sub: subs, Origin: p}
 	case *ast.RhsNested:
-		return c.convertRules(p.Rule0(), nonterm, report{} /*defaultReport*/, false /*topLevel*/, p)
+		return c.convertRules(p.Rule0(), nonterm, report{} /*defaultReport*/, false /*topLevel*/, underOpts, p)
 	case *ast.RhsOptional:
-		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm)}
+		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm, true /*underOpts*/)}
 		return &syntax.Expr{Kind: syntax.Optional, Sub: subs, Origin: p}
 	case *ast.RhsPlusList:
-		seq := c.convertSequence(p.RuleParts(), nonterm, false /*topLevel*/, p)
+		c.pushRule(true /*topLevel*/)
+		seq := c.convertSequence(p.RuleParts(), nonterm, false /*topLevel*/, underOpts, p)
+		c.popRule()
 		subs := []*syntax.Expr{seq}
 		if sep := c.convertSeparator(p.ListSeparator()); sep.Kind != syntax.Empty {
 			subs = []*syntax.Expr{seq, sep}
 		}
-		return &syntax.Expr{Kind: syntax.List, Sub: subs, ListFlags: syntax.OneOrMore, Pos: c.allocatePos(), Origin: p}
+		return &syntax.Expr{Kind: syntax.List, Sub: subs, ListFlags: syntax.OneOrMore, Pos: c.allocatePos(underOpts, "plusList", -1 /*sym*/), Origin: p}
 	case *ast.RhsStarList:
-		seq := c.convertSequence(p.RuleParts(), nonterm, false /*topLevel*/, p)
+		c.pushRule(true /*topLevel*/)
+		seq := c.convertSequence(p.RuleParts(), nonterm, false /*topLevel*/, underOpts, p)
+		c.popRule()
 		subs := []*syntax.Expr{seq}
 		if sep := c.convertSeparator(p.ListSeparator()); sep.Kind != syntax.Empty {
 			subs = []*syntax.Expr{seq, sep}
 		}
-		return &syntax.Expr{Kind: syntax.List, Sub: subs, Pos: c.allocatePos(), Origin: p}
+		return &syntax.Expr{Kind: syntax.List, Sub: subs, Pos: c.allocatePos(underOpts, "starList", -1 /*sym*/), Origin: p}
 	case *ast.RhsPlusQuantifier:
-		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm)}
-		return &syntax.Expr{Kind: syntax.List, Sub: subs, ListFlags: syntax.OneOrMore, Pos: c.allocatePos(), Origin: p}
+		c.pushRule(true /*topLevel*/)
+		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm, underOpts)}
+		c.popRule()
+		return &syntax.Expr{Kind: syntax.List, Sub: subs, ListFlags: syntax.OneOrMore, Pos: c.allocatePos(underOpts, "plusQuantifier", -1 /*sym*/), Origin: p}
 	case *ast.RhsStarQuantifier:
-		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm)}
-		return &syntax.Expr{Kind: syntax.List, Sub: subs, Pos: c.allocatePos(), Origin: p}
+		c.pushRule(true /*topLevel*/)
+		subs := []*syntax.Expr{c.convertPart(p.Inner(), nonterm, underOpts)}
+		c.popRule()
+		return &syntax.Expr{Kind: syntax.List, Sub: subs, Pos: c.allocatePos(underOpts, "starQuantifier", -1 /*sym*/), Origin: p}
 	case *ast.RhsSet:
+		c.pushRule(true /*topLevel*/)
 		set := c.convertSet(p.Expr())
+		c.popRule()
 		index := len(c.out.Sets)
 		c.out.Sets = append(c.out.Sets, set)
-		return &syntax.Expr{Kind: syntax.Set, Pos: c.allocatePos(), SetIndex: index, Origin: p, Model: c.out}
+		return &syntax.Expr{Kind: syntax.Set, Pos: c.allocatePos(underOpts, "set", -1 /*sym*/), SetIndex: index, Origin: p, Model: c.out}
 	case *ast.RhsSymbol:
 		sym, args := c.resolveRef(p.Reference(), nonterm)
-		c.pushName(p.Reference().Name().Text(), c.rhsPos)
-		return &syntax.Expr{Kind: syntax.Reference, Symbol: sym, Args: args, Pos: c.allocatePos(), Origin: p, Model: c.out}
+		pos := c.allocatePos(underOpts, "reference", sym)
+		c.pushName(p.Reference().Name().Text(), pos)
+		return &syntax.Expr{Kind: syntax.Reference, Symbol: sym, Args: args, Pos: pos, Origin: p, Model: c.out}
 	case *ast.StateMarker:
 		return &syntax.Expr{Kind: syntax.StateMarker, Name: p.Name().Text(), Origin: p}
 	case *ast.SyntaxProblem:
@@ -851,7 +911,7 @@ func (c *syntaxLoader) convertPart(p ast.RhsPart, nonterm *syntax.Nonterm) *synt
 	return &syntax.Expr{Kind: syntax.Empty, Origin: p.TmNode()}
 }
 
-func (c *syntaxLoader) convertSequence(parts []ast.RhsPart, nonterm *syntax.Nonterm, topLevel bool, origin status.SourceNode) *syntax.Expr {
+func (c *syntaxLoader) convertSequence(parts []ast.RhsPart, nonterm *syntax.Nonterm, topLevel, underOpts bool, origin status.SourceNode) *syntax.Expr {
 	var subs []*syntax.Expr
 	var empty *ast.RhsEmpty
 	var nonEmpty bool
@@ -874,7 +934,7 @@ func (c *syntaxLoader) convertSequence(parts []ast.RhsPart, nonterm *syntax.Nont
 			nonEmpty = true
 		}
 
-		out := c.convertPart(p, nonterm)
+		out := c.convertPart(p, nonterm, underOpts)
 		if out.Kind != syntax.Empty {
 			subs = append(subs, out)
 		}
@@ -932,8 +992,14 @@ func (c *syntaxLoader) isSelector(name string) bool {
 	return ok
 }
 
-func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm, defaultReport report, topLevel bool, origin status.SourceNode) *syntax.Expr {
+func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm, defaultReport report, topLevel, underOpts bool, origin status.SourceNode) *syntax.Expr {
 	var subs []*syntax.Expr
+
+	if !topLevel && len(rules) > 1 {
+		// This is a nested choice, e.g. the "(a | b)" in "start: (a | b) c".
+		underOpts = true
+	}
+
 	for _, rule0 := range rules {
 		rule, ok := rule0.(*ast.Rule)
 		if !ok {
@@ -941,11 +1007,7 @@ func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm,
 			continue
 		}
 
-		if topLevel {
-			// Counting of RHS symbols does not restart for inline alternatives.
-			c.rhsPos = 1
-			c.rhsNames = nil
-		}
+		c.pushRule(topLevel)
 		var prec *ast.RhsPrec
 		for _, p := range rule.RhsPart() {
 			switch p := p.(type) {
@@ -958,7 +1020,7 @@ func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm,
 			}
 		}
 
-		expr := c.convertSequence(rule.RhsPart(), nonterm, topLevel, rule)
+		expr := c.convertSequence(rule.RhsPart(), nonterm, topLevel, underOpts, rule)
 		clause, _ := rule.ReportClause()
 		expr = c.convertReportClause(clause).withDefault(defaultReport).apply(expr)
 		if prec != nil && topLevel {
@@ -977,6 +1039,7 @@ func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm,
 		}
 
 		subs = append(subs, expr)
+		c.popRule()
 	}
 	switch len(subs) {
 	case 0:
@@ -994,7 +1057,7 @@ func (c *syntaxLoader) convertRules(rules []ast.Rule0, nonterm *syntax.Nonterm,
 func (c *syntaxLoader) load(p ast.ParserSection, header status.SourceNode) {
 	c.out = new(syntax.Model)
 	for _, sym := range c.resolver.Syms {
-		c.out.Terminals = append(c.out.Terminals, sym.ID)
+		c.out.Terminals = append(c.out.Terminals, syntax.Terminal{Name: sym.ID, Type: sym.Type})
 	}
 	c.collectParams(p)
 	nonterms := c.collectNonterms(p)
@@ -1023,7 +1086,7 @@ func (c *syntaxLoader) load(p ast.ParserSection, header status.SourceNode) {
 			c.Errorf(alias, "nonterminal aliases are not yet supported")
 		}
 		defaultReport := c.convertReportClause(clause)
-		expr := c.convertRules(nt.def.Rule0(), c.out.Nonterms[nt.nonterm], defaultReport, true /*topLevel*/, nt.def)
+		expr := c.convertRules(nt.def.Rule0(), c.out.Nonterms[nt.nonterm], defaultReport, true /*topLevel*/, false /*underOpts*/, nt.def)
 		c.out.Nonterms[nt.nonterm].Value = or(c.out.Nonterms[nt.nonterm].Value, expr)
 	}
 }
@@ -1046,3 +1109,85 @@ func or(a, b *syntax.Expr) *syntax.Expr {
 	}
 	return &syntax.Expr{Kind: syntax.Choice, Sub: []*syntax.Expr{a, b}, Origin: b.Origin}
 }
+
+// rhsRule holds the bookkeeping for one rule on the syntaxLoader's rule stack: the position
+// counter (top-level rules only) plus the names and argument references its commands can see.
+type rhsRule struct {
+	top     *rhsRule        // The top-level rule this rule is nested under. Nil if this is a top-level rule.
+	pos     int             // The next position to be allocated. Populated only for top-level rules.
+	names   map[string]int  // name -> position. Contains the names visible to the command of this rule.
+	argRefs []syntax.ArgRef // The argument references visible to the command of this rule.
+}
+
+// counter returns the rule that owns the position counter: r itself when it is top-level,
+// otherwise the top-level rule it is nested under.
+func (r *rhsRule) counter() *rhsRule {
+	if r.isTopLevel() {
+		return r
+	}
+	return r.top
+}
+
+// nextPos returns the next position to be allocated w.r.t. the top-level rule.
+func (r *rhsRule) nextPos() int { return r.counter().pos }
+
+// incPos advances the position counter kept by the top-level rule.
+func (r *rhsRule) incPos() { r.counter().pos++ }
+
+// isTopLevel reports whether r is a top-level rule, i.e. it is not nested under another one.
+func (r *rhsRule) isTopLevel() bool { return r.top == nil }
+
+// pushRule starts tracking a new rule and makes it the current one. Every rule gets a fresh,
+// empty name map.
+//
+// A top-level rule owns its own position counter, which starts at 1. Any other rule is linked
+// to the top-level rule of whatever rule is currently on top of the stack (that rule itself
+// when it is top-level), so it must not be the first rule pushed.
+func (c *syntaxLoader) pushRule(topLevel bool) {
+	rule := &rhsRule{names: make(map[string]int)}
+	switch {
+	case topLevel:
+		rule.pos = 1
+	default:
+		// Link to the enclosing top-level rule, never to an intermediate nested rule.
+		parent := c.ruleStack[len(c.ruleStack)-1]
+		if parent.top == nil {
+			rule.top = parent
+		} else {
+			rule.top = parent.top
+		}
+	}
+	c.ruleStack = append(c.ruleStack, rule)
+}
+
+// currentRule returns the rule on top of the rule stack, i.e. the one pushed most recently
+// and not yet popped. It must not be called while the stack is empty.
+func (c *syntaxLoader) currentRule() *rhsRule { return c.ruleStack[len(c.ruleStack)-1] }
+
+// popRule removes the current rule from the rule stack.
+//
+// A nested rule hands what it collected over to the rule below it on the stack so that the
+// commands of that rule can use it as well. For example, if we have:
+//
+// start: a ( b {cmd1} | c {cmd2} ) {cmd3}
+//
+// both "b" and "c" should be accessible by cmd3 as well. Top-level rules hand over nothing.
+func (c *syntaxLoader) popRule() {
+	last := len(c.ruleStack) - 1
+	rule := c.ruleStack[last]
+	c.ruleStack = c.ruleStack[:last]
+	if rule.top == nil {
+		return
+	}
+
+	parent := c.ruleStack[len(c.ruleStack)-1]
+	// Arg refs are propagated no matter what kind of rule the parent is.
+	parent.argRefs = append(parent.argRefs, rule.argRefs...)
+	if parent.top == nil {
+		// The `names` field of a top-level rule is already populated by pushName().
+		return
+	}
+	for name, pos := range rule.names {
+		parent.names[name] = pos
+	}
+}
diff --git a/gen/funcs.go b/gen/funcs.go
index 1d3f3d919..bdf64790f 100644
--- a/gen/funcs.go
+++ b/gen/funcs.go
@@ -231,6 +231,15 @@ func sub(a, b int) int {
 	return a - b
 }
 
+func indexToPos(i int, remap map[int]int) (pos int) { // smallest pos mapped to i; 0 if none
+	for p, idx := range remap {
+		if idx == i && (pos == 0 || p < pos) {
+			pos = p // keep the minimum so the result does not depend on map iteration order
+		}
+	}
+	return pos
+}
+
 func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode) (string, error) {
 	var decls strings.Builder
 	var sb strings.Builder
@@ -254,18 +263,19 @@ func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 		}
 
 		var index int
+		var pos int
 		switch id {
 		case "left()", "leftRaw()":
 			index = -2
 		case "first()":
-			if len(args.Types) == 0 {
+			if args.SymRefCount == 0 {
 				index = -1
 			}
 		case "last()":
-			if len(args.Types) == 0 {
+			if args.SymRefCount == 0 {
 				index = -1
 			} else {
-				index = len(args.Types) - 1
+				index = args.SymRefCount - 1
 			}
 		default:
 			if strings.HasPrefix(id, "self[") && strings.HasSuffix(id, "]") {
@@ -275,11 +285,20 @@ func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 				}
 			}
 
-			var ok bool
-			index, ok = args.Resolve(id)
+			ref, ok := args.Resolve(id)
 			if !ok {
 				return "", status.Errorf(origin, "invalid reference %q", id)
 			}
+			index = ref.Index
+			pos = ref.Pos
+		}
+
+		// We are trying to locate the first or last symbol from RHS.
+		if pos == 0 && index >= 0 {
+			pos = indexToPos(index, args.Remap)
+			if pos == 0 {
+				return "", status.Errorf(origin, "internal error: cannot find the position for index %v", index)
+			}
 		}
 
 		if index == -1 {
@@ -294,7 +313,7 @@ func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 		if index == -2 {
 			v = "lhs"
 		} else {
-			v = fmt.Sprintf("stack[len(stack)-%v]", len(args.Types)-index)
+			v = fmt.Sprintf("stack[len(stack)-%v]", args.SymRefCount-index)
 		}
 		switch {
 		case prop == "sym":
@@ -302,10 +321,10 @@ func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 		case prop == "value":
 			v += ".value"
 			switch {
-			case index >= 0 && args.Types[index] != "":
+			case index >= 0 && args.Types[pos] != "":
 				varName := fmt.Sprintf("nn%v", index)
 				if !seen[index] {
-					fmt.Fprintf(&decls, "%v, _ := %v.(%v)\n", varName, v, args.Types[index])
+					fmt.Fprintf(&decls, "%v, _ := %v.(%v)\n", varName, v, args.Types[pos])
 					seen[index] = true
 				}
 				v = varName
@@ -326,6 +345,16 @@ func goParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 	return decls.String() + sb.String(), nil
 }
 
+func ccWrapInOptional(argType, input string) string {
+	return fmt.Sprintf("std::optional<%v>(%v)", argType, input)
+}
+
+// ccTypeFromUnion returns the type of the union field, without the last ID, e.g. the "int" in
+// "int x".
+func ccTypeFromUnion(unionField string) string {
+	return strings.TrimSpace(unionField[:len(unionField)-len(lastID(unionField))])
+}
+
 func ccParserAction(s string, args *grammar.ActionVars, origin status.SourceNode, variantStackEntry bool) (ret string, err error) {
 	defer func(s string) {
 		if r := recover(); r != nil {
@@ -349,74 +378,95 @@ func ccParserAction(s string, args *grammar.ActionVars, origin status.SourceNode
 		}
 
 		// Handle the rest of this '$' or '@'
-		var target, prop string
+
+		// $$ --> lhs.value
 		if s[0] == '$' {
-			// $$ --> lhs.value
-			target = "lhs"
+			var replacement string
 			if ch == '@' {
-				prop = "sym.location"
+				replacement = "lhs.sym.location"
 			} else {
 				t := args.LHSType
 				if t == "" {
 					return "", status.Errorf(origin, "$$ cannot be used inside a nonterminal semantic action without a type")
 				}
 				if variantStackEntry {
-					prop = "std::get<" + t + ">(" + target + ".value)"
-					target = ""
+					replacement = "std::get<" + t + ">(lhs.value)"
 				} else {
-					prop = "value." + lastID(t)
+					replacement = "lhs.value." + lastID(t)
 				}
 			}
 			s = s[1:]
-		} else {
-			var d int
-			r, w := utf8.DecodeRuneInString(s)
-			for unicode.IsDigit(r) || unicode.IsLetter(r) || r == '_' {
-				d += w
-				r, w = utf8.DecodeRuneInString(s[d:])
+			sb.WriteString(replacement)
+			continue
+		}
+
+		// RHS symbol references, e.g. $1, @a.
+		var d int
+		r, w := utf8.DecodeRuneInString(s)
+		for unicode.IsDigit(r) || unicode.IsLetter(r) || r == '_' {
+			d += w
+			r, w = utf8.DecodeRuneInString(s[d:])
+		}
+		if d == 0 {
+			return "", status.Errorf(origin, "%c should be followed by a number or identifier", ch)
+		}
+		val := s[:d]
+		s = s[d:]
+
+		// cc uses 1-based indexing.
+		ref, ok := args.ResolveOneBased(val)
+		if !ok {
+			return "", status.Errorf(origin, "invalid reference %c%q", ch, val)
+		}
+
+		index := ref.Index
+		pos := ref.Pos
+
+		argType := args.Types[pos]
+
+		// The symbol reference is valid in the original rule but is not present in the expanded
+		// rule, so it references an optional symbol either expanded from a Choice or an Optional.
+		if index == -1 {
+			// Use std::optional<T>() as the semantic value for the non-present symbol.
+			if ch == '@' {
+				sb.WriteString(ccWrapInOptional("decltype(lhs.sym.location)", ""))
+				continue
 			}
-			if d == 0 {
-				return "", status.Errorf(origin, "%c should be followed by a number or identifier", ch)
+
+			if argType == "" {
+				return "", status.Errorf(origin, "symbol %c%q does not have an associated type", ch, val)
 			}
-			val := s[:d]
-			s = s[d:]
-			var index int
-			if pos, err := strconv.Atoi(val); err == nil {
-				if pos < 1 || pos >= args.CmdArgs.MaxPos {
-					// Index out of range.
-					return "", status.Errorf(origin, "out of bounds reference %c%v [max = %v]", ch, val, args.CmdArgs.MaxPos)
-				}
-				index = pos - 1
-			} else {
-				// Resolve by name
-				var ok bool
-				index, ok = args.Resolve(val)
-				if !ok {
-					return "", status.Errorf(origin, "invalid reference %c%q", ch, val)
-				}
+			if !variantStackEntry {
+				argType = ccTypeFromUnion(argType)
 			}
+			sb.WriteString(ccWrapInOptional(argType, ""))
+			continue
+		}
 
-			target = fmt.Sprintf("rhs[%v]", index+args.Delta)
-			if ch == '@' {
-				prop = "sym.location"
+		// The referenced symbol is present in the expanded rule.
+		var replacement string
+		target := fmt.Sprintf("rhs[%v]", index+args.Delta)
+		if ch == '@' {
+			replacement = target + ".sym.location"
+			argType = "decltype(lhs.sym.location)"
+		} else {
+			if argType == "" {
+				return "", status.Errorf(origin, "%c%q does not have an associated type", ch, val)
+			}
+			if variantStackEntry {
+				replacement = "std::get<" + argType + ">(" + target + ".value)"
 			} else {
-				t := args.Types[index]
-				if t == "" {
-					return "", status.Errorf(origin, "%c%q does not have an associated type", ch, val)
-				}
-				if variantStackEntry {
-					prop = "std::get<" + t + ">(" + target + ".value)"
-					target = ""
-				} else {
-					prop = "value." + lastID(t)
-				}
+				replacement = target + ".value." + lastID(argType)
+				argType = ccTypeFromUnion(argType)
 			}
 		}
-		if len(target) > 0 {
-			sb.WriteString(target)
-			sb.WriteByte('.')
+
+		if argRef := args.ArgRefs[pos]; argRef.Optional {
+			// This symbol reference is optional in the original rule, so we wrap it inside a
+			// std::optional to unify the semantic actions for the expanded rules.
+			replacement = ccWrapInOptional(argType, replacement)
 		}
-		sb.WriteString(prop)
+		sb.WriteString(replacement)
 	}
 	return sb.String(), nil
 }
@@ -442,18 +492,19 @@ func bisonParserAction(s string, args *grammar.ActionVars, origin status.SourceN
 		}
 
 		var index int
+		var pos int
 		switch id {
 		case "left()", "leftRaw()":
 			index = -2
 		case "first()":
-			if len(args.Types) == 0 {
+			if args.SymRefCount == 0 {
 				index = -1
 			}
 		case "last()":
-			if len(args.Types) == 0 {
+			if args.SymRefCount == 0 {
 				index = -1
 			} else {
-				index = len(args.Types) - 1
+				index = args.SymRefCount - 1
 			}
 		default:
 			if strings.HasPrefix(id, "self[") && strings.HasSuffix(id, "]") {
@@ -463,11 +514,19 @@ func bisonParserAction(s string, args *grammar.ActionVars, origin status.SourceN
 				}
 			}
 
-			var ok bool
-			index, ok = args.Resolve(id)
+			ref, ok := args.Resolve(id)
 			if !ok {
 				return "", status.Errorf(origin, "invalid reference %q", id)
 			}
+			index = ref.Index
+			pos = ref.Pos
+		}
+
+		if pos == 0 && index >= 0 {
+			pos = indexToPos(index, args.Remap)
+			if pos == 0 {
+				return "", status.Errorf(origin, "internal error: cannot find the position for index %v", index)
+			}
 		}
 
 		if index == -1 {
@@ -484,9 +543,9 @@ func bisonParserAction(s string, args *grammar.ActionVars, origin status.SourceN
 			case index < 0 && args.LHSType != "" && id != "leftRaw()":
 				needsParen = true
 				fmt.Fprintf(&sb, "(/*%v*/", args.LHSType)
-			case index >= 0 && args.Types[index] != "":
+			case index >= 0 && args.Types[pos] != "":
 				needsParen = true
-				fmt.Fprintf(&sb, "(/*%v*/", args.Types[index])
+				fmt.Fprintf(&sb, "(/*%v*/", args.Types[pos])
 			}
 			sb.WriteByte('$')
 		} else {
diff --git a/gen/funcs_test.go b/gen/funcs_test.go
index 3d1e916a4..6e7104d7f 100644
--- a/gen/funcs_test.go
+++ b/gen/funcs_test.go
@@ -165,7 +165,8 @@ func TestParserAction(t *testing.T) {
 
 		{"$a + ${last()}", vars("a:0", "b", "c:1", "d"), "stack[len(stack)-2].value + stack[len(stack)-1].value"},
 		{"${first()} + ${left()}", vars("a:0", "b", "c:1", "d"), "stack[len(stack)-2].value + lhs.value"},
-		{"${first()} + ${left()}", vars("a:1:bar", "b", "c", "d"), "nn0, _ := stack[len(stack)-1].value.(bar)\nnn0 + lhs.value"},
+		{"${first()} + ${left()}", vars("a:0:bar", "b", "c", "d"), "nn0, _ := stack[len(stack)-1].value.(bar)\nnn0 + lhs.value"},
+		{"${first()} + ${left()} + $a", vars("a:0:bar", "b", "c", "d"), "nn0, _ := stack[len(stack)-1].value.(bar)\nnn0 + lhs.value + nn0"},
 
 		{"${left().sym}", vars("a:0", "b", "c:1", "d:2"), "(&lhs.sym)"},
 		{"${left().offset}", vars("a:0", "b", "c:1", "d:2"), "lhs.sym.offset"},
@@ -194,10 +195,11 @@ func TestCcParserAction(t *testing.T) {
 		want       string
 		useVariant bool
 	}{
-		{"abc", varsOneBased(), "abc", false},
-		{"$$ = $1", varsOneBased("%node", "a:0:expr"), "lhs.value.node = rhs[0].value.expr", false},
-		{"$$ = @$ @1", varsOneBased("%node", "a:0:expr"), "lhs.value.node = lhs.sym.location rhs[0].sym.location", false},
-		{"$$ = $1", varsOneBased("%node", "a:0:expr"), "std::get<node>(lhs.value) = std::get<expr>(rhs[0].value)", true},
+		{"abc", vars(), "abc", false},
+		// The 1-based index for "a" is 2.
+		{"$$ = $2", vars("%node", "a:0:expr"), "lhs.value.node = rhs[0].value.expr", false},
+		{"$$ = @$ @2", vars("%node", "a:0:expr"), "lhs.value.node = lhs.sym.location rhs[0].sym.location", false},
+		{"$$ = $2", vars("%node", "a:0:expr"), "std::get<node>(lhs.value) = std::get<expr>(rhs[0].value)", true},
 	}
 
 	for _, tc := range tests {
@@ -212,45 +214,43 @@ func TestCcParserAction(t *testing.T) {
 	}
 }
 
-func varsOneBased(list ...string) *grammar.ActionVars {
-	return varsWithOffset(false, list...)
-}
-
 func vars(list ...string) *grammar.ActionVars {
-	return varsWithOffset(true, list...)
-}
-
-func varsWithOffset(zeroBased bool, list ...string) *grammar.ActionVars {
 	ret := &grammar.ActionVars{
 		CmdArgs: syntax.CmdArgs{
-			MaxPos: 1 + len(list),
-			Names:  make(map[string]int),
+			MaxPos:  1 + len(list),
+			Names:   make(map[string]int),
+			ArgRefs: make(map[int]syntax.ArgRef),
 		},
 		Remap: make(map[int]int),
+		Types: make(map[int]string),
 	}
 	for i, descr := range list {
+		pos := i + 1
+		ret.CmdArgs.ArgRefs[pos] = syntax.ArgRef{Pos: pos}
+		ret.Types[pos] = ""
+
 		if strings.HasPrefix(descr, "%") {
 			ret.LHSType = descr[1:]
 			continue
 		}
 		name, num, mapped := strings.Cut(descr, ":")
 		if name != "" {
-			ret.Names[name] = i
+			ret.Names[name] = pos
+			ret.ArgRefs[pos] = syntax.ArgRef{
+				Pos:     pos,
+			}
 		}
 		if !mapped {
 			continue
 		}
+		ret.SymRefCount++
 		num, tp, _ := strings.Cut(num, ":")
-		target, err := strconv.Atoi(num)
+		index, err := strconv.Atoi(num)
 		if err != nil {
 			log.Fatalf("cannot parse %q as a number in %q", num, descr)
 		}
-		ret.Types = append(ret.Types, tp)
-		index := i
-		if !zeroBased {
-			index++
-		}
-		ret.Remap[index] = target
+		ret.Types[pos] = tp
+		ret.Remap[pos] = index
 	}
 	return ret
 }
diff --git a/gen/templates/cc_parser_cc.go.tmpl b/gen/templates/cc_parser_cc.go.tmpl
index 3099a3bc5..880bbb8a5 100644
--- a/gen/templates/cc_parser_cc.go.tmpl
+++ b/gen/templates/cc_parser_cc.go.tmpl
@@ -721,8 +721,8 @@ absl::Status Parser::action{{$index}}([[maybe_unused]] stackEntry& lhs,
 {{ end -}}
 {{ end -}}
 
-absl::Status Parser::applyRule(int32_t rule, stackEntry& lhs,
-                        [[maybe_unused]] const stackEntry* rhs,
+absl::Status Parser::applyRule(int32_t rule, int32_t ruleLen, stackEntry& lhs,
+                        [[maybe_unused]] stackEntry* rhs,
                         Lexer& lexer) {
 {{ if or .Parser.HasActions .Parser.Tables.Lookaheads -}}
   switch (rule) {
@@ -768,6 +768,13 @@ absl::Status Parser::applyRule(int32_t rule, stackEntry& lhs,
     return absl::OkStatus();
 {{ end -}}
   default:
+{{ if .Parser.HasAssocValues -}}
+    if (ruleLen > 0) {
+      // If no semantic action is provided, and the rhs is not empty, we use the
+      // value of the first symbol on the RHS as the value of the lhs.
+      lhs.value = std::move(rhs[0].value);
+    }
+{{ end -}}
     break;
   }
 {{ end -}}
@@ -826,6 +833,12 @@ absl::Status Parser::Parse(int{{$stateType}}_t start, int{{$stateType}}_t end,
   end_state_ = end;
 {{- end}}
   fetchNext(lexer, stack);
+  // The location in this stackEntry will be used for any leading non-terminal
+  // symbols satisfied by %empty, so it needs to be initialized. We initialize
+  // it to the start location of the first token.
+  stack.back().sym.location =
+      Lexer::Location(lexer.LastTokenLocation(){{template "locStart"}},
+                      lexer.LastTokenLocation(){{template "locStart"}});
 
   while (state != end) {
     int32_t action = tmAction[state];
@@ -860,22 +873,16 @@ absl::Status Parser::Parse(int{{$stateType}}_t start, int{{$stateType}}_t end,
       int32_t ln = tmRuleLen[rule];
       stackEntry entry;
       entry.sym.symbol = tmRuleSymbol[rule];
-      const stackEntry* rhs = &stack[0] + stack.size() - ln;
+      stackEntry* rhs = &stack[0] + stack.size() - ln;
 
       if (ln == 0) {
         entry.sym.location = Lexer::Location(stack.back().sym.location{{template "locEnd"}},
                                              stack.back().sym.location{{template "locEnd"}});
-{{ if .Parser.HasAssocValues -}}
-        entry.value = stack.back().value;
-{{ end -}}
       } else {
         entry.sym.location = {{template "CreateLocationFromRHS" . -}}(ln,
           [&](int32_t i) { return rhs[i].sym.location; });
-{{ if .Parser.HasAssocValues -}}
-        entry.value = rhs[0].value;
-{{ end -}}
       }
-      absl::Status ret = applyRule(rule, entry, rhs, lexer{{if .NeedsSession}}, &s{{end}});
+      absl::Status ret = applyRule(rule, ln, entry, rhs, lexer{{if .NeedsSession}}, &s{{end}});
       if (!ret.ok()) {
         return ret;
       }
@@ -1006,4 +1013,4 @@ absl::Status Parser::Parse(int{{$stateType}}_t start, int{{$stateType}}_t end,
 {{ else -}}
 {{ template "customReportNext" . -}}
 {{ end -}}
-{{ end -}}
+{{ end -}}
\ No newline at end of file
diff --git a/gen/templates/cc_parser_h.go.tmpl b/gen/templates/cc_parser_h.go.tmpl
index dbc3dafdc..a7e279bbb 100644
--- a/gen/templates/cc_parser_h.go.tmpl
+++ b/gen/templates/cc_parser_h.go.tmpl
@@ -32,11 +32,22 @@ struct symbol {
 {{end -}}
 
 {{ block "stackEntry" . -}}
+{{ if .Options.VariantStackEntry -}}
+{{ range .Parser.UnionFields -}}
+static_assert(std::is_default_constructible_v<{{.}}>,
+              "Symbol associated value type {{.}} is not default constructible.");
+{{ end -}}
+{{ end -}}
+
 {{$stateType := bits_per_element .Parser.Tables.FromTo -}}
 struct stackEntry {
   symbol sym;
   int{{$stateType}}_t state = 0;
-{{ if .Parser.HasAssocValues -}}
+{{ if .UnionDefinition -}}
+  union
+{{ .UnionDefinition -}}
+  value;
+{{ else if .Parser.HasAssocValues -}}
 {{ if .Options.VariantStackEntry -}}
   std::variant<
 {{ range .Parser.UnionFields -}}
@@ -162,8 +173,8 @@ class Parser final {
 {{ end -}}
 {{ end -}}
 
-  absl::Status applyRule(int32_t rule, stackEntry& lhs,
-                         [[maybe_unused]] const stackEntry* rhs,
+  absl::Status applyRule(int32_t rule, int32_t ruleLen, stackEntry& lhs,
+                         [[maybe_unused]] stackEntry* rhs,
                          Lexer& lexer);
   absl::Status Parse(
     int{{$stateType}}_t start, int{{$stateType}}_t end, Lexer& lexer);
@@ -199,4 +210,4 @@ class Parser final {
 
 }  // namespace {{.Options.Namespace}}
 
-#endif  // {{.Options.IncludeGuardPrefix}}PARSER_H_
+#endif  // {{.Options.IncludeGuardPrefix}}PARSER_H_
\ No newline at end of file
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 9a0ae2092..94ce34c9d 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -53,6 +53,7 @@ type Grammar struct {
 	Parser  *Parser
 
 	CustomTemplates string
+	UnionDefinition string
 }
 
 // Range marks the portion of a rule that needs to be reported.
@@ -78,34 +79,82 @@ type SemanticAction struct {
 type ActionVars struct {
 	syntax.CmdArgs
 
-	// Types of the references of the rule.
-	Types   []string
+	// position -> type of the references of the original rule.
+	//
+	// Note: types are indexed by position rather than index to support getting types of references
+	// that are not present in the current expansion of the rule.
+	Types   map[int]string
 	LHSType string
 
 	// Not every symbol reference is present in the desugared rule.
 	Remap map[int]int
+
+	// Number of RHS symbols in the expanded rule.
+	SymRefCount int
+}
+
+// Reference is a symbol reference in a semantic action.
+type Reference struct {
+	// Position of the reference in the original rule. 1-based. Used to identify the symbol in
+	// semantic actions code blocks.
+	Pos int
+
+	// Index of the symbol in the expanded rule. 0 based. Used to identify the symbol in the TM
+	// compiler.
+	//
+	// -1 means that the reference is present in the original rule but not in this expanded rule.
+	Index int
+}
+
+// Resolve resolves the symbol reference `val` to a Reference (1-based Pos, 0-based RHS Index).
+// `val` can either be a 0-based index (e.g. "0" in "$0") or a named symbol (e.g. "a" in "$a").
+//
+// Returns false if `val` is not a valid symbol in the original rule, e.g. using "$a" in "start: b".
+//
+// Returns a Reference with Index -1 if `val` is a valid symbol in the original rule but does not
+// show up in the expanded rule. For example, a: b? expands into two rules:
+//
+//		a: b
+//	  | %empty
+//
+// For the %empty rule, Resolve("b") returns a Reference whose Index is -1.
+func (a *ActionVars) Resolve(val string) (Reference, bool) {
+	return a.resolve(val /*zeroBased=*/, true)
 }
 
-// Resolve resolves "val" to an RHS index for the current rule.
-func (a *ActionVars) Resolve(val string) (int, bool) {
-	pos, ok := a.CmdArgs.Names[val]
-	if !ok {
-		var err error
-		pos, err = strconv.Atoi(val)
-		if err != nil {
-			return 0, false
+// ResolveOneBased is similar to Resolve, except that `val` is 1-based if it is a number.
+func (a *ActionVars) ResolveOneBased(val string) (Reference, bool) {
+	return a.resolve(val /*zeroBased=*/, false)
+}
+
+func (a *ActionVars) resolve(val string, zeroBased bool) (Reference, bool) {
+	// `pos` is always 1-based.
+	pos, err := strconv.Atoi(val)
+	if err == nil {
+		// The input "val" is a number reference, e.g. $1.
+		if zeroBased {
+			// The input reference starts from 0, e.g. $0 references the first symbol. Change it to
+			// 1-based.
+			pos++
 		}
-		pos++ // "val" is 0-based, while positions are 1-based.
 		if pos < 1 || pos >= a.CmdArgs.MaxPos {
 			// Index out of range.
-			return 0, false
+			return Reference{}, false
+		}
+	} else {
+		// The input "val" is a named symbol reference, e.g. $a.
+		var exists bool
+		pos, exists = a.CmdArgs.Names[val]
+		if !exists {
+			// No such symbol exists in the original rule.
+			return Reference{}, false
 		}
 	}
-	ret, ok := a.Remap[pos]
-	if !ok {
-		ret = -1
+	idx, exists := a.Remap[pos]
+	if !exists {
+		idx = -1
 	}
-	return ret, true
+	return Reference{Index: idx, Pos: pos}, true
 }
 
 // String is used as a digest of a semantic action environment (and also as a debug string).
@@ -201,6 +250,8 @@ type Options struct {
 	MaxLookahead           int    // If set, all lookaheads expressions will be validated to fit this limit.
 	OptInstantiationSuffix string // Suffix that triggers auto-instantiation optional nonterminals (e.g. "opt" or "_opt").
 
+	DisableSyntax []string // Lists grammar syntaxes that should be disabled.
+
 	// AST generation. Go-specific for now.
 	TokenStream   bool
 	EventBased    bool
@@ -224,4 +275,4 @@ type Options struct {
 	DirIncludePrefix   string   // for generated headers
 	ParseParams        []string // parser fields initialized in the constructor
 	VariantStackEntry  bool     // whether to generate a std::variant stackEntry rather than a union. Default false.
-}
+}
\ No newline at end of file
diff --git a/syntax/expand.go b/syntax/expand.go
index d26f3587e..d9e07d89f 100644
--- a/syntax/expand.go
+++ b/syntax/expand.go
@@ -10,6 +10,111 @@ import (
 	"github.com/inspirer/textmapper/util/ident"
 )
 
+// updateArgRefs updates the ArgRefs of `e` to include the new nonterminals in `newNts`.
+//
+// When `e.ArgRefs` was created, we did not have the non-terminals that TextMapper creates for
+// Lists yet. We fill in the missing non-terminals once they are created by calling this function.
+func updateArgRefs(m *Model, newNts map[int]int, e *Expr) {
+	if cmdArgs := e.CmdArgs; cmdArgs != nil {
+		for pos, sym := range newNts {
+			copied, exists := cmdArgs.ArgRefs[pos]
+			if !exists {
+				// The ArgRefs of a mid rule do not include the terminals after it.
+				continue
+			}
+			copied.Symbol = sym
+			cmdArgs.ArgRefs[pos] = copied
+		}
+		return
+	}
+	for _, sub := range e.Sub {
+		updateArgRefs(m, newNts, sub)
+	}
+}
+
+// ExpandOptions contains the options for the Expand function.
+type ExpandOptions struct {
+	// OptionalType returns the type of an optional symbol s? or s_opt, where `t` is the type of the
+	// symbol s.
+	OptionalType func(t string) string
+
+	// OptionalCmd returns the command to calculate the semantic value of an optional symbol s_opt,
+	// where `t` is the type of the symbol s.
+	OptionalCmd func(t string) string
+
+	// ListType returns the type of the list symbol s* or s+, where `t` is the type of the element
+	// symbol s.
+	ListType func(t string) string
+
+	// NewList returns the command to create a new list of the given type.
+	NewList func(elemType string, elemPos int, listFlags ListFlags) string
+
+	// Append returns the command to append an element to a list.
+	Append func(elemPos, listPos int, listFlags ListFlags) string
+
+	// DefaultValue returns the default value of the given type `t`.
+	DefaultValue func(t string) string
+}
+
+// CcExpandOptions returns the ExpandOptions for generating C++ semantic actions.
+func CcExpandOptions() *ExpandOptions {
+	return &ExpandOptions{
+		OptionalType: func(t string) string {
+			if t == "" {
+				return ""
+			}
+			return "std::optional<" + t + ">"
+		},
+		OptionalCmd: func(t string) string {
+			// For cc the semantic action does not need the input type `t`.
+			//
+			// TODO: This involves copying the rhs value when constructing the std::optional. Example
+			// generated code:
+			//
+			// ```cc
+			// lhs.value = std::optional<std::string>(std::get<std::string>(rhs[0].value));
+			// ```
+			//
+			// If this turns out to be a performance bottleneck, we should find a way to use move when
+			// constructing the std::optional.
+			return fmt.Sprintf(`{ $$ = $1; }`)
+		},
+		ListType: func(t string) string {
+			if t == "" {
+				return ""
+			}
+			return "std::vector<" + t + ">"
+		},
+		NewList: func(elemType string, elemPos int, listFlags ListFlags) string {
+			if listFlags&OneOrMore != 0 {
+				return fmt.Sprintf(`
+				  auto& elem = $%v;
+				  auto& mutable_elem = const_cast<std::remove_const<typename std::remove_reference<decltype(elem)>::type>::type&>(elem);
+				  $$ = std::vector<%v>{std::move(mutable_elem)};
+				`, elemPos, elemType)
+			}
+			return fmt.Sprintf(`$$ = std::vector<%v>{};`, elemType)
+		},
+		Append: func(elemPos, listPos int, listFlags ListFlags) string {
+			return fmt.Sprintf(`{
+				auto& list = $%v;
+				auto& elem = $%v;
+				auto& mutable_list = const_cast<std::remove_const<typename std::remove_reference<decltype(list)>::type>::type&>(list);
+				auto& mutable_elem = const_cast<std::remove_const<typename std::remove_reference<decltype(elem)>::type>::type&>(elem);
+				auto new_list = std::move(mutable_list);
+				new_list.push_back(std::move(mutable_elem));
+				$$ = std::move(new_list);
+			}`, listPos, elemPos)
+		},
+		DefaultValue: func(t string) string {
+			if t == "" {
+				return ""
+			}
+			return t + "{}"
+		},
+	}
+}
+
 // Expand rewrites the grammar substituting extended notation clauses with equivalent
 // context-free production forms. Every nonterminal becomes a choice of sequences (production
 // rules), where each sequence can contain only StateMarker, Command, or Reference expressions.
@@ -24,12 +129,13 @@ import (
 // Note: for now it leaves Assign, Append, and Arrow expressions untouched. The first two can
 // contain references only. Arrow can contain a sub-sequence if it reports more than one
 // symbol reference.
-func Expand(m *Model) error {
+func Expand(m *Model, opts *ExpandOptions) error {
 	e := &expander{
 		Model: m,
 		m:     make(map[string]int),
 		perm:  make([]int, len(m.Nonterms)),
 		reuse: make([]int, 0, 16),
+		opts:  opts,
 	}
 	max := len(m.Nonterms)
 	for i, nt := range m.Nonterms {
@@ -68,23 +174,33 @@ func Expand(m *Model) error {
 	for self, nt := range m.Nonterms {
 		switch nt.Value.Kind {
 		case Optional:
-			// Note: this case facilitates 0..* lists extraction.
+			// Note: this case facilitates 0..* lists extraction. All other optionals are handled by
+			// expandRule.
+			if nt.Value.Sub[0].Kind != Reference {
+				return status.Errorf(nt.Value.Origin, "internal error: expecting an optional reference, but got %+v", nt.Value.Sub[0])
+			}
+			symbolType := getSymbolType(nt.Value.Sub[0], m)
+			subs := []*Expr{nt.Value.Sub[0], &Expr{Kind: Empty, Origin: nt.Value.Origin}}
+			// For the %empty rule, use an empty list as the semantic value.
+			if e.opts.DefaultValue != nil && symbolType != "" {
+				defaultVal := e.opts.DefaultValue(symbolType)
+				subs[1] = &Expr{Kind: Command, Name: "$$ = " + defaultVal + ";", Origin: nt.Value.Origin, CmdArgs: &CmdArgs{MaxPos: 1}}
+			}
 			nt.Value = &Expr{
-				Kind: Choice,
-				Sub: []*Expr{
-					nt.Value.Sub[0],
-					{Kind: Empty, Origin: nt.Value.Origin},
-				},
+				Kind:   Choice,
+				Sub:    subs,
 				Origin: nt.Value.Origin,
 			}
 		case List:
 			// Note: at this point all lists either have at least one element or have no separators.
-			rr := nt.Value.ListFlags&RightRecursive != 0
-			nonEmpty := nt.Value.ListFlags&OneOrMore != 0
+			listFlags := nt.Value.ListFlags
+			rr := listFlags&RightRecursive != 0
+			nonEmpty := listFlags&OneOrMore != 0
 			elem := nt.Value.Sub[0]
 			origin := nt.Value.Origin
 			rec := &Expr{Kind: Sequence, Origin: origin}
-			rec.Sub = append(rec.Sub, &Expr{Kind: Reference, Symbol: len(m.Terminals) + self, Model: m, Origin: origin})
+			listRef := &Expr{Kind: Reference, Symbol: len(m.Terminals) + self, Model: m, Origin: origin}
+			rec.Sub = append(rec.Sub, listRef)
 			if len(nt.Value.Sub) > 1 {
 				if rr {
 					rec = concat(origin, nt.Value.Sub[1], rec)
@@ -96,7 +212,65 @@ func Expand(m *Model) error {
 				Kind:   Choice,
 				Origin: origin,
 			}
-			if elem.Kind == Choice {
+			// Automatic value propagation works for lists of references only (with and without
+			// separators). In every other sense this branch repeats the next one.
+			if elem.Kind == Reference {
+				// Add the recursion rule, e.g. `a_list: a_list a`.
+				var recursion []*Expr
+				if rr {
+					recursion = append(recursion, elem, rec)
+				} else {
+					recursion = append(recursion, rec, elem)
+				}
+				elemType := getSymbolType(elem, m)
+				if opts.Append != nil && elemType != "" {
+					// Assign a new Pos for the list reference itself so that its semantic value can be
+					// referenced.
+					//
+					// The position of the list reference only needs to be different from the element Pos,
+					// instead of having to match the order between the listRef and the elem. For example,
+					// consider the following rule:
+					//
+					//   start: a+ {...}
+					//
+					// `elem.Pos` is 1. Assuming we generate left-recursion rules for a_list:
+					//
+					//    a_list: a_list a
+					//
+					// listRef.Pos is 2 (elem.Pos + 1), even though the listRef "a_list" actually appears
+					// before the "a". This is ok because Pos is only used to identify the symbols (and thus
+					// only needs to be unique), and the only semantic action that uses `listRef.Pos` is
+					// generated by `opts.Append`, which accepts both elemPos and listPos as arguments.
+					listPos := elem.Pos + 1
+					listRef.Pos = listPos
+					argRefs := map[int]ArgRef{
+						elem.Pos: ArgRef{Pos: elem.Pos, Symbol: elem.Symbol},
+						listPos:  ArgRef{Pos: listPos, Symbol: listRef.Symbol},
+					}
+					code := opts.Append(elem.Pos, listPos, listFlags)
+					cmdArgs := &CmdArgs{MaxPos: listPos + 1, ArgRefs: argRefs}
+					recursion = append(recursion, &Expr{Kind: Command, Name: code, Origin: origin, CmdArgs: cmdArgs})
+				}
+				nt.Value.Sub = append(nt.Value.Sub, concat(origin, recursion...))
+
+				// Add the base rule, e.g. `a_list: a`.
+				var base []*Expr
+				switch {
+				case nonEmpty:
+					base = append(base, elem)
+					if opts.NewList != nil && elemType != "" {
+						argRefs := map[int]ArgRef{
+							elem.Pos: ArgRef{Pos: elem.Pos, Symbol: elem.Symbol},
+						}
+						base = append(base, &Expr{Kind: Command, Name: opts.NewList(elemType, elem.Pos, listFlags), Origin: origin, CmdArgs: &CmdArgs{MaxPos: elem.Pos + 1, ArgRefs: argRefs}})
+					}
+				case opts.NewList != nil && elemType != "":
+					base = append(base, &Expr{Kind: Command, Name: opts.NewList(elemType, elem.Pos, listFlags), Origin: origin, CmdArgs: &CmdArgs{MaxPos: elem.Pos + 1}})
+				default:
+					base = append(base, &Expr{Kind: Empty, Origin: origin})
+				}
+				nt.Value.Sub = append(nt.Value.Sub, concat(origin, base...))
+			} else if elem.Kind == Choice {
 				if rr {
 					nt.Value.Sub = append(nt.Value.Sub, multiConcat(origin, elem.Sub, []*Expr{rec})...)
 				} else {
@@ -135,6 +309,10 @@ type expander struct {
 	start int // nonterminal, for sorting
 	base  int
 	reuse []int
+
+	createdNts map[int]int // The non-terminals created for the current rule. Position -> Symbol
+
+	opts *ExpandOptions // Target-language-specific options during expansion.
 }
 
 func (e *expander) sortTail() {
@@ -163,7 +341,7 @@ func (e *expander) sortTail() {
 	e.reuse = local // return for reuse
 }
 
-func (e *expander) extractNonterm(expr *Expr) *Expr {
+func (e *expander) extractNonterm(expr *Expr, nonTermType string) *Expr {
 	name := ProvisionalName(expr, e.Model)
 	if existing, ok := e.m[name]; ok && expr.Equal(e.Nonterms[existing].Value) {
 		sym := len(e.Terminals) + existing
@@ -191,6 +369,7 @@ func (e *expander) extractNonterm(expr *Expr) *Expr {
 		Name:   name,
 		Value:  expr,
 		Origin: expr.Origin,
+		Type:   nonTermType,
 	}
 	e.Nonterms = append(e.Nonterms, nt)
 	e.extra++
@@ -198,7 +377,14 @@ func (e *expander) extractNonterm(expr *Expr) *Expr {
 	return &Expr{Kind: Reference, Symbol: sym, Model: e.Model, Origin: expr.Origin}
 }
 
-func (e *expander) expandRule(rule *Expr) []*Expr {
+func (e *expander) expandRule(rule *Expr) (expanded []*Expr) {
+	e.createdNts = make(map[int]int)
+	defer func() {
+		for _, rule := range expanded {
+			updateArgRefs(e.Model, e.createdNts, rule)
+		}
+	}()
+
 	if rule.Kind == Prec {
 		ret := e.expandExpr(rule.Sub[0])
 		for i, val := range ret {
@@ -250,8 +436,11 @@ func (e *expander) expandExpr(expr *Expr) []*Expr {
 		}
 		return ret
 	case Set, Lookahead:
-		ret := e.extractNonterm(expr)
+		ret := e.extractNonterm(expr, "" /*nonTermType*/)
 		ret.Pos = expr.Pos
+		if expr.Kind == Set {
+			e.createdNts[ret.Pos] = ret.Symbol
+		}
 		return []*Expr{ret}
 	case List:
 		out := &Expr{Kind: List, Origin: expr.Origin, ListFlags: expr.ListFlags}
@@ -268,11 +457,34 @@ func (e *expander) expandExpr(expr *Expr) []*Expr {
 			out.Sub = append(out.Sub, sep[0])
 			out.ListFlags |= OneOrMore
 		}
-		ret := e.extractNonterm(out)
+		var listType string
+		// Calculate the list type for list of references. More complex structures, e.g.
+		// (a b)*, (a? b)+, (a?)* do not propagate the type automatically.
+		if expr.Sub[0].Kind == Reference {
+			elemType := getSymbolType(expr.Sub[0], e.Model)
+			if e.opts.ListType != nil {
+				listType = e.opts.ListType(elemType)
+			}
+		}
+		ret := e.extractNonterm(out, listType)
 		if expr.ListFlags&OneOrMore == 0 && out.ListFlags&OneOrMore != 0 {
-			ret = e.extractNonterm(&Expr{Kind: Optional, Sub: []*Expr{ret}, Origin: expr.Origin})
+			// List structures like "(a separator ',')*" generate the following two non-terminals:
+			//
+			// (1) a_separator_comma_listopt: a_separator_comma_list | %empty
+			// (2) a_separator_comma_list: a_separator_comma_list ',' a | a
+			//
+			// We assign `listType` to "a_separator_comma_listopt" instead of using
+			// `e.opts.OptionalType(listType)` so that empty lists share the same type, e.g.
+			//
+			// list_string {std::string} = (a separator ',')*[a_list] {
+			//   // $a_list will be of type std::vector<std::string>. For the %empty case the list will
+			//   // be empty instead of std::optional<std::vector<std::string>>.
+			//   $$ = absl::StrJoin($a_list, ", ");
+			// }
+			ret = e.extractNonterm(&Expr{Kind: Optional, Sub: []*Expr{ret}, Origin: expr.Origin}, listType)
 		}
 		ret.Pos = expr.Pos
+		e.createdNts[ret.Pos] = ret.Symbol
 		return []*Expr{ret}
 	}
 	return []*Expr{expr}
@@ -335,7 +547,7 @@ func ProvisionalName(expr *Expr, m *Model) string {
 	switch expr.Kind {
 	case Reference:
 		if expr.Symbol < len(m.Terminals) {
-			return ident.Produce(m.Terminals[expr.Symbol], ident.CamelCase)
+			return ident.Produce(m.Terminals[expr.Symbol].Name, ident.CamelCase)
 		}
 		return m.Nonterms[expr.Symbol-len(m.Terminals)].Name
 	case Optional:
@@ -435,3 +647,10 @@ func appendSetName(ts *TokenSet, m *Model, out *strings.Builder) {
 		log.Fatalf("cannot compute name for TokenSet Kind=%v", ts.Kind)
 	}
 }
+
+func getSymbolType(expr *Expr, m *Model) string {
+	if nt := expr.Symbol - len(m.Terminals); nt >= 0 {
+		return m.Nonterms[nt].Type // nonterminals are numbered after all terminals
+	}
+	return m.Terminals[expr.Symbol].Type
+}
diff --git a/syntax/set_test.go b/syntax/set_test.go
index 40b06410f..35b8d4b4e 100644
--- a/syntax/set_test.go
+++ b/syntax/set_test.go
@@ -99,7 +99,7 @@ func TestSets(t *testing.T) {
 			t.Errorf("cannot parse %q: %v", tc.input, err)
 			continue
 		}
-		err = syntax.Expand(model)
+		err = syntax.Expand(model, &syntax.ExpandOptions{})
 		if err != nil {
 			t.Errorf("cannot expand %q: %v", tc.input, err)
 			continue
diff --git a/syntax/syntax.go b/syntax/syntax.go
index 1e80e7073..f5e0fd031 100644
--- a/syntax/syntax.go
+++ b/syntax/syntax.go
@@ -11,9 +11,31 @@ import (
 	"github.com/inspirer/textmapper/status"
 )
 
+// Terminal is a terminal symbol used in a grammar.
+type Terminal struct {
+	Name string // Name of the terminal; this is what Model.Ref prints for a terminal symbol.
+	Type string // Type string attached to the terminal; empty when none is set.
+}
+
+func (t *Terminal) String() string {
+	return t.Name + "(type = " + t.Type + ")"
+}
+
+// ArgRef is a reference to a rule symbol that is made available to semantic actions.
+type ArgRef struct {
+	Pos      int    // 1-based positional index of the symbol in the original rule; resolves numeric references in semantic actions.
+	Optional bool   // True when the symbol reference sits under an Optional or a Nested Choice.
+	Kind     string // Kind of the symbol, e.g. reference, starQuantifier, etc. Debugging aid only.
+	Symbol   int    // Index of the symbol in the grammar.
+}
+
+func (r *ArgRef) String() string {
+	return fmt.Sprintf("%+v", *r)
+}
+
 // Model is a model of a language's syntax built on top of a set of terminals.
 type Model struct {
-	Terminals []string
+	Terminals []Terminal
 	Params    []Param
 	Nonterms  []*Nonterm // all params and nonterms must have distinct names
 	Inputs    []Input
@@ -24,7 +46,7 @@ type Model struct {
 // Ref returns the string version of a symbol reference for debugging.
 func (m *Model) Ref(sym int, args []Arg) string {
 	if sym < len(m.Terminals) {
-		return m.Terminals[sym]
+		return m.Terminals[sym].Name
 	}
 	nt := m.Nonterms[sym-len(m.Terminals)]
 	if len(args) == 0 {
@@ -82,6 +104,17 @@ func (m *Model) Rearrange(perm []int) {
 			expr.Symbol = terms + perm[nt]
 		}
 	})
+	m.ForEach(Command, func(_ *Nonterm, expr *Expr) {
+		if expr.CmdArgs == nil || expr.CmdArgs.ArgRefs == nil {
+			return
+		}
+		for pos, argRef := range expr.CmdArgs.ArgRefs {
+			if nt := argRef.Symbol - terms; nt >= 0 {
+				argRef.Symbol = terms + perm[nt]
+				expr.CmdArgs.ArgRefs[pos] = argRef
+			}
+		}
+	})
 	for _, set := range m.Sets {
 		set.ForEach(func(ts *TokenSet) {
 			if nt := ts.Symbol - terms; nt >= 0 {
@@ -150,7 +183,7 @@ type Expr struct {
 	Sub        []*Expr
 	Symbol     int
 	Args       []Arg
-	Pos        int // Positional index of a reference, set, or list in the original rule.
+	Pos        int // Positional index of a reference, set, or list in the original rule. 1-based.
 	Predicate  *Predicate
 	ListFlags  ListFlags
 	ArrowFlags []string
@@ -160,6 +193,26 @@ type Expr struct {
 	Model      *Model // Kept for some kinds for debugging. TODO error-prone, get rid of
 }
 
+// ForEach calls `consumer` for every Expr of the given kind reachable from `e` (`e` included),
+// parents before their Sub entries and each node at most once. A `kind` of -1 matches all kinds.
+func (e *Expr) ForEach(kind ExprKind, consumer func(e *Expr)) {
+	visited := make(map[*Expr]bool)
+	var walk func(node *Expr)
+	walk = func(node *Expr) {
+		if visited[node] {
+			return
+		}
+		visited[node] = true
+		if kind == -1 || node.Kind == kind {
+			consumer(node)
+		}
+		for _, child := range node.Sub {
+			walk(child)
+		}
+	}
+	walk(e)
+}
+
 // Equal returns true for equivalent grammar clauses.
 func (e *Expr) Equal(oth *Expr) bool {
 	if e.Kind != oth.Kind {
@@ -222,7 +275,7 @@ func (e *Expr) String() string {
 	case Prec:
 		var sym string
 		if e.Model != nil {
-			sym = e.Model.Terminals[e.Symbol]
+			sym = e.Model.Terminals[e.Symbol].Name
 		} else {
 			sym = strconv.Itoa(e.Symbol)
 		}
@@ -379,9 +432,10 @@ func (k ExprKind) GoString() string {
 
 // CmdArgs defines which RHS symbols are available inside a semantic action.
 type CmdArgs struct {
-	Names  map[string]int
-	MaxPos int // exclusive, 1-based
-	Delta  int // Added to the final position to adjust for extracted middle rule actions.
+	Names   map[string]int // alias -> position
+	MaxPos  int            // exclusive, 1-based
+	Delta   int            // Added to the final position to adjust for extracted middle rule actions.
+	ArgRefs map[int]ArgRef // position -> ArgRef
 }
 
 // TokenSet is a grammar expression that resolves to a set of tokens.
diff --git a/syntax/syntax_test.go b/syntax/syntax_test.go
index ce3893ea5..51fde6856 100644
--- a/syntax/syntax_test.go
+++ b/syntax/syntax_test.go
@@ -130,14 +130,14 @@ var parserTests = []struct {
 	want  *syntax.Model
 }{
 	{`A: a; B:;`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Reference, Symbol: 1}},
 			{Name: "B", Value: &syntax.Expr{Kind: syntax.Empty}},
 		},
 	}},
 	{`A: b=a;`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Assign, Name: "b", Sub: []*syntax.Expr{
 				{Kind: syntax.Reference, Symbol: 1},
@@ -145,7 +145,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`A: a a -> foo;`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Arrow, Name: "foo", Sub: []*syntax.Expr{
 				{Kind: syntax.Sequence, Sub: []*syntax.Expr{
@@ -156,7 +156,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`A: b c+; B: (A separator b)*?;`, &syntax.Model{
-		Terminals: []string{"EOI", "b", "c"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "b"}, {Name: "c"}},
 		Nonterms: []*syntax.Nonterm{
 			{
 				Name: "A",
@@ -179,7 +179,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`%flag T; %lookahead flag V = true; A {foo}: a B<T=V, V=true>; B<T,V>:[T!=123];`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Params: []syntax.Param{
 			{Name: "T"},
 			{Name: "V", DefaultValue: "true", Lookahead: true},
@@ -202,7 +202,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`%flag A; %flag B; input: [A==false && B || !A] a | b;`, &syntax.Model{
-		Terminals: []string{"EOI", "a", "b"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}, {Name: "b"}},
 		Params: []syntax.Param{
 			{Name: "A"},
 			{Name: "B"},
@@ -228,7 +228,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`A: set(a & B | c | ~first B & precede B & last P & follow P & ~Q); B: z; P:; Q:;`, &syntax.Model{
-		Terminals: []string{"EOI", "a", "c", "z"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}, {Name: "c"}, {Name: "z"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Set, SetIndex: 0}},
 			{Name: "B", Value: &syntax.Expr{Kind: syntax.Reference, Symbol: 3}},
@@ -258,7 +258,7 @@ var parserTests = []struct {
 		}},
 	}},
 	{`A: (?= A) a;`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Sequence, Sub: []*syntax.Expr{
 				{Kind: syntax.Lookahead, Sub: []*syntax.Expr{
@@ -269,7 +269,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`A: (?= P & !Q) a b; P: a; Q: b;`, &syntax.Model{
-		Terminals: []string{"EOI", "a", "b"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}, {Name: "b"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Sequence, Sub: []*syntax.Expr{
 				{Kind: syntax.Lookahead, Sub: []*syntax.Expr{
@@ -286,7 +286,7 @@ var parserTests = []struct {
 		},
 	}},
 	{`%interface Q, P; A: a;`, &syntax.Model{
-		Terminals: []string{"EOI", "a"},
+		Terminals: []syntax.Terminal{{Name: "EOI"}, {Name: "a"}},
 		Nonterms: []*syntax.Nonterm{
 			{Name: "A", Value: &syntax.Expr{Kind: syntax.Reference, Symbol: 1}},
 		},
@@ -354,7 +354,7 @@ func initSymbols(input string, out *syntax.Model) error {
 	var l tm.Lexer
 	l.Init(input)
 	seen := make(map[string]bool)
-	out.Terminals = []string{"EOI"}
+	out.Terminals = []syntax.Terminal{{Name: "EOI"}}
 	out.Nonterms = nil
 	var prev token.Type
 	for tok := l.Next(); tok != token.EOI; tok = l.Next() {
@@ -373,7 +373,7 @@ func initSymbols(input string, out *syntax.Model) error {
 
 		if isTerm(l.Text()) {
 			if !seen[l.Text()] {
-				out.Terminals = append(out.Terminals, l.Text())
+				out.Terminals = append(out.Terminals, syntax.Terminal{Name: l.Text()})
 			}
 			seen[l.Text()] = true
 		} else {
@@ -568,7 +568,7 @@ func (p *parser) parseTermRef() int {
 		p.errorf("terminal reference is expected (found %q)", name)
 	}
 	for i, val := range p.out.Terminals {
-		if val == name {
+		if val.Name == name {
 			return i
 		}
 	}
@@ -892,4 +892,4 @@ func (p *parser) parseSetPrimary() *syntax.TokenSet {
 		ret = &syntax.TokenSet{Kind: syntax.Complement, Sub: []*syntax.TokenSet{ret}, Origin: tilde}
 	}
 	return ret
-}
+}

From b699846f2b3130795cf5aedf745bfbd1f4e76ce2 Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 15:23:45 -0700
Subject: [PATCH 3/6] Update templates_test.go

---
 syntax/templates_test.go | 42 ++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/syntax/templates_test.go b/syntax/templates_test.go
index 9d5339c68..f78110e94 100644
--- a/syntax/templates_test.go
+++ b/syntax/templates_test.go
@@ -9,6 +9,10 @@ import (
 	"github.com/inspirer/textmapper/util/dump"
 )
 
+func expand(m *syntax.Model) error {
+	return syntax.Expand(m, new(syntax.ExpandOptions)) // zero-value options
+}
+
 var modelTests = []struct {
 	fnName string
 	fn     func(m *syntax.Model) error
@@ -98,79 +102,79 @@ var modelTests = []struct {
 	},
 
 	// Syntax sugar expansion.
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a?;`,
 		`Z: a | ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a? | b?;`,
 		`Z: a | | b ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: (a | b)?;`,
 		`Z: a | b | ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: (a b?)?;`,
 		`Z: a b | a |  ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: (a b|b) (c|d);`,
 		`Z: a b c | a b d | b c | b d ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a? %prec b ;`,
 		`Z: a %prec b | %prec b ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a? -> A ;`,
 		`Z: a -> A | -> A ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a=a? ;`,
 		`Z: a=a | ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a? {Foo} -> A ;`,
 		`Z: a {Foo} -> A | {Foo} -> A ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a+ | q ;`,
 		`A_list: A_list a | a; Z: A_list | q ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a* | q ;`,
 		`A_optlist: A_optlist a | ; Z: A_optlist | q ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: b | (a separator b)+ ;`,
 		`A_list_B_separated: A_list_B_separated b a | a; Z: b | A_list_B_separated ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: b | (a separator b)* ;`,
 		`A_list_B_separated: A_list_B_separated b a | a; A_list_B_separatedopt: A_list_B_separated | ; Z: b | A_list_B_separatedopt ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: set(a | ~b);`,
 		`Z: set(a | ~b);`, // top level sets are not expanded
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a b set(a | ~b) | c ;`,
 		`Z: a b setof_a_or_not_b | c ; setof_a_or_not_b: set(a | ~b) ;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: (?= A); A:a|b;`,
 		`Z: (?= A); A:a|b;`, // top level lookaheads are not expanded
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: a (?= A & !B) b | c; A: a|b; B: a|b;`,
 		`Z: a lookahead_A_notB b | c; lookahead_A_notB: (?= A & !B); A: a|b; B: a|b;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`Z: A+ | C+ | B+; A: a|x; B: b|y; C: c|z;`, // sorting test
 		`A_list: A_list A | A; B_list: B_list B | B; C_list: C_list C | C; Z: A_list | C_list | B_list; A: a|x; B: b|y; C: c|z;`,
 	},
-	{"Expand", syntax.Expand,
+	{"Expand", expand,
 		`%input X; X: B+ | Y+ | A+; A: a|x; B: b|y; Y: c|z;`, // sorting test #2
 		`%input X; A_list: A_list A | A; B_list: B_list B | B; X: B_list | Y_list | A_list; Y_list: Y_list Y | Y; A: a|x; B: b|y; Y: c|z;`,
 	},

From fa221081c6364c5f69bc86bd8ffc089ebb3a7fc9 Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 15:26:04 -0700
Subject: [PATCH 4/6] Update imports in compiler_test.go

---
 compiler/compiler_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go
index 301eef1d3..5a3903738 100644
--- a/compiler/compiler_test.go
+++ b/compiler/compiler_test.go
@@ -8,7 +8,7 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/inspirer/textmapper/grammar/grammar"
+	"github.com/inspirer/textmapper/grammar"
 	"github.com/inspirer/textmapper/parsers/parsertest"
 	"github.com/inspirer/textmapper/parsers/tm"
 	"github.com/inspirer/textmapper/parsers/tm/ast"

From 0ae2c10be188680dbea3d64d76ad14e8bc8200b6 Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 15:28:15 -0700
Subject: [PATCH 5/6] Add import of "sort" to compiler_test.go.

---
 compiler/compiler_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go
index 5a3903738..185a4a82a 100644
--- a/compiler/compiler_test.go
+++ b/compiler/compiler_test.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"testing"
 

From d1c4fba9cd5ed62226cfafdce96338643a0aec6b Mon Sep 17 00:00:00 2001
From: Shannon Rae <166186361+secretlyshannon@users.noreply.github.com>
Date: Mon, 28 Apr 2025 15:31:30 -0700
Subject: [PATCH 6/6] Add disabled_syntax.tmerr

---
 compiler/testdata/disabled_syntax.tmerr | 53 +++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 compiler/testdata/disabled_syntax.tmerr

diff --git a/compiler/testdata/disabled_syntax.tmerr b/compiler/testdata/disabled_syntax.tmerr
new file mode 100644
index 000000000..b1df7bdaa
--- /dev/null
+++ b/compiler/testdata/disabled_syntax.tmerr
@@ -0,0 +1,53 @@
+language parser(go);
+
+disableSyntax = ["Lookahead", "Arrow", "Templates", "NestedChoice"]
+
+:: lexer
+
+a: /a/
+b: /b/
+c: /c/
+d: /d/
+
+:: parser
+
+input: A1 B1 C1 D1 E1 F1;
+
+A1: «(?= laA)» a;
+# err: syntax Lookahead is not supported
+
+laA: a b c d;
+
+B1: «(?= laB)» b;
+# err: syntax Lookahead is not supported
+
+laB: a b d;
+
+C1: «(?= laC)» c;
+# err: syntax Lookahead is not supported
+
+laC: laA | laB ;
+
+# Note: reusing laC again.
+
+D1: «(?= laC)» d;
+# err: syntax Lookahead is not supported
+
+E1: «(?= laE)» d;
+# err: syntax Lookahead is not supported
+
+laE: a+ b;
+
+F1: «(?= laF)» d;
+# err: syntax Lookahead is not supported
+
+laF «-> Thing»: laE b;
+# err: syntax Arrow is not supported
+
+%flag T;
+
+«g»<T>: F1;
+# err: templates are not supported
+
+h : F1 | (F1 F1) | «(F1 | F1 F1)»;
+# err: parenthesized Choice operator is not supported