Skip to content

Commit b8774df

Browse files
mr-martian and unhammer authored
fix unconsumed tags/lemq in lt-proc -db (closes #182) (#195)
* fix unconsumed tags/lemq in `lt-proc -db` (#182)
* new mode gm_bilgen for biltrans generation, giving @ on unconsumed tags

---------

Co-authored-by: Kevin Brubeck Unhammer <[email protected]>
1 parent 648471e commit b8774df

File tree

4 files changed: +22 -1 lines changed

lttoolbox/fst_processor.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,7 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
12821282
case gm_tagged:
12831283
u_fputc(rd.mark, output);
12841284
[[fallthrough]];
1285+
case gm_bilgen:
12851286
case gm_clean:
12861287
writeEscaped(removeTags(rd.content), output);
12871288
break;
@@ -1343,6 +1344,7 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
13431344
case gm_tagged:
13441345
if (!rd.content.empty()) u_fputc('#', output);
13451346
[[fallthrough]];
1347+
case gm_bilgen:
13461348
case gm_clean:
13471349
writeEscaped(removeTags(rd.content), output);
13481350
break;
@@ -1775,7 +1777,9 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
17751777
}
17761778
}
17771779
// if there are no tags, we only return complete matches
1778-
if (!seenTags && queue_start + 1 < symbols.size()) result.clear();
1780+
if ((!seenTags || mode == gm_all || mode == gm_bilgen) && queue_start + 1 < symbols.size()) {
1781+
result.clear();
1782+
}
17791783

17801784
UString source;
17811785
size_t queue_pos = 0;

lttoolbox/fst_processor.h

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ enum GenerationMode
4343
gm_clean, // clear all
4444
gm_unknown, // display unknown words, clear transfer and generation tags
4545
gm_all, // display all
46+
gm_bilgen, // generation using biltrans (don't allow unconsumed tags)
4647
gm_tagged, // tagged generation
4748
gm_tagged_nm, // clean tagged generation
4849
gm_carefulcase // try lowercase iff no uppercase

lttoolbox/lt_proc.cc

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ int main(int argc, char *argv[])
8989
if (args["generation"]) {
9090
if (cmd && cmd != 'b') cli.print_usage();
9191
if (!cmd) cmd = 'g';
92+
else if(cmd == 'b') bilmode = gm_bilgen;
9293
}
9394
if (args["decompose-nouns"]) {
9495
if (cmd) cli.print_usage();

tests/lt_proc/__init__.py

+15
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,21 @@ class BiltransAnyCharEscapes(ProcTest):
516516
inputs = [r'^«\[\[tf:i:a\]\]s\\\^å\[\[\/\]\]»<MERGED>$']
517517
expectedOutputs = [r'^«\[\[tf:i:a\]\]s\\\^å\[\[\/\]\]»<MERGED>/«\[\[tf:i:a\]\]s\\\^å\[\[\/\]\]»<MERGED>$']
518518

519+
class BiltransGenDebugSymbols(ProcTest):
520+
procdix = 'data/minimal-mono.dix'
521+
procdir = 'rl'
522+
procflags = ['-d', '-b']
523+
inputs = [
524+
'^ab<n><def>$',
525+
'^ab<n><def><potato>$',
526+
'^ab<n><def>#c$',
527+
]
528+
expectedOutputs = [
529+
'^ab<n><def>/abc$',
530+
'^ab<n><def><potato>/#ab<n><def><potato>$',
531+
'^ab<n><def>#c/#ab<n><def>#c$',
532+
]
533+
519534

520535
# These fail on some systems:
521536
#from null_flush_invalid_stream_format import *

0 commit comments

Comments
 (0)