Skip to content

Commit 359bf55

Browse files
committed
Skip empty; add flag to allow empty scoring
1 parent 31c1218 commit 359bf55

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

silnlp/common/compare_translations.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,12 @@ def main() -> None:
2121
default={"bleu", "chrf3", "chrf3+", "chrf3++", "spbleu", "ter"},
2222
help="Set of scorers",
2323
)
24+
parser.add_argument(
25+
"--score-empty", type=bool, required=False, help="If true, do not calculate BLEU score on segment pairs where at least one segment is empty", default=False
26+
)
27+
2428
args = parser.parse_args()
25-
scores = compare_translations(args.projects[0], args.projects[1], args.scorers)
29+
scores = compare_translations(args.projects[0], args.projects[1], args.scorers, args.score_empty)
2630

2731
print(f"{args.projects[0]},{args.projects[1]}")
2832
if args.output_file is not None:
@@ -36,17 +40,18 @@ def main() -> None:
3640
print(f"{key},{value}")
3741

3842

39-
def compare_translations(project1: Path, project2: Path, scorers: Set[str]) -> Dict[str, float]:
43+
def compare_translations(
    project1: Path, project2: Path, scorers: Set[str], score_empty: bool = False
) -> Dict[str, float]:
    """Score the aligned text of two Paratext projects against each other.

    Each project is loaded as a ``ParatextTextCorpus``; the first corpus is
    aligned with the second via ``align_rows`` and the aligned rows are read
    in order. The collected texts are handed to ``score_pair``: the first
    project's segments as the first argument and the second project's
    segments as the only entry of the list passed as the second argument.

    Args:
        project1: Location of the first project.
        project2: Location of the second project.
        scorers: Names of the scorers, forwarded unchanged to ``score_pair``.
        score_empty: When false (the default), an aligned row is dropped if
            either of its texts is empty or whitespace-only. When true, every
            aligned row is kept.

    Returns:
        The result of ``score_pair`` for the collected segments.
    """
    first_corpus = ParatextTextCorpus(project1)
    second_corpus = ParatextTextCorpus(project2)
    aligned_corpus = first_corpus.align_rows(second_corpus)

    first_segments = []
    second_segments = []
    with aligned_corpus.get_rows() as aligned_rows:
        for aligned_row in aligned_rows:
            if not score_empty:
                # Drop the pair as soon as one side has nothing but whitespace.
                if not aligned_row.source_text.strip() or not aligned_row.target_text.strip():
                    continue
            first_segments.append(aligned_row.source_text)
            second_segments.append(aligned_row.target_text)

    return score_pair(first_segments, [second_segments], scorers)
5156

5257

0 commit comments

Comments
 (0)