-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBLEU.sh
58 lines (45 loc) · 1.6 KB
/
BLEU.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# Compute several BLEU variants for a set of generated translations.
#
# Usage: $0 TESTSET SRCLANG TGTLANG GEN
#   TESTSET  test-set name, passed to `sacrebleu -t`
#   SRCLANG  source language code (first half of the `sacrebleu -l` pair)
#   TGTLANG  target language code (second half of the `sacrebleu -l` pair)
#   GEN      path to the generation output; its lines starting with "H-" are
#            the hypotheses that get scored (presumably fairseq-generate
#            output -- confirm against the producer of this file)
#
# Intermediate files are written next to GEN and into the current directory.
# Exactly four arguments are required; anything else prints usage and exits 1.
if [[ $# -ne 4 ]]; then
  # Usage text is a diagnostic: keep it out of stdout so it never ends up in
  # captured score output.
  echo "usage: $0 TESTSET SRCLANG TGTLANG GEN" >&2
  exit 1
fi

TESTSET="$1"
SRCLANG="$2"
TGTLANG="$3"
GEN="$4"
# sacremoses performs the detokenize/tokenize steps of this script; stop early
# when it is not on PATH.
if ! command -v sacremoses > /dev/null 2>&1; then
  echo "sacremoses could not be found, please install with: pip install sacremoses==0.0.41" >&2
  # A bare `exit` would propagate echo's status (0) and report success to the
  # caller even though nothing was scored.
  exit 1
fi
# sacrebleu supplies the references and the detokenized BLEU scores; stop early
# when it is not on PATH.
if ! command -v sacrebleu > /dev/null 2>&1; then
  echo "sacrebleu could not be found, please install with: pip install sacrebleu==1.4.10" >&2
  # A bare `exit` would propagate echo's status (0) and report success to the
  # caller even though nothing was scored.
  exit 1
fi
# Pull the hypotheses out of GEN, restore their numeric order, and detokenize.
# Records are expected to be tab-separated lines of the form
# "H-<id>\t...\t<text>"; the text is taken from the last field.
#
# `sed -n 's/^H-//p'` keeps only lines that really carry the "H-" prefix (and
# strips it). Matching on a bare leading "H" would also let unrelated lines
# through, which would shift hypotheses against the references.
sed -n 's/^H-//p' < "$GEN" \
  | sort -n -k 1 \
  | awk -F '\t' '{ print $NF }' \
  | sacremoses detokenize \
  > "${GEN}.sorted.detok"
# Write the reference side of the test set to a file; the token-level scoring
# later in this script reads it.
sacrebleu -t "$TESTSET" -l "${SRCLANG}-${TGTLANG}" --echo ref \
  > "${SRCLANG}-${TGTLANG}.ref"

# Detokenized BLEU over the whole test set. The hypotheses are fed on stdin.
echo ">>>sacrebleu"
sacrebleu -t "$TESTSET" -l "${SRCLANG}-${TGTLANG}" < "${GEN}.sorted.detok"

# Same score, restricted with --origlang to the subset selected by SRCLANG.
echo ""
echo "origin=$SRCLANG"
sacrebleu -t "$TESTSET" -l "${SRCLANG}-${TGTLANG}" --origlang="$SRCLANG" \
  < "${GEN}.sorted.detok"

# Complement subset ("non-" prefix). It is labelled with TGTLANG in the output.
# NOTE(review): the complement equals "TGTLANG-original" only if the test set
# contains no other original languages -- confirm for the test sets in use.
echo ""
echo "origin=$TGTLANG"
sacrebleu -t "$TESTSET" -l "${SRCLANG}-${TGTLANG}" --origlang="non-${SRCLANG}" \
  < "${GEN}.sorted.detok"
# Token-level BLEU: re-tokenize both the detokenized hypotheses and the
# references with sacremoses, then score the two token files with fairseq-score.
# NOTE(review): no language option is passed to the tokenizer, so its default
# applies regardless of TGTLANG -- confirm this is intended.
echo ""
echo ">>>token level multi-bleu"
sacremoses tokenize < "${GEN}.sorted.detok" > "${GEN}.sorted.tok"
sacremoses tokenize < "${SRCLANG}-${TGTLANG}.ref" > "${SRCLANG}-${TGTLANG}.ref.tok"
fairseq-score -r "${SRCLANG}-${TGTLANG}.ref.tok" -s "${GEN}.sorted.tok"
# Compound-split BLEU: rewrite "x-y" as "x ##AT##-##AT## y" in both token files
# so that the parts of a hyphenated compound are scored as separate tokens.
# Each match consumes the non-space character on both sides of the hyphen, so
# in a chain such as "a-b-c" only the first hyphen is split. The pattern is
# deliberately left untouched because changing it would change the scores.
echo ""
echo ">>>compound-bleu"
perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' \
  < "${GEN}.sorted.tok" > "${GEN}.sorted.tok.com"
perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' \
  < "${SRCLANG}-${TGTLANG}.ref.tok" > "${SRCLANG}-${TGTLANG}.ref.tok.com"
fairseq-score -r "${SRCLANG}-${TGTLANG}.ref.tok.com" -s "${GEN}.sorted.tok.com"
# Remove the intermediate files this script wrote. Operands are quoted so paths
# with spaces survive, and `--` stops a GEN path starting with "-" from being
# read as an option. No -f: a missing file still surfaces as an error.
rm -- \
  "${SRCLANG}-${TGTLANG}.ref.tok.com" \
  "${GEN}.sorted.tok.com" \
  "${GEN}.sorted.detok" \
  "${GEN}.sorted.tok" \
  "${SRCLANG}-${TGTLANG}.ref.tok" \
  "${SRCLANG}-${TGTLANG}.ref"