Skip to content

Commit 62380ad

Browse files
committed
Restored check for efficiency
1 parent 63b9f56 commit 62380ad

File tree

1 file changed

+18
-10
lines changed

1 file changed

+18
-10
lines changed

bash/blockchair.sh

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,30 @@ if [[ "$2" == "" ]]; then
44
echo "$(basename $0) DIR NTHREADS [OUTPUT]" 1>&2
55
echo "Reads files in DIR and processes them using NTHREADS parallel sorts." 1>&2
66
echo "Files are processed as input files unless OUTPUT is specified." 1>&2
7+
echo "FILES MUST END WITH A NEWLINE. Fix them with \"sed -i -e '\$a\\' *\"." 1>&2
78
exit 1
89
fi
910

1011
DIR=$1
1112
NTHREADS=$2
1213
OUTPUT=$3
1314

15+
function file_ends_with_newline() {
16+
[[ $(tail -c1 "$1" | wc -l) -gt 0 ]]
17+
}
18+
1419
FILES=$(mktemp)
1520
find $DIR -type f >$FILES
21+
22+
# Check that all files end with a newline
23+
24+
while read FILE; do
25+
if ! file_ends_with_newline $FILE; then
26+
echo "File $FILE does not end with a newline" 1>&2
27+
exit 1
28+
fi
29+
done <$FILES
30+
1631
NFILES=$(cat $FILES | wc -l)
1732

1833
# To avoid empty splits, there must be no more threads than files
@@ -26,19 +41,12 @@ SPLITBASE=$(mktemp)
2641
split -n l/$NTHREADS $FILES $SPLITBASE
2742
SPLITS=$(for file in ${SPLITBASE}?*; do echo $file; done)
2843

29-
for SPLIT in $SPLITS; do
44+
for SPLIT in $SPLITS; do
3045
mkfifo $SPLIT.pipe
31-
32-
# For each file, delete first line (labels); cut will add a newline at the end if missing
33-
3446
if [[ "$OUTPUT" != "" ]]; then
35-
( while read FILE; do
36-
cut -f2,7,10 "$FILE" | tail -n+2 | awk '{ if ($3 == 0) print $1 "\t" $2 }'
37-
done <$SPLIT | LC_ALL=C sort -S2G >$SPLIT.pipe) &
47+
(tail -q -n+2 $(cat $SPLIT) | cut -f2,7,10 | awk '{ if ($3 == 0) print $1 "\t" $2 }' | LC_ALL=C sort -S2G >$SPLIT.pipe) &
3848
else
39-
( while read FILE; do
40-
cut -f7,13 "$FILE" | tail -n+2 | awk '{ print $2 "\t" $1 }'
41-
done <$SPLIT | LC_ALL=C sort -S2G >$SPLIT.pipe) &
49+
(tail -q -n+2 $(cat $SPLIT) | cut -f7,13 | awk '{ print $2 "\t" $1 }' | LC_ALL=C sort -S2G >$SPLIT.pipe) &
4250
fi
4351
done
4452

0 commit comments

Comments
 (0)