Skip to content

Feat/check links multi strip vars #700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 93 additions & 20 deletions tools/check_links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,16 @@

# Print the command-line help to stdout and terminate the script with
# exit status 1. Takes no arguments.
usage()
{
  # Only the current multi-file synopsis is printed; the older single-file
  # wording contradicted it.
  echo "Usage: check_links.sh [-d] [-a] mdfile [mdfile [...]]"
  echo "The script will search the mdfile(s) for pictures from hedgedoc and report the missing ones"
  echo "The option -d will also download the missing ones and suggest you git add them."
  echo "The option -a will not limit the links to images but all linked files."
  exit 1
}

ispic()
{
if test "$ALL" = "1"; then return 0; fi
ext=${1##*.}
ext="${1##*.}"
ext="$(echo $ext | tr A-Z a-z)"
case $ext in
png|jpg|jpeg|heic|heif|svg|avif)
Expand All @@ -31,8 +30,7 @@ ispic()

# Return success (status 0) when the path given as $1 is readable,
# non-zero otherwise. The path is used exactly as passed, so names
# containing spaces work and no helper variable is left behind.
exist()
{
  test -r "$1"
}

# main
Expand All @@ -41,35 +39,110 @@ if test "$1" = "-d"; then DOWNLOAD=1; shift; fi
if test "$1" = "-a"; then ALL=1; shift; fi
if test -z "$1" -o "$1" = "-h"; then usage; fi
if test ! -r "$1"; then echo "ERROR: File \"$1\" not readable" 1>&2; exit 2; fi

ADDS=""
FAILED=""
CHGD=""
errs=0

SEDCHANGES=""
while test -n "$1"; do
INPATH=${1%/*}
INFILE=${1##*/}
pushd . >/dev/null 2>&1
if test "$INPATH" != "$1"; then cd $INPATH; INPATH="${INPATH}/"; else INPATH=""; fi

ADDS=""
errs=0
echo "*** $INFILE ***"
CHANGES=""
while read line; do
# FIXME: Handle multiple links in one line
LINK="$(echo $line | sed 's@^.*\(https://input.scs.community/[^) #"]*\).*$@\1@')"
LINK="${LINK%\'}"
if ispic $LINK; then
if exist $LINK; then
echo "$LINK present already"
LINK1="$(echo $line | sed 's@^.*\(https://input.scs.community/[^) ">]*\).*$@\1@')"
LINK="${LINK1%\'}"
LINKANCHOR="${LINK##*#}"
if test "$LINK" = "$LINKANCHOR"; then LINKANCHOR=""; fi
LINKNOANCHOR="${LINK%#*}"
LINKNOVAR="${LINK%\?*}"
LINKNONO="${LINKNOANCHOR%\?*}"
TGTFILE="${LINKNONO##*/}"
# An empty TGTFILE means the link (minus query string and anchor) ends in
# "/", i.e. it names no file; the script treats it as a link to the
# document itself.
if test -z "$TGTFILE"; then
  # Propose replacing the link by its bare anchor, but only when an anchor
  # exists and -d was given (DOWNLOAD non-empty). The variable has to be
  # expanded here: a literal string would make the check always true.
  if test -n "$LINKANCHOR" && test -n "$DOWNLOAD"; then
    echo " Replace link to self with anchor $LINKANCHOR ..."
    CHANGES="$CHANGES -e 's~${LINK}~#${LINKANCHOR}~g'"
  fi
  # Nothing to download or check for such a link.
  continue
fi
# Skip .css and .js and .xml
if test "${TGTFILE%.css}" != "$TGTFILE" \
-o "${TGTFILE%.js}" != "$TGTFILE" \
-o "${TGTFILE%.xml}" != "$TGTFILE"; then continue; fi
if test "$ALL" = "1" || ispic "$LINK"; then
ERR=0
echo " Consider replacing $LINK with $TGTFILE[.md] ..."
if exist "$TGTFILE"; then
echo " $TGTFILE present already"
elif exist "${TGTFILE}.md"; then
TGTFILE="${TGTFILE}.md"
echo " $TGTFILE present already"
else
echo "$LINK missing"
if test "$DOWNLOAD" = "1"; then
curl -LO "$LINK"
if test $? = "0"; then echo "Downloaded $LINK successfully."; ADDS="$ADDS ${INPATH}${LINK##*/}"
else echo "ERROR downloading $LINK" 1>&2; let errs+=1; fi
# Fetch the missing link target into the current directory.
# ERR ends up 0 on success and non-zero on any failure.
if ispic "$LINK"; then
  # Pictures are stored verbatim under their remote file name.
  curl -sLO "$LINK"
  ERR=$?
else
  # TODO: Handle hedgedoc /p links (/s does not need special treatment)
  LINKNONO="${LINKNONO/https:\/\/input.scs.community\/p/https://input.scs.community}"
  # Request <link>/download; curl -O saves it as a file named "download".
  curl -sLO "$LINKNONO"/download
  ERR=$?
  if test "$ERR" != 0; then
    # curl failed: keep its status and drop whatever it may have left
    # behind, so a partial file is never installed as the target.
    rm -f download
  elif grep -qi '^<!DOCTYPE html>$' download; then
    # An HTML page came back instead of the document; count as failure.
    ERR=1
    rm download
  else
    # Store the document as <name>.md with trailing whitespace removed.
    TGTFILE="${TGTFILE%.md}.md"
    sed -i 's/\s*$//' download
    mv download "$TGTFILE"
  fi
fi
if test $ERR = 0; then
echo " Downloaded $LINKNONO successfully."
ADDS="$ADDS \"${INPATH}${TGTFILE}\""
fi
fi
fi
if test $ERR = 0; then
if test -n "$LINKANCHOR"; then TGTFILE="$TGTFILE#$LINKANCHOR"; fi
# We strip off variables here, but leave anchors in
if echo "$line" | grep "(${LINK})" >/dev/null; then
#echo "() -> Plain replacement"
CHANGES="$CHANGES -e 's~${LINK}~${TGTFILE}~g'"
elif echo "$line" | grep "<$LINK>" >/dev/null; then
#echo "<> -> Change to []()"
CHANGES="$CHANGES -e 's~<${LINK}[^>]*>~[${TGTFILE%#*}](${TGTFILE})~g'"
else
let errs+=1
#echo "Plain -> Change to []()"
CHANGES="$CHANGES -e 's~${LINK}~[${TGTFILE%#*}](${TGTFILE})~g'"
fi
else
echo "ERROR downloading $LINK" 1>&2
let errs+=1
FAILED="$FAILED\"$LINK\" "
fi
fi
done < <(tr ' ' '\n' < "$INFILE"|grep 'https://input.scs.community'|sed 's/!\[..*\]//g')
if test "$DOWNLOAD" -a -n "$ADDS"; then
echo -e "Consider\ngit add $ADDS"
if test -n "$CHANGES"; then SEDCHANGES="${SEDCHANGES}sed -i$CHANGES \"$1\"\n"; CHGD="$CHGD \"$1\""; fi
shift
popd >/dev/null 2>&1
done

# Final report: when -d was given, print the suggested follow-up commands
# (git add for new files, sed/git add for changed documents), then
# summarise failures and exit with the error count.
if test -n "$DOWNLOAD" && test -n "$ADDS"; then
  echo -e "Consider\ngit add$ADDS"
fi
if test -n "$DOWNLOAD" && test -n "$CHGD"; then
  # SEDCHANGES carries literal "\n" separators that echo -e expands.
  echo -en "$SEDCHANGES"
  echo "git add$CHGD"
fi
if test "${errs:-0}" -gt 0; then
  echo "$errs missing files" 1>&2
  echo "FAILED: $FAILED"
fi
# The shell reports exit statuses modulo 256, so an error count that is a
# multiple of 256 would look like success. Cap it at 255.
if test "${errs:-0}" -gt 255; then exit 255; fi
exit "${errs:-0}"