@@ -23,14 +23,15 @@ jobs:
2323 - name : Check the Validity of Hyperlinks
2424 run : |
2525 cd ${{github.workspace}}
26+ timeout=15 # max connect timeout
2627 fail="FALSE"
2728 retry="FALSE"
2829 # url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin')
2930 merged_commit=$(git log -1 --format='%H')
3031 changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
3132 if [ -n "$changed_files" ]; then
3233 for changed_file in $changed_files; do
33- url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'linkedin') || true
34+ url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | sort -u | grep -Ev 'linkedin') || true
3435 if [ -n "$url_lines" ]; then
3536 for url_line in $url_lines; do
3637 url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
@@ -39,10 +40,10 @@ jobs:
3940 echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person."
4041 retry="FALSE"
4142 else
42- response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
43+ response=$(curl --connect-timeout "$timeout" -L -s -o /dev/null -w "%{http_code}" "$url")  # -L follows redirects; only the final HTTP status code is captured
4344 if [ "$response" -ne 200 ]; then
4445 echo "**********Validation failed, status code: $response, try again**********"
45- response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
46+ response_retry=$(curl --connect-timeout "$timeout" -L -s -o /dev/null -w "%{http_code}" "$url")  # retry follows redirects like the first attempt, so a redirecting link can still reach 200
4647 if [ "$response_retry" -eq 200 ]; then
4748 echo "*****Retry successful*****"
4849 else
@@ -105,15 +106,15 @@ jobs:
105106 fail="FALSE"
106107 repo_name=${{ github.event.pull_request.head.repo.full_name }}
107108 if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
108- owner =$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1 )
109- branch="https://github.com/$owner/docs /tree/${{ github.event.pull_request.head.ref }}"
109+ repo_fork="${{ github.event.pull_request.head.repo.full_name }}"  # keep the whole "owner/repo" slug: the URL needs the owner, and the repo part is no longer hard-coded
110+ branch="https://github.com/$repo_fork/tree/${{ github.event.pull_request.head.ref }}"
110111 else
111112 branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}"
112113 fi
113114 link_head="https://github.com/opea-project/docs/blob/main"
114115 merged_commit=$(git log -1 --format='%H')
115116 changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
116- png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto'|grep -Ev 'portal.azure.com')
117+ png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r . | sort -u | grep -Ev 'http' | grep -Ev 'mailto' | grep -Ev 'portal.azure.com') || true  # grep exits 1 when nothing is left; tolerate that, as the hyperlink scan does
117118 if [ -n "$png_lines" ]; then
118119 for png_line in $png_lines; do
119120 refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
@@ -138,12 +139,12 @@ jobs:
138139 url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||')$png_path
139140 response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
140141 if [ "$response" -ne 200 ]; then
141- echo "**********Validation failed, try again**********"
142+ echo "**********Validation failed, status code: $response, try again**********"
142143 response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
143144 if [ "$response_retry" -eq 200 ]; then
144145 echo "*****Retry successfully*****"
145146 else
146- echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev"
147+ echo "Status code: $response_retry => invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev"
147148 fail="TRUE"
148149 fi
149150 else
0 commit comments