Skip to content

Commit a9073fd

Browse files
authored
Merge pull request #7363 from umbraco/broken-link-pr
Adding Lychee Config files
2 parents 5fee40f + af4b8c7 commit a9073fd

File tree

4 files changed

+232
-0
lines changed

4 files changed

+232
-0
lines changed

.github/lychee.toml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# .github/lychee.toml
# Settings for the lychee link checker.

############################# Display #############################
# Log verbosity. One of: "error", "warn", "info", "debug", "trace".
verbose = "info"

# Suppress the interactive progress bar while links are being checked.
no_progress = true
10+
11+
############################# Cache ###############################
# Link caching (which avoids re-checking the same links across runs)
# is switched off here.
cache = false
15+
16+
############################# Runtime #############################
# Upper bound on link checks running concurrently.
max_concurrency = 12

# Upper bound on redirects allowed for a link.
max_redirects = 5

# Retries allowed before a link is declared dead.
max_retries = 1
25+
26+
############################# Requests ############################
# Per-website timeout, measured from connect until the response has finished.
timeout = 10

# Minimum pause, in seconds, between retries of a failed request.
retry_wait_time = 1

# Status codes treated as success: 200-204, the 301-308 redirect range, and 429.
accept = ["200..=204", "301..=308", "429"]

# Fragment checking is off to avoid false fragment errors.
include_fragments = false

# Restrict checking to links with the listed schemes (https only here).
# Omitting this key checks links with any other scheme as well;
# http, https, file, and mailto are supported at the moment.
scheme = ["https"]

# When false, HTTP links are not reported as errors even if an HTTPS
# version of the link is available.
require_https = false

# Extensions to try when a URL does not specify one. This is common in
# documentation tools that cross-reference files without extensions.
fallback_extensions = ["md", "html"]
50+
51+
############################# Exclusions ##########################
# URLs and mail addresses to skip (regular expressions).
# These are TOML literal strings (single quotes): backslashes reach the regex
# engine unchanged, so a literal dot is written `\.` and never `\\.`
# (`\\.` would mean "a literal backslash followed by any character").
exclude = [
  '^mailto:',
  '^https?://localhost',
  '^https?://127\.0\.0\.1',
  '^https://www\.linkedin\.com',
  '^https?://issues\.umbraco\.org/',
  '^https?://web\.archive\.org/web/'
]

# Filesystem paths to skip (regular expressions, literal strings as above).
exclude_path = [
  '(^|/)node_modules/',
  '(^|/)dist/',
  '(^|/)bin/',
  '\.txt$',       # skip .txt extensions
  '(^|/)test/'    # skip directories named "test"
]

# URLs to check (supports regex). Has preference over all excludes.
include = ['gist\.github\.com.*']

# Skip checking mail addresses (consistent with the '^mailto:' exclusion above).
include_mail = false
76+
77+
############################# Content Checks ######################
78+
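# Mark pages as broken if the body matches "page not found" or "404" (case-insensitive).
# NOTE(review): "(?i)404" matches any body that contains the digits 404 anywhere; confirm this is not too broad, and confirm that the lychee version in use supports a [content] table.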
79+
[content]
80+
deny = ["(?i)page not found", "(?i)404"]
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
name: Check Links in Pull Requests

# Runs on pull requests targeting main that touch at least one Markdown file.
on:
  pull_request:
    branches:
      - main
    paths:
      - '**/*.md'

# Least-privilege token: read the repository, write the sticky PR comment.
permissions:
  contents: read
  pull-requests: write

jobs:
  check-links:
    runs-on: ubuntu-latest

    steps:
      # 1. Checkout repository.
      #    Full history so the base commit used by `git diff` below is available.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # 2. Get changed Markdown files in the PR.
      #    --diff-filter=d drops deleted files, which no longer exist on disk
      #    and therefore cannot be handed to lychee as inputs.
      #    The list is space-joined and exported as CHANGED_FILES, so paths
      #    that contain spaces would be split into several arguments.
      - name: Get changed Markdown files
        id: changed-files
        run: |
          CHANGED_FILES=$(git diff --name-only --diff-filter=d ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md$' || true)
          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }"
          echo "CHANGED_FILES=$CHANGED_FILES" >> "$GITHUB_ENV"
          echo "Changed Markdown files: $CHANGED_FILES"

      # 3. Report when there is nothing to check.
      #    `exit 0` only ends this step, not the job; the steps below are
      #    skipped through their own `if:` conditions.
      - name: Skip if no Markdown files changed
        if: env.CHANGED_FILES == ''
        run: |
          echo "No Markdown files changed. Skipping link check."
          exit 0

      # 4. Run Lychee on the changed files only.
      #    NOTE(review): no --config is passed here; confirm whether lychee
      #    picks up .github/lychee.toml on its own or needs
      #    `--config .github/lychee.toml`.
      - name: Run Lychee
        id: run-lychee
        if: env.CHANGED_FILES != ''
        uses: lycheeverse/lychee-action@v2
        with:
          args: |
            --no-progress
            --include-fragments
            --format detailed
            ${{ env.CHANGED_FILES }}
          output: lychee/out_raw.md
          fail: false  # don't fail yet, the report is post-processed below

      # 5. Format Lychee output (user-friendly, relative paths).
      #    Each recognised line of lychee/out_raw.md is rewritten into
      #    lychee/comment.md; `has_content` says whether anything was found.
      - name: Format Lychee report
        id: format-report
        if: always() && env.CHANGED_FILES != ''
        run: |
          mkdir -p lychee
          : > lychee/comment.md  # start with empty file

          awk '
            # File header: strip the runner workspace prefix from the path.
            /^Errors in / {
              file=$3
              gsub("^/home/runner/work/UmbracoDocs/UmbracoDocs/", "", file)
              print "\nBroken links found in:\n" file >> "lychee/comment.md"
              next
            }

            # Lines tagged [ERROR] are reported as missing anchors.
            /\[ERROR\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[ERROR\] /, "", msg)
              gsub("^file:///home/runner/work/UmbracoDocs/UmbracoDocs/", "", msg)
              print "\n⚓ Anchor not found → " msg >> "lychee/comment.md"
              next
            }

            /\[404\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[404\] /, "", msg)
              print "\n❌ 404 Not Found → " msg >> "lychee/comment.md"
              next
            }

            /\[301\]|\[302\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[(301|302)\] /, "", msg)
              print "\n🔀 Redirect → " msg >> "lychee/comment.md"
              next
            }

            # Per-link timeouts only; lines containing "Timeouts" are skipped.
            /Timeout/ && !/Timeouts/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              print "\n⏳ Timeout → " msg >> "lychee/comment.md"
              next
            }
          ' lychee/out_raw.md

          # Add header only if we found content
          if [ -s lychee/comment.md ]; then
            sed -i '1i **The Link Checker found broken links in your PR**.\n Please review the following list:\n' lychee/comment.md
            echo "has_content=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_content=false" >> "$GITHUB_OUTPUT"
          fi

      # 6. Comment broken links on PR (if present).
      - name: Comment broken links
        if: always() && (env.CHANGED_FILES != '') && (steps.format-report.outputs.has_content == 'true')
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          path: lychee/comment.md
          recreate: true

      # 7. Fail workflow if broken links exist.
      - name: Fail workflow if broken links
        if: steps.format-report.outputs.has_content == 'true'
        run: |
          echo "❌ Broken links detected. Please review the PR comment for details."
          exit 1

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
*.orig
55
.vscode
66
.idea
7+
.lycheecache

.lycheeignore

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# URL patterns skipped by the lychee link checker: https://github.com/lycheeverse/lychee
# One regular expression per line.
# Only URLs can be excluded here, not paths. For paths, use the `exclude_path` key in the `lychee.toml` file. ref: https://lychee.cli.rs/recipes/excluding-paths/

# GitHub blob/tree links (including line fragments)
^https://github\.com/umbraco/Umbraco-CMS/blob/.*/.*#L.*
^https://github\.com/umbraco/Umbraco-CMS/tree/.*
^https://github\.com/Shazwazza/Articulate/blob/.*/.*#L.*
^https://github\.com/umbraco/Umbraco-CMS/blob/.*

# Links whose anchors/fragments produce false positives
^https://apidocs\.umbraco\.com/.*/#.*
^https://tinymce\.github\.io/.*/#.*
^https://openid\.net/.*/#.*
^https://docs\.microsoft\.com/.*#.*
^https://learn\.microsoft\.com/.*#.*
^https://developer\.mozilla\.org/.*/#.*
^https://learning\.postman\.com/docs/.*/#.*
^https://nginx\.org/.*/#.*
^https://azure\.microsoft\.com/en-gb/services/media-services/.*
^https://www\.tiny\.cloud/docs/.*

# Anchors on TinyMCE issue pages
^https://github\.com/tinymce/tinymce/issues/.*#.*

# NIST FIPS and other static documents
^https://csrc\.nist\.gov/publications/PubsFIPS\.html#.*

# Umbraco issue tracker links that tend to time out
^https://issues\.umbraco\.org/issue/.*
^https://issues\.umbraco\.org/issues/.*

0 commit comments

Comments
 (0)