Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .github/lychee.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# .github/lychee.toml

############################# Display #############################
# Log verbosity of the link checker's own output.
# Accepts log level: "error", "warn", "info", "debug", "trace"
verbose = "info"

# Don't show interactive progress bar while checking links.
no_progress = true

############################# Cache ###############################
# Link caching is switched off here. Turning it on can be helpful to avoid
# checking the same links on multiple runs.
cache = false

############################# Runtime #############################
# Maximum number of concurrent link checks.
max_concurrency = 12

# Maximum number of allowed redirects.
max_redirects = 5

# Maximum number of allowed retries before a link is declared dead.
max_retries = 1

############################# Requests ############################
# Website timeout from connect to response finished.
# NOTE(review): presumably in seconds, like retry_wait_time below - confirm
# against the lychee documentation.
timeout = 10

# Minimum wait time in seconds between retries of failed requests.
retry_wait_time = 1

# Status codes that count as a successful check: 200-204, the 301-308
# redirect range, and 429 (Too Many Requests).
# Listing 3xx codes only accepts them as a final status; it does not control
# how many redirects are followed (see max_redirects).
accept = ["200..=204", "301..=308", "429"]

# Fragment (anchor) checking is disabled to avoid false fragment errors.
# NOTE(review): the pull-request workflow passes --include-fragments on the
# command line, which contradicts this value - confirm which one is intended.
include_fragments = false

# Only test links with the given schemes (e.g. https).
# Omit to check links with any other scheme.
# At the moment, we support http, https, file, and mailto.
# NOTE(review): with only "https" listed, http/file/mailto links are presumably
# never checked, which would make fallback_extensions and the http/mailto
# exclude patterns ineffective - confirm this is intended.
scheme = ["https"]

# When links are available using HTTPS, treat HTTP links as errors.
# Disabled here: plain HTTP links are not flagged for having an HTTPS variant.
require_https = false

# Fallback extensions to apply when a URL does not specify one.
# This is common in documentation tools that cross-reference files without extensions.
fallback_extensions = ["md", "html"]

############################# Exclusions ##########################
# Exclude URLs and mail addresses from checking (supports regex).
# These are TOML literal strings (single quotes): a backslash is NOT an escape
# character, so a literal dot is written '\.'. A doubled '\\.' would reach the
# regex engine as "literal backslash followed by any character" and never
# match a real URL.
exclude = [
    '^mailto:',
    '^https?://localhost',
    '^https?://127\.0\.0\.1',
    '^https://www\.linkedin\.com',
    '^https?://issues\.umbraco\.org/',
    '^https?://web\.archive\.org/web/',
]

# Exclude these filesystem paths from getting checked.
exclude_path = [
    '(^|/)node_modules/',
    '(^|/)dist/',
    '(^|/)bin/',
    '\.txt$',      # skip .txt extensions
    '(^|/)test/',  # skip directories named "test"
]

# URLs to check (supports regex). Has preference over all excludes.
include = ['gist\.github\.com.*']

# Skip checking mail addresses (consistent with the '^mailto:' exclude above).
include_mail = false

############################# Content Checks ######################
# Mark pages as broken if the body contains "page not found" or "404"
# (both patterns are case-insensitive via the (?i) flag).
# NOTE(review): confirm that the lychee version in use recognises a [content]
# table with a `deny` key; an unrecognised table may be rejected or ignored.
# NOTE(review): "(?i)404" is unanchored, so it matches any body containing the
# characters "404" (e.g. inside a longer number) - consider tightening it.
[content]
deny = ["(?i)page not found", "(?i)404"]
120 changes: 120 additions & 0 deletions .github/workflows/check-broken-pr-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Checks links in the Markdown files touched by a pull request, posts the
# findings as a sticky PR comment, and fails the job when any are reported.
name: Check Links in Pull Requests

on:
  pull_request:
    branches:
      - main
    paths:
      - '**/*.md'

jobs:
  check-links:
    runs-on: ubuntu-latest

    steps:
      # 1️⃣ Checkout repository (full history so the PR base commit can be diffed)
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # 2️⃣ Get changed Markdown files in the PR
      # --diff-filter=ACMR keeps added/copied/modified/renamed files only, so
      # Markdown files deleted by the PR are not handed to lychee as inputs.
      # The newline-separated list is flattened to one space-separated line
      # and exported to later steps through GITHUB_ENV.
      - name: Get changed Markdown files
        id: changed-files
        run: |
          CHANGED_FILES=$(git diff --name-only --diff-filter=ACMR ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md$' || true)
          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }"
          echo "CHANGED_FILES=$CHANGED_FILES" >> "$GITHUB_ENV"
          echo "Changed Markdown files: $CHANGED_FILES"

      # 3️⃣ Log when no Markdown files changed
      # This step only prints a notice; the link-check steps below are skipped
      # through their own `if:` conditions.
      - name: Skip if no Markdown files changed
        if: env.CHANGED_FILES == ''
        run: |
          echo "No Markdown files changed. Skipping link check."
          exit 0

      # 4️⃣ Run Lychee on changed files
      # NOTE(review): no --config is passed; lychee presumably only auto-loads
      # a lychee.toml from the working directory, so .github/lychee.toml does
      # not appear to apply to this run - confirm intent.
      # NOTE(review): file names from the PR are interpolated into `args`;
      # paths containing spaces or shell metacharacters may be mis-parsed -
      # verify how the action tokenises `args`.
      - name: Run Lychee
        id: run-lychee
        if: env.CHANGED_FILES != ''
        uses: lycheeverse/lychee-action@v2
        with:
          args: |
            --no-progress
            --include-fragments
            --format detailed
            ${{ env.CHANGED_FILES }}
          output: lychee/out_raw.md
          fail: false # ✅ don't fail yet, let us capture output

      # 5️⃣ Format Lychee output (user-friendly, relative paths)
      # Rewrites the raw report into lychee/comment.md and sets the
      # `has_content` output to true/false for the steps below.
      - name: Format Lychee report
        id: format-report
        if: always() && env.CHANGED_FILES != ''
        run: |
          mkdir -p lychee
          : > lychee/comment.md # start with empty file

          # Without a raw report there is nothing to format.
          if [ ! -f lychee/out_raw.md ]; then
            echo "has_content=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # `ws` is the checkout directory with a trailing slash; it is
          # stripped from reported paths so the comment shows repo-relative
          # locations.
          awk -v ws="${GITHUB_WORKSPACE%/}/" '
            # Remove prefix p from s when s starts with it (plain string match).
            function strip_prefix(s, p) {
              return (index(s, p) == 1) ? substr(s, length(p) + 1) : s
            }

            # Per-file section header emitted by lychee.
            /^Errors in / {
              file = strip_prefix($3, ws)
              print "\nBroken links found in:\n" file >> "lychee/comment.md"
              next
            }

            # Every [ERROR] line is reported under the "Anchor not found" label.
            /\[ERROR\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[ERROR\] /, "", msg)
              msg = strip_prefix(msg, "file://" ws)
              print "\n⚓ Anchor not found → " msg >> "lychee/comment.md"
              next
            }

            /\[404\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[404\] /, "", msg)
              print "\n❌ 404 Not Found → " msg >> "lychee/comment.md"
              next
            }

            /\[301\]|\[302\]/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              sub(/^\[(301|302)\] /, "", msg)
              print "\n🔀 Redirect → " msg >> "lychee/comment.md"
              next
            }

            # Individual timeout entries only; lines containing "Timeouts" are skipped.
            /Timeout/ && !/Timeouts/ {
              msg = $0
              sub(/^- \[ \] /, "", msg)
              print "\n⏳ Timeout → " msg >> "lychee/comment.md"
              next
            }
          ' lychee/out_raw.md

          # Add header only if we found content
          if [ -s lychee/comment.md ]; then
            sed -i '1i **The Link Checker found broken links in your PR**.\n Please review the following list:\n' lychee/comment.md
            echo "has_content=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_content=false" >> "$GITHUB_OUTPUT"
          fi

      # 6️⃣ Comment broken links on PR (if present)
      - name: Comment broken links
        if: always() && (env.CHANGED_FILES != '') && (steps.format-report.outputs.has_content == 'true')
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          path: lychee/comment.md
          recreate: true

      # 7️⃣ Fail workflow if broken links exist
      - name: Fail workflow if broken links
        if: steps.format-report.outputs.has_content == 'true'
        run: |
          echo "❌ Broken links detected. Please review the PR comment for details."
          exit 1
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
*.orig
.vscode
.idea
.lycheecache
31 changes: 31 additions & 0 deletions .lycheeignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# These links are ignored by lychee link checker: https://github.com/lycheeverse/lychee
# The file allows you to list multiple regular expressions for exclusion (one pattern per line).
# The `.lycheeignore` file is only used for excluding URLs, not paths. Use the `exclude_path` key in the `lychee.toml` file. ref: https://lychee.cli.rs/recipes/excluding-paths/

# GitHub blob/tree links, with or without #L line fragments.
# Note: the last pattern in this group (Umbraco-CMS/blob/.*) also covers the
# narrower Umbraco-CMS #L pattern listed first.
^https://github\.com/umbraco/Umbraco-CMS/blob/.*/.*#L.*
^https://github\.com/umbraco/Umbraco-CMS/tree/.*
^https://github\.com/Shazwazza/Articulate/blob/.*/.*#L.*
^https://github\.com/umbraco/Umbraco-CMS/blob/.*

# Anchor/fragment links causing false positives.
# The last two patterns in this group (azure.microsoft.com media-services and
# tiny.cloud docs) ignore whole URL prefixes, not only fragment links.
^https://apidocs\.umbraco\.com/.*/#.*
^https://tinymce\.github\.io/.*/#.*
^https://openid\.net/.*/#.*
^https://docs\.microsoft\.com/.*#.*
^https://learn\.microsoft\.com/.*#.*
^https://developer\.mozilla\.org/.*/#.*
^https://learning\.postman\.com/docs/.*/#.*
^https://nginx\.org/.*/#.*
^https://azure\.microsoft\.com/en-gb/services/media-services/.*
^https://www\.tiny\.cloud/docs/.*

# TinyMCE GitHub issue links that carry a fragment
^https://github\.com/tinymce/tinymce/issues/.*#.*

# NIST FIPS and other static docs
^https://csrc\.nist\.gov/publications/PubsFIPS\.html#.*

# Timeout-prone Umbraco issue links
^https://issues\.umbraco\.org/issue/.*
^https://issues\.umbraco\.org/issues/.*