From 435adeaec4b640f9a7f99a2afcffc427a0d89af3 Mon Sep 17 00:00:00 2001
From: Esha Noronha
Date: Tue, 2 Sep 2025 09:41:27 +0200
Subject: [PATCH 1/8] Adding Lychee Config files

---
 .github/lychee.toml                         | 80 +++++++++++++++++++++
 .github/workflows/check-broken-pr-links.yml | 75 +++++++++++++++++++
 .gitignore                                  |  1 +
 .lycheeignore                               | 31 ++++++++
 4 files changed, 187 insertions(+)
 create mode 100644 .github/lychee.toml
 create mode 100644 .github/workflows/check-broken-pr-links.yml
 create mode 100644 .lycheeignore

diff --git a/.github/lychee.toml b/.github/lychee.toml
new file mode 100644
index 00000000000..ce2cad4b805
--- /dev/null
+++ b/.github/lychee.toml
@@ -0,0 +1,80 @@
+# .github/lychee.toml
+
+############################# Display #############################
+# Verbose program output
+# Accepts log level: "error", "warn", "info", "debug", "trace"
+verbose = "info"
+
+# Don't show interactive progress bar while checking links.
+no_progress = true
+
+############################# Cache ###############################
+# Enable link caching. This can be helpful to avoid checking the same links on
+# multiple runs.
+cache = false
+
+############################# Runtime #############################
+# Maximum number of concurrent link checks.
+max_concurrency = 12
+
+# Maximum number of allowed redirects.
+max_redirects = 5
+
+# Maximum number of allowed retries before a link is declared dead.
+max_retries = 1
+
+############################# Requests ############################
+# Website timeout from connect to response finished.
+timeout = 10
+
+# Minimum wait time in seconds between retries of failed requests.
+retry_wait_time = 1
+
+# Accept more status codes (follow redirects automatically)
+accept = ["200..=204", "301..=308", "429"]
+
+# Avoid false fragment errors
+include_fragments = false
+
+# Only test links with the given schemes (e.g. https).
+# Omit to check links with any other scheme.
+# At the moment, we support http, https, file, and mailto.
+scheme = ["https"]
+
+# When links are available using HTTPS, treat HTTP links as errors.
+require_https = false
+
+# Fallback extensions to apply when a URL does not specify one.
+# This is common in documentation tools that cross-reference files without extensions.
+fallback_extensions = ["md", "html"]
+
+############################# Exclusions ##########################
+# Exclude URLs and mail addresses from checking (supports regex). Single-quoted TOML strings are literal: escape a dot as \. (not \\.).
+exclude = [
+  '^mailto:',
+  '^https?://localhost',
+  '^https?://127\.0\.0\.1',
+  '^https://www\.linkedin\.com',
+  '^https?://issues\.umbraco\.org/',
+  '^https?://web\.archive\.org/web/'
+]
+
+# Exclude these filesystem paths from getting checked.
+exclude_path = [
+  '(^|/)node_modules/',
+  '(^|/)dist/',
+  '(^|/)bin/',
+  '\.txt$', # skip .txt extensions
+  '(^|/)test/' # skip directories named "test"
+]
+
+# URLs to check (supports regex). Has preference over all excludes.
+include = ['gist\.github\.com.*']
+
+# Skip checking mail addresses (mailto: links are also excluded above).
+include_mail = false
+
+############################# Content Checks ######################
+# Mark pages as broken if the body contains "page not found" or "404"
+[content]
+deny = ["(?i)page not found", "(?i)404"]
diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
new file mode 100644
index 00000000000..37afdb76362
--- /dev/null
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -0,0 +1,75 @@
+name: Check Links in Pull Requests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '**/*.md'
+
+jobs:
+  check-links:
+    runs-on: ubuntu-latest
+
+    steps:
+      # 1️⃣ Checkout the repository
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # 2️⃣ Get changed Markdown files in the PR
+      - name: Get changed Markdown files
+        id: changed-files
+        run: |
+          CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md$' || true)
+          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }" # replace newlines with spaces
+          echo "CHANGED_FILES=$CHANGED_FILES" >> $GITHUB_ENV
+          echo "Changed Markdown files: $CHANGED_FILES"
+
+      # 3️⃣ Skip if no Markdown files changed
+      - name: Skip if no changed Markdown files
+        if: env.CHANGED_FILES == ''
+        run: |
+          echo "No Markdown files changed. Skipping link check."
+          exit 0
+
+      # 4️⃣ Run Lychee on changed files (compact output)
+      - name: Run Lychee
+        id: run-lychee
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: |
+            --no-progress
+            --include-fragments
+            --format compact
+            ${{ env.CHANGED_FILES }}
+          output: lychee/out_raw.md
+          fail: false # ✅ don't fail yet, let us capture output
+
+      # 5️⃣ Clean Lychee output (remove summary line + mark if has content)
+      - name: Clean Lychee output
+        id: clean-output
+        if: always()
+        run: |
+          grep -v '^🔍' lychee/out_raw.md > lychee/out.md || true
+          if [ -s lychee/out.md ]; then
+            echo "has_content=true" >> $GITHUB_OUTPUT
+          else
+            echo "has_content=false" >> $GITHUB_OUTPUT
+          fi
+
+      # 6️⃣ Comment broken links on PR
+      - name: Comment broken links
+        if: always() && (env.CHANGED_FILES != '') && (steps.clean-output.outputs.has_content == 'true')
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          path: lychee/out.md
+          recreate: true
+
+      # 7️⃣ Fail workflow if broken links are found
+      - name: Fail workflow if broken links
+        if: steps.clean-output.outputs.has_content == 'true'
+        run: |
+          echo "❌ Broken links detected. Please review the PR comment for details."
+          exit 1
diff --git a/.gitignore b/.gitignore
index 655ba93439d..946cb65a602 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 *.orig
 .vscode
 .idea
+.lycheecache
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 00000000000..c149333be85
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,31 @@
+# These links are ignored by lychee link checker: https://github.com/lycheeverse/lychee
+# The file allows you to list multiple regular expressions for exclusion (one pattern per line).
+# The `.lycheeignore` file is only used for excluding URLs, not paths. Use the `exclude_path` key in the `lychee.toml` file. ref: https://lychee.cli.rs/recipes/excluding-paths/
+
+# GitHub blob/tree fragment links
+^https://github\.com/umbraco/Umbraco-CMS/blob/.*/.*#L.*
+^https://github\.com/umbraco/Umbraco-CMS/tree/.*
+^https://github\.com/Shazwazza/Articulate/blob/.*/.*#L.*
+^https://github\.com/umbraco/Umbraco-CMS/blob/.*
+
+# Anchor/fragment links causing false positives
+^https://apidocs\.umbraco\.com/.*/#.*
+^https://tinymce\.github\.io/.*/#.*
+^https://openid\.net/.*/#.*
+^https://docs\.microsoft\.com/.*#.*
+^https://learn\.microsoft\.com/.*#.*
+^https://developer\.mozilla\.org/.*/#.*
+^https://learning\.postman\.com/docs/.*/#.*
+^https://nginx\.org/.*/#.*
+^https://azure\.microsoft\.com/en-gb/services/media-services/.*
+^https://www\.tiny\.cloud/docs/.*
+
+# TinyMCE anchors
+^https://github\.com/tinymce/tinymce/issues/.*#.*
+
+# NIST FIPS and other static docs
+^https://csrc\.nist\.gov/publications/PubsFIPS\.html#.*
+
+# Timeout-prone Umbraco issue links
+^https://issues\.umbraco\.org/issue/.*
+^https://issues\.umbraco\.org/issues/.*

From 7101a7ceefee83898a5687abe86c02398573811a Mon Sep 17 00:00:00 2001
From: Esha Noronha
Date: Thu, 4 Sep 2025 09:30:09 +0200
Subject: [PATCH 2/8] Added user friendly message outputs

---
 .github/workflows/check-broken-pr-links.yml | 79 ++++++++++++++++-----
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 37afdb76362..6ed576dbb97 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -12,8 +12,8 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      # 1️⃣ Checkout the repository
-      - name: Checkout code
+      # 1️⃣ Checkout repository
+      - name: Checkout repository
         uses: actions/checkout@v4
         with:
           fetch-depth: 0
@@ -23,18 +23,18 @@ jobs:
         id: changed-files
         run: |
           CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md$' || true)
-          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }" # replace newlines with spaces
+          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }"
           echo "CHANGED_FILES=$CHANGED_FILES" >> $GITHUB_ENV
           echo "Changed Markdown files: $CHANGED_FILES"
 
       # 3️⃣ Skip if no Markdown files changed
-      - name: Skip if no changed Markdown files
+      - name: Skip if no Markdown files changed
         if: env.CHANGED_FILES == ''
         run: |
           echo "No Markdown files changed. Skipping link check."
           exit 0
 
-      # 4️⃣ Run Lychee on changed files (compact output)
+      # 4️⃣ Run Lychee on changed files
       - name: Run Lychee
         id: run-lychee
         uses: lycheeverse/lychee-action@v2
@@ -42,34 +42,79 @@ jobs:
           args: |
             --no-progress
             --include-fragments
-            --format compact
+            --format detailed
             ${{ env.CHANGED_FILES }}
           output: lychee/out_raw.md
-          fail: false # ✅ don't fail yet, let us capture output
+          fail: false  # ✅ don't fail yet, let us capture output
 
-      # 5️⃣ Clean Lychee output (remove summary line + mark if has content)
-      - name: Clean Lychee output
-        id: clean-output
+      # 5️⃣ Format Lychee output (user-friendly, relative paths)
+      - name: Format Lychee report
+        id: format-report
         if: always()
         run: |
-          grep -v '^🔍' lychee/out_raw.md > lychee/out.md || true
-          if [ -s lychee/out.md ]; then
+          mkdir -p lychee
+          : > lychee/comment.md # start with empty file
+
+          awk '
+            /^Errors in / {
+              file=$3
+              gsub("^/home/runner/work/UmbracoDocs/UmbracoDocs/", "", file)
+              print "\nFile: " file >> "lychee/comment.md"
+              next
+            }
+
+            /\[ERROR\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[ERROR\] /, "", msg)
+              gsub("^file:///home/runner/work/UmbracoDocs/UmbracoDocs/", "", msg)
+              print "⚓ Anchor not found → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /\[404\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[404\] /, "", msg)
+              print "❌ 404 Not Found → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /\[301\]|\[302\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[(301|302)\] /, "", msg)
+              print "🔀 Redirect → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /Timeout/ && !/Timeouts/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              print "⏳ Timeout → " msg >> "lychee/comment.md"
+              next
+            }
+          ' lychee/out_raw.md
+
+          # Add header only if we found content
+          if [ -s lychee/comment.md ]; then
+            sed -i '1i 🚨 **Link Checker found broken links in this PR**\n' lychee/comment.md
             echo "has_content=true" >> $GITHUB_OUTPUT
           else
             echo "has_content=false" >> $GITHUB_OUTPUT
           fi
 
-      # 6️⃣ Comment broken links on PR
+      # 6️⃣ Comment broken links on PR (if present)
       - name: Comment broken links
-        if: always() && (env.CHANGED_FILES != '') && (steps.clean-output.outputs.has_content == 'true')
+        if: always() && (env.CHANGED_FILES != '') && (steps.format-report.outputs.has_content == 'true')
         uses: marocchino/sticky-pull-request-comment@v2
         with:
-          path: lychee/out.md
+          path: lychee/comment.md
           recreate: true
 
-      # 7️⃣ Fail workflow if broken links are found
+      # 7️⃣ Fail workflow if broken links exist
       - name: Fail workflow if broken links
-        if: steps.clean-output.outputs.has_content == 'true'
+        if: steps.format-report.outputs.has_content == 'true'
         run: |
           echo "❌ Broken links detected. Please review the PR comment for details."
           exit 1

From 0d6e378caaff243b574d45da0c0acf4648df4c35 Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:25:45 +0200
Subject: [PATCH 3/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 6ed576dbb97..882e93a1a6f 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -98,7 +98,7 @@ jobs:
 
           # Add header only if we found content
           if [ -s lychee/comment.md ]; then
-            sed -i '1i 🚨 **Link Checker found broken links in this PR**\n' lychee/comment.md
+            sed -i '1i **The Link Checker found broken links in your PR**.\n Please review the following list:\n' lychee/comment.md
             echo "has_content=true" >> $GITHUB_OUTPUT
           else
             echo "has_content=false" >> $GITHUB_OUTPUT

From d689b8a4fb8a6e579c3e90c27e0df3411f9da309 Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:25:51 +0200
Subject: [PATCH 4/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 882e93a1a6f..badea1ca7bb 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -91,7 +91,7 @@ jobs:
             /Timeout/ && !/Timeouts/ {
               msg = $0
               sub(/^- \[ \] /, "", msg)
-              print "⏳ Timeout → " msg >> "lychee/comment.md"
+              print "\n⏳ Timeout → " msg >> "lychee/comment.md"
               next
             }
           ' lychee/out_raw.md

From de3a206c0fbe2b22324ef8e4de7565233d3c29fe Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:25:57 +0200
Subject: [PATCH 5/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index badea1ca7bb..26689133605 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -84,7 +84,7 @@ jobs:
               msg = $0
               sub(/^- \[ \] /, "", msg)
               sub(/^\[(301|302)\] /, "", msg)
-              print "🔀 Redirect → " msg >> "lychee/comment.md"
+              print "\n🔀 Redirect → " msg >> "lychee/comment.md"
               next
             }
 

From fe9372e88a5e80b137f7978d84219b2499801624 Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:26:02 +0200
Subject: [PATCH 6/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 26689133605..27e612e1052 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -76,7 +76,7 @@ jobs:
               msg = $0
               sub(/^- \[ \] /, "", msg)
               sub(/^\[404\] /, "", msg)
-              print "❌ 404 Not Found → " msg >> "lychee/comment.md"
+              print "\n❌ 404 Not Found → " msg >> "lychee/comment.md"
               next
             }
 

From ff942c795a52be8d0a8adabd43669cd2883ce702 Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:26:18 +0200
Subject: [PATCH 7/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 27e612e1052..678d41fc86f 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -68,7 +68,7 @@ jobs:
               sub(/^- \[ \] /, "", msg)
               sub(/^\[ERROR\] /, "", msg)
               gsub("^file:///home/runner/work/UmbracoDocs/UmbracoDocs/", "", msg)
-              print "⚓ Anchor not found → " msg >> "lychee/comment.md"
+              print "\n⚓ Anchor not found → " msg >> "lychee/comment.md"
               next
             }
 

From af4b8c7c14f0598b6f540ef5780c5d6563551726 Mon Sep 17 00:00:00 2001
From: Esha Noronha <82437098+eshanrnh@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:26:23 +0200
Subject: [PATCH 8/8] Update .github/workflows/check-broken-pr-links.yml

Co-authored-by: sofietoft
---
 .github/workflows/check-broken-pr-links.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
index 678d41fc86f..5b73b241603 100644
--- a/.github/workflows/check-broken-pr-links.yml
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -59,7 +59,7 @@ jobs:
             /^Errors in / {
               file=$3
               gsub("^/home/runner/work/UmbracoDocs/UmbracoDocs/", "", file)
-              print "\nFile: " file >> "lychee/comment.md"
+              print "\nBroken links found in:\n" file >> "lychee/comment.md"
               next
             }
 