Skip to content

Commit

Permalink
Refactor DiagnosticLinkInlineParser (#387)
Browse files Browse the repository at this point in the history
  • Loading branch information
reakaleek authored Jan 31, 2025
1 parent ef215e8 commit 9f3dc7d
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 88 deletions.
220 changes: 134 additions & 86 deletions src/Elastic.Markdown/Myst/InlineParsers/DiagnosticLinkInlineParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information

using System.Collections.Immutable;
using System.IO.Abstractions;
using Elastic.Markdown.Diagnostics;
using Elastic.Markdown.IO;
using Elastic.Markdown.Myst.Comments;
Expand Down Expand Up @@ -35,132 +36,179 @@ public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer) { }
public class DiagnosticLinkInlineParser : LinkInlineParser
{
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml for a list of URI schemes
// We can add more schemes as needed
private static readonly ImmutableHashSet<string> ExcludedSchemes = ["http", "https", "tel", "jdbc"];

public override bool Match(InlineProcessor processor, ref StringSlice slice)
{
var match = base.Match(processor, ref slice);
if (!match)
return false;

if (processor.Inline is not LinkInline link)
if (!match || processor.Inline is not LinkInline link)
return match;

// Links in comments should not be validated
// This works for the current test cases, but we might need to revisit this in case it needs some traversal
if (link.Parent?.ParentBlock is CommentBlock)
var context = processor.GetContext();
if (IsInCommentBlock(link) || context.SkipValidation)
return match;

ValidateAndProcessLink(processor, link, context);
return match;
}

/// <summary>
/// Reports whether the block that owns the link's parent inline is a <see cref="CommentBlock"/>.
/// </summary>
/// <param name="link">The link whose surroundings are inspected.</param>
/// <returns><c>true</c> when <c>link.Parent?.ParentBlock</c> is a <see cref="CommentBlock"/>; otherwise <c>false</c>.</returns>
/// <remarks>Only the immediate parent's block is looked at; no further ancestor traversal is performed.</remarks>
private static bool IsInCommentBlock(LinkInline link)
{
    var owningBlock = link.Parent?.ParentBlock;
    return owningBlock is CommentBlock;
}

private void ValidateAndProcessLink(InlineProcessor processor, LinkInline link, ParserContext context)
{
var url = link.Url;
var line = link.Line + 1;
var column = link.Column;
var length = url?.Length ?? 1;

var context = processor.GetContext();
if (processor.GetContext().SkipValidation)
return match;
if (!ValidateBasicUrl(processor, url, line, column, length))
return;

if (string.IsNullOrEmpty(url))
var uri = Uri.TryCreate(url, UriKind.Absolute, out var u) ? u : null;

if (IsCrossLink(uri))
{
processor.EmitWarning(line, column, length, $"Found empty url");
return match;
ProcessCrossLink(link, context, line, column, length);
return;
}

if (ValidateExternalUri(processor, uri, context, line, column, length))
return;

ProcessInternalLink(processor, link, context, line, column, length);
}

private bool ValidateBasicUrl(InlineProcessor processor, string? url, int line, int column, int length)
{
if (string.IsNullOrEmpty(url))
{
processor.EmitWarning(line, column, length, "Found empty url");
return false;
}
if (url.Contains("{{") || url.Contains("}}"))
{
processor.EmitWarning(line, column, length, "The url contains a template expression. Please do not use template expressions in links. See https://github.com/elastic/docs-builder/issues/182 for further information.");
return match;
processor.EmitWarning(line, column, length,
"The url contains a template expression. Please do not use template expressions in links. " +
"See https://github.com/elastic/docs-builder/issues/182 for further information.");
return false;
}
return true;
}

var uri = Uri.TryCreate(url, UriKind.Absolute, out var u) ? u : null;

if (IsCrossLink(uri))
processor.GetContext().Build.Collector.EmitCrossLink(url!);
private bool ValidateExternalUri(InlineProcessor processor, Uri? uri, ParserContext context, int line, int column, int length)
{
if (uri == null || !uri.Scheme.StartsWith("http"))
return false;

if (uri != null && uri.Scheme.StartsWith("http"))
var baseDomain = uri.Host == "localhost" ? "localhost" : string.Join('.', uri.Host.Split('.')[^2..]);
if (!context.Configuration.ExternalLinkHosts.Contains(baseDomain))
{
var baseDomain = uri.Host == "localhost" ? "localhost" : string.Join('.', uri.Host.Split('.')[^2..]);
if (!context.Configuration.ExternalLinkHosts.Contains(baseDomain))
{
processor.EmitWarning(
line,
column,
length,
$"External URI '{uri}' is not allowed. Add '{baseDomain}' to the " +
$"'external_hosts' list in {context.Configuration.SourceFile} to " +
"allow links to this domain.");
}
return match;
processor.EmitWarning(
line,
column,
length,
$"External URI '{uri}' is not allowed. Add '{baseDomain}' to the " +
$"'external_hosts' list in the configuration file '{context.Configuration.SourceFile}' " +
"to allow links to this domain."
);
}
return true;
}

var includeFrom = context.Path.Directory!.FullName;
if (url.StartsWith('/'))
includeFrom = context.Parser.SourcePath.FullName;
/// <summary>
/// Reports the link's url to the build collector as a cross link.
/// </summary>
/// <param name="link">The link whose <c>Url</c> is reported; nothing happens when it is <c>null</c>.</param>
/// <param name="context">Parser context giving access to <c>Build.Collector</c>.</param>
/// <param name="line">Not used by this method.</param>
/// <param name="column">Not used by this method.</param>
/// <param name="length">Not used by this method.</param>
private static void ProcessCrossLink(LinkInline link, ParserContext context, int line, int column, int length)
{
    // TODO: The link is not rendered correctly yet, will be fixed in a follow-up
    if (link.Url is not { } crossLinkUrl)
        return;

    context.Build.Collector.EmitCrossLink(crossLinkUrl);
}

var anchors = url.Split('#');
var anchor = anchors.Length > 1 ? anchors[1].Trim() : null;
url = anchors[0];
private static void ProcessInternalLink(InlineProcessor processor, LinkInline link, ParserContext context, int line, int column, int length)
{
var (url, anchor) = SplitUrlAndAnchor(link.Url ?? string.Empty);
var includeFrom = GetIncludeFromPath(url, context);

if (!string.IsNullOrWhiteSpace(url))
{
var pathOnDisk = Path.Combine(includeFrom, url.TrimStart('/'));
if ((uri is null || uri.IsFile) && !context.Build.ReadFileSystem.File.Exists(pathOnDisk))
processor.EmitError(line, column, length, $"`{url}` does not exist. resolved to `{pathOnDisk}");
}
else
ValidateInternalUrl(processor, url, includeFrom, line, column, length, context);
ProcessLinkText(processor, link, context, url, anchor, line, column, length);
UpdateLinkUrl(link, url, anchor, context.Build.UrlPathPrefix ?? string.Empty);
}

/// <summary>
/// Separates a link target into the part before the first <c>#</c> and the trimmed anchor after it.
/// </summary>
/// <param name="fullUrl">The raw link target, optionally containing <c>#anchor</c>.</param>
/// <returns>
/// The text before the first <c>#</c> and the whitespace-trimmed segment between the first and second
/// <c>#</c>; the anchor is <c>null</c> when <paramref name="fullUrl"/> contains no <c>#</c>.
/// </returns>
private static (string url, string? anchor) SplitUrlAndAnchor(string fullUrl)
{
    var segments = fullUrl.Split('#');
    var path = segments[0];

    // No '#' present: there is no anchor at all.
    if (segments.Length <= 1)
        return (path, null);

    // Only the segment right after the first '#' counts as the anchor.
    return (path, segments[1].Trim());
}

/// <summary>
/// Picks the directory an internal link is resolved against.
/// </summary>
/// <param name="url">The link target; a leading <c>/</c> marks it as rooted.</param>
/// <param name="context">Parser context supplying the candidate base paths.</param>
/// <returns>
/// <c>context.Parser.SourcePath.FullName</c> for rooted urls, otherwise the full name of the directory
/// of <c>context.Path</c>.
/// </returns>
private static string GetIncludeFromPath(string url, ParserContext context)
{
    if (url.StartsWith('/'))
        return context.Parser.SourcePath.FullName;

    return context.Path.Directory!.FullName;
}

/// <summary>
/// Emits an error when an internal link target does not exist on the read file system.
/// </summary>
/// <param name="processor">Inline processor used to emit the diagnostic.</param>
/// <param name="url">The link target to check; a null-or-whitespace value is skipped.</param>
/// <param name="includeFrom">Base directory that <paramref name="url"/> is combined with.</param>
/// <param name="line">Line reported with the diagnostic.</param>
/// <param name="column">Column reported with the diagnostic.</param>
/// <param name="length">Length reported with the diagnostic.</param>
/// <param name="context">Parser context giving access to <c>Build.ReadFileSystem</c>.</param>
private static void ValidateInternalUrl(InlineProcessor processor, string url, string includeFrom, int line, int column, int length, ParserContext context)
{
    if (string.IsNullOrWhiteSpace(url))
        return;

    // An absolute URI with a non-file scheme (e.g. mailto:) does not point at a file on disk,
    // so checking for its existence would only produce a false "does not exist" error.
    // Rooted paths such as "/folder/page.md" can parse as file URIs and are still checked.
    if (Uri.TryCreate(url, UriKind.Absolute, out var uri) && !uri.IsFile)
        return;

    var pathOnDisk = Path.Combine(includeFrom, url.TrimStart('/'));
    if (!context.Build.ReadFileSystem.File.Exists(pathOnDisk))
        processor.EmitError(line, column, length, $"`{url}` does not exist. resolved to `{pathOnDisk}`");
}

private static void ProcessLinkText(InlineProcessor processor, LinkInline link, ParserContext context, string url, string? anchor, int line, int column, int length)
{
if (link.FirstChild != null && string.IsNullOrEmpty(anchor))
return;

var file = ResolveFile(context, url);
var markdown = context.GetDocumentationFile?.Invoke(file) as MarkdownFile;

if (markdown == null)
{
if (string.IsNullOrEmpty(anchor))
processor.EmitWarning(line, column, length, $"No url was specified for the link.");
link.Url = "";
processor.EmitWarning(line, column, length,
$"'{url}' could not be resolved to a markdown file while creating an auto text link, '{file.FullName}' does not exist.");
return;
}

if (link.FirstChild == null || !string.IsNullOrEmpty(anchor))
var title = markdown.Title;

if (!string.IsNullOrEmpty(anchor))
{
var file = string.IsNullOrWhiteSpace(url)
? context.Path
: url.StartsWith('/')
? context.Build.ReadFileSystem.FileInfo.New(Path.Combine(context.Build.SourcePath.FullName, url.TrimStart('/')))
: context.Build.ReadFileSystem.FileInfo.New(Path.Combine(context.Path.Directory!.FullName, url));
var markdown = context.GetDocumentationFile?.Invoke(file) as MarkdownFile;
if (markdown == null)
{
processor.EmitWarning(line,
column,
length,
$"'{url}' could not be resolved to a markdown file while creating an auto text link, '{file.FullName}' does not exist.");
}

var title = markdown?.Title;

if (!string.IsNullOrEmpty(anchor))
{
if (markdown == null || !markdown.Anchors.Contains(anchor))
processor.EmitError(line, column, length, $"`{anchor}` does not exist in {markdown?.FileName}.");
else if (link.FirstChild == null && markdown.TableOfContents.TryGetValue(anchor, out var heading))
title += " > " + heading.Heading;

}

if (link.FirstChild == null && !string.IsNullOrEmpty(title))
link.AppendChild(new LiteralInline(title));
ValidateAnchor(processor, markdown, anchor, line, column, length);
if (link.FirstChild == null && markdown.TableOfContents.TryGetValue(anchor, out var heading))
title += " > " + heading.Heading;
}

if (link.FirstChild == null && !string.IsNullOrEmpty(title))
link.AppendChild(new LiteralInline(title));
}

/// <summary>
/// Maps a link target to the file it refers to.
/// </summary>
/// <param name="context">Parser context supplying the current file, the source root and the read file system.</param>
/// <param name="url">The link target; null-or-whitespace means "the current file".</param>
/// <returns>
/// <c>context.Path</c> for an empty target; for a target starting with <c>/</c>, a file info rooted at
/// <c>context.Build.SourcePath</c>; otherwise a file info relative to the directory of <c>context.Path</c>.
/// </returns>
private static IFileInfo ResolveFile(ParserContext context, string url)
{
    if (string.IsNullOrWhiteSpace(url))
        return context.Path;

    var fileInfoFactory = context.Build.ReadFileSystem.FileInfo;

    if (url.StartsWith('/'))
    {
        var rootedPath = Path.Combine(context.Build.SourcePath.FullName, url.TrimStart('/'));
        return fileInfoFactory.New(rootedPath);
    }

    var relativePath = Path.Combine(context.Path.Directory!.FullName, url);
    return fileInfoFactory.New(relativePath);
}

/// <summary>
/// Emits an error when <paramref name="anchor"/> is not among the anchors of <paramref name="markdown"/>.
/// </summary>
/// <param name="processor">Inline processor used to emit the diagnostic.</param>
/// <param name="markdown">The markdown file whose <c>Anchors</c> are searched.</param>
/// <param name="anchor">The anchor name to look up.</param>
/// <param name="line">Line reported with the diagnostic.</param>
/// <param name="column">Column reported with the diagnostic.</param>
/// <param name="length">Length reported with the diagnostic.</param>
private static void ValidateAnchor(InlineProcessor processor, MarkdownFile markdown, string anchor, int line, int column, int length)
{
    var anchorIsKnown = markdown.Anchors.Contains(anchor);
    if (anchorIsKnown)
        return;

    processor.EmitError(line, column, length, $"`{anchor}` does not exist in {markdown.FileName}.");
}

private static void UpdateLinkUrl(LinkInline link, string url, string? anchor, string urlPathPrefix)
{
if (url.EndsWith(".md"))
link.Url = Path.ChangeExtension(url, ".html");
// rooted links might need the configured path prefix to properly link
var prefix = processor.GetBuildContext().UrlPathPrefix;
if (url.StartsWith("/") && !string.IsNullOrWhiteSpace(prefix))
link.Url = $"{prefix.TrimEnd('/')}{link.Url}";
url = Path.ChangeExtension(url, ".html");

if (!string.IsNullOrEmpty(anchor))
link.Url += $"#{anchor}";
if (url.StartsWith("/") && !string.IsNullOrWhiteSpace(urlPathPrefix))
url = $"{urlPathPrefix.TrimEnd('/')}{url}";

return match;
link.Url = !string.IsNullOrEmpty(anchor) ? $"{url}#{anchor}" : url;
}

private static bool IsCrossLink(Uri? uri) =>
uri != null
uri != null // This means it's not a local
&& !ExcludedSchemes.Contains(uri.Scheme)
&& !uri.IsFile
&& Path.GetExtension(uri.OriginalString) == ".md";
Expand Down
4 changes: 2 additions & 2 deletions tests/Elastic.Markdown.Tests/Inline/InlineLinkTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void GeneratesHtml() =>
// language=html
Html.Should().Contain(
// TODO: The link is not rendered correctly yet, will be fixed in a follow-up
"""<p><a href="kibana://index.html">test</a></p>"""
"""<p><a href="kibana://index.md">test</a></p>"""
);

[Fact]
Expand All @@ -158,7 +158,7 @@ public void GeneratesHtml() =>
// language=html
Html.Should().Contain(
// TODO: The link is not rendered correctly yet, will be fixed in a follow-up
"""<p>Go to <a href="kibana://index.html">test</a></p>"""
"""<p>Go to <a href="kibana://index.md">test</a></p>"""
);

[Fact]
Expand Down

0 comments on commit 9f3dc7d

Please sign in to comment.