Skip to content

Commit

Permalink
Enhance Update-glueckkanja.KONNEKT script to improve HTML parsing and…
Browse files Browse the repository at this point in the history
… extract release notes in YAML format
  • Loading branch information
Utesgui committed Nov 26, 2024
1 parent d8b2bee commit 4d3deb7
Showing 1 changed file with 63 additions and 9 deletions.
72 changes: 63 additions & 9 deletions scripts/Packages/Update-glueckkanja.KONNEKT.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,81 @@ $ProductName = ($PackageName).Trim().ToLower()
# Regex for installer links of the form <product>-<arch>-<a.b.c.d>.Msi.
# Capture group 1 is the architecture, group 2 the four-part version.
# NOTE(review): the '.' before "Msi" is unescaped, so it matches any character.
$URLFilter = "$($ProductName)-(X86|X64|Arm64)-(\d+\.\d+\.\d+\.\d+).Msi"

# Download the webpage
# NOTE(review): two requests appear back to back and the second assignment
# overwrites the first. They look like the before/after lines of a diff hunk;
# confirm that only one of them is meant to be in the script.
$website = Invoke-WebRequest -Uri $WebsiteURL
$website = Invoke-WebRequest -Uri $WebsiteURL -UseBasicParsing

# Extract the content of the webpage
$WebsiteLinks = $website.Links
$WebsiteContent = $website.Content

# Keep only the links whose href matches the installer pattern (-match is case-insensitive).
$FilteredLinks = $WebsiteLinks | Where-Object { $_.href -match $URLFilter }

# Reduce each href to its version number, sort descending and take the first entry.
# The first statement keeps four version parts, the second keeps three; the second
# assignment overwrites the first (same before/after caveat as for the download).
# Sort-STNumerical is not defined in this part of the script.
$latestVersion = $FilteredLinks | ForEach-Object { $_.href -replace '.*-(\d+\.\d+\.\d+\.\d+).*', '$1' } | Sort-STNumerical -Descending | Select-Object -First 1
$latestVersion = $FilteredLinks | ForEach-Object { $_.href -replace '.*-(\d+\.\d+\.\d+).*', '$1' } | Sort-STNumerical -Descending | Select-Object -First 1

# Select the non-empty hrefs that contain the latest version.
# NOTE(review): $latestVersion is used as a regex here, so its dots match any
# character. Several URLs can match (the filter allows X86, X64 and Arm64).
$latestVersionUrl = $FilteredLinks.href | Where-Object { ($_ -match $latestVersion) } | Where-Object { $_ -ne '' }


# Use regex to extract the content between "2.10.1.0" and "Downloads"
# The version is hard-coded in the pattern; capture group 1 is the text between
# the version and the first following "Downloads".
# NOTE(review): without the (?s) option '.' does not match newlines, so this only
# matches when both markers are on the same line of the page source.
$Pattern = '\.?2\.10\.1\.0(.*?)Downloads'
if ($WebsiteContent -match $Pattern) {
$ExtractedContent = $matches[1]
################ HTML/ReleaseNote Parsing ###################
# Strip the markup that keeps the page from loading as XML, then parse it.
# Each step works on the result of the previous one.
$xmlContent = $null
# Parse the HTML content
#$xmlContent = [xml]$website.Content

# Singleline lets '.' span newlines; IgnoreCase also catches upper-case <SCRIPT> tags.
$scriptTagOptions = [System.Text.RegularExpressions.RegexOptions]'Singleline, IgnoreCase'

# Remove <script> elements. The attribute part is optional so that a bare <script>
# tag is stripped too; a pattern that demands a space after "<script" leaves such
# elements in place, and their JavaScript can break the XML parse below.
$ContentNoScripts = [regex]::Replace($WebsiteContent, '<script(\s[^>]*)?>.*?</script>', "", $scriptTagOptions)
# Remove HTML comments.
$ContentNoScriptsNoComments = [regex]::Replace($ContentNoScripts, "<!--.*?-->", "", [System.Text.RegularExpressions.RegexOptions]::Singleline)
# Remove id="..." attributes.
# NOTE(review): this also hits any attribute whose name merely ends in "id" (e.g. data-id).
$ContentNoScriptsNoCommentsNoIds = [regex]::Replace($ContentNoScriptsNoComments, 'id=".*?"', "", [System.Text.RegularExpressions.RegexOptions]::Singleline)
# Remove every occurrence of the literal text "hidden", presumably because a
# value-less hidden attribute is not valid XML. TODO confirm: this also removes
# the word from visible text and from other attribute values.
$ContentNoScriptsNoCommentsNoIdsNoHidden = [regex]::Replace($ContentNoScriptsNoCommentsNoIds, 'hidden', "", [System.Text.RegularExpressions.RegexOptions]::Singleline)


# Parse the HTML content
# The [xml] cast throws if the remaining markup is still not well-formed XML.
$xmlContent = [xml]$ContentNoScriptsNoCommentsNoIdsNoHidden

# Find the h3 element with the latest version in its text.
# $targetElement stays $null when no heading matches or when no version was detected.
$h3Elements = $xmlContent.SelectNodes("//h3")
$targetElement = $null

# An empty version would produce an empty pattern, which matches every heading,
# so the search is skipped in that case.
if (-not [string]::IsNullOrEmpty($latestVersion)) {
    # Escape the version so its dots are matched literally instead of acting as
    # regex wildcards (e.g. "2.10.1" must not match "2110x1").
    $versionPattern = [regex]::Escape($latestVersion)

    foreach ($element in $h3Elements) {
        if ($element.InnerText -match $versionPattern) {
            $targetElement = $element
            break
        }
    }
}

# When a matching version heading was found, collect the release notes that follow
# it and render them as a YAML "ReleaseNotes" block; otherwise report the miss.
if ($null -ne $targetElement) {
# Extract content until the next h3 element
$content = ""
$currentElement = $targetElement.NextSibling

# Walk the siblings after the heading; stop at the end of the sibling list or at the next h3.
while ($null -ne $currentElement -and $currentElement.Name -ne "h3") {
# NOTE(review): this bare expression is not assigned to anything, so PowerShell
# writes each sibling's text to the output stream. It looks like leftover debug
# output; confirm that callers do not depend on it.
$currentElement.InnerText
# A sibling whose text contains "Downloads" ends the release-notes section.
if ($currentElement.InnerText -match "Downloads") {
break
}
# A sibling whose text contains "Add" or "Fix" (case-insensitive regex match)
# becomes a top-level bullet.
if ($currentElement.InnerText -match "Add" -or $currentElement.InnerText -match "Fix") {
$content += "* " + $currentElement.InnerText + "`n"
# The child nodes of the element right after that sibling become sub-items
# (presumably the <li> entries of a list; confirm against the page markup).
# NOTE(review): that following element is visited again by the outer loop, so if
# its own text contains "Add" or "Fix" it is also emitted as a bullet.
foreach ($listItem in $currentElement.NextSibling.ChildNodes) {
$content += " - " + $listItem.InnerText + "`n"
}
}
#$content += $currentElement.InnerText + "`n"
$currentElement = $currentElement.NextSibling
}

Write-Output "Extracted Content:"
# NOTE(review): $ExtractedContent is not assigned anywhere in this block, while
# $content is the text built above. The two lines look like the before/after
# versions of the same statement; confirm which one is intended.
Write-Output $ExtractedContent
Write-Output $content

# Convert the extracted content to YAML format
# "|-" starts a YAML literal block scalar with the trailing line breaks stripped;
# every collected line is appended with the leading whitespace given in the string below.
$yamlContent = "ReleaseNotes: |-`n"
$lines = $content -split "`n"
foreach ($line in $lines) {
$yamlContent += " $line`n"
}

Write-Output "YAML Content:"
Write-Output $yamlContent

# $releaseNotes is only assigned on this branch; it stays unset when no heading was found.
$releaseNotes = $yamlContent

} else {
# NOTE(review): two messages are emitted here; they look like the before/after
# wording of a single statement. Confirm which one should remain.
Write-Output "Pattern not found in the content."
Write-Output "Version not found in the content."
}

# Hand the detected version, its download URL(s) and the release notes back to the caller.
# NOTE(review): two return statements appear back to back; if both are really
# present, only the first is reached and $releaseNotes is never returned. They
# look like the before/after lines of a diff hunk; confirm which one is intended.
return $latestVersion, $latestVersionUrl
return $latestVersion, $latestVersionUrl, $releaseNotes

0 comments on commit 4d3deb7

Please sign in to comment.