# Harvester #29260
# (Web-viewer notice, not part of the workflow:) This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Copyright (c) 2023 okibcn
# This is free software, licensed under the GNU General Public License v3.0
# See /LICENSE for more information.
# https://github.com/okibcn/ScoopMaster
# Description: Scoop Meta-Bucket
name: Harvester

on:
  # Scheduled harvest: fires twice per hour, at minutes 21 and 51.
  schedule:
    - cron: "21,51 * * * *"
  # Manual run, with an optional debug session and a choice of delivery channel.
  workflow_dispatch:
    inputs:
      debug_enabled:
        type: boolean
        description: "Debug session enabled"
        required: false
        default: false
      release:
        type: choice
        description: "Delivery Channel"
        required: false
        default: "Artifact"
        options: [Artifact, Release]

jobs:
  build:
    runs-on: windows-latest
    # Every `run:` step is executed with PowerShell Core, whichever runner
    # image (ubuntu-latest, macos-latest or windows-latest) is selected.
    defaults:
      run:
        shell: pwsh
    steps:
# Optional interactive tmate session, started only when the manual
# `debug_enabled` input is set. This one opens before the repository checkout.
# More info: https://til.simonwillison.net/github-actions/debug-tmate
#        or: https://github.com/mxschmitt/action-tmate
- name: "🐞 Debug session"
  if: github.event.inputs.debug_enabled == 'true'
  uses: mxschmitt/action-tmate@v3
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Fetches the repository into the workspace; later steps `cd ..` and use the
# parent directory as scratch space.
# v4 replaces v3, which targets the retired Node 16 action runtime.
- name: "⏬ Checkout repository"
  uses: actions/checkout@v4
# Optional interactive tmate session, started only when the manual
# `debug_enabled` input is set. This one opens right after the checkout.
# More info: https://til.simonwillison.net/github-actions/debug-tmate
#        or: https://github.com/mxschmitt/action-tmate
- name: "🐞 Debug session"
  if: github.event.inputs.debug_enabled == 'true'
  uses: mxschmitt/action-tmate@v3
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "🔍 Discover known buckets from Scoop-Directory website."
  run: |
    # Rebuilds the bucket inventory from three sources (the scoopsearch index,
    # official and community facets, plus the Scoop-Directory "by-apps" page)
    # and writes ScoopMaster/ActiveBuckets, ScoopMaster/DeprecatedBuckets and
    # Bucket_list.txt in the parent directory of the checkout.
    cd ..

    # NOTE(review): the api-key is hard-coded; presumably it is the public
    # query key of the search site - confirm, otherwise move it to a secret.
    # NOTE(review): the \" escapes in the JSON payloads depend on how this
    # PowerShell version passes arguments to native commands - verify on the runner.
    $lOfficial = (curl -s -X POST 'https://scoopsearch.search.windows.net/indexes/apps/docs/search?api-version=2020-06-30' `
      -H 'Content-Type: application/json' `
      -H 'api-key: DC6D2BBE65FC7313F2C52BBD2B0286ED' `
      -d '
      {
        \"facets\":[\"Metadata/Repository,count:10000\"],
        \"filter\":\"Metadata/OfficialRepositoryNumber eq 1\",
        \"top\":0
      }' | jq . | sls 'http.*github[^"]+').Matches.Value
    Write-Host "$($lOfficial.count) Official buckets found in Scoop.sh"

    $lCommunity = (curl -s -X POST 'https://scoopsearch.search.windows.net/indexes/apps/docs/search?api-version=2020-06-30' `
      -H 'Content-Type: application/json' `
      -H 'api-key: DC6D2BBE65FC7313F2C52BBD2B0286ED' `
      -d '
      {
        \"facets\":[\"Metadata/Repository,count:10000\"],
        \"filter\":\"Metadata/OfficialRepositoryNumber eq 0\",
        \"top\":0
      }' | jq . | sls 'http.*github[^"]+').Matches.Value
    Write-Host "$($lCommunity.count) Community buckets found in Scoop.sh"

    $scoopDBURL = "https://rasa.github.io/scoop-directory/by-apps.html"
    # From here on errors are deliberately ignored (best effort): a failed
    # download or a missing inventory file simply contributes no entries.
    $ErrorActionPreference = 'SilentlyContinue'
    $source = iwr $scoopDBURL
    # Collect the pipeline output instead of growing an array with "+" per match.
    $lScoopdir = @($source.Content -split '\r?\n' | ForEach-Object {
      if ($_ -match '<h2.*(http[^\"]+)"') { $matches[1] }
    })
    Write-Host "$($lScoopdir.count) buckets found in Scoop-Directory."

    # @( ...; ... ) flattens every source into one list whether it is $null, a
    # single string or an array ("+" would concatenate two single strings);
    # the Where-Object drops null/empty entries.
    $lActive = @($lOfficial; $lCommunity; $lScoopdir) | Where-Object { $_ } | Sort-Object | Get-Unique
    Write-Host "$($lActive.count) total Active buckets found so far"

    $oldActive = gc ScoopMaster/ActiveBuckets
    Write-Host "$($oldActive.count) previous Active buckets"
    $oldDeprecated = gc ScoopMaster/DeprecatedBuckets
    Write-Host "$($oldDeprecated.count) previous deprecated buckets"

    # A previously known bucket is deprecated when it is no longer listed by
    # any source. Exact (case-insensitive) membership test: the URL must not
    # be used as a regex, where "." is a wildcard and "owner/repo" would also
    # match "owner/repo-something".
    $lDeprecated = @($oldActive; $oldDeprecated) | Where-Object { $_ } | Sort-Object | Get-Unique |
      Where-Object { $_ -notin $lActive }
    Write-Host "$($lDeprecated.count) current deprecated buckets"

    $lBuckets = @($lActive; $lDeprecated) | Where-Object { $_ } | Sort-Object | Get-Unique
    Write-Host "$($lBuckets.count) Total buckets"

    $lDeprecated | Set-Content ScoopMaster/DeprecatedBuckets
    # ActiveBuckets receives the full list (active + deprecated), as before.
    $lBuckets | Set-Content ScoopMaster/ActiveBuckets
    cp ScoopMaster/ActiveBuckets Bucket_list.txt
- name: "⏬ Download Active buckets"
  run: |
    # Downloads the master-branch archive of every listed GitHub bucket into
    # ./zips (relative to the checkout's parent directory) using aria2c.
    cd ..
    $bucketUrls = (gc .\Bucket_list.txt)

    ## CREATE INPUT FILE FOR ARIA2C
    # One entry per bucket: the archive URL followed by an indented "out="
    # option line naming the zip "<owner>~<repo>.zip".
    $ariaEntries = $bucketUrls | ForEach-Object {
      $url = $_
      $isGithubRepo = $url -match 'https:\/\/github.com\/(.+)'
      if (-not $isGithubRepo) {
        # Not a GitHub URL: log it and skip.
        $url >> ERROR_badrepos.txt
        return
      }
      # Capture the owner/repo part before running another -match.
      $zipBaseName = $matches[1].replace('/','~')
      # This repository itself is never downloaded.
      if ($url -match 'okibcn\/ScoopMaster') {
        return
      }
      "$url/archive/refs/heads/master.zip`n out=$zipBaseName.zip"
    }
    $ariaEntries > download_list.txt

    # Snapshot timestamp, exported for the later steps.
    $timestamp = Get-Date -format 'yyyy-MM-ddTHH:mm:ssZ'
    "UPDATE=$timestamp" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

    # DOWNLOAD BUCKETS
    aria2c --save-session aria2-out.txt -j 16 -i download_list.txt -d zips
    (cmd.exe /c dir /s /b /a-d zips) > Zip_list.txt
    $zipCount = (Get-Content .\Zip_list.txt).count
    "N_BUCKETS=$zipCount" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: "📃 Extract manifests from downloaded buckets"
  run: |
    # Unpacks the JSON manifests of every downloaded zip into jsons/<zip name>/.
    cd ..

    # EXTRACT MANIFESTS (both <root>/bucket/*.json and <root>/*.json)
    7z e -y zips/*.zip -ojsons/* */bucket/*.json */*.json

    # PREPARE OUTPUT: discard dot-prefixed JSON files
    Remove-Item -Force -Recurse jsons/*/.*.json

    $zipCount = (Get-Content .\Zip_list.txt).count
    $manifestCount = (cmd.exe /c dir /s /b /a-d jsons).count
    Write-Host "PROCESS COMPLETED. $manifestCount manifests extracted from $zipCount downloaded buckets."
- name: "📝 Indexing JSON manifests"
  run: |
    # Normalizes a version-like string for ordering: every run of digits is
    # left-padded with zeros to 10 characters, everything else is kept as is,
    # so that a plain string comparison orders "1.10" after "1.9".
    # A string without digits is returned unchanged.
    function longV([string]$v) {
      return [regex]::Replace($v, '\d+', { $args[0].Value.PadLeft(10, '0') })
    }

    # Compares two version-like strings.
    # Returns 0 if both versions match, 1 if the first one is higher,
    # and -1 if the second one is higher.
    # Strings whose normalized forms are identical (e.g. "1.0" and "1.00")
    # count as a match, so the result does not depend on argument order.
    function CompareV([string]$a, [string]$b) {
      if ($a -eq $b) {
        return 0
      }
      $longA = longV $a
      $longB = longV $b
      if ($longA -eq $longB) {
        return 0
      }
      if ($longA -gt $longB) {
        return 1
      }
      return -1
    }
cd ..
## PROCESS JSON FILES
# $FullDB gets one row per manifest file found under jsons/<owner~repo>/.
$FullDB = New-Object system.data.datatable
foreach ($columnName in "Name", "Version", "Date", "Bucket", "Description", "Homepage", "Autoupdate", "Clone") {
    [void]$FullDB.Columns.Add($columnName)
}
$files = gci jsons/*/*.json ##o##
$nManifests = $files.Count
$progress = 0
Write-host "##"
Write-host "## INDEXING JSON FILES..."
Write-host "## ================================="
Write-host "##"
foreach ( $file in $files ) {
    $progress++
    $name = $file.BaseName
    # The parent folder is "<owner>~<repo>"; turn it back into "<owner>/<repo>".
    $bucket = $file.FullName.split('\')[-2].replace('~', "/")

    # Log only when the integer percentage changes.
    $percent = [math]::round(100 * $progress / $nManifests)
    if ($percent -ne $oldpercent) {
        Write-Output "( $percent% ) $progress / $nManifests (indexed/total) Indexing bucket $bucket"
        $oldpercent = $percent
    }

    $date = $file.LastWriteTimeUtc.ToString("yyyy-MM-ddTHH:mm:ssZ")
    $manifestLines = gc $file

    # Manifests that cannot be parsed are logged and skipped.
    try {
        $json = $manifestLines | ConvertFrom-Json
        $version = $json.version
        $description = "$($json.description)"
    }
    catch {
        $file.fullname >> ERROR_manifest.txt
        continue
    }

    try {
        $homepage = $json.homepage
    }
    catch {
        $file.fullname >> ERROR_manifest.txt
        $homepage = ""
    }

    # 'C' when any line mentions ghproxy or a "_from" key, otherwise 'N'.
    if ($manifestLines -Match 'ghproxy|"_from"') {
        $cloneFlag = 'C'
    }
    else {
        $cloneFlag = 'N'
    }
    # 'A' when the manifest has an autoupdate section, otherwise '_'.
    if ($json.autoupdate) {
        $autoFlag = 'A'
    }
    else {
        $autoFlag = '_'
    }

    [void]$FullDB.Rows.Add($name, $version, $date, $bucket, $description, $homepage, $autoFlag, $cloneFlag)
}
# Picks one "best" manifest per app name out of $FullDB and writes the result
# to ./LastAppsDB.csv. Rows are sorted by name so that all manifests of an app
# are adjacent; the last row of $hash is always the best candidate so far for
# the app currently being processed.
$DB = $FullDB | Sort-Object Name
$hash = New-Object system.data.datatable
foreach ($columnName in "Name", "Version", "Date", "Bucket", "Description", "Homepage", "Autoupdate", "Priority") {
    [void]$hash.Columns.Add($columnName)
}
$name = ""
$nApps = 0
$progress = 0
Write-host "##"
Write-host "## FINDING LATEST VERSIONS..."
Write-host "## ================================="
Write-host "##"
foreach ( $row in $DB ) {
    $progress++
    $percent = [math]::round(100 * $progress / $nManifests)
    if ($percent -ne $oldpercent) {
        Write-Output "( $percent% ) $progress / $nManifests - $nApps (indexed/total-harvested) Last checked app: $name.json"
        $oldpercent = $percent
    }

    # Manifests from these two buckets are never selected.
    if ($row.Bucket -eq "lzwme/scoop-proxy-cn") { continue }
    if ($row.Bucket -eq "duzyn/scoop-cn") { continue }

    # First manifest seen for this app: it becomes the initial best entry.
    if ($row.name -ne $name) {
        $name = $row.name
        [void]$hash.Rows.Add($name, $row.Version, $row.Date, $row.Bucket, $row.Description, $row.Homepage, $row.Autoupdate, $row.Clone)
        $nApps++
        continue
    }

    $best = $hash.Rows[-1]

    # Ranking key: okibcn marker + autoupdate flag + version + trailing flag
    # ('S' for ScoopInstaller buckets, otherwise the clone flag). The keys are
    # compared with CompareV.
    $current = if ($best.Bucket -match '^okibcn/' ) {'O'} else {'_'}
    $current = $current + $best.Autoupdate + $best.Version
    $current = if ($best.Bucket -match '^ScoopInstaller/' ) {$current + 'S'} else {$current + $best.Priority}

    $candidate = if ($row.Bucket -match '^okibcn/' ) {'O'} else {'_'}
    $candidate = $candidate + $row.Autoupdate + $row.Version
    $candidate = if ($row.Bucket -match '^ScoopInstaller/' ) {$candidate + 'S'} else {$candidate + $row.Clone}

    $verdict = CompareV $candidate $current
    # Replace the entry when the candidate ranks higher, or when both rank the
    # same and the candidate file is newer. The comparison uses the "Date"
    # column; the table has no "LastDate" column, which always read as $null.
    $replace = ($verdict -eq 1) -or (($verdict -eq 0) -and ($row.Date -gt $best.Date))
    if ($replace) {
        # Version is copied as well so the entry always describes the manifest
        # of the bucket it points to.
        $best.Version = $row.Version
        $best.Date = $row.Date
        $best.Bucket = $row.Bucket
        $best.Description = $row.Description
        $best.Homepage = $row.Homepage
        $best.Autoupdate = $row.Autoupdate
        $best.Priority = $row.Clone
    }
}
# From here on $hash holds the selected rows, sorted by name.
$hash = $hash | select | Sort-Object Name
$scoopDBfile = "./LastAppsDB.csv"
$hash | ConvertTo-csv > $scoopDBfile
# Copies the selected manifest of every app into ./local, appends one
# "okibcn/ScoopMaster" row per app to $FullDB, then writes AllAppsDB.csv and
# the 7z archives, and moves the database files into the checkout.
mkdir local | Out-Null
$progress = 0
# $hash holds the selected rows as a plain collection at this point, so count
# its elements; ".rows.count" on it evaluates to 0.
$nApps = @($hash).Count
Write-host "##"
Write-host "## CHECKING LINE AND FILE ENDING FORMAT OF SELECTED MANIFESTS..."
Write-host "## ============================================================="
Write-host "##"
foreach ( $row in $hash ) {
    [void]$FullDB.Rows.Add($row.Name, $row.Version, $row.Date, "okibcn/ScoopMaster", $row.Description, $row.Homepage, $row.Autoupdate)
    # Re-write the manifest through Get-Content / ">" into local/<app>.json.
    $file = "jsons/$( $row.Bucket.replace('/','~') )/$($row.Name).json"
    Get-Content $file > "local/$($row.Name).json"
    $progress++
    $percent = [math]::round(100 * $progress / $nApps)
    if ($percent -ne $oldpercent) {
        Write-Output "( $percent% ) $progress / $nApps (hasvested/total) Last processed app: $($row.Name).json"
        $oldpercent = $percent
    }
}
$FullDB = $FullDB | select Name, Version, Autoupdate, Bucket, Homepage, Description | Sort-Object Name,Autoupdate,Version
$scoopDBfile = "./AllAppsDB.csv"
$FullDB | ConvertTo-csv > $scoopDBfile
# Export the counters for the following steps.
echo "N_MANIFESTS=$nManifests" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "N_APPS=$nApps" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
7z a AllAppsDB.7z AllAppsDB.csv
7z a LastAppsDB.7z LastAppsDB.csv
move-item *DB.* ScoopMaster
- name: "📦 Prepare updated repo"
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    ############################
    ##                        ##
    ##     UPDATE BUCKET      ##
    ##                        ##
    ############################
    # Synchronizes ./bucket with the freshly selected manifests in ../local and
    # records which apps were deleted, updated or added.
    # cd ..
    # aria2c https://github.com/okibcn/ScoopMaster/archive/refs/heads/master.zip -o ScoopMaster.zip
    # 7z e ScoopMaster.zip */bucket/* -obucket
    # cd ScoopMaster
    $lDeleted = @()
    $lUpdated = @()
    $lNew = @()
    $progress = 0
    Write-host "##"
    Write-host "## CHECKING MODIFIED MANIFESTS..."
    Write-host "## ================================="
    Write-host "##"
    $current = gci bucket/*.json
    foreach ($file in $current) {
      $progress++
      $percent = [math]::round(100 * $progress / $env:N_APPS)
      if ($percent -ne $oldpercent) {
        Write-Output "( $percent% ) $progress / $env:N_APPS (reviewed/total) Last checked app: $($file.BaseName)"
        $oldpercent = $percent
      }
      $candidate = "../local/$($file.Name)"
      # No fresh manifest for this app: it left Scoop, remove it from the bucket.
      if (-not (test-path $candidate)) {
        Remove-Item $file
        $lDeleted += $file.BaseName
        continue
      }
      $hFile = (Get-FileHash -Algorithm SHA1 $file).Hash
      $hCandidate = (Get-FileHash -Algorithm SHA1 $candidate).Hash
      # Content changed: take the fresh manifest.
      if ($hCandidate -ne $hFile) {
        Move-Item $candidate $file -Force
        # Record the app name, like the deleted/new lists do, so that
        # AppsUpdated.txt lists names rather than file-object dumps.
        $lUpdated += $file.BaseName
        continue
      }
      # Unchanged: discard the candidate so only new apps remain in ../local.
      Remove-Item $candidate
    }
    # Whatever is left in ../local has no counterpart in the bucket yet.
    $lNew = (gci ../local/*).BaseName
    move-item ../local/* bucket -Force
    move-item ../*.txt .
    $lDeleted > AppsLeavingScoop.txt
    $lUpdated > AppsUpdated.txt
    $lNew > AppsNew.txt
# Builds stats.json for this run (carrying the previous run's date and
# cumulative download count forward) and refreshes the counters in README.md.
if ( "${{github.event.inputs.release}}" -eq 'Artifact') {7z a '-x!*.7z' ScoopMaster.7z *}
# Sum of every "download_count" value returned by the releases API.
$downloads=((( gh api repos/okibcn/ScoopMaster/releases | jq . ) -match "download_count" | sls '\d+' ).matches | Measure-Object Value -Sum ).Sum
# Read the previous stats once, before stats.json is overwritten below.
$prevStats = gc stats.json | ConvertFrom-Json
$stats=@{}
$stats.Add('date',"$($env:UPDATE)")
$stats.Add('prevDate',"$($prevStats.date.ToString("yyyy-MM-ddTHH:mm:ssZ"))")
$stats.Add('totalBuckets',(gc Bucket_list.txt).count)
$stats.Add('badBuckets',((gc ./aria2-out.txt) | sls 'http').Matches.count)
$stats.Add('buckets',$stats.totalBuckets - $stats.badBuckets )
$stats.Add('manifests',$( [int]($env:N_MANIFESTS) + [int]($env:N_APPS) ) )
$stats.Add('badManifests',(gc ERROR_manifest.txt).count)
$stats.Add('apps',[int]$env:N_APPS)
$stats.Add('appsNew',$lnew.count)
$stats.Add('appsUpdated',$lUpdated.count)
$stats.Add('appsDeleted',$lDeleted.count)
$stats.Add('downloads',[int]$downloads)
$stats.Add('totalDownloads',[int]$( $prevStats.totalDownloads + $stats.downloads ))
$stats | ConvertTo-Json > stats.json
"New Downloads: $downloads"
# Report the stored total; re-reading the new stats.json and adding the
# downloads again would count them twice.
"Total Downloads: $($stats.totalDownloads)"
# Replace the number in front of "** manifests", "** buckets" and "** apps".
(gc .\README.md) -Replace "\d+(?=\*\* manifests)","$($stats.manifests)" | Set-Content README.md
(gc .\README.md) -Replace "\d+(?=\*\* buckets)","$($stats.buckets)" | Set-Content README.md
(gc .\README.md) -Replace "\d+(?=\*\* apps)","$($stats.apps)" | Set-Content README.md
# Writes body.txt (the markdown text passed to the release step via
# body_path) and then prints a plain summary to the job log.
$bodyLines = @(
    "These databases are updated every 30 minutes. The current content and metrics for these databases are:"
    "- **AllAppsDB** contains info of **$($stats.manifests)** manifests in **$($stats.buckets)** online buckets."
    "- **LastAppsDB** contains highest version of each of the **$($stats.apps)** apps in Scoop, all of them contained in this bucket."
    " "
    # Single-quoted on purpose: inside double quotes the backtick is the
    # PowerShell escape character and the markdown backticks around ss are lost.
    'These databases can be used for fast online data retrieval, for instance by CLI scoop search utilities such as `ss` (ScoopSearch)'
    " "
    "The current snapshot timestamp for these databases is $($stats.date). In the last 30 minutes:"
    "- $($stats.appsNew) apps were added to Scoop."
    "- $($stats.appsUpdated) apps were updated."
    "- $($stats.appsDeleted) apps left Scoop."
)
$bodyLines > body.txt

# Same figures for the job log.
Write-Output " "
Write-Output " "
Write-Output "- AllAppsDB contains the data of **$($stats.manifests)** manifests in $($stats.buckets) online buckets."
Write-Output "- LastAppsDB contains highest version of each of the $($stats.apps) apps in Scoop, all of them contained in this bucket."
Write-Output " "
Write-Output "The current snapshot timestamp for these databases is $($stats.date). In the last 30 minutes:"
Write-Output "- $($stats.appsNew) apps were added to Scoop."
Write-Output "- $($stats.appsUpdated) apps were updated."
Write-Output "- $($stats.appsDeleted) apps left Scoop."
# Optional interactive tmate session, started only when the manual
# `debug_enabled` input is set. This one opens before the release step.
# More info: https://til.simonwillison.net/github-actions/debug-tmate
#        or: https://github.com/mxschmitt/action-tmate
- name: "🐞 Debug session"
  if: github.event.inputs.debug_enabled == 'true'
  uses: mxschmitt/action-tmate@v3
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Publishes the database files to the "Databases" release, using body.txt as
# the release notes. Runs on scheduled runs and on manual runs that selected
# the 'Release' channel.
# Go to https://github.com/OWNER/REPO/settings/actions and, in the
# "Workflow permissions" section, give actions **Read and write permissions**.
- name: "🎉 Release updated databases"
  # NOTE(review): the action reference in the captured source was garbled by
  # e-mail obfuscation ("softprops/[email protected]"). It is restored as
  # softprops/action-gh-release pinned to the v1 tag - confirm the exact
  # version the repository uses.
  uses: softprops/action-gh-release@v1
  if: github.event_name != 'workflow_dispatch' || github.event.inputs.release == 'Release'
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  with:
    tag_name: Databases
    body_path: body.txt
    files: ./*DB.*
# Commits everything in the working tree and pushes it. Runs on scheduled
# runs and on manual runs that selected the 'Release' channel.
- name: "⭐ Update repo (Commit-n-Push)"
  if: github.event_name != 'workflow_dispatch' || github.event.inputs.release == 'Release'
  run: |
    # Keep line endings exactly as written by the previous steps.
    git config --global core.autocrlf false
    git config --global core.safecrlf false
    git -C . init
    git -C . config --local user.name "GitHub Actions"
    # NOTE(review): the address in the captured source was garbled by e-mail
    # obfuscation ("[email protected]"); actions@github.com is an assumption -
    # confirm the address the repository uses.
    git -C . config --local user.email actions@github.com
    git -C . add -A
    git -C . commit --no-verify -m "Update timestamp $($env:UPDATE)"
    git push
# Manual runs with the 'Artifact' channel: upload the 7z archives instead of
# releasing. This step and the "Standard Upload" step have mutually exclusive
# conditions, so the shared artifact name "Logs" is uploaded only once per run.
- name: "👍 Full Upload as Artifact"
  # v3 of upload-artifact has been retired by GitHub; v4 is the replacement.
  uses: actions/upload-artifact@v4
  # if: github.event_name == 'workflow_dispatch'
  if: github.event.inputs.release == 'Artifact'
  with:
    name: Logs
    path: |
      *.7z
# Scheduled runs and manual 'Release' runs: keep the *.txt logs as an artifact.
# This step and the "Full Upload" step have mutually exclusive conditions, so
# the shared artifact name "Logs" is uploaded only once per run.
- name: "👍 Standard Upload logs as Artifact"
  # v3 of upload-artifact has been retired by GitHub; v4 is the replacement.
  uses: actions/upload-artifact@v4
  # if: github.event_name == 'workflow_dispatch'
  if: github.event_name != 'workflow_dispatch' || github.event.inputs.release == 'Release'
  with:
    name: Logs
    path: |
      *.txt