Skip to content

Get latest data

Get latest data #1267

Workflow file for this run

# Downloads every URL listed in urls.txt, stores each response as
# key-sorted, pretty-printed JSON, and commits the files when they changed.
name: Get latest data

# Triggers: any push, a manual run, and an hourly schedule.
on:
  push:
  workflow_dispatch:
  schedule:
    # Minute 5 of every hour.
    - cron: '5 * * * *'

jobs:
  scheduled:
    runs-on: ubuntu-latest
    # The final step pushes a commit, so the job token needs write access
    # to the repository contents.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Fetch latest data
        env:
          # Browser-like User-Agent sent with every request. The folded
          # scalar joins these lines with single spaces.
          USER_AGENT: >-
            Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)
            AppleWebKit/537.36 (KHTML, like Gecko)
            Chrome/109.0.0.0 Safari/537.36
        run: |-
          # Fail the step when wget itself fails, not only when json.tool does.
          set -o pipefail
          # Get all of the files
          cat urls.txt
          # `|| [ -n "$url" ]` keeps the last URL when urls.txt has no
          # trailing newline.
          while IFS= read -r url || [ -n "$url" ]; do
            # Skip blank lines; they would otherwise produce a file named ".json".
            [ -n "$url" ] || continue
            echo "domaining"
            # Derive the output name from the URL: drop the scheme and the
            # path, keep the last two host labels, then remove ".gov".
            domain=$(echo "$url" | sed -e 's|^[^/]*//||' -e 's|/.*$||' -e 's|.*\.\([^.]*\.[^.]*\)$|\1|' -e 's|\.gov||')
            filename="${domain}.json"
            echo "$filename"
            # json.tool validates the payload and sorts keys so diffs stay stable.
            wget -qO- --user-agent="$USER_AGENT" "$url" | python -m json.tool --sort-keys > "$filename"
          done < urls.txt

      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          # NOTE(review): the source showed an obfuscated placeholder here;
          # confirm this noreply address is the one intended.
          git config user.email "actions@users.noreply.github.com"
          git add *.json
          timestamp=$(date -u)
          # `git commit` fails when nothing is staged; exit 0 so a run with
          # no changes still succeeds and skips the push.
          git commit -m "Updated data: ${timestamp}" || exit 0
          git push