diff --git a/README.md b/README.md
index b3a5bb3..5cfff0a 100644
--- a/README.md
+++ b/README.md
@@ -11,12 +11,11 @@ A command-line tool to fetch [Blacklight](https://themarkup.org/series/blackligh
 
 - `nvm use`
 - `npm install`
-- Create `urls.txt` file, with newline-separated absolute URLs to scan
-- `npm run main`
+- `./blacklight-query urls.txt` where `urls.txt` has newline-separated absolute URLs to scan
 
 ## Inputs
 
-Write all URLs you wish to scan as **absolute URLs** (including protocol, domain, and path) in a file named `urls.txt` in the root directory. Separate urls by newline.
+Write all URLs you wish to scan as **absolute URLs** (including protocol, domain, and path). Separate each URL with a newline.
 
 ### Sample `urls.txt` file
 
@@ -25,6 +24,13 @@ https://www.themarkup.org
 https://www.calmatters.org
 ```
 
+### You can use pipes
+
+You can also pipe your list of URLs.
+
+- `echo "https://themarkup.org/" | ./blacklight-query`
+- `./blacklight-query < urls.txt`
+
 ### Collector Options
 
 All of the [`blacklight-collector`](https://github.com/the-markup/blacklight-collector?tab=readme-ov-file#collector-configuration) options can be specified using this tool, by editing the `config` object in `main.ts`.
diff --git a/blacklight-query b/blacklight-query
new file mode 100755
index 0000000..2cf5f70
--- /dev/null
+++ b/blacklight-query
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Run Blacklight scans for a newline-separated list of absolute URLs.
+#
+# Usage: blacklight-query urls.txt
+#        echo "https://themarkup.org" | blacklight-query
+#        blacklight-query < urls.txt
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+# Directory containing this script, so ts-node, tsconfig.json and src/main.ts
+# are found regardless of the caller's working directory.
+dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+# Print usage to stderr, the conventional stream for diagnostics.
+usage() {
+  {
+    echo "Usage: blacklight-query urls.txt"
+    echo "       echo \"https://themarkup.org\" | blacklight-query"
+    echo "       blacklight-query < urls.txt"
+    echo
+    echo "Please provide a list of URLs, where each URL is on its own line."
+  } >&2
+}
+
+# Hand a URL list file ($1) to the TypeScript entry point.
+scan() {
+  "$dir/node_modules/.bin/ts-node" --project "$dir/tsconfig.json" "$dir/src/main.ts" "$1"
+}
+
+if (( $# == 1 )); then
+  # URLs are passed as an argument value. An explicit file wins even when
+  # stdin is not a terminal (e.g. under cron or CI).
+  scan "$1"
+elif (( $# == 0 )) && [ ! -t 0 ]; then
+  # URLs are piped to stdin: spool them to a private temp file, because
+  # main.ts only reads its URL list from a file path.
+  urls_file=$(mktemp "${TMPDIR:-/tmp}/blacklight-urls.XXXXXX")
+  # Clean up on every exit path; under errexit a failed scan would otherwise
+  # skip a trailing rm and leave the file behind.
+  trap 'rm -f -- "$urls_file"' EXIT
+  # cat copies stdin verbatim: no word splitting or globbing of URLs, and a
+  # final line without a trailing newline is kept.
+  cat > "$urls_file"
+  if [ ! -s "$urls_file" ]; then
+    usage
+    exit 1
+  fi
+  scan "$urls_file"
+else
+  usage
+  exit 1
+fi
diff --git a/package.json b/package.json
index c446d37..63e0516 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,8 @@
   "name": "@themarkup/blacklight-query",
   "version": "1.0.0",
   "description": "A simple tool to generate Blacklight-Collector scans of a list of urls",
-  "main": "build/index.js",
+  "main": "src/main.ts",
+  "bin": "./blacklight-query",
   "funding": {
     "type": "individual",
     "url": "https://themarkup.org/donate"
diff --git a/src/main.ts b/src/main.ts
index e34bb5f..2bb3c59 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -6,12 +6,23 @@ import { collect } from "@themarkup/blacklight-collector";
 import { reportFailures } from "./utils";
 
 // Gather URLs from input file
-const urlsPath = join(__dirname, '../urls.txt');
+// The blacklight-query wrapper passes a path; guard anyway so running
+// main.ts directly without one fails with a message instead of a TypeError.
+const urlsFile = process.argv[2];
+if (!urlsFile) {
+    console.error("Usage: blacklight-query urls.txt");
+    exit(1);
+}
+// Paths starting with '/' or '~' are used as given; anything else is resolved
+// against the caller's working directory.
+const urlsPath =
+    urlsFile[0] === '/' || urlsFile[0] === '~'
+        ? urlsFile
+        : join(process.cwd(), urlsFile);
+
 if (!fs.existsSync(urlsPath)) {
-    console.log(
-        "Please create a file named 'urls.txt', containing a newline-separated list of urls to scan."
-    );
-    exit();
+    console.error(`Could not find ${urlsPath}.`);
+    exit(1);
 }
 const urls = fs.readFileSync(urlsPath, "utf8");
 const urlsList = urls.trim().split(/\r?\n|\r|\n/g);