-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfetchtest.js
More file actions
65 lines (58 loc) · 2.34 KB
/
Copy pathfetchtest.js
File metadata and controls
65 lines (58 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import * as cheerio from 'cheerio';
import axios from 'axios';
/**
 * Pauses the calling async flow for a given amount of time.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Downloads a page with a custom User-Agent and hands the response body to
 * cheerio, retrying failed requests with exponential backoff.
 *
 * At most five requests are made. After the 1st to 4th failure the function
 * waits 2, 4, 8 and 16 seconds respectively before trying again; the error
 * from the 5th failure is rethrown unchanged.
 *
 * @param {string} url - Address of the page to request.
 * @returns {Promise<*>} Whatever `cheerio.load` returns for the response body.
 * @throws The error raised by the final failed request.
 */
async function fetchDataWithHeaders(url) {
  const headers = { 'User-Agent': 'fic-feed Discord bot developed by shantismurf@gmail.com' };
  const maxRetries = 5;
  for (let attempt = 1; ; attempt++) {
    try {
      const response = await axios.get(url, { headers });
      return cheerio.load(response.data);
    } catch (error) {
      // Give up before announcing a retry that will never happen.
      if (attempt >= maxRetries) {
        console.error("Max retries exceeded");
        throw error;
      }
      // Compute the wait once so the logged value and the real delay agree.
      const waitSeconds = 2 ** attempt;
      console.log(`Retrying in ${waitSeconds} seconds...`);
      await delay(waitSeconds * 1000); // Exponential backoff
    }
  }
}
/**
 * Derives the display name and profile link for a work's author string.
 *
 * @param {string} workAuthor - Comma-joined author names as scraped from the byline.
 * @param {number} [maxLength=200] - Longest name kept before it is cut and suffixed with ' ...'.
 * @returns {{name: string, url: (string|null)}} Display name ('None' when the
 *   byline is empty) and a user URL, or `null` for the URL when there are
 *   several authors or none.
 */
function buildAuthorInfo(workAuthor, maxLength = 200) {
  // `join` yields '' rather than null/undefined, so `??` would never fall back.
  const fullName = workAuthor || 'None';
  // Only mark the name as cut when characters were actually dropped.
  const name = fullName.length > maxLength
    ? `${fullName.substring(0, maxLength)} ...`
    : fullName;
  if (!workAuthor || fullName.includes(',')) {
    return { name, url: null };
  }
  // Extract the name inside parentheses if it exists, else use the whole string.
  // NOTE(review): presumably the byline reads "pseud (username)" — confirm against the page markup.
  const userName = fullName.match(/\((.*?)\)/)?.[1] || fullName;
  return { name, url: `http://archiveofourown.org/users/${userName}` };
}

/**
 * Scratch entry point: fetches one sample page, scrapes the work title and
 * author(s), and prints the metadata plus the author name/URL that would be
 * derived from it. Any failure is reported on stderr instead of being thrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Sample pages used while experimenting; only the last entry is fetched,
  // which matches the previous chain of overwriting assignments.
  const sampleUrls = [
    'https://archiveofourown.org/collections/BBC_MGE_2024',
    'https://archiveofourown.org/collections/BagginshieldBookClub',
    'https://archiveofourown.org/works/55997986',
    'https://archiveofourown.org/works/63048829',
    'https://archiveofourown.org/works/62565367',
    'https://archiveofourown.org/works/55061758',
    'https://archiveofourown.org/works/56651815',
    'https://archiveofourown.org/works/64152013',
  ];
  const url = sampleUrls[sampleUrls.length - 1];
  try {
    const $ = await fetchDataWithHeaders(url);
    const metadata = {
      type: 'work',
      workTitle: $('h2.title.heading').text().trim(),
      workAuthor: $('h3.byline.heading a').map((_, a) => $(a).text()).get().join(', '),
    };
    console.log(metadata);
    const { name: authorstr, url: authorUrl } = buildAuthorInfo(metadata.workAuthor);
    // Debug print kept in its original textual form.
    console.log(`\nname: 'A work by ' + ${authorstr},\n...(${authorUrl} && { url: ${authorUrl})`);
  } catch (error) {
    console.error("Failed to fetch data:", error.message);
  }
}
// Kick off the script. The promise is intentionally not awaited: main()
// reports failures itself through its own try/catch.
void main();