-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapers.js
38 lines (32 loc) · 960 Bytes
/
scrapers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
const scrapeIt = require("scrape-it");
const helpers = require('./helpers.js');
const l = helpers.log;
const getContent = helpers.getContent;
const xmlToJson = helpers.xmlToJson;
const formatJson = helpers.formatJson;
/**
 * HTML scraping: logs what is about to be scraped, then delegates to scrape-it.
 *
 * @param {object} what - Description of the scrape target.
 * @param {*} what.id - Identifier; only used in the log output.
 * @param {*} what.url - Passed to scrape-it as the page to load.
 * @param {*} what.itemsPath - Passed to scrape-it as the `listItem` option.
 * @param {*} what.items - Passed to scrape-it as the `data` option.
 * @returns {Promise<*>} Resolves with whatever scrape-it resolves with.
 */
const html = async (what) => {
  const { id, url, itemsPath, items } = what;

  l(`Scraping ${id} in HTML mode`, 'm');
  l(`Retrieveing Content from ${url}`);

  // Option layout per the scrape-it docs: https://github.com/IonicaBizau/scrape-it
  const scrapeOptions = {
    items: {
      listItem: itemsPath,
      data: items,
    },
  };

  return scrapeIt(url, scrapeOptions);
};
/**
 * XML scraping: fetches the content at `what.url`, runs it through
 * `xmlToJson`, and hands the result plus the original `what` to `formatJson`.
 *
 * @param {object} what - Description of the scrape target.
 * @param {*} what.id - Identifier; only used in the log output.
 * @param {*} what.url - Passed to `getContent`.
 * @returns {Promise<*>} Resolves with the return value of `formatJson`.
 */
const xml = async (what) => {
  const { id, url } = what;

  l(`Scraping ${id} in XML mode`, 'm');

  // Fetch first, then convert; both steps are awaited in sequence.
  const converted = await xmlToJson(await getContent(url));

  return formatJson(converted, what);
};
/**
 * JSON scraping — currently an unimplemented placeholder.
 *
 * The function does nothing with `what`; the returned promise always
 * resolves to `undefined`.
 * TODO: implement JSON scraping.
 *
 * @param {*} what - Unused at present.
 * @returns {Promise<undefined>}
 */
const json = async (what) => undefined;
// Public API of this module: the scraping functions, exposed as `html`, `xml` and `json`.
module.exports = { html, xml, json };