This repository was archived by the owner on Jul 15, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.js
126 lines (104 loc) · 3.19 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Outside production, load environment variables (e.g. AZURE_TOKEN) from a
// local .env file; in production the host environment is expected to set them.
const dev = process.env.NODE_ENV !== "production";
if (dev) {
  require("dotenv").load();
}
const firebase = require("./firebase.server");
const Pageres = require("pageres");
const sharp = require("sharp");
const path = require("path");
const got = require("got");
const fs = require("fs");
// Lodash-style template Pageres uses to name screenshot files.
const dumpFilenameTmpl = "<%= date %> <%= time %> <%= url %>";
// Screenshots and derived images are written under /tmp.
const dest = path.join("/", "tmp");
// Firebase Storage bucket for image uploads and the Realtime Database root
// under which dump and face rows are pushed.
const bucket = firebase.storage().bucket();
const dataRef = firebase.database().ref("data");
/**
 * Run the full pipeline: screenshot eb.dk, convert the capture to JPEG,
 * upload the dump to Storage, record it in the database, detect faces via
 * the Azure Face API, crop each face image, and upload/persist every face.
 * @returns {Promise<void>} resolves once all face rows have been created
 */
async function main() {
  console.log("scraping");
  const [filename] = await scrape("eb.dk");
  console.log(filename);
  const converted = await convert(filename);

  console.log("Uploading dump");
  const [file] = await bucket.upload(converted, {
    destination: `dumps/${path.basename(converted)}`
  });

  console.log("Adding dump row");
  const rowRef = await dataRef.child("dumps").push(file.metadata);
  console.log(` * ${rowRef.key}`);

  console.log("Detecting faces");
  const detectedFaces = await detectFaces(converted);

  console.log("Extracting faces");
  const faceImages = await Promise.all(detectedFaces.map(extract(converted)));

  console.log("Uploading images");
  // Only completion matters here; the push results were never used, so the
  // previous unused `results` binding has been dropped.
  await Promise.all(faceImages.map(uploadAndCreateFace(rowRef.key)));
  console.log("check");
}
/**
 * Build a per-face handler that uploads one cropped face image to Storage
 * under `faces/<dumpKey>/` and pushes its metadata (merged with the face
 * data) to `faces/<dumpKey>` in the database.
 * @param {string} dumpKey - database key of the parent dump row
 * @returns {(face: Object) => Promise<Object>} resolves with the push ref
 */
function uploadAndCreateFace(dumpKey) {
  return async face => {
    console.log(` - Uploading ${face.filename}`);
    const [file] = await bucket.upload(face.filename, {
      destination: `faces/${dumpKey}/${path.basename(face.filename)}`
    });
    console.log(` - Creating row ${file.name}`);
    const data = Object.assign({}, face, { image: file.metadata });
    return dataRef.child(`faces/${dumpKey}`).push(data);
  };
}
/**
 * Build a mapper that crops a detected face rectangle out of the source
 * image and writes it next to the original as `<name>__<faceId>.jpg`.
 * @param {string} origFilename - path to the full-page JPEG
 * @returns {(face: Object) => Promise<Object>} the face plus its `filename`
 */
function extract(origFilename) {
  return async face => {
    const filename = origFilename.replace(/\.jpg$/, `__${face.faceId}.jpg`);
    await sharp(origFilename)
      .extract(face.faceRectangle)
      .toFile(filename);
    return Object.assign({}, face, { filename });
  };
}
/**
 * Capture a 940x1024 screenshot of a URL with Pageres, saved into `dest`.
 * @param {string} url - site to capture (e.g. "eb.dk")
 * @returns {Promise<string[]>} full paths of the captured screenshot files
 */
function scrape(url) {
  return new Pageres({ delay: 2, filename: dumpFilenameTmpl })
    .src(url, ["940x1024"])
    .dest(dest)
    .run()
    .then(results =>
      // path.join is platform-safe, unlike manual "/" string concatenation.
      results.map(res => path.join(dest, res.filename))
    );
}
/**
 * Re-encode a PNG screenshot as a 60%-quality JPEG beside the original.
 * @param {string} filename - path to the source PNG
 * @returns {Promise<string>} path of the resulting .jpg file
 */
async function convert(filename) {
  const newFilename = filename.replace(/\.png$/, ".jpg");
  await sharp(filename)
    .jpeg({ quality: 60 })
    .toFile(newFilename);
  return newFilename;
}
/**
 * Stream the image to the Azure Face API detect endpoint and return the
 * detected faces with the full attribute set (age, gender, emotion, ...).
 * Requires AZURE_TOKEN in the environment for authentication.
 * @param {string} filename - path to the JPEG to analyze
 * @returns {Promise<Object[]>} parsed face-detection results
 */
async function detectFaces(filename) {
  const read = fs.createReadStream(filename);
  const base = "https://westeurope.api.cognitive.microsoft.com/face/v1.0";
  const endpoint =
    base +
    "/detect?returnFaceAttributes=age,gender,headPose,smile,facialHair,glasses,emotion,hair,makeup,occlusion,accessories,blur,exposure,noise";
  const res = await got.post(endpoint, {
    body: read,
    headers: {
      Accept: "application/json",
      "Content-Type": "application/octet-stream",
      "Ocp-Apim-Subscription-Key": process.env.AZURE_TOKEN
    }
  });
  return JSON.parse(res.body);
}
// When executed directly, run the pipeline; when required, export it.
if (require.main === module) {
  main()
    .then(() => {
      console.log("done");
    })
    .catch(err => {
      // Previously any failure became an unhandled promise rejection;
      // log it and signal failure via the exit code instead.
      console.error(err);
      process.exitCode = 1;
    });
} else {
  module.exports = main;
}