-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpt_preprocessor.js
More file actions
53 lines (42 loc) · 1.68 KB
/
gpt_preprocessor.js
File metadata and controls
53 lines (42 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import fs from 'fs';
import { saveFile } from './utils/file_ctrl.js';
import classifyFoods from './utils/gpt_classifier.js';
// Input: crawled item details (parsed below and indexed as an array of objects with a `food` field).
const FILE_PATH = 'output/crawl/details.json';
// Output locations handed to saveFile().
const OUTPUT_DIR = 'output/gpt';
const OUTPUT_FILE_NAME = 'gpt.json';
const LOG_FILE_NAME = 'failure_log.json';
// Number of items sent to the classifier per request.
const STRIDE = 20;
// Fallback classifier input for items without usable food text ('한식' is Korean for "Korean cuisine").
const DEFAULT_FOOD = '한식';

/**
 * Builds the classifier input string for one crawled item.
 *
 * @param {{ food?: unknown }} item - One entry of the crawled data.
 * @returns {string} The item's food text with runs of two or more dots removed,
 *   or DEFAULT_FOOD when the text is missing, not a string, or empty.
 */
function toClassifierInput(item) {
  const food = item?.food;
  if (typeof food !== 'string' || food === '') {
    return DEFAULT_FOOD;
  }
  // The previous pattern was the *string* '..|...', which replaceAll matches
  // literally, so it never removed anything in practice.
  // NOTE(review): assumes the intent was to strip ".." / "..." ellipses — confirm.
  const cleaned = food.replaceAll(/\.{2,}/g, '');
  return cleaned === '' ? DEFAULT_FOOD : cleaned;
}

/**
 * Extracts the tag list from one classifier response line.
 *
 * The tags are taken from the text between the first and second ':' of the
 * line, split on ','.
 *
 * @param {unknown} line - One element of the classifier response.
 * @returns {string[] | null} The tags, or null when the line is not a string
 *   or contains no ':'.
 */
function parseFoodTags(line) {
  if (typeof line !== 'string') {
    return null;
  }
  const tagPart = line.split(':')[1];
  return tagPart === undefined ? null : tagPart.split(',');
}

const jsonFile = fs.readFileSync(FILE_PATH, 'UTF-8');
// For a quick test run, append `.slice(0, 200)` to the parsed array.
const jsonData = JSON.parse(jsonFile);

console.log(`operation starts for ${jsonData.length} items`);

// Start indices of the batches that could not be classified (written to the failure log).
const failed = [];
// Exact number of items in failed batches; the last batch may be shorter than STRIDE.
let failedItemCount = 0;

/**
 * Logs a failed batch and records it for the failure log.
 *
 * @param {number} start - Index of the first item in the batch.
 * @param {number} end - Index one past the last item in the batch.
 * @param {string} reason - Human-readable description of the failure.
 */
function markFailed(start, end, reason) {
  console.log(`[${start} - ${end}] failed. ${reason}`);
  failed.push(start);
  failedItemCount += end - start;
}

for (let i = 0; i < jsonData.length; i += STRIDE) {
  // Exclusive end index of this batch, clamped to the end of the data.
  const end = Math.min(i + STRIDE, jsonData.length);
  const foodList = jsonData.slice(i, end).map(toClassifierInput);

  let foodTagList;
  try {
    foodTagList = await classifyFoods(foodList);
  } catch (err) {
    // A single failed request must not discard the batches already processed.
    markFailed(i, end, `classifier threw: ${err?.message ?? err}`);
    continue;
  }

  // The classifier must return exactly one line per input item.
  if (!Array.isArray(foodTagList) || foodTagList.length !== foodList.length) {
    const generated = Array.isArray(foodTagList) ? foodTagList.length : 0;
    markFailed(i, end, `generated ${generated} items for ${foodList.length}`);
    continue;
  }

  // Parse the whole batch before touching jsonData so that a malformed line
  // cannot leave the batch half-updated.
  const parsedTags = foodTagList.map(parseFoodTags);
  if (parsedTags.includes(null)) {
    markFailed(i, end, 'response contained a line without a ":"-separated tag list');
    continue;
  }

  parsedTags.forEach((tags, offset) => {
    jsonData[i + offset].foodTags = tags;
  });

  if (end % 100 === 0) {
    console.log(`[${end}/${jsonData.length}] DONE`);
  }
}

console.log(`Finished : total ${jsonData.length} items, ${failedItemCount} items failed`);

// Save the annotated data and the start indices of the failed batches.
saveFile(OUTPUT_DIR, OUTPUT_FILE_NAME, jsonData);
saveFile(OUTPUT_DIR, LOG_FILE_NAME, failed);