-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAutomappingSchema.js
More file actions
146 lines (121 loc) · 4 KB
/
AutomappingSchema.js
File metadata and controls
146 lines (121 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
const fs = require('fs');
const fuzz = require('fuzzball');
/**
 * Sample source schema: field name -> type string.
 *
 * matchFields() compares these type strings with strict equality, so each
 * value must be spelled exactly like its destination counterpart for the
 * pair to be considered at all.
 */
const sourceFields = {
  EmployeeID: 'INT',
  FirstName: 'VARCHAR(50)',
  LastName: 'VARCHAR(50)',
  Email: 'VARCHAR(100)',
  Age: 'INT',
  DateOfBirth: 'DATE',
  // Was the truncated string 'DECIMAL(10' (unbalanced parenthesis), which
  // could never equal the destination's 'DECIMAL(10)'.
  // NOTE(review): confirm the intended precision/scale of this column.
  Salary: 'DECIMAL(10)',
  IsActive: 'BOOLEAN',
  ProfilePicture: 'BLOB',
  DepartmentID: 'INT',
  JoinDate: 'DATETIME',
};
/**
 * Sample destination schema: field name -> type string.
 *
 * Key order is significant: matchFields() walks the entries in insertion
 * order, so candidates that tie on score are reported in this order.
 */
const destinationFields = {
  EmpID: 'INT',
  FiName: 'VARCHAR(50)',
  FName: 'VARCHAR(50)',
  Email: 'VARCHAR(100)',
  Age: 'INT',
  DOB: 'DATE',
  Salary: 'DECIMAL(10)',
  IsActive: 'BOOLEAN',
  ProfilePicture: 'BLOB',
  DepartmentID: 'INT',
  JoinDate: 'DATETIME',
  LName: 'VARCHAR(50)',
};
/**
 * Read a schema description file and split it into source and destination
 * field maps.
 *
 * Parsing rules, applied to each trimmed line:
 *  - a line starting with "Source" switches to the source section;
 *  - a line starting with "Destination" switches to the destination section;
 *  - a line that is exactly "{" or "}" is skipped;
 *  - any other line of the form "<name><whitespace><type>[,]" is recorded in
 *    the current section; lines seen before the first section header are
 *    ignored.
 *
 * The type is everything after the first run of whitespace, so types that
 * contain spaces or commas (e.g. "DECIMAL(10, 2)") are kept intact; only a
 * single trailing separator comma is removed.
 *
 * NOTE(review): a field whose name itself begins with "Source" or
 * "Destination" is treated as a section header — confirm the file format
 * never contains such names.
 *
 * @param {string} filename - Path of the schema file, read synchronously as UTF-8.
 * @returns {{sourceFields: Object<string, string>, destinationFields: Object<string, string>}}
 *   Field-name -> type maps for each section.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function parseFieldsFromFile(filename) {
  const sourceFields = {};
  const destinationFields = {};
  // Map that receives parsed fields; null until a section header is seen.
  let currentTarget = null;

  const fileContent = fs.readFileSync(filename, 'utf-8');

  for (const rawLine of fileContent.split('\n')) {
    // trim() also drops the '\r' left behind by CRLF line endings.
    const line = rawLine.trim();

    if (line.startsWith('Source')) {
      currentTarget = sourceFields;
      continue;
    }
    if (line.startsWith('Destination')) {
      currentTarget = destinationFields;
      continue;
    }
    if (line === '{' || line === '}' || currentTarget === null) {
      continue;
    }

    // First token is the name; the remainder (any whitespace separator) is the type.
    const parts = /^(\S+)\s+(.+)$/.exec(line);
    if (parts === null) {
      continue;
    }

    const fieldName = parts[1].replace(',', '');
    // Strip only the trailing separator comma, never one inside the type.
    const fieldType = parts[2].replace(/,\s*$/, '').trim();
    if (fieldName === '' || fieldType === '') {
      continue;
    }

    currentTarget[fieldName] = fieldType;
  }

  return { sourceFields, destinationFields };
}
/**
 * First matching pass: for every source field, collect the destination
 * fields of the exact same type whose name scores highest under
 * fuzz.partial_ratio.
 *
 * All destination fields tied on the top score are kept, in destination
 * order. A source field with no same-typed destination maps to an empty
 * array.
 *
 * @param {Object<string, string>} sourceFields - Source field name -> type.
 * @param {Object<string, string>} destinationFields - Destination field name -> type.
 * @returns {Object<string, string[]>} Source field name -> best-scoring destination names.
 */
function matchFields(sourceFields, destinationFields) {
  const destinationEntries = Object.entries(destinationFields);
  const output = {};

  Object.entries(sourceFields).forEach(([sourceName, sourceType]) => {
    // Score only the candidates whose type string is identical.
    const scored = destinationEntries
      .filter(([, destinationType]) => destinationType === sourceType)
      .map(([destinationName]) => ({
        name: destinationName,
        score: fuzz.partial_ratio(sourceName, destinationName),
      }));

    // Highest score seen, starting from the same -1 floor as a "no match" marker.
    const topScore = scored.reduce(
      (best, candidate) => (candidate.score > best ? candidate.score : best),
      -1,
    );

    output[sourceName] = scored
      .filter((candidate) => candidate.score === topScore)
      .map((candidate) => candidate.name);
  });

  return output;
}
/**
 * Second matching pass: narrow each candidate list down to the single
 * candidate with the highest fuzz.token_sort_ratio against the source name.
 *
 * On a tie the earliest candidate wins. Source fields whose candidate list
 * is empty (or whose winner is falsy) are left out of the result.
 *
 * @param {Object<string, string[]>} output - Source field name -> candidate destination names.
 * @returns {Object<string, string[]>} Source field name -> one-element array with the winner.
 */
function refineMatches(output) {
  const refined = {};

  Object.entries(output).forEach(([sourceName, candidates]) => {
    let winner = null;
    let winnerScore = -1;

    for (const candidate of candidates) {
      const candidateScore = fuzz.token_sort_ratio(sourceName, candidate);
      // Only a strictly better score replaces the current winner.
      if (!(candidateScore > winnerScore)) {
        continue;
      }
      winnerScore = candidateScore;
      winner = candidate;
    }

    if (!winner) {
      return;
    }
    refined[sourceName] = [winner];
  });

  return refined;
}
/**
 * Script entry point: prints both schemas, then the result of each matching
 * pass (partial_ratio candidates first, token_sort_ratio refinement second).
 *
 * The schemas come from the file-level sourceFields / destinationFields
 * objects. To load them from a separate schema file instead, destructure the
 * result of parseFieldsFromFile('Sample.txt'); values fetched from an RDS
 * table, S3 or another file would need to be mapped into the same two
 * objects (checking the headers in the file) before matching.
 */
function main() {
  // Print a heading line followed by the value itself.
  const report = (heading, value) => {
    console.log(heading);
    console.log(value);
  };

  report("Source Fields:", sourceFields);
  report("\nDestination Fields:", destinationFields);

  const candidates = matchFields(sourceFields, destinationFields);
  report("\nOutput using partial_ratio:", candidates);

  // The refinement works on the candidate lists, not on the raw schemas.
  const autoMapped = refineMatches(candidates);
  report("\nAuto mapped Output using token_sort_ratio:", autoMapped);
}
// Run the script: main() is invoked unconditionally when this file is evaluated.
main();