Skip to content

Commit 39cfeb5

Browse files
Copilot and mathiasrw authored
Escape XML characters in XLSXML export to fix #525 (#2296)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]> Co-authored-by: Mathias Wulff <[email protected]>
1 parent 09aeb13 commit 39cfeb5

File tree

2 files changed

+162
-7
lines changed

2 files changed

+162
-7
lines changed

src/832xlsxml.js

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ alasql.into.XLSXML = function (filename, opts, data, columns, cb) {
3030
return res;
3131

3232
function toXML() {
33+
/**
 * Escape a value so it can be embedded in XML text or attribute content.
 *
 * The value is converted with String(). Characters that XML 1.0 does not
 * allow in a document at all (most C0 control characters, U+FFFE, U+FFFF)
 * are removed, because they make the output non-well-formed even when
 * written as character references. The five markup characters
 * & < > " ' are then replaced by their predefined entities.
 *
 * @param {*} str - Value to escape; null and undefined yield ''.
 * @returns {string} Text that is safe to place inside an XML element or attribute.
 */
function escapeXML(str) {
	if (str === null || str === undefined) return '';
	return (
		String(str)
			// Tab (\x09), LF (\x0A) and CR (\x0D) are legal XML characters and are kept
			.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g, '')
			// '&' must be handled first so the entities produced below are not re-escaped
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&apos;')
	);
}
43+
3344
var s1 =
3445
'<?xml version="1.0"?> \
3546
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" \
@@ -196,9 +207,9 @@ alasql.into.XLSXML = function (filename, opts, data, columns, cb) {
196207
// Column title
197208
if (typeof column.title != 'undefined') {
198209
if (typeof column.title == 'function') {
199-
s3 += column.title(sheet, column, columnidx);
210+
s3 += escapeXML(column.title(sheet, column, columnidx));
200211
} else {
201-
s3 += column.title;
212+
s3 += escapeXML(column.title);
202213
}
203214
}
204215
s3 += '</Data></Cell>';
@@ -319,19 +330,19 @@ alasql.into.XLSXML = function (filename, opts, data, columns, cb) {
319330
s3 += '';
320331
} else if (typeof format != 'undefined') {
321332
if (typeof format == 'function') {
322-
s3 += format(value);
333+
s3 += escapeXML(format(value));
323334
} else if (typeof format == 'string') {
324-
s3 += value; // TODO - add string format
335+
s3 += escapeXML(value); // TODO - add string format
325336
} else {
326337
throw new Error('Unknown format type. Should be function or string');
327338
}
328339
} else {
329340
if (typeid == 'number' || typeid == 'date') {
330-
s3 += value.toString();
341+
s3 += escapeXML(value.toString());
331342
} else if (typeid == 'money') {
332-
s3 += (+value).toFixed(2);
343+
s3 += escapeXML((+value).toFixed(2));
333344
} else {
334-
s3 += value;
345+
s3 += escapeXML(value);
335346
}
336347
}
337348

test/test525.js

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
if (typeof exports === 'object') {
2+
var assert = require('assert');
3+
var alasql = require('..');
4+
var fs = require('fs');
5+
} else {
6+
__dirname = '.';
7+
}
8+
9+
describe('Test 525 - XLSXML XML character escaping', function () {
	// Matches an '&' that does not begin one of the accepted entity references.
	var UNESCAPED_AMP = /&(?!(amp|lt|gt|quot|apos|#\d+);)/;

	/**
	 * Collect the text content of every <Data> element in the workbook.
	 * The whole document is scanned with a global regex because several cells
	 * may share one line, and the lazy capture deliberately includes raw '<'
	 * so that an unescaped cell is reported instead of being skipped.
	 *
	 * @param {string} content - Full text of the exported file.
	 * @returns {string[]} Inner text of each <Data> element, in document order.
	 */
	function dataCellTexts(content) {
		var texts = [];
		var cellPattern = /<Data(?:\s[^>]*)?>([\s\S]*?)<\/Data>/g;
		var found;
		while ((found = cellPattern.exec(content)) !== null) {
			texts.push(found[1]);
		}
		return texts;
	}

	/**
	 * Assert that no <Data> cell contains a raw '&', '<' or '>'.
	 *
	 * @param {string} content - Full text of the exported file.
	 * @throws {Error} Listing every offending cell, or when no cell is found.
	 */
	function assertCellsEscaped(content) {
		var texts = dataCellTexts(content);
		// Guard against a vacuous pass when the extraction regex matches nothing
		assert(texts.length > 0, 'Should contain at least one <Data> cell');

		var errors = [];
		texts.forEach(function (text, i) {
			if (UNESCAPED_AMP.test(text)) {
				errors.push('Cell ' + (i + 1) + ': unescaped & in: ' + text);
			}
			if (/[<>]/.test(text)) {
				errors.push('Cell ' + (i + 1) + ': unescaped < or > in: ' + text);
			}
		});

		if (errors.length > 0) {
			throw new Error('XML validation errors:\n' + errors.join('\n'));
		}
	}

	/**
	 * Run assertions made inside an async callback and report the outcome
	 * through mocha's done() exactly once.
	 *
	 * @param {Function} done - Mocha completion callback.
	 * @param {Function} checks - Function containing the assertions.
	 */
	function finish(done, checks) {
		var failure;
		try {
			checks();
		} catch (e) {
			failure = e;
		}
		done(failure);
	}

	if (typeof exports === 'object') {
		it('A) Export data with special XML characters', function (done) {
			var data = [
				{name: 'Test & Co', value: '<10'},
				{name: 'Quotes "test"', value: "It's > 5"},
				{name: 'Ampersand & less', value: '3 < 5 & 7 > 6'},
				{name: 'Normal text', value: 100},
			];

			var outfile = __dirname + '/restest525.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				fs.readFile(outfile, 'utf8', function (err, content) {
					if (err) {
						done(err);
						return;
					}

					finish(done, function () {
						// The document envelope must be intact
						assert(content.includes('<?xml version="1.0"?>'), 'Should have XML header');
						assert(
							content.includes('xmlns="urn:schemas-microsoft-com:office:spreadsheet"'),
							'Should have proper namespace'
						);

						// The special characters must appear in their escaped form
						assert(content.includes('Test &amp; Co'), 'Should escape & as &amp;');
						assert(content.includes('&lt;10'), 'Should escape < as &lt;');

						// ...and never in raw form inside a data cell
						assertCellsEscaped(content);
					});
				});
			});
		});

		it('B) Verify exported data can be read back', function (done) {
			var data = [
				{name: 'Test & Co', value: '<10'},
				{name: 'Quotes "test"', value: "It's > 5"},
			];

			var outfile = __dirname + '/restest525b.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				// Try to read it back using alasql's XML parser
				alasql('SELECT * FROM XML(?)', [outfile], function (res) {
					// The file should at least be parseable
					finish(done, function () {
						assert(res, 'Should be able to read the file');
					});
				});
			});
		});

		it('C) Test all five XML special characters', function (done) {
			var data = [
				{
					text: 'Contains & ampersand',
					description: 'First < second',
				},
				{
					text: 'Greater > than',
					description: 'Quote "in" text',
				},
				{
					text: "Apostrophe's here",
					description: 'All: < > & " \' together',
				},
			];

			var outfile = __dirname + '/restest525c.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				fs.readFile(outfile, 'utf8', function (err, content) {
					if (err) {
						done(err);
						return;
					}

					finish(done, function () {
						// Every one of the five characters must come out as its entity
						assert(
							content.includes('All: &lt; &gt; &amp; &quot; &apos; together'),
							'Should escape all five special characters'
						);
						assert(content.includes('Apostrophe&apos;s here'), "Should escape ' as &apos;");

						assertCellsEscaped(content);
					});
				});
			});
		});
	}
});

0 commit comments

Comments
 (0)