|
// Environment bootstrap.
// Outside a CommonJS environment (no `exports` object) only `__dirname` is given
// a value here; `assert` and `alasql` are presumably supplied as globals by the
// hosting page (not visible in this file).
// Under CommonJS (Node.js) the assertion library, alasql itself (from the parent
// directory) and the file-system module are loaded explicitly.
if (typeof exports !== 'object') {
	__dirname = '.';
} else {
	var assert = require('assert');
	var alasql = require('..');
	var fs = require('fs');
}
| 8 | + |
/**
 * Reports whether `text` contains an ampersand that does not begin one of the
 * five predefined XML entities (&amp; &lt; &gt; &quot; &apos;) or a numeric
 * character reference, decimal (&#39;) or hexadecimal (&#x27;).
 *
 * @param {string} text - Raw (still escaped) text taken from the XML output.
 * @returns {boolean} true when at least one bare `&` is present.
 */
function hasUnescapedAmpersand(text) {
	return /&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/.test(text);
}

/**
 * Collects the raw inner text of every <Data ...>...</Data> element in `xml`.
 *
 * The whole document is scanned with a global, lazy pattern, so all cells are
 * returned even when several of them share one physical line, and a cell that
 * contains a raw `<` or `>` is still captured (up to the first closing tag).
 *
 * @param {string} xml - Complete XML document as text.
 * @returns {string[]} Inner texts in document order (possibly empty strings).
 */
function extractDataCellTexts(xml) {
	var cellPattern = /<Data[^>]*>([\s\S]*?)<\/Data>/g;
	var texts = [];
	var match;
	while ((match = cellPattern.exec(xml)) !== null) {
		texts.push(match[1]);
	}
	return texts;
}

/**
 * Builds one message for every escaping problem found in the given cell texts:
 * a bare `&`, or a raw `<` / `>` character.
 *
 * @param {string[]} cellTexts - Values as returned by extractDataCellTexts.
 * @returns {string[]} Human-readable messages; empty when every cell is clean.
 */
function findXmlEscapingErrors(cellTexts) {
	var errors = [];
	cellTexts.forEach(function (text, i) {
		if (hasUnescapedAmpersand(text)) {
			errors.push('Data cell ' + (i + 1) + ': unescaped & in: ' + text);
		}
		if (/[<>]/.test(text)) {
			errors.push('Data cell ' + (i + 1) + ': unescaped < or > in: ' + text);
		}
	});
	return errors;
}

// Exports rows containing XML special characters through XLSXML and inspects the
// generated file. The tests are registered only when `exports` is an object
// (CommonJS), because they read the output back with `fs`.
describe('Test 525 - XLSXML XML character escaping', function () {
	if (typeof exports === 'object') {
		it('A) Export data with special XML characters', function (done) {
			var data = [
				{name: 'Test & Co', value: '<10'},
				{name: 'Quotes "test"', value: "It's > 5"},
				{name: 'Ampersand & less', value: '3 < 5 & 7 > 6'},
				{name: 'Normal text', value: 100},
			];

			var outfile = __dirname + '/restest525.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				// Read the file back and inspect the generated XML text
				fs.readFile(outfile, 'utf8', function (err, content) {
					if (err) {
						done(err);
						return;
					}

					// Expected escaping of special characters:
					// & should be &amp;
					// < should be &lt;
					// > should be &gt;
					// " should be &quot; (in attributes)
					// ' should be &apos; or &#39; (in attributes)

					try {
						// The document must start as a SpreadsheetML workbook
						assert(content.includes('<?xml version="1.0"?>'), 'Should have XML header');
						assert(
							content.includes('xmlns="urn:schemas-microsoft-com:office:spreadsheet"'),
							'Should have proper namespace'
						);

						// Guard against a vacuous pass: there must be cells to validate
						var cellTexts = extractDataCellTexts(content);
						assert(cellTexts.length > 0, 'Should contain <Data> cells to validate');

						// No cell may contain an ampersand that is not part of a reference
						cellTexts.forEach(function (innerText) {
							if (hasUnescapedAmpersand(innerText)) {
								throw new Error('Found unescaped ampersand in: ' + innerText);
							}
						});

						done();
					} catch (e) {
						done(e);
					}
				});
			});
		});

		it('B) Verify exported data can be read back', function (done) {
			var data = [
				{name: 'Test & Co', value: '<10'},
				{name: 'Quotes "test"', value: "It's > 5"},
			];

			var outfile = __dirname + '/restest525b.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				// Try to read it back using alasql's XML parser
				alasql('SELECT * FROM XML(?)', [outfile], function (res) {
					// Route an assertion failure through done() instead of throwing
					// from inside the asynchronous callback
					try {
						// The file should at least be parseable
						assert(res, 'Should be able to read the file');
						done();
					} catch (e) {
						done(e);
					}
				});
			});
		});

		it('C) Test all five XML special characters', function (done) {
			var data = [
				{
					text: 'Contains & ampersand',
					description: 'First < second',
				},
				{
					text: 'Greater > than',
					description: 'Quote "in" text',
				},
				{
					text: "Apostrophe's here",
					description: 'All: < > & " \' together',
				},
			];

			var outfile = __dirname + '/restest525c.xls';
			alasql('SELECT * INTO XLSXML(?,{headers:true}) FROM ?', [outfile, data], function () {
				fs.readFile(outfile, 'utf8', function (err, content) {
					if (err) {
						done(err);
						return;
					}

					// Validate every <Data> cell in the document, not just the first
					// one on each line. Only &, < and > are checked: this check does
					// not treat raw quotes or apostrophes in cell text as errors.
					var cellTexts = extractDataCellTexts(content);
					if (cellTexts.length === 0) {
						done(new Error('No <Data> cells found to validate'));
						return;
					}

					var errors = findXmlEscapingErrors(cellTexts);
					if (errors.length > 0) {
						done(new Error('XML validation errors:\n' + errors.join('\n')));
					} else {
						done();
					}
				});
			});
		});
	}
});
0 commit comments