diff --git a/lib/reader.js b/lib/reader.js index 43e78d9d..e6f6ec11 100644 --- a/lib/reader.js +++ b/lib/reader.js @@ -481,8 +481,17 @@ function decodeDataPageV2(cursor, header, opts) { } function decodeSchema(schemaElements) { + const {schema} = decodeSchemaWithLimit( + schemaElements, + schemaElements.length); + return schema; +} + +function decodeSchemaWithLimit(schemaElements, elementLimit) { let schema = {}; - for (let idx = 0; idx < schemaElements.length; ) { + let elementsRead = 0; + let totalElementsRead = 0; + for (let idx = 0; idx < schemaElements.length && elementsRead < elementLimit; ) { const schemaElement = schemaElements[idx]; let repetitionType = parquet_util.getThriftEnum( @@ -503,12 +512,16 @@ function decodeSchema(schemaElements) { }; if (schemaElement.num_children > 0) { + const children = decodeSchemaWithLimit( + schemaElements.slice(idx + 1), + schemaElement.num_children); schema[schemaElement.name] = { optional: optional, repeated: repeated, - fields: decodeSchema( - schemaElements.slice(idx + 1, idx + 1 + schemaElement.num_children)) + fields: children.schema }; + idx += children.totalElementsRead; + totalElementsRead += children.totalElementsRead; } else { let logicalType = parquet_util.getThriftEnum( parquet_thrift.Type, @@ -528,10 +541,12 @@ function decodeSchema(schemaElements) { }; } - idx += (schemaElement.num_children || 0) + 1; + elementsRead++; + totalElementsRead++; + idx++; } - return schema; + return {schema, totalElementsRead}; } module.exports = { diff --git a/test/integration.js b/test/integration.js index 5ed21abd..514e437d 100644 --- a/test/integration.js +++ b/test/integration.js @@ -24,6 +24,13 @@ function mkTestSchema(opts) { fields: { quantity: { type: 'INT64', repeated: true }, warehouse: { type: 'UTF8', compression: opts.compression }, + flags: { + optional: true, + fields: { + heavy: { type: 'BOOLEAN', optional: true }, + bulky: { type: 'BOOLEAN', optional: true }, + }, + }, } }, colour: { type: 'UTF8', repeated: true, compression: opts.compression }, @@ -44,8 +51,8 @@ function mkTestRows(opts) { finger: "FNORD", inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: 10, warehouse: "A" }, - { quantity: 20, warehouse: "B" } + { quantity: 10, warehouse: "A", flags: { heavy: true } }, + { quantity: 20, warehouse: "B", flags: { heavy: true } } ], colour: [ 'green', 'red' ] }); @@ -74,8 +81,8 @@ function mkTestRows(opts) { finger: "FNORD", inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: 42, warehouse: "f" }, - { quantity: 20, warehouse: "x" } + { quantity: 42, warehouse: "f", flags: { heavy: true, bulky: true } }, + { quantity: 20, warehouse: "x", flags: { heavy: true } } ], colour: [ 'green', 'brown' ], meta_json: { expected_ship_date: TEST_VTIME } @@ -116,7 +123,7 @@ async function readTestFile() { assert.deepEqual(reader.getMetadata(), { "myuid": "420", "fnord": "dronf" }) let schema = reader.getSchema(); - assert.equal(schema.fieldList.length, 12); + assert.equal(schema.fieldList.length, 15); assert(schema.fields.name); assert(schema.fields.stock); assert(schema.fields.stock.fields.quantity); @@ -150,7 +157,7 @@ async function readTestFile() { assert.equal(c.rLevelMax, 1); assert.equal(c.dLevelMax, 1); assert.equal(!!c.isNested, true); - assert.equal(c.fieldCount, 2); + assert.equal(c.fieldCount, 3); } { @@ -183,6 +190,51 @@ async function readTestFile() { assert.equal(c.fieldCount, undefined); } + { + const c = schema.fields.stock.fields.flags; + assert.equal(c.name, 'flags'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 2); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 2); + } + + { + const c = schema.fields.stock.fields.flags.fields.heavy; + assert.equal(c.name, 'heavy'); + assert.equal(c.primitiveType, 'BOOLEAN'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags', 'heavy']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 3); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.stock.fields.flags.fields.bulky; + assert.equal(c.name, 'bulky'); + assert.equal(c.primitiveType, 'BOOLEAN'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags', 'bulky']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 3); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + { const c = schema.fields.price; assert.equal(c.name, 'price'); @@ -210,8 +262,8 @@ async function readTestFile() { finger: Buffer.from("FNORD"), inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: [10], warehouse: "A" }, - { quantity: [20], warehouse: "B" } + { quantity: [10], warehouse: "A", flags: { heavy: true } }, + { quantity: [20], warehouse: "B", flags: { heavy: true } } ], colour: [ 'green', 'red' ] }); @@ -238,8 +290,8 @@ async function readTestFile() { finger: Buffer.from("FNORD"), inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: [42], warehouse: "f" }, - { quantity: [20], warehouse: "x" } + { quantity: [42], warehouse: "f", flags: { heavy: true, bulky: true } }, + { quantity: [20], warehouse: "x", flags: { heavy: true } } ], colour: [ 'green', 'brown' ], meta_json: { expected_ship_date: TEST_VTIME } diff --git a/test/schema.js b/test/schema.js index 7b290711..d846e491 100644 --- a/test/schema.js +++ b/test/schema.js @@ -467,4 +467,155 @@ describe('ParquetSchema', function() { } }); + it('should assign correct defaults in a nested schema with nested fields', function() { + var schema = new parquet.ParquetSchema({ + name: { type: 'UTF8' }, + stock: { + repeated: true, + fields: { + quantity: { type: 'INT64', optional: true }, + warehouse: { type: 'UTF8' }, + flags: { + optional: true, + fields: { + heavy: { type: 'BOOLEAN', optional: true }, + bulky: { type: 'BOOLEAN', optional: true }, + }, + }, + } + }, + price: { type: 'DOUBLE' }, + }); + + assert.equal(schema.fieldList.length, 8); + assert(schema.fields.name); + assert(schema.fields.stock); + assert(schema.fields.stock.fields.quantity); + assert(schema.fields.stock.fields.warehouse); + assert(schema.fields.stock.fields.flags); + assert(schema.fields.stock.fields.flags.fields.heavy); + assert(schema.fields.stock.fields.flags.fields.bulky); + assert(schema.fields.price); + + { + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'BYTE_ARRAY'); + assert.equal(c.originalType, 'UTF8'); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'REQUIRED'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 0); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.stock; + assert.equal(c.name, 'stock'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock']); + assert.equal(c.repetitionType, 'REPEATED'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 3); + } + + { + const c = schema.fields.stock.fields.quantity; + assert.equal(c.name, 'quantity'); + assert.equal(c.primitiveType, 'INT64'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'quantity']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 2); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.stock.fields.warehouse; + assert.equal(c.name, 'warehouse'); + assert.equal(c.primitiveType, 'BYTE_ARRAY'); + assert.equal(c.originalType, 'UTF8'); + assert.deepEqual(c.path, ['stock', 'warehouse']); + assert.equal(c.repetitionType, 'REQUIRED'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.stock.fields.flags; + assert.equal(c.name, 'flags'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 2); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 2); + } + + { + const c = schema.fields.stock.fields.flags.fields.heavy; + assert.equal(c.name, 'heavy'); + assert.equal(c.primitiveType, 'BOOLEAN'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags', 'heavy']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 3); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.stock.fields.flags.fields.bulky; + assert.equal(c.name, 'bulky'); + assert.equal(c.primitiveType, 'BOOLEAN'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['stock', 'flags', 'bulky']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 1); + assert.equal(c.dLevelMax, 3); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + + { + const c = schema.fields.price; + assert.equal(c.name, 'price'); + assert.equal(c.primitiveType, 'DOUBLE'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['price']); + assert.equal(c.repetitionType, 'REQUIRED'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 0); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + } + }); + });