Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions lib/reader.js
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,17 @@ function decodeDataPageV2(cursor, header, opts) {
}

/**
 * Decode a flat list of schema elements into a schema object.
 *
 * Delegates to decodeSchemaWithLimit, passing the full length of the list as
 * the element limit, and hands back only the `schema` property of the result.
 *
 * @param {Array} schemaElements - flat list of schema elements
 * @returns {Object} the `schema` property produced by decodeSchemaWithLimit
 */
function decodeSchema(schemaElements) {
  const elementLimit = schemaElements.length;
  const decoded = decodeSchemaWithLimit(schemaElements, elementLimit);
  return decoded.schema;
}

function decodeSchemaWithLimit(schemaElements, elementLimit) {
let schema = {};
for (let idx = 0; idx < schemaElements.length; ) {
let elementsRead = 0;
let totalElementsRead = 0;
for (let idx = 0; idx < schemaElements.length && elementsRead < elementLimit; ) {
const schemaElement = schemaElements[idx];

let repetitionType = parquet_util.getThriftEnum(
Expand All @@ -503,12 +512,16 @@ function decodeSchema(schemaElements) {
};

if (schemaElement.num_children > 0) {
const children = decodeSchemaWithLimit(
schemaElements.slice(idx + 1),
schemaElement.num_children);
schema[schemaElement.name] = {
optional: optional,
repeated: repeated,
fields: decodeSchema(
schemaElements.slice(idx + 1, idx + 1 + schemaElement.num_children))
fields: children.schema
};
idx += children.totalElementsRead;
totalElementsRead += children.totalElementsRead;
} else {
let logicalType = parquet_util.getThriftEnum(
parquet_thrift.Type,
Expand All @@ -528,10 +541,12 @@ function decodeSchema(schemaElements) {
};
}

idx += (schemaElement.num_children || 0) + 1;
elementsRead++;
totalElementsRead++;
idx++;
}

return schema;
return {schema, totalElementsRead};
}

module.exports = {
Expand Down
72 changes: 62 additions & 10 deletions test/integration.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ function mkTestSchema(opts) {
fields: {
quantity: { type: 'INT64', repeated: true },
warehouse: { type: 'UTF8', compression: opts.compression },
flags: {
optional: true,
fields: {
heavy: { type: 'BOOLEAN', optional: true },
bulky: { type: 'BOOLEAN', optional: true },
},
},
}
},
colour: { type: 'UTF8', repeated: true, compression: opts.compression },
Expand All @@ -44,8 +51,8 @@ function mkTestRows(opts) {
finger: "FNORD",
inter: { months: 42, days: 23, milliseconds: 777 },
stock: [
{ quantity: 10, warehouse: "A" },
{ quantity: 20, warehouse: "B" }
{ quantity: 10, warehouse: "A", flags: { heavy: true } },
{ quantity: 20, warehouse: "B", flags: { heavy: true } }
],
colour: [ 'green', 'red' ]
});
Expand Down Expand Up @@ -74,8 +81,8 @@ function mkTestRows(opts) {
finger: "FNORD",
inter: { months: 42, days: 23, milliseconds: 777 },
stock: [
{ quantity: 42, warehouse: "f" },
{ quantity: 20, warehouse: "x" }
{ quantity: 42, warehouse: "f", flags: { heavy: true, bulky: true } },
{ quantity: 20, warehouse: "x", flags: { heavy: true } }
],
colour: [ 'green', 'brown' ],
meta_json: { expected_ship_date: TEST_VTIME }
Expand Down Expand Up @@ -116,7 +123,7 @@ async function readTestFile() {
assert.deepEqual(reader.getMetadata(), { "myuid": "420", "fnord": "dronf" })

let schema = reader.getSchema();
assert.equal(schema.fieldList.length, 12);
assert.equal(schema.fieldList.length, 15);
assert(schema.fields.name);
assert(schema.fields.stock);
assert(schema.fields.stock.fields.quantity);
Expand Down Expand Up @@ -150,7 +157,7 @@ async function readTestFile() {
assert.equal(c.rLevelMax, 1);
assert.equal(c.dLevelMax, 1);
assert.equal(!!c.isNested, true);
assert.equal(c.fieldCount, 2);
assert.equal(c.fieldCount, 3);
}

{
Expand Down Expand Up @@ -183,6 +190,51 @@ async function readTestFile() {
assert.equal(c.fieldCount, undefined);
}

{
const c = schema.fields.stock.fields.flags;
assert.equal(c.name, 'flags');
assert.equal(c.primitiveType, undefined);
assert.equal(c.originalType, undefined);
assert.deepEqual(c.path, ['stock', 'flags']);
assert.equal(c.repetitionType, 'OPTIONAL');
assert.equal(c.encoding, undefined);
assert.equal(c.compression, undefined);
assert.equal(c.rLevelMax, 1);
assert.equal(c.dLevelMax, 2);
assert.equal(!!c.isNested, true);
assert.equal(c.fieldCount, 2);
}

{
const c = schema.fields.stock.fields.flags.fields.heavy;
assert.equal(c.name, 'heavy');
assert.equal(c.primitiveType, 'BOOLEAN');
assert.equal(c.originalType, undefined);
assert.deepEqual(c.path, ['stock', 'flags', 'heavy']);
assert.equal(c.repetitionType, 'OPTIONAL');
assert.equal(c.encoding, 'PLAIN');
assert.equal(c.compression, 'UNCOMPRESSED');
assert.equal(c.rLevelMax, 1);
assert.equal(c.dLevelMax, 3);
assert.equal(!!c.isNested, false);
assert.equal(c.fieldCount, undefined);
}

{
const c = schema.fields.stock.fields.flags.fields.bulky;
assert.equal(c.name, 'bulky');
assert.equal(c.primitiveType, 'BOOLEAN');
assert.equal(c.originalType, undefined);
assert.deepEqual(c.path, ['stock', 'flags', 'bulky']);
assert.equal(c.repetitionType, 'OPTIONAL');
assert.equal(c.encoding, 'PLAIN');
assert.equal(c.compression, 'UNCOMPRESSED');
assert.equal(c.rLevelMax, 1);
assert.equal(c.dLevelMax, 3);
assert.equal(!!c.isNested, false);
assert.equal(c.fieldCount, undefined);
}

{
const c = schema.fields.price;
assert.equal(c.name, 'price');
Expand Down Expand Up @@ -210,8 +262,8 @@ async function readTestFile() {
finger: Buffer.from("FNORD"),
inter: { months: 42, days: 23, milliseconds: 777 },
stock: [
{ quantity: [10], warehouse: "A" },
{ quantity: [20], warehouse: "B" }
{ quantity: [10], warehouse: "A", flags: { heavy: true } },
{ quantity: [20], warehouse: "B", flags: { heavy: true } }
],
colour: [ 'green', 'red' ]
});
Expand All @@ -238,8 +290,8 @@ async function readTestFile() {
finger: Buffer.from("FNORD"),
inter: { months: 42, days: 23, milliseconds: 777 },
stock: [
{ quantity: [42], warehouse: "f" },
{ quantity: [20], warehouse: "x" }
{ quantity: [42], warehouse: "f", flags: { heavy: true, bulky: true } },
{ quantity: [20], warehouse: "x", flags: { heavy: true } }
],
colour: [ 'green', 'brown' ],
meta_json: { expected_ship_date: TEST_VTIME }
Expand Down
151 changes: 151 additions & 0 deletions test/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -467,4 +467,155 @@ describe('ParquetSchema', function() {
}
});

it('should assign correct defaults in a nested schema with nested fields', function() {
  // Schema under test: a repeated group (`stock`) that itself contains an
  // optional nested group (`flags`), surrounded by plain top-level columns.
  const schema = new parquet.ParquetSchema({
    name: { type: 'UTF8' },
    stock: {
      repeated: true,
      fields: {
        quantity: { type: 'INT64', optional: true },
        warehouse: { type: 'UTF8' },
        flags: {
          optional: true,
          fields: {
            heavy: { type: 'BOOLEAN', optional: true },
            bulky: { type: 'BOOLEAN', optional: true },
          },
        },
      }
    },
    price: { type: 'DOUBLE' },
  });

  assert.equal(schema.fieldList.length, 8);

  // Presence checks run outermost-first so a missing group fails on its own
  // assertion before any of its children are dereferenced.
  const topLevel = schema.fields;
  assert(topLevel.name);
  assert(topLevel.stock);
  const stockFields = topLevel.stock.fields;
  assert(stockFields.quantity);
  assert(stockFields.warehouse);
  assert(stockFields.flags);
  const flagFields = stockFields.flags.fields;
  assert(flagFields.heavy);
  assert(flagFields.bulky);
  assert(topLevel.price);

  // Compares every derived attribute of one schema field against its
  // expected value, in the same order for each field.
  const expectField = (field, expected) => {
    assert.equal(field.name, expected.name);
    assert.equal(field.primitiveType, expected.primitiveType);
    assert.equal(field.originalType, expected.originalType);
    assert.deepEqual(field.path, expected.path);
    assert.equal(field.repetitionType, expected.repetitionType);
    assert.equal(field.encoding, expected.encoding);
    assert.equal(field.compression, expected.compression);
    assert.equal(field.rLevelMax, expected.rLevelMax);
    assert.equal(field.dLevelMax, expected.dLevelMax);
    assert.equal(!!field.isNested, expected.isNested);
    assert.equal(field.fieldCount, expected.fieldCount);
  };

  // One row per field, listed in schema declaration order.
  const expectations = [
    [topLevel.name, {
      name: 'name',
      primitiveType: 'BYTE_ARRAY',
      originalType: 'UTF8',
      path: ['name'],
      repetitionType: 'REQUIRED',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 0,
      dLevelMax: 0,
      isNested: false,
      fieldCount: undefined,
    }],
    [topLevel.stock, {
      name: 'stock',
      primitiveType: undefined,
      originalType: undefined,
      path: ['stock'],
      repetitionType: 'REPEATED',
      encoding: undefined,
      compression: undefined,
      rLevelMax: 1,
      dLevelMax: 1,
      isNested: true,
      fieldCount: 3,
    }],
    [stockFields.quantity, {
      name: 'quantity',
      primitiveType: 'INT64',
      originalType: undefined,
      path: ['stock', 'quantity'],
      repetitionType: 'OPTIONAL',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 1,
      dLevelMax: 2,
      isNested: false,
      fieldCount: undefined,
    }],
    [stockFields.warehouse, {
      name: 'warehouse',
      primitiveType: 'BYTE_ARRAY',
      originalType: 'UTF8',
      path: ['stock', 'warehouse'],
      repetitionType: 'REQUIRED',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 1,
      dLevelMax: 1,
      isNested: false,
      fieldCount: undefined,
    }],
    [stockFields.flags, {
      name: 'flags',
      primitiveType: undefined,
      originalType: undefined,
      path: ['stock', 'flags'],
      repetitionType: 'OPTIONAL',
      encoding: undefined,
      compression: undefined,
      rLevelMax: 1,
      dLevelMax: 2,
      isNested: true,
      fieldCount: 2,
    }],
    [flagFields.heavy, {
      name: 'heavy',
      primitiveType: 'BOOLEAN',
      originalType: undefined,
      path: ['stock', 'flags', 'heavy'],
      repetitionType: 'OPTIONAL',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 1,
      dLevelMax: 3,
      isNested: false,
      fieldCount: undefined,
    }],
    [flagFields.bulky, {
      name: 'bulky',
      primitiveType: 'BOOLEAN',
      originalType: undefined,
      path: ['stock', 'flags', 'bulky'],
      repetitionType: 'OPTIONAL',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 1,
      dLevelMax: 3,
      isNested: false,
      fieldCount: undefined,
    }],
    [topLevel.price, {
      name: 'price',
      primitiveType: 'DOUBLE',
      originalType: undefined,
      path: ['price'],
      repetitionType: 'REQUIRED',
      encoding: 'PLAIN',
      compression: 'UNCOMPRESSED',
      rLevelMax: 0,
      dLevelMax: 0,
      isNested: false,
      fieldCount: undefined,
    }],
  ];

  for (const [field, expected] of expectations) {
    expectField(field, expected);
  }
});

});