|
| 1 | +'use strict'; |
| 2 | +const chai = require('chai'); |
| 3 | +const assert = chai.assert; |
| 4 | +const parquet = require('../parquet.js'); |
| 5 | +const child_process = require('child_process'); |
| 6 | + |
| 7 | +// helper function that runs parquet-tools dump inside a docker container and returns the stdout |
// Helper that runs `parquet-tools dump` inside a docker container and resolves
// with its stdout.
//
// @param {string} file - parquet file name, relative to the current working
//   directory (it is bind-mounted into the container at /home).
// @returns {Promise<string>} stdout of `parquet-tools dump --debug`.
//   Rejects with the exec error, or with the stderr text if the tool wrote
//   anything to stderr even on a zero exit code (treated as a failure here).
async function readParquetMr(file) {
  return new Promise((resolve, reject) => {
    // Use process.cwd() rather than the shell's $PWD (which may be unset),
    // and quote the host path so directories containing spaces don't break
    // the generated shell command.
    const dockerCmd = `docker run -v "${process.cwd()}":/home nathanhowell/parquet-tools dump --debug /home/${file}`;
    child_process.exec(dockerCmd, (err, stdout, stderr) => {
      if (err || stderr) {
        reject(err || stderr);
      } else {
        resolve(stdout);
      }
    });
  });
}
| 20 | + |
// Interop tests: write files with parquetjs, then verify that the reference
// Java implementation (parquet-mr's parquet-tools, run via docker) dumps
// exactly the expected content. The expected strings are the verbatim stdout
// of `parquet-tools dump --debug`.
describe('Parquet-mr', function() {
  it('should read a simple parquetjs file', async function() {
    const schema = new parquet.ParquetSchema({
      name: { type: 'UTF8' },
      quantity: { type: 'INT64' },
      price: { type: 'DOUBLE' },
    });

    const rows = [
      { name: 'apples', quantity: 10, price: 2.6 },
      { name: 'oranges', quantity: 20, price: 2.7},
      { name: 'kiwi', price: 4.2, quantity: 4},
    ];

    const writer = await parquet.ParquetWriter.openFile(schema, 'test-mr.parquet');

    for (const row of rows) {
      await writer.appendRow(row);
    }

    await writer.close();

    const result = await readParquetMr('test-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nname: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:51/51/1.00 VC:3 ENC:PLAIN,RLE\nquantity: INT64 UNCOMPRESSED DO:0 FPO:79 SZ:46/46/1.00 VC:3 ENC:PLAIN,RLE\nprice: DOUBLE UNCOMPRESSED DO:0 FPO:154 SZ:46/46/1.00 VC:3 ENC:PLAIN,RLE\n\n name TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:29 VC:3\n\n quantity TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3\n\n price TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3\n\nBINARY name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:apples\nvalue 2: R:0 D:0 V:oranges\nvalue 3: R:0 D:0 V:kiwi\n\nINT64 quantity \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:10\nvalue 2: R:0 D:0 V:20\nvalue 3: R:0 D:0 V:4\n\nDOUBLE price \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:2.6\nvalue 2: R:0 D:0 V:2.7\nvalue 3: R:0 D:0 V:4.2\n');
  });

  it('should read a nested field', async function() {
    const schema = new parquet.ParquetSchema({
      fruit: {
        fields: {
          name: { type: 'UTF8'},
          quantity: { type: 'INT32'}
        }
      }
    });

    const writer = await parquet.ParquetWriter.openFile(schema, 'test2-mr.parquet');

    await writer.appendRow({
      fruit: {
        name: 'apple',
        quantity: 9
      }
    });

    await writer.close();

    const result = await readParquetMr('test2-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nfruit: \n.name: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:31/31/1.00 VC:1 ENC:PLAIN,RLE\n.quantity: INT32 UNCOMPRESSED DO:0 FPO:65 SZ:26/26/1.00 VC:1 ENC:PLAIN,RLE\n\n fruit.name TV=1 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:9 VC:1\n\n fruit.quantity TV=1 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:4 VC:1\n\nBINARY fruit.name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:0 V:apple\n\nINT32 fruit.quantity \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:0 V:9\n');
  });

  it('should read a parquetjs file with optional value', async function() {
    const schema = new parquet.ParquetSchema({
      name: { type: 'UTF8', optional: true }
    });

    const rows = [
      { name: 'apples' },
      { name: 'oranges' },
      { name: 'kiwi' },
    ];

    const writer = await parquet.ParquetWriter.openFile(schema, 'test3-mr.parquet');
    for (const row of rows) {
      await writer.appendRow(row);
    }

    await writer.close();

    const result = await readParquetMr('test3-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nname: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:53/53/1.00 VC:3 ENC:PLAIN,RLE\n\n name TV=3 RL=0 DL=1\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:31 VC:3\n\nBINARY name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:1 V:apples\nvalue 2: R:0 D:1 V:oranges\nvalue 3: R:0 D:1 V:kiwi\n');
  });

  it('should read repeated fields', async function() {
    const schema = new parquet.ParquetSchema({
      stock: {
        repeated: true,
        fields: {
          warehouse: { type: 'UTF8' },
        }
      }
    });

    const writer = await parquet.ParquetWriter.openFile(schema, 'test4-mr.parquet');

    await writer.appendRow({
      stock: [
        {warehouse: 'Newark'}
      ]
    });

    await writer.close();

    const result = await readParquetMr('test4-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nstock: \n.warehouse: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:36/36/1.00 VC:1 ENC:PLAIN,RLE\n\n stock.warehouse TV=1 RL=1 DL=1\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:14 VC:1\n\nBINARY stock.warehouse \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:1 V:Newark\n');
  });
});
0 commit comments