Skip to content

Commit 0c7948d

Browse files
committed
Add parquet-mr tests
1 parent 07fb2fd commit 0c7948d

2 files changed

Lines changed: 125 additions & 0 deletions

File tree

.travis.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,11 @@ deploy:
99
tags: true
1010
npm_api_key:
1111
secure: HK/tFvgj/TtYTJ3s2Bszc1/yJWvbSkLcfY3ki3GEuudMpfzcq134/2fbdZLb+B7Ukg31rdRVFCrSg8k6a1KhztkRr9SnMts5WO2ZGulmzNQ+XsBwdd0Bf7KYamAtqft5qBnSvh+ypBloQJQqq5qazb31971Fwvg5pdkYTQgCQxyIfZlH8nUbOxcYyl4w6Mvz5zsQp2c4OKOdq0FgeU3OqJ05i5lWL/CZWRO9L7+f0Uih5Jr9CuRzBUcVVxIopn1uOX1czug+OudIuUMLxbJwJt69ZpWdTbywLg6wVvA58ozbyialuEx8S1UaehsqHFj29JJWcOw+6TCi5+512DrBZMguiyTkjq5I5kmRcPNPY8dcqJUZUD6eDpKYQemFeg+6vKIvT3spK53VXNoEOIqAAiNTpmfY6JQ17S31gy1TqZldMtWr1HXf95LGlLC+czgMHPi1m6YiUgdDx5N7MFXumdOxiyHNdoitQFyyyS57RS7BG8/5ZMeKIXEfhQ9KU/D5L3KpgNCBmwVR72vF3nb89aVETrvNIbZEgc/cTdYWquezfPibGoGjWVJ4c38nd30s6rmoMBwoDwznaDg87ameoHUKSCSMx3uVXRZ5uR2C4SmTqVbWNKLXszL4iIW54EaLf3M+AYjoAb+EupaPMuEonJukdzkalp03RekYVeIY23U=
12+
13+
# NOTE(review): presumably needed so the build runs in an environment where
# the Docker service below can be started — confirm against the Travis CI docs.
sudo: required
14+
15+
# Docker is needed because test/parquet-mr.js invokes `docker run`.
services:
16+
- docker
17+
18+
# Fetch the parquet-tools image up front; it is the image the parquet-mr
# tests run to dump the files written by parquetjs.
before_install:
19+
- docker pull nathanhowell/parquet-tools

test/parquet-mr.js

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
'use strict';
2+
const chai = require('chai');
3+
const assert = chai.assert;
4+
const parquet = require('../parquet.js');
5+
const child_process = require('child_process');
6+
7+
/**
 * Runs `parquet-tools dump --debug` on a file inside the
 * `nathanhowell/parquet-tools` docker image and resolves with its stdout.
 *
 * The current working directory is mounted at `/home` inside the container,
 * so `file` must be a path relative to the current working directory.
 *
 * @param {string} file - Path of the parquet file, relative to the current
 *   working directory.
 * @returns {Promise<string>} Everything the dump command wrote to stdout.
 *   The promise rejects with the process error if docker fails, or with an
 *   Error carrying the stderr text if the command wrote anything to stderr.
 */
function readParquetMr(file) {
  // Arguments are passed as an array (no shell involved), so file names with
  // spaces or shell metacharacters reach docker unchanged, and the mount path
  // does not depend on the shell expanding ${PWD}.
  const dockerArgs = [
    'run',
    '-v', `${process.cwd()}:/home`,
    'nathanhowell/parquet-tools',
    'dump', '--debug',
    `/home/${file}`,
  ];

  return new Promise((resolve, reject) => {
    child_process.execFile('docker', dockerArgs, (err, stdout, stderr) => {
      if (err) {
        reject(err);
        return;
      }
      if (stderr) {
        // Any stderr output is treated as a failure; wrap it in an Error so
        // the rejection carries a stack trace instead of a bare string.
        reject(new Error(`parquet-tools wrote to stderr: ${stderr}`));
        return;
      }
      resolve(stdout);
    });
  });
}
20+
21+
// Compatibility tests: each case writes a small file with parquetjs, dumps it
// with parquet-mr's parquet-tools (via readParquetMr) and compares the dump
// text against the exact expected output.
describe('Parquet-mr', function() {
  /**
   * Writes `rows` to a new parquet file at `path` using `schema`, then
   * closes the writer.
   *
   * @param {parquet.ParquetSchema} schema - schema of the file to write
   * @param {string} path - output file path
   * @param {Array<Object>} rows - rows to append, in order
   * @returns {Promise<void>}
   */
  async function writeParquetFile(schema, path, rows) {
    const writer = await parquet.ParquetWriter.openFile(schema, path);

    // Rows are appended one at a time, in order.
    for (const row of rows) {
      await writer.appendRow(row);
    }

    await writer.close();
  }

  it('should read a simple parquetjs file', async function() {
    const schema = new parquet.ParquetSchema({
      name: { type: 'UTF8' },
      quantity: { type: 'INT64' },
      price: { type: 'DOUBLE' },
    });

    const rows = [
      { name: 'apples', quantity: 10, price: 2.6 },
      { name: 'oranges', quantity: 20, price: 2.7},
      { name: 'kiwi', price: 4.2, quantity: 4},
    ];

    await writeParquetFile(schema, 'test-mr.parquet', rows);

    const result = await readParquetMr('test-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nname: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:51/51/1.00 VC:3 ENC:PLAIN,RLE\nquantity: INT64 UNCOMPRESSED DO:0 FPO:79 SZ:46/46/1.00 VC:3 ENC:PLAIN,RLE\nprice: DOUBLE UNCOMPRESSED DO:0 FPO:154 SZ:46/46/1.00 VC:3 ENC:PLAIN,RLE\n\n name TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:29 VC:3\n\n quantity TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3\n\n price TV=3 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3\n\nBINARY name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:apples\nvalue 2: R:0 D:0 V:oranges\nvalue 3: R:0 D:0 V:kiwi\n\nINT64 quantity \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:10\nvalue 2: R:0 D:0 V:20\nvalue 3: R:0 D:0 V:4\n\nDOUBLE price \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:0 V:2.6\nvalue 2: R:0 D:0 V:2.7\nvalue 3: R:0 D:0 V:4.2\n');
  });

  it('should read a nested field', async function() {
    const schema = new parquet.ParquetSchema({
      fruit: {
        fields: {
          name: { type: 'UTF8'},
          quantity: { type: 'INT32'}
        }
      }
    });

    const rows = [
      {
        fruit: {
          name: 'apple',
          quantity: 9
        }
      },
    ];

    await writeParquetFile(schema, 'test2-mr.parquet', rows);

    const result = await readParquetMr('test2-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nfruit: \n.name: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:31/31/1.00 VC:1 ENC:PLAIN,RLE\n.quantity: INT32 UNCOMPRESSED DO:0 FPO:65 SZ:26/26/1.00 VC:1 ENC:PLAIN,RLE\n\n fruit.name TV=1 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:9 VC:1\n\n fruit.quantity TV=1 RL=0 DL=0\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:4 VC:1\n\nBINARY fruit.name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:0 V:apple\n\nINT32 fruit.quantity \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:0 V:9\n');
  });

  it('should read a parquetjs file with optional value', async function() {
    const schema = new parquet.ParquetSchema({
      name: { type: 'UTF8', optional: true }
    });

    const rows = [
      { name: 'apples' },
      { name: 'oranges' },
      { name: 'kiwi' },
    ];

    await writeParquetFile(schema, 'test3-mr.parquet', rows);

    const result = await readParquetMr('test3-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nname: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:53/53/1.00 VC:3 ENC:PLAIN,RLE\n\n name TV=3 RL=0 DL=1\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:31 VC:3\n\nBINARY name \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 3 *** \nvalue 1: R:0 D:1 V:apples\nvalue 2: R:0 D:1 V:oranges\nvalue 3: R:0 D:1 V:kiwi\n');
  });

  it('should read repeated fields', async function() {
    const schema = new parquet.ParquetSchema({
      stock: {
        repeated: true,
        fields: {
          warehouse: { type: 'UTF8' },
        }
      }
    });

    const rows = [
      {
        stock: [
          {warehouse: 'Newark'}
        ]
      },
    ];

    await writeParquetFile(schema, 'test4-mr.parquet', rows);

    const result = await readParquetMr('test4-mr.parquet');
    assert.equal(result,'row group 0 \n--------------------------------------------------------------------------------\nstock: \n.warehouse: BINARY UNCOMPRESSED DO:0 FPO:4 SZ:36/36/1.00 VC:1 ENC:PLAIN,RLE\n\n stock.warehouse TV=1 RL=1 DL=1\n ----------------------------------------------------------------------------\n page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:14 VC:1\n\nBINARY stock.warehouse \n--------------------------------------------------------------------------------\n*** row group 1 of 1, values 1 to 1 *** \nvalue 1: R:0 D:1 V:Newark\n');
  });
});

0 commit comments

Comments
 (0)