Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ public abstract class AbstractQueryEngine implements QueryEngine {
@Value("${entrada.database.table.icmp}")
protected String tableIcmp;

@Value("${entrada.parquet.compression}")
protected String parquetCompression;

protected JdbcTemplate jdbcTemplate;
protected FileManager fileManager;
private String scriptPrefix;
Expand Down Expand Up @@ -143,6 +146,7 @@ private Map<String, Object> createValueMap(TablePartition p) {
values.put("MONTH", p.getMonth());
values.put("DAY", p.getDay());
values.put("SERVER", p.getServer());
values.put("PARQUET_COMPRESSION", parquetCompression);
return values;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public abstract class AbstractInitializer implements Initializer {
@Value("${aws.encryption}")
protected boolean encrypt;

@Value("${entrada.parquet.compression}")
protected String parquetCompression;


private QueryEngine queryEngine;
private String scriptPrefix;
Expand Down Expand Up @@ -147,6 +150,7 @@ private Map<String, Object> dnsParameters() {
parameters.put("TABLE_NAME", tableDns);
parameters.put("TABLE_LOC", FileUtil.appendPath(output, tableDns));
parameters.put("ENCRYPTED", encrypt);
parameters.put("PARQUET_COMPRESSION", parquetCompression);
return parameters;
}

Expand All @@ -157,6 +161,7 @@ private Map<String, Object> icmpParameters() {
parameters.put("TABLE_NAME", tableIcmp);
parameters.put("TABLE_LOC", FileUtil.appendPath(output, tableIcmp));
parameters.put("ENCRYPTED", encrypt);
parameters.put("PARQUET_COMPRESSION", parquetCompression);
return parameters;
}

Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/application-dev.properties
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ entrada.parquet.filesize.max=128
entrada.parquet.rowgroup.size=128
# max rows to use for each column chunk page in parquet file
entrada.parquet.page-row.limit=20000
# parquet compression format; for example, use GZIP to save on S3 storage and Athena scan costs
entrada.parquet.compression=SNAPPY
# timeout in seconds for cached dns questions (without responses)
# required to match dns requests spanning multiple pcap files
entrada.cache.timeout=2
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ entrada.parquet.filesize.max=128
entrada.parquet.rowgroup.size=128
# max rows to use for each column chunk page in parquet file
entrada.parquet.page-row.limit=20000
# parquet compression format; for example, use GZIP to save on S3 storage and Athena scan costs
entrada.parquet.compression=SNAPPY
# timeout in seconds for cached dns questions (without responses)
# required to match dns requests spanning multiple pcap files
entrada.cache.timeout=2
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/sql/athena/partition-compaction-dns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE TABLE ${DATABASE_NAME}.tmp_compaction
WITH (
external_location = '${TABLE_LOC}',
format = 'Parquet',
parquet_compression = 'SNAPPY')
parquet_compression = '${PARQUET_COMPRESSION}')
AS SELECT
id,
time,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE TABLE ${DATABASE_NAME}.tmp_compaction
WITH (
external_location = '${TABLE_LOC}',
format = 'Parquet',
parquet_compression = 'SNAPPY')
parquet_compression = '${PARQUET_COMPRESSION}')
AS SELECT
icmp_type,
icmp_code,
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/sql/athena/partition-purge-dns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE TABLE ${DATABASE_NAME}.tmp_compaction
WITH (
external_location = '${TABLE_LOC}',
format = 'Parquet',
parquet_compression = 'SNAPPY')
parquet_compression = '${PARQUET_COMPRESSION}')
AS SELECT
id,
time,
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/sql/athena/partition-purge-icmp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE TABLE ${DATABASE_NAME}.tmp_compaction
WITH (
external_location = '${TABLE_LOC}',
format = 'Parquet',
parquet_compression = 'SNAPPY')
parquet_compression = '${PARQUET_COMPRESSION}')
AS SELECT
icmp_type,
icmp_code,
Expand Down