@@ -9,6 +9,7 @@ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
9
9
use parquet:: arrow:: ProjectionMask ;
10
10
use simplelog:: { ColorChoice , Config , TermLogger , TerminalMode } ;
11
11
12
+ use crate :: taxi_data:: taxi_data_parquet;
12
13
use vortex:: array:: chunked:: ChunkedArray ;
13
14
use vortex:: array:: downcast:: DowncastArrayBuiltin ;
14
15
use vortex:: array:: IntoArray ;
@@ -18,23 +19,35 @@ use vortex::compress::{CompressConfig, CompressCtx};
18
19
use vortex:: formatter:: display_tree;
19
20
use vortex_alp:: ALPEncoding ;
20
21
use vortex_datetime:: DateTimeEncoding ;
21
- use vortex_fastlanes:: { BitPackedEncoding , DeltaEncoding , FoREncoding } ;
22
+ use vortex_dict:: DictEncoding ;
23
+ use vortex_fastlanes:: { BitPackedEncoding , FoREncoding } ;
22
24
use vortex_ree:: REEEncoding ;
23
25
use vortex_roaring:: RoaringBoolEncoding ;
24
26
use vortex_schema:: DType ;
25
27
28
+ pub mod reader;
26
29
pub mod taxi_data;
27
30
28
- pub fn idempotent ( name : & str , f : impl FnOnce ( & mut File ) ) -> PathBuf {
31
+ pub fn idempotent < T , E > (
32
+ name : & str ,
33
+ f : impl FnOnce ( & mut File ) -> Result < T , E > ,
34
+ ) -> Result < PathBuf , E > {
35
+ let path = data_path ( name) ;
36
+ if !path. exists ( ) {
37
+ let mut file = File :: create ( & path) . unwrap ( ) ;
38
+ f ( & mut file) ?;
39
+ }
40
+ Ok ( path. to_path_buf ( ) )
41
+ }
42
+
43
+ pub fn data_path ( name : & str ) -> PathBuf {
29
44
let path = Path :: new ( env ! ( "CARGO_MANIFEST_DIR" ) )
30
45
. join ( "data" )
31
46
. join ( name) ;
32
- if !path. exists ( ) {
47
+ if !path. parent ( ) . unwrap ( ) . exists ( ) {
33
48
create_dir_all ( path. parent ( ) . unwrap ( ) ) . unwrap ( ) ;
34
- let mut file = File :: create ( & path) . unwrap ( ) ;
35
- f ( & mut file) ;
36
49
}
37
- path. to_path_buf ( )
50
+ path
38
51
}
39
52
40
53
pub fn setup_logger ( level : LevelFilter ) {
@@ -51,11 +64,11 @@ pub fn enumerate_arrays() -> Vec<EncodingRef> {
51
64
println ! ( "FOUND {:?}" , ENCODINGS . iter( ) . map( |e| e. id( ) ) . collect_vec( ) ) ;
52
65
vec ! [
53
66
& ALPEncoding ,
54
- // &DictEncoding,
67
+ & DictEncoding ,
55
68
& BitPackedEncoding ,
56
69
& FoREncoding ,
57
70
& DateTimeEncoding ,
58
- & DeltaEncoding ,
71
+ // &DeltaEncoding, Blows up the search space too much.
59
72
& REEEncoding ,
60
73
& RoaringBoolEncoding ,
61
74
// RoaringIntEncoding,
@@ -71,7 +84,7 @@ pub fn compress_ctx() -> CompressCtx {
71
84
}
72
85
73
86
pub fn compress_taxi_data ( ) -> ArrayRef {
74
- let file = File :: open ( taxi_data :: download_taxi_data ( ) ) . unwrap ( ) ;
87
+ let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
75
88
let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
76
89
let _mask = ProjectionMask :: roots ( builder. parquet_schema ( ) , [ 1 ] ) ;
77
90
let _no_datetime_mask = ProjectionMask :: roots (
@@ -141,7 +154,7 @@ mod test {
141
154
use vortex:: encode:: FromArrowArray ;
142
155
use vortex:: serde:: { ReadCtx , WriteCtx } ;
143
156
144
- use crate :: taxi_data:: download_taxi_data ;
157
+ use crate :: taxi_data:: taxi_data_parquet ;
145
158
use crate :: { compress_ctx, compress_taxi_data, setup_logger} ;
146
159
147
160
#[ ignore]
@@ -154,7 +167,7 @@ mod test {
154
167
#[ ignore]
155
168
#[ test]
156
169
fn round_trip_serde ( ) {
157
- let file = File :: open ( download_taxi_data ( ) ) . unwrap ( ) ;
170
+ let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
158
171
let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
159
172
let reader = builder. with_limit ( 1 ) . build ( ) . unwrap ( ) ;
160
173
@@ -176,7 +189,7 @@ mod test {
176
189
#[ ignore]
177
190
#[ test]
178
191
fn round_trip_arrow ( ) {
179
- let file = File :: open ( download_taxi_data ( ) ) . unwrap ( ) ;
192
+ let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
180
193
let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
181
194
let reader = builder. with_limit ( 1 ) . build ( ) . unwrap ( ) ;
182
195
@@ -194,7 +207,7 @@ mod test {
194
207
#[ ignore]
195
208
#[ test]
196
209
fn round_trip_arrow_compressed ( ) {
197
- let file = File :: open ( download_taxi_data ( ) ) . unwrap ( ) ;
210
+ let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
198
211
let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
199
212
let reader = builder. with_limit ( 1 ) . build ( ) . unwrap ( ) ;
200
213
0 commit comments