@@ -102,7 +102,7 @@ pub trait EventFormat: Sized {
102
102
if !Self :: is_schema_matching ( new_schema. clone ( ) , storage_schema, static_schema_flag) {
103
103
return Err ( anyhow ! ( "Schema mismatch" ) ) ;
104
104
}
105
- new_schema = update_field_type_in_schema ( new_schema, time_partition) ;
105
+ new_schema = update_field_type_in_schema ( new_schema, None , time_partition, None ) ;
106
106
let rb = Self :: decode ( data, new_schema. clone ( ) ) ?;
107
107
let tags_arr = StringArray :: from_iter_values ( std:: iter:: repeat ( & tags) . take ( rb. num_rows ( ) ) ) ;
108
108
let metadata_arr =
@@ -147,19 +147,56 @@ pub trait EventFormat: Sized {
147
147
}
148
148
}
149
149
150
+ pub fn get_existing_fields (
151
+ inferred_schema : Arc < Schema > ,
152
+ existing_schema : Option < & HashMap < String , Arc < Field > > > ,
153
+ ) -> Vec < Arc < Field > > {
154
+ let mut existing_fields = Vec :: new ( ) ;
155
+
156
+ for field in inferred_schema. fields . iter ( ) {
157
+ if existing_schema. map_or ( false , |schema| schema. contains_key ( field. name ( ) ) ) {
158
+ existing_fields. push ( field. clone ( ) ) ;
159
+ }
160
+ }
161
+
162
+ existing_fields
163
+ }
164
+
150
165
pub fn update_field_type_in_schema (
151
- schema : Arc < Schema > ,
166
+ inferred_schema : Arc < Schema > ,
167
+ existing_schema : Option < & HashMap < String , Arc < Field > > > ,
152
168
time_partition : Option < String > ,
169
+ log_records : Option < & Vec < Value > > ,
153
170
) -> Arc < Schema > {
171
+ let mut updated_schema = inferred_schema. clone ( ) ;
172
+
173
+ if let Some ( existing_schema) = existing_schema {
174
+ let existing_fields = get_existing_fields ( inferred_schema. clone ( ) , Some ( existing_schema) ) ;
175
+ let existing_field_names: Vec < String > = existing_fields
176
+ . iter ( )
177
+ . map ( |field| field. name ( ) . clone ( ) )
178
+ . collect ( ) ;
179
+
180
+ if let Some ( log_records) = log_records {
181
+ for log_record in log_records {
182
+ updated_schema = Arc :: new ( update_data_type_to_datetime (
183
+ ( * updated_schema) . clone ( ) ,
184
+ log_record. clone ( ) ,
185
+ existing_field_names. clone ( ) ,
186
+ ) ) ;
187
+ }
188
+ }
189
+ }
190
+
154
191
if time_partition. is_none ( ) {
155
- return schema ;
192
+ return updated_schema ;
156
193
}
157
- let field_name = time_partition. unwrap ( ) ;
158
- let new_schema: Vec < Field > = schema
194
+ let time_partition_field_name = time_partition. unwrap ( ) ;
195
+ let new_schema: Vec < Field > = updated_schema
159
196
. fields ( )
160
197
. iter ( )
161
198
. map ( |field| {
162
- if * field. name ( ) == field_name {
199
+ if * field. name ( ) == time_partition_field_name {
163
200
if field. data_type ( ) == & DataType :: Utf8 {
164
201
let new_data_type = DataType :: Timestamp ( TimeUnit :: Millisecond , None ) ;
165
202
Field :: new ( field. name ( ) . clone ( ) , new_data_type, true )
@@ -174,12 +211,16 @@ pub fn update_field_type_in_schema(
174
211
Arc :: new ( Schema :: new ( new_schema) )
175
212
}
176
213
177
- pub fn update_data_type_to_datetime ( schema : Schema , value : Value ) -> Schema {
214
+ pub fn update_data_type_to_datetime (
215
+ schema : Schema ,
216
+ value : Value ,
217
+ ignore_field_names : Vec < String > ,
218
+ ) -> Schema {
178
219
let new_schema: Vec < Field > = schema
179
220
. fields ( )
180
221
. iter ( )
181
222
. map ( |field| {
182
- if field. data_type ( ) == & DataType :: Utf8 {
223
+ if field. data_type ( ) == & DataType :: Utf8 && !ignore_field_names . contains ( field . name ( ) ) {
183
224
if let Value :: Object ( map) = & value {
184
225
if let Some ( Value :: String ( s) ) = map. get ( field. name ( ) ) {
185
226
if DateTime :: parse_from_rfc3339 ( s) . is_ok ( ) {
0 commit comments