@@ -5,7 +5,7 @@ use deltalake::protocol::SaveMode;
55use deltalake:: table:: builder:: DeltaTableBuilder ;
66use deltalake:: { datafusion:: prelude:: SessionContext , DeltaTable } ;
77use polars:: prelude:: DataFrame ;
8- use std:: collections:: HashSet ;
8+ use std:: collections:: { HashMap , HashSet } ;
99
1010use crate :: errors:: RunError ;
1111use crate :: io:: format:: AcceptedMergeMetrics ;
@@ -140,46 +140,80 @@ pub(crate) fn resolve_merge_key(entity: &config::EntityConfig) -> FloeResult<Vec
140140 Ok ( primary_key. clone ( ) )
141141}
142142
143- pub ( crate ) fn resolve_merge_ignore_columns ( entity : & config:: EntityConfig ) -> HashSet < String > {
144- entity
143+ pub ( crate ) fn resolve_merge_ignore_columns (
144+ entity : & config:: EntityConfig ,
145+ ) -> FloeResult < HashSet < String > > {
146+ let Some ( columns) = entity
145147 . sink
146148 . accepted
147149 . merge
148150 . as_ref ( )
149151 . and_then ( |merge| merge. ignore_columns . as_ref ( ) )
150- . map ( |columns| {
151- columns
152- . iter ( )
153- . map ( |column| column. trim ( ) )
154- . filter ( |column| !column. is_empty ( ) )
155- . map ( ToOwned :: to_owned)
156- . collect :: < HashSet < _ > > ( )
152+ else {
153+ return Ok ( HashSet :: new ( ) ) ;
154+ } ;
155+
156+ let schema_to_output = schema_to_output_column_name_map ( entity) ?;
157+ let resolved = columns
158+ . iter ( )
159+ . map ( |column| column. trim ( ) )
160+ . filter ( |column| !column. is_empty ( ) )
161+ . map ( |column| {
162+ schema_to_output
163+ . get ( column)
164+ . cloned ( )
165+ . unwrap_or_else ( || column. to_string ( ) )
157166 } )
158- . unwrap_or_default ( )
167+ . collect :: < HashSet < _ > > ( ) ;
168+ Ok ( resolved)
159169}
160170
161- pub ( crate ) fn resolve_merge_compare_columns ( entity : & config:: EntityConfig ) -> Option < Vec < String > > {
162- entity
171+ pub ( crate ) fn resolve_merge_compare_columns (
172+ entity : & config:: EntityConfig ,
173+ ) -> FloeResult < Option < Vec < String > > > {
174+ let Some ( columns) = entity
163175 . sink
164176 . accepted
165177 . merge
166178 . as_ref ( )
167179 . and_then ( |merge| merge. compare_columns . as_ref ( ) )
168- . map ( |columns| {
169- let mut seen = HashSet :: new ( ) ;
170- columns
171- . iter ( )
172- . map ( |column| column. trim ( ) )
173- . filter ( |column| !column. is_empty ( ) )
174- . filter_map ( |column| {
175- if seen. insert ( column. to_string ( ) ) {
176- Some ( column. to_string ( ) )
177- } else {
178- None
179- }
180- } )
181- . collect :: < Vec < _ > > ( )
180+ else {
181+ return Ok ( None ) ;
182+ } ;
183+
184+ let schema_to_output = schema_to_output_column_name_map ( entity) ?;
185+ let mut seen = HashSet :: new ( ) ;
186+ let resolved = columns
187+ . iter ( )
188+ . map ( |column| column. trim ( ) )
189+ . filter ( |column| !column. is_empty ( ) )
190+ . map ( |column| {
191+ schema_to_output
192+ . get ( column)
193+ . cloned ( )
194+ . unwrap_or_else ( || column. to_string ( ) )
182195 } )
196+ . filter ( |column| seen. insert ( column. clone ( ) ) )
197+ . collect :: < Vec < _ > > ( ) ;
198+ Ok ( Some ( resolved) )
199+ }
200+
201+ fn schema_to_output_column_name_map (
202+ entity : & config:: EntityConfig ,
203+ ) -> FloeResult < HashMap < String , String > > {
204+ let normalize_strategy = crate :: checks:: normalize:: resolve_normalize_strategy ( entity) ?;
205+ let output_columns = crate :: checks:: normalize:: resolve_output_columns (
206+ & entity. schema . columns ,
207+ normalize_strategy. as_deref ( ) ,
208+ ) ;
209+ let mut mapping = HashMap :: with_capacity ( entity. schema . columns . len ( ) ) ;
210+ for ( schema_column, output_column) in entity. schema . columns . iter ( ) . zip ( output_columns. iter ( ) ) {
211+ mapping. insert (
212+ schema_column. name . trim ( ) . to_string ( ) ,
213+ output_column. name . clone ( ) ,
214+ ) ;
215+ }
216+ Ok ( mapping)
183217}
184218
185219pub ( crate ) fn resolve_scd2_system_columns ( entity : & config:: EntityConfig ) -> Scd2SystemColumns {
0 commit comments