@@ -19,7 +19,6 @@ import java.time.Duration
19
19
import java .util
20
20
import java .util .UUID .randomUUID
21
21
import java .util .{Collections , Properties }
22
-
23
22
import org .apache .kafka .clients .admin .{AdminClient , AdminClientConfig , NewTopic }
24
23
import org .apache .kafka .clients .consumer .{ConsumerRecord , KafkaConsumer }
25
24
import org .apache .kafka .clients .producer .{KafkaProducer , ProducerConfig , ProducerRecord }
@@ -28,6 +27,7 @@ import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializ
28
27
import org .scalatest .{AppendedClues , BeforeAndAfter , FlatSpec , Matchers }
29
28
import org .testcontainers .containers .KafkaContainer
30
29
import org .testcontainers .utility .DockerImageName
30
+ import za .co .absa .hyperdrive .ingestor .implementation .transformer .deduplicate .kafka .PrunedConsumerRecord
31
31
32
32
import scala .collection .JavaConverters ._
33
33
import scala .collection .mutable
@@ -39,6 +39,12 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
39
39
private val kafkaInsufficientTimeout = Duration .ofMillis(1L )
40
40
private val topic = " test-topic"
41
41
private val maxPollRecords = 10
42
// Test helper handed to the KafkaUtil calls in this suite: converts a consumed
// ConsumerRecord[String, String] into a PrunedConsumerRecord built from the record's
// topic, partition and offset, plus a single-element Seq holding the record value
// (the assertions read that value back through `data.head`).
+ private val pruningFn = (r : ConsumerRecord [String , String ]) => PrunedConsumerRecord (
43
+ r.topic(),
44
+ r.partition(),
45
+ r.offset(),
46
+ Seq (r.value())
47
+ )
42
48
43
49
before{
44
50
kafka.start()
@@ -62,10 +68,10 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
62
68
63
69
// when
64
70
implicit val kafkaConsumerTimeout : Duration = kafkaSufficientTimeout
65
- val records = KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets)
71
+ val records = KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets, pruningFn )
66
72
67
73
// then
68
- val actualMessages = records.map(_.value() ).toList.sorted
74
+ val actualMessages = records.map(_.data.head. asInstanceOf [ String ] ).toList.sorted
69
75
actualMessages should contain theSameElementsAs messages
70
76
}
71
77
@@ -99,10 +105,10 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
99
105
100
106
// when
101
107
implicit val kafkaConsumerTimeout : Duration = kafkaSufficientTimeout
102
- val records = KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets)
108
+ val records = KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets, pruningFn )
103
109
104
110
// then
105
- val actualMessages = records.map(_.value() ).toList.sorted
111
+ val actualMessages = records.map(_.data.head. asInstanceOf [ String ] ).toList.sorted
106
112
actualMessages should contain allElementsOf messages
107
113
108
114
// cleanup
@@ -118,7 +124,7 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
118
124
119
125
// when
120
126
implicit val kafkaConsumerTimeout : Duration = kafkaInsufficientTimeout
121
- val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, Map (new TopicPartition (topic, 0 ) -> 0 ))
127
+ val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, Map (new TopicPartition (topic, 0 ) -> 0 ), pruningFn )
122
128
123
129
// then
124
130
exception.getMessage should include (" Subscription to topics, partitions and pattern are mutually exclusive" )
@@ -140,7 +146,7 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
140
146
141
147
// when
142
148
implicit val kafkaConsumerTimeout : Duration = kafkaInsufficientTimeout
143
- val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets)
149
+ val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets, pruningFn )
144
150
145
151
// then
146
152
exception.getMessage should include (" Not all expected messages were consumed" )
@@ -160,7 +166,7 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
160
166
161
167
// when
162
168
implicit val kafkaConsumerTimeout : Duration = kafkaInsufficientTimeout
163
- val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets)
169
+ val exception = the[Exception ] thrownBy KafkaUtil .getMessagesAtLeastToOffset(consumer, offsets, pruningFn )
164
170
165
171
// then
166
172
exception.getMessage should include (" Requested consumption" )
@@ -209,8 +215,8 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
209
215
implicit val kafkaConsumerTimeout : Duration = kafkaSufficientTimeout
210
216
val topicPartitions = KafkaUtil .getTopicPartitions(consumer, topic)
211
217
val recordsPerPartition = topicPartitions.map(p => p -> 4L ).toMap
212
- val actualRecords = KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer, recordsPerPartition)
213
- val values = actualRecords.map(_.value() )
218
+ val actualRecords = KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer, recordsPerPartition, pruningFn )
219
+ val values = actualRecords.map(_.data.head. asInstanceOf [ String ] )
214
220
215
221
values.size should be >= 12
216
222
values should contain allElementsOf Seq (" msg_103" , " msg_102" , " msg_101" , " msg_100" , " msg_99" , " msg_97" , " msg_95" ,
@@ -231,10 +237,10 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
231
237
// when
232
238
implicit val kafkaConsumerTimeout : Duration = kafkaSufficientTimeout
233
239
val recordsPerPartition = topicPartitions.map(t => t -> 1000L ).toMap
234
- val records = KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer, recordsPerPartition)
240
+ val records = KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer, recordsPerPartition, pruningFn )
235
241
236
242
// then
237
- val actualMessages = records.map(_.value() ).toList.sorted
243
+ val actualMessages = records.map(_.data.head. asInstanceOf [ String ] ).toList.sorted
238
244
actualMessages should contain theSameElementsAs messages
239
245
}
240
246
@@ -248,7 +254,8 @@ class TestKafkaUtilDockerTest extends FlatSpec with Matchers with BeforeAndAfter
248
254
249
255
val consumer = createConsumer(kafka)
250
256
implicit val kafkaConsumerTimeout : Duration = kafkaInsufficientTimeout
251
- val result = the[Exception ] thrownBy KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer, Map (new TopicPartition (topic, 0 ) -> 10 ))
257
+ val result = the[Exception ] thrownBy KafkaUtil .getAtLeastNLatestRecordsFromPartition(consumer,
258
+ Map (new TopicPartition (topic, 0 ) -> 10 ), pruningFn)
252
259
result.getMessage should include(" increasing the consumer timeout" )
253
260
}
254
261
0 commit comments