@@ -92,7 +92,7 @@ func (consumer *BrokerConsumer) ConsumeOnChannel(msgChan chan *Message, pollTime
92
92
}, quit )
93
93
94
94
if err != nil {
95
- if err != io .EOF && err .Error () != "use of closed network connection" { //
95
+ if err != io .EOF && err .Error () != "use of closed network connection" {
96
96
log .Println ("Fatal Error: " , err )
97
97
panic (err )
98
98
}
@@ -129,6 +129,23 @@ func (consumer *BrokerConsumer) ConsumeOnChannel(msgChan chan *Message, pollTime
129
129
// MessageHandlerFunc is the function type of the message handlers accepted by Consume()
130
130
type MessageHandlerFunc func (msg * Message )
131
131
132
+ // reconnect from the earliest available offset after the current one becomes unavailable
133
+ func (consumer * BrokerConsumer ) reconnectFromEarliestAvailableOffset (conn * net.TCPConn ) (uint64 , error ) {
134
+ _ , err := conn .Write (consumer .broker .EncodeOffsetRequest (OFFSET_EARLIEST , 1 ))
135
+ if err != nil {
136
+ log .Println ("Failed kafka offset request:" , err .Error ())
137
+ return 0 , err
138
+ }
139
+
140
+ length , payload , err := consumer .broker .readResponse (conn )
141
+ log .Println ("kafka offset request of" , length , "bytes starting from offset" , consumer .offset , payload )
142
+
143
+ if err != nil {
144
+ return 0 , err
145
+ }
146
+ return binary .BigEndian .Uint64 (payload [0 :]), nil
147
+ }
148
+
132
149
// Consume makes a single fetch request and sends the messages in the message set to a handler function
133
150
func (consumer * BrokerConsumer ) Consume (handlerFunc MessageHandlerFunc , stop <- chan struct {}) (int , error ) {
134
151
conn , err := consumer .broker .connect ()
@@ -157,7 +174,25 @@ func (consumer *BrokerConsumer) consumeWithConn(conn *net.TCPConn, handlerFunc M
157
174
//log.Println("kafka fetch request of", length, "bytes starting from offset", consumer.offset)
158
175
159
176
if err != nil {
160
- return - 1 , err
177
+ if err .Error () != "Broker Response Error: 1" {
178
+ return - 1 , err
179
+ }
180
+
181
+ // special case: reset offset if kafka cleaned up the file being read
182
+ log .Println ("ERROR fetching kafka batch at offset" , consumer .offset , "- probably due to timeout or premature cleanup of kafka data file" )
183
+ log .Println ("Fetching earliest available offset in kafka log" )
184
+ consumer .offset , err = consumer .reconnectFromEarliestAvailableOffset (conn )
185
+ if err != nil {
186
+ panic (err )
187
+ }
188
+ log .Println ("Resuming at offset" , consumer .offset )
189
+ length , payload , err = consumer .broker .readResponse (conn )
190
+ if err != nil {
191
+ log .Println ("Cannot resume consuming at new offset, needs manual intervention" )
192
+ if err .Error () != "Broker Response Error: 1" {
193
+ return - 1 , err
194
+ }
195
+ }
161
196
}
162
197
163
198
num := 0
@@ -166,14 +201,15 @@ func (consumer *BrokerConsumer) consumeWithConn(conn *net.TCPConn, handlerFunc M
166
201
currentOffset := uint64 (0 )
167
202
for currentOffset <= uint64 (length - 4 ) {
168
203
totalLength , msgs , err1 := Decode (payload [currentOffset :], consumer .codecs )
169
- if ErrIncompletePacket == err1 {
204
+ if ErrIncompletePacket == err1 || ErrMalformedPacket == err1 {
170
205
// Reached the end of the current packet and the last message is incomplete.
171
206
if 0 == num {
172
207
// This is the very first message in the batch => we need to request a larger packet
173
208
// or the consumer will get stuck here indefinitely
174
209
log .Printf ("ERROR: Incomplete message at offset %d %d, change the configuration to a larger max fetch size\n " ,
175
210
consumer .offset ,
176
211
currentOffset )
212
+ log .Printf ("\n Payload length: %d, currentOffset: %d, payload: [%x]\n \n " , length , currentOffset , payload )
177
213
} else {
178
214
// Partial message at end of current batch, need a new Fetch Request from a newer offset
179
215
log .Printf ("DEBUG: Incomplete message at offset %d %d for topic '%s' (%s, partition %d), fetching new batch from offset %d\n " ,
@@ -185,10 +221,6 @@ func (consumer *BrokerConsumer) consumeWithConn(conn *net.TCPConn, handlerFunc M
185
221
consumer .offset + currentOffset )
186
222
}
187
223
break
188
- } else if ErrMalformedPacket == err1 {
189
- log .Printf ("ERROR: Malformed message at offset %d %d\n " ,
190
- consumer .offset ,
191
- currentOffset )
192
224
}
193
225
if err != nil {
194
226
log .Printf ("Payload length: %d, currentOffset: %d, payload: [%x]" , length , currentOffset , payload )
0 commit comments