|
3 | 3 | package com.azure.cosmos.spark |
4 | 4 |
|
5 | 5 | // scalastyle:off underscore.import |
6 | | -import com.azure.cosmos.implementation.CosmosSchedulers |
7 | 6 | import com.azure.cosmos.{models, _} |
8 | | -import com.azure.cosmos.models.{CosmosBulkExecutionOptions, CosmosBulkExecutionThresholdsState, CosmosBulkItemRequestOptions, CosmosBulkOperations} |
9 | | -import com.azure.cosmos.spark.BulkWriter.{bulkWriterBoundedElastic, getThreadInfo} |
| 7 | +import com.azure.cosmos.models._ |
| 8 | +import com.azure.cosmos.spark.BulkWriter.{BulkOperationFailedException, bulkWriterBoundedElastic, getThreadInfo} |
10 | 9 | import com.azure.cosmos.spark.diagnostics.DefaultDiagnostics |
11 | 10 | import reactor.core.scheduler.Scheduler |
12 | 11 |
|
@@ -38,6 +37,7 @@ import scala.collection.JavaConverters._ |
38 | 37 |
|
39 | 38 | //scalastyle:off null |
40 | 39 | //scalastyle:off multiple.string.literals |
| 40 | +//scalastyle:off file.size.limit |
41 | 41 | class BulkWriter(container: CosmosAsyncContainer, |
42 | 42 | writeConfig: CosmosWriteConfig, |
43 | 43 | diagnosticsConfig: DiagnosticsConfig) |
@@ -135,81 +135,38 @@ class BulkWriter(container: CosmosAsyncContainer, |
135 | 135 |
|
136 | 136 | bulkOperationResponseFlux.subscribe( |
137 | 137 | resp => { |
138 | | - var isGettingRetried = false |
| 138 | + var isGettingRetried = new AtomicBoolean(false) |
139 | 139 | try { |
140 | 140 | val itemOperation = resp.getOperation |
141 | 141 | val itemOperationFound = activeOperations.remove(itemOperation) |
142 | 142 | assume(itemOperationFound) // can't find the item operation in list of active operations! |
143 | 143 | val context = itemOperation.getContext[OperationContext] |
| 144 | + val itemResponse = resp.getResponse |
144 | 145 |
|
145 | 146 | if (resp.getException != null) { |
146 | 147 | Option(resp.getException) match { |
147 | 148 | case Some(cosmosException: CosmosException) => |
148 | | - log.logDebug(s"encountered ${cosmosException.getStatusCode}, " + |
149 | | - s"Context: ${operationContext.toString} ${getThreadInfo}") |
150 | | - if (shouldIgnore(cosmosException, context)) { |
151 | | - log.logDebug(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
152 | | - s"ignored encountered ${cosmosException.getStatusCode}, Context: ${operationContext.toString}") |
153 | | - totalSuccessfulIngestionMetrics.getAndIncrement() |
154 | | - // work done |
155 | | - } else if (shouldRetry(cosmosException, context)) { |
156 | | - // requeue |
157 | | - log.logWarning(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
158 | | - s"encountered ${cosmosException.getStatusCode}, will retry! " + |
159 | | - s"attemptNumber=${context.attemptNumber}, exceptionMessage=${cosmosException.getMessage}, " + |
160 | | - s"Context: {${operationContext.toString}} ${getThreadInfo}") |
161 | | - |
162 | | - // this is to ensure the submission will happen on a different thread in background |
163 | | - // and doesn't block the active thread |
164 | | - val deferredRetryMono = SMono.defer(() => { |
165 | | - scheduleWriteInternal(itemOperation.getPartitionKeyValue, |
166 | | - itemOperation.getItem.asInstanceOf[ObjectNode], |
167 | | - OperationContext(context.itemId, context.partitionKeyValue, context.eTag, context.attemptNumber + 1)) |
168 | | - SMono.empty |
169 | | - }) |
170 | | - |
171 | | - if (Exceptions.isTimeout(cosmosException)) { |
172 | | - deferredRetryMono |
173 | | - .delaySubscription( |
174 | | - Duration( |
175 | | - BulkWriter.minDelayOn408RequestTimeoutInMs + |
176 | | - scala.util.Random.nextInt( |
177 | | - BulkWriter.maxDelayOn408RequestTimeoutInMs - BulkWriter.minDelayOn408RequestTimeoutInMs), |
178 | | - TimeUnit.MILLISECONDS), |
179 | | - Schedulers.boundedElastic()) |
180 | | - .subscribeOn(Schedulers.boundedElastic()) |
181 | | - .subscribe() |
182 | | - |
183 | | - } else { |
184 | | - deferredRetryMono |
185 | | - .subscribeOn(Schedulers.boundedElastic()) |
186 | | - .subscribe() |
187 | | - } |
188 | | - |
189 | | - isGettingRetried = true |
190 | | - } else { |
191 | | - log.logWarning(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
192 | | - s"encountered ${cosmosException.getStatusCode}, all retries exhausted! " + |
193 | | - s"attemptNumber=${context.attemptNumber}, exceptionMessage=${cosmosException.getMessage}, " + |
194 | | - s"Context: {${operationContext.toString} ${getThreadInfo}") |
195 | | - captureIfFirstFailure(cosmosException) |
196 | | - cancelWork() |
197 | | - } |
| 149 | + handleNonSuccessfulStatusCode( |
| 150 | + context, itemOperation, itemResponse, isGettingRetried, Some(cosmosException)) |
198 | 151 | case _ => |
199 | | - log.logWarning(s"unexpected failure: itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
200 | | - s"encountered , attemptNumber=${context.attemptNumber}, exceptionMessage=${resp.getException.getMessage}, " + |
| 152 | + log.logWarning( |
| 153 | + s"unexpected failure: itemId=[${context.itemId}], partitionKeyValue=[" + |
| 154 | + s"${context.partitionKeyValue}], encountered , attemptNumber=${context.attemptNumber}, " + |
| 155 | + s"exceptionMessage=${resp.getException.getMessage}, " + |
201 | 156 | s"Context: ${operationContext.toString} ${getThreadInfo}", resp.getException) |
202 | 157 | captureIfFirstFailure(resp.getException) |
203 | 158 | cancelWork() |
204 | 159 | } |
| 160 | + } else if (!itemResponse.isSuccessStatusCode) { |
| 161 | + handleNonSuccessfulStatusCode(context, itemOperation, itemResponse, isGettingRetried, None) |
205 | 162 | } else { |
206 | 163 | // no error case |
207 | 164 | totalSuccessfulIngestionMetrics.getAndIncrement() |
208 | 165 | } |
209 | 166 |
|
210 | 167 | } |
211 | 168 | finally { |
212 | | - if (!isGettingRetried) { |
| 169 | + if (!isGettingRetried.get) { |
213 | 170 | semaphore.release() |
214 | 171 | } |
215 | 172 | } |
@@ -305,6 +262,88 @@ class BulkWriter(container: CosmosAsyncContainer, |
305 | 262 | bulkInputEmitter.emitNext(bulkItemOperation, emitFailureHandler) |
306 | 263 | } |
307 | 264 |
|
| 265 | + //scalastyle:off method.length |
| 266 | + private[this] def handleNonSuccessfulStatusCode |
| 267 | + ( |
| 268 | + context: OperationContext, |
| 269 | + itemOperation: CosmosItemOperation, |
| 270 | + itemResponse: CosmosBulkItemResponse, |
| 271 | + isGettingRetried: AtomicBoolean, |
| 272 | + responseException: Option[CosmosException] |
| 273 | + ) : Unit = { |
| 274 | + |
| 275 | + val exceptionMessage = responseException match { |
| 276 | + case Some(e) => e.getMessage |
| 277 | + case None => "" |
| 278 | + } |
| 279 | + |
| 280 | + log.logDebug(s"encountered item operation response with status code " + |
| 281 | + s"${itemResponse.getStatusCode}:${itemResponse.getSubStatusCode}, " + |
| 282 | + s"Context: ${operationContext.toString} ${getThreadInfo}") |
| 283 | + if (shouldIgnore(itemResponse.getStatusCode, itemResponse.getSubStatusCode, context)) { |
| 284 | + log.logDebug(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
| 285 | + s"ignored encountered status code '${itemResponse.getStatusCode}:${itemResponse.getSubStatusCode}', " + |
| 286 | + s"Context: ${operationContext.toString}") |
| 287 | + totalSuccessfulIngestionMetrics.getAndIncrement() |
| 288 | + // work done |
| 289 | + } else if (shouldRetry(itemResponse.getStatusCode, itemResponse.getSubStatusCode, context)) { |
| 290 | + // requeue |
| 291 | + log.logWarning(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
| 292 | + s"encountered status code '${itemResponse.getStatusCode}:${itemResponse.getSubStatusCode}', will retry! " + |
| 293 | + s"attemptNumber=${context.attemptNumber}, exceptionMessage=${exceptionMessage}, " + |
| 294 | + s"Context: {${operationContext.toString}} ${getThreadInfo}") |
| 295 | + |
| 296 | + // this is to ensure the submission will happen on a different thread in background |
| 297 | + // and doesn't block the active thread |
| 298 | + val deferredRetryMono = SMono.defer(() => { |
| 299 | + scheduleWriteInternal(itemOperation.getPartitionKeyValue, |
| 300 | + itemOperation.getItem.asInstanceOf[ObjectNode], |
| 301 | + OperationContext(context.itemId, context.partitionKeyValue, context.eTag, context.attemptNumber + 1)) |
| 302 | + SMono.empty |
| 303 | + }) |
| 304 | + |
| 305 | + if (Exceptions.isTimeout(itemResponse.getStatusCode)) { |
| 306 | + deferredRetryMono |
| 307 | + .delaySubscription( |
| 308 | + Duration( |
| 309 | + BulkWriter.minDelayOn408RequestTimeoutInMs + |
| 310 | + scala.util.Random.nextInt( |
| 311 | + BulkWriter.maxDelayOn408RequestTimeoutInMs - BulkWriter.minDelayOn408RequestTimeoutInMs), |
| 312 | + TimeUnit.MILLISECONDS), |
| 313 | + Schedulers.boundedElastic()) |
| 314 | + .subscribeOn(Schedulers.boundedElastic()) |
| 315 | + .subscribe() |
| 316 | + |
| 317 | + } else { |
| 318 | + deferredRetryMono |
| 319 | + .subscribeOn(Schedulers.boundedElastic()) |
| 320 | + .subscribe() |
| 321 | + } |
| 322 | + |
| 323 | + isGettingRetried.set(true) |
| 324 | + } else { |
| 325 | + log.logError(s"for itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}], " + |
| 326 | + s"encountered status code '${itemResponse.getStatusCode}:${itemResponse.getSubStatusCode}', all retries exhausted! " + |
| 327 | + s"attemptNumber=${context.attemptNumber}, exceptionMessage=${exceptionMessage}, " + |
| 328 | + s"Context: {${operationContext.toString} ${getThreadInfo}") |
| 329 | + |
| 330 | + val message = s"All retries exhausted for '${itemOperation.getOperationType}' bulk operation - " + |
| 331 | + s"statusCode=[${itemResponse.getStatusCode}:${itemResponse.getSubStatusCode}] " + |
| 332 | + s"itemId=[${context.itemId}], partitionKeyValue=[${context.partitionKeyValue}]" |
| 333 | + |
| 334 | + val exceptionToBeThrown = responseException match { |
| 335 | + case Some(e) => |
| 336 | + new BulkOperationFailedException(itemResponse.getStatusCode, itemResponse.getSubStatusCode, message, e) |
| 337 | + case None => |
| 338 | + new BulkOperationFailedException(itemResponse.getStatusCode, itemResponse.getSubStatusCode, message, null) |
| 339 | + } |
| 340 | + |
| 341 | + captureIfFirstFailure(exceptionToBeThrown) |
| 342 | + cancelWork() |
| 343 | + } |
| 344 | + } |
| 345 | + //scalastyle:on method.length |
| 346 | + |
308 | 347 | private[this] def throwIfCapturedExceptionExists(): Unit = { |
309 | 348 | val errorSnapshot = errorCaptureFirstException.get() |
310 | 349 | if (errorSnapshot != null) { |
@@ -476,26 +515,23 @@ class BulkWriter(container: CosmosAsyncContainer, |
476 | 515 | subscriptionDisposable.dispose() |
477 | 516 | } |
478 | 517 |
|
479 | | - private def shouldIgnore(cosmosException: CosmosException, operationContext: OperationContext): Boolean = { |
| 518 | + private def shouldIgnore(statusCode: Int, subStatusCode: Int, operationContext: OperationContext): Boolean = { |
480 | 519 | val returnValue = writeConfig.itemWriteStrategy match { |
481 | | - case ItemWriteStrategy.ItemAppend => Exceptions.isResourceExistsException(cosmosException) |
482 | | - case ItemWriteStrategy.ItemDelete => Exceptions.isNotFoundExceptionCore(cosmosException) |
483 | | - case ItemWriteStrategy.ItemDeleteIfNotModified => Exceptions.isNotFoundExceptionCore(cosmosException) || |
484 | | - Exceptions.isPreconditionFailedException(cosmosException) |
| 520 | + case ItemWriteStrategy.ItemAppend => Exceptions.isResourceExistsException(statusCode) |
| 521 | + case ItemWriteStrategy.ItemDelete => Exceptions.isNotFoundExceptionCore(statusCode, subStatusCode) |
| 522 | + case ItemWriteStrategy.ItemDeleteIfNotModified => Exceptions.isNotFoundExceptionCore(statusCode, subStatusCode) || |
| 523 | + Exceptions.isPreconditionFailedException(statusCode) |
485 | 524 | case _ => false |
486 | 525 | } |
487 | 526 |
|
488 | | - log.logDebug(s"Should ignore exception '$cosmosException' -> $returnValue, " + |
489 | | - s"Context: ${operationContext.toString} ${getThreadInfo}") |
490 | | - |
491 | 527 | returnValue |
492 | 528 | } |
493 | 529 |
|
494 | | - private def shouldRetry(cosmosException: CosmosException, operationContext: OperationContext): Boolean = { |
| 530 | + private def shouldRetry(statusCode: Int, subStatusCode: Int, operationContext: OperationContext): Boolean = { |
495 | 531 | val returnValue = operationContext.attemptNumber < writeConfig.maxRetryCount && |
496 | | - Exceptions.canBeTransientFailure(cosmosException) |
| 532 | + Exceptions.canBeTransientFailure(statusCode, subStatusCode) |
497 | 533 |
|
498 | | - log.logDebug(s"Should retry exception '$cosmosException' -> $returnValue, " + |
| 534 | + log.logDebug(s"Should retry statusCode '$statusCode:$subStatusCode' -> $returnValue, " + |
499 | 535 | s"Context: ${operationContext.toString} ${getThreadInfo}") |
500 | 536 |
|
501 | 537 | returnValue |
@@ -599,7 +635,13 @@ private object BulkWriter { |
599 | 635 | } |
600 | 636 | s"Thread[Name: ${t.getName}, Group: $group, IsDaemon: ${t.isDaemon} Id: ${t.getId}]" |
601 | 637 | } |
| 638 | + |
| 639 | + private class BulkOperationFailedException(statusCode: Int, subStatusCode: Int, message:String, cause: Throwable) |
| 640 | + extends CosmosException(statusCode, message, null, cause) { |
| 641 | + BridgeInternal.setSubStatusCode(this, subStatusCode) |
| 642 | + } |
602 | 643 | } |
603 | 644 |
|
604 | 645 | //scalastyle:on multiple.string.literals |
605 | 646 | //scalastyle:on null |
| 647 | +//scalastyle:on file.size.limit |
0 commit comments