@@ -32,10 +32,11 @@ import org.apache.celeborn.common.util.{JavaUtils, ThreadUtils, Utils}
32
32
import org .apache .celeborn .server .common .service .config .ConfigService
33
33
import org .apache .celeborn .service .deploy .master .MasterSource
34
34
import org .apache .celeborn .service .deploy .master .MasterSource .UPDATE_RESOURCE_CONSUMPTION_TIME
35
+ import org .apache .celeborn .service .deploy .master .clustermeta .AbstractMetaManager
35
36
import org .apache .celeborn .service .deploy .master .quota .QuotaStatus ._
36
37
37
38
class QuotaManager (
38
- workerToResourceConsumptions : JMap [ String , JMap [ UserIdentifier , ResourceConsumption ]] ,
39
+ statusSystem : AbstractMetaManager ,
39
40
masterSource : MasterSource ,
40
41
resourceConsumptionSource : ResourceConsumptionSource ,
41
42
celebornConf : CelebornConf ,
@@ -189,77 +190,79 @@ class QuotaManager(
189
190
masterSource.sample(UPDATE_RESOURCE_CONSUMPTION_TIME , this .getClass.getSimpleName, Map .empty) {
190
191
val clusterQuota = getClusterStorageQuota
191
192
var clusterResourceConsumption = ResourceConsumption (0 , 0 , 0 , 0 )
193
+
192
194
val tenantResourceConsumption =
193
- workerToResourceConsumptions.asScala.flatMap(_._2.asScala).groupBy(_._1.tenantId).map {
194
- case (tenantId, tenantConsumptionList) =>
195
- var tenantResourceConsumption = ResourceConsumption (0 , 0 , 0 , 0 )
196
- val userResourceConsumption =
197
- tenantConsumptionList.groupBy(_._1).map {
198
- case (userIdentifier, userConsumptionList) =>
199
- // Step 1: Compute user consumption and set quota status.
200
- val resourceConsumptionList = userConsumptionList.values.toSeq
201
- val resourceConsumption = computeUserResourceConsumption(resourceConsumptionList)
195
+ statusSystem.availableWorkers.asScala.flatMap { workerInfo =>
196
+ workerInfo.userResourceConsumption.asScala
197
+ }.groupBy(_._1.tenantId).toSeq.map { case (tenantId, tenantConsumptionList) =>
198
+ var tenantResourceConsumption = ResourceConsumption (0 , 0 , 0 , 0 )
199
+ val userResourceConsumption =
200
+ tenantConsumptionList.groupBy(_._1).map {
201
+ case (userIdentifier, userConsumptionList) =>
202
+ // Step 1: Compute user consumption and set quota status.
203
+ val resourceConsumptionList = userConsumptionList.map(_._2).toSeq
204
+ val resourceConsumption = computeUserResourceConsumption(resourceConsumptionList)
202
205
203
- // Step 2: Update user resource consumption metrics.
204
- // For extract metrics
205
- userResourceConsumptionMap.put(userIdentifier, resourceConsumption)
206
- registerUserResourceConsumptionMetrics(userIdentifier)
206
+ // Step 2: Update user resource consumption metrics.
207
+ // For extract metrics
208
+ userResourceConsumptionMap.put(userIdentifier, resourceConsumption)
209
+ registerUserResourceConsumptionMetrics(userIdentifier)
207
210
208
- // Step 3: Expire user level exceeded app except already expired app
209
- clusterResourceConsumption = clusterResourceConsumption.add(resourceConsumption)
210
- tenantResourceConsumption = tenantResourceConsumption.add(resourceConsumption)
211
- val quotaStatus = checkUserQuotaSpace(userIdentifier, resourceConsumption)
212
- userQuotaStatus.put(userIdentifier, quotaStatus)
213
- if (interruptShuffleEnabled && quotaStatus.exceed) {
214
- val subResourceConsumptions = computeSubConsumption(resourceConsumptionList)
215
- // Compute expired size
216
- val (expired, notExpired) = subResourceConsumptions.partition { case (app, _) =>
217
- appQuotaStatus.containsKey(app)
218
- }
219
- val userConsumptions =
220
- expired.values.foldLeft(resourceConsumption)(_.subtract(_))
221
- expireApplication(
222
- userConsumptions,
223
- getUserStorageQuota(userIdentifier),
224
- notExpired.toSeq,
225
- USER_EXHAUSTED )
226
- (Option (subResourceConsumptions), resourceConsumptionList)
227
- } else {
228
- (None , resourceConsumptionList)
211
+ // Step 3: Expire user level exceeded app except already expired app
212
+ clusterResourceConsumption = clusterResourceConsumption.add(resourceConsumption)
213
+ tenantResourceConsumption = tenantResourceConsumption.add(resourceConsumption)
214
+ val quotaStatus = checkUserQuotaSpace(userIdentifier, resourceConsumption)
215
+ userQuotaStatus.put(userIdentifier, quotaStatus)
216
+ if (interruptShuffleEnabled && quotaStatus.exceed) {
217
+ val subResourceConsumptions = computeSubConsumption(resourceConsumptionList)
218
+ // Compute expired size
219
+ val (expired, notExpired) = subResourceConsumptions.partition { case (app, _) =>
220
+ appQuotaStatus.containsKey(app)
229
221
}
230
- }
222
+ val userConsumptions =
223
+ expired.values.foldLeft(resourceConsumption)(_.subtract(_))
224
+ expireApplication(
225
+ userConsumptions,
226
+ getUserStorageQuota(userIdentifier),
227
+ notExpired.toSeq,
228
+ USER_EXHAUSTED )
229
+ (Option (subResourceConsumptions), resourceConsumptionList)
230
+ } else {
231
+ (None , resourceConsumptionList)
232
+ }
233
+ }
231
234
232
- val quotaStatus = checkTenantQuotaSpace(tenantId, tenantResourceConsumption)
233
- tenantQuotaStatus.put(tenantId, quotaStatus)
234
- // Expire tenant level exceeded app except already expired app
235
- if (interruptShuffleEnabled && quotaStatus.exceed) {
236
- val appConsumptions = userResourceConsumption.map {
237
- case (None , subConsumptionList) => computeSubConsumption(subConsumptionList)
238
- case (Some (subConsumptions), _) => subConsumptions
239
- }.flatMap(_.toSeq).toSeq
235
+ val quotaStatus = checkTenantQuotaSpace(tenantId, tenantResourceConsumption)
236
+ tenantQuotaStatus.put(tenantId, quotaStatus)
237
+ // Expire tenant level exceeded app except already expired app
238
+ if (interruptShuffleEnabled && quotaStatus.exceed) {
239
+ val appConsumptions = userResourceConsumption.map {
240
+ case (None , subConsumptionList) => computeSubConsumption(subConsumptionList)
241
+ case (Some (subConsumptions), _) => subConsumptions
242
+ }.flatMap(_.toSeq).toSeq
240
243
241
- // Compute nonExpired app total usage
242
- val (expired, notExpired) = appConsumptions.partition { case (app, _) =>
243
- appQuotaStatus.containsKey(app)
244
- }
245
- tenantResourceConsumption =
246
- expired.map(_._2).foldLeft(tenantResourceConsumption)(_.subtract(_))
247
- expireApplication(
248
- tenantResourceConsumption,
249
- getTenantStorageQuota(tenantId),
250
- notExpired,
251
- TENANT_EXHAUSTED )
252
- (Option (appConsumptions), tenantConsumptionList.values)
253
- } else {
254
- (None , tenantConsumptionList.values)
244
+ // Compute nonExpired app total usage
245
+ val (expired, notExpired) = appConsumptions.partition { case (app, _) =>
246
+ appQuotaStatus.containsKey(app)
255
247
}
248
+ tenantResourceConsumption =
249
+ expired.map(_._2).foldLeft(tenantResourceConsumption)(_.subtract(_))
250
+ expireApplication(
251
+ tenantResourceConsumption,
252
+ getTenantStorageQuota(tenantId),
253
+ notExpired,
254
+ TENANT_EXHAUSTED )
255
+ (Option (appConsumptions), tenantConsumptionList.map(_._2).toSeq)
256
+ } else {
257
+ (None , tenantConsumptionList.map(_._2).toSeq)
258
+ }
256
259
}
257
260
258
261
// Expire cluster level exceeded app except already expired app
259
262
clusterQuotaStatus = checkClusterQuotaSpace(clusterResourceConsumption)
260
263
if (interruptShuffleEnabled && clusterQuotaStatus.exceed) {
261
264
val appConsumptions = tenantResourceConsumption.map {
262
- case (None , subConsumptionList) => computeSubConsumption(subConsumptionList.toSeq )
265
+ case (None , subConsumptionList) => computeSubConsumption(subConsumptionList)
263
266
case (Some (subConsumptions), _) => subConsumptions
264
267
}.flatMap(_.toSeq).toSeq
265
268
0 commit comments