Skip to content

Commit

Permalink
optimization: 灰度回滚逻辑优化 (closed TencentBlueKing#1893)
Browse files (browse the repository at this point in the history)
  • Loading branch information
wyyalt committed Oct 30, 2023
1 parent e8adce0 commit 6e87ccc
Showing 1 changed file with 67 additions and 17 deletions.
84 changes: 67 additions & 17 deletions apps/core/gray/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,26 @@ def update_host_ap_by_host_ids(

# 切换接入点
update_kwargs: typing.Dict[str, typing.Any] = {"updated_at": timezone.now()}
partial_host_ids: typing.List[int] = [host_node["bk_host_id"] for host_node in partial_host_nodes]
gse_v1_ap_id_is_none_host_count: int = 0
# 若需要更新gse_v1_ap_id使用F方式,顺序不可以变
if rollback:
# 先更新gse_v1_ap_id 为None的主机更新成映射对应的v1_ap_id(需要保证映射为一对一关系)
gse_v1_ap_id_is_none_host_ids: typing.List[int] = list(
node_man_models.Host.objects.filter(
bk_biz_id__in=bk_biz_ids,
bk_host_id__in=partial_host_ids,
gse_v1_ap_id=None,
).values_list("bk_host_id", flat=True)
)

gse_v1_ap_id_is_none_host_count: int = node_man_models.Host.objects.filter(
bk_host_id__in=gse_v1_ap_id_is_none_host_ids
).update(ap_id=v1_ap_id)

need_update_host_id: typing.List[int] = list(set(partial_host_ids) - set(gse_v1_ap_id_is_none_host_ids))

# 更新gse_v1_ap_id不为None的主机
update_kwargs.update(
ap_id=F("gse_v1_ap_id"),
gse_v1_ap_id=None,
Expand All @@ -173,13 +191,43 @@ def update_host_ap_by_host_ids(
gse_v1_ap_id=F("ap_id"),
ap_id=v2_ap_id,
)
need_update_host_id: typing.List[int] = partial_host_ids

update_count: int = node_man_models.Host.objects.filter(
bk_biz_id__in=bk_biz_ids, bk_host_id__in=[host_node["bk_host_id"] for host_node in partial_host_nodes]
bk_biz_id__in=bk_biz_ids, bk_host_id__in=need_update_host_id
).update(**update_kwargs)

update_count: int = update_count + gse_v1_ap_id_is_none_host_count
if all(
[
update_count,
not is_biz_gray,
rollback,
]
):
# 如果按业务回滚在上层已进行了回滚,此处不做处理
# 将与回滚主机关联的业务和管控区域回滚
rollback_info: typing.List[typing.Dict[str, int]] = list(
node_man_models.Host.objects.filter(bk_host_id__in=partial_host_ids)
.values("bk_biz_id", "bk_cloud_id")
.distinct()
.order_by("bk_biz_id")
)

bk_biz_ids: typing.Set[int] = set()
bk_cloud_ids: typing.Set[int] = set()
for info in rollback_info:
bk_biz_ids.add(info["bk_biz_id"])
bk_cloud_ids.add(info["bk_cloud_id"])

cls.update_cloud_ap_id(
validated_data={"bk_biz_ids": bk_biz_ids}, clouds=list(bk_cloud_ids), rollback=True
)
cls.update_gray_scope_list(validated_data={"bk_biz_ids": bk_biz_ids}, rollback=True)

logger.info(
f"[update_host_ap_by_host_ids][rollback={rollback}] Update count -> {update_count}, "
f"[update_host_ap_by_host_ids][rollback={rollback}] "
f"Update count -> {update_count}, "
f"expect count -> {len(partial_host_nodes)}"
)

Expand Down Expand Up @@ -222,35 +270,37 @@ def update_gray_scope_list(cls, validated_data: typing.Dict[str, typing.List[typ
logger.info("[update_gray_scope_list][rollback={rollback}] flush cache")

@classmethod
def update_cloud_ap_id(cls, validated_data: typing.Dict[str, typing.List[typing.Any]], rollback: bool = False):
def update_cloud_ap_id(
cls,
validated_data: typing.Dict[str, typing.List[typing.Any]],
clouds: typing.List[int] = [],
rollback: bool = False,
):
gray_ap_map: typing.Dict[int, int] = cls.get_gray_ap_map()
gray_scope_list: typing.List[int] = GrayTools.get_or_create_gse2_gray_scope_list(get_cache=False)

clouds = (
node_man_models.Host.objects.filter(bk_biz_id__in=validated_data["bk_biz_ids"])
.values("bk_cloud_id")
.distinct()
.order_by("bk_cloud_id")
clouds: typing.List[int] = clouds or list(
set(
node_man_models.Host.objects.filter(bk_biz_id__in=validated_data["bk_biz_ids"]).values_list(
"bk_cloud_id", flat=True
)
)
)

ap_id_obj_map: typing.Dict[int, node_man_models.AccessPoint] = node_man_models.AccessPoint.ap_id_obj_map()

for cloud in clouds:
for cloud_id in clouds:
cloud_obj: typing.Optional[node_man_models.Cloud] = node_man_models.Cloud.objects.filter(
bk_cloud_id=cloud["bk_cloud_id"]
bk_cloud_id=cloud_id
).first()

# 跳过管控区域不存在的情况
if not cloud_obj:
continue

cloud_bizs = (
node_man_models.Host.objects.filter(bk_cloud_id=cloud["bk_cloud_id"])
.values("bk_biz_id")
.distinct()
.order_by("bk_biz_id")
cloud_bk_biz_ids: typing.List[int] = list(
set(node_man_models.Host.objects.filter(bk_cloud_id=cloud_id).values_list("bk_biz_id", flat=True))
)
cloud_bk_biz_ids: typing.List[int] = [cloud_biz["bk_biz_id"] for cloud_biz in cloud_bizs]

if ap_id_obj_map[cloud_obj.ap_id].gse_version == GseVersion.V2.value and rollback:
# 当管控区域覆盖的业务(cloud_bk_biz_ids)完全包含于灰度业务集(gray_scope_list)时,需要操作回滚
Expand Down Expand Up @@ -314,7 +364,7 @@ def rollback(cls, validated_data: typing.Dict[str, typing.List[typing.Any]]):
# 更新管控区域接入点
cls.update_cloud_ap_id(validated_data, rollback=True)

# 更新灰度业务范围
# 更新灰度业务范围, 无论是按业务还是ip回滚都去掉业务灰度标记
cls.update_gray_scope_list(validated_data, rollback=True)

# 更新主机ap
Expand Down

0 comments on commit 6e87ccc

Please sign in to comment.