Skip to content

Commit f058835

Browse files
authored
Merge pull request #17710 from hakman/azure-karpenter
azure: Allow independent VMs to join the cluster
2 parents 3440db0 + 4c300bb commit f058835

File tree

7 files changed

+216
-284
lines changed

7 files changed

+216
-284
lines changed

pkg/nodeidentity/azure/client.go

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -22,81 +22,97 @@ import (
2222
"fmt"
2323
"io"
2424
"net/http"
25+
"strings"
2526

27+
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
2628
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2729
compute "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute"
28-
"k8s.io/kops/upup/pkg/fi"
2930
)
3031

31-
type instanceComputeMetadata struct {
32-
ResourceGroupName string `json:"resourceGroupName"`
33-
SubscriptionID string `json:"subscriptionId"`
34-
}
35-
36-
type instanceMetadata struct {
37-
Compute *instanceComputeMetadata `json:"compute"`
38-
}
39-
4032
// client is an Azure client.
4133
type client struct {
42-
metadata *instanceMetadata
43-
vmssesClient *compute.VirtualMachineScaleSetsClient
34+
subscriptionID string
35+
vmClient *compute.VirtualMachinesClient
36+
vmssClient *compute.VirtualMachineScaleSetVMsClient
4437
}
4538

4639
// newClient returns a new Client.
4740
func newClient() (*client, error) {
48-
m, err := queryInstanceMetadata()
41+
metadata, err := queryComputeInstanceMetadata()
4942
if err != nil {
5043
return nil, fmt.Errorf("error querying instance metadata: %s", err)
5144
}
52-
if m.Compute.SubscriptionID == "" {
53-
return nil, fmt.Errorf("empty subscription name")
54-
}
55-
if m.Compute.ResourceGroupName == "" {
56-
return nil, fmt.Errorf("empty resource group name")
45+
if metadata.SubscriptionID == "" {
46+
return nil, fmt.Errorf("empty subscription ID")
5747
}
5848

5949
cred, err := azidentity.NewDefaultAzureCredential(nil)
6050
if err != nil {
6151
return nil, fmt.Errorf("creating identity: %w", err)
6252
}
6353

64-
vmssesClient, err := compute.NewVirtualMachineScaleSetsClient(m.Compute.SubscriptionID, cred, nil)
54+
vmClient, err := compute.NewVirtualMachinesClient(metadata.SubscriptionID, cred, nil)
55+
if err != nil {
56+
return nil, fmt.Errorf("creating VMs client: %w", err)
57+
}
58+
59+
vmssClient, err := compute.NewVirtualMachineScaleSetVMsClient(metadata.SubscriptionID, cred, nil)
6560
if err != nil {
66-
return nil, fmt.Errorf("creating VMSS client: %w", err)
61+
return nil, fmt.Errorf("creating VMSS VMs client: %w", err)
6762
}
6863

6964
return &client{
70-
metadata: m,
71-
vmssesClient: vmssesClient,
65+
vmClient: vmClient,
66+
vmssClient: vmssClient,
7267
}, nil
7368
}
7469

75-
// getVMScaleSet returns the specified VM ScaleSet.
76-
func (c *client) getVMScaleSet(ctx context.Context, vmssName string) (*compute.VirtualMachineScaleSet, error) {
77-
opts := &compute.VirtualMachineScaleSetsClientGetOptions{
78-
Expand: fi.PtrTo(compute.ExpandTypesForGetVMScaleSetsUserData),
70+
func (c *client) getVMTags(ctx context.Context, providerID string) (map[string]*string, error) {
71+
if !strings.HasPrefix(providerID, "azure://") {
72+
return nil, fmt.Errorf("unknown providerID : %s", providerID)
7973
}
80-
resp, err := c.vmssesClient.Get(ctx, c.metadata.Compute.ResourceGroupName, vmssName, opts)
74+
75+
res, err := arm.ParseResourceID(strings.TrimPrefix(providerID, "azure://"))
8176
if err != nil {
82-
return nil, fmt.Errorf("getting VMSS: %w", err)
77+
return nil, fmt.Errorf("error parsing providerID: %v", err)
78+
}
79+
80+
switch res.ResourceType.String() {
81+
case "Microsoft.Compute/virtualMachines":
82+
resp, err := c.vmClient.Get(ctx, res.ResourceGroupName, res.Name, nil)
83+
if err != nil {
84+
return nil, fmt.Errorf("getting VM: %w", err)
85+
}
86+
return resp.VirtualMachine.Tags, nil
87+
case "Microsoft.Compute/virtualMachineScaleSets/virtualMachines":
88+
resp, err := c.vmssClient.Get(ctx, res.ResourceGroupName, res.Parent.Name, res.Name, nil)
89+
if err != nil {
90+
return nil, fmt.Errorf("getting VMSS VM: %w", err)
91+
}
92+
return resp.VirtualMachineScaleSetVM.Tags, nil
93+
default:
94+
return nil, fmt.Errorf("unsupported resource type %q for %q", res.ResourceType, providerID)
8395
}
84-
return &resp.VirtualMachineScaleSet, nil
8596
}
8697

87-
// queryInstanceMetadata queries Azure Instance Metadata documented in
88-
// https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service.
89-
func queryInstanceMetadata() (*instanceMetadata, error) {
98+
type instanceMetadata struct {
99+
SubscriptionID string `json:"subscriptionId"`
100+
ResourceGroupName string `json:"resourceGroupName"`
101+
}
102+
103+
// queryComputeInstanceMetadata queries Azure Instance Metadata.
104+
// https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service
105+
func queryComputeInstanceMetadata() (*instanceMetadata, error) {
90106
client := &http.Client{}
91-
req, err := http.NewRequest("GET", "http://169.254.169.254/metadata/instance", nil)
107+
req, err := http.NewRequest("GET", "http://169.254.169.254/metadata/instance/compute", nil)
92108
if err != nil {
93109
return nil, fmt.Errorf("error creating a new request: %s", err)
94110
}
95111
req.Header.Add("Metadata", "True")
96112

97113
q := req.URL.Query()
114+
q.Add("api-version", "2025-04-07")
98115
q.Add("format", "json")
99-
q.Add("api-version", "2020-06-01")
100116
req.URL.RawQuery = q.Encode()
101117

102118
resp, err := client.Do(req)
@@ -109,17 +125,9 @@ func queryInstanceMetadata() (*instanceMetadata, error) {
109125
if err != nil {
110126
return nil, fmt.Errorf("error reading a response from the metadata server: %s", err)
111127
}
112-
metadata, err := unmarshalInstanceMetadata(body)
113-
if err != nil {
128+
metadata := &instanceMetadata{}
129+
if err := json.Unmarshal(body, metadata); err != nil {
114130
return nil, fmt.Errorf("error unmarshalling metadata: %s", err)
115131
}
116132
return metadata, nil
117133
}
118-
119-
func unmarshalInstanceMetadata(data []byte) (*instanceMetadata, error) {
120-
m := &instanceMetadata{}
121-
if err := json.Unmarshal(data, m); err != nil {
122-
return nil, err
123-
}
124-
return m, nil
125-
}

pkg/nodeidentity/azure/client_test.go

Lines changed: 0 additions & 43 deletions
This file was deleted.

pkg/nodeidentity/azure/identify.go

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
"strings"
2323
"time"
2424

25-
compute "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute"
25+
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
2626
corev1 "k8s.io/api/core/v1"
2727
expirationcache "k8s.io/client-go/tools/cache"
2828
"k8s.io/klog/v2"
@@ -42,16 +42,9 @@ const (
4242
cacheTTL = 60 * time.Minute
4343
)
4444

45-
type vmssGetter interface {
46-
getVMScaleSet(ctx context.Context, vmssName string) (*compute.VirtualMachineScaleSet, error)
47-
}
48-
49-
var _ vmssGetter = &client{}
50-
5145
// nodeIdentifier identifies a node from Azure VM.
5246
type nodeIdentifier struct {
53-
vmssGetter vmssGetter
54-
47+
azureClient *client
5548
// cache is a cache of nodeidentity.Info
5649
cache expirationcache.Store
5750
// cacheEnabled indicates if caching should be used
@@ -68,7 +61,7 @@ func New(cacheNodeidentityInfo bool) (nodeidentity.Identifier, error) {
6861
}
6962

7063
return &nodeIdentifier{
71-
vmssGetter: client,
64+
azureClient: client,
7265
cache: expirationcache.NewTTLStore(stringKeyFunc, cacheTTL),
7366
cacheEnabled: cacheNodeidentityInfo,
7467
}, nil
@@ -78,17 +71,20 @@ func New(cacheNodeidentityInfo bool) (nodeidentity.Identifier, error) {
7871
func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (*nodeidentity.Info, error) {
7972
providerID := node.Spec.ProviderID
8073
if providerID == "" {
81-
return nil, fmt.Errorf("providerID was not set for node %s", node.Name)
74+
return nil, fmt.Errorf("providerID not set for node %q", node.Name)
75+
}
76+
if !strings.HasPrefix(providerID, "azure://") {
77+
return nil, fmt.Errorf("providerID %q not recognized for node %q", providerID, node.Name)
8278
}
83-
vmssName, err := getVMSSNameFromProviderID(providerID)
79+
80+
vmName, err := getVMNameFromProviderID(providerID)
8481
if err != nil {
85-
return nil, fmt.Errorf("error on extracting VM ScaleSet name: %s", err)
82+
return nil, err
8683
}
8784

88-
// If caching is enabled try pulling nodeidentity.Info from cache before
89-
// doing a EC2 API call.
85+
// If caching is enabled, try pulling nodeidentity.Info from the cache before doing an API call.
9086
if i.cacheEnabled {
91-
obj, exists, err := i.cache.GetByKey(vmssName)
87+
obj, exists, err := i.cache.GetByKey(vmName)
9288
if err != nil {
9389
klog.Warningf("Nodeidentity info cache lookup failure: %v", err)
9490
}
@@ -97,13 +93,13 @@ func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (*
9793
}
9894
}
9995

100-
vmss, err := i.vmssGetter.getVMScaleSet(ctx, vmssName)
96+
tags, err := i.azureClient.getVMTags(ctx, providerID)
10197
if err != nil {
10298
return nil, fmt.Errorf("error on getting VM ScaleSet: %s", err)
10399
}
104100

105101
labels := map[string]string{}
106-
for k, v := range vmss.Tags {
102+
for k, v := range tags {
107103
if k == azure.TagClusterName && v != nil {
108104
labels[kops.LabelClusterName] = *v
109105
}
@@ -120,25 +116,25 @@ func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (*
120116
case kops.InstanceGroupRoleNode.ToLowerString():
121117
labels[nodelabels.RoleLabelNode16] = ""
122118
default:
123-
klog.Warningf("Unknown or unsupported node role tag %q for VMSS %q", k, vmssName)
119+
klog.Warningf("Unknown or unsupported node role tag %q for VM %q", k, vmName)
124120
}
125121
}
126122
if strings.HasPrefix(k, ClusterNodeTemplateLabel) && v != nil {
127123
l := strings.SplitN(*v, "=", 2)
128124
if len(l) <= 1 {
129-
klog.Warningf("Malformed cloud label tag %q=%q for VMSS %q", k, *v, vmssName)
125+
klog.Warningf("Malformed cloud label tag %q=%q for VM %q", k, *v, vmName)
130126
} else {
131127
labels[l[0]] = l[1]
132128
}
133129
}
134130
}
135131

136132
info := &nodeidentity.Info{
137-
InstanceID: vmssName,
133+
InstanceID: vmName,
138134
Labels: labels,
139135
}
140136

141-
// If caching is enabled add the nodeidentity.Info to cache.
137+
// If caching is enabled, add the nodeidentity.Info to the cache.
142138
if i.cacheEnabled {
143139
err = i.cache.Add(info)
144140
if err != nil {
@@ -155,14 +151,22 @@ func stringKeyFunc(obj interface{}) (string, error) {
155151
return key, nil
156152
}
157153

158-
func getVMSSNameFromProviderID(providerID string) (string, error) {
154+
func getVMNameFromProviderID(providerID string) (string, error) {
159155
if !strings.HasPrefix(providerID, "azure://") {
160156
return "", fmt.Errorf("providerID %q not recognized", providerID)
161157
}
162158

163-
l := strings.Split(strings.TrimPrefix(providerID, "azure://"), "/")
164-
if len(l) != 11 {
165-
return "", fmt.Errorf("unexpected format of providerID %q", providerID)
159+
res, err := arm.ParseResourceID(strings.TrimPrefix(providerID, "azure://"))
160+
if err != nil {
161+
return "", fmt.Errorf("error parsing providerID: %v", err)
162+
}
163+
164+
switch res.ResourceType.String() {
165+
case "Microsoft.Compute/virtualMachines":
166+
return res.Name, nil
167+
case "Microsoft.Compute/virtualMachineScaleSets/virtualMachines":
168+
return res.Parent.Name + "_" + res.Name, nil
169+
default:
170+
return "", fmt.Errorf("unsupported resource type %q for providerID %q", res.ResourceType, providerID)
166171
}
167-
return l[len(l)-3], nil
168172
}

0 commit comments

Comments
 (0)