Skip to content

Commit 1783144

Browse files
feat(nodes): nodes_top retrieves Node resource consumption (metrics API) (#420)
* Support for nodes_top similar to pods_top Signed-off-by: Neeraj Krishna Gopalakrishna <[email protected]> * review(nodes): nodes_top retrieves Node resource consumption (metrics API) Signed-off-by: Marc Nuri <[email protected]> --------- Signed-off-by: Neeraj Krishna Gopalakrishna <[email protected]> Signed-off-by: Marc Nuri <[email protected]> Co-authored-by: Marc Nuri <[email protected]>
1 parent 6f3a2e0 commit 1783144

10 files changed

+519
-0
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ In case multi-cluster support is enabled (default) and you have access to multip
252252
- **nodes_stats_summary** - Get detailed resource usage statistics from a Kubernetes node via the kubelet's Summary API. Provides comprehensive metrics including CPU, memory, filesystem, and network usage at the node, pod, and container levels. On systems with cgroup v2 and kernel 4.20+, also includes PSI (Pressure Stall Information) metrics that show resource pressure for CPU, memory, and I/O. See https://kubernetes.io/docs/reference/instrumentation/understand-psi-metrics/ for details on PSI metrics
253253
- `name` (`string`) **(required)** - Name of the node to get stats from
254254

255+
- **nodes_top** - List the resource consumption (CPU and memory) as recorded by the Kubernetes Metrics Server for the specified Kubernetes Nodes or all nodes in the cluster
256+
- `label_selector` (`string`) - Kubernetes label selector (e.g. 'node-role.kubernetes.io/worker=') to filter nodes by label (Optional, only applicable when name is not provided)
257+
- `name` (`string`) - Name of the Node to get the resource consumption from (Optional, all Nodes if not provided)
258+
255259
- **pods_list** - List all the Kubernetes pods in the current cluster from all namespaces
256260
- `labelSelector` (`string`) - Optional Kubernetes label selector (e.g. 'app=myapp,env=prod' or 'app in (myapp,yourapp)'), use this option when you want to filter the pods by label
257261

pkg/kubernetes/accesscontrol_clientset.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ func (a *AccessControlClientset) DiscoveryClient() discovery.DiscoveryInterface
3939
return a.discoveryClient
4040
}
4141

42+
func (a *AccessControlClientset) Nodes() (corev1.NodeInterface, error) {
43+
gvk := &schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Node"}
44+
if !isAllowed(a.staticConfig, gvk) {
45+
return nil, isNotAllowedError(gvk)
46+
}
47+
return a.delegate.CoreV1().Nodes(), nil
48+
}
49+
4250
func (a *AccessControlClientset) NodesLogs(ctx context.Context, name string) (*rest.Request, error) {
4351
gvk := &schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Node"}
4452
if !isAllowed(a.staticConfig, gvk) {
@@ -55,6 +63,29 @@ func (a *AccessControlClientset) NodesLogs(ctx context.Context, name string) (*r
5563
AbsPath(url...), nil
5664
}
5765

66+
func (a *AccessControlClientset) NodesMetricses(ctx context.Context, name string, listOptions metav1.ListOptions) (*metrics.NodeMetricsList, error) {
67+
gvk := &schema.GroupVersionKind{Group: metrics.GroupName, Version: metricsv1beta1api.SchemeGroupVersion.Version, Kind: "NodeMetrics"}
68+
if !isAllowed(a.staticConfig, gvk) {
69+
return nil, isNotAllowedError(gvk)
70+
}
71+
versionedMetrics := &metricsv1beta1api.NodeMetricsList{}
72+
var err error
73+
if name != "" {
74+
m, err := a.metricsV1beta1.NodeMetricses().Get(ctx, name, metav1.GetOptions{})
75+
if err != nil {
76+
return nil, fmt.Errorf("failed to get metrics for node %s: %w", name, err)
77+
}
78+
versionedMetrics.Items = []metricsv1beta1api.NodeMetrics{*m}
79+
} else {
80+
versionedMetrics, err = a.metricsV1beta1.NodeMetricses().List(ctx, listOptions)
81+
if err != nil {
82+
return nil, fmt.Errorf("failed to list node metrics: %w", err)
83+
}
84+
}
85+
convertedMetrics := &metrics.NodeMetricsList{}
86+
return convertedMetrics, metricsv1beta1api.Convert_v1beta1_NodeMetricsList_To_metrics_NodeMetricsList(versionedMetrics, convertedMetrics, nil)
87+
}
88+
5889
func (a *AccessControlClientset) NodesStatsSummary(ctx context.Context, name string) (*rest.Request, error) {
5990
gvk := &schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Node"}
6091
if !isAllowed(a.staticConfig, gvk) {

pkg/kubernetes/nodes.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ package kubernetes
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
7+
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/metrics/pkg/apis/metrics"
10+
metricsv1beta1api "k8s.io/metrics/pkg/apis/metrics/v1beta1"
611
)
712

813
func (k *Kubernetes) NodesLog(ctx context.Context, name string, query string, tailLines int64) (string, error) {
@@ -59,3 +64,16 @@ func (k *Kubernetes) NodesStatsSummary(ctx context.Context, name string) (string
5964

6065
return string(rawData), nil
6166
}
67+
68+
type NodesTopOptions struct {
69+
metav1.ListOptions
70+
Name string
71+
}
72+
73+
func (k *Kubernetes) NodesTop(ctx context.Context, options NodesTopOptions) (*metrics.NodeMetricsList, error) {
74+
// TODO, maybe move to mcp Tools setup and omit in case metrics aren't available in the target cluster
75+
if !k.supportsGroupVersion(metrics.GroupName + "/" + metricsv1beta1api.SchemeGroupVersion.Version) {
76+
return nil, errors.New("metrics API is not available")
77+
}
78+
return k.manager.accessControlClientSet.NodesMetricses(ctx, options.Name, options.ListOptions)
79+
}

pkg/mcp/nodes_top_test.go

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
package mcp
2+
3+
import (
4+
"net/http"
5+
"testing"
6+
7+
"github.com/BurntSushi/toml"
8+
"github.com/containers/kubernetes-mcp-server/internal/test"
9+
"github.com/mark3labs/mcp-go/mcp"
10+
"github.com/stretchr/testify/suite"
11+
)
12+
13+
type NodesTopSuite struct {
14+
BaseMcpSuite
15+
mockServer *test.MockServer
16+
}
17+
18+
func (s *NodesTopSuite) SetupTest() {
19+
s.BaseMcpSuite.SetupTest()
20+
s.mockServer = test.NewMockServer()
21+
s.Cfg.KubeConfig = s.mockServer.KubeconfigFile(s.T())
22+
s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
23+
w.Header().Set("Content-Type", "application/json")
24+
// Request Performed by DiscoveryClient to Kube API (Get API Groups legacy -core-)
25+
if req.URL.Path == "/api" {
26+
_, _ = w.Write([]byte(`{"kind":"APIVersions","versions":[],"serverAddressByClientCIDRs":[{"clientCIDR":"0.0.0.0/0"}]}`))
27+
return
28+
}
29+
}))
30+
}
31+
32+
func (s *NodesTopSuite) TearDownTest() {
33+
s.BaseMcpSuite.TearDownTest()
34+
if s.mockServer != nil {
35+
s.mockServer.Close()
36+
}
37+
}
38+
39+
func (s *NodesTopSuite) WithMetricsServer() {
40+
s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
41+
// Request Performed by DiscoveryClient to Kube API (Get API Groups)
42+
if req.URL.Path == "/apis" {
43+
_, _ = w.Write([]byte(`{"kind":"APIGroupList","apiVersion":"v1","groups":[{"name":"metrics.k8s.io","versions":[{"groupVersion":"metrics.k8s.io/v1beta1","version":"v1beta1"}],"preferredVersion":{"groupVersion":"metrics.k8s.io/v1beta1","version":"v1beta1"}}]}`))
44+
return
45+
}
46+
// Request Performed by DiscoveryClient to Kube API (Get API Resources)
47+
if req.URL.Path == "/apis/metrics.k8s.io/v1beta1" {
48+
_, _ = w.Write([]byte(`{"kind":"APIResourceList","apiVersion":"v1","groupVersion":"metrics.k8s.io/v1beta1","resources":[{"name":"nodes","singularName":"","namespaced":false,"kind":"NodeMetrics","verbs":["get","list"]}]}`))
49+
return
50+
}
51+
}))
52+
}
53+
54+
func (s *NodesTopSuite) TestNodesTop() {
55+
s.WithMetricsServer()
56+
s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
57+
// List Nodes
58+
if req.URL.Path == "/api/v1/nodes" {
59+
_, _ = w.Write([]byte(`{
60+
"apiVersion": "v1",
61+
"kind": "NodeList",
62+
"items": [
63+
{
64+
"metadata": {
65+
"name": "node-1",
66+
"labels": {
67+
"node-role.kubernetes.io/worker": ""
68+
}
69+
},
70+
"status": {
71+
"allocatable": {
72+
"cpu": "4",
73+
"memory": "16Gi"
74+
},
75+
"nodeInfo": {
76+
"swap": {
77+
"capacity": 0
78+
}
79+
}
80+
}
81+
},
82+
{
83+
"metadata": {
84+
"name": "node-2",
85+
"labels": {
86+
"node-role.kubernetes.io/worker": ""
87+
}
88+
},
89+
"status": {
90+
"allocatable": {
91+
"cpu": "4",
92+
"memory": "16Gi"
93+
},
94+
"nodeInfo": {
95+
"swap": {
96+
"capacity": 0
97+
}
98+
}
99+
}
100+
}
101+
]
102+
}`))
103+
return
104+
}
105+
// Get NodeMetrics
106+
if req.URL.Path == "/apis/metrics.k8s.io/v1beta1/nodes" {
107+
_, _ = w.Write([]byte(`{
108+
"apiVersion": "metrics.k8s.io/v1beta1",
109+
"kind": "NodeMetricsList",
110+
"items": [
111+
{
112+
"metadata": {
113+
"name": "node-1"
114+
},
115+
"timestamp": "2025-10-29T09:00:00Z",
116+
"window": "30s",
117+
"usage": {
118+
"cpu": "500m",
119+
"memory": "2Gi"
120+
}
121+
},
122+
{
123+
"metadata": {
124+
"name": "node-2"
125+
},
126+
"timestamp": "2025-10-29T09:00:00Z",
127+
"window": "30s",
128+
"usage": {
129+
"cpu": "1000m",
130+
"memory": "4Gi"
131+
}
132+
}
133+
]
134+
}`))
135+
return
136+
}
137+
// Get specific NodeMetrics
138+
if req.URL.Path == "/apis/metrics.k8s.io/v1beta1/nodes/node-1" {
139+
_, _ = w.Write([]byte(`{
140+
"apiVersion": "metrics.k8s.io/v1beta1",
141+
"kind": "NodeMetrics",
142+
"metadata": {
143+
"name": "node-1"
144+
},
145+
"timestamp": "2025-10-29T09:00:00Z",
146+
"window": "30s",
147+
"usage": {
148+
"cpu": "500m",
149+
"memory": "2Gi"
150+
}
151+
}`))
152+
return
153+
}
154+
w.WriteHeader(http.StatusNotFound)
155+
}))
156+
s.InitMcpClient()
157+
158+
s.Run("nodes_top() - all nodes", func() {
159+
toolResult, err := s.CallTool("nodes_top", map[string]interface{}{})
160+
s.Require().NotNil(toolResult, "toolResult should not be nil")
161+
s.Run("no error", func() {
162+
s.Falsef(toolResult.IsError, "call tool should succeed")
163+
s.Nilf(err, "call tool should not return error object")
164+
})
165+
s.Run("returns metrics for all nodes", func() {
166+
content := toolResult.Content[0].(mcp.TextContent).Text
167+
s.Contains(content, "node-1", "expected metrics to contain node-1")
168+
s.Contains(content, "node-2", "expected metrics to contain node-2")
169+
s.Contains(content, "CPU(cores)", "expected header with CPU column")
170+
s.Contains(content, "MEMORY(bytes)", "expected header with MEMORY column")
171+
})
172+
})
173+
174+
s.Run("nodes_top(name=node-1) - specific node", func() {
175+
toolResult, err := s.CallTool("nodes_top", map[string]interface{}{
176+
"name": "node-1",
177+
})
178+
s.Require().NotNil(toolResult, "toolResult should not be nil")
179+
s.Run("no error", func() {
180+
s.Falsef(toolResult.IsError, "call tool should succeed")
181+
s.Nilf(err, "call tool should not return error object")
182+
})
183+
s.Run("returns metrics for specific node", func() {
184+
content := toolResult.Content[0].(mcp.TextContent).Text
185+
s.Contains(content, "node-1", "expected metrics to contain node-1")
186+
s.Contains(content, "500m", "expected CPU usage of 500m")
187+
s.Contains(content, "2048Mi", "expected memory usage of 2048Mi")
188+
})
189+
})
190+
191+
s.Run("nodes_top(label_selector=node-role.kubernetes.io/worker=)", func() {
192+
toolResult, err := s.CallTool("nodes_top", map[string]interface{}{
193+
"label_selector": "node-role.kubernetes.io/worker=",
194+
})
195+
s.Require().NotNil(toolResult, "toolResult should not be nil")
196+
s.Run("no error", func() {
197+
s.Falsef(toolResult.IsError, "call tool should succeed")
198+
s.Nilf(err, "call tool should not return error object")
199+
})
200+
s.Run("returns metrics for filtered nodes", func() {
201+
content := toolResult.Content[0].(mcp.TextContent).Text
202+
s.Contains(content, "node-1", "expected metrics to contain node-1")
203+
s.Contains(content, "node-2", "expected metrics to contain node-2")
204+
})
205+
})
206+
}
207+
208+
func (s *NodesTopSuite) TestNodesTopMetricsUnavailable() {
209+
s.InitMcpClient()
210+
211+
s.Run("nodes_top() - metrics unavailable", func() {
212+
toolResult, err := s.CallTool("nodes_top", map[string]interface{}{})
213+
s.Require().NotNil(toolResult, "toolResult should not be nil")
214+
s.Run("has error", func() {
215+
s.Truef(toolResult.IsError, "call tool should fail when metrics unavailable")
216+
s.Nilf(err, "call tool should not return error object")
217+
})
218+
s.Run("describes metrics unavailable", func() {
219+
content := toolResult.Content[0].(mcp.TextContent).Text
220+
s.Contains(content, "failed to get nodes top", "expected error message about failing to get nodes top")
221+
})
222+
})
223+
}
224+
225+
func (s *NodesTopSuite) TestNodesTopDenied() {
226+
s.Require().NoError(toml.Unmarshal([]byte(`
227+
denied_resources = [ { group = "metrics.k8s.io", version = "v1beta1" } ]
228+
`), s.Cfg), "Expected to parse denied resources config")
229+
s.WithMetricsServer()
230+
s.InitMcpClient()
231+
s.Run("nodes_top (denied)", func() {
232+
toolResult, err := s.CallTool("nodes_top", map[string]interface{}{})
233+
s.Require().NotNil(toolResult, "toolResult should not be nil")
234+
s.Run("has error", func() {
235+
s.Truef(toolResult.IsError, "call tool should fail")
236+
s.Nilf(err, "call tool should not return error object")
237+
})
238+
s.Run("describes denial", func() {
239+
expectedMessage := "failed to get nodes top: resource not allowed: metrics.k8s.io/v1beta1, Kind=NodeMetrics"
240+
s.Equalf(expectedMessage, toolResult.Content[0].(mcp.TextContent).Text,
241+
"expected descriptive error '%s', got %v", expectedMessage, toolResult.Content[0].(mcp.TextContent).Text)
242+
})
243+
})
244+
}
245+
246+
func TestNodesTop(t *testing.T) {
247+
suite.Run(t, new(NodesTopSuite))
248+
}

pkg/mcp/testdata/toolsets-core-tools.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,31 @@
9090
},
9191
"name": "nodes_stats_summary"
9292
},
93+
{
94+
"annotations": {
95+
"title": "Nodes: Top",
96+
"readOnlyHint": true,
97+
"destructiveHint": false,
98+
"idempotentHint": true,
99+
"openWorldHint": true
100+
},
101+
"description": "List the resource consumption (CPU and memory) as recorded by the Kubernetes Metrics Server for the specified Kubernetes Nodes or all nodes in the cluster",
102+
"inputSchema": {
103+
"type": "object",
104+
"properties": {
105+
"name": {
106+
"description": "Name of the Node to get the resource consumption from (Optional, all Nodes if not provided)",
107+
"type": "string"
108+
},
109+
"label_selector": {
110+
"description": "Kubernetes label selector (e.g. 'node-role.kubernetes.io/worker=') to filter nodes by label (Optional, only applicable when name is not provided)",
111+
"pattern": "([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]",
112+
"type": "string"
113+
}
114+
}
115+
},
116+
"name": "nodes_top"
117+
},
93118
{
94119
"annotations": {
95120
"title": "Pods: Delete",

0 commit comments

Comments
 (0)