Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions pkg/autoscaler/autoscaler/metric_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ func NewMetricCollector(target *v1alpha1.Target, binding *v1alpha1.AutoscalingPo
}

type HistogramInfo struct {
PodStartTime *metav1.Time
HistogramMap map[string]*histogram.Snapshot
PodStartTime *metav1.Time
HistogramMap map[string]*histogram.Snapshot
CounterMap map[string]float64
ScrapeTimestamp int64
}

type Scope struct {
Expand Down Expand Up @@ -130,7 +132,7 @@ func (collector *MetricCollector) UpdateMetrics(ctx context.Context, podLister l

func (collector *MetricCollector) fetchMetricsFromPods(ctx context.Context, pods []*corev1.Pod, currentHistograms *map[string]HistogramInfo) InstanceInfo {
instanceInfo := InstanceInfo{true, false, make(algorithm.Metrics)}
pastHistograms, ok := collector.PastHistograms.GetLastUnfreshSnapshot()
pastHistograms, _, ok := collector.PastHistograms.GetLastUnfreshSnapshotWithTimestamp()
if !ok {
pastHistograms = make(map[string]HistogramInfo)
}
Expand All @@ -142,13 +144,24 @@ func (collector *MetricCollector) fetchMetricsFromPods(ctx context.Context, pods

pastValue, ok := pastHistograms[pod.Name]
var pastHistogramMap map[string]*histogram.Snapshot
var pastCounterMap map[string]float64
var pastScrapeTimestamp int64
if !ok || pod.Status.StartTime == nil || pastValue.PodStartTime == nil || !pod.Status.StartTime.Equal(pastValue.PodStartTime) {
pastHistogramMap = make(map[string]*histogram.Snapshot)
pastCounterMap = make(map[string]float64)
pastScrapeTimestamp = 0
} else {
pastHistogramMap = pastValue.HistogramMap
if pastValue.CounterMap == nil {
pastCounterMap = make(map[string]float64)
} else {
pastCounterMap = pastValue.CounterMap
}
pastScrapeTimestamp = pastValue.ScrapeTimestamp
}

currentHistogramMap := make(map[string]*histogram.Snapshot)
currentCounterMap := make(map[string]float64)
ip := pod.Status.PodIP
podCtx, cancel := context.WithTimeout(ctx, util.AutoscaleCtxTimeoutSeconds*time.Second)
defer cancel()
Expand All @@ -172,17 +185,19 @@ func (collector *MetricCollector) fetchMetricsFromPods(ctx context.Context, pods
return
}
result := string(bodyStr)
collector.processPrometheusString(result, pastHistogramMap, currentHistogramMap, instanceInfo.MetricsMap)
collector.processPrometheusString(result, pastHistogramMap, pastCounterMap, currentHistogramMap, currentCounterMap, pastScrapeTimestamp, instanceInfo.MetricsMap)
(*currentHistograms)[pod.Name] = HistogramInfo{
PodStartTime: pod.Status.StartTime,
HistogramMap: currentHistogramMap,
PodStartTime: pod.Status.StartTime,
HistogramMap: currentHistogramMap,
CounterMap: currentCounterMap,
ScrapeTimestamp: util.GetCurrentTimestamp(),
}
Comment on lines +188 to 194

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For improved precision in rate calculation, it's better to determine the current scrape timestamp once per pod and use it consistently. Currently, util.GetCurrentTimestamp() is called inside processPrometheusString for each metric, and again when creating HistogramInfo. This can introduce minor inaccuracies because the timestamp used for the rate calculation (now) will be slightly different from the timestamp stored for the next cycle (ScrapeTimestamp).

To improve this, you can get the timestamp once before processing the metrics and use it in both places. This ensures the elapsed_seconds for the rate calculation is based on the exact interval between the stored scrape timestamps.

Example of the proposed change:

// In fetchMetricsFromPods:
...
result := string(bodyStr)
now := util.GetCurrentTimestamp()
collector.processPrometheusString(result, pastHistogramMap, pastCounterMap, currentHistogramMap, currentCounterMap, pastScrapeTimestamp, now, instanceInfo.MetricsMap)
(*currentHistograms)[pod.Name] = HistogramInfo{
    PodStartTime:    pod.Status.StartTime,
    HistogramMap:    currentHistogramMap,
    CounterMap:      currentCounterMap,
    ScrapeTimestamp: now,
}
...

// And update processPrometheusString signature and body:
func (c *MetricCollector) processPrometheusString(..., pastScrapeTimestamp int64, now int64, instanceMetricMap algorithm.Metrics) {
    // ...
    // inside case ..._COUNTER:
    // remove: now := util.GetCurrentTimestamp()
    // ...
}

}()
}
return instanceInfo
}

func (collector *MetricCollector) processPrometheusString(metricStr string, pastHistograms map[string]*histogram.Snapshot, currentHistograms map[string]*histogram.Snapshot, instanceMetricMap algorithm.Metrics) {
func (collector *MetricCollector) processPrometheusString(metricStr string, pastHistograms map[string]*histogram.Snapshot, pastCounters map[string]float64, currentHistograms map[string]*histogram.Snapshot, currentCounters map[string]float64, pastScrapeTimestamp int64, instanceMetricMap algorithm.Metrics) {
reader := strings.NewReader(metricStr)
decoder := expfmt.NewDecoder(reader, expfmt.NewFormat(expfmt.TypeTextPlain))
for {
Expand All @@ -208,7 +223,24 @@ func (collector *MetricCollector) processPrometheusString(metricStr string, past
metric := mf.Metric[0]
switch mf.GetType() {
case io_prometheus_client.MetricType_COUNTER:
addMetric(instanceMetricMap, mf.GetName(), metric.GetCounter().GetValue())
cur := metric.GetCounter().GetValue()
currentCounters[mf.GetName()] = cur

rate := 0.0
now := util.GetCurrentTimestamp()
if pastCounters != nil && pastScrapeTimestamp > 0 {
if prev, ok := pastCounters[mf.GetName()]; ok {
if cur < prev {
rate = 0
} else {
elapsedSec := float64(now-pastScrapeTimestamp) / 1000.0
if elapsedSec > 0 {
rate = (cur - prev) / elapsedSec
}
}
}
}
addMetric(instanceMetricMap, mf.GetName(), rate)
case io_prometheus_client.MetricType_GAUGE:
addMetric(instanceMetricMap, mf.GetName(), metric.GetGauge().GetValue())
case io_prometheus_client.MetricType_HISTOGRAM:
Expand Down
16 changes: 16 additions & 0 deletions pkg/autoscaler/datastructure/sliding_window.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,19 @@ func (window *SnapshotSlidingWindow[T]) GetLastUnfreshSnapshot() (value T, ok bo
}
return front.value, true
}

func (window *SnapshotSlidingWindow[T]) GetLastUnfreshSnapshotWithTimestamp() (value T, timestamp int64, ok bool) {
if window.freshMilliseconds == 0 {
return value, 0, false
}
currentTimestamp := window.getCurrentTimestamp()
window.expire(currentTimestamp)
if window.pool.Len() == 0 {
return value, 0, false
}
front := window.pool.Front()
if isFresh(window.freshMilliseconds, currentTimestamp, front.timestamp) {
return value, 0, false
}
return front.value, front.timestamp, true
}
Comment on lines +238 to +252

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic inside this new function is nearly identical to GetLastUnfreshSnapshot. To reduce code duplication and improve maintainability, GetLastUnfreshSnapshot could be refactored to call this new function and discard the returned timestamp. This would make the code more aligned with the DRY (Don't Repeat Yourself) principle.

Loading