From de1234d5f5e2ac524e2f17dbe634629c96f73bb8 Mon Sep 17 00:00:00 2001 From: Jed Needle Date: Thu, 13 Nov 2025 11:52:48 -0500 Subject: [PATCH 1/4] add timeskew disruption --- api/v1beta1/clock_skew.go | 110 +++++++++++++++++++++++++++++++++ api/v1beta1/disruption_types.go | 25 +++++++-- types/types.go | 3 + 3 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 api/v1beta1/clock_skew.go
diff --git a/api/v1beta1/clock_skew.go b/api/v1beta1/clock_skew.go
new file mode 100644
index 0000000000..efe68180d1
--- /dev/null
+++ b/api/v1beta1/clock_skew.go
@@ -0,0 +1,110 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2025 Datadog, Inc.
+
+package v1beta1
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/hashicorp/go-multierror"
+)
+
+// ClockSkewSpec represents a clock/time skew disruption
+// This disruption manipulates the perceived system time for targeted containers,
+// useful for testing time-sensitive behavior like certificate expiration,
+// distributed locks, time-based tokens, and more.
+type ClockSkewSpec struct {
+	// Offset specifies the time shift to apply
+	// Positive values advance time into the future (e.g., "+24h" to simulate tomorrow)
+	// Negative values move time into the past (e.g., "-1h" to simulate an hour ago)
+	// Format follows Go's time.Duration format: "300ms", "1.5h", "2h45m", etc.
+	// Examples:
+	//   "+24h" - advance time by 24 hours (test cert expiry in 1 day)
+	//   "+8760h" - advance time by 1 year (365 days)
+	//   "-30m" - go back 30 minutes
+	// +kubebuilder:validation:Required
+	Offset DisruptionDuration `json:"offset" chaos_validate:"required"`
+}
+
+// Validate checks that the offset is set, is a well-formed Go duration, is not zero
+// and does not exceed 10 years in either direction.
+// It returns nil for a valid spec, otherwise an error prefixed with "ClockSkew:".
+func (s *ClockSkewSpec) Validate() (retErr error) {
+	if s.Offset == "" {
+		retErr = multierror.Append(retErr, fmt.Errorf("clockSkew.offset must be specified"))
+		return multierror.Prefix(retErr, "ClockSkew:")
+	}
+
+	// Offset.Duration() has no error return, so parse the raw string here to report
+	// a malformed offset as such instead of letting it reach the checks below
+	duration, err := time.ParseDuration(string(s.Offset))
+	if err != nil {
+		retErr = multierror.Append(retErr, fmt.Errorf("clockSkew.offset is not a valid duration: %w", err))
+		return multierror.Prefix(retErr, "ClockSkew:")
+	}
+
+	if duration == 0 {
+		retErr = multierror.Append(retErr, fmt.Errorf("clockSkew.offset must not be zero; use a positive value to advance time or a negative value to go back in time"))
+	}
+
+	// Sanity check: offsets beyond 10 years (of 365 days) in either direction are rejected
+	const maxSkew = 10 * 365 * 24 * time.Hour
+	if duration > maxSkew || duration < -maxSkew {
+		retErr = multierror.Append(retErr, fmt.Errorf("clockSkew.offset of %s seems unusually large (>10 years); offsets beyond 10 years are not supported", s.Offset))
+	}
+
+	return multierror.Prefix(retErr, "ClockSkew:")
+}
+
+// GenerateArgs returns the injection pod arguments for the given spec: the "clock-skew"
+// command followed by the --offset flag and the offset exactly as written in the spec
+func (s *ClockSkewSpec) GenerateArgs() []string {
+	return []string{
+		"clock-skew",
+		"--offset",
+		string(s.Offset),
+	}
+}
+
+// Explain returns a human-readable explanation of this disruption, one entry per line.
+// The first entry is an empty string; the wording depends on the sign of the offset, and
+// positive offsets above 24 hours get an extra note with the approximate number of days.
+func (s *ClockSkewSpec) Explain() []string {
+	explanation := []string{""}
+
+	duration := s.Offset.Duration()
+	var direction, example string
+
+	// Any non-positive duration gets the "backward" wording
+	if duration > 0 {
+		direction = "advance time forward"
+		example = "This can be used to test certificate expiration, token expiry, time-based leases, or scheduled tasks that should trigger in the future."
+	} else {
+		direction = "move time backward"
+		example = "This can be used to test handling of clock drift, time synchronization issues, or replaying time-sensitive operations."
+	}
+
+	explanation = append(explanation,
+		fmt.Sprintf("spec.clockSkew will %s by %s for all processes in the targeted containers.", direction, s.Offset),
+		fmt.Sprintf("\tThe system clock will appear to be shifted by %s from the actual time.", s.Offset),
+		"\tThis is achieved using LD_PRELOAD and libfaketime to intercept time-related system calls.",
+		fmt.Sprintf("\t%s", example),
+		"",
+		"Important notes:",
+		"\t- The actual system clock is not modified; only the perception of time for targeted processes",
+		"\t- This affects gettimeofday(), clock_gettime(), time(), and related syscalls",
+		"\t- New processes started in the container will inherit the skewed time",
+		"\t- Network time synchronization (NTP) in the container will show the real time, not the skewed time",
+	)
+
+	// Only forward skews of more than a day get the note about the number of days
+	if duration > 24*time.Hour {
+		days := int(duration.Hours() / 24)
+		explanation = append(explanation, fmt.Sprintf("\t- You're skewing time by approximately %d days - great for testing certificate expiration!", days))
+	}
+
+	return explanation
+}
diff --git a/api/v1beta1/disruption_types.go b/api/v1beta1/disruption_types.go index 322e2ea60d..bdb8133056 100644 --- a/api/v1beta1/disruption_types.go +++ b/api/v1beta1/disruption_types.go @@ -84,6 +84,8 @@ type DisruptionSpec struct { // +nullable PodReplacement *PodReplacementSpec `json:"podReplacement,omitempty"` // +nullable + ClockSkew *ClockSkewSpec `json:"clockSkew,omitempty"` + // +nullable Reporting *Reporting `json:"reporting,omitempty"` } @@ -692,25 +694,25 @@ func (s DisruptionSpec) validateGlobalDisruptionScope(requireSelectors bool) (re } // Rule: At least one disruption kind must be applied - if s.CPUPressure == nil && s.DiskPressure == nil && s.DiskFailure == nil && s.Network == nil && s.GRPC == nil && s.ContainerFailure == nil && s.NodeFailure == nil && s.PodReplacement == nil { + if s.CPUPressure == nil && s.DiskPressure == nil && s.DiskFailure == nil && s.Network == nil && s.GRPC == nil && s.ContainerFailure == nil && 
s.NodeFailure == nil && s.PodReplacement == nil && s.ClockSkew == nil { retErr = multierror.Append(retErr, errors.New("at least one disruption kind must be specified, please read the docs to see your options")) } // Rule: ContainerFailure, NodeFailure, and PodReplacement disruptions are not compatible with other failure types if s.ContainerFailure != nil { - if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.NodeFailure != nil || s.PodReplacement != nil { + if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.NodeFailure != nil || s.PodReplacement != nil || s.ClockSkew != nil { retErr = multierror.Append(retErr, errors.New("container failure disruptions are not compatible with other disruption kinds. The container failure will remove the impact of the other disruption types")) } } if s.NodeFailure != nil { - if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.ContainerFailure != nil || s.PodReplacement != nil { + if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.ContainerFailure != nil || s.PodReplacement != nil || s.ClockSkew != nil { retErr = multierror.Append(retErr, errors.New("node failure disruptions are not compatible with other disruption kinds. 
The node failure will remove the impact of the other disruption types")) } } if s.PodReplacement != nil { - if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.ContainerFailure != nil || s.NodeFailure != nil { + if s.CPUPressure != nil || s.DiskPressure != nil || s.DiskFailure != nil || s.Network != nil || s.GRPC != nil || s.ContainerFailure != nil || s.NodeFailure != nil || s.ClockSkew != nil { retErr = multierror.Append(retErr, errors.New("pod replacement disruptions are not compatible with other disruption kinds. The pod replacement will remove the impact of the other disruption types")) } // Rule: container failure not possible if disruption is node-level @@ -727,7 +729,8 @@ func (s DisruptionSpec) validateGlobalDisruptionScope(requireSelectors bool) (re s.ContainerFailure != nil || s.DiskPressure != nil || s.GRPC != nil || - s.DiskFailure != nil { + s.DiskFailure != nil || + s.ClockSkew != nil { retErr = multierror.Append(retErr, errors.New("OnInit is only compatible with network disruptions")) } @@ -769,7 +772,7 @@ func (s DisruptionSpec) validateGlobalDisruptionScope(requireSelectors bool) (re // Rule: pulse compatibility if s.Pulse != nil { if s.Pulse.ActiveDuration.Duration() > 0 || s.Pulse.DormantDuration.Duration() > 0 { - if s.NodeFailure != nil || s.PodReplacement != nil || s.ContainerFailure != nil { + if s.NodeFailure != nil || s.PodReplacement != nil || s.ContainerFailure != nil || s.ClockSkew != nil { retErr = multierror.Append(retErr, errors.New("pulse is only compatible with network, cpu pressure, disk pressure, and grpc disruptions")) } } @@ -826,6 +829,8 @@ func (s DisruptionSpec) DisruptionKindPicker(kind chaostypes.DisruptionKindName) disruptionKind = s.PodReplacement case chaostypes.DisruptionKindDiskFailure: disruptionKind = s.DiskFailure + case chaostypes.DisruptionKindClockSkew: + disruptionKind = s.ClockSkew } return disruptionKind @@ -908,6 +913,10 @@ func (s DisruptionSpec) 
DisruptionCount() int { count++ } + if s.ClockSkew != nil { + count++ + } + return count } @@ -1063,6 +1072,10 @@ func (s DisruptionSpec) Explain() []string { explanation = append(explanation, s.GRPC.Explain()...) } + if s.ClockSkew != nil { + explanation = append(explanation, s.ClockSkew.Explain()...) + } + return explanation } diff --git a/types/types.go b/types/types.go index e8dc40f6b4..12acaad6db 100644 --- a/types/types.go +++ b/types/types.go @@ -81,6 +81,8 @@ const ( DisruptionKindGRPCDisruption = "grpc-disruption" // DisruptionKindPodReplacement is a pod replacement disruption DisruptionKindPodReplacement = "pod-replacement" + // DisruptionKindClockSkew is a clock/time skew disruption + DisruptionKindClockSkew = "clock-skew" // DisruptionLevelPod is a disruption injected at the pod level DisruptionLevelPod DisruptionLevel = "pod" @@ -157,4 +159,5 @@ var DisruptionKindNames = []DisruptionKindName{ DisruptionKindDiskFailure, DisruptionKindGRPCDisruption, DisruptionKindPodReplacement, + DisruptionKindClockSkew, } From c9c7d92cb69e29748020baeeba21eca29b56f171 Mon Sep 17 00:00:00 2001 From: Jed Needle Date: Thu, 13 Nov 2025 12:07:03 -0500 Subject: [PATCH 2/4] add tests --- api/v1beta1/clock_skew_test.go | 597 +++++++++++++++++++++++++++++++++ 1 file changed, 597 insertions(+) create mode 100644 api/v1beta1/clock_skew_test.go diff --git a/api/v1beta1/clock_skew_test.go b/api/v1beta1/clock_skew_test.go new file mode 100644 index 0000000000..0a6072aac7 --- /dev/null +++ b/api/v1beta1/clock_skew_test.go @@ -0,0 +1,597 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +package v1beta1_test + +import ( + "fmt" + "strings" + "testing" + "time" + + . "github.com/DataDog/chaos-controller/api/v1beta1" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("ClockSkewSpec", func() { + When("Call the 'Validate' method", func() { + DescribeTable("success cases", + func(clockSkewSpec ClockSkewSpec) { + // Action && Assert + Expect(clockSkewSpec.Validate()).Should(Succeed()) + }, + Entry("with a positive offset of 1 hour", + ClockSkewSpec{ + Offset: DisruptionDuration("1h"), + }, + ), + Entry("with a positive offset of 24 hours", + ClockSkewSpec{ + Offset: DisruptionDuration("24h"), + }, + ), + Entry("with a positive offset of 1 year (8760h)", + ClockSkewSpec{ + Offset: DisruptionDuration("8760h"), + }, + ), + Entry("with a negative offset of 1 hour", + ClockSkewSpec{ + Offset: DisruptionDuration("-1h"), + }, + ), + Entry("with a negative offset of 30 minutes", + ClockSkewSpec{ + Offset: DisruptionDuration("-30m"), + }, + ), + Entry("with a positive offset of 5 years", + ClockSkewSpec{ + Offset: DisruptionDuration("43800h"), // 5 years + }, + ), + Entry("with a complex duration", + ClockSkewSpec{ + Offset: DisruptionDuration("2h30m45s"), + }, + ), + Entry("with milliseconds", + ClockSkewSpec{ + Offset: DisruptionDuration("500ms"), + }, + ), + Entry("with seconds", + ClockSkewSpec{ + Offset: DisruptionDuration("30s"), + }, + ), + ) + + DescribeTable("error cases", + func(cs ClockSkewSpec, expectedErrors []string) { + // Action + err := cs.Validate() + + // Assert + Expect(err).To(HaveOccurred()) + for _, expectedError := range expectedErrors { + Expect(err.Error()).To(ContainSubstring(expectedError)) + } + }, + Entry("with an empty offset", + ClockSkewSpec{ + Offset: DisruptionDuration(""), + }, + []string{ + "clockSkew.offset must be specified", + }, + ), + Entry("with a zero offset", + ClockSkewSpec{ + Offset: DisruptionDuration("0s"), + }, + []string{ + "clockSkew.offset must not be zero", + }, + ), + Entry("with a zero offset as 0h", + ClockSkewSpec{ + Offset: DisruptionDuration("0h"), + }, + []string{ + "clockSkew.offset must not be zero", + }, + ), + Entry("with an 
offset greater than 10 years (positive)", + ClockSkewSpec{ + Offset: DisruptionDuration("100000h"), // ~11.4 years + }, + []string{ + "seems unusually large (>10 years)", + }, + ), + Entry("with an offset greater than 10 years (negative)", + ClockSkewSpec{ + Offset: DisruptionDuration("-100000h"), // ~11.4 years + }, + []string{ + "seems unusually large (>10 years)", + }, + ), + ) + }) + + When("Call the 'GenerateArgs' method", func() { + DescribeTable("success cases", + func(clockSkewSpec ClockSkewSpec, expectedArgs []string) { + // Arrange + expectedArgs = append([]string{"clock-skew"}, expectedArgs...) + + // Action + args := clockSkewSpec.GenerateArgs() + + // Assert + Expect(args).Should(Equal(expectedArgs)) + }, + Entry("with a positive offset of 1 hour", + ClockSkewSpec{ + Offset: DisruptionDuration("1h"), + }, + []string{"--offset", "1h"}, + ), + Entry("with a positive offset of 24 hours", + ClockSkewSpec{ + Offset: DisruptionDuration("24h"), + }, + []string{"--offset", "24h"}, + ), + Entry("with a positive offset of 1 year (8760h)", + ClockSkewSpec{ + Offset: DisruptionDuration("8760h"), + }, + []string{"--offset", "8760h"}, + ), + Entry("with a negative offset of 1 hour", + ClockSkewSpec{ + Offset: DisruptionDuration("-1h"), + }, + []string{"--offset", "-1h"}, + ), + Entry("with a negative offset of 30 minutes", + ClockSkewSpec{ + Offset: DisruptionDuration("-30m"), + }, + []string{"--offset", "-30m"}, + ), + Entry("with a complex duration", + ClockSkewSpec{ + Offset: DisruptionDuration("2h30m45s"), + }, + []string{"--offset", "2h30m45s"}, + ), + ) + }) + + When("Call the 'Explain' method", func() { + Context("with positive offsets", func() { + It("should explain advancing time forward", func() { + // Arrange + clockSkewSpec := ClockSkewSpec{ + Offset: DisruptionDuration("24h"), + } + + // Action + explanation := clockSkewSpec.Explain() + + // Assert + Expect(explanation).ToNot(BeEmpty()) + explanationText := fmt.Sprintf("%v", explanation) + 
Expect(explanationText).To(ContainSubstring("advance time forward")) + Expect(explanationText).To(ContainSubstring("24h")) + Expect(explanationText).To(ContainSubstring("certificate expiration")) + }) + + It("should mention days when offset is greater than 24 hours", func() { + // Arrange + clockSkewSpec := ClockSkewSpec{ + Offset: DisruptionDuration("72h"), // 3 days + } + + // Action + explanation := clockSkewSpec.Explain() + + // Assert + Expect(explanation).ToNot(BeEmpty()) + explanationText := fmt.Sprintf("%v", explanation) + Expect(explanationText).To(ContainSubstring("3 days")) + Expect(explanationText).To(ContainSubstring("certificate expiration")) + }) + + It("should include libfaketime information", func() { + // Arrange + clockSkewSpec := ClockSkewSpec{ + Offset: DisruptionDuration("1h"), + } + + // Action + explanation := clockSkewSpec.Explain() + + // Assert + Expect(explanation).ToNot(BeEmpty()) + explanationText := fmt.Sprintf("%v", explanation) + Expect(explanationText).To(ContainSubstring("libfaketime")) + Expect(explanationText).To(ContainSubstring("LD_PRELOAD")) + }) + }) + + Context("with negative offsets", func() { + It("should explain moving time backward", func() { + // Arrange + clockSkewSpec := ClockSkewSpec{ + Offset: DisruptionDuration("-1h"), + } + + // Action + explanation := clockSkewSpec.Explain() + + // Assert + Expect(explanation).ToNot(BeEmpty()) + explanationText := fmt.Sprintf("%v", explanation) + Expect(explanationText).To(ContainSubstring("move time backward")) + Expect(explanationText).To(ContainSubstring("clock drift")) + }) + }) + + Context("with very small offsets", func() { + It("should not mention days for offsets less than 24 hours", func() { + // Arrange + clockSkewSpec := ClockSkewSpec{ + Offset: DisruptionDuration("5h"), + } + + // Action + explanation := clockSkewSpec.Explain() + + // Assert + Expect(explanation).ToNot(BeEmpty()) + explanationText := fmt.Sprintf("%v", explanation) + 
Expect(explanationText).ToNot(ContainSubstring("days")) + }) + }) + }) + + Describe("DisruptionDuration integration", func() { + It("should correctly parse various time formats", func() { + testCases := []struct { + input DisruptionDuration + expected time.Duration + }{ + {"1h", time.Hour}, + {"30m", 30 * time.Minute}, + {"1h30m", time.Hour + 30*time.Minute}, + {"24h", 24 * time.Hour}, + {"8760h", 8760 * time.Hour}, // 1 year + {"-1h", -time.Hour}, + {"-30m", -30 * time.Minute}, + {"500ms", 500 * time.Millisecond}, + } + + for _, tc := range testCases { + duration := tc.input.Duration() + Expect(duration).To(Equal(tc.expected), "Failed for input: %s", tc.input) + } + }) + + It("should handle the 1 year offset correctly", func() { + // Arrange + oneYear := DisruptionDuration("8760h") + + // Action + duration := oneYear.Duration() + + // Assert + Expect(duration).To(Equal(8760 * time.Hour)) + // Verify it's approximately 365 days + Expect(duration.Hours()).To(BeNumerically("~", 365*24, 1)) + }) + }) +})
+
+// ============================================================================
+// Fuzz Tests
+// ============================================================================
+
+// FuzzClockSkewValidate fuzzes the ClockSkewSpec Validate method
+// to find edge cases that might cause panics or unexpected behavior
+func FuzzClockSkewValidate(f *testing.F) {
+	// Seed corpus with known valid and invalid inputs
+	seeds := []string{
+		"1h",
+		"24h",
+		"8760h",
+		"-1h",
+		"-30m",
+		"0s",
+		"0h",
+		"",
+		"100000h",
+		"-100000h",
+		"2h30m45s",
+		"500ms",
+		"1ns",
+		"-1ns",
+		// Edge cases
+		"999999999h",
+		"-999999999h",
+		"1s",
+		"-1s",
+		// Invalid formats (should be handled gracefully)
+		"1",
+		"h",
+		"1hh",
+		"abc",
+		"1d", // days not supported in Go duration
+		"1w", // weeks not supported
+		"1y", // years not supported
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, offset string) {
+		spec := ClockSkewSpec{
+			Offset: DisruptionDuration(offset),
+		}
+
+		// The Validate method should never panic, regardless of input;
+		// a recovered panic is reported as a test failure
+		defer func() {
+			if r := recover(); r != nil {
+				t.Errorf("Validate() panicked with offset %q: %v", offset, r)
+			}
+		}()
+
+		err := spec.Validate()
+
+		// If there's no error, the offset should be parseable and non-zero
+		if err == nil {
+			duration := spec.Offset.Duration()
+			if duration == 0 {
+				t.Errorf("Validate() succeeded but duration is zero for offset %q", offset)
+			}
+		}
+
+		// If the offset is empty, there should always be an error
+		if offset == "" && err == nil {
+			t.Errorf("Validate() should fail for empty offset")
+		}
+
+		// If the offset is "0s", "0h" or "0m", there should always be an error
+		if (offset == "0s" || offset == "0h" || offset == "0m") && err == nil {
+			t.Errorf("Validate() should fail for zero duration offset %q", offset)
+		}
+	})
+}
+
+// FuzzClockSkewGenerateArgs fuzzes the ClockSkewSpec GenerateArgs method
+func FuzzClockSkewGenerateArgs(f *testing.F) {
+	// Seed corpus
+	seeds := []string{
+		"1h",
+		"24h",
+		"-1h",
+		"8760h",
+		"2h30m",
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, offset string) {
+		spec := ClockSkewSpec{
+			Offset: DisruptionDuration(offset),
+		}
+
+		// GenerateArgs should never panic
+		defer func() {
+			if r := recover(); r != nil {
+				t.Errorf("GenerateArgs() panicked with offset %q: %v", offset, r)
+			}
+		}()
+
+		args := spec.GenerateArgs()
+
+		// Args must hold the base command plus 2 additional args; stop here otherwise, the checks below index into args
+		if len(args) < 3 {
+			t.Fatalf("GenerateArgs() returned too few args: %v for offset %q", args, offset)
+		}
+
+		// First arg should always be "clock-skew"
+		if args[0] != "clock-skew" {
+			t.Errorf("First arg should be 'clock-skew', got %q for offset %q", args[0], offset)
+		}
+
+		// Second arg should be "--offset"
+		if args[1] != "--offset" {
+			t.Errorf("Second arg should be '--offset', got %q for offset %q", args[1], offset)
+		}
+
+		// Third arg should be the offset value
+		if args[2] != offset {
+			t.Errorf("Third arg should be %q, got %q", offset, args[2])
+		}
+	})
+}
+
+// FuzzClockSkewExplain fuzzes the ClockSkewSpec Explain method
+func FuzzClockSkewExplain(f *testing.F) {
+	// Seed corpus
+	seeds := []string{
+		"1h",
+		"24h",
+		"-1h",
+		"8760h",
+		"-30m",
+		"72h",
+		"1s",
+		"-1s",
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, offset string) {
+		spec := ClockSkewSpec{
+			Offset: DisruptionDuration(offset),
+		}
+
+		// Explain should never panic
+		defer func() {
+			if r := recover(); r != nil {
+				t.Errorf("Explain() panicked with offset %q: %v", offset, r)
+			}
+		}()
+
+		explanation := spec.Explain()
+
+		// Should always return at least one line
+		if len(explanation) == 0 {
+			t.Errorf("Explain() returned empty explanation for offset %q", offset)
+		}
+
+		// Join explanation for easier checking
+		fullText := strings.Join(explanation, " ")
+
+		// Should contain key information
+		if !strings.Contains(fullText, "clock") && !strings.Contains(fullText, "time") {
+			t.Errorf("Explain() should mention 'clock' or 'time', got: %s for offset %q", fullText, offset)
+		}
+
+		// The wording must match the sign of the offset as seen by Duration()
+		duration := spec.Offset.Duration()
+		if duration > 0 {
+			if !strings.Contains(fullText, "forward") && !strings.Contains(fullText, "advance") {
+				t.Errorf("Explain() should mention 'forward' or 'advance' for positive offset, got: %s for offset %q", fullText, offset)
+			}
+		} else if duration < 0 {
+			if !strings.Contains(fullText, "backward") {
+				t.Errorf("Explain() should mention 'backward' for negative offset, got: %s for offset %q", fullText, offset)
+			}
+		}
+	})
+}
+
+// FuzzClockSkewDurationParsing fuzzes DisruptionDuration parsing
+func FuzzClockSkewDurationParsing(f *testing.F) {
+	// Seed corpus with various duration formats
+	seeds := []string{
+		"1h",
+		"1m",
+		"1s",
+		"1ms",
+		"1µs",
+		"1ns",
+		"-1h",
+		"1h30m",
+		"1h30m45s",
+		"300ms",
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, durationStr string) {
+		dd := DisruptionDuration(durationStr)
+
+		// Duration() method should never panic
+		defer func() {
+			if r := recover(); r != nil {
+				t.Errorf("Duration() panicked with input %q: %v", durationStr, r)
+			}
+		}()
+
+		duration := dd.Duration()
+
+		// If time.ParseDuration succeeds, we should get the same result
+		expectedDuration, err := time.ParseDuration(durationStr)
+		if err == nil {
+			if duration != expectedDuration {
+				t.Errorf("Duration() returned %v, expected %v for input %q", duration, expectedDuration, durationStr)
+			}
+		} else {
+			// If parsing fails, duration is expected to be 0 (logged only, not enforced)
+			if duration != 0 {
+				t.Logf("Duration() returned %v for unparseable input %q, expected 0", duration, durationStr)
+			}
+		}
+	})
+}
+
+// FuzzClockSkewOffsetBounds fuzzes the offset boundary conditions
+func FuzzClockSkewOffsetBounds(f *testing.F) {
+	// Test around the 10 year boundary
+	tenYearsInHours := int64(10 * 365 * 24)
+
+	seeds := []int64{
+		1,
+		24,
+		8760,  // 1 year
+		17520, // 2 years
+		43800, // 5 years
+		tenYearsInHours - 1,
+		tenYearsInHours,
+		tenYearsInHours + 1,
+		100000,
+		-1,
+		-24,
+		-8760,
+		-tenYearsInHours - 1,
+		-tenYearsInHours,
+		-tenYearsInHours + 1,
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, hours int64) {
+		offset := fmt.Sprintf("%dh", hours)
+		spec := ClockSkewSpec{
+			Offset: DisruptionDuration(offset),
+		}
+
+		// Should not panic
+		defer func() {
+			if r := recover(); r != nil {
+				t.Errorf("Validate() panicked with %d hours: %v", hours, r)
+			}
+		}()
+
+		err := spec.Validate()
+
+		// Zero should always fail
+		if hours == 0 && err == nil {
+			t.Errorf("Validate() should fail for 0 hours")
+		}
+
+		// Offsets beyond 10 years must be rejected by Validate. Both bounds are
+		// compared directly because negating hours overflows for the minimum int64.
+		maxHours := int64(10 * 365 * 24)
+		outOfRange := hours > maxHours || hours < -maxHours
+
+		// NOTE(review): this presumes offsets too large to parse are rejected
+		// by Validate as well - confirm against DisruptionDuration.Duration()
+		if outOfRange {
+			if err == nil {
+				t.Errorf("Validate() should fail for offset of %d hours (>10 years)", hours)
+			}
+		}
+	})
+}
From 504c007c54639a87644bd258b98af5619b54e67b Mon Sep 17 00:00:00 2001 From: Jed Needle Date: Fri, 14 Nov 2025 16:03:27 -0500 Subject: [PATCH 3/4] add some more stuff --- api/v1beta1/zz_generated.deepcopy.go | 20 +++++++++++++++++ .../chaos.datadoghq.com_disruptioncrons.yaml | 22 +++++++++++++++++++ ...haos.datadoghq.com_disruptionrollouts.yaml | 22 +++++++++++++++++++ .../chaos.datadoghq.com_disruptions.yaml | 22 +++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 6c16526f56..e7453fe4c9 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -47,6 +47,21 @@ func (in *CPUPressureSpec) DeepCopy() *CPUPressureSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClockSkewSpec) DeepCopyInto(out *ClockSkewSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClockSkewSpec. +func (in *ClockSkewSpec) DeepCopy() *ClockSkewSpec { + if in == nil { + return nil + } + out := new(ClockSkewSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Config) DeepCopyInto(out *Config) { *out = *in @@ -605,6 +620,11 @@ func (in *DisruptionSpec) DeepCopyInto(out *DisruptionSpec) { *out = new(PodReplacementSpec) (*in).DeepCopyInto(*out) } + if in.ClockSkew != nil { + in, out := &in.ClockSkew, &out.ClockSkew + *out = new(ClockSkewSpec) + **out = **in + } if in.Reporting != nil { in, out := &in.Reporting, &out.Reporting *out = new(Reporting) diff --git a/chart/templates/generated/chaos.datadoghq.com_disruptioncrons.yaml b/chart/templates/generated/chaos.datadoghq.com_disruptioncrons.yaml index 6790ffad0a..723ce6071b 100644 --- a/chart/templates/generated/chaos.datadoghq.com_disruptioncrons.yaml +++ b/chart/templates/generated/chaos.datadoghq.com_disruptioncrons.yaml @@ -86,6 +86,28 @@ spec: - e.g. apply a CPU pressure and later, apply a container failure for a short duration NB: it's ALWAYS forbidden to apply the same disruption kind to the same target to avoid unreliable effects due to competing interactions type: boolean + clockSkew: + description: |- + ClockSkewSpec represents a clock/time skew disruption + This disruption manipulates the perceived system time for targeted containers, + useful for testing time-sensitive behavior like certificate expiration, + distributed locks, time-based tokens, and more. + nullable: true + properties: + offset: + description: |- + Offset specifies the time shift to apply + Positive values advance time into the future (e.g., "+24h" to simulate tomorrow) + Negative values move time into the past (e.g., "-1h" to simulate an hour ago) + Format follows Go's time.Duration format: "300ms", "1.5h", "2h45m", etc. 
+ Examples: + "+24h" - advance time by 24 hours (test cert expiry in 1 day) + "+8760h" - advance time by 1 year (365 days) + "-30m" - go back 30 minutes + type: string + required: + - offset + type: object containerFailure: description: ContainerFailureSpec represents a container failure injection nullable: true diff --git a/chart/templates/generated/chaos.datadoghq.com_disruptionrollouts.yaml b/chart/templates/generated/chaos.datadoghq.com_disruptionrollouts.yaml index 7f70930bcf..4676fe2743 100644 --- a/chart/templates/generated/chaos.datadoghq.com_disruptionrollouts.yaml +++ b/chart/templates/generated/chaos.datadoghq.com_disruptionrollouts.yaml @@ -87,6 +87,28 @@ spec: - e.g. apply a CPU pressure and later, apply a container failure for a short duration NB: it's ALWAYS forbidden to apply the same disruption kind to the same target to avoid unreliable effects due to competing interactions type: boolean + clockSkew: + description: |- + ClockSkewSpec represents a clock/time skew disruption + This disruption manipulates the perceived system time for targeted containers, + useful for testing time-sensitive behavior like certificate expiration, + distributed locks, time-based tokens, and more. + nullable: true + properties: + offset: + description: |- + Offset specifies the time shift to apply + Positive values advance time into the future (e.g., "+24h" to simulate tomorrow) + Negative values move time into the past (e.g., "-1h" to simulate an hour ago) + Format follows Go's time.Duration format: "300ms", "1.5h", "2h45m", etc. 
+ Examples: + "+24h" - advance time by 24 hours (test cert expiry in 1 day) + "+8760h" - advance time by 1 year (365 days) + "-30m" - go back 30 minutes + type: string + required: + - offset + type: object containerFailure: description: ContainerFailureSpec represents a container failure injection nullable: true diff --git a/chart/templates/generated/chaos.datadoghq.com_disruptions.yaml b/chart/templates/generated/chaos.datadoghq.com_disruptions.yaml index b9f53ff1ea..6f651984d5 100644 --- a/chart/templates/generated/chaos.datadoghq.com_disruptions.yaml +++ b/chart/templates/generated/chaos.datadoghq.com_disruptions.yaml @@ -77,6 +77,28 @@ spec: - e.g. apply a CPU pressure and later, apply a container failure for a short duration NB: it's ALWAYS forbidden to apply the same disruption kind to the same target to avoid unreliable effects due to competing interactions type: boolean + clockSkew: + description: |- + ClockSkewSpec represents a clock/time skew disruption + This disruption manipulates the perceived system time for targeted containers, + useful for testing time-sensitive behavior like certificate expiration, + distributed locks, time-based tokens, and more. + nullable: true + properties: + offset: + description: |- + Offset specifies the time shift to apply + Positive values advance time into the future (e.g., "+24h" to simulate tomorrow) + Negative values move time into the past (e.g., "-1h" to simulate an hour ago) + Format follows Go's time.Duration format: "300ms", "1.5h", "2h45m", etc. 
+ Examples: + "+24h" - advance time by 24 hours (test cert expiry in 1 day) + "+8760h" - advance time by 1 year (365 days) + "-30m" - go back 30 minutes + type: string + required: + - offset + type: object containerFailure: description: ContainerFailureSpec represents a container failure injection nullable: true From 0166d1805abfe06c0d121454951a8208d1fa4f3d Mon Sep 17 00:00:00 2001 From: Jed Needle Date: Mon, 17 Nov 2025 07:51:42 -0500 Subject: [PATCH 4/4] changes to tags, main and dockerfile for libfaketime --- bin/injector/Dockerfile | 6 +++++- cli/injector/main.go | 1 + o11y/tags/tags.go | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/injector/Dockerfile b/bin/injector/Dockerfile index ed2e0a9516..df12e7db3f 100644 --- a/bin/injector/Dockerfile +++ b/bin/injector/Dockerfile @@ -5,7 +5,8 @@ RUN apt-get update && \ # coreutils => df # iptables => iptables libs # libelf1 => EBPF libs - apt-get -y install --no-install-recommends iproute2 coreutils iptables libelf1 tree curl tar ca-certificates && \ + # libfaketime => clock skew disruption + apt-get -y install --no-install-recommends iproute2 coreutils iptables libelf1 libfaketime tree curl tar ca-certificates && \ # make copy from binaries unified and possible mkdir -p /lib64 && \ curl -LO https://github.com/libbpf/bpftool/releases/download/v7.5.0/bpftool-v7.5.0-arm64.tar.gz && \ @@ -38,6 +39,9 @@ COPY --from=binaries /lib/x86_64-linux-gn[u] /lib/x86_64-linux-gnu/ COPY --from=binaries /usr/lib/aarch64-linux-gn[u] /usr/lib/aarch64-linux-gnu/ COPY --from=binaries /usr/lib/x86_64-linux-gn[u] /usr/lib/x86_64-linux-gnu/ +# libfaketime library for clock skew disruption +COPY --from=binaries /usr/lib/*/faketime/ /usr/local/lib/faketime/ + # no more sh COPY --from=binaries /usr/bin/test /bin/sh diff --git a/cli/injector/main.go b/cli/injector/main.go index de904108be..b64717f578 100644 --- a/cli/injector/main.go +++ b/cli/injector/main.go @@ -90,6 +90,7 @@ func init() { 
rootCmd.AddCommand(diskFailureCmd) rootCmd.AddCommand(diskPressureCmd) rootCmd.AddCommand(grpcDisruptionCmd) + rootCmd.AddCommand(clockSkewCmd) // basic args rootCmd.PersistentFlags().BoolVar(&disruptionArgs.DryRun, "dry-run", false, "Enable dry-run mode") diff --git a/o11y/tags/tags.go b/o11y/tags/tags.go index 3e5dfb97e9..970e688517 100644 --- a/o11y/tags/tags.go +++ b/o11y/tags/tags.go @@ -115,6 +115,7 @@ const ( // Generic fields ConfigKey = "config" DataKey = "data" + EnvFileKey = "env_file" HostKey = "host" IndexedValueKey = "indexed_value" KindKey = "kind" @@ -158,6 +159,7 @@ const ( MaxRunsKey = "max_runs" NextRunKey = "next_run" NowKey = "now" + OffsetKey = "offset" PauseDurationKey = "pause_duration" RemainingDurationKey = "remaining_duration" RequeueAfterKey = "requeue_after"