Skip to content

Commit

Permalink
Use github.com/gofrs/flock to lock handler (#731)
Browse files Browse the repository at this point in the history
* Use github.com/gofrs/flock to lock handler

The nmstate-handler has a lock that make it wait if another handler is
already running in the system, but the library used for that does not
work well with containers. This change replace that library with
different one that uses golang syscall.Flock behind the scine [1] and
that works fine with containers sharing node volume.

[1] github.com/nightlyone/lockfile.

Signed-off-by: Quique Llorente <[email protected]>

* Replace handler readiness probe

The nmstate handler is marked as ready even if it was not able to take
ownership of the lock, this change runs nmstatectl and touchs a file
after gaining lock ownership so the readiness probe can check both
things.

Signed-off-by: Quique Llorente <[email protected]>

* Test nmstate handler lock mechanism

The nmstate handler don't get ready if another handler is running at the
same node. This change add an e2e tests to check that this is working.

Signed-off-by: Quique Llorente <[email protected]>
  • Loading branch information
qinqon authored Apr 27, 2021
1 parent fd6bfaa commit a8d083e
Show file tree
Hide file tree
Showing 29 changed files with 1,106 additions and 614 deletions.
4 changes: 2 additions & 2 deletions deploy/handler/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ spec:
readinessProbe:
exec:
command:
- nmstatectl
- show
- cat
- /tmp/healthy
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ require (
github.com/github-release/github-release v0.10.0
github.com/go-logr/logr v0.3.0
github.com/gobwas/glob v0.2.3
github.com/gofrs/flock v0.8.0
github.com/kelseyhightower/envconfig v1.4.0
github.com/nightlyone/lockfile v1.0.0
github.com/nightlyone/lockfile v1.0.0 // indirect
github.com/onsi/ginkgo v1.15.0
github.com/onsi/gomega v1.10.5
github.com/openshift/cluster-network-operator v0.0.0-20200922032245-f47200e8dbc0
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,7 @@ github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x
github.com/godror/godror v0.13.3/go.mod h1:2ouUT4kdhUBk7TAkHWD4SN0CdI0pgEQbo8FVHhbSKWg=
github.com/gofrs/flock v0.0.0-20190320160742-5135e617513b/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gofrs/flock v0.7.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gofrs/flock v0.8.0 h1:MSdYClljsF3PbENUUEx85nkWfJSGfzYI9yEBZOJz6CY=
github.com/gofrs/flock v0.8.0/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gofrs/uuid v3.3.0+incompatible h1:8K4tyRfvU1CYPgJsveYFQMhpFd/wXNM7iK6rR7UHz84=
github.com/gofrs/uuid v3.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
Expand Down
38 changes: 28 additions & 10 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
// +kubebuilder:scaffold:imports

"github.com/gofrs/flock"
"github.com/kelseyhightower/envconfig"
"github.com/nightlyone/lockfile"
"github.com/pkg/errors"
"github.com/qinqon/kube-admission-webhook/pkg/certificate"
"k8s.io/apimachinery/pkg/util/wait"
Expand All @@ -42,6 +42,8 @@ import (
nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1"
"github.com/nmstate/kubernetes-nmstate/controllers"
"github.com/nmstate/kubernetes-nmstate/pkg/environment"
"github.com/nmstate/kubernetes-nmstate/pkg/file"
"github.com/nmstate/kubernetes-nmstate/pkg/nmstatectl"
"github.com/nmstate/kubernetes-nmstate/pkg/webhook"
)

Expand Down Expand Up @@ -186,6 +188,25 @@ func main() {
setupLog.Error(err, "unable to create NodeNetworkState controller", "controller", "NMState")
os.Exit(1)
}

// Check that nmstatectl is working
_, err := nmstatectl.Show()
if err != nil {
os.Exit(1)
setupLog.Error(err, "failed checking nmstatectl health")
}

// Handler runs with host networking so opening ports is problematic
// they will collide with node ports so to ensure that we reach this
// point (we have the handler lock and nmstatectl show is working) a
// file is touched and the file is checked at readinessProbe field.
healthyFile := "/tmp/healthy"
setupLog.Info("Marking handler as healthy touching healthy file", "healthyFile", healthyFile)
err = file.Touch(healthyFile)
if err != nil {
os.Exit(1)
setupLog.Error(err, "failed marking handler as healthy")
}
}

setProfiler()
Expand Down Expand Up @@ -214,23 +235,20 @@ func setProfiler() {
}
}

func lockHandler() (lockfile.Lockfile, error) {
func lockHandler() (*flock.Flock, error) {
lockFilePath, ok := os.LookupEnv("NMSTATE_INSTANCE_NODE_LOCK_FILE")
if !ok {
return "", errors.New("Failed to find NMSTATE_INSTANCE_NODE_LOCK_FILE ENV var")
return nil, errors.New("Failed to find NMSTATE_INSTANCE_NODE_LOCK_FILE ENV var")
}
setupLog.Info(fmt.Sprintf("Try to take exclusive lock on file: %s", lockFilePath))
handlerLock, err := lockfile.New(lockFilePath)
if err != nil {
return handlerLock, errors.Wrapf(err, "failed to create lockFile for %s", lockFilePath)
}
err = wait.PollImmediateInfinite(5*time.Second, func() (done bool, err error) {
err = handlerLock.TryLock()
handlerLock := flock.New(lockFilePath)
err := wait.PollImmediateInfinite(5*time.Second, func() (done bool, err error) {
locked, err := handlerLock.TryLock()
if err != nil {
setupLog.Error(err, "retrying to lock handler")
return false, nil // Don't return the error here, it will not re-poll if we do
}
return true, nil
return locked, nil
})
return handlerLock, err
}
24 changes: 24 additions & 0 deletions pkg/file/touch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package file

import (
"os"
"time"
)

func Touch(fileName string) error {
_, err := os.Stat(fileName)
if os.IsNotExist(err) {
file, err := os.Create(fileName)
if err != nil {
return err
}
defer file.Close()
} else {
currentTime := time.Now().Local()
err = os.Chtimes(fileName, currentTime, currentTime)
if err != nil {
return err
}
}
return nil
}
8 changes: 8 additions & 0 deletions test/e2e/daemonset/daemonset.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ func GetEventually(daemonSetKey types.NamespacedName) AsyncAssertion {
}, 180*time.Second, 1*time.Second)
}

func GetConsistently(daemonSetKey types.NamespacedName) AsyncAssertion {
return Consistently(func() (appsv1.DaemonSet, error) {
daemonSet := appsv1.DaemonSet{}
err := testenv.Client.Get(context.TODO(), daemonSetKey, &daemonSet)
return daemonSet, err
}, 15*time.Second, 1*time.Second)
}

// GetDaemonSetList returns a DaemonSetList matching the labels passed
func GetList(filteringLabels map[string]string) (appsv1.DaemonSetList, error) {
ds := appsv1.DaemonSetList{}
Expand Down
77 changes: 52 additions & 25 deletions test/e2e/operator/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package operator

import (
"context"
"fmt"
"os"
"testing"
"time"

Expand All @@ -11,44 +13,39 @@ import (
ginkgoreporters "kubevirt.io/qe-tools/pkg/ginkgo-reporters"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
dynclient "sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1"
testenv "github.com/nmstate/kubernetes-nmstate/test/env"
knmstatereporter "github.com/nmstate/kubernetes-nmstate/test/reporter"
)

var (
t *testing.T
nodes []string
startTime time.Time
t *testing.T
nodes []string
startTime time.Time
defaultNMState = nmstatev1beta1.NMState{
ObjectMeta: metav1.ObjectMeta{
Name: "nmstate",
Namespace: "nmstate",
},
}
webhookKey = types.NamespacedName{Namespace: "nmstate", Name: "nmstate-webhook"}
handlerKey = types.NamespacedName{Namespace: "nmstate", Name: "nmstate-handler"}
handlerLabels = map[string]string{"component": "kubernetes-nmstate-handler"}
)

var _ = BeforeSuite(func() {

logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))

testenv.Start()

})

func TestMain(m *testing.M) {
func TestE2E(t *testing.T) {
testenv.TestMain()
}

func TestE2E(tapi *testing.T) {
t = tapi
RegisterFailHandler(Fail)

By("Getting node list from cluster")
nodeList := corev1.NodeList{}
err := testenv.Client.List(context.TODO(), &nodeList, &dynclient.ListOptions{})
Expect(err).ToNot(HaveOccurred())
for _, node := range nodeList.Items {
nodes = append(nodes, node.Name)
}

reporters := make([]Reporter, 0)
reporters = append(reporters, knmstatereporter.New("test_logs/e2e/operator", testenv.OperatorNamespace, nodes))
if ginkgoreporters.Polarion.Run {
Expand All @@ -61,8 +58,38 @@ func TestE2E(tapi *testing.T) {
RunSpecsWithDefaultAndCustomReporters(t, "Operator E2E Test Suite", reporters)
}

var _ = BeforeEach(func() {
var _ = BeforeSuite(func() {

// Change to root directory some test expect that
os.Chdir("../../../")

logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))

testenv.Start()

By("Getting node list from cluster")
nodeList := corev1.NodeList{}
err := testenv.Client.List(context.TODO(), &nodeList, &dynclient.ListOptions{})
Expect(err).ToNot(HaveOccurred())
for _, node := range nodeList.Items {
nodes = append(nodes, node.Name)
}
})

var _ = AfterEach(func() {
var _ = AfterSuite(func() {
uninstallNMState(defaultNMState)
})

func installNMState(nmstate nmstatev1beta1.NMState) {
By(fmt.Sprintf("Creating NMState CR '%s'", nmstate.Name))
err := testenv.Client.Create(context.TODO(), &nmstate)
ExpectWithOffset(1, err).ToNot(HaveOccurred(), "NMState CR created without error")
}

func uninstallNMState(nmstate nmstatev1beta1.NMState) {
By(fmt.Sprintf("Deleting NMState CR '%s'", nmstate.Name))
err := testenv.Client.Delete(context.TODO(), &nmstate, &client.DeleteOptions{})
if !apierrors.IsNotFound(err) {
Expect(err).ToNot(HaveOccurred(), "NMState CR successfully removed")
}
}
82 changes: 51 additions & 31 deletions test/e2e/operator/nmstate_install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,35 @@ import (

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1"
"github.com/nmstate/kubernetes-nmstate/test/e2e/daemonset"
"github.com/nmstate/kubernetes-nmstate/test/e2e/deployment"

"github.com/nmstate/kubernetes-nmstate/test/cmd"
testenv "github.com/nmstate/kubernetes-nmstate/test/env"
)

var (
defaultNMState = nmstatev1beta1.NMState{
ObjectMeta: metav1.ObjectMeta{
Name: "nmstate",
Namespace: "nmstate",
},
}
webhookKey = types.NamespacedName{Namespace: "nmstate", Name: "nmstate-webhook"}
handlerKey = types.NamespacedName{Namespace: "nmstate", Name: "nmstate-handler"}
handlerLabels = map[string]string{"component": "kubernetes-nmstate-handler"}
)

var _ = Describe("NMState operator", func() {
Context("when installed for the first time", func() {
BeforeEach(func() {
installDefaultNMState()
installNMState(defaultNMState)
})
It("should deploy daemonset and webhook deployment", func() {
daemonset.GetEventually(handlerKey).Should(daemonset.BeReady())
deployment.GetEventually(webhookKey).Should(deployment.BeReady())
})
AfterEach(func() {
uninstallDefaultNMState()
uninstallNMState(defaultNMState)
})
})
Context("when NMState is installed", func() {
It("should list one NMState CR", func() {
installDefaultNMState()
installNMState(defaultNMState)
daemonset.GetEventually(handlerKey).Should(daemonset.BeReady())
ds, err := daemonset.GetList(handlerLabels)
Expect(err).ToNot(HaveOccurred(), "List daemon sets in namespace nmstate succeeds")
Expand All @@ -71,7 +60,7 @@ var _ = Describe("NMState operator", func() {
})
Context("and uninstalled", func() {
BeforeEach(func() {
uninstallDefaultNMState()
uninstallNMState(defaultNMState)
})
It("should uninstall handler and webhook", func() {
Eventually(func() bool {
Expand All @@ -85,24 +74,55 @@ var _ = Describe("NMState operator", func() {
})
})
})
Context("when another handler is installed with different namespace", func() {
var (
operatorNamespace = "nmstate-alt"
)
BeforeEach(func() {
installNMState(defaultNMState)
daemonset.GetEventually(handlerKey).Should(daemonset.BeReady())
installOperator(operatorNamespace)
})
AfterEach(func() {
uninstallNMState(defaultNMState)
uninstallOperator(operatorNamespace)
installOperator("nmstate")
})
It("should wait on the old one to be deleted", func() {
By("Checking handler is locked")
daemonset.GetConsistently(types.NamespacedName{Namespace: operatorNamespace, Name: "nmstate-handler"}).ShouldNot(daemonset.BeReady())
uninstallOperator("nmstate")
By("Checking handler is unlocked after deleting old one")
daemonset.GetEventually(types.NamespacedName{Namespace: operatorNamespace, Name: "nmstate-handler"}).Should(daemonset.BeReady())
})
})
})

func installNMState(nmstate nmstatev1beta1.NMState) {
err := testenv.Client.Create(context.TODO(), &nmstate)
Expect(err).ToNot(HaveOccurred(), "NMState CR created without error")
}

func installDefaultNMState() {
installNMState(defaultNMState)
}
func installOperator(namespace string) error {
By(fmt.Sprintf("Creating NMState operator with namespace '%s'", namespace))
_, err := cmd.Run("make", false, fmt.Sprintf("OPERATOR_NAMESPACE=%s", namespace), fmt.Sprintf("HANDLER_NAMESPACE=%s", namespace), "IMAGE_REGISTRY=registry:5000", "manifests")
Expect(err).ToNot(HaveOccurred())

func uninstallNMState(nmstate nmstatev1beta1.NMState) {
err := testenv.Client.Delete(context.TODO(), &nmstate, &client.DeleteOptions{})
if !apierrors.IsNotFound(err) {
Expect(err).ToNot(HaveOccurred(), "NMState CR successfully removed")
manifestsDir := "build/_output/manifests/"
manifests := []string{"namespace.yaml", "service_account.yaml", "operator.yaml", "role.yaml", "role_binding.yaml"}
for _, manifest := range manifests {
_, err = cmd.Kubectl("apply", "-f", manifestsDir+manifest)
Expect(err).ToNot(HaveOccurred())
}
deployment.GetEventually(types.NamespacedName{Namespace: namespace, Name: "nmstate-operator"}).Should(deployment.BeReady())

return nil
}

func uninstallDefaultNMState() {
uninstallNMState(defaultNMState)
func uninstallOperator(namespace string) {
By(fmt.Sprintf("Deleting namespace '%s'", namespace))
ns := corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespace,
},
}
Expect(testenv.Client.Delete(context.TODO(), &ns)).To(SatisfyAny(Succeed(), WithTransform(apierrors.IsNotFound, BeTrue())))
Eventually(func() error {
return testenv.Client.Get(context.TODO(), types.NamespacedName{Name: namespace}, &ns)
}, 2*time.Minute, 5*time.Second).Should(SatisfyAll(HaveOccurred(), WithTransform(apierrors.IsNotFound, BeTrue())))
}
Loading

0 comments on commit a8d083e

Please sign in to comment.