Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce noisy reconciles + enhance logs #877

Merged
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f2c22fa
preliminary changes
himanshu-kun Nov 22, 2023
b5feb2d
reconcile machineClass only on addition/deletion
himanshu-kun Nov 22, 2023
f8bd6ab
removed double check in secret reconciler
himanshu-kun Nov 22, 2023
db98422
small refactoring
himanshu-kun Nov 22, 2023
5bdd6bc
introduced machine conditions diff logging
himanshu-kun Nov 22, 2023
e1849e4
removed old in-tree related code
himanshu-kun Nov 23, 2023
b50ff02
adapted unit tests ot new const
himanshu-kun Nov 23, 2023
0c81be0
added new retry period ConflictRetry
himanshu-kun Dec 1, 2023
61ceccd
updated log level for better readability
himanshu-kun Dec 1, 2023
aeb28f7
some more updated log levels
himanshu-kun Dec 1, 2023
a672b57
adapted test cases
himanshu-kun Dec 1, 2023
a653be5
no machine reconcile on status update
himanshu-kun Dec 1, 2023
0c0aaaf
updated log messages and used ConflictRetry
himanshu-kun Dec 1, 2023
f606092
make generate && make check
himanshu-kun Dec 1, 2023
41b8a73
reconcile on removal of critical component taint
himanshu-kun Dec 8, 2023
2099f97
reconcile machine on specific node taint updates
himanshu-kun Dec 8, 2023
692e894
adapted unit tests for create errors
himanshu-kun Dec 12, 2023
174a4b1
fixed some failing tests
himanshu-kun Dec 13, 2023
ca12dd0
Apply suggestions from code review
himanshu-kun Dec 15, 2023
b73c147
addresed Rishab's suggestion part 1
himanshu-kun Dec 15, 2023
9d41bf7
addressed Rishab's review comments part 2
himanshu-kun Dec 15, 2023
c93ce96
renamed updateRetryRequired to updateRetryPeriod
himanshu-kun Dec 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/machine-controller-manager/app/controllermanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func StartControllers(s *options.MCMServer,
recorder record.EventRecorder,
stop <-chan struct{}) error {

klog.V(5).Info("Getting available resources")
klog.V(4).Info("Getting available resources")
rishabh-11 marked this conversation as resolved.
Show resolved Hide resolved
availableResources, err := getAvailableResources(controlCoreClientBuilder)
if err != nil {
return err
Expand All @@ -229,7 +229,7 @@ func StartControllers(s *options.MCMServer,
}

if availableResources[machineGVR] || availableResources[machineSetGVR] || availableResources[machineDeploymentGVR] {
klog.V(5).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
klog.V(4).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)

controlMachineInformerFactory := machineinformers.NewFilteredSharedInformerFactory(
controlMachineClientBuilder.ClientOrDie("control-machine-shared-informers"),
Expand All @@ -253,7 +253,7 @@ func StartControllers(s *options.MCMServer,
// All shared informers are v1alpha1 API level
machineSharedInformers := controlMachineInformerFactory.Machine().V1alpha1()

klog.V(5).Infof("Creating controllers...")
klog.V(4).Infof("Creating controllers...")
mcmController, err := mcmcontroller.NewController(
s.Namespace,
controlMachineClient,
Expand All @@ -276,7 +276,7 @@ func StartControllers(s *options.MCMServer,
controlCoreInformerFactory.Start(stop)
targetCoreInformerFactory.Start(stop)

klog.V(5).Info("Running controller")
klog.V(4).Info("Running controller")
go mcmController.Run(int(s.ConcurrentNodeSyncs), stop)

} else {
Expand Down
4 changes: 2 additions & 2 deletions docs/documents/apis.md
Original file line number Diff line number Diff line change
Expand Up @@ -1555,7 +1555,7 @@ to be.</p>
<a href="#CurrentStatus">CurrentStatus</a>)
</p>
<p>
<p>MachinePhase is a label for the condition of a machines at the current time.</p>
<p>MachinePhase is a label for the condition of a machine at the current time.</p>
</p>
<br>
<h3 id="MachineSetCondition">
Expand Down Expand Up @@ -1992,7 +1992,7 @@ MachineConfiguration
<a href="#LastOperation">LastOperation</a>)
</p>
<p>
<p>MachineState is a current state of the machine.</p>
<p>MachineState is a current state of the operation.</p>
</p>
<br>
<h3 id="MachineStatus">
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/crds/machine.sapcloud.io_machines.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ spec:
format: date-time
type: string
phase:
description: MachinePhase is a label for the condition of a machines
description: MachinePhase is a label for the condition of a machine
at the current time.
type: string
timeoutActive:
Expand Down
20 changes: 10 additions & 10 deletions pkg/apis/machine/v1alpha1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ type LastOperation struct {
Type MachineOperationType `json:"type,omitempty"`
}

// MachinePhase is a label for the condition of a machines at the current time.
// MachinePhase is a label for the condition of a machine at the current time.
type MachinePhase string

// These are the valid statuses of machines.
// These are the valid phases of machines.
const (
// MachinePending means that the machine is being created
MachinePending MachinePhase = "Pending"
Expand All @@ -156,25 +156,25 @@ const (
// MachineUnknown indicates that the node is not ready at the movement
MachineUnknown MachinePhase = "Unknown"

// MachineFailed means operation failed leading to machine status failure
// MachineFailed means operation timed out
MachineFailed MachinePhase = "Failed"

// MachineCrashLoopBackOff means creation or deletion of the machine is failing. It means that machine object is present but there is no corresponding VM.
// MachineCrashLoopBackOff means machine creation is failing. It means that machine object is present but there is no corresponding VM.
MachineCrashLoopBackOff MachinePhase = "CrashLoopBackOff"
)

// MachineState is a current state of the machine.
// MachineState is a current state of the operation.
type MachineState string

// These are the valid statuses of machines.
// These are the valid states of operations performed on the machine.
const (
// MachineStatePending means there are operations pending on this machine state
// MachineStateProcessing means operation is not yet complete
MachineStateProcessing MachineState = "Processing"

// MachineStateFailed means operation failed leading to machine status failure
// MachineStateFailed means operation failed
MachineStateFailed MachineState = "Failed"

// MachineStateSuccessful indicates that the node is not ready at the moment
// MachineStateSuccessful means operation completed successfully
MachineStateSuccessful MachineState = "Successful"
)

Expand All @@ -189,7 +189,7 @@ const (
// MachineOperationUpdate indicates that the operation was an update
MachineOperationUpdate MachineOperationType = "Update"

// MachineOperationHealthCheck indicates that the operation was a create
// MachineOperationHealthCheck indicates that the operation was a health check of node object
MachineOperationHealthCheck MachineOperationType = "HealthCheck"

// MachineOperationDelete indicates that the operation was a delete
Expand Down
8 changes: 4 additions & 4 deletions pkg/util/provider/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ func StartControllers(s *options.MCServer,
recorder record.EventRecorder,
stop <-chan struct{}) error {

klog.V(5).Info("Getting available resources")
klog.V(4).Info("Getting available resources")
availableResources, err := getAvailableResources(controlCoreClientBuilder)
if err != nil {
return err
Expand Down Expand Up @@ -239,7 +239,7 @@ func StartControllers(s *options.MCServer,
}

if availableResources[machineGVR] {
klog.V(5).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
klog.V(4).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)

controlMachineInformerFactory := machineinformers.NewFilteredSharedInformerFactory(
controlMachineClientBuilder.ClientOrDie("control-machine-shared-informers"),
Expand Down Expand Up @@ -274,7 +274,7 @@ func StartControllers(s *options.MCServer,
// All shared informers are v1alpha1 API level
machineSharedInformers := controlMachineInformerFactory.Machine().V1alpha1()

klog.V(5).Infof("Creating controllers...")
klog.V(4).Infof("Creating controllers...")
machineController, err := machinecontroller.NewController(
s.Namespace,
controlMachineClient,
Expand Down Expand Up @@ -305,7 +305,7 @@ func StartControllers(s *options.MCServer,
controlCoreInformerFactory.Start(stop)
targetCoreInformerFactory.Start(stop)

klog.V(5).Info("Running controller")
klog.V(4).Info("Running controller")
go machineController.Run(int(s.ConcurrentNodeSyncs), stop)

} else {
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/provider/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ func (o *Options) waitForReattach(ctx context.Context, podVolumeInfo PodVolumeIn

case incomingEvent := <-volumeAttachmentEventCh:
persistentVolumeName := *incomingEvent.Spec.Source.PersistentVolumeName
klog.V(5).Infof("VolumeAttachment event received for PV: %s", persistentVolumeName)
klog.V(4).Infof("VolumeAttachment event received for PV: %s", persistentVolumeName)

// Checking if event for an PV that is being waited on
if _, present := pvsWaitingForReattachments[persistentVolumeName]; present {
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/provider/drain/volume_attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ func (v *VolumeAttachmentHandler) dispatch(obj interface{}) {

// AddVolumeAttachment is the event handler for VolumeAttachment add
func (v *VolumeAttachmentHandler) AddVolumeAttachment(obj interface{}) {
klog.V(5).Infof("Adding volume attachment object")
klog.V(4).Infof("Adding volume attachment object")
v.dispatch(obj)
}

// UpdateVolumeAttachment is the event handler for VolumeAttachment update
func (v *VolumeAttachmentHandler) UpdateVolumeAttachment(oldObj, newObj interface{}) {
klog.V(5).Info("Updating volume attachment object")
klog.V(4).Info("Updating volume attachment object")
v.dispatch(newObj)
}

Expand Down
12 changes: 5 additions & 7 deletions pkg/util/provider/machinecontroller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ func NewController(
controller.pvcLister = pvcInformer.Lister()
controller.pvLister = pvInformer.Lister()
controller.secretLister = secretInformer.Lister()
// TODO: Need to handle K8s versions below 1.13 differently
controller.volumeAttachementLister = volumeAttachmentInformer.Lister()
controller.machineClassLister = machineClassInformer.Lister()
controller.nodeLister = nodeInformer.Lister()
Expand All @@ -158,7 +157,7 @@ func NewController(
controller.pdbV1beta1Synced = pdbV1beta1Informer.Informer().HasSynced
}

// Secret Controller Informers
// Secret Controller's Informers
secretInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.secretAdd,
DeleteFunc: controller.secretDelete,
Expand All @@ -170,10 +169,9 @@ func NewController(
DeleteFunc: controller.machineClassToSecretDelete,
})

// Machine Class Controller Informers
// Machine Class Controller's Informers
machineInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.machineToMachineClassAdd,
UpdateFunc: controller.machineToMachineClassUpdate,
DeleteFunc: controller.machineToMachineClassDelete,
})

Expand All @@ -183,7 +181,7 @@ func NewController(
DeleteFunc: controller.machineClassDelete,
})

// Machine Controller Informers
// Machine Controller's Informers
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.addNodeToMachine,
UpdateFunc: controller.updateNodeToMachine,
Expand All @@ -196,7 +194,7 @@ func NewController(
DeleteFunc: controller.deleteMachine,
})

// MachineSafety Controller Informers
// MachineSafety Controller's Informers
// We follow the kubernetes way of reconciling the safety controller
// done by adding empty key objects. We initialize it, to trigger
// running of different safety loop on MCM startup.
Expand All @@ -209,7 +207,7 @@ func NewController(
DeleteFunc: controller.deleteMachineToSafety,
})

// Drain Controller Informers
// Drain Controller's Informers
if k8sutils.IsResourceSupported(
targetCoreClient,
schema.GroupResource{
Expand Down
Loading