feat(errorHandle): add service error handling and recovery logic
Implement an error-handling mechanism with retry logic for failed services. Includes:
- ServiceControl struct to track service state
- HandleErrorProcess function to attempt service restarts
- Integration with the monitor to automatically recover services
- Enhanced exception handling with status checks and monitoring restart
This commit is contained in:
36
errorHandle/processor.go
Normal file
36
errorHandle/processor.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package errorHandle
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"Watchdog_Linux-systemd/postLog"
|
||||
)
|
||||
|
||||
// ServiceStatusChecker is a callback that receives a service name and returns
// a bool. HandleErrorProcess calls it after a restart command exits
// successfully and treats a true result as "the service has recovered".
type ServiceStatusChecker func(serviceName string) bool
|
||||
|
||||
func HandleErrorProcess(serviceName string, isServiceRunning ServiceStatusChecker) error {
|
||||
postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Start handle error process for service: %s", serviceName))
|
||||
serviceControl := &ServiceControl{
|
||||
ServiceName: serviceName,
|
||||
RetryCount: 0,
|
||||
}
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
serviceControl.RetryCount++
|
||||
postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Try to restart service '%s', retry count: %d", serviceName, serviceControl.RetryCount))
|
||||
cmd := exec.Command("systemctl", "restart", serviceName)
|
||||
err := cmd.Run()
|
||||
if err == nil {
|
||||
if isServiceRunning != nil && isServiceRunning(serviceName) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Duration(i+1) * time.Second)
|
||||
}
|
||||
serviceControl.ErrorType = "restart"
|
||||
serviceControl.ErrorMsg = fmt.Sprintf("Failed to recover service '%s', retry count: %d", serviceName, serviceControl.RetryCount)
|
||||
serviceControl.ErrorTime = time.Now()
|
||||
return fmt.Errorf(serviceControl.ErrorMsg)
|
||||
}
|
||||
13
errorHandle/vars.go
Normal file
13
errorHandle/vars.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package errorHandle
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// ServiceControl holds the bookkeeping for one recovery run of a service, as
// filled in by HandleErrorProcess.
type ServiceControl struct {
	// ServiceName is the name of the service being restarted.
	ServiceName string
	// RetryCount is incremented once per restart attempt.
	RetryCount int
	// ErrorType and ErrorMsg are set when recovery is abandoned: the kind of
	// failure (HandleErrorProcess uses "restart") and its description.
	ErrorType, ErrorMsg string
	// ErrorTime is the moment recovery was abandoned.
	ErrorTime time.Time
}
|
||||
Reference in New Issue
Block a user