diff --git a/errorHandle/processor.go b/errorHandle/processor.go
new file mode 100644
index 0000000..72b4026
--- /dev/null
+++ b/errorHandle/processor.go
@@ -0,0 +1,61 @@
+// Package errorHandle restarts failing systemd services and reports whether
+// the recovery succeeded.
+package errorHandle
+
+import (
+	"errors"
+	"fmt"
+	"os/exec"
+	"time"
+
+	"Watchdog_Linux-systemd/postLog"
+)
+
+// ServiceStatusChecker reports whether the named service is considered running.
+type ServiceStatusChecker func(serviceName string) bool
+
+// HandleErrorProcess tries to recover serviceName by running
+// "systemctl restart" up to five times, backing off 1s, 2s, 3s and 4s
+// between consecutive attempts.
+//
+// An attempt counts as a recovery only when the restart command exits cleanly
+// and isServiceRunning reports true. With a nil isServiceRunning no attempt
+// can be confirmed, so all attempts are made and an error is returned.
+//
+// It returns nil on the first confirmed recovery, otherwise an error naming
+// the service and the number of attempts made.
+func HandleErrorProcess(serviceName string, isServiceRunning ServiceStatusChecker) error {
+	// maxRetries bounds the number of restart attempts before giving up.
+	const maxRetries = 5
+
+	postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Start handle error process for service: %s", serviceName))
+	serviceControl := &ServiceControl{
+		ServiceName: serviceName,
+		RetryCount:  0,
+	}
+
+	for i := 0; i < maxRetries; i++ {
+		serviceControl.RetryCount++
+		postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Try to restart service '%s', retry count: %d", serviceName, serviceControl.RetryCount))
+
+		if err := exec.Command("systemctl", "restart", serviceName).Run(); err != nil {
+			// Record why the restart failed instead of silently dropping it.
+			postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Restart of service '%s' failed: %v", serviceName, err))
+		} else if isServiceRunning != nil && isServiceRunning(serviceName) {
+			return nil
+		}
+
+		// Back off before the next attempt; after the last one there is
+		// nothing left to wait for.
+		if i < maxRetries-1 {
+			time.Sleep(time.Duration(i+1) * time.Second)
+		}
+	}
+
+	serviceControl.ErrorType = "restart"
+	serviceControl.ErrorMsg = fmt.Sprintf("Failed to recover service '%s', retry count: %d", serviceName, serviceControl.RetryCount)
+	serviceControl.ErrorTime = time.Now()
+
+	// errors.New keeps a '%' in the service name from being read as a format verb.
+	return errors.New(serviceControl.ErrorMsg)
+}
diff --git a/errorHandle/vars.go b/errorHandle/vars.go
new file mode 100644
index 0000000..17d24f9
--- /dev/null
+++ b/errorHandle/vars.go
@@ -0,0 +1,13 @@
+package errorHandle
+
+import "time"
+
+// ServiceControl tracks one recovery run for a single service: which service
+// is being restarted, how often it was tried, and the last recorded failure.
+type ServiceControl struct {
+	ServiceName string    // service the recovery run targets
+	RetryCount  int       // restart attempts made so far
+	ErrorType   string    // failure category
+	ErrorMsg    string    // failure description
+	ErrorTime   time.Time // when the failure was recorded
+}
\ No newline at end of file
diff --git a/monitor/monitor.go b/monitor/monitor.go
index b579755..c14a8cc 100644
--- a/monitor/monitor.go
+++ b/monitor/monitor.go
@@ -1,6 +1,7 @@
package monitor
import (
+ "Watchdog_Linux-systemd/errorHandle"
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/socket"
"fmt"
@@ -122,12 +123,77 @@ func checkServiceLogs(serviceName string) (bool, error) {
 	return true, nil
 }
 
+// IsServiceExist reports whether a monitor is currently registered for
+// serviceName. It acquires monitorsMutex, so it must not be called while the
+// caller already holds that lock.
+func IsServiceExist(serviceName string) bool {
+	monitorsMutex.Lock()
+	defer monitorsMutex.Unlock()
+
+	_, exists := monitors[serviceName]
+	return exists
+}
+
+// IsServiceRunning reports whether serviceName passes both the status check
+// and the log check. An error from either check counts as "not running".
+func IsServiceRunning(serviceName string) bool {
+	if active, err := checkServiceStatus(serviceName); err != nil || !active {
+		return false
+	}
+
+	logsOK, err := checkServiceLogs(serviceName)
+	return err == nil && logsOK
+}
+
+// throwException handles a failure detected for serviceName.
+//
+// It logs errorContent, stops and unregisters the service's monitor (if one
+// is registered), and asks errorHandle.HandleErrorProcess to restart the
+// service. When recovery fails, an "[Exception]" message is sent through
+// socket.SendMsg and the service is left unmonitored. When recovery succeeds,
+// a new monitor is registered and started.
+//
+// The only error returned is a failure to send the exception message.
 func throwException(serviceName, errorContent string) error {
 	postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent))
-	err := socket.SendMsg(fmt.Sprintf("[Exception] %s %s %s", "service", serviceName, errorContent))
-	if err != nil {
-		return fmt.Errorf("failed to send exception message: %v", err)
+
+	// Stop the current monitor before restarting the service.
+	monitorsMutex.Lock()
+	if current, exists := monitors[serviceName]; exists {
+		close(current.StopChan)
+		current.Running = false
+		delete(monitors, serviceName)
 	}
+	monitorsMutex.Unlock()
+
+	if recoverErr := errorHandle.HandleErrorProcess(serviceName, IsServiceRunning); recoverErr != nil {
+		// Recovery failed: record why, then report the original exception.
+		postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Recovery failed: %v", serviceName, recoverErr))
+		if sendErr := socket.SendMsg(fmt.Sprintf("[Exception] %s %s %s", "service", serviceName, errorContent)); sendErr != nil {
+			return fmt.Errorf("failed to send exception message: %w", sendErr)
+		}
+		return nil
+	}
+
+	newMonitor := &ServiceMonitor{
+		ServiceName: serviceName,
+		StopChan:    make(chan struct{}),
+		Running:     true,
+	}
+
+	monitorsMutex.Lock()
+	if _, exists := monitors[serviceName]; exists {
+		// A monitor was registered while the restart was in progress;
+		// overwriting it would orphan its goroutine and stop channel.
+		monitorsMutex.Unlock()
+		postLog.Info(fmt.Sprintf("[Monitor] Service recovered, monitor already registered: %s", serviceName))
+		return nil
+	}
+	monitors[serviceName] = newMonitor
+	monitorsMutex.Unlock()
+
+	go runMonitor(newMonitor)
+	postLog.Info(fmt.Sprintf("[Monitor] Service recovered and monitor restarted: %s", serviceName))
 
 	return nil
 }