diff --git a/errorHandle/processor.go b/errorHandle/processor.go index 72b4026..b2bec39 100644 --- a/errorHandle/processor.go +++ b/errorHandle/processor.go @@ -6,6 +6,7 @@ import ( "time" "Watchdog_Linux-systemd/postLog" + "Watchdog_Linux-systemd/global" ) type ServiceStatusChecker func(serviceName string) bool @@ -18,19 +19,21 @@ func HandleErrorProcess(serviceName string, isServiceRunning ServiceStatusChecke } for i := 0; i < 5; i++ { - serviceControl.RetryCount++ - postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Try to restart service '%s', retry count: %d", serviceName, serviceControl.RetryCount)) - cmd := exec.Command("systemctl", "restart", serviceName) - err := cmd.Run() - if err == nil { - if isServiceRunning != nil && isServiceRunning(serviceName) { - return nil + if global.Monitors[serviceName].Recovery { // Only recovery process is started + serviceControl.RetryCount++ + postLog.Debug(fmt.Sprintf("[HandleErrorProcess] Try to restart service '%s', retry count: %d", serviceName, serviceControl.RetryCount)) + cmd := exec.Command("systemctl", "restart", serviceName) + err := cmd.Run() + if err == nil { + if isServiceRunning != nil && isServiceRunning(serviceName) { + return nil + } } + time.Sleep(time.Duration(i+1) * time.Second) } - time.Sleep(time.Duration(i+1) * time.Second) } serviceControl.ErrorType = "restart" serviceControl.ErrorMsg = fmt.Sprintf("Failed to recover service '%s', retry count: %d", serviceName, serviceControl.RetryCount) serviceControl.ErrorTime = time.Now() return fmt.Errorf(serviceControl.ErrorMsg) -} +} \ No newline at end of file diff --git a/global/vars.go b/global/vars.go new file mode 100644 index 0000000..089c6e8 --- /dev/null +++ b/global/vars.go @@ -0,0 +1,17 @@ +package global + +import ( + "sync" +) + +type ServiceMonitor struct { + ServiceName string + StopChan chan struct{} + Running bool + Recovery bool +} + +var ( + Monitors = make(map[string]*ServiceMonitor) + MonitorsMutex sync.RWMutex +) \ No newline at end of file diff --git a/monitor/monitor.go b/monitor/monitor.go index c14a8cc..7a68d26 100644 --- a/monitor/monitor.go +++ b/monitor/monitor.go @@ -3,41 +3,34 @@ package monitor import ( "Watchdog_Linux-systemd/errorHandle" "Watchdog_Linux-systemd/postLog" + "Watchdog_Linux-systemd/global" "Watchdog_Linux-systemd/socket" "fmt" "os/exec" "strings" - "sync" "time" ) -type ServiceMonitor struct { - ServiceName string - StopChan chan struct{} - Running bool -} -var ( - monitors = make(map[string]*ServiceMonitor) - monitorsMutex sync.RWMutex -) func AddServiceMonitor(serviceName string) error { - monitorsMutex.Lock() - defer monitorsMutex.Unlock() + global.MonitorsMutex.Lock() + defer global.MonitorsMutex.Unlock() - if _, exists := monitors[serviceName]; exists { - return fmt.Errorf("service monitor '%s' already exists", serviceName) + if _, exists := global.Monitors[serviceName]; exists { + if global.Monitors[serviceName].Running { + return fmt.Errorf("service monitor '%s' already exists", serviceName) + } } stopChan := make(chan struct{}) - monitor := &ServiceMonitor{ + monitor := &global.ServiceMonitor{ ServiceName: serviceName, StopChan: stopChan, Running: true, } - monitors[serviceName] = monitor + global.Monitors[serviceName] = monitor go runMonitor(monitor) @@ -46,23 +39,24 @@ func AddServiceMonitor(serviceName string) error { } func RemoveServiceMonitor(serviceName string) error { - monitorsMutex.Lock() - defer monitorsMutex.Unlock() + global.MonitorsMutex.Lock() + defer global.MonitorsMutex.Unlock() - monitor, exists := monitors[serviceName] + monitor, exists := global.Monitors[serviceName] if !exists { return fmt.Errorf("service monitor '%s' not found", serviceName) } - close(monitor.StopChan) + // close(monitor.StopChan) monitor.Running = false - delete(monitors, serviceName) + monitor.Recovery = false + // delete(global.Monitors, serviceName) postLog.Info(fmt.Sprintf("[Monitor] Removed service monitor for: %s", serviceName)) return nil } -func runMonitor(m *ServiceMonitor) { +func runMonitor(m *global.ServiceMonitor) { postLog.Info(fmt.Sprintf("[Monitor] Started monitoring service: %s", m.ServiceName)) ticker := time.NewTicker(5 * time.Second) @@ -124,7 +118,7 @@ func checkServiceLogs(serviceName string) (bool, error) { } func IsServiceExist(serviceName string) bool { - _, exists := monitors[serviceName] + _, exists := global.Monitors[serviceName] if !exists { return false } @@ -147,14 +141,15 @@ func IsServiceRunning(serviceName string) bool { func throwException(serviceName, errorContent string) error { postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent)) - monitorsMutex.Lock() - monitor, exists := monitors[serviceName] + global.MonitorsMutex.Lock() + monitor, exists := global.Monitors[serviceName] if exists { close(monitor.StopChan) monitor.Running = false - delete(monitors, serviceName) + monitor.Recovery = true + // delete(global.Monitors, serviceName) } - monitorsMutex.Unlock() + global.MonitorsMutex.Unlock() err := errorHandle.HandleErrorProcess(serviceName, IsServiceRunning) if err != nil { @@ -165,15 +160,15 @@ func throwException(serviceName, errorContent string) error { return nil } - monitorsMutex.Lock() + global.MonitorsMutex.Lock() stopChan := make(chan struct{}) - newMonitor := &ServiceMonitor{ + newMonitor := &global.ServiceMonitor{ ServiceName: serviceName, StopChan: stopChan, Running: true, } - monitors[serviceName] = newMonitor - monitorsMutex.Unlock() + global.Monitors[serviceName] = newMonitor + global.MonitorsMutex.Unlock() go runMonitor(newMonitor) postLog.Info(fmt.Sprintf("[Monitor] Service recovered and monitor restarted: %s", serviceName)) @@ -182,12 +177,14 @@ func throwException(serviceName, errorContent string) error { } func GetActiveMonitors() []string { - monitorsMutex.RLock() - defer monitorsMutex.RUnlock() + global.MonitorsMutex.RLock() + defer global.MonitorsMutex.RUnlock() var activeServices []string - for name := range monitors { - activeServices = append(activeServices, name) + for name, monitor := range global.Monitors { + if monitor.Running { + activeServices = append(activeServices, name) + } } return activeServices }