Files

191 lines
4.8 KiB
Go

package monitor
import (
"Watchdog_Linux-systemd/errorHandle"
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/global"
"Watchdog_Linux-systemd/socket"
"fmt"
"os/exec"
"strings"
"time"
)
// AddServiceMonitor registers a monitor for serviceName in global.Monitors and
// starts a goroutine running runMonitor for it.
//
// It returns an error when an entry for serviceName already exists and is
// marked Running. An existing entry that is not Running is replaced. Before it
// is replaced, its StopChan is closed (if it is still open) so that a goroutine
// still polling on behalf of the old entry exits instead of running alongside
// the new one.
func AddServiceMonitor(serviceName string) error {
	global.MonitorsMutex.Lock()
	defer global.MonitorsMutex.Unlock()

	if old, exists := global.Monitors[serviceName]; exists {
		if old.Running {
			return fmt.Errorf("service monitor '%s' already exists", serviceName)
		}
		// Clearing the Running flag does not by itself wake the goroutine that
		// selects on StopChan, so signal it here. The select guards against
		// closing a channel that was already closed, which would panic.
		if old.StopChan != nil {
			select {
			case <-old.StopChan:
				// Already closed; nothing to signal.
			default:
				close(old.StopChan)
			}
		}
	}

	monitor := &global.ServiceMonitor{
		ServiceName: serviceName,
		StopChan:    make(chan struct{}),
		Running:     true,
	}
	global.Monitors[serviceName] = monitor
	go runMonitor(monitor)

	postLog.Info(fmt.Sprintf("[Monitor] Added service monitor for: %s", serviceName))
	return nil
}
// RemoveServiceMonitor marks the monitor registered for serviceName as no
// longer running and clears its Recovery flag.
//
// It returns an error when global.Monitors has no entry for serviceName.
//
// NOTE(review): the entry is intentionally left in global.Monitors and its
// StopChan is not closed here, so this function only flips the flags; it does
// not itself signal the monitoring goroutine.
func RemoveServiceMonitor(serviceName string) error {
	global.MonitorsMutex.Lock()
	defer global.MonitorsMutex.Unlock()

	entry, found := global.Monitors[serviceName]
	if !found {
		return fmt.Errorf("service monitor '%s' not found", serviceName)
	}

	entry.Running, entry.Recovery = false, false

	postLog.Info(fmt.Sprintf("[Monitor] Removed service monitor for: %s", serviceName))
	return nil
}
// runMonitor polls the service described by m every five seconds until the
// monitor is stopped.
//
// The loop ends when any of the following happens:
//   - m.StopChan is closed;
//   - m.Running has been cleared (checked under global.MonitorsMutex on each
//     tick), so clearing the flag is enough to stop polling;
//   - a check fails and the failure has been handed to throwException.
//
// On each tick the service status is checked first and the journal second; a
// failure to run either check, or a negative result from either, is reported
// through throwException.
func runMonitor(m *global.ServiceMonitor) {
	postLog.Info(fmt.Sprintf("[Monitor] Started monitoring service: %s", m.ServiceName))

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	// escalate hands a failure to throwException and logs the error it may
	// return instead of silently dropping it.
	escalate := func(reason string) {
		if err := throwException(m.ServiceName, reason); err != nil {
			postLog.Error(fmt.Sprintf("[Monitor] Service: %s - %v", m.ServiceName, err))
		}
	}

	for {
		select {
		case <-m.StopChan:
			postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
			return
		case <-ticker.C:
		}

		// Honour a cleared Running flag even when StopChan was left open.
		global.MonitorsMutex.RLock()
		running := m.Running
		global.MonitorsMutex.RUnlock()
		if !running {
			postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
			return
		}

		// After throwException this goroutine must not poll again: the handler
		// either started a replacement monitor or gave up. Returning here
		// (rather than looping back into select) avoids select picking a
		// pending tick over the closed StopChan and escalating a second time.
		serviceStatus, err := checkServiceStatus(m.ServiceName)
		if err != nil {
			escalate(fmt.Sprintf("Failed to check service status: %v", err))
			return
		}

		logStatus, err := checkServiceLogs(m.ServiceName)
		if err != nil {
			escalate(fmt.Sprintf("Failed to check service logs: %v", err))
			return
		}

		if !serviceStatus || !logStatus {
			escalate(fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
			return
		}
	}
}
// checkServiceStatus runs `systemctl is-active serviceName` and reports
// whether the printed state is exactly "active".
//
// systemctl is-active exits with a non-zero status whenever the unit is not
// active, which os/exec surfaces as *exec.ExitError. That case is a valid
// answer rather than an execution failure, so it yields (false, nil) as long
// as systemctl printed a state. An error is returned only when the command
// could not be run at all, or exited non-zero without printing anything.
func checkServiceStatus(serviceName string) (bool, error) {
	output, err := exec.Command("systemctl", "is-active", serviceName).Output()
	status := strings.TrimSpace(string(output))

	if err != nil {
		// Output returns *exec.ExitError directly for a non-zero exit, so a
		// plain type assertion is sufficient to recognise it.
		if _, exited := err.(*exec.ExitError); !exited || status == "" {
			return false, fmt.Errorf("failed to execute systemctl: %w", err)
		}
	}

	return status == "active", nil
}
// checkServiceLogs runs `journalctl -u serviceName -n 50 --no-pager` and
// reports whether the returned log text is free of error keywords.
//
// The match is a case-insensitive substring search for "error", "fatal",
// "failed", "critical" and "exception". It returns (false, nil) when any
// keyword is present, (true, nil) when none is, and a non-nil error when
// journalctl could not be executed successfully.
func checkServiceLogs(serviceName string) (bool, error) {
	output, err := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager").Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute journalctl: %w", err)
	}

	// Lower-case the log text once instead of once per keyword.
	logContent := strings.ToLower(string(output))

	errorKeywords := []string{"error", "fatal", "failed", "critical", "exception"}
	for _, keyword := range errorKeywords {
		if strings.Contains(logContent, keyword) {
			return false, nil
		}
	}
	return true, nil
}
// IsServiceExist reports whether global.Monitors holds an entry for
// serviceName, regardless of whether that entry is marked Running.
//
// The lookup takes the read lock of global.MonitorsMutex because the map is
// written under that mutex by other functions in this package, and an
// unsynchronised map read concurrent with a write is a data race. Callers must
// therefore not already hold global.MonitorsMutex when calling this function.
func IsServiceExist(serviceName string) bool {
	global.MonitorsMutex.RLock()
	defer global.MonitorsMutex.RUnlock()

	_, exists := global.Monitors[serviceName]
	return exists
}
// IsServiceRunning reports whether serviceName passes both health checks:
// checkServiceStatus must return true and checkServiceLogs must return true.
// An error from either check counts as "not running". The log check is only
// performed when the status check has passed.
func IsServiceRunning(serviceName string) bool {
	if active, err := checkServiceStatus(serviceName); err != nil || !active {
		return false
	}

	logsClean, err := checkServiceLogs(serviceName)
	return err == nil && logsClean
}
// throwException handles a failed check for serviceName.
//
// Steps:
//  1. Log errorContent as an error.
//  2. If global.Monitors has an entry for serviceName, close its StopChan (only
//     if it is still open), clear Running and set Recovery.
//  3. Call errorHandle.HandleErrorProcess with IsServiceRunning as the probe.
//  4. If that call fails, log the failure and send an "[Exception]" message
//     through socket.SendMsg; no monitor is restarted.
//  5. Otherwise register a fresh monitor for serviceName and start runMonitor
//     for it.
//
// The returned error is non-nil only when sending the exception message fails.
func throwException(serviceName, errorContent string) error {
	postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent))

	global.MonitorsMutex.Lock()
	if monitor, exists := global.Monitors[serviceName]; exists {
		// This function can be reached more than once for the same entry.
		// Closing an already closed channel panics, so only close StopChan
		// when it is still open. The check and the close are atomic with
		// respect to other closers because they happen under MonitorsMutex.
		if monitor.StopChan != nil {
			select {
			case <-monitor.StopChan:
				// Already closed.
			default:
				close(monitor.StopChan)
			}
		}
		monitor.Running = false
		monitor.Recovery = true
	}
	global.MonitorsMutex.Unlock()

	// The lock is released while recovery runs so other monitor operations are
	// not blocked for its duration.
	if recoverErr := errorHandle.HandleErrorProcess(serviceName, IsServiceRunning); recoverErr != nil {
		// Keep the recovery error visible; previously it was shadowed and lost.
		postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Recovery failed: %v", serviceName, recoverErr))

		msg := fmt.Sprintf("[Exception] <exceptionType>%s</exceptionType> <serviceName>%s</serviceName> <errorMsg>%s</errorMsg>", "service", serviceName, errorContent)
		if sendErr := socket.SendMsg(msg); sendErr != nil {
			return fmt.Errorf("failed to send exception message: %w", sendErr)
		}
		return nil
	}

	newMonitor := &global.ServiceMonitor{
		ServiceName: serviceName,
		StopChan:    make(chan struct{}),
		Running:     true,
	}

	global.MonitorsMutex.Lock()
	global.Monitors[serviceName] = newMonitor
	global.MonitorsMutex.Unlock()

	go runMonitor(newMonitor)

	postLog.Info(fmt.Sprintf("[Monitor] Service recovered and monitor restarted: %s", serviceName))
	return nil
}
// GetActiveMonitors returns the names of all entries in global.Monitors whose
// Running flag is set. The map is read under the read lock of
// global.MonitorsMutex. The order of the result follows map iteration order
// and is therefore unspecified; the result is nil when nothing is running.
func GetActiveMonitors() []string {
	global.MonitorsMutex.RLock()
	defer global.MonitorsMutex.RUnlock()

	var running []string
	for svc, entry := range global.Monitors {
		if !entry.Running {
			continue
		}
		running = append(running, svc)
	}
	return running
}