Files
WatchDog_Linux-systemd/monitor/monitor.go
NanamiAdmin 58a8efc17a refactor(socket): decouple command handling and add message sending
- Move command handler to variable for better flexibility
- Add SendMsg function for sending messages through socket
- Fix missing return statements in command execution
- Improve error handling in monitor exception reporting
2026-04-28 19:55:45 +08:00

144 lines
3.5 KiB
Go

package monitor
import (
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/socket"
"fmt"
"os/exec"
"strings"
"sync"
"time"
)
// ServiceMonitor holds the per-service state for one background monitor
// goroutine started by AddServiceMonitor.
type ServiceMonitor struct {
// ServiceName is the unit name handed to systemctl and journalctl.
ServiceName string
// StopChan is closed by RemoveServiceMonitor to make runMonitor return.
StopChan chan struct{}
// Running is set to true on creation and to false on removal.
// NOTE(review): nothing in this file reads it — confirm whether other
// files in the package do.
Running bool
}
// Package-level registry of running monitors.
var (
// monitors maps a service name to its ServiceMonitor.
monitors = make(map[string]*ServiceMonitor)
// monitorsMutex guards every access to monitors in this file.
monitorsMutex sync.RWMutex
)
// AddServiceMonitor registers serviceName and starts a goroutine running
// runMonitor for it.
//
// It returns an error if a monitor with the same name is already registered;
// otherwise it returns nil after logging the addition.
func AddServiceMonitor(serviceName string) error {
	monitorsMutex.Lock()
	defer monitorsMutex.Unlock()

	if _, duplicate := monitors[serviceName]; duplicate {
		return fmt.Errorf("service monitor '%s' already exists", serviceName)
	}

	// Build the entry in one literal; the stop channel is unbuffered and is
	// only ever closed, never sent on.
	entry := &ServiceMonitor{
		ServiceName: serviceName,
		StopChan:    make(chan struct{}),
		Running:     true,
	}
	monitors[serviceName] = entry

	go runMonitor(entry)

	addedMsg := fmt.Sprintf("[Monitor] Added service monitor for: %s", serviceName)
	postLog.Info(addedMsg)
	return nil
}
// RemoveServiceMonitor stops and unregisters the monitor for serviceName.
//
// Closing the monitor's StopChan signals its runMonitor goroutine to return.
// An error is returned if no monitor with that name is registered.
func RemoveServiceMonitor(serviceName string) error {
	monitorsMutex.Lock()
	defer monitorsMutex.Unlock()

	entry, found := monitors[serviceName]
	if found {
		// Signal the goroutine first, then update bookkeeping.
		close(entry.StopChan)
		entry.Running = false
		delete(monitors, serviceName)

		removedMsg := fmt.Sprintf("[Monitor] Removed service monitor for: %s", serviceName)
		postLog.Info(removedMsg)
		return nil
	}

	return fmt.Errorf("service monitor '%s' not found", serviceName)
}
// runMonitor polls one service every five seconds until m.StopChan is closed.
//
// On each tick it calls checkServiceStatus and then checkServiceLogs. If
// either check cannot be run, or either returns false, the problem is
// reported through throwException. When throwException itself returns an
// error (the report could not be sent), that error is logged here rather
// than being silently discarded.
func runMonitor(m *ServiceMonitor) {
	postLog.Info(fmt.Sprintf("[Monitor] Started monitoring service: %s", m.ServiceName))

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	// report forwards a problem to throwException and records a delivery
	// failure so that a lost alert is still visible in the local log.
	report := func(detail string) {
		if err := throwException(m.ServiceName, detail); err != nil {
			postLog.Error(fmt.Sprintf("[Monitor] Service: %s - %v", m.ServiceName, err))
		}
	}

	for {
		select {
		case <-m.StopChan:
			postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
			return
		case <-ticker.C:
			serviceStatus, err := checkServiceStatus(m.ServiceName)
			if err != nil {
				report(fmt.Sprintf("Failed to check service status: %v", err))
				continue
			}

			logStatus, err := checkServiceLogs(m.ServiceName)
			if err != nil {
				report(fmt.Sprintf("Failed to check service logs: %v", err))
				continue
			}

			if !serviceStatus || !logStatus {
				report(fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
			}
		}
	}
}
// checkServiceStatus reports whether `systemctl is-active serviceName`
// prints "active".
//
// systemctl exits non-zero for every state other than active, which makes
// Output return an *exec.ExitError even though the query itself worked. That
// case is treated as a normal negative answer (false, nil) as long as a state
// was printed on stdout. An error is returned only when systemctl could not
// be run at all, or when it exited non-zero without printing any state.
func checkServiceStatus(serviceName string) (bool, error) {
	output, err := exec.Command("systemctl", "is-active", serviceName).Output()
	status := strings.TrimSpace(string(output))

	if err != nil {
		// Output hands back the *exec.ExitError directly, so a plain type
		// assertion is enough to tell "ran and exited non-zero" apart from
		// "could not be started".
		_, exited := err.(*exec.ExitError)
		if !exited || status == "" {
			return false, fmt.Errorf("failed to execute systemctl: %w", err)
		}
	}

	return status == "active", nil
}
// checkServiceLogs scans the last 50 journal lines for serviceName and
// reports whether they are free of error keywords.
//
// It returns (false, nil) if the output contains, case-insensitively, any of
// "error", "fatal", "failed", "critical" or "exception" as a substring, and
// (true, nil) otherwise. An error is returned when journalctl cannot be run
// or exits non-zero.
func checkServiceLogs(serviceName string) (bool, error) {
	output, err := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager").Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute journalctl: %w", err)
	}

	// Lower-case the log text once instead of once per keyword.
	logContent := strings.ToLower(string(output))

	errorKeywords := []string{"error", "fatal", "failed", "critical", "exception"}
	for _, keyword := range errorKeywords {
		if strings.Contains(logContent, keyword) {
			return false, nil
		}
	}
	return true, nil
}
// throwException records a monitoring problem for serviceName and forwards
// it over the socket.
//
// The problem is first logged with postLog.Error, then sent with
// socket.SendMsg. If sending fails, the returned error wraps the underlying
// one with %w so callers can inspect it with errors.Is / errors.As; on
// success it returns nil.
func throwException(serviceName, errorContent string) error {
	postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent))

	if err := socket.SendMsg(fmt.Sprintf("service %s exception: %s", serviceName, errorContent)); err != nil {
		return fmt.Errorf("failed to send exception message: %w", err)
	}
	return nil
}
// GetActiveMonitors returns the names of all currently registered monitors.
//
// The names come from ranging over a map, so their order is unspecified.
// The result is a nil slice when no monitor is registered.
func GetActiveMonitors() []string {
	monitorsMutex.RLock()

	var names []string
	for registered := range monitors {
		names = append(names, registered)
	}

	// Nothing above can panic, so an explicit unlock is equivalent to defer.
	monitorsMutex.RUnlock()
	return names
}