191 lines
4.8 KiB
Go
191 lines
4.8 KiB
Go
package monitor
|
|
|
|
import (
|
|
"Watchdog_Linux-systemd/errorHandle"
|
|
"Watchdog_Linux-systemd/postLog"
|
|
"Watchdog_Linux-systemd/global"
|
|
"Watchdog_Linux-systemd/socket"
|
|
"fmt"
|
|
"os/exec"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
|
|
|
|
func AddServiceMonitor(serviceName string) error {
|
|
global.MonitorsMutex.Lock()
|
|
defer global.MonitorsMutex.Unlock()
|
|
|
|
if _, exists := global.Monitors[serviceName]; exists {
|
|
if global.Monitors[serviceName].Running {
|
|
return fmt.Errorf("service monitor '%s' already exists", serviceName)
|
|
}
|
|
}
|
|
|
|
stopChan := make(chan struct{})
|
|
monitor := &global.ServiceMonitor{
|
|
ServiceName: serviceName,
|
|
StopChan: stopChan,
|
|
Running: true,
|
|
}
|
|
|
|
global.Monitors[serviceName] = monitor
|
|
|
|
go runMonitor(monitor)
|
|
|
|
postLog.Info(fmt.Sprintf("[Monitor] Added service monitor for: %s", serviceName))
|
|
return nil
|
|
}
|
|
|
|
func RemoveServiceMonitor(serviceName string) error {
|
|
global.MonitorsMutex.Lock()
|
|
defer global.MonitorsMutex.Unlock()
|
|
|
|
monitor, exists := global.Monitors[serviceName]
|
|
if !exists {
|
|
return fmt.Errorf("service monitor '%s' not found", serviceName)
|
|
}
|
|
|
|
// close(monitor.StopChan)
|
|
monitor.Running = false
|
|
monitor.Recovery = false
|
|
// delete(global.Monitors, serviceName)
|
|
|
|
postLog.Info(fmt.Sprintf("[Monitor] Removed service monitor for: %s", serviceName))
|
|
return nil
|
|
}
|
|
|
|
func runMonitor(m *global.ServiceMonitor) {
|
|
postLog.Info(fmt.Sprintf("[Monitor] Started monitoring service: %s", m.ServiceName))
|
|
|
|
ticker := time.NewTicker(5 * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-m.StopChan:
|
|
postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
|
|
return
|
|
case <-ticker.C:
|
|
serviceStatus, err := checkServiceStatus(m.ServiceName)
|
|
if err != nil {
|
|
throwException(m.ServiceName, fmt.Sprintf("Failed to check service status: %v", err))
|
|
continue
|
|
}
|
|
|
|
logStatus, err := checkServiceLogs(m.ServiceName)
|
|
if err != nil {
|
|
throwException(m.ServiceName, fmt.Sprintf("Failed to check service logs: %v", err))
|
|
continue
|
|
}
|
|
|
|
if !serviceStatus || !logStatus {
|
|
throwException(m.ServiceName, fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// checkServiceStatus reports whether `systemctl is-active serviceName` prints
// "active".
//
// systemctl exits with a non-zero status for any unit that is not active, and
// exec surfaces that as *exec.ExitError. That case is a valid "not active"
// answer as long as a state was printed, so it yields (false, nil). An error
// is returned only when the command could not be run, or when it exited
// non-zero without printing a state.
func checkServiceStatus(serviceName string) (bool, error) {
	output, err := exec.Command("systemctl", "is-active", serviceName).Output()
	status := strings.TrimSpace(string(output))

	if err != nil {
		// Output returns the *exec.ExitError directly, so a plain type
		// assertion is enough to tell "ran and exited non-zero" apart from
		// "could not be started".
		if _, exited := err.(*exec.ExitError); !exited || status == "" {
			return false, fmt.Errorf("failed to execute systemctl: %w", err)
		}
	}

	return status == "active", nil
}
|
|
|
|
// checkServiceLogs scans the last 50 journal lines of serviceName for error
// keywords.
//
// It returns true when none of the keywords appears (case-insensitively) in
// the output, false when at least one does, and an error when journalctl
// cannot be executed successfully.
func checkServiceLogs(serviceName string) (bool, error) {
	cmd := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager")
	output, err := cmd.Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute journalctl: %w", err)
	}

	// Lower-case once so each keyword does not rescan and copy the log text.
	logContent := strings.ToLower(string(output))
	errorKeywords := []string{"error", "fatal", "failed", "critical", "exception"}

	for _, keyword := range errorKeywords {
		if strings.Contains(logContent, keyword) {
			return false, nil
		}
	}

	return true, nil
}
|
|
|
|
func IsServiceExist(serviceName string) bool {
|
|
_, exists := global.Monitors[serviceName]
|
|
if !exists {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func IsServiceRunning(serviceName string) bool {
|
|
status, err := checkServiceStatus(serviceName)
|
|
if err != nil || !status {
|
|
return false
|
|
}
|
|
|
|
logStatus, err := checkServiceLogs(serviceName)
|
|
if err != nil || !logStatus {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func throwException(serviceName, errorContent string) error {
|
|
postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent))
|
|
|
|
global.MonitorsMutex.Lock()
|
|
monitor, exists := global.Monitors[serviceName]
|
|
if exists {
|
|
close(monitor.StopChan)
|
|
monitor.Running = false
|
|
monitor.Recovery = true
|
|
// delete(global.Monitors, serviceName)
|
|
}
|
|
global.MonitorsMutex.Unlock()
|
|
|
|
err := errorHandle.HandleErrorProcess(serviceName, IsServiceRunning)
|
|
if err != nil {
|
|
err := socket.SendMsg(fmt.Sprintf("[Exception] <exceptionType>%s</exceptionType> <serviceName>%s</serviceName> <errorMsg>%s</errorMsg>", "service", serviceName, errorContent))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to send exception message: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
global.MonitorsMutex.Lock()
|
|
stopChan := make(chan struct{})
|
|
newMonitor := &global.ServiceMonitor{
|
|
ServiceName: serviceName,
|
|
StopChan: stopChan,
|
|
Running: true,
|
|
}
|
|
global.Monitors[serviceName] = newMonitor
|
|
global.MonitorsMutex.Unlock()
|
|
|
|
go runMonitor(newMonitor)
|
|
postLog.Info(fmt.Sprintf("[Monitor] Service recovered and monitor restarted: %s", serviceName))
|
|
|
|
return nil
|
|
}
|
|
|
|
func GetActiveMonitors() []string {
|
|
global.MonitorsMutex.RLock()
|
|
defer global.MonitorsMutex.RUnlock()
|
|
|
|
var activeServices []string
|
|
for name, monitor := range global.Monitors {
|
|
if monitor.Running {
|
|
activeServices = append(activeServices, name)
|
|
}
|
|
}
|
|
return activeServices
|
|
}
|