|
|
|
|
@@ -2,8 +2,8 @@ package monitor
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"Watchdog_Linux-systemd/errorHandle"
|
|
|
|
|
"Watchdog_Linux-systemd/postLog"
|
|
|
|
|
"Watchdog_Linux-systemd/global"
|
|
|
|
|
"Watchdog_Linux-systemd/postLog"
|
|
|
|
|
"Watchdog_Linux-systemd/socket"
|
|
|
|
|
"fmt"
|
|
|
|
|
"os/exec"
|
|
|
|
|
@@ -11,8 +11,6 @@ import (
|
|
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
func AddServiceMonitor(serviceName string) error {
|
|
|
|
|
global.MonitorsMutex.Lock()
|
|
|
|
|
defer global.MonitorsMutex.Unlock()
|
|
|
|
|
@@ -68,53 +66,74 @@ func runMonitor(m *global.ServiceMonitor) {
|
|
|
|
|
postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
|
|
|
|
|
return
|
|
|
|
|
case <-ticker.C:
|
|
|
|
|
serviceStatus, err := checkServiceStatus(m.ServiceName)
|
|
|
|
|
serviceStatus, statusDetail, err := checkServiceHealth(m.ServiceName)
|
|
|
|
|
if err != nil {
|
|
|
|
|
throwException(m.ServiceName, fmt.Sprintf("Failed to check service status: %v", err))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logStatus, err := checkServiceLogs(m.ServiceName)
|
|
|
|
|
if err != nil {
|
|
|
|
|
throwException(m.ServiceName, fmt.Sprintf("Failed to check service logs: %v", err))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !serviceStatus || !logStatus {
|
|
|
|
|
throwException(m.ServiceName, fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
|
|
|
|
|
if !serviceStatus {
|
|
|
|
|
throwException(m.ServiceName, fmt.Sprintf("Service check failed - %s", statusDetail))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// checkServiceStatus reports whether `systemctl is-active` prints "active"
// for serviceName. Any error from running the command is wrapped and
// returned with a false status.
//
// NOTE(review): this looks like the older check that checkServiceHealth is
// meant to replace; it is kept because other lines in this file still call
// it. Confirm before deleting.
func checkServiceStatus(serviceName string) (bool, error) {
	cmd := exec.Command("systemctl", "is-active", serviceName)
	output, err := cmd.Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute systemctl: %w", err)
	}

	status := strings.TrimSpace(string(output))
	return status == "active", nil
}

// checkServiceHealth runs `systemctl show` for serviceName and judges the
// unit from the properties it prints.
//
// It returns:
//   - whether the unit is considered healthy: ActiveState is "active",
//     Result is empty or "success", and ExecMainStatus is empty or "0";
//   - a detail string listing the properties that were read, suitable for
//     log or alert messages (empty when an error is returned);
//   - an error when the command cannot be executed or prints no KEY=VALUE
//     lines at all.
func checkServiceHealth(serviceName string) (bool, string, error) {
	cmd := exec.Command(
		"systemctl",
		"show",
		serviceName,
		"--property=ActiveState",
		"--property=SubState",
		"--property=Result",
		"--property=ExecMainStatus",
		"--property=ExecMainCode",
		"--property=NRestarts",
	)
	output, err := cmd.Output()
	if err != nil {
		return false, "", fmt.Errorf("failed to execute systemctl show: %w", err)
	}

	// Each output line is expected to look like KEY=VALUE; lines without "="
	// are skipped.
	properties := make(map[string]string)
	for _, line := range strings.Split(strings.TrimSpace(string(output)), "\n") {
		key, value, found := strings.Cut(line, "=")
		if !found {
			continue
		}
		properties[strings.TrimSpace(key)] = strings.TrimSpace(value)
	}

	if len(properties) == 0 {
		return false, "", fmt.Errorf("unexpected systemctl show output: %q", strings.TrimSpace(string(output)))
	}

	activeState := properties["ActiveState"]
	subState := properties["SubState"]
	result := properties["Result"]
	execMainStatus := properties["ExecMainStatus"]
	execMainCode := properties["ExecMainCode"]
	nRestarts := properties["NRestarts"]

	detail := fmt.Sprintf("ActiveState=%s, SubState=%s, Result=%s, ExecMainStatus=%s, ExecMainCode=%s", activeState, subState, result, execMainStatus, execMainCode)
	// NRestarts is appended only when systemctl actually reported it.
	if nRestarts != "" {
		detail = fmt.Sprintf("%s, NRestarts=%s", detail, nRestarts)
	}

	// A missing ActiveState reads as "" and is therefore treated as unhealthy.
	if activeState != "active" {
		return false, detail, nil
	}

	// Result and ExecMainStatus only count against the unit when present.
	if result != "" && result != "success" {
		return false, detail, nil
	}

	if execMainStatus != "" && execMainStatus != "0" {
		return false, detail, nil
	}

	return true, detail, nil
}

// checkServiceLogs reads the last 50 journal entries for serviceName and
// returns false when any of a fixed list of error keywords appears in them
// (case-insensitive substring match). Any error from running journalctl is
// wrapped and returned with a false status.
//
// NOTE(review): like checkServiceStatus, this appears to be superseded by
// checkServiceHealth; it is kept because other lines in this file still
// call it. Confirm before deleting.
func checkServiceLogs(serviceName string) (bool, error) {
	cmd := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager")
	output, err := cmd.Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute journalctl: %w", err)
	}

	// Lower-case once so every keyword comparison is case-insensitive.
	logContent := strings.ToLower(string(output))
	errorKeywords := []string{"error", "fatal", "failed", "critical", "exception"}

	for _, keyword := range errorKeywords {
		if strings.Contains(logContent, keyword) {
			return false, nil
		}
	}

	return true, nil
}
|
|
|
|
|
|
|
|
|
|
func IsServiceExist(serviceName string) bool {
|
|
|
|
|
@@ -126,15 +145,10 @@ func IsServiceExist(serviceName string) bool {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func IsServiceRunning(serviceName string) bool {
|
|
|
|
|
status, err := checkServiceStatus(serviceName)
|
|
|
|
|
status, _, err := checkServiceHealth(serviceName)
|
|
|
|
|
if err != nil || !status {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logStatus, err := checkServiceLogs(serviceName)
|
|
|
|
|
if err != nil || !logStatus {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|