refactor(processor): replace error message formatting with errors.New for better error handling

This commit is contained in:
2026-05-21 23:05:07 +08:00
parent 751e6afa83
commit de5c7f7eda
3 changed files with 76 additions and 42 deletions

19
README.md Normal file
View File

@@ -0,0 +1,19 @@
# Super-frpc Watchdog for GNU/Linux `systemd`
## Basic Configuration
Create a `config.json` file in the project root:
```json
{
"debugMode": false
}
```
| Field | Description |
| :---: | :---: |
| `debugMode` | Whether debug mode is enabled (`true` or `false`) |
## TODO
- [x] Fix log tracing consuming too much disk I/O

View File

@@ -1,12 +1,13 @@
package errorHandle package errorHandle
import ( import (
"errors"
"fmt" "fmt"
"os/exec" "os/exec"
"time" "time"
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/global" "Watchdog_Linux-systemd/global"
"Watchdog_Linux-systemd/postLog"
) )
type ServiceStatusChecker func(serviceName string) bool type ServiceStatusChecker func(serviceName string) bool
@@ -35,5 +36,5 @@ func HandleErrorProcess(serviceName string, isServiceRunning ServiceStatusChecke
serviceControl.ErrorType = "restart" serviceControl.ErrorType = "restart"
serviceControl.ErrorMsg = fmt.Sprintf("Failed to recover service '%s', retry count: %d", serviceName, serviceControl.RetryCount) serviceControl.ErrorMsg = fmt.Sprintf("Failed to recover service '%s', retry count: %d", serviceName, serviceControl.RetryCount)
serviceControl.ErrorTime = time.Now() serviceControl.ErrorTime = time.Now()
return fmt.Errorf(serviceControl.ErrorMsg) return errors.New(serviceControl.ErrorMsg)
} }

View File

@@ -2,8 +2,8 @@ package monitor
import ( import (
"Watchdog_Linux-systemd/errorHandle" "Watchdog_Linux-systemd/errorHandle"
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/global" "Watchdog_Linux-systemd/global"
"Watchdog_Linux-systemd/postLog"
"Watchdog_Linux-systemd/socket" "Watchdog_Linux-systemd/socket"
"fmt" "fmt"
"os/exec" "os/exec"
@@ -11,8 +11,6 @@ import (
"time" "time"
) )
func AddServiceMonitor(serviceName string) error { func AddServiceMonitor(serviceName string) error {
global.MonitorsMutex.Lock() global.MonitorsMutex.Lock()
defer global.MonitorsMutex.Unlock() defer global.MonitorsMutex.Unlock()
@@ -68,53 +66,74 @@ func runMonitor(m *global.ServiceMonitor) {
postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName)) postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
return return
case <-ticker.C: case <-ticker.C:
serviceStatus, err := checkServiceStatus(m.ServiceName) serviceStatus, statusDetail, err := checkServiceHealth(m.ServiceName)
if err != nil { if err != nil {
throwException(m.ServiceName, fmt.Sprintf("Failed to check service status: %v", err)) throwException(m.ServiceName, fmt.Sprintf("Failed to check service status: %v", err))
continue continue
} }
logStatus, err := checkServiceLogs(m.ServiceName) if !serviceStatus {
throwException(m.ServiceName, fmt.Sprintf("Service check failed - %s", statusDetail))
}
}
}
}
func checkServiceHealth(serviceName string) (bool, string, error) {
cmd := exec.Command(
"systemctl",
"show",
serviceName,
"--property=ActiveState",
"--property=SubState",
"--property=Result",
"--property=ExecMainStatus",
"--property=ExecMainCode",
"--property=NRestarts",
)
output, err := cmd.Output()
if err != nil { if err != nil {
throwException(m.ServiceName, fmt.Sprintf("Failed to check service logs: %v", err)) return false, "", fmt.Errorf("failed to execute systemctl show: %w", err)
}
properties := make(map[string]string)
for _, line := range strings.Split(strings.TrimSpace(string(output)), "\n") {
key, value, found := strings.Cut(line, "=")
if !found {
continue continue
} }
properties[strings.TrimSpace(key)] = strings.TrimSpace(value)
if !serviceStatus || !logStatus {
throwException(m.ServiceName, fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
}
}
}
}
func checkServiceStatus(serviceName string) (bool, error) {
cmd := exec.Command("systemctl", "is-active", serviceName)
output, err := cmd.Output()
if err != nil {
return false, fmt.Errorf("failed to execute systemctl: %w", err)
} }
status := strings.TrimSpace(string(output)) if len(properties) == 0 {
return status == "active", nil return false, "", fmt.Errorf("unexpected systemctl show output: %q", strings.TrimSpace(string(output)))
}
func checkServiceLogs(serviceName string) (bool, error) {
cmd := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager")
output, err := cmd.Output()
if err != nil {
return false, fmt.Errorf("failed to execute journalctl: %w", err)
} }
logContent := string(output) activeState := properties["ActiveState"]
errorKeywords := []string{"error", "fatal", "failed", "critical", "exception"} subState := properties["SubState"]
result := properties["Result"]
execMainStatus := properties["ExecMainStatus"]
execMainCode := properties["ExecMainCode"]
nRestarts := properties["NRestarts"]
for _, keyword := range errorKeywords { detail := fmt.Sprintf("ActiveState=%s, SubState=%s, Result=%s, ExecMainStatus=%s, ExecMainCode=%s", activeState, subState, result, execMainStatus, execMainCode)
if strings.Contains(strings.ToLower(logContent), keyword) { if nRestarts != "" {
return false, nil detail = fmt.Sprintf("%s, NRestarts=%s", detail, nRestarts)
}
} }
return true, nil if activeState != "active" {
return false, detail, nil
}
if result != "" && result != "success" {
return false, detail, nil
}
if execMainStatus != "" && execMainStatus != "0" {
return false, detail, nil
}
return true, detail, nil
} }
func IsServiceExist(serviceName string) bool { func IsServiceExist(serviceName string) bool {
@@ -126,15 +145,10 @@ func IsServiceExist(serviceName string) bool {
} }
func IsServiceRunning(serviceName string) bool { func IsServiceRunning(serviceName string) bool {
status, err := checkServiceStatus(serviceName) status, _, err := checkServiceHealth(serviceName)
if err != nil || !status { if err != nil || !status {
return false return false
} }
logStatus, err := checkServiceLogs(serviceName)
if err != nil || !logStatus {
return false
}
return true return true
} }