feat(monitor): add service monitoring functionality with systemd integration
Implement a new monitoring system that tracks service status and logs using systemd commands. The monitor includes:
- Periodic status checks via systemctl
- Log analysis via journalctl
- Thread-safe service tracking
- Error detection and reporting
This commit is contained in:
138
monitor/monitor.go
Normal file
138
monitor/monitor.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package monitor
|
||||
|
||||
import (
|
||||
"Watchdog_Linux-systemd/postLog"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ServiceMonitor is the bookkeeping record for one monitored systemd unit.
type ServiceMonitor struct {
	// ServiceName is the unit name handed to systemctl and journalctl.
	ServiceName string
	// StopChan is closed by RemoveServiceMonitor to tell the polling
	// goroutine to exit.
	StopChan chan struct{}
	// Running is set to true when the monitor is added and to false when it
	// is removed.
	Running bool
}
|
||||
|
||||
var (
|
||||
monitors = make(map[string]*ServiceMonitor)
|
||||
monitorsMutex sync.RWMutex
|
||||
)
|
||||
|
||||
func AddServiceMonitor(serviceName string) error {
|
||||
monitorsMutex.Lock()
|
||||
defer monitorsMutex.Unlock()
|
||||
|
||||
if _, exists := monitors[serviceName]; exists {
|
||||
return fmt.Errorf("service monitor '%s' already exists", serviceName)
|
||||
}
|
||||
|
||||
stopChan := make(chan struct{})
|
||||
monitor := &ServiceMonitor{
|
||||
ServiceName: serviceName,
|
||||
StopChan: stopChan,
|
||||
Running: true,
|
||||
}
|
||||
|
||||
monitors[serviceName] = monitor
|
||||
|
||||
go runMonitor(monitor)
|
||||
|
||||
postLog.Info(fmt.Sprintf("[Monitor] Added service monitor for: %s", serviceName))
|
||||
return nil
|
||||
}
|
||||
|
||||
func RemoveServiceMonitor(serviceName string) error {
|
||||
monitorsMutex.Lock()
|
||||
defer monitorsMutex.Unlock()
|
||||
|
||||
monitor, exists := monitors[serviceName]
|
||||
if !exists {
|
||||
return fmt.Errorf("service monitor '%s' not found", serviceName)
|
||||
}
|
||||
|
||||
close(monitor.StopChan)
|
||||
monitor.Running = false
|
||||
delete(monitors, serviceName)
|
||||
|
||||
postLog.Info(fmt.Sprintf("[Monitor] Removed service monitor for: %s", serviceName))
|
||||
return nil
|
||||
}
|
||||
|
||||
func runMonitor(m *ServiceMonitor) {
|
||||
postLog.Info(fmt.Sprintf("[Monitor] Started monitoring service: %s", m.ServiceName))
|
||||
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-m.StopChan:
|
||||
postLog.Info(fmt.Sprintf("[Monitor] Stopped monitoring service: %s", m.ServiceName))
|
||||
return
|
||||
case <-ticker.C:
|
||||
serviceStatus, err := checkServiceStatus(m.ServiceName)
|
||||
if err != nil {
|
||||
throwException(m.ServiceName, fmt.Sprintf("Failed to check service status: %v", err))
|
||||
continue
|
||||
}
|
||||
|
||||
logStatus, err := checkServiceLogs(m.ServiceName)
|
||||
if err != nil {
|
||||
throwException(m.ServiceName, fmt.Sprintf("Failed to check service logs: %v", err))
|
||||
continue
|
||||
}
|
||||
|
||||
if !serviceStatus || !logStatus {
|
||||
throwException(m.ServiceName, fmt.Sprintf("Service check failed - Status: %v, Logs: %v", serviceStatus, logStatus))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// checkServiceStatus reports whether the systemd unit serviceName is active,
// as printed by `systemctl is-active`.
//
// systemctl exits with a non-zero status whenever the unit is not active,
// while still printing the state (for example "inactive" or "failed") on
// stdout. That is a valid answer rather than an execution failure, so it is
// returned as (false, nil). An error is returned only when systemctl could not
// be run, or when it exited non-zero without printing any state.
func checkServiceStatus(serviceName string) (bool, error) {
	output, err := exec.Command("systemctl", "is-active", serviceName).Output()
	status := strings.TrimSpace(string(output))

	if err != nil {
		// Output still returns the captured stdout alongside an
		// *exec.ExitError when the command ran but exited non-zero.
		if _, exited := err.(*exec.ExitError); exited && status != "" {
			return status == "active", nil
		}
		return false, fmt.Errorf("failed to execute systemctl: %w", err)
	}

	return status == "active", nil
}
|
||||
|
||||
// checkServiceLogs fetches the last 50 journal lines for serviceName via
// journalctl and reports whether they look clean.
//
// It returns false if the output contains any of a fixed set of keywords
// ("error", "fatal", "failed", "critical", "exception"). The match is a
// case-insensitive substring search over the whole output, so a keyword
// embedded in a longer word or in an otherwise benign message also counts.
// An error is returned only when journalctl itself cannot be run successfully.
func checkServiceLogs(serviceName string) (bool, error) {
	output, err := exec.Command("journalctl", "-u", serviceName, "-n", "50", "--no-pager").Output()
	if err != nil {
		return false, fmt.Errorf("failed to execute journalctl: %w", err)
	}

	// Lowercase once up front instead of once per keyword.
	logContent := strings.ToLower(string(output))

	for _, keyword := range []string{"error", "fatal", "failed", "critical", "exception"} {
		if strings.Contains(logContent, keyword) {
			return false, nil
		}
	}

	return true, nil
}
|
||||
|
||||
func throwException(serviceName, errorContent string) error {
|
||||
postLog.Error(fmt.Sprintf("[Monitor] Service: %s - Exception: %s", serviceName, errorContent))
|
||||
return fmt.Errorf("service %s exception: %s", serviceName, errorContent)
|
||||
}
|
||||
|
||||
func GetActiveMonitors() []string {
|
||||
monitorsMutex.RLock()
|
||||
defer monitorsMutex.RUnlock()
|
||||
|
||||
var activeServices []string
|
||||
for name := range monitors {
|
||||
activeServices = append(activeServices, name)
|
||||
}
|
||||
return activeServices
|
||||
}
|
||||
Reference in New Issue
Block a user