Restart running monitors if no heartbeat (#3952)
This commit is contained in:
		
							parent
							
								
									9f170a68d7
								
							
						
					
					
						commit
						c43223a16d
					
				
					 2 changed files with 106 additions and 1 deletions
				
			
		|  | @ -3,7 +3,7 @@ const dayjs = require("dayjs"); | ||||||
| const axios = require("axios"); | const axios = require("axios"); | ||||||
| const { Prometheus } = require("../prometheus"); | const { Prometheus } = require("../prometheus"); | ||||||
| const { log, UP, DOWN, PENDING, MAINTENANCE, flipStatus, TimeLogger, MAX_INTERVAL_SECOND, MIN_INTERVAL_SECOND, | const { log, UP, DOWN, PENDING, MAINTENANCE, flipStatus, TimeLogger, MAX_INTERVAL_SECOND, MIN_INTERVAL_SECOND, | ||||||
|     SQL_DATETIME_FORMAT |     SQL_DATETIME_FORMAT, isDev, sleep, getRandomInt | ||||||
| } = require("../../src/util"); | } = require("../../src/util"); | ||||||
| const { tcping, ping, dnsResolve, checkCertificate, checkStatusCode, getTotalClientInRoom, setting, mssqlQuery, postgresQuery, mysqlQuery, mqttAsync, setSetting, httpNtlm, radius, grpcQuery, | const { tcping, ping, dnsResolve, checkCertificate, checkStatusCode, getTotalClientInRoom, setting, mssqlQuery, postgresQuery, mysqlQuery, mqttAsync, setSetting, httpNtlm, radius, grpcQuery, | ||||||
|     redisPingAsync, mongodbPing, kafkaProducerAsync, getOidcTokenClientCredentials, rootCertificatesFingerprints |     redisPingAsync, mongodbPing, kafkaProducerAsync, getOidcTokenClientCredentials, rootCertificatesFingerprints | ||||||
|  | @ -328,6 +328,16 @@ class Monitor extends BeanModel { | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
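|  |             // Evil mode (dev only, opt-in via EVIL_RANDOM_MONITOR_SLEEP=SURE): randomly stall this beat with a very long sleep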
 | ||||||
|  |             if (isDev) { | ||||||
|  |                 if (process.env.EVIL_RANDOM_MONITOR_SLEEP === "SURE") { | ||||||
|  |                     if (getRandomInt(0, 100) === 0) { | ||||||
|  |                         log.debug("evil", `[${this.name}] Evil mode: Random sleep: ` + beatInterval * 10000); | ||||||
|  |                         await sleep(beatInterval * 10000); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             // Expose here for prometheus update
 |             // Expose here for prometheus update
 | ||||||
|             // undefined if not https
 |             // undefined if not https
 | ||||||
|             let tlsInfo = undefined; |             let tlsInfo = undefined; | ||||||
|  | @ -995,6 +1005,7 @@ class Monitor extends BeanModel { | ||||||
|             if (! this.isStop) { |             if (! this.isStop) { | ||||||
|                 log.debug("monitor", `[${this.name}] SetTimeout for next check.`); |                 log.debug("monitor", `[${this.name}] SetTimeout for next check.`); | ||||||
|                 this.heartbeatInterval = setTimeout(safeBeat, beatInterval * 1000); |                 this.heartbeatInterval = setTimeout(safeBeat, beatInterval * 1000); | ||||||
|  |                 this.lastScheduleBeatTime = dayjs(); | ||||||
|             } else { |             } else { | ||||||
|                 log.info("monitor", `[${this.name}] isStop = true, no next check.`); |                 log.info("monitor", `[${this.name}] isStop = true, no next check.`); | ||||||
|             } |             } | ||||||
|  | @ -1004,7 +1015,9 @@ class Monitor extends BeanModel { | ||||||
|         /** Get a heartbeat and handle errors */ |         /** Get a heartbeat and handle errors */ | ||||||
|         const safeBeat = async () => { |         const safeBeat = async () => { | ||||||
|             try { |             try { | ||||||
|  |                 this.lastStartBeatTime = dayjs(); | ||||||
|                 await beat(); |                 await beat(); | ||||||
|  |                 this.lastEndBeatTime = dayjs(); | ||||||
|             } catch (e) { |             } catch (e) { | ||||||
|                 console.trace(e); |                 console.trace(e); | ||||||
|                 UptimeKumaServer.errorLog(e, false); |                 UptimeKumaServer.errorLog(e, false); | ||||||
|  | @ -1013,6 +1026,9 @@ class Monitor extends BeanModel { | ||||||
|                 if (! this.isStop) { |                 if (! this.isStop) { | ||||||
|                     log.info("monitor", "Try to restart the monitor"); |                     log.info("monitor", "Try to restart the monitor"); | ||||||
|                     this.heartbeatInterval = setTimeout(safeBeat, this.interval * 1000); |                     this.heartbeatInterval = setTimeout(safeBeat, this.interval * 1000); | ||||||
|  |                     this.lastScheduleBeatTime = dayjs(); | ||||||
|  |                 } else { | ||||||
|  |                     log.info("monitor", "isStop = true, no next check."); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ const { Settings } = require("./settings"); | ||||||
| const dayjs = require("dayjs"); | const dayjs = require("dayjs"); | ||||||
| const childProcess = require("child_process"); | const childProcess = require("child_process"); | ||||||
| const path = require("path"); | const path = require("path"); | ||||||
|  | const axios = require("axios"); | ||||||
| // DO NOT IMPORT HERE IF THE MODULES USED `UptimeKumaServer.getInstance()`, put at the bottom of this file instead.
 | // DO NOT IMPORT HERE IF THE MODULES USED `UptimeKumaServer.getInstance()`, put at the bottom of this file instead.
 | ||||||
| 
 | 
 | ||||||
| /** | /** | ||||||
|  | @ -62,6 +63,8 @@ class UptimeKumaServer { | ||||||
|      */ |      */ | ||||||
|     jwtSecret = null; |     jwtSecret = null; | ||||||
| 
 | 
 | ||||||
|  |     checkMonitorsInterval = null; | ||||||
|  | 
 | ||||||
|     static getInstance(args) { |     static getInstance(args) { | ||||||
|         if (UptimeKumaServer.instance == null) { |         if (UptimeKumaServer.instance == null) { | ||||||
|             UptimeKumaServer.instance = new UptimeKumaServer(args); |             UptimeKumaServer.instance = new UptimeKumaServer(args); | ||||||
|  | @ -75,6 +78,9 @@ class UptimeKumaServer { | ||||||
|         const sslCert = args["ssl-cert"] || process.env.UPTIME_KUMA_SSL_CERT || process.env.SSL_CERT || undefined; |         const sslCert = args["ssl-cert"] || process.env.UPTIME_KUMA_SSL_CERT || process.env.SSL_CERT || undefined; | ||||||
|         const sslKeyPassphrase = args["ssl-key-passphrase"] || process.env.UPTIME_KUMA_SSL_KEY_PASSPHRASE || process.env.SSL_KEY_PASSPHRASE || undefined; |         const sslKeyPassphrase = args["ssl-key-passphrase"] || process.env.UPTIME_KUMA_SSL_KEY_PASSPHRASE || process.env.SSL_KEY_PASSPHRASE || undefined; | ||||||
| 
 | 
 | ||||||
|  |         // Set default axios timeout to 5 minutes instead of infinity
 | ||||||
|  |         axios.defaults.timeout = 300 * 1000; | ||||||
|  | 
 | ||||||
|         log.info("server", "Creating express and socket.io instance"); |         log.info("server", "Creating express and socket.io instance"); | ||||||
|         this.app = express(); |         this.app = express(); | ||||||
|         if (sslKey && sslCert) { |         if (sslKey && sslCert) { | ||||||
|  | @ -346,6 +352,10 @@ class UptimeKumaServer { | ||||||
|         if (enable || enable === null) { |         if (enable || enable === null) { | ||||||
|             this.startNSCDServices(); |             this.startNSCDServices(); | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         this.checkMonitorsInterval = setInterval(() => { | ||||||
|  |             this.checkMonitors(); | ||||||
|  |         }, 60 * 1000); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /** |     /** | ||||||
|  | @ -358,6 +368,8 @@ class UptimeKumaServer { | ||||||
|         if (enable || enable === null) { |         if (enable || enable === null) { | ||||||
|             this.stopNSCDServices(); |             this.stopNSCDServices(); | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         clearInterval(this.checkMonitorsInterval); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /** |     /** | ||||||
|  | @ -388,6 +400,83 @@ class UptimeKumaServer { | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     /** | ||||||
|  |      * Start the specified monitor | ||||||
|  |      * @param {number} monitorID ID of monitor to start | ||||||
|  |      * @returns {Promise<void>} | ||||||
|  |      */ | ||||||
|  |     async startMonitor(monitorID) { | ||||||
|  |         log.info("manage", `Resume Monitor: ${monitorID} by server`); | ||||||
|  | 
 | ||||||
|  |         await R.exec("UPDATE monitor SET active = 1 WHERE id = ?", [ | ||||||
|  |             monitorID, | ||||||
|  |         ]); | ||||||
|  | 
 | ||||||
|  |         let monitor = await R.findOne("monitor", " id = ? ", [ | ||||||
|  |             monitorID, | ||||||
|  |         ]); | ||||||
|  | 
 | ||||||
|  |         if (monitor.id in this.monitorList) { | ||||||
|  |             this.monitorList[monitor.id].stop(); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         this.monitorList[monitor.id] = monitor; | ||||||
|  |         monitor.start(this.io); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /** | ||||||
|  |      * Restart a given monitor | ||||||
|  |      * @param {number} monitorID ID of monitor to restart | ||||||
|  |      * @returns {Promise<void>} | ||||||
|  |      */ | ||||||
|  |     async restartMonitor(monitorID) { | ||||||
|  |         return await this.startMonitor(monitorID); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /** | ||||||
|  |      * Check if monitors are running properly | ||||||
|  |      */ | ||||||
|  |     async checkMonitors() { | ||||||
|  |         log.debug("monitor_checker", "Checking monitors"); | ||||||
|  | 
 | ||||||
|  |         for (let monitorID in this.monitorList) { | ||||||
|  |             let monitor = this.monitorList[monitorID]; | ||||||
|  | 
 | ||||||
|  |             // Not for push monitor
 | ||||||
|  |             if (monitor.type === "push") { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (!monitor.active) { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // If the last beat started more than 1.5x the monitor interval ago, treat the monitor as stuck and restart it
 | ||||||
|  |             if (monitor.lastScheduleBeatTime ) { | ||||||
|  |                 let diff = dayjs().diff(monitor.lastStartBeatTime, "second"); | ||||||
|  | 
 | ||||||
|  |                 if (diff > monitor.interval * 1.5) { | ||||||
|  |                     log.error("monitor_checker", `Monitor Interval: ${monitor.interval} Monitor ` + monitorID + " lastStartBeatTime diff: " + diff); | ||||||
|  |                     log.error("monitor_checker", "Unexpected error: Monitor " + monitorID + " is struck for unknown reason"); | ||||||
|  |                     log.error("monitor_checker", "Last start beat time: " + R.isoDateTime(monitor.lastStartBeatTime)); | ||||||
|  |                     log.error("monitor_checker", "Last end beat time: " + R.isoDateTime(monitor.lastEndBeatTime)); | ||||||
|  |                     log.error("monitor_checker", "Last ScheduleBeatTime: " + R.isoDateTime(monitor.lastScheduleBeatTime)); | ||||||
|  | 
 | ||||||
|  |                     // Restart
 | ||||||
|  |                     log.error("monitor_checker", `Restarting monitor ${monitorID} automatically now`); | ||||||
|  |                     this.restartMonitor(monitorID); | ||||||
|  |                 } else { | ||||||
|  |                     //log.debug("monitor_checker", "Monitor " + monitorID + " is running normally");
 | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 //log.debug("monitor_checker", "Monitor " + monitorID + " is not started yet, skip");
 | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         log.debug("monitor_checker", "Checking monitors end"); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| module.exports = { | module.exports = { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue