123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- /*
- * Copyright (c) Contributors, http://opensimulator.org/
- * See CONTRIBUTORS.TXT for a full list of copyright holders.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the OpenSimulator Project nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Threading;
- using log4net;
- namespace OpenSim.Framework.Monitoring
- {
- /// <summary>
- /// Manages launching threads and keeping watch over them for timeouts
- /// </summary>
- public static class Watchdog
- {
- private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
- /// <summary>Interval in milliseconds of the watchdog timer. A gap of more than twice this value between two watchdog runs is logged as a warning.</summary>
- public const double WATCHDOG_INTERVAL_MS = 2500.0d;
- /// <summary>Default timeout in milliseconds before a thread is considered dead</summary>
- public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000;
/// <summary>
/// Bookkeeping record for a single thread tracked by the watchdog.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo
{
    /// <summary>
    /// The thread being tracked.
    /// </summary>
    public Thread Thread { get; private set; }

    /// <summary>
    /// Approximate tick when this thread was started.
    /// </summary>
    /// <remarks>
    /// Not terribly good since this quickly wraps around.
    /// </remarks>
    public int FirstTick { get; private set; }

    /// <summary>
    /// Last time this heartbeat update was invoked
    /// </summary>
    public int LastTick { get; set; }

    /// <summary>
    /// Number of milliseconds before we notify that the thread is having a problem.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Is this thread considered timed out?
    /// </summary>
    public bool IsTimedOut { get; set; }

    /// <summary>
    /// Will this thread trigger the alarm function if it has timed out?
    /// </summary>
    public bool AlarmIfTimeout { get; set; }

    /// <summary>
    /// Method to execute if the alarm goes off. If null then no alarm method is fired.
    /// </summary>
    public Func<string> AlarmMethod { get; set; }

    /// <summary>
    /// Stat structure associated with this thread.
    /// </summary>
    /// <remarks>
    /// Only set by the (thread, timeout, name) constructor; copies made with the copy
    /// constructor leave it null.
    /// </remarks>
    public Stat Stat { get; set; }

    /// <summary>
    /// Create the tracking record for a thread and register a pull stat that reports the
    /// number of milliseconds since the thread's last heartbeat.
    /// </summary>
    /// <param name="thread">Thread to track.</param>
    /// <param name="timeout">Milliseconds without a heartbeat before the thread is considered timed out.</param>
    /// <param name="name">Name used for the stat and its description.</param>
    public ThreadWatchdogInfo(Thread thread, int timeout, string name)
    {
        Thread = thread;
        Timeout = timeout;
        FirstTick = Environment.TickCount & Int32.MaxValue;
        LastTick = FirstTick;

        Stat
            = new Stat(
                name,
                string.Format("Last update of thread {0}", name),
                "",
                "ms",
                "server",
                "thread",
                StatType.Pull,
                MeasuresOfInterest.None,
                // Parenthesised on purpose: binary '-' binds tighter than '&', so without the
                // parentheses the mask would be applied to (Int32.MaxValue - LastTick) instead
                // of to the tick count.
                stat => stat.Value = (Environment.TickCount & Int32.MaxValue) - LastTick,
                StatVerbosity.Debug);

        StatsManager.RegisterStat(Stat);
    }

    /// <summary>
    /// Create a snapshot copy of another record.
    /// </summary>
    /// <remarks>
    /// The thread, tick, timeout and alarm members are copied. Stat is deliberately not
    /// copied, so the snapshot does not share the registered stat.
    /// </remarks>
    /// <param name="previousTwi">Record to copy from.</param>
    public ThreadWatchdogInfo(ThreadWatchdogInfo previousTwi)
    {
        Thread = previousTwi.Thread;
        FirstTick = previousTwi.FirstTick;
        LastTick = previousTwi.LastTick;
        Timeout = previousTwi.Timeout;
        IsTimedOut = previousTwi.IsTimedOut;
        AlarmIfTimeout = previousTwi.AlarmIfTimeout;
        AlarmMethod = previousTwi.AlarmMethod;
    }

    /// <summary>
    /// Deregister the stat associated with this record, if there is one.
    /// </summary>
    public void Cleanup()
    {
        // Snapshot copies carry no Stat, so there is nothing to deregister for them.
        if (Stat != null)
            StatsManager.DeregisterStat(Stat);
    }
}
- /// <summary>
- /// This event is called whenever a tracked thread is
- /// stopped or has not called UpdateThread() in time.
- /// </summary>
- public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;
/// <summary>
/// Gets or sets whether the watchdog timer is running.
/// </summary>
/// <remarks>
/// Assigning the value it already has does nothing. When switching on, the last-run tick
/// is refreshed before the timer is enabled.
/// </remarks>
public static bool Enabled
{
    get
    {
        return m_enabled;
    }

    set
    {
        if (value != m_enabled)
        {
            m_enabled = value;

            // Set now so we don't get alerted on the first run
            if (value)
                LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

            m_watchdogTimer.Enabled = value;
        }
    }
}
/// <summary>Backing field for the Enabled property.</summary>
private static bool m_enabled;

/// <summary>
/// Tracked threads keyed by managed thread ID. The dictionary instance is also the
/// object that is locked when the set of tracked threads is read or changed.
/// </summary>
private static Dictionary<int, ThreadWatchdogInfo> m_threads;

/// <summary>One-shot timer that drives each watchdog pass.</summary>
private static System.Timers.Timer m_watchdogTimer;

/// <summary>
/// Last time the watchdog thread ran.
/// </summary>
/// <remarks>
/// Should run every WATCHDOG_INTERVAL_MS
/// </remarks>
public static int LastWatchdogThreadTick { get; private set; }

/// <summary>
/// Creates the empty thread table and the (initially not enabled) watchdog timer.
/// </summary>
static Watchdog()
{
    m_threads = new Dictionary<int, ThreadWatchdogInfo>();

    // AutoReset is off: the timer fires once per Start() and has to be re-armed.
    System.Timers.Timer timer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS) { AutoReset = false };
    timer.Elapsed += WatchdogTimerElapsed;
    m_watchdogTimer = timer;
}
/// <summary>
/// Add a thread to the watchdog tracker.
/// </summary>
/// <param name="info">Information about the thread.</param>
/// <param name="name">Name of the thread, used only in the log message.</param>
/// <param name="log">If true then creation of thread is logged.</param>
public static void AddThread(ThreadWatchdogInfo info, string name, bool log = true)
{
    int managedId = info.Thread.ManagedThreadId;

    if (log)
    {
        m_log.DebugFormat(
            "[WATCHDOG]: Started tracking thread {0}, ID {1}", name, managedId);
    }

    lock (m_threads)
    {
        m_threads.Add(managedId, info);
    }
}
/// <summary>
/// Records a heartbeat for the calling thread.
/// </summary>
public static void UpdateThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    UpdateThread(callerId);
}
/// <summary>
/// Stops watchdog tracking on the current thread
/// </summary>
/// <param name="log">If true then the removal of the thread is logged.</param>
/// <returns>
/// True if the thread was removed from the list of tracked
/// threads, otherwise false
/// </returns>
public static bool RemoveThread(bool log = true)
{
    int callerId = Thread.CurrentThread.ManagedThreadId;
    bool removed = RemoveThread(callerId, log);
    return removed;
}
/// <summary>
/// Stops watchdog tracking of the thread with the given managed ID.
/// </summary>
/// <param name="threadID">Managed thread ID to stop tracking.</param>
/// <param name="log">If true then a successful removal is logged.</param>
/// <returns>True if the thread was tracked and has been removed, otherwise false.</returns>
private static bool RemoveThread(int threadID, bool log = true)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo tracked;

        // An unknown ID is always reported, regardless of the log flag.
        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            m_log.WarnFormat(
                "[WATCHDOG]: Requested to remove thread with ID {0} but this is not being monitored", threadID);

            return false;
        }

        if (log)
        {
            m_log.DebugFormat(
                "[WATCHDOG]: Removing thread {0}, ID {1}", tracked.Thread.Name, tracked.Thread.ManagedThreadId);
        }

        // Deregister the thread's stat before dropping it from the table.
        tracked.Cleanup();
        m_threads.Remove(threadID);

        return true;
    }
}
/// <summary>
/// Aborts a tracked thread and stops tracking it.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to abort.</param>
/// <returns>True if the thread was being tracked, otherwise false.</returns>
public static bool AbortThread(int threadID)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo target;

        if (!m_threads.TryGetValue(threadID, out target))
            return false;

        target.Thread.Abort();
        RemoveThread(threadID);

        return true;
    }
}
/// <summary>
/// Records a heartbeat for the thread with the given managed ID and clears its
/// timed-out flag. Logs a warning if the thread is not being monitored.
/// </summary>
/// <param name="threadID">Managed thread ID to update.</param>
private static void UpdateThread(int threadID)
{
    // The lookup is intentionally done without taking the lock, for speed: TryGetValue
    // is not thread safe, but threads are added/removed very rarely compared to how often
    // this is called, and losing an occasional update is harmless. Any failure is
    // therefore swallowed.
    try
    {
        ThreadWatchdogInfo tracked;

        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
            return;
        }

        tracked.LastTick = Environment.TickCount & Int32.MaxValue;
        tracked.IsTimedOut = false;
    }
    catch { }
}
/// <summary>
/// Get currently watched threads for diagnostic purposes
/// </summary>
/// <returns>A new array holding the records of all threads tracked at the time of the call.</returns>
public static ThreadWatchdogInfo[] GetThreadsInfo()
{
    ThreadWatchdogInfo[] snapshot;

    lock (m_threads)
    {
        snapshot = m_threads.Values.ToArray();
    }

    return snapshot;
}
/// <summary>
/// Return the current thread's watchdog info.
/// </summary>
/// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
public static ThreadWatchdogInfo GetCurrentThreadInfo()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    lock (m_threads)
    {
        ThreadWatchdogInfo found;
        return m_threads.TryGetValue(callerId, out found) ? found : null;
    }
}
/// <summary>
/// Check watched threads. Fire alarm if appropriate.
/// </summary>
/// <remarks>
/// One watchdog pass: warns if the pass is running late, then (only when someone is
/// subscribed to OnWatchdogTimeout) removes stopped threads and collects newly timed-out
/// threads, invokes the callback for each collected entry outside the lock, updates the
/// memory watchdog, checks and stats, and finally re-arms the one-shot timer.
/// </remarks>
/// <param name="sender">Timer that raised the event (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    try
    {
        int now = Environment.TickCount & Int32.MaxValue;
        int msElapsed = now - LastWatchdogThreadTick;

        if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
            m_log.WarnFormat(
                "[WATCHDOG]: {0} ms since Watchdog last ran.  Interval should be approximately {1} ms",
                msElapsed, WATCHDOG_INTERVAL_MS);

        LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

        Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

        if (callback != null)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;
            List<ThreadWatchdogInfo> stoppedThreads = null;

            lock (m_threads)
            {
                foreach (ThreadWatchdogInfo threadInfo in m_threads.Values)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        // Only remember the thread here. Removing it now would modify
                        // m_threads while it is being enumerated.
                        if (stoppedThreads == null)
                            stoppedThreads = new List<ThreadWatchdogInfo>();

                        stoppedThreads.Add(threadInfo);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            // Send a copy of the watchdog info to prevent race conditions where the watchdog
                            // thread updates the monitoring info after an alarm has been sent out.
                            callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
                        }
                    }
                }

                // Enumeration is finished, so the table can now be changed safely. Still
                // inside the lock, and before any callback runs, as before.
                if (stoppedThreads != null)
                    foreach (ThreadWatchdogInfo stopped in stoppedThreads)
                        RemoveThread(stopped.Thread.ManagedThreadId);
            }

            if (callbackInfos != null)
                foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                    callback(callbackInfo);
        }

        if (MemoryWatchdog.Enabled)
            MemoryWatchdog.Update();

        ChecksManager.CheckChecks();
        StatsManager.RecordStats();
    }
    finally
    {
        // The timer is one-shot (AutoReset is false), so it must be re-armed even if this
        // pass failed; otherwise a single exception would stop the watchdog for good.
        // Do not re-arm if the watchdog was disabled while this pass was running.
        if (m_enabled)
            m_watchdogTimer.Start();
    }
}
- }
- }
|