Watchdog.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. /*
  2. * Copyright (c) Contributors, http://opensimulator.org/
  3. * See CONTRIBUTORS.TXT for a full list of copyright holders.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of the OpenSimulator Project nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY
  17. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. using System;
  28. using System.Collections.Generic;
  29. using System.Linq;
  30. using System.Threading;
  31. using log4net;
  32. namespace OpenSim.Framework
  33. {
  34. /// <summary>
  35. /// Manages launching threads and keeping watch over them for timeouts
  36. /// </summary>
  37. public static class Watchdog
  38. {
  /// <summary>
  /// Interval, in milliseconds, given to the watchdog timer that drives the periodic check.
  /// </summary>
  private const double WATCHDOG_INTERVAL_MS = 2500.0;

  /// <summary>
  /// Timeout, in milliseconds, applied to threads started without an explicit timeout value.
  /// </summary>
  private const int WATCHDOG_TIMEOUT_MS = 5000;
  /// <summary>
  /// Bookkeeping record kept by the watchdog for a single tracked thread.
  /// </summary>
  [System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
  public class ThreadWatchdogInfo
  {
      /// <summary>The thread this record describes.</summary>
      public Thread Thread { get; private set; }

      /// <summary>
      /// Tick count sampled when this record was constructed.
      /// </summary>
      /// <remarks>
      /// Derived from <see cref="Environment.TickCount"/> with the sign bit masked off, so the
      /// value is never negative but does wrap around; treat it as approximate.
      /// </remarks>
      public int FirstTick { get; private set; }

      /// <summary>
      /// Tick count of the most recent heartbeat. Starts out equal to <see cref="FirstTick"/>.
      /// </summary>
      public int LastTick { get; set; }

      /// <summary>
      /// Number of milliseconds without a heartbeat before the thread is reported as having a problem.
      /// </summary>
      public int Timeout { get; set; }

      /// <summary>
      /// True while the thread is considered timed out.
      /// </summary>
      public bool IsTimedOut { get; set; }

      /// <summary>
      /// True if a timeout of this thread should trigger the alarm function.
      /// </summary>
      public bool AlarmIfTimeout { get; set; }

      /// <summary>
      /// Creates a record for <paramref name="thread"/> and stamps both tick fields with the current tick.
      /// </summary>
      /// <param name="thread">The thread to describe</param>
      /// <param name="timeout">Timeout in milliseconds for this thread</param>
      public ThreadWatchdogInfo(Thread thread, int timeout)
      {
          // Sample the clock once so FirstTick and LastTick are guaranteed to start identical.
          int createdAt = Environment.TickCount & Int32.MaxValue;

          Thread = thread;
          Timeout = timeout;
          FirstTick = createdAt;
          LastTick = createdAt;
      }
  }
  /// <summary>
  /// Signature of the handlers notified when a tracked thread has stopped or
  /// has not called UpdateThread() in time.
  /// </summary>
  /// <param name="thread">The thread that has been identified as dead</param>
  /// <param name="lastTick">The last time this thread called UpdateThread()</param>
  public delegate void WatchdogTimeout(Thread thread, int lastTick);

  /// <summary>
  /// Raised whenever a tracked thread is stopped or has not called
  /// UpdateThread() in time.
  /// </summary>
  public static event WatchdogTimeout OnWatchdogTimeout;
  private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);

  /// <summary>Tracked threads keyed by managed thread ID. Also serves as the lock object for itself.</summary>
  private static Dictionary<int, ThreadWatchdogInfo> m_threads;

  /// <summary>One-shot timer that drives the periodic check; the elapsed handler starts it again.</summary>
  private static System.Timers.Timer m_watchdogTimer;

  /// <summary>
  /// Creates the thread table and starts the watchdog timer.
  /// </summary>
  static Watchdog()
  {
      m_threads = new Dictionary<int, ThreadWatchdogInfo>();

      // AutoReset is off, so the timer fires once per Start() call.
      System.Timers.Timer timer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS) { AutoReset = false };
      timer.Elapsed += WatchdogTimerElapsed;

      m_watchdogTimer = timer;
      m_watchdogTimer.Start();
  }
  /// <summary>
  /// Start a new watchdog-tracked thread using the default timeout.
  /// </summary>
  /// <param name="start">The method that will be executed in the new thread</param>
  /// <param name="name">A name to give to the new thread</param>
  /// <param name="priority">Priority to run the thread at</param>
  /// <param name="isBackground">True to run this thread as a background thread, otherwise false</param>
  /// <param name="alarmIfTimeout">Trigger an alarm function if the thread has timed out</param>
  /// <returns>The newly created Thread object</returns>
  public static Thread StartThread(
      ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout)
  {
      const int defaultTimeout = WATCHDOG_TIMEOUT_MS;

      return StartThread(start, name, priority, isBackground, alarmIfTimeout, defaultTimeout);
  }
  /// <summary>
  /// Create a thread, register it with the watchdog and then start it.
  /// </summary>
  /// <param name="start">The method that will be executed in the new thread</param>
  /// <param name="name">A name to give to the new thread</param>
  /// <param name="priority">Priority to run the thread at</param>
  /// <param name="isBackground">True to run this thread as a background
  /// thread, otherwise false</param>
  /// <param name="alarmIfTimeout">Trigger an alarm function if the thread has timed out</param>
  /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param>
  /// <returns>The newly created Thread object</returns>
  public static Thread StartThread(
      ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, int timeout)
  {
      Thread worker = new Thread(start)
      {
          Name = name,
          Priority = priority,
          IsBackground = isBackground
      };

      ThreadWatchdogInfo info = new ThreadWatchdogInfo(worker, timeout);
      info.AlarmIfTimeout = alarmIfTimeout;

      int workerId = worker.ManagedThreadId;

      m_log.DebugFormat(
          "[WATCHDOG]: Started tracking thread {0}, ID {1}", worker.Name, workerId);

      // Register before starting so the thread is already tracked when its body begins to run.
      lock (m_threads)
      {
          m_threads.Add(workerId, info);
      }

      worker.Start();

      return worker;
  }
  /// <summary>
  /// Records a heartbeat for the calling thread, marking it as alive.
  /// </summary>
  public static void UpdateThread()
  {
      int currentId = Thread.CurrentThread.ManagedThreadId;

      UpdateThread(currentId);
  }
  /// <summary>
  /// Stops watchdog tracking of the calling thread.
  /// </summary>
  /// <returns>
  /// True if the thread was removed from the list of tracked
  /// threads, otherwise false
  /// </returns>
  public static bool RemoveThread()
  {
      int currentId = Thread.CurrentThread.ManagedThreadId;

      return RemoveThread(currentId);
  }
  /// <summary>
  /// Removes the thread with the given managed ID from the tracked set.
  /// </summary>
  /// <param name="threadID">Managed thread ID to stop tracking</param>
  /// <returns>True if an entry was removed, otherwise false</returns>
  private static bool RemoveThread(int threadID)
  {
      bool removed;

      lock (m_threads)
      {
          removed = m_threads.Remove(threadID);
      }

      return removed;
  }
  /// <summary>
  /// Aborts a tracked thread and stops tracking it.
  /// </summary>
  /// <param name="threadID">Managed thread ID of the thread to abort</param>
  /// <returns>True if the thread was tracked and has been aborted, false if it was not tracked</returns>
  public static bool AbortThread(int threadID)
  {
      lock (m_threads)
      {
          ThreadWatchdogInfo tracked;

          if (!m_threads.TryGetValue(threadID, out tracked))
              return false;

          tracked.Thread.Abort();
          RemoveThread(threadID);

          return true;
      }
  }
  /// <summary>
  /// Records a heartbeat for the thread with the given managed ID and clears its timed-out flag.
  /// Logs a warning if the thread is not being monitored.
  /// </summary>
  /// <param name="threadID">Managed thread ID of the thread to mark as alive</param>
  private static void UpdateThread(int threadID)
  {
      // The lookup is deliberately done without taking the lock: heartbeats are far more
      // frequent than thread registration/removal, and an occasional failed heartbeat is
      // harmless. Any exception caused by a concurrent modification is therefore ignored.
      try
      {
          ThreadWatchdogInfo tracked;

          if (!m_threads.TryGetValue(threadID, out tracked))
          {
              m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
              return;
          }

          tracked.LastTick = Environment.TickCount & Int32.MaxValue;
          tracked.IsTimedOut = false;
      }
      catch { }
  }
  /// <summary>
  /// Get a snapshot of the currently watched threads for diagnostic purposes.
  /// </summary>
  /// <returns>A new array holding the info record of every tracked thread</returns>
  public static ThreadWatchdogInfo[] GetThreadsInfo()
  {
      lock (m_threads)
      {
          ThreadWatchdogInfo[] snapshot = new ThreadWatchdogInfo[m_threads.Count];
          m_threads.Values.CopyTo(snapshot, 0);

          return snapshot;
      }
  }
  /// <summary>
  /// Look up the watchdog info of the calling thread.
  /// </summary>
  /// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
  public static ThreadWatchdogInfo GetCurrentThreadInfo()
  {
      int currentId = Thread.CurrentThread.ManagedThreadId;

      lock (m_threads)
      {
          ThreadWatchdogInfo tracked;

          return m_threads.TryGetValue(currentId, out tracked) ? tracked : null;
      }
  }
  /// <summary>
  /// Check watched threads. Fire alarm if appropriate.
  /// </summary>
  /// <remarks>
  /// Does nothing except re-arm the timer when OnWatchdogTimeout has no subscribers.
  /// Otherwise every tracked thread is inspected under the lock:
  /// stopped threads are dropped from tracking and reported; threads whose last
  /// heartbeat is older than their timeout are flagged as timed out and reported
  /// if they asked for an alarm. Subscribers are invoked after the lock is released.
  /// </remarks>
  /// <param name="sender">Unused</param>
  /// <param name="e">Unused</param>
  private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
  {
      try
      {
          WatchdogTimeout callback = OnWatchdogTimeout;

          if (callback != null)
          {
              List<ThreadWatchdogInfo> callbackInfos = null;

              lock (m_threads)
              {
                  // IDs of stopped threads; removed only after enumeration has finished,
                  // because removing from the dictionary while enumerating it can
                  // invalidate the enumerator and throw InvalidOperationException.
                  List<int> stoppedThreadIds = null;

                  int now = Environment.TickCount & Int32.MaxValue;

                  foreach (ThreadWatchdogInfo threadInfo in m_threads.Values)
                  {
                      if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                      {
                          if (stoppedThreadIds == null)
                              stoppedThreadIds = new List<int>();

                          stoppedThreadIds.Add(threadInfo.Thread.ManagedThreadId);

                          if (callbackInfos == null)
                              callbackInfos = new List<ThreadWatchdogInfo>();

                          callbackInfos.Add(threadInfo);
                      }
                      else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                      {
                          // Flag the thread so it is reported once per timeout; the flag is
                          // cleared again by the next heartbeat.
                          threadInfo.IsTimedOut = true;

                          if (threadInfo.AlarmIfTimeout)
                          {
                              if (callbackInfos == null)
                                  callbackInfos = new List<ThreadWatchdogInfo>();

                              callbackInfos.Add(threadInfo);
                          }
                      }
                  }

                  if (stoppedThreadIds != null)
                  {
                      foreach (int stoppedThreadId in stoppedThreadIds)
                          m_threads.Remove(stoppedThreadId);
                  }
              }

              // Notify outside the lock so subscribers cannot block heartbeat bookkeeping.
              if (callbackInfos != null)
              {
                  foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                      callback(callbackInfo.Thread, callbackInfo.LastTick);
              }
          }
      }
      finally
      {
          // The timer is one-shot (AutoReset = false). Re-arm it even if the check or a
          // subscriber threw, otherwise the watchdog would silently stop for good.
          m_watchdogTimer.Start();
      }
  }
  263. }
  264. }