https://issues.apache.org/bugzilla/show_bug.cgi?id=50306
authorslaurent <slaurent@13f79535-47bb-0310-9956-ffa450edef68>
Thu, 7 Apr 2011 20:50:28 +0000 (20:50 +0000)
committerslaurent <slaurent@13f79535-47bb-0310-9956-ffa450edef68>
Thu, 7 Apr 2011 20:50:28 +0000 (20:50 +0000)
StuckThreadDetectionValve, based on code proposed by TomLu

git-svn-id: https://svn.apache.org/repos/asf/tomcat/trunk@1090003 13f79535-47bb-0310-9956-ffa450edef68

java/org/apache/catalina/valves/LocalStrings.properties
java/org/apache/catalina/valves/StuckThreadDetectionValve.java [new file with mode: 0644]
java/org/apache/catalina/valves/mbeans-descriptors.xml
webapps/docs/config/valve.xml

index 56e5c51..bb2398b 100644 (file)
@@ -44,6 +44,10 @@ remoteIpValve.syntax=Invalid regular expressions [{0}] provided.
 sslValve.certError=Failed to process certificate string [{0}] to create a java.security.cert.X509Certificate object
 sslValve.invalidProvider=The SSL provider specified on the connector associated with this request of [{0}] is invalid. The certificate data could not be processed.
 
+#Stuck thread detection Valve
+stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" has been active for {1} milliseconds (since {2}) to serve the same request for {4} and may be stuck (configured threshold for this StuckThreadDetectionValve is {5} seconds). There is/are {3} thread(s) in total that are monitored by this Valve and may be stuck.
+stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" was previously reported to be stuck but has completed. It was active for approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s) that are monitored by this Valve and may be stuck.}
+
 # HTTP status reports
 http.100=The client may continue ({0}).
 http.101=The server is switching protocols according to the "Upgrade" header ({0}).
diff --git a/java/org/apache/catalina/valves/StuckThreadDetectionValve.java b/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
new file mode 100644 (file)
index 0000000..d79be4b
--- /dev/null
@@ -0,0 +1,302 @@
+/*\r
+ * Licensed to the Apache Software Foundation (ASF) under one or more\r
+ * contributor license agreements.  See the NOTICE file distributed with\r
+ * this work for additional information regarding copyright ownership.\r
+ * The ASF licenses this file to You under the Apache License, Version 2.0\r
+ * (the "License"); you may not use this file except in compliance with\r
+ * the License.  You may obtain a copy of the License at\r
+ *\r
+ *      http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+package org.apache.catalina.valves;\r
+\r
+import java.io.IOException;\r
+import java.util.ArrayList;\r
+import java.util.Date;\r
+import java.util.List;\r
+import java.util.Queue;\r
+import java.util.concurrent.ConcurrentHashMap;\r
+import java.util.concurrent.ConcurrentLinkedQueue;\r
+import java.util.concurrent.atomic.AtomicInteger;\r
+\r
+import javax.servlet.ServletException;\r
+\r
+import org.apache.catalina.LifecycleException;\r
+import org.apache.catalina.connector.Request;\r
+import org.apache.catalina.connector.Response;\r
+import org.apache.juli.logging.Log;\r
+import org.apache.juli.logging.LogFactory;\r
+import org.apache.tomcat.util.res.StringManager;\r
+\r
+/**\r
+ * This valve allows to detect requests that take a long time to process, which might\r
+ * indicate that the thread that is processing it is stuck.\r
+ * Based on code proposed by TomLu in Bugzilla entry #50306\r
+ * \r
+ * @author slaurent\r
+ *\r
+ */\r
+public class StuckThreadDetectionValve extends ValveBase {\r
+\r
+    /**\r
+     * The descriptive information related to this implementation.\r
+     */\r
+    private static final String info =\r
+            "org.apache.catalina.valves.StuckThreadDetectionValve/1.0";\r
+    /**\r
+     * Logger\r
+     */\r
+    private static final Log log = LogFactory.getLog(StuckThreadDetectionValve.class);\r
+    \r
+    /**\r
+     * The string manager for this package.\r
+     */\r
+    private static final StringManager sm =\r
+        StringManager.getManager(Constants.Package);\r
+\r
+    /**\r
+     * Keeps count of the number of stuck threads detected\r
+     */\r
+    private final AtomicInteger stuckCount = new AtomicInteger(0);\r
+    \r
+    /**\r
+     * In seconds. Default 600 (10 minutes).\r
+     */\r
+    private int threshold = 600;\r
+    \r
+    /**\r
+     * The only references we keep to actual running Thread objects are in\r
+     * this Map (which is automatically cleaned in invoke()s finally clause).\r
+     * That way, Threads can be GC'ed, eventhough the Valve still thinks they\r
+     * are stuck (caused by a long monitor interval)\r
+     */\r
+    private ConcurrentHashMap<Long, MonitoredThread> activeThreads =\r
+            new ConcurrentHashMap<Long, MonitoredThread>();\r
+    /**\r
+     *\r
+     */\r
+    private Queue<CompletedStuckThread> completedStuckThreadsQueue =\r
+            new ConcurrentLinkedQueue<CompletedStuckThread>();\r
+\r
+    /**\r
+     * Specify the threshold (in seconds) used when checking for stuck threads.\r
+     * If &lt;=0, the detection is disabled. The default is 600 seconds.\r
+     * \r
+     * @param threshold\r
+     *            The new threshold in seconds\r
+     */\r
+    public void setThreshold(int threshold) {\r
+        this.threshold = threshold;\r
+    }\r
+\r
+    /**\r
+     * @see #setThreshold(int)\r
+     * @return The current threshold in seconds\r
+     */\r
+    public int getThreshold() {\r
+        return threshold;\r
+    }\r
+\r
+    @Override\r
+    protected void initInternal() throws LifecycleException {\r
+        super.initInternal();\r
+\r
+        if (log.isDebugEnabled()) {\r
+            log.debug("Monitoring stuck threads with threshold = "\r
+                    + threshold\r
+                    + " sec");\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Return descriptive information about this Valve implementation.\r
+     */\r
+    @Override\r
+    public String getInfo() {\r
+        return info;\r
+    }\r
+\r
+    private void notifyStuckThreadDetected(MonitoredThread monitoredThread,\r
+        long activeTime, int numStuckThreads) {\r
+        if (log.isWarnEnabled()) {\r
+            String msg = sm.getString(\r
+                "stuckThreadDetectionValve.notifyStuckThreadDetected",\r
+                monitoredThread.getThread().getName(), activeTime,\r
+                monitoredThread.getStartTime(), numStuckThreads,\r
+                monitoredThread.getRequestUri(), threshold);\r
+            // msg += "\n" + getStackTraceAsString(trace);\r
+            Throwable th = new Throwable();\r
+            th.setStackTrace(monitoredThread.getThread().getStackTrace());\r
+            log.warn(msg, th);\r
+        }\r
+    }\r
+\r
+    private void notifyStuckThreadCompleted(String threadName,\r
+            long activeTime, int numStuckThreads) {\r
+        if (log.isWarnEnabled()) {\r
+            String msg = sm.getString(\r
+                "stuckThreadDetectionValve.notifyStuckThreadCompleted",\r
+                threadName, activeTime, numStuckThreads);\r
+            // Since the "stuck thread notification" is warn, this should also\r
+            // be warn\r
+            log.warn(msg);\r
+        }\r
+    }\r
+\r
+    /**\r
+     * {@inheritDoc}\r
+     */\r
+    @Override\r
+    public void invoke(Request request, Response response)\r
+            throws IOException, ServletException {\r
+        \r
+        if (threshold <= 0) {\r
+            // short-circuit if not monitoring stuck threads\r
+            getNext().invoke(request, response);\r
+            return;\r
+        }\r
+\r
+        // Save the thread/runnable\r
+        // Keeping a reference to the thread object here does not prevent\r
+        // GC'ing, as the reference is removed from the Map in the finally clause\r
+\r
+        Long key = new Long(Thread.currentThread().getId());\r
+        StringBuffer requestUrl = request.getRequestURL();\r
+        if(request.getQueryString()!=null) {\r
+            requestUrl.append("?");\r
+            requestUrl.append(request.getQueryString());\r
+        }\r
+        MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(), \r
+            requestUrl.toString());\r
+        activeThreads.put(key, monitoredThread);\r
+\r
+        try {\r
+            getNext().invoke(request, response);\r
+        } finally {\r
+            activeThreads.remove(key);\r
+            if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) {\r
+                completedStuckThreadsQueue.add(\r
+                        new CompletedStuckThread(monitoredThread.getThread().getName(),\r
+                            monitoredThread.getActiveTimeInMillis()));\r
+            }\r
+        }\r
+    }\r
+\r
+    @Override\r
+    public void backgroundProcess() {\r
+        super.backgroundProcess();\r
+\r
+        long thresholdInMillis = threshold * 1000;\r
+\r
+        // Check monitored threads, being careful that the request might have\r
+        // completed by the time we examine it\r
+        for (MonitoredThread monitoredThread : activeThreads.values()) {\r
+            long activeTime = monitoredThread.getActiveTimeInMillis();\r
+\r
+            if (activeTime >= thresholdInMillis && monitoredThread.markAsStuckIfStillRunning()) {\r
+                int numStuckThreads = stuckCount.incrementAndGet();\r
+                notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads);\r
+            }\r
+        }\r
+        // Check if any threads previously reported as stuck, have finished.\r
+        for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll(); \r
+            completedStuckThread != null; completedStuckThread = completedStuckThreadsQueue.poll()) {\r
+\r
+            int numStuckThreads = stuckCount.decrementAndGet();\r
+            notifyStuckThreadCompleted(completedStuckThread.getName(),\r
+                    completedStuckThread.getTotalActiveTime(), numStuckThreads);\r
+        }\r
+    }\r
+    \r
+    public long[] getStuckThreadIds() {\r
+        List<Long> idList = new ArrayList<Long>();\r
+        for (MonitoredThread monitoredThread : activeThreads.values()) {\r
+            if (monitoredThread.isMarkedAsStuck()) {\r
+                idList.add(monitoredThread.getThread().getId());\r
+            }\r
+        }\r
+\r
+        long[] result = new long[idList.size()];\r
+        for (int i = 0; i < result.length; i++) {\r
+            result[i] = idList.get(i);\r
+        }\r
+        return result;\r
+    }\r
+\r
+    private class MonitoredThread {\r
+\r
+        /**\r
+         * Reference to the thread to get a stack trace from background task\r
+         */\r
+        private final Thread thread;\r
+        private final String requestUri;\r
+        private final long start;\r
+        private final AtomicInteger state = new AtomicInteger(\r
+            MonitoredThreadState.RUNNING.ordinal());\r
+\r
+        public MonitoredThread(Thread thread, String requestUri) {\r
+            this.thread = thread;\r
+            this.requestUri = requestUri;\r
+            this.start = System.currentTimeMillis();\r
+        }\r
+\r
+        public Thread getThread() {\r
+            return this.thread;\r
+        }\r
+\r
+        public String getRequestUri() {\r
+            return requestUri;\r
+        }\r
+\r
+        public long getActiveTimeInMillis() {\r
+            return System.currentTimeMillis() - start;\r
+        }\r
+\r
+        public Date getStartTime() {\r
+            return new Date(start);\r
+        }\r
+\r
+        public boolean markAsStuckIfStillRunning() {\r
+            return this.state.compareAndSet(MonitoredThreadState.RUNNING.ordinal(),\r
+                MonitoredThreadState.STUCK.ordinal());\r
+        }\r
+\r
+        public MonitoredThreadState markAsDone() {\r
+            int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal());\r
+            return MonitoredThreadState.values()[val];\r
+        }\r
+        \r
+        boolean isMarkedAsStuck() {\r
+            return this.state.get() == MonitoredThreadState.STUCK.ordinal();\r
+        }\r
+    }\r
+\r
+    private class CompletedStuckThread {\r
+\r
+        private String threadName;\r
+        private long totalActiveTime;\r
+\r
+        public CompletedStuckThread(String threadName, long totalActiveTime) {\r
+            this.threadName = threadName;\r
+            this.totalActiveTime = totalActiveTime;\r
+        }\r
+\r
+        public String getName() {\r
+            return this.threadName;\r
+        }\r
+\r
+        public long getTotalActiveTime() {\r
+            return this.totalActiveTime;\r
+        }\r
+    }\r
+\r
+    private enum MonitoredThreadState {\r
+        RUNNING, STUCK, DONE;\r
+    }\r
+}\r
index 562e859..cb46bd2 100644 (file)
                writeable="false" />
                
   </mbean>
+
+  <mbean name="StuckThreadDetectionValve"
+         description="Detect long requests for which their thread might be stuck"
+         domain="Catalina"
+         group="Valve"
+         type="org.apache.catalina.valves.StuckThreadDetectionValve">
+
+    <attribute name="asyncSupported"
+               description="Does this valve support async reporting."
+               is="true"
+               type="boolean"/>
+
+    <attribute name="className"
+               description="Fully qualified class name of the managed object"
+               type="java.lang.String"
+               writeable="false"/>
+
+    <attribute name="info"
+               description="Information about this implementation"
+               type="java.lang.String"
+               writeable="false"/>
+
+    <attribute name="stateName"
+               description="The name of the LifecycleState that this component is currently in"
+               type="java.lang.String"
+               writeable="false"/>
+
+    <attribute name="stuckThreadIds"
+               description="IDs of the thread currently considered stuck. Each ID can then be used with the ThreadMXBean to retrieve data about it."
+               type="long[]"
+               writeable="false"/>
+
+    <attribute name="threshold"
+               description="Duration in seconds after which a request is considered as stuck"
+               type="int"/>
+
+  </mbean>
 </mbeans-descriptors>
index 2939d99..4fe4ad4 100644 (file)
 
 </section>
 
+<section name="Stuck Thread Detection Valve">
 
+  <subsection name="Introduction">
+  
+    <p>This valve allows to detect requests that take a long time to process, which might 
+    indicate that the thread that is processing it is stuck.</p>
+    <p>When such a request is detected, the current stack trace of its thread is written 
+    to tomcat log with a WARN level.</p>
+    <p>The IDs of the stuck threads are available through JMX in the 
+    <code>stuckThreadIds</code> attribute. The JVM Thread MBean can then be used to 
+    retrieve other information about each stuck thread (name, stack trace...).</p>
+
+  </subsection>
+
+  <subsection name="Attributes">
+
+    <p>The <strong>Stuck Thread Detection Valve</strong> supports the
+    following configuration attributes:</p>
+
+    <attributes>
+
+      <attribute name="className" required="true">
+        <p>Java class name of the implementation to use.  This MUST be set to
+        <strong>org.apache.catalina.valves.StuckThreadDetectionValve</strong>.
+        </p>
+      </attribute>
+
+      <attribute name="threshold" required="false">
+        <p>Minimum duration in seconds after which a thread is considered stuck. 
+        Default is 600 seconds. If set to 0, the detection is disabled.</p>
+        <p>Note: since the detection is done in the background thread of the Container
+        (Engine, Host or Context) declaring this Valve, the threshold should be higher 
+        than the <code>backgroundProcessorDelay</code> of this Container.</p>
+      </attribute>
+
+    </attributes>
+
+  </subsection>
+
+</section>
 </body>