From 79d9400220265813d48c963585d09d40c5be5160 Mon Sep 17 00:00:00 2001 From: markt Date: Sun, 6 Feb 2011 21:00:52 +0000 Subject: [PATCH] Review from kkolinko 1. Matcher not thread safe 2. >1 UA header -> not a bot git-svn-id: https://svn.apache.org/repos/asf/tomcat/trunk@1067759 13f79535-47bb-0310-9956-ffa450edef68 --- .../valves/CrawlerSessionManagerValve.java | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java index 6af847d16..b75152244 100644 --- a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java +++ b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java @@ -22,7 +22,6 @@ import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.ServletException; @@ -51,7 +50,7 @@ public class CrawlerSessionManagerValve extends ValveBase { private String crawlerUserAgents = ".*GoogleBot.*|.*bingbot.*|.*Yahoo! 
Slurp.*"; - private Matcher uaMatcher = null; + private Pattern uaPattern = null; private int sessionInactiveInterval = 60; @@ -65,9 +64,9 @@ public class CrawlerSessionManagerValve extends ValveBase { public void setCrawlerUserAgents(String crawlerUserAgents) { this.crawlerUserAgents = crawlerUserAgents; if (crawlerUserAgents == null || crawlerUserAgents.length() == 0) { - uaMatcher = null; + uaPattern = null; } else { - uaMatcher = Pattern.compile(crawlerUserAgents).matcher(""); + uaPattern = Pattern.compile(crawlerUserAgents); } } @@ -103,7 +102,7 @@ public class CrawlerSessionManagerValve extends ValveBase { protected void initInternal() throws LifecycleException { super.initInternal(); - uaMatcher = Pattern.compile(crawlerUserAgents).matcher(""); + uaPattern = Pattern.compile(crawlerUserAgents); } @@ -124,19 +123,18 @@ public class CrawlerSessionManagerValve extends ValveBase { // If the incoming request has a session ID, no action is required if (request.getRequestedSessionId() == null) { - // Is this a crawler + // Is this a crawler - check the UA headers Enumeration uaHeaders = request.getHeaders("user-agent"); - while (!isBot && uaMatcher != null && - uaHeaders.hasMoreElements()) { - - String uaHeader = uaHeaders.nextElement(); - uaMatcher.reset(uaHeader); - + String uaHeader = uaHeaders.nextElement(); + + // If more than one UA header - assume not a bot + if (!uaHeaders.hasMoreElements()) { + if (log.isDebugEnabled()) { log.debug(request.hashCode() + ": UserAgent=" + uaHeader); } - if (uaMatcher.matches()) { + if (uaPattern.matcher(uaHeader).matches()) { isBot = true; if (log.isDebugEnabled()) { -- 2.11.0