From: markt Date: Sun, 6 Feb 2011 23:18:02 +0000 (+0000) Subject: Remove for now since I can't get it working with Jira (my primary motivation for... X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=b48b82f49afb81f36f516d8e7875482b359b125f;p=tomcat7.0 Remove for now since I can't get it working with Jira (my primary motivation for writing it) It will return once I find the time to do the detailed debugging with Jira to figure out why it doesn't work. git-svn-id: https://svn.apache.org/repos/asf/tomcat/trunk@1067804 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java deleted file mode 100644 index 3935dd37a..000000000 --- a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.catalina.valves; - -import java.io.IOException; -import java.util.Enumeration; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Pattern; - -import javax.servlet.ServletException; -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpSession; - -import org.apache.catalina.LifecycleException; -import org.apache.catalina.connector.Request; -import org.apache.catalina.connector.Response; -import org.apache.juli.logging.Log; -import org.apache.juli.logging.LogFactory; - -/** - * Web crawlers can trigger the creation of many thousands of sessions as they - * crawl a site which may result in significant memory consumption. This Valve - * ensures that crawlers are associated with a single session - just like normal - * users - regardless of whether or not they provide a session token with their - * requests. - */ -public class CrawlerSessionManagerValve extends ValveBase { - - private static final Log log = - LogFactory.getLog(CrawlerSessionManagerValve.class); - - private Map uaIpSessionInfo = - new ConcurrentHashMap(); - - private String crawlerUserAgents = - ".*GoogleBot.*|.*bingbot.*|.*Yahoo! Slurp.*"; - private Pattern uaPattern = null; - private int sessionInactiveInterval = 60; - - - /** - * Specify the regular expression (using {@link Pattern}) that will be used - * to identify crawlers based in the User-Agent header provided. The default - * is ".*GoogleBot.*|.*bingbot.*|.*Yahoo! Slurp.*" - * - * @param crawlerUserAgents The regular expression using {@link Pattern} - */ - public void setCrawlerUserAgents(String crawlerUserAgents) { - this.crawlerUserAgents = crawlerUserAgents; - if (crawlerUserAgents == null || crawlerUserAgents.length() == 0) { - uaPattern = null; - } else { - uaPattern = Pattern.compile(crawlerUserAgents); - } - } - - /** - * @see #setCrawlerUserAgents(String) - * @return The current regular expression being used to match user agents. - */ - public String getCrawlerUserAgents() { - return crawlerUserAgents; - } - - - /** - * Specify the session timeout (in seconds) for a crawler's session. This is - * typically lower than that for a user session. The default is 60 seconds. - * - * @param sessionInactiveInterval The new timeout for crawler sessions - */ - public void setSessionInactiveInterval(int sessionInactiveInterval) { - this.sessionInactiveInterval = sessionInactiveInterval; - } - - /** - * @see #setSessionInactiveInterval(int) - * @return The current timeout in seconds - */ - public int getSessionInactiveInterval() { - return sessionInactiveInterval; - } - - - @Override - protected void initInternal() throws LifecycleException { - super.initInternal(); - - uaPattern = Pattern.compile(crawlerUserAgents); - } - - - @Override - public void invoke(Request request, Response response) throws IOException, - ServletException { - - boolean isBot = false; - SessionInfo sessionInfo = null; - String clientIp = null; - - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + ": ClientIp=" + - request.getRemoteAddr() + ", RequestedSessionId=" + - request.getRequestedSessionId()); - } - - // If the incoming request has a session ID, no action is required - if (request.getRequestedSessionId() == null) { - - // Is this a crawler - cheack the UA headers - Enumeration uaHeaders = request.getHeaders("user-agent"); - String uaHeader = uaHeaders.nextElement(); - - // If more than one UA header - assume not a bot - if (!uaHeaders.hasMoreElements()) { - - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + ": UserAgent=" + uaHeader); - } - - if (uaPattern.matcher(uaHeader).matches()) { - isBot = true; - - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + - ": Bot found. UserAgent=" + uaHeader); - } - } - } - - // If this is a bot, is the session ID known? - if (isBot) { - clientIp = request.getRemoteAddr(); - sessionInfo = uaIpSessionInfo.get(clientIp); - if (sessionInfo != null) { - request.setRequestedSessionId(sessionInfo.getSessionId()); - // Hack for testing with Jira - request.addCookie(new Cookie("JSESSIONID", - sessionInfo.getSessionId())); - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + - ": SessionID=" + sessionInfo.getSessionId()); - } - } - } - } - - getNext().invoke(request, response); - - if (isBot) { - if (sessionInfo == null) { - // Has bot just created a session, if so make a note of it - HttpSession s = request.getSession(false); - if (s != null) { - uaIpSessionInfo.put(clientIp, new SessionInfo(s.getId())); - s.setMaxInactiveInterval(sessionInactiveInterval); - - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + - ": New bot session. SessionID=" + s.getId()); - } - } - } else { - sessionInfo.access(); - - if (log.isDebugEnabled()) { - log.debug(request.hashCode() + - ": Bot session accessed. SessionID=" + - sessionInfo.getSessionId()); - } - } - } - } - - - @Override - public void backgroundProcess() { - super.backgroundProcess(); - - long expireTime = System.currentTimeMillis() - - (sessionInactiveInterval + 60) * 1000; - - Iterator> iter = - uaIpSessionInfo.entrySet().iterator(); - - // Remove any sessions in the cache that have expired. - while (iter.hasNext()) { - Entry entry = iter.next(); - if (entry.getValue().getLastAccessed() < expireTime) { - iter.remove(); - } - } - } - - - private static final class SessionInfo { - private final String sessionId; - private volatile long lastAccessed; - - public SessionInfo(String sessionId) { - this.sessionId = sessionId; - this.lastAccessed = System.currentTimeMillis(); - } - - public String getSessionId() { - return sessionId; - } - - public long getLastAccessed() { - return lastAccessed; - } - - public void access() { - lastAccessed = System.currentTimeMillis(); - } - } -}