import java.io.IOException;
import java.util.Enumeration;
-import java.util.Iterator;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import javax.servlet.ServletException;
import javax.servlet.http.HttpSession;
+import javax.servlet.http.HttpSessionBindingEvent;
+import javax.servlet.http.HttpSessionBindingListener;
-import org.apache.catalina.LifecycleException;
import org.apache.catalina.connector.Request;
import org.apache.catalina.connector.Response;
import org.apache.juli.logging.Log;
* users - regardless of whether or not they provide a session token with their
* requests.
*/
-public class CrawlerSessionManagerValve extends ValveBase {
+public class CrawlerSessionManagerValve extends ValveBase
+ implements HttpSessionBindingListener {
private static final Log log =
LogFactory.getLog(CrawlerSessionManagerValve.class);
- private Map<String,SessionInfo> uaIpSessionInfo =
- new ConcurrentHashMap<String, SessionInfo>();
+ private Map<String,String> clientIpSessionId =
+ new ConcurrentHashMap<String, String>();
+ private Map<String,String> sessionIdClientIp =
+ new ConcurrentHashMap<String, String>();
private String crawlerUserAgents =
".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*";
}
- @Override
- protected void initInternal() throws LifecycleException {
- super.initInternal();
-
- uaPattern = Pattern.compile(crawlerUserAgents);
+ /**
+ * Returns the map of client IP address to session ID maintained by this
+ * valve. The valve's own map instance is returned, not a copy.
+ *
+ * @return the client IP to session ID map
+ */
+ public Map<String,String> getClientIpSessionId() {
+ return this.clientIpSessionId;
}
ServletException {
boolean isBot = false;
- SessionInfo sessionInfo = null;
+ String sessionId = null;
String clientIp = null;
if (log.isDebugEnabled()) {
// If this is a bot, is the session ID known?
if (isBot) {
clientIp = request.getRemoteAddr();
- sessionInfo = uaIpSessionInfo.get(clientIp);
- if (sessionInfo != null) {
- request.setRequestedSessionId(sessionInfo.getSessionId());
+ sessionId = clientIpSessionId.get(clientIp);
+ if (sessionId != null) {
+ request.setRequestedSessionId(sessionId);
if (log.isDebugEnabled()) {
- log.debug(request.hashCode() +
- ": SessionID=" + sessionInfo.getSessionId());
+ log.debug(request.hashCode() + ": SessionID=" +
+ sessionId);
}
}
}
getNext().invoke(request, response);
if (isBot) {
- if (sessionInfo == null) {
+ if (sessionId == null) {
// Has bot just created a session, if so make a note of it
HttpSession s = request.getSession(false);
if (s != null) {
- uaIpSessionInfo.put(clientIp, new SessionInfo(s.getId()));
+ clientIpSessionId.put(clientIp, s.getId());
+ sessionIdClientIp.put(s.getId(), clientIp);
+ // #valueUnbound() will be called on session expiration
+ s.setAttribute(this.getClass().getName(), this);
s.setMaxInactiveInterval(sessionInactiveInterval);
if (log.isDebugEnabled()) {
}
}
} else {
- sessionInfo.access();
-
if (log.isDebugEnabled()) {
log.debug(request.hashCode() +
- ": Bot session accessed. SessionID=" +
- sessionInfo.getSessionId());
+ ": Bot session accessed. SessionID=" + sessionId);
}
}
}
@Override
- public void backgroundProcess() {
- super.backgroundProcess();
-
- long expireTime = System.currentTimeMillis() -
- (sessionInactiveInterval + 60) * 1000;
-
- Iterator<Entry<String,SessionInfo>> iter =
- uaIpSessionInfo.entrySet().iterator();
-
- // Remove any sessions in the cache that have expired.
- while (iter.hasNext()) {
- Entry<String,SessionInfo> entry = iter.next();
- if (entry.getValue().getLastAccessed() < expireTime) {
- iter.remove();
- }
- }
+ public void valueBound(HttpSessionBindingEvent event) {
+ // NOOP - the IP/session maps are already populated before this valve is set as a session attribute
}
- private static final class SessionInfo {
- private final String sessionId;
- private volatile long lastAccessed;
-
- public SessionInfo(String sessionId) {
- this.sessionId = sessionId;
- this.lastAccessed = System.currentTimeMillis();
- }
-
- public String getSessionId() {
- return sessionId;
- }
-
- public long getLastAccessed() {
- return lastAccessed;
- }
-
- public void access() {
- lastAccessed = System.currentTimeMillis();
+ /**
+ * Drops the bookkeeping entries for a crawler session when this valve is
+ * unbound from that session.
+ * <p>
+ * The session ID to client IP entry is always removed. The client IP to
+ * session ID entry is removed only if it still refers to the session being
+ * unbound, so that a different session registered for the same IP is not
+ * forgotten while it is still in use.
+ *
+ * @param event the binding event identifying the session this valve was
+ *              unbound from
+ */
+ @Override
+ public void valueUnbound(HttpSessionBindingEvent event) {
+ String sessionId = event.getSession().getId();
+ String clientIp = sessionIdClientIp.remove(sessionId);
+ // Two concurrent first requests from one IP can each create a session;
+ // only the last one stays mapped for that IP. When the other session is
+ // unbound it must not remove the mapping of the one still in use.
+ if (clientIp != null &&
+ sessionId.equals(clientIpSessionId.get(clientIp))) {
+ clientIpSessionId.remove(clientIp);
}
}
}
</mbean>
+ <mbean name="CrawlerSessionManagerValve"
+ description="Valve that ensures web crawlers always use sessions even if no session ID is presented by the client"
+ domain="Catalina"
+ group="Valve"
+ type="org.apache.catalina.valves.CrawlerSessionManagerValve">
+
+ <attribute name="asyncSupported"
+ description="Does this valve support async reporting."
+ is="true"
+ type="boolean"/>
+
+ <attribute name="className"
+ description="Fully qualified class name of the managed object"
+ type="java.lang.String"
+ writeable="false"/>
+
+ <attribute name="clientIpSessionId"
+ description="Current Map of client IP address to session ID managed by this Valve"
+ type="java.util.Map"
+ writeable="false"/>
+
+ <attribute name="crawlerUserAgents"
+ description="Specify the regular expression used to identify crawlers based on the User-Agent header provided."
+ type="java.lang.String"
+ writeable="true"/>
+
+ <attribute name="sessionInactiveInterval"
+ description="Specify the session timeout (in seconds) for a crawler's session."
+ type="int"
+ writeable="true"/>
+
+ <attribute name="stateName"
+ description="The name of the LifecycleState that this component is currently in"
+ type="java.lang.String"
+ writeable="false"/>
+
+ </mbean>
+
<mbean name="ErrorReportValve"
description="Implementation of a Valve that outputs HTML error pages"
domain="Catalina"