From: remm Date: Thu, 2 Nov 2006 03:50:14 +0000 (+0000) Subject: - i18n handling fixes (according to what the spec authors think is what should be... X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=a2f1afc2eee591051fc8923fd359ee3a344bd058;p=tomcat7.0 - i18n handling fixes (according to what the spec authors think is what should be done, at least). git-svn-id: https://svn.apache.org/repos/asf/tomcat/tc6.0.x/trunk@470216 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/java/org/apache/jasper/compiler/JspDocumentParser.java b/java/org/apache/jasper/compiler/JspDocumentParser.java index 5715b7f4d..a67a3ea96 100644 --- a/java/org/apache/jasper/compiler/JspDocumentParser.java +++ b/java/org/apache/jasper/compiler/JspDocumentParser.java @@ -143,7 +143,8 @@ class JspDocumentParser boolean directivesOnly, String pageEnc, String jspConfigPageEnc, - boolean isEncodingSpecifiedInProlog) + boolean isEncodingSpecifiedInProlog, + boolean isBomPresent) throws JasperException { JspDocumentParser jspDocParser = @@ -158,6 +159,7 @@ class JspDocumentParser dummyRoot.setJspConfigPageEncoding(jspConfigPageEnc); dummyRoot.setIsEncodingSpecifiedInProlog( isEncodingSpecifiedInProlog); + dummyRoot.setIsBomPresent(isBomPresent); jspDocParser.current = dummyRoot; if (parent == null) { jspDocParser.addInclude( diff --git a/java/org/apache/jasper/compiler/Node.java b/java/org/apache/jasper/compiler/Node.java index f80a271e0..7e9fdf983 100644 --- a/java/org/apache/jasper/compiler/Node.java +++ b/java/org/apache/jasper/compiler/Node.java @@ -464,6 +464,12 @@ abstract class Node implements TagConstants { private boolean isEncodingSpecifiedInProlog; /* + * Indicates whether an encoding has been explicitly specified in the + * page's bom. + */ + private boolean isBomPresent; + + /* * Constructor. */ Root(Mark start, Node parent, boolean isXmlSyntax) { @@ -527,6 +533,14 @@ abstract class Node implements TagConstants { return isEncodingSpecifiedInProlog; } + public void setIsBomPresent(boolean isBom) { + isBomPresent = isBom; + } + + public boolean isBomPresent() { + return isBomPresent; + } + /** * @return The enclosing root to this Root. Usually represents the page * that includes this one. diff --git a/java/org/apache/jasper/compiler/Parser.java b/java/org/apache/jasper/compiler/Parser.java index 3afb49fbb..c4d367c00 100644 --- a/java/org/apache/jasper/compiler/Parser.java +++ b/java/org/apache/jasper/compiler/Parser.java @@ -108,7 +108,7 @@ class Parser implements TagConstants { public static Node.Nodes parse(ParserController pc, JspReader reader, Node parent, boolean isTagFile, boolean directivesOnly, URL jarFileUrl, String pageEnc, String jspConfigPageEnc, - boolean isDefaultPageEncoding) throws JasperException { + boolean isDefaultPageEncoding, boolean isBomPresent) throws JasperException { Parser parser = new Parser(pc, reader, isTagFile, directivesOnly, jarFileUrl); @@ -117,6 +117,7 @@ class Parser implements TagConstants { root.setPageEncoding(pageEnc); root.setJspConfigPageEncoding(jspConfigPageEnc); root.setIsDefaultPageEncoding(isDefaultPageEncoding); + root.setIsBomPresent(isBomPresent); if (directivesOnly) { parser.parseTagFileDirectives(root); diff --git a/java/org/apache/jasper/compiler/ParserController.java b/java/org/apache/jasper/compiler/ParserController.java index ecec05365..3501f13e1 100644 --- a/java/org/apache/jasper/compiler/ParserController.java +++ b/java/org/apache/jasper/compiler/ParserController.java @@ -61,6 +61,7 @@ class ParserController implements TagConstants { private Stack baseDirStack = new Stack(); private boolean isEncodingSpecifiedInProlog; + private boolean isBomPresent; private String sourceEnc; @@ -159,6 +160,7 @@ class ParserController implements TagConstants { Node.Nodes parsedPage = null; isEncodingSpecifiedInProlog = false; + isBomPresent = false; isDefaultPageEncoding = false; JarFile jarFile = getJarFile(jarFileUrl); @@ -174,7 +176,7 @@ class ParserController implements TagConstants { compiler.getPageInfo().addDependant(absFileName); } - if (isXml && isEncodingSpecifiedInProlog) { + if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) { /* * Make sure the encoding explicitly specified in the XML * prolog (if any) matches that in the JSP config element @@ -183,7 +185,7 @@ class ParserController implements TagConstants { */ if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc) && (!jspConfigPageEnc.startsWith("UTF-16") - || !sourceEnc.startsWith("UTF-16"))) { + || !sourceEnc.startsWith("UTF-16"))) { err.jspError("jsp.error.prolog_config_encoding_mismatch", sourceEnc, jspConfigPageEnc); } @@ -199,7 +201,8 @@ class ParserController implements TagConstants { isTagFile, directiveOnly, sourceEnc, jspConfigPageEnc, - isEncodingSpecifiedInProlog); + isEncodingSpecifiedInProlog, + isBomPresent); } else { // Standard syntax InputStreamReader inStreamReader = null; @@ -212,7 +215,7 @@ class ParserController implements TagConstants { parsedPage = Parser.parse(this, jspReader, parent, isTagFile, directiveOnly, jarFileUrl, sourceEnc, jspConfigPageEnc, - isDefaultPageEncoding); + isDefaultPageEncoding, isBomPresent); } finally { if (inStreamReader != null) { try { @@ -298,7 +301,7 @@ class ParserController implements TagConstants { if (sourceEnc != null) { return; } - // We don't know the encoding + // We don't know the encoding, so use BOM to determine it sourceEnc = "ISO-8859-1"; } else { // XML syntax or unknown, (auto)detect encoding ... @@ -306,10 +309,13 @@ class ParserController implements TagConstants { jarFile, ctxt, err); sourceEnc = (String) ret[0]; if (((Boolean) ret[1]).booleanValue()) { - isEncodingSpecifiedInProlog = true; + isEncodingSpecifiedInProlog = true; + } + if (((Boolean) ret[2]).booleanValue()) { + isBomPresent = true; } - if (!isXml && sourceEnc.equals("UTF-8")) { + if (!isXml && sourceEnc.equals("UTF-8")) { /* * We don't know if we're dealing with XML or standard syntax. * Therefore, we need to check to see if the page contains @@ -359,10 +365,11 @@ class ParserController implements TagConstants { if (!isExternal) { jspReader.reset(startMark); if (hasJspRoot(jspReader)) { + if (revert) sourceEnc = "UTF-8"; isXml = true; - if (revert) sourceEnc = "UTF-8"; return; } else { + if (revert && isBomPresent) sourceEnc = "UTF-8"; isXml = false; } } @@ -373,15 +380,17 @@ class ParserController implements TagConstants { * Determine the page encoding from the page directive, unless it's * specified via JSP config. */ - sourceEnc = jspConfigPageEnc; - if (sourceEnc == null) { - sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark); - if (sourceEnc == null) { - // Default to "ISO-8859-1" per JSP spec - sourceEnc = "ISO-8859-1"; - isDefaultPageEncoding = true; - } - } + if (sourceEnc == null) { + sourceEnc = jspConfigPageEnc; + if (sourceEnc == null) { + sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark); + if (sourceEnc == null) { + // Default to "ISO-8859-1" per JSP spec + sourceEnc = "ISO-8859-1"; + isDefaultPageEncoding = true; + } + } + } } /* diff --git a/java/org/apache/jasper/compiler/Validator.java b/java/org/apache/jasper/compiler/Validator.java index 9a67341b0..a28e22a41 100644 --- a/java/org/apache/jasper/compiler/Validator.java +++ b/java/org/apache/jasper/compiler/Validator.java @@ -196,7 +196,8 @@ class Validator { err.jspError(n, "jsp.error.page.multi.pageencoding"); // 'pageEncoding' can occur at most once per file pageEncodingSeen = true; - comparePageEncodings(value, n); + String actual = comparePageEncodings(value, n); + n.getRoot().setPageEncoding(actual); } else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) { if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) { pageInfo.setDeferredSyntaxAllowedAsLiteral(value, n, @@ -266,6 +267,7 @@ class Validator { if (pageEncodingSeen) err.jspError(n, "jsp.error.tag.multi.pageencoding"); pageEncodingSeen = true; + compareTagEncodings(value, n); n.getRoot().setPageEncoding(value); } else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) { if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) { @@ -323,7 +325,7 @@ class Validator { * * @throws JasperException in case of page encoding mismatch */ - private void comparePageEncodings(String pageDirEnc, + private String comparePageEncodings(String pageDirEnc, Node.PageDirective pageDir) throws JasperException { Node.Root root = pageDir.getRoot(); @@ -335,13 +337,16 @@ class Validator { * pattern matches this page. Treat "UTF-16", "UTF-16BE", and * "UTF-16LE" as identical. */ - if (configEnc != null - && !pageDirEnc.equals(configEnc) - && (!pageDirEnc.startsWith("UTF-16") || !configEnc - .startsWith("UTF-16"))) { - err.jspError(pageDir, - "jsp.error.config_pagedir_encoding_mismatch", - configEnc, pageDirEnc); + if (configEnc != null) { + if (!pageDirEnc.equals(configEnc) + && (!pageDirEnc.startsWith("UTF-16") || !configEnc + .startsWith("UTF-16"))) { + err.jspError(pageDir, + "jsp.error.config_pagedir_encoding_mismatch", + configEnc, pageDirEnc); + } else { + return configEnc; + } } /* @@ -351,7 +356,7 @@ class Validator { * declaration). Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as * identical. */ - if (root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) { + if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) { String pageEnc = root.getPageEncoding(); if (!pageDirEnc.equals(pageEnc) && (!pageDirEnc.startsWith("UTF-16") || !pageEnc @@ -359,9 +364,47 @@ class Validator { err.jspError(pageDir, "jsp.error.prolog_pagedir_encoding_mismatch", pageEnc, pageDirEnc); + } else { + return pageEnc; } } + + return pageDirEnc; } + + /* + * Compares page encodings specified in various places, and throws + * exception in case of page encoding mismatch. + * + * @param pageDirEnc The value of the pageEncoding attribute of the page + * directive @param pageDir The page directive node + * + * @throws JasperException in case of page encoding mismatch + */ + private void compareTagEncodings(String pageDirEnc, + Node.TagDirective pageDir) throws JasperException { + + Node.Root root = pageDir.getRoot(); + + /* + * Compare the 'pageEncoding' attribute of the page directive with + * the encoding specified in the XML prolog (only for XML syntax, + * and only if JSP document contains XML prolog with encoding + * declaration). Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as + * identical. + */ + if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) { + String pageEnc = root.getPageEncoding(); + if (!pageDirEnc.equals(pageEnc) + && (!pageDirEnc.startsWith("UTF-16") || !pageEnc + .startsWith("UTF-16"))) { + err.jspError(pageDir, + "jsp.error.prolog_pagedir_encoding_mismatch", + pageEnc, pageDirEnc); + } + } + } + } /** diff --git a/java/org/apache/jasper/runtime/PageContextImpl.java b/java/org/apache/jasper/runtime/PageContextImpl.java index 936d35c70..4e6c3b97a 100644 --- a/java/org/apache/jasper/runtime/PageContextImpl.java +++ b/java/org/apache/jasper/runtime/PageContextImpl.java @@ -191,22 +191,22 @@ public class PageContextImpl extends PageContext { ((JspWriterImpl) out).flushBuffer(); } } catch (IOException ex) { - log.warn("Internal error flushing the buffer in release()"); - } - - servlet = null; - config = null; - context = null; - applicationContext = null; - elContext = null; - errorPageURL = null; - request = null; - response = null; - depth = -1; - baseOut.recycle(); - session = null; - - attributes.clear(); + IllegalStateException ise = new IllegalStateException("Internal error flushing the buffer in release()", ex); + throw ise; + } finally { + servlet = null; + config = null; + context = null; + applicationContext = null; + elContext = null; + errorPageURL = null; + request = null; + response = null; + depth = -1; + baseOut.recycle(); + session = null; + attributes.clear(); + } } public Object getAttribute(final String name) { diff --git a/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java b/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java index e5f21d483..9d2bc709e 100644 --- a/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java +++ b/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java @@ -43,6 +43,7 @@ public class XMLEncodingDetector { private InputStream stream; private String encoding; private boolean isEncodingSetInProlog; + private boolean isBomPresent; private Boolean isBigEndian; private Reader reader; @@ -121,7 +122,8 @@ public class XMLEncodingDetector { scanXMLDecl(); return new Object[] { this.encoding, - new Boolean(this.isEncodingSetInProlog) }; + new Boolean(this.isEncodingSetInProlog), + new Boolean(this.isBomPresent) }; } // stub method @@ -147,6 +149,11 @@ public class XMLEncodingDetector { Object [] encodingDesc = getEncodingName(b4, count); encoding = (String)(encodingDesc[0]); isBigEndian = (Boolean)(encodingDesc[1]); + if (encodingDesc.length > 2) { + isBomPresent = (Boolean)(encodingDesc[2]); + } else { + isBomPresent = true; + } stream.reset(); // Special case UTF-8 files with BOM created by Microsoft @@ -278,7 +285,7 @@ public class XMLEncodingDetector { private Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { - return new Object[]{"UTF-8", null}; + return new Object[]{"UTF-8", null, Boolean.FALSE}; } // UTF-16, with BOM @@ -286,17 +293,17 @@ public class XMLEncodingDetector { int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian - return new Object [] {"UTF-16BE", new Boolean(true)}; + return new Object [] {"UTF-16BE", Boolean.TRUE}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian - return new Object [] {"UTF-16LE", new Boolean(false)}; + return new Object [] {"UTF-16LE", Boolean.FALSE}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { - return new Object [] {"UTF-8", null}; + return new Object [] {"UTF-8", null, Boolean.FALSE}; } // UTF-8 with a BOM @@ -349,7 +356,7 @@ public class XMLEncodingDetector { } // default encoding - return new Object [] {"UTF-8", null}; + return new Object [] {"UTF-8", null, Boolean.FALSE}; }