boolean directivesOnly,
String pageEnc,
String jspConfigPageEnc,
- boolean isEncodingSpecifiedInProlog)
+ boolean isEncodingSpecifiedInProlog,
+ boolean isBomPresent)
throws JasperException {
JspDocumentParser jspDocParser =
dummyRoot.setJspConfigPageEncoding(jspConfigPageEnc);
dummyRoot.setIsEncodingSpecifiedInProlog(
isEncodingSpecifiedInProlog);
+ dummyRoot.setIsBomPresent(isBomPresent);
jspDocParser.current = dummyRoot;
if (parent == null) {
jspDocParser.addInclude(
private boolean isEncodingSpecifiedInProlog;
/*
+ * Indicates whether a byte order mark (BOM) was detected at the start of
+ * the page.
+ */
+ private boolean isBomPresent;
+
+ /*
* Constructor.
*/
Root(Mark start, Node parent, boolean isXmlSyntax) {
return isEncodingSpecifiedInProlog;
}
+ /**
+ * Stores the flag recording whether a byte order mark (BOM) was found
+ * for this page.
+ *
+ * @param isBom the value to assign to the isBomPresent flag
+ */
+ public void setIsBomPresent(boolean isBom) {
+ isBomPresent = isBom;
+ }
+
+ /**
+ * @return the current value of the isBomPresent flag, as last assigned
+ * through setIsBomPresent
+ */
+ public boolean isBomPresent() {
+ return isBomPresent;
+ }
+
/**
* @return The enclosing root to this Root. Usually represents the page
* that includes this one.
public static Node.Nodes parse(ParserController pc, JspReader reader,
Node parent, boolean isTagFile, boolean directivesOnly,
URL jarFileUrl, String pageEnc, String jspConfigPageEnc,
- boolean isDefaultPageEncoding) throws JasperException {
+ boolean isDefaultPageEncoding, boolean isBomPresent) throws JasperException {
Parser parser = new Parser(pc, reader, isTagFile, directivesOnly,
jarFileUrl);
root.setPageEncoding(pageEnc);
root.setJspConfigPageEncoding(jspConfigPageEnc);
root.setIsDefaultPageEncoding(isDefaultPageEncoding);
+ root.setIsBomPresent(isBomPresent);
if (directivesOnly) {
parser.parseTagFileDirectives(root);
private Stack baseDirStack = new Stack();
private boolean isEncodingSpecifiedInProlog;
+ private boolean isBomPresent;
private String sourceEnc;
Node.Nodes parsedPage = null;
isEncodingSpecifiedInProlog = false;
+ isBomPresent = false;
isDefaultPageEncoding = false;
JarFile jarFile = getJarFile(jarFileUrl);
compiler.getPageInfo().addDependant(absFileName);
}
- if (isXml && isEncodingSpecifiedInProlog) {
+ if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
/*
* Make sure the encoding explicitly specified in the XML
* prolog (if any) matches that in the JSP config element
*/
if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
&& (!jspConfigPageEnc.startsWith("UTF-16")
- || !sourceEnc.startsWith("UTF-16"))) {
+ || !sourceEnc.startsWith("UTF-16"))) {
err.jspError("jsp.error.prolog_config_encoding_mismatch",
sourceEnc, jspConfigPageEnc);
}
isTagFile, directiveOnly,
sourceEnc,
jspConfigPageEnc,
- isEncodingSpecifiedInProlog);
+ isEncodingSpecifiedInProlog,
+ isBomPresent);
} else {
// Standard syntax
InputStreamReader inStreamReader = null;
parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
directiveOnly, jarFileUrl,
sourceEnc, jspConfigPageEnc,
- isDefaultPageEncoding);
+ isDefaultPageEncoding, isBomPresent);
} finally {
if (inStreamReader != null) {
try {
if (sourceEnc != null) {
return;
}
- // We don't know the encoding
+ // We don't know the encoding, so use BOM to determine it
sourceEnc = "ISO-8859-1";
} else {
// XML syntax or unknown, (auto)detect encoding ...
jarFile, ctxt, err);
sourceEnc = (String) ret[0];
if (((Boolean) ret[1]).booleanValue()) {
- isEncodingSpecifiedInProlog = true;
+ isEncodingSpecifiedInProlog = true;
+ }
+ if (((Boolean) ret[2]).booleanValue()) {
+ isBomPresent = true;
}
- if (!isXml && sourceEnc.equals("UTF-8")) {
+ if (!isXml && sourceEnc.equals("UTF-8")) {
/*
* We don't know if we're dealing with XML or standard syntax.
* Therefore, we need to check to see if the page contains
if (!isExternal) {
jspReader.reset(startMark);
if (hasJspRoot(jspReader)) {
+ if (revert) sourceEnc = "UTF-8";
isXml = true;
- if (revert) sourceEnc = "UTF-8";
return;
} else {
+ if (revert && isBomPresent) sourceEnc = "UTF-8";
isXml = false;
}
}
* Determine the page encoding from the page directive, unless it's
* specified via JSP config.
*/
- sourceEnc = jspConfigPageEnc;
- if (sourceEnc == null) {
- sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
- if (sourceEnc == null) {
- // Default to "ISO-8859-1" per JSP spec
- sourceEnc = "ISO-8859-1";
- isDefaultPageEncoding = true;
- }
- }
+ if (sourceEnc == null) {
+ sourceEnc = jspConfigPageEnc;
+ if (sourceEnc == null) {
+ sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
+ if (sourceEnc == null) {
+ // Default to "ISO-8859-1" per JSP spec
+ sourceEnc = "ISO-8859-1";
+ isDefaultPageEncoding = true;
+ }
+ }
+ }
}
/*
err.jspError(n, "jsp.error.page.multi.pageencoding");
// 'pageEncoding' can occur at most once per file
pageEncodingSeen = true;
- comparePageEncodings(value, n);
+ String actual = comparePageEncodings(value, n);
+ n.getRoot().setPageEncoding(actual);
} else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) {
if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) {
pageInfo.setDeferredSyntaxAllowedAsLiteral(value, n,
if (pageEncodingSeen)
err.jspError(n, "jsp.error.tag.multi.pageencoding");
pageEncodingSeen = true;
+ compareTagEncodings(value, n);
n.getRoot().setPageEncoding(value);
} else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) {
if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) {
*
* @throws JasperException in case of page encoding mismatch
*/
- private void comparePageEncodings(String pageDirEnc,
+ private String comparePageEncodings(String pageDirEnc,
Node.PageDirective pageDir) throws JasperException {
Node.Root root = pageDir.getRoot();
* pattern matches this page. Treat "UTF-16", "UTF-16BE", and
* "UTF-16LE" as identical.
*/
- if (configEnc != null
- && !pageDirEnc.equals(configEnc)
- && (!pageDirEnc.startsWith("UTF-16") || !configEnc
- .startsWith("UTF-16"))) {
- err.jspError(pageDir,
- "jsp.error.config_pagedir_encoding_mismatch",
- configEnc, pageDirEnc);
+ if (configEnc != null) {
+ if (!pageDirEnc.equals(configEnc)
+ && (!pageDirEnc.startsWith("UTF-16") || !configEnc
+ .startsWith("UTF-16"))) {
+ err.jspError(pageDir,
+ "jsp.error.config_pagedir_encoding_mismatch",
+ configEnc, pageDirEnc);
+ } else {
+ return configEnc;
+ }
}
/*
* declaration). Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as
* identical.
*/
- if (root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) {
+ if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) {
String pageEnc = root.getPageEncoding();
if (!pageDirEnc.equals(pageEnc)
&& (!pageDirEnc.startsWith("UTF-16") || !pageEnc
err.jspError(pageDir,
"jsp.error.prolog_pagedir_encoding_mismatch",
pageEnc, pageDirEnc);
+ } else {
+ return pageEnc;
}
}
+
+ return pageDirEnc;
}
+
+ /*
+ * Checks the 'pageEncoding' attribute of a tag directive against the
+ * page encoding stored on the enclosing root, and reports an error if
+ * the two disagree. The check is only performed when the root uses XML
+ * syntax with an encoding specified in the XML prolog, or when a BOM is
+ * present. No comparison against a JSP config encoding is made here.
+ *
+ * @param pageDirEnc The value of the pageEncoding attribute of the tag
+ * directive
+ * @param pageDir The tag directive node
+ *
+ * @throws JasperException in case of page encoding mismatch
+ */
+ private void compareTagEncodings(String pageDirEnc,
+ Node.TagDirective pageDir) throws JasperException {
+
+ Node.Root root = pageDir.getRoot();
+
+ /*
+ * Compare the 'pageEncoding' attribute of the tag directive with
+ * the page encoding stored on the root. This applies for XML syntax
+ * when the XML prolog carries an encoding declaration, and for any
+ * syntax when a BOM is present. Treat "UTF-16", "UTF-16BE", and
+ * "UTF-16LE" as identical.
+ */
+ if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) {
+ String pageEnc = root.getPageEncoding();
+ if (!pageDirEnc.equals(pageEnc)
+ && (!pageDirEnc.startsWith("UTF-16") || !pageEnc
+ .startsWith("UTF-16"))) {
+ err.jspError(pageDir,
+ "jsp.error.prolog_pagedir_encoding_mismatch",
+ pageEnc, pageDirEnc);
+ }
+ }
+ }
+
}
/**
((JspWriterImpl) out).flushBuffer();
}
} catch (IOException ex) {
- log.warn("Internal error flushing the buffer in release()");
- }
-
- servlet = null;
- config = null;
- context = null;
- applicationContext = null;
- elContext = null;
- errorPageURL = null;
- request = null;
- response = null;
- depth = -1;
- baseOut.recycle();
- session = null;
-
- attributes.clear();
+ IllegalStateException ise = new IllegalStateException("Internal error flushing the buffer in release()", ex);
+ throw ise;
+ } finally {
+ servlet = null;
+ config = null;
+ context = null;
+ applicationContext = null;
+ elContext = null;
+ errorPageURL = null;
+ request = null;
+ response = null;
+ depth = -1;
+ baseOut.recycle();
+ session = null;
+ attributes.clear();
+ }
}
public Object getAttribute(final String name) {
private InputStream stream;
private String encoding;
private boolean isEncodingSetInProlog;
+ private boolean isBomPresent;
private Boolean isBigEndian;
private Reader reader;
scanXMLDecl();
return new Object[] { this.encoding,
- new Boolean(this.isEncodingSetInProlog) };
+ new Boolean(this.isEncodingSetInProlog),
+ new Boolean(this.isBomPresent) };
}
// stub method
Object [] encodingDesc = getEncodingName(b4, count);
encoding = (String)(encodingDesc[0]);
isBigEndian = (Boolean)(encodingDesc[1]);
+ if (encodingDesc.length > 2) {
+ isBomPresent = (Boolean)(encodingDesc[2]);
+ } else {
+ isBomPresent = true;
+ }
stream.reset();
// Special case UTF-8 files with BOM created by Microsoft
private Object[] getEncodingName(byte[] b4, int count) {
if (count < 2) {
- return new Object[]{"UTF-8", null};
+ return new Object[]{"UTF-8", null, Boolean.FALSE};
}
// UTF-16, with BOM
int b1 = b4[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
- return new Object [] {"UTF-16BE", new Boolean(true)};
+ return new Object [] {"UTF-16BE", Boolean.TRUE};
}
if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
- return new Object [] {"UTF-16LE", new Boolean(false)};
+ return new Object [] {"UTF-16LE", Boolean.FALSE};
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
- return new Object [] {"UTF-8", null};
+ return new Object [] {"UTF-8", null, Boolean.FALSE};
}
// UTF-8 with a BOM
}
// default encoding
- return new Object [] {"UTF-8", null};
+ return new Object [] {"UTF-8", null, Boolean.FALSE};
}