From: Michael M Slusarz Date: Fri, 20 Aug 2010 17:40:35 +0000 (-0600) Subject: Bug #9190: Add Horde_Support_Domhtml. X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=6149c84e973f3fb5c61760834c148bae4cbf04b8;p=horde.git Bug #9190: Add Horde_Support_Domhtml. Charset handling with libxml is less than ideal. Abstract the loadHTML() code out into a single place so that we can experiment and fix things all at once instead of piecemeal. Suggestions for where else this could reside? It does require Horde_String, so maybe horde/Util would be better. --- diff --git a/framework/Mime_Viewer/lib/Horde/Mime/Viewer/Html.php b/framework/Mime_Viewer/lib/Horde/Mime/Viewer/Html.php index b42051adc..6c3276df8 100644 --- a/framework/Mime_Viewer/lib/Horde/Mime/Viewer/Html.php +++ b/framework/Mime_Viewer/lib/Horde/Mime/Viewer/Html.php @@ -144,6 +144,7 @@ class Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Base 'charset' => $charset ), array( + 'charset' => $charset, 'noprefetch' => !empty($options['noprefetch']), 'return_dom' => true, 'strip_styles' => (!empty($options['inline']) || $strip_style_attributes), @@ -158,9 +159,9 @@ class Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Base ); $this->_phishWarn = false; - $this->_node($data, $data); + $this->_node($data->dom, $data->dom); - return Horde_String::convertCharset($data->saveHTML(), $data->encoding, $charset); + return $data->returnHtml(); } /** diff --git a/framework/Support/lib/Horde/Support/Domhtml.php b/framework/Support/lib/Horde/Support/Domhtml.php new file mode 100644 index 000000000..61b9127ef --- /dev/null +++ b/framework/Support/lib/Horde/Support/Domhtml.php @@ -0,0 +1,86 @@ + + * @category Horde + * @package Support + * @copyright 2010 The Horde Project (http://www.horde.org/) + * @license http://opensource.org/licenses/bsd-license.php + */ +class Horde_Support_Domhtml +{ + /** + * DOM object. + * + * @var DOMDocument + */ + public $dom; + + /** + * Charset/encoding used in object. + * + * @var string + */ + public $encoding; + + /** + * Original charset of data. + * + * @var string + */ + protected $_origCharset; + + /** + * @param string $text + * @param string $charset + * + * @throws Exception + */ + public function __construct($text, $charset = null) + { + if (!extension_loaded('dom')) { + throw new Exception('DOM extension is not available.'); + } + + $this->_origCharset = $charset; + + $old_error = libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML($text); + $this->encoding = $doc->encoding; + + if (!is_null($charset)) { + if (!$doc->encoding) { + $doc->loadHTML(Horde_String::convertCharset($text, $charset, 'ISO-8859-1')); + $this->encoding = null; + } elseif ($doc->encoding != $charset) { + /* If libxml can't auto-detect encoding, convert to what it + * *thinks* the encoding should be. */ + $doc->loadHTML(Horde_String::convertCharset($text, $charset, $doc->encoding)); + } + } + + if ($old_error) { + libxml_use_internal_errors(false); + } + + $this->dom = $doc; + } + + /** + * @return string + */ + public function returnHtml() + { + return Horde_String::convertCharset($this->dom->saveHTML(), $this->encoding, $this->_origCharset); + } + +} diff --git a/framework/Support/package.xml b/framework/Support/package.xml index 2e655ffb7..2e9fc1eb4 100644 --- a/framework/Support/package.xml +++ b/framework/Support/package.xml @@ -21,7 +21,8 @@ beta BSD - * Add Horde_Support_Randomid::. + * Add Horde_Support_Domhtml::. + * Add Horde_Support_Randomid::. * Add Portuguese numerizer. @@ -40,6 +41,7 @@ + @@ -90,6 +92,11 @@ pear.horde.org + + + dom + + @@ -97,6 +104,7 @@ + diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php index b89ab29d0..cbd5362d8 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php @@ -106,21 +106,10 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base */ public function postProcess($text) { - if (extension_loaded('dom')) { - $old_error = libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML($text); - if (!$doc->encoding) { - /* If libxml can't auto-detect encoding, convert to ISO-8859-1 - * manually. */ - $doc->loadHTML(Horde_String::convertCharset($text, $this->_params['charset'], 'ISO-8859-1')); - } - if ($old_error) { - libxml_use_internal_errors(false); - } - - $text = Horde_String::convertCharset($this->_node($doc, $doc), $doc->encoding, $this->_params['charset']); - } else { + try { + $dom = new Horde_Support_Domhtml($text, $this->_params['charset']); + $text = Horde_String::convertCharset($this->_node($dom->dom, $dom->dom), $dom->encoding, $this->_params['charset']); + } catch (Exception $e) { $text = strip_tags(preg_replace("/\/i", "\n", $text)); } diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php b/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php index 456971817..d24e42f9f 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php @@ -15,8 +15,8 @@ * the document. * DEFAULT: false (returns the contents contained inside * the BODY tag) - * 'return_dom' - (boolean) If true, return a DOMDocument object instead of - * HTML text (overrides return_document). + * 'return_dom' - (boolean) If true, return a Horde_Support_Domhtml object + * instead of HTML text (overrides return_document). * DEFAULT: false * 'strip_styles' - (boolean) Strip style tags? * DEFAULT: true @@ -94,59 +94,52 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base * * @param string $text The text after the filtering. * - * @return string|DOMDocument The modified text or a DOMDocument object - * if the 'return_dom' parameter is set. + * @return string|Horde_Support_Domhtml The modified text or a Domhtml + * object if the 'return_dom' + * parameter is set. */ public function postProcess($text) { - if (!extension_loaded('dom')) { + try { + $dom = new Horde_Support_Domhtml($text, $this->_params['charset']); + } catch (Exception $e) { return $text; } - $old_error = libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML($text); - if (!$doc->encoding) { - /* If libxml can't auto-detect encoding, convert to ISO-8859-1 - * manually. */ - $doc->loadHTML(Horde_String::convertCharset($text, $this->_params['charset'], 'ISO-8859-1')); - } - if ($old_error) { - libxml_use_internal_errors(false); - } - - $this->_node($doc, $doc); + $this->_node($dom->dom, $dom->dom); if (!$this->_params['return_document']) { - $body = $doc->getElementsByTagName('body')->item(0); + $body = $dom->dom->getElementsByTagName('body')->item(0); } if ($this->_params['noprefetch']) { - $meta = $doc->createElement('meta'); + $meta = $dom->dom->createElement('meta'); $meta->setAttribute('http-equiv', 'x-dns-prefetch-control'); $meta->setAttribute('value-equiv', 'off'); if ($this->_params['return_document']) { - $doc->getElementsByTagName('head')->item(0)->appendChild($meta); + $dom->dom->getElementsByTagName('head')->item(0)->appendChild($meta); } elseif ($body) { $body->appendChild($meta); } } if ($this->_params['return_dom']) { - return $doc; + return $dom; } - $text = ''; if ($this->_params['return_document']) { - $text = $doc->saveHTML(); - } elseif ($body && $body->hasChildNodes()) { + return $dom->returnHtml(); + } + + $text = ''; + if ($body && $body->hasChildNodes()) { foreach ($body->childNodes as $child) { - $text .= $doc->saveXML($child); + $text .= $dom->dom->saveXML($child); } } - return Horde_String::convertCharset($text, $doc->encoding, $this->_params['charset']); + return Horde_String::convertCharset($text, $dom->encoding, $this->_params['charset']); } /** diff --git a/framework/Text_Filter/package.xml b/framework/Text_Filter/package.xml index a5d26df71..688c546ec 100644 --- a/framework/Text_Filter/package.xml +++ b/framework/Text_Filter/package.xml @@ -113,6 +113,10 @@ pear.horde.org + Support + pear.horde.org + + Util pear.horde.org diff --git a/imp/lib/Compose.php b/imp/lib/Compose.php index b83e97b85..fe0b2d348 100644 --- a/imp/lib/Compose.php +++ b/imp/lib/Compose.php @@ -2480,6 +2480,7 @@ class IMP_Compose $type = $part->getType(); $part_charset = $part->getCharset(); $charset = $GLOBALS['registry']->getCharset(); + $msg = Horde_String::convertCharset($part->getContents(), $part_charset, $charset); /* Enforce reply limits. */ @@ -2492,9 +2493,9 @@ class IMP_Compose } if ($mode == 'html') { - $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, array('cleanhtml', 'xss'), array(array('body_only' => true), array('charset' => $charset, 'strip_styles' => true, 'strip_style_attributes' => false))); + $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, array('Cleanhtml', 'Xss'), array(array('body_only' => true), array('charset' => $charset, 'strip_styles' => true, 'strip_style_attributes' => false))); } elseif ($type == 'text/html') { - $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, 'html2text'); + $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, 'Html2text', array('charset' => $charset)); $type = 'text/plain'; } diff --git a/imp/lib/tests/mime_viewer_html.phpt b/imp/lib/tests/mime_viewer_html.phpt index ce660f771..2a7a1f861 100644 --- a/imp/lib/tests/mime_viewer_html.phpt +++ b/imp/lib/tests/mime_viewer_html.phpt @@ -9,9 +9,6 @@ Horde_Registry::appInit('imp', array( 'cli' => true )); -// Suppress DOM parsing errors. -libxml_use_internal_errors(true); - require_once dirname(__FILE__) . '/../Mime/Viewer/Html.php'; class IMP_Html_Viewer_Test extends IMP_Horde_Mime_Viewer_Html { @@ -25,10 +22,10 @@ class IMP_Html_Viewer_Test extends IMP_Horde_Mime_Viewer_Html 'target' => '_blank' ); - $doc = DOMDocument::loadHTML($html); - $this->_node($doc, $doc); + $dom = new Horde_Support_Domhtml($html); + $this->_node($dom->dom, $dom->dom); - return $doc->saveXML($doc->getElementsByTagName('body')->item(0)->firstChild) . "\n"; + return $dom->dom->saveXML($dom->dom->getElementsByTagName('body')->item(0)->firstChild) . "\n"; } protected function _node($doc, $node) diff --git a/imp/view.php b/imp/view.php index 4ef84546e..6557a8b37 100644 --- a/imp/view.php +++ b/imp/view.php @@ -291,22 +291,12 @@ case 'print_attach': if ($browser->isBrowser('mozilla')) { $pstring = Horde_Mime::decodeParam('content-type', $render[$key]['type']); - $old_error = libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML($render[$key]['data']); - if (!$doc->encoding) { - /* If libxml can't auto-detect encoding, convert to - * ISO-8859-1 manually. */ - $doc->loadHTML(Horde_String::convertCharset($render[$key]['data'], $pstring['params']['charset'], 'ISO-8859-1')); - } - if (!$old_error) { - libxml_use_internal_errors(false); - } + $doc = new Horde_Support_Domhtml($render[$key]['data'], $pstring['params']['charset']); - $bodyelt = $doc->getElementsByTagName('body')->item(0); - $bodyelt->insertBefore($doc->importNode($div, true), $bodyelt->firstChild); + $bodyelt = $doc->dom->getElementsByTagName('body')->item(0); + $bodyelt->insertBefore($doc->dom->importNode($div, true), $bodyelt->firstChild); - echo Horde_String::convertCharset($doc->saveHTML(), $doc->encoding, $pstring['params']['charset']); + echo $doc->returnHtml(); } else { echo $render[$key]['data']; }