From: Michael M Slusarz Date: Thu, 22 Jul 2010 20:38:50 +0000 (-0600) Subject: Convert IMP HTML viewer to using XML/DOM parser. X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=b077e75cf250faa5be9fdda55431621709b71508;p=horde.git Convert IMP HTML viewer to using XML/DOM parser. --- diff --git a/imp/js/imp.js b/imp/js/imp.js index 4cf95d9df..eee2dd389 100644 --- a/imp/js/imp.js +++ b/imp/js/imp.js @@ -54,7 +54,7 @@ document.observe('dom:loaded', function() { callback = this.imgOnload.bind(this, iframeid); s.findElements(iframe.contentWindow.document).each(function(img) { - var src = decodeURIComponent(img.getAttribute('htmlimgblocked')); + var src = img.getAttribute('htmlimgblocked'); if (img.getAttribute('src')) { img.onload = callback; ++IMP.imgs[iframeid]; diff --git a/imp/lib/Mime/Viewer/Html.php b/imp/lib/Mime/Viewer/Html.php index ee0cae7a8..364303bdf 100644 --- a/imp/lib/Mime/Viewer/Html.php +++ b/imp/lib/Mime/Viewer/Html.php @@ -33,6 +33,13 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html public $newwinTarget = null; /** + * Temp array for storing data when parsing the HTML document. + * + * @var array + */ + protected $_tmp = array(); + + /** * This driver's display capabilities. * * @var array @@ -45,46 +52,6 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html ); /** - * The regular expression to catch any tags and attributes that load - * external images. - * - * @var string - */ - protected $_img_regex = '/ - # match 1 - ( - # tags - ]+?src= - # tags - |]+?src= - # "background" attributes - |]+?background=|]*background=|]*background= - # "style" attributes; match 2; quotes: match 3 - |(style=\s*("|\')?[^>]*?background(?:-image)?:(?(3)[^"\']|[^>])*?url\s*\() - ) - # whitespace - \s* - # opening quotes, parenthesis; match 4 - ("|\')? - # the image url; match 5 - ((?(2) - # matched a "style" attribute - (?(4)[^"\')>]*|[^\s)>]*) - # did not match a "style" attribute - |(?(4)[^"\'>]*|[^\s>]*) - )) - # closing quotes - (?(4)\\4) - # matched a "style" attribute? - (?(2) - # closing parenthesis - \s*\) - # remainder of the "style" attribute; match 6 - ((?(3)[^"\'>]*|[^\s>]*)(?(3)\\3)) - ) - /isx'; - - /** * Return the full rendered version of the Horde_Mime_Part object. * * @return array See Horde_Mime_Viewer_Driver::render(). @@ -203,6 +170,18 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html ); } + $data = $this->parseHtml($inline, $data)->saveHTML(); + + if ($this->_tmp['imgblock']) { + $status[] = array( + 'icon' => Horde::img('mime/image.png'), + 'text' => array( + _("Images have been blocked to protect your privacy."), + Horde::link('#', '', 'unblockImageLink') . _("Show Images?") . '' + ) + ); + } + /* Search for inlined links that we can display (multipart/related * parts). */ if (isset($this->_params['related_id'])) { @@ -221,45 +200,24 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html } } - /* Convert links to open in new windows. First we hide all - * mailto: links, links that have an "#xyz" anchor and ignore - * all links that already have a target. */ - $data = $this->openLinksInNewWindow($data); + $filters = array(); + if ($GLOBALS['prefs']->getValue('emoticons')) { + $filters['emoticons'] = array( + 'entities' => true + ); + } - /* If displaying inline (in IFRAME), tables with 100% height seems to - * confuse many browsers re: the iframe internal height. */ if ($inline) { - $data = preg_replace('/]*)\bheight=["\']?100\%["\']?/i', 'hasMethod('mail/compose')) { - $data = preg_replace_callback('/href\s*=\s*(["\'])?mailto:((?(1)[^\1]*?|[^\s>]+))(?(1)\1|)/i', array($this, '_mailtoCallback'), $data); + if (!empty($filters)) { + $data = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($data, array_keys($filters), array(array_values($filters))); } /* Filter bad language. */ $data = IMP::filterText($data); - /* Image filtering. */ - if ($inline && - $GLOBALS['prefs']->getValue('html_image_replacement') && - preg_match($this->_img_regex, $this->_mimepart->getContents()) && - !$this->_inAddressBook()) { - $data = $this->blockImages($data); - - $status[] = array( - 'icon' => Horde::img('mime/image.png'), - 'text' => array( - _("Images have been blocked to protect your privacy."), - Horde::link('#', '', 'unblockImageLink') . _("Show Images?") . '' - ) - ); - } - - if ($GLOBALS['prefs']->getValue('emoticons')) { - $data = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($data, array('emoticons'), array(array('entities' => true))); - } - return array( 'data' => $data, 'status' => $status, @@ -268,75 +226,31 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html } /** - * Scans HTML data and alters links to open in a new window. - * In public function so that it can be tested. - * - * @param string $data Data in. - * - * @return string Altered data. */ - public function openLinksInNewWindow($data) + public function parseHtml($inline, $data) { - $target = is_null($this->newwinTarget) - ? 'target_' . uniqid(mt_rand()) - : $this->newwinTarget; - - return preg_replace( - array('/]*\bhref=["\']?(#|mailto:))/i', - '/]*)\btarget=["\']?[^>"\'\s]*["\']?/i', - '/]*\bhref=["\']?(#|mailto:))/i', - '/]*)\btarget=["\']?[^>"\'\s]*["\']?/i', - '/_img_regex, array($this, '_blockImages'), $data); - } + $this->_tmp = array( + 'img' => ($inline && $GLOBALS['prefs']->getValue('html_image_replacement') && !$this->_inAddressBook()), + 'imgblock' => false, + 'inline' => $inline, + 'target' => (is_null($this->newwinTarget) ? 'target_' . uniqid(mt_rand()) : $this->newwinTarget) + ); - /** - * Called from the image-blocking regexp to construct the new image tags. - * - * @param array $matches - * - * @return string The new image tag. - */ - protected function _blockImages($matches) - { - if (is_null($this->blockimg)) { + /* Image filtering. */ + if ($this->_tmp['img'] && is_null($this->blockimg)) { $this->blockimg = Horde::url(Horde_Themes::img('spacer_red.png'), true, -1); } - return empty($matches[2]) - ? $matches[1] . '"' . $this->blockimg . '" htmlimgblocked="' . rawurlencode(str_replace('&', '&', trim($matches[5], '\'" '))) . '"' - : trim($matches[1] . "'" . $this->blockimg . '\')' . $matches[6], '\'" ') . '" htmlimgblocked="' . rawurlencode(str_replace('&', '&', trim($matches[5], '\'" '))) . '"'; + $old_error = libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML($data); + if ($old_error) { + libxml_use_internal_errors(false); + } + + $this->_node($doc, $doc); + + return $doc; } /** @@ -346,6 +260,10 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html */ protected function _inAddressBook() { + if (empty($this->_params['contents'])) { + return false; + } + $from = Horde_Mime_Address::bareAddress($this->_params['contents']->getHeaderOb()->getValue('from')); if ($GLOBALS['prefs']->getValue('html_image_addrbook') && @@ -363,4 +281,90 @@ class IMP_Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Html return (!empty($safe_addrs) && in_array($from, $safe_addrs)); } + /** + * Process DOM node. + * + * @param DOMDocument $doc Document node. + * @param DOMElement $node Element node. + */ + protected function _node($doc, $node) + { + if ($node->hasChildNodes()) { + foreach ($node->childNodes as $child) { + if ($child instanceof DOMElement) { + switch (strtolower($child->tagName)) { + case 'a': + case 'area': + /* Convert links to open in new windows. Ignore + * mailto: links, links that have an "#xyz" anchor, + * and links that already have a target. */ + if (!$child->hasAttribute('target') && + $child->hasAttribute('href')) { + $url = parse_url($child->getAttribute('href')); + if (empty($url['fragment']) && + ($url['scheme'] != 'mailto:')) { + $child->setAttribute('target', $this->_tmp['target']); + } + } + break; + + case 'img': + case 'input': + if ($this->_tmp['img'] && $child->hasAttribute('src')) { + $child->setAttribute('htmlimgblocked', $child->getAttribute('src')); + $child->setAttribute('src', $this->blockimg); + $this->_tmp['imgblock'] = true; + } + break; + + case 'table': + /* If displaying inline (in IFRAME), tables with 100% + * height seems to confuse many browsers re: the + * iframe internal height. */ + if ($this->_tmp['inline'] && + $child->hasAttribute('height') && + ($child->getAttribute('height') == '100%')) { + $child->removeAttribute('height'); + } + + // Fall-through + + case 'body': + case 'td': + if ($this->_tmp['img'] && + $child->hasAttribute('background')) { + $child->setAttribute('htmlimgblocked', $child->getAttribute('background')); + $child->setAttribute('background', $this->blockimg); + $this->_tmp['imgblock'] = true; + } + break; + } + + if ($this->_tmp['img'] && $child->hasAttribute('style')) { + $this->_tmp['child'] = $child; + $style = preg_replace_callback('/(background(?:-image)?:[^;}]*(?:url\(["\']?))(.*?)((?:["\']?\)))/i', array($this, '_styleCallback'), $child->getAttribute('style'), -1, $matches); + if ($matches) { + $child->setAttribute('style', $style); + } + } + } + + $this->_node($doc, $child); + } + } + } + + /** + * preg_replace_callback() callback for style/background matching. + * + * @param array $matches The list of matches. + * + * @return string The replacement image string. + */ + protected function _styleCallback($matches) + { + $this->_tmp['child']->setAttribute('htmlimgblocked', $matches[2]); + return $matches[1] . $this->blockimg . $matches[3]; + } + } diff --git a/imp/lib/tests/mime_viewer_html.phpt b/imp/lib/tests/mime_viewer_html.phpt index 2ce1c7c96..81976193b 100644 --- a/imp/lib/tests/mime_viewer_html.phpt +++ b/imp/lib/tests/mime_viewer_html.phpt @@ -17,46 +17,59 @@ $v->blockimg = 'imgblock.png'; $v->newwinTarget = '_blank'; // Test regex for converting links to open in a new window. -echo $v->openLinksInNewWindow('foo') . "\n"; -echo $v->openLinksInNewWindow('example@example.com') . "\n"; -echo $v->openLinksInNewWindow('foo Anchor') . "\n"; -echo $v->openLinksInNewWindow('foo example') . "\n"; -echo $v->openLinksInNewWindow('foo example') . "\n"; -echo $v->openLinksInNewWindow('foo example') . "\n"; -echo $v->openLinksInNewWindow('foo Example Email') . "\n"; -echo $v->openLinksInNewWindow('') . "\n"; -echo $v->openLinksInNewWindow('') . "\n"; +$links = array( + 'foo', + 'example@example.com', + 'foo Anchor', + 'foo example', + 'foo example', + 'foo example', + 'foo Example Email', + '', + '' +); + +foreach ($links as $val) { + $doc = $v->parseHtml(true, $val); + echo $doc->saveXML($doc->getElementsByTagName('body')->item(0)->firstChild) . "\n"; +} + echo "\n"; // Test regex for hiding images. -echo $v->blockImages('') . "\n"; -echo $v->blockImages('') . "\n"; -echo $v->blockImages('
') . "\n"; -echo $v->blockImages("Best flight deals") . "\n"; -echo $v->blockImages('') . "\n"; -echo $v->blockImages('') . "\n"; -echo $v->blockImages('') . "\n"; -echo $v->blockImages(' -') . "\n"; +$images = array( + '', + '', + '', + "Best flight deals", + '', + '', + '', + '' +); + +foreach ($images as $val) { + $doc = $v->parseHtml(true, $val); + echo $doc->saveXML($doc->getElementsByTagName('body')->item(0)->firstChild) . "\n"; +} ?> --EXPECT-- -foo -example@example.com -foo Anchor -foo example -foo example -foo example -foo Example Email - - - - - - -Best flight deals - - - - - +

foo

+

example@example.com

+

foo Anchor

+

foo example

+

foo example

+

foo example

+

foo Example Email

+ + + + + +
+Best flight deals + + + +