From: Michael M Slusarz Date: Mon, 6 Jul 2009 20:11:01 +0000 (-0600) Subject: Add Horde_Text_Filter_Cleanhtml::. X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=d45ddbaa5456e129109feba0c554ce7a109479a0;p=horde.git Add Horde_Text_Filter_Cleanhtml::. --- diff --git a/framework/Mime/lib/Horde/Mime/Viewer/Html.php b/framework/Mime/lib/Horde/Mime/Viewer/Html.php index bbe1e0eec..a2b66dc96 100644 --- a/framework/Mime/lib/Horde/Mime/Viewer/Html.php +++ b/framework/Mime/lib/Horde/Mime/Viewer/Html.php @@ -112,10 +112,16 @@ class Horde_Mime_Viewer_html extends Horde_Mime_Viewer_Driver $browser->getMajor() == 4) || $browser->isBrowser('msie')); $strip_styles = $inline || $strip_style_attributes; - $data = Horde_Text_Filter::filter($data, 'xss', array( - 'body_only' => $inline, - 'strip_styles' => $strip_styles, - 'strip_style_attributes' => $strip_style_attributes + + $data = Horde_Text_Filter::filter($data, array('cleanhtml', 'xss'), array( + array( + 'charset' => $this->_mimepart->getCharset() + ), + array( + 'body_only' => $inline, + 'strip_styles' => $strip_styles, + 'strip_style_attributes' => $strip_style_attributes + ) )); /* Check for phishing exploits. */ diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php b/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php new file mode 100644 index 000000000..838d199d8 --- /dev/null +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php @@ -0,0 +1,76 @@ + + * body_only - (boolean) Only return the body data? + * DEFAULT: Return the whole HTML document + * charset - (string) The charset of the text. + * DEFAULT: US-ASCII + * size - (integer) Only filter if data is below this size. + * DEFAULT: No default + * + * + * Copyright 2009 The Horde Project (http://www.horde.org/) + * + * See the enclosed file COPYING for license information (LGPL). If you + * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html. + * + * @author Michael Slusarz + * @package Horde_Text + */ +class Horde_Text_Filter_Cleanhtml extends Horde_Text_Filter +{ + /** + * Filter parameters. + * + * @var array + */ + protected $_params = array( + 'body_only' => false, + 'charset' => 'us-ascii', + 'size' => false + ); + + /** + * Executes any code necessary after applying the filter patterns. + * + * @param string $text The text after the filtering. + * + * @return string The modified text. + */ + public function postProcess($text) + { + if (!Horde_Util::extensionExists('tidy') || + (($this->_params['size'] !== false) && + (strlen($text) > $this->_params['size']))) { + return $text; + } + + $tidy_config = array( + 'enclose-block-text' => true, + 'hide-comments' => true, + 'indent' => true, + 'indent-spaces' => 4, + 'numeric-entities' => true, + 'output-xhtml' => true, + 'show-body-only' => !empty($this->_params['body_only']), + 'tab-size' => 4, + 'wrap' => 0 + ); + + if (strtolower($this->_params['charset']) == 'us-ascii') { + $tidy = tidy_parse_string($text, $tidy_config, 'ascii'); + $tidy->cleanRepair(); + $text = tidy_get_output($tidy); + } else { + $tidy = tidy_parse_string(Horde_String::convertCharset($text, $this->_params['charset'], 'UTF-8'), $tidy_config, 'utf8'); + $tidy->cleanRepair(); + $text = Horde_String::convertCharset(tidy_get_output($tidy), 'UTF-8', $this->_params['charset']); + } + + return $text; + } + +} diff --git a/framework/Text_Filter/package.xml b/framework/Text_Filter/package.xml index bee5156b6..57ecc5015 100644 --- a/framework/Text_Filter/package.xml +++ b/framework/Text_Filter/package.xml @@ -37,7 +37,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> beta LGPL - * Initial Horde 4 package. + * Add support for using the tidy extension when filtering HTML data. + * Initial Horde 4 package. @@ -47,6 +48,7 @@ http://pear.php.net/dtd/package-2.0.xsd"> + @@ -199,6 +201,7 @@ http://pear.php.net/dtd/package-2.0.xsd"> +