$browser->getMajor() == 4) ||
$browser->isBrowser('msie'));
$strip_styles = $inline || $strip_style_attributes;
- $data = Horde_Text_Filter::filter($data, 'xss', array(
- 'body_only' => $inline,
- 'strip_styles' => $strip_styles,
- 'strip_style_attributes' => $strip_style_attributes
+
+ $data = Horde_Text_Filter::filter($data, array('cleanhtml', 'xss'), array(
+ array(
+ 'charset' => $this->_mimepart->getCharset()
+ ),
+ array(
+ 'body_only' => $inline,
+ 'strip_styles' => $strip_styles,
+ 'strip_style_attributes' => $strip_style_attributes
+ )
));
/* Check for phishing exploits. */
--- /dev/null
+<?php
+/**
+ * This filter attempts to sanitize HTML by cleaning up malformed HTML tags.
+ *
+ * Parameters:
+ * <pre>
+ * body_only - (boolean) Only return the body data?
+ * DEFAULT: Return the whole HTML document
+ * charset - (string) The charset of the text.
+ * DEFAULT: US-ASCII
+ * size - (integer) Only filter if data is below this size.
+ * DEFAULT: No default
+ * </pre>
+ *
+ * Copyright 2009 The Horde Project (http://www.horde.org/)
+ *
+ * See the enclosed file COPYING for license information (LGPL). If you
+ * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
+ *
+ * @author Michael Slusarz <slusarz@horde.org>
+ * @package Horde_Text
+ */
+class Horde_Text_Filter_Cleanhtml extends Horde_Text_Filter
+{
+    /**
+     * Filter parameters.
+     *
+     * Recognized keys:
+     *   - body_only: (boolean) Passed to tidy as 'show-body-only'.
+     *   - charset: (string) The charset of the text to clean.
+     *   - size: (integer) Text longer than this many bytes is not
+     *           cleaned; false disables the size check.
+     *
+     * @var array
+     */
+    protected $_params = array(
+        'body_only' => false,
+        'charset' => 'us-ascii',
+        'size' => false
+    );
+
+    /**
+     * Executes any code necessary after applying the filter patterns.
+     *
+     * Runs the text through the tidy extension to repair malformed markup.
+     * The text is returned unmodified if the tidy extension is not
+     * available, if the text is longer than the 'size' parameter, or if
+     * tidy is unable to parse the text.
+     *
+     * @param string $text  The text after the filtering.
+     *
+     * @return string  The modified text.
+     */
+    public function postProcess($text)
+    {
+        if (!Horde_Util::extensionExists('tidy') ||
+            (($this->_params['size'] !== false) &&
+             (strlen($text) > $this->_params['size']))) {
+            return $text;
+        }
+
+        $tidy_config = array(
+            'enclose-block-text' => true,
+            'hide-comments' => true,
+            'indent' => true,
+            'indent-spaces' => 4,
+            'numeric-entities' => true,
+            'output-xhtml' => true,
+            'show-body-only' => !empty($this->_params['body_only']),
+            'tab-size' => 4,
+            'wrap' => 0
+        );
+
+        /* US-ASCII text is handed to tidy as-is; any other charset is
+         * converted to UTF-8 for tidy and converted back afterwards. */
+        $charset = $this->_params['charset'];
+        $is_ascii = (strtolower($charset) === 'us-ascii');
+
+        $input = $is_ascii
+            ? $text
+            : Horde_String::convertCharset($text, $charset, 'UTF-8');
+
+        $tidy = tidy_parse_string($input, $tidy_config, $is_ascii ? 'ascii' : 'utf8');
+        if (!$tidy) {
+            /* tidy_parse_string() returns false on failure. Cleaning is
+             * best-effort, so fall back to the unmodified text instead of
+             * calling a method on a non-object. */
+            return $text;
+        }
+
+        $tidy->cleanRepair();
+        $output = tidy_get_output($tidy);
+
+        return $is_ascii
+            ? $output
+            : Horde_String::convertCharset($output, 'UTF-8', $charset);
+    }
+
+}
<api>beta</api>
</stability>
<license uri="http://www.gnu.org/copyleft/lesser.html">LGPL</license>
- <notes>* Initial Horde 4 package.
+ <notes>* Add support for using the tidy extension when filtering HTML data.
+ * Initial Horde 4 package.
</notes>
<contents>
<dir name="/">
<dir name="Filter">
<file name="Bbcode.php" role="php" />
<file name="Cleanascii.php" role="php" />
+ <file name="Cleanhtml.php" role="php" />
<file name="Dimsignature.php" role="php" />
<file name="Emails.php" role="php" />
<file name="Emoticons.php" role="php" />
<filelist>
<install name="lib/Horde/Text/Filter/Bbcode.php" as="Horde/Text/Filter/Bbcode.php" />
<install name="lib/Horde/Text/Filter/Cleanascii.php" as="Horde/Text/Filter/Cleanascii.php" />
+ <install name="lib/Horde/Text/Filter/Cleanhtml.php" as="Horde/Text/Filter/Cleanhtml.php" />
<install name="lib/Horde/Text/Filter/Dimsignature.php" as="Horde/Text/Filter/Dimsignature.php" />
<install name="lib/Horde/Text/Filter/Emails.php" as="Horde/Text/Filter/Emails.php" />
<install name="lib/Horde/Text/Filter/Emoticons.php" as="Horde/Text/Filter/Emoticons.php" />