Convert XSS filter to use DOM/XML parsing.
author Michael M Slusarz <slusarz@curecanti.org>
Thu, 22 Jul 2010 00:08:22 +0000 (18:08 -0600)
committer Michael M Slusarz <slusarz@curecanti.org>
Thu, 22 Jul 2010 19:25:35 +0000 (13:25 -0600)
This frees us from having to worry about malformed HTML; instead, we
directly filter the HTML input by removing specific tags and/or
attributes.

framework/Mime/lib/Horde/Mime/Viewer/Html.php
framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php
framework/Text_Filter/lib/Horde/Text/Filter/Xss.php
framework/Text_Filter/package.xml
framework/Text_Filter/test/Horde/Text/Filter/xss.phpt
imp/lib/Compose.php

index 4509ef1..00662fd 100644 (file)
@@ -142,8 +142,8 @@ class Horde_Mime_Viewer_Html extends Horde_Mime_Viewer_Driver
                 'charset' => isset($options['charset']) ? $options['charset'] : $this->_mimepart->getCharset()
             ),
             array(
-                'body_only' => !empty($options['inline']),
                 'noprefetch' => !empty($options['noprefetch']),
+                'return_document' => empty($options['inline']),
                 'strip_styles' => $strip_styles,
                 'strip_style_attributes' => $strip_style_attributes
             )
index f3591eb..29c0865 100644 (file)
@@ -6,6 +6,8 @@
  * <pre>
  * charset - (string) The charset to use for html_entity_decode() calls.
  * width - (integer) The wrapping width. Set to 0 to not wrap.
+ * charset - (string) The charset of the text.
+ * width - (integer) The wrapping width.
  * </pre>
  *
  * Copyright 2004-2010 The Horde Project (http://www.horde.org/)
index a37e5ba..0e48997 100644 (file)
@@ -1,25 +1,23 @@
 <?php
 /**
  * This filter attempts to make HTML safe for viewing. IT IS NOT PERFECT. If
- * you enable HTML viewing, you are opening a security hole. With the current
- * state of the web, I believe that the best we can do is to make sure that
- * people *KNOW* HTML is a security hole, clean up what we can, and leave it
- * at that.
+ * you enable HTML viewing, you are opening a security hole.
  *
  * Filter parameters:
  * ------------------
  * <pre>
- * 'body_only' - (boolean) Only scan within the HTML body tags?
- *               DEFAULT: true
+ * 'charset' - (string) The charset of the text.
+ *             DEFAULT: UTF-8
  * 'noprefetch' - (boolean) Disable DNS pre-fetching? See:
  *                https://developer.mozilla.org/En/Controlling_DNS_prefetching
  *                DEFAULT: false
- * 'replace' - (string) The string to replace filtered tags with.
- *             DEFAULT: 'XSSCleaned'
+ * 'return_document' - (boolean) If true, returns a full HTML
+ *                     representation of the document.
+ *                     DEFAULT: false (returns the contents contained inside
+ *                              the BODY tag)
  * 'strip_styles' - (boolean) Strip style tags?
  *                  DEFAULT: true
- * 'strip_style_attributes' - (boolean) Strip style attributes in all HTML
- *                            tags?
+ * 'strip_style_attributes' - (boolean) Strip style attributes in all tags?
  *                            DEFAULT: true
  * </pre>
  *
@@ -29,6 +27,7 @@
  * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
  *
  * @author   Jan Schneider <jan@horde.org>
+ * @author   Michael Slusarz <slusarz@horde.org>
  * @category Horde
  * @license  http://www.fsf.org/copyleft/lgpl.html LGPL
  * @package  Text_Filter
@@ -41,237 +40,47 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
      * @var array
      */
     protected $_params = array(
-        'body_only' => true,
+        'charset' => 'UTF-8',
         'noprefetch' => false,
-        'replace' => 'XSSCleaned',
+        'return_document' => false,
         'strip_styles' => true,
         'strip_style_attributes' => true
     );
 
     /**
-     * Stored CDATA information.
-     *
-     * @var string
-     */
-    protected $_cdata = array();
-
-    /**
-     * CDATA count.
-     *
-     * @var integer
-     */
-    protected $_cdatacount = 0;
-
-    /**
      * Returns a hash with replace patterns.
      *
      * @return array  Patterns hash.
      */
     public function getPatterns()
     {
-        $patterns = array();
-
-        /* Remove all control characters. */
-        $patterns['/[\x00-\x08\x0e-\x1f]/'] = '';
-
-        /* Removes HTML comments (including some scripts & styles). */
-        if ($this->_params['strip_styles']) {
-            $patterns['/<!--.*?-->/s'] = '';
-        }
-
-        /* Change space entities to space characters. */
-        $patterns['/&#(?:x0*20|0*32);?/i'] = ' ';
-
-        /* If we have a semicolon, it is deterministically detectable and
-         * fixable, without introducing collateral damage. */
-        $patterns['/&#x?0*(?:[9A-D]|1[0-3]);/i'] = '&nbsp;';
-
-        /* Hex numbers (usually having an x prefix) are also deterministic,
-         * even if we don't have the semi. Note that some browsers will treat
-         * &#a or &#0a as a hex number even without the x prefix; hence /x?/
-         * which will cover those cases in this rule. */
-        $patterns['/&#x?0*[9A-D]([^0-9A-F]|$)/i'] = '&nbsp\\1';
-
-        /* Decimal numbers without trailing semicolons. The problem is that
-         * some browsers will interpret &#10a as "\na", some as "&#x10a" so we
-         * have to clean the &#10 to be safe for the "\na" case at the expense
-         * of mangling a valid entity in other cases. (Solution for valid HTML
-         * authors: always use the semicolon.) */
-        $patterns['/&#0*(?:9|1[0-3])([^0-9]|$)/i'] = '&nbsp\\1';
-
-        /* Remove overly long numeric entities. */
-        $patterns['/&#x?0*[0-9A-F]{6,};?/i'] = '&nbsp;';
-
-        /* Remove everything outside of and including the <html> and <body>
-         * tags. */
-        if ($this->_params['body_only']) {
-            $patterns['/^.*<(?:body|html)[^>]*>/si'] = '';
-            $patterns['/<\/(?:body|html)>.*$/si'] = '';
-        }
-
-        /* Get all attribute="javascript:foo()" tags. This is essentially the
-         * regex /(=|url\()("?)[^>]*script:/ but expanded to catch camouflage
-         * with spaces and entities. */
-        $preg = '/((=|&#0*61;?|&#x0*3D;?)|' .
-                '((u|&#0*85;?|&#x0*55;?|&#0*117;?|&#x0*75;?|\\\\0*75)\s*' .
-                '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?|\\\\0*72)\s*' .
-                '(l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?|\\\\0*6c)\s*' .
-                '(\(|\\\\0*28)))\s*' .
-                '(\'|&#0*34;?|&#x0*22;?|"|&#0*39;?|&#x0*27;?)?' .
-                '[^>]*\s*' .
-                '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?|\\\\0*73)\s*' .
-                '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?|\\\\0*63)\s*' .
-                '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?|\\\\0*72)\s*' .
-                '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?|\\\\0*69)\s*' .
-                '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?|\\\\0*70)\s*' .
-                '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?|\\\\0*74)\s*' .
-                '(:|&#0*58;?|&#x0*3a;?|\\\\0*3a)/i';
-        $patterns[$preg] = '\1\8' . $this->_params['replace'];
-
-        /* Get all on<foo>="bar()". NEVER allow these. */
-        $patterns['/([\s"\'\/]+' .
-                  '(o|&#0*79;?|&#0*4f;?|&#0*111;?|&#0*6f;?)' .
-                  '(n|&#0*78;?|&#0*4e;?|&#0*110;?|&#0*6e;?)' .
-                  '\w+)[^=a-z0-9"\'>]*=/i'] = '\1' . $this->_params['replace'] . '=';
-
-        /* Remove all scripts since they might introduce garbage if they are
-         * not quoted properly. */
-        $patterns['|<script[^>]*>.*?</script>|is'] = '<' . $this->_params['replace'] . '_script />';
-
-        /* Get all tags that might cause trouble - <object>, <embed>,
-         * <applet>, etc. Meta refreshes and iframes, too. */
-        $malicious = array(
-            '/<([^>a-z]*)' .
-            '(?:s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*' .
-            '(?:c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*' .
-            '(?:r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
-            '(?:i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*' .
-            '(?:p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
-            '(?:t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:m|&#0*77;?|&#0*4d;?|&#0*109;?|&#0*6d;?)\s*' .
-            '(?:b|&#0*66;?|&#0*42;?|&#0*98;?|&#0*62;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:d|&#0*68;?|&#0*44;?|&#0*100;?|&#0*64;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:x|&#0*88;?|&#0*58;?|&#0*120;?|&#0*78;?)\s*' .
-            '(?:m|&#0*77;?|&#0*4d;?|&#0*109;?|&#0*6d;?)\s*' .
-            '(?:l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)/i',
-
-            '/<([^>a-z]*)\?([^>a-z]*)' .
-            '(?:i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*' .
-            '(?:m|&#0*77;?|&#0*4d;?|&#0*109;?|&#0*6d;?)\s*' .
-            '(?:p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
-            '(?:o|&#0*79;?|&#0*4f;?|&#0*111;?|&#0*6f;?)\s*' .
-            '(?:r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
-            '(?:t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:m|&#0*77;?|&#0*4d;?|&#0*109;?|&#0*6d;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:j|&#0*74;?|&#0*4a;?|&#0*106;?|&#0*6a;?)\s*' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)\s*' .
-            '(?:v|&#0*86;?|&#0*56;?|&#0*118;?|&#0*76;?)\s*' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:o|&#0*79;?|&#0*4f;?|&#0*111;?|&#0*6f;?)\s*' .
-            '(?:b|&#0*66;?|&#0*42;?|&#0*98;?|&#0*62;?)\s*' .
-            '(?:j|&#0*74;?|&#0*4a;?|&#0*106;?|&#0*6a;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*' .
-            '(?:t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)\s*' .
-            '(?:p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
-            '(?:p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
-            '(?:l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)\s*' .
-            '(?:y|&#0*89;?|&#0*59;?|&#0*121;?|&#0*79;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)/i',
-
-            '/<([^>a-z]*)' .
-            '(?:i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)?\s*' .
-            '(?:f|&#0*70;?|&#0*46;?|&#0*102;?|&#0*66;?)\s*' .
-            '(?:r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
-            '(?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)\s*' .
-            '(?:m|&#0*77;?|&#0*4d;?|&#0*109;?|&#0*6d;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)/i');
-
-        foreach ($malicious as $pattern) {
-            $patterns[$pattern] = '<$1' . $this->_params['replace'] . '_tag';
-        }
-
-        /* Comment out style/link tags. */
-        if ($this->_params['strip_styles']) {
-            if ($this->_params['strip_style_attributes']) {
-                $patterns['/(\s+|([\'"]))style\s*=/i'] = '$2 ' . $this->_params['replace'] . '=';
-            }
-            $patterns['|<style[^>]*>(?:\s*<\!--)*|i'] = '<!--';
-            $patterns['|(?:-->\s*)*</style>|i'] = '-->';
-            $patterns['|(<link[^>]*>)|i'] = '<!-- $1 -->';
-
-            /* We primarily strip out <base> tags due to styling concerns.
-             * There is a security issue with HREF tags, but the 'javascript'
-             * search/replace code sufficiently filters these strings. */
-            $patterns['|(<base[^>]*>)|i'] = '<!-- $1 -->';
-        }
-
-        /* A few other matches. */
-        $patterns['|<([^>]*)&{.*}([^>]*)>|'] = '<\1&{;}\2>';
-        $patterns['|<([^>]*)mocha:([^>]*)>|i'] = '<\1' . $this->_params['replace'] . ':\2>';
-        $patterns['/<(([^>]*)|(style[^>]*>[^<]*))binding:((?(3)[^<]*<\/style)[^>]*)>/i'] = '<\1' . $this->_params['replace'] . ':\4>';
-
-        return array('regexp' => $patterns);
-    }
-
-    /**
-     * Executes any code necessary before applying the filter patterns.
-     *
-     * @param string $text  The text before the filtering.
-     *
-     * @return string  The modified text.
-     */
-    public function preProcess($text)
-    {
-        // As of PHP 5.2, backtrack limits have been set to an unreasonably
-        // low number. The body check will often times trigger backtrack
-        // errors so up the backtrack limit if we are doing this match.
-        if ($this->_params['body_only'] && ini_get('pcre.backtrack_limit')) {
-            ini_set('pcre.backtrack_limit', 5000000);
-        }
-
-        // Remove and store CDATA data.
-        $text = preg_replace_callback('/<!\[CDATA\[.*?\]\]>/is', array($this, '_preProcessCallback'), $text);
-
-        return $text;
-    }
-
-    /**
-     * Preg callback for preProcess().
-     *
-     * @param array $matches  The list of matches.
-     *
-     * @return string  The replacement text.
-     */
-    protected function _preProcessCallback($matches)
-    {
-        $this->_cdata[] = $matches[0];
-        return '<HORDE_CDATA' . $this->_cdatacount++ . ' />';
+        return array('regexp' => array(
+            /* Remove control characters, except the whitespace range
+             * \x09-\x0d (tab, LF, VT, FF, CR), which the pattern skips. */
+            '/[\x00-\x08\x0e-\x1f]/' => '',
+
+            /* Change space entities to space characters. */
+            '/&#(?:x0*20|0*32);?/i' => ' ',
+
+            /* Numeric entities for the whitespace control characters are
+             * replaced with a non-breaking space. If we have a semicolon,
+             * it is deterministically detectable and fixable, without
+             * introducing collateral damage. */
+            '/&#x?0*(?:[9A-D]|1[0-3]);/i' => '&nbsp;',
+
+            /* Hex numbers (usually having an x prefix) are also deterministic,
+             * even if we don't have the semi. Note that some browsers will
+             * treat &#a or &#0a as a hex number even without the x prefix;
+             * hence /x?/ which will cover those cases in this rule. */
+            '/&#x?0*[9A-D]([^0-9A-F]|$)/i' => '&nbsp\\1',
+
+            /* Decimal numbers without trailing semicolons. The problem is
+             * that some browsers will interpret &#10a as "\na", some as
+             * "&#x10a" so we have to clean the &#10 to be safe for the "\na"
+             * case at the expense of mangling a valid entity in other cases.
+             * (Solution for valid HTML authors: always use the semicolon.) */
+            '/&#0*(?:9|1[0-3])([^0-9]|$)/i' => '&nbsp\\1',
+
+            /* Remove overly long numeric entities (six or more digits). */
+            '/&#x?0*[0-9A-F]{6,};?/i' => '&nbsp;'
+        ));
     }
 
     /**
@@ -283,53 +92,139 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
      */
     public function postProcess($text)
     {
-        /* Strip out data URLs living in an A HREF element (Bug #8715).
-         * Done here because we need to match more than 1 possible data
-         * entry per tag. */
-        $data_from = '/<((?:a|&#0*65;?|&#0*41;?|&#0*97;?|&#0*61;?)\b[^>]+?)' .
-            '(?:h|&#0*72;?|&#0*48;?|&#0*104;?|&#0*68;?)\s*' .
-            '(?:r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
-            '(?:e|&#0*69;?|&#0*45;?|&#0*101;?|&#0*65;?)\s*' .
-            '(?:f|&#0*70;?|&#0*46;?|&#0*102;?|&#0*66;?)\s*=' .
-            '("|\')?\s*data:(?(2)[^"\')>]*|[^\s)>]*)(?(2)\\2)/is';
-        $data_to = '<$1';
-        do {
-            $text = preg_replace($data_from, $data_to, $text, -1, $count);
-        } while ($count);
+        if (!extension_loaded('dom')) {
+            return $text;
+        }
+
+        $old_error = libxml_use_internal_errors(true);
+        $doc = new DOMDocument();
+        $doc->loadHTML($text);
+        if ($old_error) {
+            libxml_use_internal_errors(false);
+        }
 
-        ini_restore('pcre.backtrack_limit');
+        $this->_node($doc, $doc);
 
-        // Restore CDATA data
-        if ($this->_cdatacount) {
-            $text = preg_replace_callback('/<HORDE_CDATA(\d+) \/>/', array($this, '_postProcessCallback'), $text);
-            $this->_cdata = array();
-            $this->_cdatacount = 0;
+        if (!$this->_params['return_document']) {
+            $body = $doc->getElementsByTagName('body')->item(0);
         }
 
         if ($this->_params['noprefetch']) {
-            if (preg_match('/<head[^>]*>/si', $text, $matches, PREG_OFFSET_CAPTURE)) {
-                $end = $matches[0][1] + strlen($matches[0][0]);
-                $text = substr($text, 0, $end) .
-                    '<meta http-equiv="x-dns-prefetch-control" content="off" />' .
-                    substr($text, $end);
-            } else {
-                $text = '<meta http-equiv="x-dns-prefetch-control" content="off" />' . $text;
+            $meta = $doc->createElement('meta');
+            $meta->setAttribute('http-equiv', 'x-dns-prefetch-control');
+            $meta->setAttribute('value-equiv', 'off');
+
+            if ($this->_params['return_document']) {
+                $doc->getElementsByTagName('head')->item(0)->appendChild($meta);
+            } elseif ($body) {
+                $body->appendChild($meta);
+            }
+        }
+
+        $text = '';
+        if ($this->_params['return_document']) {
+            $text = $doc->saveHTML();
+        } elseif ($body && $body->hasChildNodes()) {
+            foreach ($body->childNodes as $child) {
+                $text .= $doc->saveXML($child);
             }
         }
 
-        return $text;
+        return Horde_String::convertCharset($text, $doc->encoding, $this->_params['charset']);
     }
 
     /**
-     * Preg callback for preProcess().
+     * Process DOM node.
      *
-     * @param array $matches  The list of matches.
+     * @param DOMDocument $doc  Document node.
+     * @param DOMElement $node  Element node.
      *
-     * @return string  The replacement text.
+     * @return string  The plaintext representation.
      */
-    protected function _postProcessCallback($matches)
+    protected function _node($doc, $node)
     {
-        return $this->_cdata[$matches[1]];
+        if ($node->hasChildNodes()) {
+            foreach ($node->childNodes as $child) {
+                if ($child instanceof DOMElement) {
+                    switch (strtolower($child->tagName)) {
+                    case 'a':
+                        /* Strip out data URLs living in an A HREF element
+                         * (Bug #8715). */
+                        if ($child->hasAttribute('href') &&
+                            preg_match("/\s*data:/i", $child->getAttribute('href'))) {
+                            $child->removeAttribute('href');
+                        }
+                        break;
+
+                    case 'applet':
+                    case 'embed':
+                    case 'iframe':
+                    case 'import':
+                    case 'java':
+                    case 'layer':
+                    case 'meta':
+                    case 'object':
+                    case 'script':
+                    case 'xml':
+                        /* Remove all tags that might cause trouble. */
+                        $node->removeChild($child);
+                        continue 2;
+
+                    case 'base':
+                    case 'link':
+                    case 'style':
+                        /* We primarily strip out <base> tags due to styling
+                         * concerns. There is a security issue with HREF tags,
+                         * but the 'javascript' search/replace code
+                         * sufficiently filters these strings. */
+                        if ($this->_params['strip_styles']) {
+                            $node->removeChild($child);
+                            continue 2;
+                        }
+                        break;
+
+                    case 'set':
+                        /* I believe this attack only works on old browsers.
+                         * But makes no sense allowing HTML to try to set
+                         * innerHTML anyway. */
+                        if ($child->hasAttribute('attributename') &&
+                            (strcasecmp($child->getAttribute('attributename'), 'innerHTML') === 0)) {
+                            $node->removeChild($child);
+                            continue 2;
+                        }
+                    }
+
+                    $remove = $this->_params['strip_style_attributes']
+                        ? array('style')
+                        : array();
+
+                    foreach ($child->attributes as $val) {
+                        /* Never allow on<foo>="bar()",
+                         * attribute="[mocha|*script]:foo()", or
+                         * attribute="&{...}". */
+                        if ((stripos(ltrim($val->name), 'on') === 0) ||
+                            preg_match("/^\s*(?:mocha:|[^:]+script:|&{)/i", $val->value)) {
+                            $remove[] = $val->name;
+                        }
+                    }
+
+                    foreach ($remove as $val) {
+                        $child->removeAttribute($val);
+                    }
+
+                    //$patterns['/<(([^>]*)|(style[^>]*>[^<]*))binding:((?(3)[^<]*<\/style)[^>]*)>/i'] = '<\1' . $this->_params['replace'] . ':\4>';
+                } elseif ($child instanceof DOMComment) {
+                    /* Remove HTML comments (including some scripts &
+                     * styles). */
+                    if ($this->_params['strip_styles']) {
+                        $node->removeChild($child);
+                        continue;
+                    }
+                }
+
+                $this->_node($doc, $child);
+            }
+        }
     }
 
 }
index 5bfbf5e..9b57d63 100644 (file)
@@ -37,7 +37,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
   <api>beta</api>
  </stability>
  <license uri="http://www.gnu.org/copyleft/lesser.html">LGPL</license>
- <notes>* Remove Horde/Core dependency.
+ <notes>* XSS filter now uses PHP DOM parser to process incoming text.
+ * Remove Horde/Core dependency.
  * Add Horde_Text_Filter_Exception::.
  * Html2text converter now uses XML parser to generate output.
  * Add ability to define filters to use with preg_replace_callback().
index c63ec0d..e5cdb43 100644 (file)
@@ -8,240 +8,208 @@ Horde_Text_Filter_Xss tests
 require dirname(__FILE__) . '/../../../../lib/Horde/Text/Filter.php';
 require dirname(__FILE__) . '/../../../../lib/Horde/Text/Filter/Base.php';
 require dirname(__FILE__) . '/../../../../lib/Horde/Text/Filter/Xss.php';
+require dirname(__FILE__) . '/../../../../../Util/lib/Horde/String.php';
+require dirname(__FILE__) . '/../../../../../Util/lib/Horde/Util.php';
 
 foreach (glob(dirname(__FILE__) . '/fixtures/xss*.html') as $file) {
-    $data = file_get_contents($file);
-    echo basename($file) . "\n";
-    echo Horde_Text_Filter::filter($data, 'xss', array('body_only' => false));
+    echo basename($file) . "\n" .
+        Horde_Text_Filter::filter(file_get_contents($file), 'xss') .
+        "\n";
 }
 
 foreach (glob(dirname(__FILE__) . '/fixtures/style_xss*.html') as $file) {
-    $data = file_get_contents($file);
-    echo basename($file) . "\n";
-    echo Horde_Text_Filter::filter($data, 'xss', array('body_only' => false, 'strip_styles' => false));
+    echo basename($file) . "\n" .
+        Horde_Text_Filter::filter(file_get_contents($file), 'xss', array(
+            'strip_styles' => false
+        )) .
+        "\n";
 }
 
 ?>
 --EXPECT--
 xss01.html
-<XSSCleaned_script />
+
 xss02.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img/>
 xss03.html
-<IMG SRC=XSSCleanedalert('XSS')>
+<img/>
 xss04.html
-<IMG SRC=XSSCleanedalert('XSS')>
+<img/>
 xss05.html
-<IMG SRC=XSSCleanedalert(&quot;XSS&quot;)>
+<img/>
 xss06.html
-<IMG SRC=XSSCleanedalert("RSnake says, 'XSS'")`>
+<img says=""/>
 xss07.html
-<IMG """><XSSCleaned_script />">
+<img/>"&gt;
+
 xss08.html
-<IMG SRC=XSSCleanedalert(String.fromCharCode(88,83,83))>
+<img/>
 xss09.html
-<IMG SRC=XSSCleaned&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41>
+<img/>
 xss10.html
-<IMG SRC=&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;>
+<img src="&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;"/>
 xss100.html
-<img src='blank.jpg' XSSCleaned='width:expression(alert("xssed"))'>
+<img src="blank.jpg"/>
 xss11.html
-<IMG SRC=XSSCleaned&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>
+<img/>
 xss12.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img/>
 xss13.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img/>
 xss14.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img/>
 xss15.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img/>
 xss16.html
-<IMG
-SRC
-=XSSCleaned
-a
-l
-e
-r
-t
-(
-'
-X
-S
-S
-'
-)
-"
->
+<img src="j" a="" v="" s="" c="" r="" i="" p="" t="" :="" l="" e="" x=""/>
 xss17.html
-<IMG SRC=XSSCleanedalert("XSS")>
+<img/>
 xss18.html
-<XSSCleaned_script />
+
 xss19.html
-<IMG SRC="XSSCleanedalert('XSS');">
+<img src=" "/>
 xss20.html
-<XSSCleaned_script />
+
 xss21.html
-<BODY onloadXSSCleaned=alert("XSS")>
+
 xss22.html
-<XSSCleaned_script />
+
 xss23.html
-<<XSSCleaned_script />
+<p>alert("XSS");//</p>
 xss24.html
-<XSSCleaned_tag SRC=http://ha.ckers.org/xss.js?<B>
+
 xss25.html
-<XSSCleaned_tag SRC=//ha.ckers.org/.j>
+
 xss26.html
-<IMG SRC="XSSCleanedalert('XSS')"
+<img/>
 xss27.html
-<XSSCleaned_tag src=http://ha.ckers.org/scriptlet.html <
+
 xss28.html
-<XSSCleaned_script />
+
 xss29.html
-</TITLE><XSSCleaned_script />
+
 xss30.html
-<INPUT TYPE="XSSCleanedalert('XSS');">
+<input type="IMAGE"/>
 xss31.html
-<BODY BACKGROUND="XSSCleanedalert('XSS')">
+
 xss32.html
-<BODY ONLOADXSSCleaned=alert('XSS')>
+
 xss33.html
-<IMG DYNSRC="XSSCleanedalert('XSS')">
+<img/>
 xss34.html
-<IMG LOWSRC="XSSCleanedalert('XSS')">
+<img/>
 xss35.html
-<BGSOUND SRC="XSSCleanedalert('XSS');">
+<bgsound/>
 xss36.html
-<BR SIZE="&{;}">
+<br/>
 xss37.html
-<XSSCleaned_tag SRC="http://ha.ckers.org/scriptlet.html"></XSSCleaned_tag>
+
 xss38.html
-<!-- <LINK REL="XSSCleanedalert('XSS');"> -->
+
 xss39.html
-<!-- <LINK REL="stylesheet" HREF="http://ha.ckers.org/xss.css"> -->
+
 xss40.html
-<!--@import'http://ha.ckers.org/xss.css';-->
+
 xss41.html
-<XSSCleaned_tag HTTP-EQUIV="Link" Content="<http://ha.ckers.org/xss.css>; REL=stylesheet">
+
 xss42.html
-<!--BODY{-moz-XSSCleaned:url("http://ha.ckers.org/xssmoz.xml#xss")}-->
+
 xss43.html
-<XSS XSSCleaned="behavior: url(xss.htc);">
+<xss/>
 xss44.html
-<!--li {list-style-image: url("XSSCleanedalert('XSS')");}--><UL><LI>XSS
+<ul><li>XSS
+</li></ul>
 xss45.html
-<IMG SRC='XSSCleanedmsgbox("XSS")'>
+<img/>
 xss46.html
-<IMG SRC="XSSCleaned:[code]">
+<img/>
 xss47.html
-<IMG SRC="XSSCleaned[code]">
+<img/>
 xss48.html
-<XSSCleaned_tag HTTP-EQUIV="XSSCleanedalert('XSS');">
+
 xss49.html
-<XSSCleaned_tag HTTP-EQUIV="refresh" CONTENT="0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K">
+
 xss50.html
-<XSSCleaned_tag HTTP-EQUIV="XSSCleanedalert('XSS');">
+
 xss51.html
-<XSSCleaned_tag SRC=XSSCleanedalert('XSS')></XSSCleaned_tag>
+
 xss52.html
-<XSSCleaned_tagSET><XSSCleaned_tag SRC=XSSCleanedalert('XSS')></XSSCleaned_tag></XSSCleaned_tagSET>
+
 xss53.html
-<TABLE BACKGROUND="XSSCleanedalert('XSS')">
+<table/>
 xss54.html
-<TABLE><TD BACKGROUND="XSSCleanedalert('XSS')">
+<table><td/></table>
 xss55.html
-<DIV XSSCleaned="XSSCleanedalert('XSS'))">
+<div/>
 xss56.html
-<DIV XSSCleaned="XSSCleaned\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">
+<div/>
 xss57.html
-<DIV XSSCleaned="XSSCleanedalert('XSS'))">
+<div/>
 xss58.html
-<DIV XSSCleaned="width: expression(alert('XSS'));">
+<div/>
 xss59.html
-<!--@im\port'\ja\vasc\ript:alert("XSS")';-->
+
 xss60.html
-<IMG XSSCleaned="xss:expr/*XSS*/ession(alert('XSS'))">
+<img/>
 xss61.html
-<XSS XSSCleaned="xss:expression(alert('XSS'))">
+<xss/>
 xss62.html
-exp/*<A XSSCleaned='no\xss:noxss("*//*");
-xss:&#101;x&#x2F;*XSS*//*/*/pression(alert("XSS"))'>
+<p>exp/*<a/></p>
 xss63.html
-<!--alert('XSS');-->
+
 xss64.html
-<!--.XSS{background-image:url("XSSCleanedalert('XSS')");}--><A CLASS=XSS></A>
+
 xss65.html
-<!--BODY{background:url("XSSCleanedalert('XSS')")}-->
+
 xss66.html
 
 xss67.html
-<!-- <BASE HREF="XSSCleanedalert('XSS');//"> -->
+
 xss68.html
-<XSSCleaned_tag TYPE="text/x-scriptlet" DATA="http://ha.ckers.org/scriptlet.html"></XSSCleaned_tag>
+
 xss69.html
-<XSSCleaned_tag classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=XSSCleanedalert('XSS')></XSSCleaned_tag>
+
 xss70.html
-<XSSCleaned_tag SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></XSSCleaned_tag>
+
 xss71.html
-<XSSCleaned_tag SRC=" A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==" type="image/svg+xml" AllowScriptAccess="always"></XSSCleaned_tag>
+
 xss72.html
-<HTML xmlns:xss>
-  <XSSCleaned_tag namespace="xss" implementation="http://ha.ckers.org/xss.htc">
-  <xss:xss>XSS</xss:xss>
-</HTML>
+<xss>XSS</xss>
 xss73.html
-<XSSCleaned_tag ID=I><X><C><![CDATA[<IMG SRC="javas]]><![CDATA[cript:alert('XSS');">]]>
-</C></X></XSSCleaned_tag><SPAN DATASRC=#I DATAFLD=C DATAFORMATAS=HTML></SPAN>
+<span datasrc="#I" datafld="C" dataformatas="HTML"/>
 xss74.html
-<XSSCleaned_tag ID="xss"><I><B>&lt;IMG SRC="XSSCleanedalert('XSS')"&gt;</B></I></XSSCleaned_tag>
-<SPAN DATASRC="#xss" DATAFLD="B" DATAFORMATAS="HTML"></SPAN>
+<span datasrc="#xss" datafld="B" dataformatas="HTML"/>
 xss75.html
-<XSSCleaned_tag SRC="xsstest.xml" ID=I></XSSCleaned_tag>
-<SPAN DATASRC=#I DATAFLD=C DATAFORMATAS=HTML></SPAN>
+<span datasrc="#I" datafld="C" dataformatas="HTML"/>
 xss76.html
-<HTML><BODY>
-<?XSSCleaned_tag:namespace prefix="t" ns="urn:schemas-microsoft-com:time">
-<XSSCleaned_tag namespace="t" implementation="#default#time2">
-<t:set attributeName="innerHTML" to="XSS&lt;SCRIPT DEFER&gt;alert(&quot;XSS&quot;)&lt;/SCRIPT&gt;">
-</BODY></HTML>
+
+
 xss77.html
-<XSSCleaned_tag SRC="http://ha.ckers.org/xss.jpg"><XSSCleaned_tag>
+
 xss78.html
-<IMG SRC="XSSCleanedalert('XSS')"
+<img/>
 xss79.html
-<XSSCleaned_script />
+
 xss80.html
-<XSSCleaned_script />
+
 xss81.html
-<XSSCleaned_script />
+
 xss82.html
-<XSSCleaned_script />
+
 xss83.html
-<XSSCleaned_script />
+
 xss84.html
-<XSSCleaned_script />
+
 xss85.html
-<XSSCleaned_script />PT SRC="http://ha.ckers.org/a.js"></XSSCleaned_tag>
+<p>PT SRC="http://ha.ckers.org/a.js"&gt;</p>
 xss95.html
-<a  >Click me</a>
+<a>Click me</a>
 xss96.html
-<a >Click me</a>
+<a>Click me</a>
 xss97.html
-<body/onloadXSSCleaned=alert(/xss/)>
+
 xss98.html
-<XSSCleaned_tagset rows="15,15,15,15,15,15,15,15,15,*">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=delete_messages&amp;targetMbox=&amp;newMbox=0&amp;flag=&amp;indices%5B%5D=199&amp;indices%5B%5D=200&amp;indices%5B%5D=201&amp;indices%5B%5D=202&amp;indices%5B%5D=203&amp;indices%5B%5D=204&amp;indices%5B%5D=205&amp;indices%5B%5D=206&amp;indices%5B%5D=207&amp;indices%5B%5D=208&amp;indices%5B%5D=209&amp;indices%5B%5D=210&amp;indices%5B%5D=211&amp;indices%5B%5D=212&amp;indices%5B%5D=213&amp;indices%5B%5D=214&amp;indices%5B%5D=215&amp;indices%5B%5D=216&amp;indices%5B%5D=217&amp;indices%5B%5D=218&amp;indices%5B%5D=219&amp;indices%5B%5D=220&amp;indices%5B%5D=221&amp;indices%5B%5D=222&amp;indices%5B%5D=223&amp;indices%5B%5D=224&amp;indices%5B%5D=225&amp;indices%5B%5D=226&amp;indices%5B%5D=227&amp;indices%5B%5D=228&amp;indices%5B%5D=229&amp;indices%5B%5D=230&amp;indices%5B%5D=231&amp;indices%5B%5D=232&amp;indices%5B%5D=233&amp;indices%5B%5D=234&amp;indices%5B%5D=235&amp;indices%5B%5D=236&amp;indices%5B%5D=237&amp;indices%5B%5D=238&amp;indices%5B%5D=239&amp;indices%5B%5D=240&amp;indices%5B%5D=241&amp;indices%5B%5D=242&amp;indices%5B%5D=243&amp;indices%5B%5D=244&amp;indices%5B%5D=245&amp;indices%5B%5D=246&amp;indices%5B%5D=247&amp;indices%5B%5D=248&amp;indices%5B%5D=249&amp;indices%5B%5D=250&amp;indices%5B%5D=251&amp;indices%5B%5D=252&amp;indices%5B%5D=253&amp;indices%5B%5D=254&amp;indices%5B%5D=255&amp;indices%5B%5D=256&amp;indices%5B%5D=257&amp;indices%5B%5D=258&amp;indices%5B%5D=259&amp;indices%5B%5D=260&amp;indices%5B%5D=261&amp;indices%5B%5D=262&amp;indices%5B%5D=263&amp;indices%5B%5D=264&amp;indices%5B%5D=265&amp;indices%5B%5D=266&amp;indices%5B%5D=267&amp;indices%5B%5D=268&amp;indices%5B%5D=269&amp;indices%5B%5D=270&amp;indices%5B%5D=271&amp;indices%5B%5D=272&amp;indices%5B%5D=273&amp;indices%5B%5D=274&amp;indices%5B%5D=275&amp;indices%5B%5D=276&amp;indices%5B%5D=277&amp;indices%5B%5D=278&amp;indices%5B%5D=279&amp;indices%5B%5D=280&amp;indices%5B%5D=281&amp;indices%5B%5D=282&amp;indices%5B%5D=283&amp;indices%5B%5D=28
4&amp;indices%5B%5D=285&amp;indices%5B%5D=286&amp;indices%5B%5D=287&amp;indices%5B%5D=288&amp;indices%5B%5D=289&amp;indices%5B%5D=290&amp;indices%5B%5D=291&amp;indices%5B%5D=292&amp;indices%5B%5D=293&amp;indices%5B%5D=294&amp;indices%5B%5D=295&amp;indices%5B%5D=296&amp;indices%5B%5D=297&amp;indices%5B%5D=298">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=delete_messages&amp;targetMbox=&amp;newMbox=0&amp;flag=&amp;indices%5B%5D=299&amp;indices%5B%5D=300&amp;indices%5B%5D=301&amp;indices%5B%5D=302&amp;indices%5B%5D=303&amp;indices%5B%5D=304&amp;indices%5B%5D=305&amp;indices%5B%5D=306&amp;indices%5B%5D=307&amp;indices%5B%5D=308&amp;indices%5B%5D=309&amp;indices%5B%5D=310&amp;indices%5B%5D=311&amp;indices%5B%5D=312&amp;indices%5B%5D=313&amp;indices%5B%5D=314&amp;indices%5B%5D=315&amp;indices%5B%5D=316&amp;indices%5B%5D=317&amp;indices%5B%5D=318&amp;indices%5B%5D=319&amp;indices%5B%5D=320&amp;indices%5B%5D=321&amp;indices%5B%5D=322&amp;indices%5B%5D=323&amp;indices%5B%5D=324&amp;indices%5B%5D=325&amp;indices%5B%5D=326&amp;indices%5B%5D=327&amp;indices%5B%5D=328&amp;indices%5B%5D=329&amp;indices%5B%5D=330&amp;indices%5B%5D=331&amp;indices%5B%5D=332&amp;indices%5B%5D=333&amp;indices%5B%5D=334&amp;indices%5B%5D=335&amp;indices%5B%5D=336&amp;indices%5B%5D=337&amp;indices%5B%5D=338&amp;indices%5B%5D=339&amp;indices%5B%5D=340&amp;indices%5B%5D=341&amp;indices%5B%5D=342&amp;indices%5B%5D=343&amp;indices%5B%5D=344&amp;indices%5B%5D=345&amp;indices%5B%5D=346&amp;indices%5B%5D=347&amp;indices%5B%5D=348&amp;indices%5B%5D=349&amp;indices%5B%5D=350&amp;indices%5B%5D=351&amp;indices%5B%5D=352&amp;indices%5B%5D=353&amp;indices%5B%5D=354&amp;indices%5B%5D=355&amp;indices%5B%5D=356&amp;indices%5B%5D=357&amp;indices%5B%5D=358&amp;indices%5B%5D=359&amp;indices%5B%5D=360&amp;indices%5B%5D=361&amp;indices%5B%5D=362&amp;indices%5B%5D=363&amp;indices%5B%5D=364&amp;indices%5B%5D=365&amp;indices%5B%5D=366&amp;indices%5B%5D=367&amp;indices%5B%5D=368&amp;indices%5B%5D=369&amp;indices%5B%5D=370&amp;indices%5B%5D=371&amp;indices%5B%5D=372&amp;indices%5B%5D=373&amp;indices%5B%5D=374&amp;indices%5B%5D=375&amp;indices%5B%5D=376&amp;indices%5B%5D=377&amp;indices%5B%5D=378&amp;indices%5B%5D=379&amp;indices%5B%5D=380&amp;indices%5B%5D=381&amp;indices%5B%5D=382&amp;indices%5B%5D=383&amp;indices%5B%5D=38
4&amp;indices%5B%5D=385&amp;indices%5B%5D=386&amp;indices%5B%5D=387&amp;indices%5B%5D=388&amp;indices%5B%5D=389&amp;indices%5B%5D=390&amp;indices%5B%5D=391&amp;indices%5B%5D=392&amp;indices%5B%5D=393&amp;indices%5B%5D=394&amp;indices%5B%5D=395&amp;indices%5B%5D=396&amp;indices%5B%5D=397&amp;indices%5B%5D=398">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=delete_messages&amp;targetMbox=&amp;newMbox=0&amp;flag=&amp;indices%5B%5D=399&amp;indices%5B%5D=400&amp;indices%5B%5D=401&amp;indices%5B%5D=402&amp;indices%5B%5D=403&amp;indices%5B%5D=404&amp;indices%5B%5D=405&amp;indices%5B%5D=406&amp;indices%5B%5D=407&amp;indices%5B%5D=408&amp;indices%5B%5D=409&amp;indices%5B%5D=410&amp;indices%5B%5D=411&amp;indices%5B%5D=412&amp;indices%5B%5D=413&amp;indices%5B%5D=414&amp;indices%5B%5D=415&amp;indices%5B%5D=416&amp;indices%5B%5D=417&amp;indices%5B%5D=418&amp;indices%5B%5D=419&amp;indices%5B%5D=420&amp;indices%5B%5D=421&amp;indices%5B%5D=422&amp;indices%5B%5D=423&amp;indices%5B%5D=424&amp;indices%5B%5D=425&amp;indices%5B%5D=426&amp;indices%5B%5D=427&amp;indices%5B%5D=428&amp;indices%5B%5D=429&amp;indices%5B%5D=430&amp;indices%5B%5D=431&amp;indices%5B%5D=432&amp;indices%5B%5D=433&amp;indices%5B%5D=434&amp;indices%5B%5D=435&amp;indices%5B%5D=436&amp;indices%5B%5D=437&amp;indices%5B%5D=438&amp;indices%5B%5D=439&amp;indices%5B%5D=440&amp;indices%5B%5D=441&amp;indices%5B%5D=442&amp;indices%5B%5D=443&amp;indices%5B%5D=444&amp;indices%5B%5D=445&amp;indices%5B%5D=446&amp;indices%5B%5D=447&amp;indices%5B%5D=448&amp;indices%5B%5D=449&amp;indices%5B%5D=450&amp;indices%5B%5D=451&amp;indices%5B%5D=452&amp;indices%5B%5D=453&amp;indices%5B%5D=454&amp;indices%5B%5D=455&amp;indices%5B%5D=456&amp;indices%5B%5D=457&amp;indices%5B%5D=458&amp;indices%5B%5D=459&amp;indices%5B%5D=460&amp;indices%5B%5D=461&amp;indices%5B%5D=462&amp;indices%5B%5D=463&amp;indices%5B%5D=464&amp;indices%5B%5D=465&amp;indices%5B%5D=466&amp;indices%5B%5D=467&amp;indices%5B%5D=468&amp;indices%5B%5D=469&amp;indices%5B%5D=470&amp;indices%5B%5D=471&amp;indices%5B%5D=472&amp;indices%5B%5D=473&amp;indices%5B%5D=474&amp;indices%5B%5D=475&amp;indices%5B%5D=476&amp;indices%5B%5D=477&amp;indices%5B%5D=478&amp;indices%5B%5D=479&amp;indices%5B%5D=480&amp;indices%5B%5D=481&amp;indices%5B%5D=482&amp;indices%5B%5D=483&amp;indices%5B%5D=48
4&amp;indices%5B%5D=485&amp;indices%5B%5D=486&amp;indices%5B%5D=487&amp;indices%5B%5D=488&amp;indices%5B%5D=489&amp;indices%5B%5D=490&amp;indices%5B%5D=491&amp;indices%5B%5D=492&amp;indices%5B%5D=493&amp;indices%5B%5D=494&amp;indices%5B%5D=495&amp;indices%5B%5D=496&amp;indices%5B%5D=497&amp;indices%5B%5D=498">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=delete_messages&amp;targetMbox=&amp;newMbox=0&amp;flag=&amp;indices%5B%5D=499&amp;indices%5B%5D=500&amp;indices%5B%5D=501&amp;indices%5B%5D=502&amp;indices%5B%5D=503&amp;indices%5B%5D=504&amp;indices%5B%5D=505&amp;indices%5B%5D=506&amp;indices%5B%5D=507&amp;indices%5B%5D=508&amp;indices%5B%5D=509&amp;indices%5B%5D=510&amp;indices%5B%5D=511&amp;indices%5B%5D=512&amp;indices%5B%5D=513&amp;indices%5B%5D=514&amp;indices%5B%5D=515&amp;indices%5B%5D=516&amp;indices%5B%5D=517&amp;indices%5B%5D=518&amp;indices%5B%5D=519&amp;indices%5B%5D=520&amp;indices%5B%5D=521&amp;indices%5B%5D=522&amp;indices%5B%5D=523&amp;indices%5B%5D=524&amp;indices%5B%5D=525&amp;indices%5B%5D=526&amp;indices%5B%5D=527&amp;indices%5B%5D=528&amp;indices%5B%5D=529&amp;indices%5B%5D=530&amp;indices%5B%5D=531&amp;indices%5B%5D=532&amp;indices%5B%5D=533&amp;indices%5B%5D=534&amp;indices%5B%5D=535&amp;indices%5B%5D=536&amp;indices%5B%5D=537&amp;indices%5B%5D=538&amp;indices%5B%5D=539&amp;indices%5B%5D=540&amp;indices%5B%5D=541&amp;indices%5B%5D=542&amp;indices%5B%5D=543&amp;indices%5B%5D=544&amp;indices%5B%5D=545&amp;indices%5B%5D=546&amp;indices%5B%5D=547&amp;indices%5B%5D=548&amp;indices%5B%5D=549&amp;indices%5B%5D=550&amp;indices%5B%5D=551&amp;indices%5B%5D=552&amp;indices%5B%5D=553&amp;indices%5B%5D=554&amp;indices%5B%5D=555&amp;indices%5B%5D=556&amp;indices%5B%5D=557&amp;indices%5B%5D=558&amp;indices%5B%5D=559&amp;indices%5B%5D=560&amp;indices%5B%5D=561&amp;indices%5B%5D=562&amp;indices%5B%5D=563&amp;indices%5B%5D=564&amp;indices%5B%5D=565&amp;indices%5B%5D=566&amp;indices%5B%5D=567&amp;indices%5B%5D=568&amp;indices%5B%5D=569&amp;indices%5B%5D=570&amp;indices%5B%5D=571&amp;indices%5B%5D=572&amp;indices%5B%5D=573&amp;indices%5B%5D=574&amp;indices%5B%5D=575&amp;indices%5B%5D=576&amp;indices%5B%5D=577&amp;indices%5B%5D=578&amp;indices%5B%5D=579&amp;indices%5B%5D=580&amp;indices%5B%5D=581&amp;indices%5B%5D=582&amp;indices%5B%5D=583&amp;indices%5B%5D=58
4&amp;indices%5B%5D=585&amp;indices%5B%5D=586&amp;indices%5B%5D=587&amp;indices%5B%5D=588&amp;indices%5B%5D=589&amp;indices%5B%5D=590&amp;indices%5B%5D=591&amp;indices%5B%5D=592&amp;indices%5B%5D=593&amp;indices%5B%5D=594&amp;indices%5B%5D=595&amp;indices%5B%5D=596&amp;indices%5B%5D=597&amp;indices%5B%5D=598">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=delete_messages&amp;targetMbox=&amp;newMbox=0&amp;flag=&amp;indices%5B%5D=599&amp;indices%5B%5D=600&amp;indices%5B%5D=601&amp;indices%5B%5D=602&amp;indices%5B%5D=603&amp;indices%5B%5D=604&amp;indices%5B%5D=605&amp;indices%5B%5D=606&amp;indices%5B%5D=607&amp;indices%5B%5D=608&amp;indices%5B%5D=609&amp;indices%5B%5D=610&amp;indices%5B%5D=611&amp;indices%5B%5D=612&amp;indices%5B%5D=613&amp;indices%5B%5D=614&amp;indices%5B%5D=615&amp;indices%5B%5D=616&amp;indices%5B%5D=617&amp;indices%5B%5D=618&amp;indices%5B%5D=619&amp;indices%5B%5D=620&amp;indices%5B%5D=621&amp;indices%5B%5D=622&amp;indices%5B%5D=623&amp;indices%5B%5D=624&amp;indices%5B%5D=625&amp;indices%5B%5D=626&amp;indices%5B%5D=627&amp;indices%5B%5D=628&amp;indices%5B%5D=629&amp;indices%5B%5D=630&amp;indices%5B%5D=631&amp;indices%5B%5D=632&amp;indices%5B%5D=633&amp;indices%5B%5D=634&amp;indices%5B%5D=635&amp;indices%5B%5D=636&amp;indices%5B%5D=637&amp;indices%5B%5D=638&amp;indices%5B%5D=639&amp;indices%5B%5D=640&amp;indices%5B%5D=641&amp;indices%5B%5D=642&amp;indices%5B%5D=643&amp;indices%5B%5D=644&amp;indices%5B%5D=645&amp;indices%5B%5D=646&amp;indices%5B%5D=647&amp;indices%5B%5D=648&amp;indices%5B%5D=649&amp;indices%5B%5D=650&amp;indices%5B%5D=651&amp;indices%5B%5D=652&amp;indices%5B%5D=653&amp;indices%5B%5D=654&amp;indices%5B%5D=655&amp;indices%5B%5D=656&amp;indices%5B%5D=657&amp;indices%5B%5D=658&amp;indices%5B%5D=659&amp;indices%5B%5D=660&amp;indices%5B%5D=661&amp;indices%5B%5D=662&amp;indices%5B%5D=663&amp;indices%5B%5D=664&amp;indices%5B%5D=665&amp;indices%5B%5D=666&amp;indices%5B%5D=667&amp;indices%5B%5D=668&amp;indices%5B%5D=669&amp;indices%5B%5D=670&amp;indices%5B%5D=671&amp;indices%5B%5D=672&amp;indices%5B%5D=673&amp;indices%5B%5D=674&amp;indices%5B%5D=675&amp;indices%5B%5D=676&amp;indices%5B%5D=677&amp;indices%5B%5D=678&amp;indices%5B%5D=679&amp;indices%5B%5D=680&amp;indices%5B%5D=681&amp;indices%5B%5D=682&amp;indices%5B%5D=683&amp;indices%5B%5D=68
4&amp;indices%5B%5D=685&amp;indices%5B%5D=686&amp;indices%5B%5D=687&amp;indices%5B%5D=688&amp;indices%5B%5D=689&amp;indices%5B%5D=690&amp;indices%5B%5D=691&amp;indices%5B%5D=692&amp;indices%5B%5D=693&amp;indices%5B%5D=694&amp;indices%5B%5D=695&amp;indices%5B%5D=696&amp;indices%5B%5D=697&amp;indices%5B%5D=698">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=expunge_mailbox">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=expunge_mailbox">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=expunge_mailbox">
-<XSSCleaned_tag src="mailbox.php?page=1&amp;actionID=expunge_mailbox">
-<XSSCleaned_tag src="http://secunia.com/">
-</XSSCleaned_tagset>
+
 xss99.html
-<img src=""> <BODY ONLOADXSSCleaned="a();"><XSSCleaned_script /><"" />
+<img src=""/>
 style_xss01.html
-<BASE HREF="XSSCleanedalert('XSS');//">
index 31e7231..5cfc69c 100644 (file)
@@ -2462,7 +2462,7 @@ class IMP_Compose
         }
 
         if ($mode == 'html') {
-            $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, array('cleanhtml', 'xss'), array(array('body_only' => true), array('body_only' => true, 'strip_styles' => true, 'strip_style_attributes' => false)));
+            $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, array('cleanhtml', 'xss'), array(array('body_only' => true), array('strip_styles' => true, 'strip_style_attributes' => false)));
         } elseif ($type == 'text/html') {
             $msg = $GLOBALS['injector']->getInstance('Horde_Text_Filter')->filter($msg, 'html2text');
             $type = 'text/plain';