Fix bad encoding when parsing HTML data
author Michael M Slusarz <slusarz@curecanti.org>
Fri, 6 Aug 2010 18:54:50 +0000 (12:54 -0600)
committer Michael M Slusarz <slusarz@curecanti.org>
Fri, 6 Aug 2010 19:03:11 +0000 (13:03 -0600)
framework/Text_Filter/lib/Horde/Text/Filter/Xss.php

index 0dc9d3c..4569718 100644 (file)
@@ -106,6 +106,11 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
         $old_error = libxml_use_internal_errors(true);
         $doc = new DOMDocument();
         $doc->loadHTML($text);
+        if (!$doc->encoding) {
+            /* If libxml can't auto-detect encoding, convert to ISO-8859-1
+             * manually. */
+            $doc->loadHTML(Horde_String::convertCharset($text, $this->_params['charset'], 'ISO-8859-1'));
+        }
         if ($old_error) {
             libxml_use_internal_errors(false);
         }