Bug #9187: Use same DOM loading technique that we use for XSS filter
author Michael M Slusarz <slusarz@curecanti.org>
Thu, 19 Aug 2010 17:07:14 +0000 (11:07 -0600)
committer Michael M Slusarz <slusarz@curecanti.org>
Thu, 19 Aug 2010 17:07:14 +0000 (11:07 -0600)
framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php

index 71751fe..9212d96 100644 (file)
@@ -102,11 +102,14 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base
     public function postProcess($text)
     {
         if (extension_loaded('dom')) {
-            $text = Horde_String::convertCharset($text, $this->_params['charset'], 'UTF-8');
-
             $old_error = libxml_use_internal_errors(true);
             $doc = new DOMDocument();
-            $doc->loadHTML('<?xml encoding="UTF-8">' . $text);
+            $doc->loadHTML($text);
+            if (!$doc->encoding) {
+                /* If libxml can't auto-detect encoding, convert to ISO-8859-1
+                 * manually. */
+                $doc->loadHTML(Horde_String::convertCharset($text, $this->_params['charset'], 'ISO-8859-1'));
+            }
             if ($old_error) {
                 libxml_use_internal_errors(false);
             }