From: Michael M Slusarz Date: Thu, 7 Oct 2010 21:07:59 +0000 (-0600) Subject: Partially revert "All data passed around in Horde internally should be UTF-8." X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=86f36efd4c3d51cbb39cac45f52cf774f3c91a04;p=horde.git Partially revert "All data passed around in Horde internally should be UTF-8." This partially reverts commit e60da622060274282dd92c2d41c4c0dce4724dd6. Horde_Text_Filter is not part of Horde the application - so there is no guarantee that text is in UTF-8. In fact, much of the usefulness of this package is it does the necessary charset conversion as needed. Example: for Horde_Mime_Part objects created from message data, the text may be in a different charset than UTF-8. This is perfectly fine - unnecessary charset conversion may just be a waste of time. This fixes incorrect character display in IMP (at least for the few messages I have tested so far). --- diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php b/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php index 0f9a39388..934a70d50 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Cleanhtml.php @@ -29,6 +29,7 @@ class Horde_Text_Filter_Cleanhtml extends Horde_Text_Filter_Base */ protected $_params = array( 'body_only' => false, + 'charset' => 'us-ascii', 'size' => false ); @@ -59,10 +60,17 @@ class Horde_Text_Filter_Cleanhtml extends Horde_Text_Filter_Base 'wrap' => 0 ); - $tidy = @tidy_parse_string($text, $tidy_config, 'utf8'); - $tidy->cleanRepair(); + if (strtolower($this->_params['charset']) == 'us-ascii') { + $tidy = @tidy_parse_string($text, $tidy_config, 'ascii'); + $tidy->cleanRepair(); + $text = tidy_get_output($tidy); + } else { + $tidy = @tidy_parse_string(Horde_String::convertCharset($text, $this->_params['charset'], 'UTF-8'), $tidy_config, 'utf8'); + $tidy->cleanRepair(); + $text = Horde_String::convertCharset(tidy_get_output($tidy), 
'UTF-8', $this->_params['charset']); + } - return tidy_get_output($tidy); + return $text; } } diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php index 97117d912..a824d14eb 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php @@ -45,6 +45,7 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base */ protected $_params = array( 'callback' => null, + 'charset' => 'UTF-8', 'width' => 75 ); @@ -92,8 +93,8 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base public function postProcess($text) { try { - $dom = new Horde_Domhtml($text, 'UTF-8'); - $text = $this->_node($dom->dom, $dom->dom); + $dom = new Horde_Domhtml($text, $this->_params['charset']); + $text = Horde_String::convertCharset($this->_node($dom->dom, $dom->dom), null, $this->_params['charset']); $dom_convert = true; } catch (Exception $e) { $text = strip_tags(preg_replace("/\<br\s*\/?\>/i", "\n", $text)); @@ -109,7 +110,7 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base if ($dom_convert && $this->_params['_bq'] && class_exists('Horde_Text_Flowed')) { - $flowed = new Horde_Text_Flowed($text, 'UTF-8'); + $flowed = new Horde_Text_Flowed($text, $this->_params['charset']); $flowed->setOptLength($this->_params['width']); $text = $flowed->toFlowed(); } else { diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Space2html.php b/framework/Text_Filter/lib/Horde/Text/Filter/Space2html.php index 2cee35687..17f902deb 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Space2html.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Space2html.php @@ -28,6 +28,7 @@ class Horde_Text_Filter_Space2html extends Horde_Text_Filter_Base * @var array */ protected $_params = array( + 'charset' => 'ISO-8859-1', 'encode' => false, 'encode_all' => false ); @@ -42,7 +43,7 @@ class Horde_Text_Filter_Space2html extends 
Horde_Text_Filter_Base public function preProcess($text) { if ($this->_params['encode']) { - $text = htmlspecialchars($text); + $text = @htmlspecialchars($text, ENT_COMPAT, $this->_params['charset']); } return $text; } diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Text2html.php b/framework/Text_Filter/lib/Horde/Text/Filter/Text2html.php index 37a24e6fd..56e692e70 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Text2html.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Text2html.php @@ -53,6 +53,7 @@ class Horde_Text_Filter_Text2html extends Horde_Text_Filter_Base * @var array */ protected $_params = array( + 'charset' => 'ISO-8859-1', 'class' => 'fixed', 'emails' => false, 'linkurls' => false, @@ -62,6 +63,21 @@ class Horde_Text_Filter_Text2html extends Horde_Text_Filter_Base ); /** + * Constructor. + * + * @param array $params Any parameters that the filter instance needs. + */ + public function __construct($params = array()) + { + parent::__construct($params); + + // Use ISO-8859-1 instead of US-ASCII + if (Horde_String::lower($this->_params['charset']) == 'us-ascii') { + $this->_params['charset'] = 'iso-8859-1'; + } + } + + /** * Executes any code necessary before applying the filter patterns. * * @param string $text The text before the filtering. @@ -76,7 +92,7 @@ class Horde_Text_Filter_Text2html extends Horde_Text_Filter_Base } if ($this->_params['parselevel'] == self::NOHTML_NOBREAK) { - return htmlspecialchars($text); + return @htmlspecialchars($text, ENT_COMPAT, $this->_params['charset']); } if ($this->_params['parselevel'] < self::NOHTML) { @@ -108,12 +124,12 @@ class Horde_Text_Filter_Text2html extends Horde_Text_Filter_Base /* For level MICRO or NOHTML, start with htmlspecialchars(). */ $old_error = error_reporting(0); - $text2 = htmlspecialchars($text); + $text2 = htmlspecialchars($text, ENT_COMPAT, $this->_params['charset']); /* Bad charset input in may result in an empty string. 
If so, try * using the default charset encoding instead. */ if (!$text2) { - $text2 = htmlspecialchars($text); + $text2 = htmlspecialchars($text, ENT_COMPAT); } $text = $text2; error_reporting($old_error); diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php b/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php index 90411e7c6..08d274843 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Xss.php @@ -6,6 +6,8 @@ * Filter parameters: * ------------------ * <pre>
+ * 'charset' - (string) The charset of the text.
+ *             DEFAULT: UTF-8
  * 'noprefetch' - (boolean) Disable DNS pre-fetching? See:
  *                https://developer.mozilla.org/En/Controlling_DNS_prefetching
  *                DEFAULT: false
@@ -43,6 +45,7 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
      * @var array
      */
     protected $_params = array(
+        'charset' => 'UTF-8',
         'noprefetch' => false,
         'return_document' => false,
         'return_dom' => false,
@@ -97,7 +100,7 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
     public function postProcess($text)
     {
         try {
-            $dom = new Horde_Domhtml($text, 'UTF-8');
+            $dom = new Horde_Domhtml($text, $this->_params['charset']);
         } catch (Exception $e) {
             return $text;
         }
@@ -135,7 +138,7 @@ class Horde_Text_Filter_Xss extends Horde_Text_Filter_Base
             }
         }
 
-        return Horde_String::convertCharset($text, $dom->encoding, 'UTF-8');
+        return Horde_String::convertCharset($text, $dom->encoding, $this->_params['charset']);
     }
 
     /**