From 9313cdee4f6ab63e30b3be1a858ab101b6a5b111 Mon Sep 17 00:00:00 2001 From: Michael M Slusarz Date: Mon, 22 Feb 2010 23:46:23 -0700 Subject: [PATCH] Use XML parser to generate Html2text output. --- .../lib/Horde/Text/Filter/Html2text.php | 395 ++++++++++++--------- framework/Text_Filter/package.xml | 6 +- .../test/Horde/Text/Filter/fixtures/html2text.html | 12 + .../test/Horde/Text/Filter/html2text.phpt | 88 +++-- .../test/Horde/Text/Filter/html2text3.phpt | 32 +- 5 files changed, 308 insertions(+), 225 deletions(-) diff --git a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php index 698aa1bc3..460d0a713 100644 --- a/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php +++ b/framework/Text_Filter/lib/Horde/Text/Filter/Html2text.php @@ -9,14 +9,11 @@ * wrap - (boolean) Whether to wrap the text or not. * * - * Copyright 2003-2004 Jon Abernathy - * Original source: http://www.chuggnutt.com/html2text.php * Copyright 2004-2010 The Horde Project (http://www.horde.org/) * * See the enclosed file COPYING for license information (LGPL). If you * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html. * - * @author Jon Abernathy * @author Jan Schneider * @author Michael Slusarz * @package Horde_Text @@ -31,30 +28,38 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base protected $_linkList = array(); /** + * Current list indentation level. + * + * @var integer + */ + protected $_indent = 0; + + /** + * Current blockquote level. + * + * @var integer + */ + protected $_bqlevel = 0; + + /** + * Current blockquote data. + * + * @var array + */ + protected $_bqdata = array(); + + /** * Filter parameters. * * @var array */ protected $_params = array( - 'charset' => 'ISO-8859-1', + 'charset' => 'UTF-8', 'width' => 70, 'wrap' => true ); /** - * Executes any code necessary before applying the filter patterns. - * - * @param string $text The text before the filtering. - * - * @return string The modified text. - */ - public function preProcess($text) - { - $this->_linkList = array(); - return trim($text); - } - - /** * Returns a hash with replace patterns. * * @return array Patterns hash. @@ -62,96 +67,32 @@ class Horde_Text_Filter_Html2text extends Horde_Text_Filter_Base public function getPatterns() { $replace = array( - // Non-legal carriage return. - '/\r/' => '' - ); - - $regexp = array( - // Newlines and tabs. - '/[\n\t]+/' => ' ', - - // Normalize
(remove leading/trailing whitespace) - '/\s*]*>\s*/i' => '
', - - //