From d6bdbb9655dbe85dc56e2493408ca017d41903c2 Mon Sep 17 00:00:00 2001
From: Michael M Slusarz
Date: Sat, 21 Feb 2009 18:32:45 -0700
Subject: [PATCH] Import Horde_SpellChecker from CVS HEAD.

---
 framework/SpellChecker/lib/Horde/SpellChecker.php  | 137 ++++++++++++++++++
 .../SpellChecker/lib/Horde/SpellChecker/Aspell.php | 158 +++++++++++++++++++++
 framework/SpellChecker/package.xml                 | 110 ++++++++++++++
 .../test/Horde/SpellChecker/basic-aspell.phpt      |  84 +++++++++++
 4 files changed, 489 insertions(+)
 create mode 100644 framework/SpellChecker/lib/Horde/SpellChecker.php
 create mode 100644 framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php
 create mode 100644 framework/SpellChecker/package.xml
 create mode 100644 framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt

diff --git a/framework/SpellChecker/lib/Horde/SpellChecker.php b/framework/SpellChecker/lib/Horde/SpellChecker.php
new file mode 100644
index 000000000..a7203fb8a
--- /dev/null
+++ b/framework/SpellChecker/lib/Horde/SpellChecker.php
@@ -0,0 +1,137 @@
+<?php
+/**
+ * Abstract base class and driver factory for spell checking.
+ *
+ * Concrete drivers extend this class and implement spellCheck().
+ *
+ * @author  Michael Slusarz
+ * @package Horde_SpellChecker
+ */
+abstract class Horde_SpellChecker
+{
+    const SUGGEST_FAST = 1;
+    const SUGGEST_NORMAL = 2;
+    const SUGGEST_SLOW = 3;
+
+    /**
+     * Maximum number of suggestions kept for each misspelled word.
+     *
+     * @var integer
+     */
+    protected $_maxSuggestions = 10;
+
+    /**
+     * Minimum word length option.
+     * NOTE(review): not read by any code in this patch.
+     *
+     * @var integer
+     */
+    protected $_minLength = 3;
+
+    /**
+     * Language of the dictionary to check against.
+     *
+     * @var string
+     */
+    protected $_locale = 'en';
+
+    /**
+     * Character set used when talking to the spell checker backend. If
+     * empty, no charset conversion is done.
+     *
+     * @var string
+     */
+    protected $_encoding = 'utf-8';
+
+    /**
+     * Whether the text to check is HTML.
+     *
+     * @var boolean
+     */
+    protected $_html = false;
+
+    /**
+     * Suggestion mode; one of the SUGGEST_* constants.
+     *
+     * @var integer
+     */
+    protected $_suggestMode = self::SUGGEST_FAST;
+
+    /**
+     * List of words that are never reported as misspelled. Lookups are done
+     * with the lowercased word, so entries should be lowercase.
+     *
+     * @var array
+     */
+    protected $_localDict = array();
+
+    /**
+     * Attempts to return a concrete Horde_SpellChecker instance based on
+     * $driver.
+     *
+     * @param string $driver  The type of concrete Horde_SpellChecker subclass
+     *                        to return.
+     * @param array $params   A hash containing any additional configuration or
+     *                        connection parameters a subclass might need.
+     *
+     * @return Horde_SpellChecker  The newly created Horde_SpellChecker
+     *                             instance.
+     * @throws Exception
+     */
+    static public function getInstance($driver, $params = array())
+    {
+        // basename() keeps path components out of the class name.
+        $class = 'Horde_SpellChecker_' . String::ucfirst(basename($driver));
+        if (!class_exists($class)) {
+            throw new Exception('Driver ' . $driver . ' not found');
+        }
+        return new $class($params);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param array $params  Configuration hash; see setParams().
+     */
+    public function __construct($params = array())
+    {
+        $this->setParams($params);
+    }
+
+    /**
+     * Sets configuration options.
+     *
+     * @param array $params  Hash of option name => value. Each value is
+     *                       stored in the property named '_' . name (e.g.
+     *                       'locale' sets $_locale). Keys are not validated.
+     */
+    public function setParams($params)
+    {
+        foreach ($params as $key => $val) {
+            $key = '_' . $key;
+            $this->$key = $val;
+        }
+    }
+
+    /**
+     * Spell checks a text.
+     *
+     * @param string $text  The text to check.
+     *
+     * @return array  The result of the check; the format is defined by the
+     *                implementing driver.
+     * @throws Exception
+     */
+    abstract public function spellCheck($text);
+
+    /**
+     * Splits a text into its unique words.
+     *
+     * @param string $text  The text to split. Whitespace and square brackets
+     *                      are treated as word separators.
+     *
+     * @return array  The distinct words, in order of first appearance.
+     */
+    protected function _getWords($text)
+    {
+        // array_flip() + array_keys() removes duplicate words.
+        return array_keys(array_flip(preg_split('/[\s\[\]]+/s', $text, -1, PREG_SPLIT_NO_EMPTY)));
+    }
+
+    /**
+     * Determine if a word exists in the local dictionary.
+     *
+     * @param string $word  The word to check.
+     *
+     * @return boolean  True if the word appears in the local dictionary.
+     */
+    protected function _inLocalDictionary($word)
+    {
+        return (empty($this->_localDict))
+            ? false
+            : in_array(String::lower($word, true), $this->_localDict);
+    }
+
+}
diff --git a/framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php b/framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php
new file mode 100644
index 000000000..7a0bf6bd3
--- /dev/null
+++ b/framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php
@@ -0,0 +1,158 @@
+<?php
+/**
+ * Spell checker driver that runs the "aspell -a" command and parses its
+ * output.
+ *
+ * @author  Michael Slusarz
+ * @package Horde_SpellChecker
+ */
+class Horde_SpellChecker_Aspell extends Horde_SpellChecker
+{
+    /**
+     * Spell checks a text by piping it through aspell.
+     *
+     * @param string $text  The text to check. In HTML mode newlines are
+     *                      replaced by spaces and the text is sent as one
+     *                      line; otherwise only its unique words are sent.
+     *
+     * @return array  A hash with two parallel lists:
+     *                'bad' - the misspelled words;
+     *                'suggestions' - for each misspelled word, a list of at
+     *                                most $_maxSuggestions suggestions.
+     * @throws Exception  If aspell cannot be started or returns no output.
+     */
+    public function spellCheck($text)
+    {
+        if ($this->_html) {
+            $input = strtr($text, "\n", ' ');
+        } else {
+            $words = $this->_getWords($text);
+            if (!count($words)) {
+                return array('bad' => array(), 'suggestions' => array());
+            }
+            $input = implode(' ', $words);
+        }
+
+        // Descriptor array.
+        $descspec = array(
+            0 => array('pipe', 'r'),
+            1 => array('pipe', 'w'),
+            2 => array('pipe', 'w')
+        );
+
+        $process = proc_open($this->_cmd(), $descspec, $pipes);
+        if (!is_resource($process)) {
+            throw new Exception('Spellcheck failed. Command line: ' . $this->_cmd());
+        }
+
+        require_once 'Horde/NLS.php';
+        $charset = NLS::getCharset();
+
+        // Write to stdin.
+        if ($this->_encoding) {
+            $input = String::convertCharset($input, $charset, $this->_encoding);
+        }
+
+        // The '^' character tells aspell to spell check the entire line.
+        fwrite($pipes[0], '^' . $input);
+        fclose($pipes[0]);
+
+        // Read stdout.
+        $out = '';
+        while (!feof($pipes[1])) {
+            $out .= fread($pipes[1], 8192);
+        }
+        fclose($pipes[1]);
+
+        // Read stderr.
+        $err = '';
+        while (!feof($pipes[2])) {
+            $err .= fread($pipes[2], 8192);
+        }
+        fclose($pipes[2]);
+
+        // We can't rely on the return value of proc_close:
+        // http://bugs.php.net/bug.php?id=29123
+        proc_close($process);
+
+        if (strlen($out) === 0) {
+            if ($this->_encoding) {
+                $err = String::convertCharset($err, $this->_encoding, $charset);
+            }
+            // Include whatever aspell wrote to stderr so the failure can be
+            // diagnosed.
+            $msg = 'Spellcheck failed. Command line: ' . $this->_cmd();
+            if (strlen(trim($err))) {
+                $msg .= ' [aspell: ' . trim($err) . ']';
+            }
+            throw new Exception($msg);
+        }
+
+        if ($this->_encoding) {
+            $out = String::convertCharset($out, $this->_encoding, $charset);
+        }
+
+        // Parse output.
+        $bad = $suggestions = array();
+        $lines = explode("\n", $out);
+        foreach ($lines as $line) {
+            $line = trim($line);
+            if (empty($line)) {
+                continue;
+            }
+
+            // The second space-separated field is the checked word; lines
+            // with fewer fields leave $word unset (hence the @).
+            @list(,$word,) = explode(' ', $line, 3);
+
+            if ($this->_inLocalDictionary($word) || in_array($word, $bad)) {
+                continue;
+            }
+
+            switch ($line[0]) {
+            case '#':
+                // Misspelling with no suggestions.
+                $bad[] = $word;
+                $suggestions[] = array();
+                break;
+
+            case '&':
+                // Suggestions: comma separated list after the first ': '.
+                $bad[] = $word;
+                $suggestions[] = array_slice(explode(', ', substr($line, strpos($line, ':') + 2)), 0, $this->_maxSuggestions);
+                break;
+            }
+        }
+
+        return array('bad' => $bad, 'suggestions' => $suggestions);
+    }
+
+    /**
+     * Create the command line string.
+     *
+     * @return string  The command to run.
+     */
+    protected function _cmd()
+    {
+        $args = '';
+
+        switch ($this->_suggestMode) {
+        case self::SUGGEST_FAST:
+            $args .= ' --sug-mode=fast';
+            break;
+
+        case self::SUGGEST_SLOW:
+            $args .= ' --sug-mode=bad-spellers';
+            break;
+
+        default:
+            $args .= ' --sug-mode=normal';
+        }
+
+        if ($this->_encoding) {
+            $args .= ' --encoding=' . escapeshellarg($this->_encoding);
+        }
+
+        $args .= ' --lang=' . escapeshellarg($this->_locale);
+
+        if ($this->_html) {
+            $args .= ' -H';
+        }
+
+        return sprintf('%s -a %s', 'aspell', $args);
+    }
+
+}
diff --git a/framework/SpellChecker/package.xml b/framework/SpellChecker/package.xml
new file mode 100644
index 000000000..51042ad2a
--- /dev/null
+++ b/framework/SpellChecker/package.xml
@@ -0,0 +1,110 @@
+
+
+ Horde_SpellChecker
+ pear.horde.org
+ Spellcheck API
+ Unified spellchecking API.
+
+
+ Chuck Hagenbuch
+ chuck
+ chuck@horde.org
+ yes
+
+
+ Michael Slusarz
+ slusarz
+ slusarz@horde.org
+ yes
+
+ 2009-02-21
+
+ 0.1.0
+ 0.1.0
+
+
+ beta
+ beta
+
+ LGPL
+ * Initial Horde 4 package.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 5.2.0
+
+
+ 1.5.0
+
+
+ Horde_NLS
+ pear.horde.org
+
+
+ Util
+ pear.horde.org
+
+
+
+
+
+
+
+
+
+
+
+ 2006-05-08
+
+
+ 0.0.2
+ 0.0.2
+
+
+ alpha
+ alpha
+
+ LGPL
+ * Work around an issue with proc_close and --enable-sigchild (Bug #6625)
+* Make sure charset is correctly reported to aspell
+* Remove pspell driver
+* Converted to package.xml 2.0 for pear.horde.org
+
+
+
+
+ 0.0.1
+ 0.0.1
+
+
+ alpha
+ alpha
+
+ 2004-01-01
+ LGPL
+ Initial Release.
+
+
+
+
diff --git a/framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt b/framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt
new file mode 100644
index 000000000..df9547e93
--- /dev/null
+++ b/framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt
@@ -0,0 +1,84 @@
+--TEST--
+Basic aspell driver test
+--SKIPIF--
+ $aspell));
+var_dump($speller->spellCheck('some tet [mispeled] ?'));
+
+--EXPECT--
+array(2) {
+  ["bad"]=>
+  array(2) {
+    [0]=>
+    string(3) "tet"
+    [1]=>
+    string(8) "mispeled"
+  }
+  ["suggestions"]=>
+  array(2) {
+    [0]=>
+    array(10) {
+      [0]=>
+      string(3) "Tet"
+      [1]=>
+      string(4) "teat"
+      [2]=>
+      string(4) "tent"
+      [3]=>
+      string(4) "test"
+      [4]=>
+      string(3) "yet"
+      [5]=>
+      string(2) "Te"
+      [6]=>
+      string(2) "ET"
+      [7]=>
+      string(3) "Ted"
+      [8]=>
+      string(3) "Tut"
+      [9]=>
+      string(3) "tat"
+    }
+    [1]=>
+    array(10) {
+      [0]=>
+      string(10) "misspelled"
+      [1]=>
+      string(10) "misapplied"
+      [2]=>
+      string(6) "misled"
+      [3]=>
+      string(9) "dispelled"
+      [4]=>
+      string(8) "misfiled"
+      [5]=>
+      string(8) "misruled"
+      [6]=>
+      string(7) "mislead"
+      [7]=>
+      string(7) "spelled"
+      [8]=>
+      string(7) "spieled"
+      [9]=>
+      string(9) "misplaced"
+    }
+  }
+}
-- 
2.11.0