Import Horde_SpellChecker from CVS HEAD.
author Michael M Slusarz <slusarz@curecanti.org>
Sun, 22 Feb 2009 01:32:45 +0000 (18:32 -0700)
committer Michael M Slusarz <slusarz@curecanti.org>
Sun, 22 Feb 2009 01:32:45 +0000 (18:32 -0700)
framework/SpellChecker/lib/Horde/SpellChecker.php [new file with mode: 0644]
framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php [new file with mode: 0644]
framework/SpellChecker/package.xml [new file with mode: 0644]
framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt [new file with mode: 0644]

diff --git a/framework/SpellChecker/lib/Horde/SpellChecker.php b/framework/SpellChecker/lib/Horde/SpellChecker.php
new file mode 100644 (file)
index 0000000..a7203fb
--- /dev/null
@@ -0,0 +1,137 @@
+<?php
+
+require_once 'Horde/String.php';
+
+/**
+ * The Horde_SpellChecker:: class provides a unified spellchecker API.
+ *
+ * Copyright 2005-2009 The Horde Project (http://www.horde.org/)
+ *
+ * See the enclosed file COPYING for license information (LGPL). If you
+ * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
+ *
+ * @author  Chuck Hagenbuch <chuck@horde.org>
+ * @author  Michael Slusarz <slusarz@horde.org>
+ * @package Horde_SpellChecker
+ */
+abstract class Horde_SpellChecker
+{
+    /* Suggestion modes; drivers translate these into backend options. */
+    const SUGGEST_FAST = 1;
+    const SUGGEST_NORMAL = 2;
+    const SUGGEST_SLOW = 3;
+
+    /**
+     * Maximum number of suggestions a driver keeps per misspelled word.
+     *
+     * @var integer
+     */
+    protected $_maxSuggestions = 10;
+
+    /**
+     * Presumably the minimum length a word needs in order to be checked;
+     * nothing in this class consults it.
+     *
+     * @var integer
+     */
+    protected $_minLength = 3;
+
+    /**
+     * Language of the text being checked.
+     *
+     * @var string
+     */
+    protected $_locale = 'en';
+
+    /**
+     * Character set used when talking to the spell checking backend.
+     *
+     * @var string
+     */
+    protected $_encoding = 'utf-8';
+
+    /**
+     * Whether the text handed to spellCheck() is HTML.
+     *
+     * @var boolean
+     */
+    protected $_html = false;
+
+    /**
+     * One of the SUGGEST_* constants.
+     *
+     * @var integer
+     */
+    protected $_suggestMode = self::SUGGEST_FAST;
+
+    /**
+     * Words that must never be reported as misspelled. Lookups are done
+     * with the lowercased word, so entries have to be stored lowercased.
+     *
+     * @var array
+     */
+    protected $_localDict = array();
+
+    /**
+     * Builds the concrete Horde_SpellChecker driver named by $driver.
+     *
+     * The class name is 'Horde_SpellChecker_' followed by the capitalized
+     * base name of $driver. The class must already be defined or be
+     * loadable through an autoloader; no file is included here.
+     *
+     * @param string $driver  The type of concrete Horde_SpellChecker subclass
+     *                        to return.
+     * @param array $params   A hash containing any additional configuration or
+     *                        connection parameters a subclass might need.
+     *
+     * @return Horde_SpellChecker  The newly created Horde_SpellChecker
+     *                             instance.
+     * @throws Exception  If the driver class does not exist.
+     */
+    public static function getInstance($driver, $params = array())
+    {
+        $className = 'Horde_SpellChecker_' . String::ucfirst(basename($driver));
+        if (class_exists($className)) {
+            return new $className($params);
+        }
+
+        throw new Exception('Driver ' . $driver . ' not found');
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param array $params  Configuration hash, handed to setParams().
+     */
+    public function __construct($params = array())
+    {
+        $this->setParams($params);
+    }
+
+    /**
+     * Copies configuration values onto the object.
+     *
+     * Every key is prefixed with an underscore and assigned as a property,
+     * e.g. 'locale' ends up in $_locale. Keys are not validated.
+     *
+     * @param array $params  Hash of parameter name => value.
+     */
+    public function setParams($params)
+    {
+        foreach ($params as $name => $value) {
+            $this->{'_' . $name} = $value;
+        }
+    }
+
+    /**
+     * Spell checks the given text.
+     *
+     * @param string $text  The text to check.
+     *
+     * @return array  The driver's result; the format is defined by the
+     *                implementing driver.
+     * @throws Exception
+     */
+    abstract public function spellCheck($text);
+
+    /**
+     * Splits a text into its distinct words.
+     *
+     * Words are separated by runs of whitespace and/or square brackets.
+     *
+     * @param string $text  The text to split.
+     *
+     * @return array  The unique words, in order of first appearance.
+     */
+    protected function _getWords($text)
+    {
+        $tokens = preg_split('/[\s\[\]]+/s', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+        // Turning the tokens into array keys collapses duplicates.
+        $unique = array_flip($tokens);
+
+        return array_keys($unique);
+    }
+
+    /**
+     * Determine if a word exists in the local dictionary.
+     *
+     * @param string $word  The word to check.
+     *
+     * @return boolean  True if the word appears in the local dictionary.
+     */
+    protected function _inLocalDictionary($word)
+    {
+        if (empty($this->_localDict)) {
+            return false;
+        }
+
+        return in_array(String::lower($word, true), $this->_localDict);
+    }
+
+}
diff --git a/framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php b/framework/SpellChecker/lib/Horde/SpellChecker/Aspell.php
new file mode 100644 (file)
index 0000000..7a0bf6b
--- /dev/null
@@ -0,0 +1,158 @@
+<?php
+/**
+ * The Horde_SpellChecker_Aspell:: class provides a driver for the 'aspell'
+ * program.
+ *
+ * Copyright 2005-2009 The Horde Project (http://www.horde.org/)
+ *
+ * See the enclosed file COPYING for license information (LGPL). If you
+ * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
+ *
+ * @author  Chuck Hagenbuch <chuck@horde.org>
+ * @author  Michael Slusarz <slusarz@horde.org>
+ * @package Horde_SpellChecker
+ */
+class Horde_SpellChecker_Aspell extends Horde_SpellChecker
+{
+    /**
+     * Name of, or path to, the aspell binary. Can be overridden with the
+     * 'path' parameter.
+     *
+     * @var string
+     */
+    protected $_path = 'aspell';
+
+    /**
+     * Spell checks a text by piping it through aspell.
+     *
+     * In HTML mode the whole text is sent as a single line (newlines are
+     * replaced by spaces); otherwise only the unique words returned by
+     * _getWords() are sent. Words found in the local dictionary are never
+     * reported.
+     *
+     * @param string $text  The text to check.
+     *
+     * @return array  A hash with two parallel lists: 'bad' holds each
+     *                misspelled word once, 'suggestions' holds, at the same
+     *                index, the suggestions for that word (at most
+     *                $_maxSuggestions entries; empty if there are none).
+     * @throws Exception  If aspell cannot be started or produces no output.
+     */
+    public function spellCheck($text)
+    {
+        if ($this->_html) {
+            $input = strtr($text, "\n", ' ');
+        } else {
+            $words = $this->_getWords($text);
+            if (!count($words)) {
+                return array('bad' => array(), 'suggestions' => array());
+            }
+            $input = implode(' ', $words);
+        }
+
+        // Build the command once; it is also needed for error messages.
+        $cmd = $this->_cmd();
+
+        // Descriptor array.
+        $descspec = array(
+            0 => array('pipe', 'r'),
+            1 => array('pipe', 'w'),
+            2 => array('pipe', 'w')
+        );
+
+        $process = proc_open($cmd, $descspec, $pipes);
+        if (!is_resource($process)) {
+            throw new Exception('Spellcheck failed. Command line: ' . $cmd);
+        }
+
+        require_once 'Horde/NLS.php';
+        $charset = NLS::getCharset();
+
+        // Write to stdin, converted to the encoding aspell was told to
+        // expect.
+        if ($this->_encoding) {
+            $input = String::convertCharset($input, $charset, $this->_encoding);
+        }
+
+        // The '^' character tells aspell to spell check the entire line.
+        fwrite($pipes[0], '^' . $input);
+        fclose($pipes[0]);
+
+        // Read stdout.
+        $out = '';
+        while (!feof($pipes[1])) {
+            $out .= fread($pipes[1], 8192);
+        }
+        fclose($pipes[1]);
+
+        // Read stderr.
+        $err = '';
+        while (!feof($pipes[2])) {
+            $err .= fread($pipes[2], 8192);
+        }
+        fclose($pipes[2]);
+
+        // We can't rely on the return value of proc_close:
+        // http://bugs.php.net/bug.php?id=29123
+        proc_close($process);
+
+        // No output at all is treated as failure; report whatever aspell
+        // wrote to stderr along with the command line.
+        if (strlen($out) === 0) {
+            if ($this->_encoding) {
+                $err = String::convertCharset($err, $this->_encoding, $charset);
+            }
+            $message = 'Spellcheck failed. Command line: ' . $cmd;
+            $err = trim($err);
+            if (strlen($err)) {
+                $message .= ' Error: ' . $err;
+            }
+            throw new Exception($message);
+        }
+
+        if ($this->_encoding) {
+            $out = String::convertCharset($out, $this->_encoding, $charset);
+        }
+
+        // Parse output.
+        $bad = $suggestions = array();
+        foreach (explode("\n", $out) as $line) {
+            $line = trim($line);
+            if (empty($line)) {
+                continue;
+            }
+
+            // The word is the second space separated field. Lines without
+            // one (e.g. '*', reported for a correct word) are not
+            // misspelling reports.
+            $fields = explode(' ', $line, 3);
+            if (!isset($fields[1])) {
+                continue;
+            }
+            $word = $fields[1];
+
+            if ($this->_inLocalDictionary($word) ||
+                in_array($word, $bad, true)) {
+                continue;
+            }
+
+            switch ($line[0]) {
+            case '#':
+                // Misspelling with no suggestions.
+                $bad[] = $word;
+                $suggestions[] = array();
+                break;
+
+            case '&':
+                // Suggestions: a comma separated list following ': '.
+                $bad[] = $word;
+                $suggestions[] = array_slice(explode(', ', substr($line, strpos($line, ':') + 2)), 0, $this->_maxSuggestions);
+                break;
+            }
+        }
+
+        return array('bad' => $bad, 'suggestions' => $suggestions);
+    }
+
+    /**
+     * Create the command line string.
+     *
+     * Runs the configured binary in pipe mode (-a) with the suggestion mode,
+     * encoding (if set), language and, in HTML mode, the -H flag.
+     *
+     * @return string  The command to run.
+     */
+    protected function _cmd()
+    {
+        $args = '';
+
+        switch ($this->_suggestMode) {
+        case self::SUGGEST_FAST:
+            $args .= ' --sug-mode=fast';
+            break;
+
+        case self::SUGGEST_SLOW:
+            $args .= ' --sug-mode=bad-spellers';
+            break;
+
+        default:
+            $args .= ' --sug-mode=normal';
+        }
+
+        if ($this->_encoding) {
+            $args .= ' --encoding=' . escapeshellarg($this->_encoding);
+        }
+
+        $args .= ' --lang=' . escapeshellarg($this->_locale);
+
+        if ($this->_html) {
+            $args .= ' -H';
+        }
+
+        // Fall back to plain 'aspell' if no usable path was configured.
+        $path = (is_string($this->_path) && strlen($this->_path))
+            ? $this->_path
+            : 'aspell';
+
+        return sprintf('%s -a %s', escapeshellcmd($path), $args);
+    }
+
+}
diff --git a/framework/SpellChecker/package.xml b/framework/SpellChecker/package.xml
new file mode 100644 (file)
index 0000000..51042ad
--- /dev/null
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<package packagerversion="1.4.9" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0
+http://pear.php.net/dtd/tasks-1.0.xsd
+http://pear.php.net/dtd/package-2.0
+http://pear.php.net/dtd/package-2.0.xsd">
+ <name>Horde_SpellChecker</name>
+ <channel>pear.horde.org</channel>
+ <summary>Spellcheck API</summary>
+ <description>Unified spellchecking API.
+ </description>
+ <lead>
+  <name>Chuck Hagenbuch</name>
+  <user>chuck</user>
+  <email>chuck@horde.org</email>
+  <active>yes</active>
+ </lead>
+ <lead>
+  <name>Michael Slusarz</name>
+  <user>slusarz</user>
+  <email>slusarz@horde.org</email>
+  <active>yes</active>
+ </lead>
+ <date>2009-02-21</date>
+ <version>
+  <release>0.1.0</release>
+  <api>0.1.0</api>
+ </version>
+ <stability>
+  <release>beta</release>
+  <api>beta</api>
+ </stability>
+ <license uri="http://www.gnu.org/copyleft/lesser.html">LGPL</license>
+ <notes>* Initial Horde 4 package.</notes>
+ <contents>
+  <dir name="/">
+   <dir name="lib">
+    <dir name="Horde">
+     <dir name="SpellChecker">
+      <file name="Aspell.php" role="php" />
+     </dir> <!-- /lib/Horde/SpellChecker -->
+     <file name="SpellChecker.php" role="php" />
+    </dir> <!-- /lib/Horde -->
+   </dir> <!-- /lib -->
+   <dir name="test">
+    <dir name="Horde">
+     <dir name="SpellChecker">
+      <file name="basic-aspell.phpt" role="test" />
+     </dir> <!-- /test/Horde/SpellChecker -->
+    </dir> <!-- /test/Horde -->
+   </dir> <!-- /test -->
+  </dir> <!-- / -->
+ </contents>
+ <dependencies>
+  <required>
+   <php>
+    <min>5.2.0</min>
+   </php>
+   <pearinstaller>
+    <min>1.5.0</min>
+   </pearinstaller>
+   <package>
+    <name>Horde_NLS</name>
+    <channel>pear.horde.org</channel>
+   </package>
+   <package>
+    <name>Util</name>
+    <channel>pear.horde.org</channel>
+   </package>
+  </required>
+ </dependencies>
+ <phprelease>
+  <filelist>
+   <install name="lib/Horde/SpellChecker/Aspell.php" as="Horde/SpellChecker/Aspell.php" />
+   <install name="lib/Horde/SpellChecker.php" as="Horde/SpellChecker.php" />
+  </filelist>
+ </phprelease>
+ <changelog>
+  <release>
+   <date>2006-05-08</date>
+   <time>23:24:34</time>
+   <version>
+    <release>0.0.2</release>
+    <api>0.0.2</api>
+   </version>
+   <stability>
+    <release>alpha</release>
+    <api>alpha</api>
+   </stability>
+   <license uri="http://www.gnu.org/copyleft/lesser.html">LGPL</license>
+   <notes>* Work around an issue with proc_close and --enable-sigchild (Bug #6625)
+* Make sure charset is correctly reported to aspell
+* Remove pspell driver
+* Converted to package.xml 2.0 for pear.horde.org
+   </notes>
+  </release>
+  <release>
+   <version>
+    <release>0.0.1</release>
+    <api>0.0.1</api>
+   </version>
+   <stability>
+    <release>alpha</release>
+    <api>alpha</api>
+   </stability>
+   <date>2004-01-01</date>
+   <license uri="http://www.gnu.org/copyleft/lesser.html">LGPL</license>
+   <notes>Initial Release.
+   </notes>
+  </release>
+ </changelog>
+</package>
diff --git a/framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt b/framework/SpellChecker/test/Horde/SpellChecker/basic-aspell.phpt
new file mode 100644 (file)
index 0000000..df9547e
--- /dev/null
@@ -0,0 +1,84 @@
+--TEST--
+Basic aspell driver test
+--SKIPIF--
+<?php
+
+// Skip the test unless an executable aspell (or, failing that, ispell)
+// binary can be located.
+$binary = '';
+foreach (array('which aspell', 'which ispell') as $lookup) {
+    $binary = trim(shell_exec($lookup));
+    if (is_executable($binary)) {
+        break;
+    }
+}
+
+if (!is_executable($binary)) {
+    echo 'skip No aspell/ispell binary found.';
+}
+
+--FILE--
+<?php
+
+// Prefer aspell; fall back to ispell if aspell is not available.
+$aspell = trim(`which aspell`);
+if (!is_executable($aspell)) {
+    $aspell = trim(`which ispell`);
+}
+
+require_once 'Horde/SpellChecker.php';
+// getInstance() only checks class_exists() and does not include the driver
+// file, so load the driver explicitly.
+require_once 'Horde/SpellChecker/Aspell.php';
+$speller = Horde_SpellChecker::getInstance('Aspell', array('path' => $aspell));
+var_dump($speller->spellCheck('some tet [mispeled] ?'));
+
+--EXPECT--
+array(2) {
+  ["bad"]=>
+  array(2) {
+    [0]=>
+    string(3) "tet"
+    [1]=>
+    string(8) "mispeled"
+  }
+  ["suggestions"]=>
+  array(2) {
+    [0]=>
+    array(10) {
+      [0]=>
+      string(3) "Tet"
+      [1]=>
+      string(4) "teat"
+      [2]=>
+      string(4) "tent"
+      [3]=>
+      string(4) "test"
+      [4]=>
+      string(3) "yet"
+      [5]=>
+      string(2) "Te"
+      [6]=>
+      string(2) "ET"
+      [7]=>
+      string(3) "Ted"
+      [8]=>
+      string(3) "Tut"
+      [9]=>
+      string(3) "tat"
+    }
+    [1]=>
+    array(10) {
+      [0]=>
+      string(10) "misspelled"
+      [1]=>
+      string(10) "misapplied"
+      [2]=>
+      string(6) "misled"
+      [3]=>
+      string(9) "dispelled"
+      [4]=>
+      string(8) "misfiled"
+      [5]=>
+      string(8) "misruled"
+      [6]=>
+      string(7) "mislead"
+      [7]=>
+      string(7) "spelled"
+      [8]=>
+      string(7) "spieled"
+      [9]=>
+      string(9) "misplaced"
+    }
+  }
+}