add Horde_Support_Numerizer to the main Horde_Support package
author Chuck Hagenbuch <chuck@horde.org>
Wed, 27 May 2009 04:08:58 +0000 (00:08 -0400)
committer Chuck Hagenbuch <chuck@horde.org>
Wed, 27 May 2009 04:09:05 +0000 (00:09 -0400)
framework/Support/lib/Horde/Support/Numerizer.php [new file with mode: 0644]
framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php [new file with mode: 0644]
framework/Support/lib/Horde/Support/Numerizer/Locale/De.php [new file with mode: 0644]
framework/Support/package.xml
framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php [new file with mode: 0644]
framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php [new file with mode: 0644]

diff --git a/framework/Support/lib/Horde/Support/Numerizer.php b/framework/Support/lib/Horde/Support/Numerizer.php
new file mode 100644 (file)
index 0000000..9f7317a
--- /dev/null
@@ -0,0 +1,34 @@
+<?php
+/**
+ * Entry point for turning spelled-out numbers inside a string into digits.
+ *
+ * The conversion itself is done by a locale class
+ * (Horde_Support_Numerizer_Locale_*); this class only selects and
+ * instantiates one.
+ */
+class Horde_Support_Numerizer
+{
+    /**
+     * Numerizes $string with the numerizer selected by $args.
+     *
+     * @param string $string  Text to convert.
+     * @param array  $args    Options; see factory() for the 'locale' key.
+     *
+     * @return mixed  Whatever the selected numerizer's numerize() returns.
+     */
+    public static function numerize($string, $args = array())
+    {
+        return self::factory($args)->numerize($string);
+    }
+
+    /**
+     * Instantiates the numerizer for a locale.
+     *
+     * Lookup order: the class for the full locale (e.g. "de_DE" is looked up
+     * as Horde_Support_Numerizer_Locale_De_De), then the class for the part
+     * before the first underscore (Horde_Support_Numerizer_Locale_De), and
+     * finally Horde_Support_Numerizer_Locale_Base.
+     *
+     * @param array $args  Options, passed unchanged to the numerizer's
+     *                     constructor. 'locale' names the wanted locale; a
+     *                     missing, empty or "base" value selects the base
+     *                     numerizer directly.
+     *
+     * @return object  A locale numerizer instance.
+     */
+    public static function factory($args = array())
+    {
+        $locale = isset($args['locale']) ? $args['locale'] : null;
+        if ($locale && strtolower($locale) != 'base') {
+            // Normalize e.g. "de_DE" or "DE_de" to the class suffix "De_De".
+            $locale = str_replace(' ', '_', ucwords(str_replace('_', ' ', strtolower($locale))));
+            $class = 'Horde_Support_Numerizer_Locale_' . $locale;
+            if (class_exists($class)) {
+                return new $class($args);
+            }
+
+            // array_shift() takes its argument by reference, so the result of
+            // explode() must not be passed to it directly; index the array
+            // instead.
+            $parts = explode('_', $locale);
+            $language = $parts[0];
+            if ($language != $locale) {
+                $class = 'Horde_Support_Numerizer_Locale_' . $language;
+                if (class_exists($class)) {
+                    return new $class($args);
+                }
+            }
+        }
+
+        return new Horde_Support_Numerizer_Locale_Base($args);
+    }
+
+}
diff --git a/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php b/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php
new file mode 100644 (file)
index 0000000..0a1908e
--- /dev/null
@@ -0,0 +1,149 @@
+<?php
+/**
+ * English numerizer: rewrites spelled-out numbers inside a string as digits
+ * (e.g. "twenty seven" becomes "27") and leaves the rest of the text in
+ * place.
+ *
+ * The three public tables hold regular expression fragments as keys; locale
+ * subclasses override them.
+ */
+class Horde_Support_Numerizer_Locale_Base
+{
+    /**
+     * Regex fragment => replacement, applied in this order by
+     * _directReplacements(). The teens come first so that e.g. "fourteen" is
+     * consumed before the "four" rule runs.
+     */
+    public $DIRECT_NUMS = array(
+        'eleven' => '11',
+        'twelve' => '12',
+        'thirteen' => '13',
+        'fourteen' => '14',
+        'fifteen' => '15',
+        'sixteen' => '16',
+        'seventeen' => '17',
+        'eighteen' => '18',
+        'nineteen' => '19',
+        'ninteen' => '19',      // Common mis-spelling
+        'zero' => '0',
+        'one' => '1',
+        'two' => '2',
+        'three' => '3',
+        'four(\W|$)' => '4$1',  // The weird regex is so that it matches four but not fourty
+        'five' => '5',
+        'six(\W|$)' => '6$1',
+        'seven(\W|$)' => '7$1',
+        'eight(\W|$)' => '8$1',
+        'nine(\W|$)' => '9$1',
+        'ten' => '10',
+        // A standalone "a" followed by whitespace ("a hundred"); it doesn't
+        // make sense for an 'a' at the end to be a 1. The lookahead replaces
+        // a character class ([\b^$]) that could only match a literal
+        // backspace, "^" or "$" and therefore never fired.
+        '\ba(?=\s)' => '1',
+    );
+
+    /**
+     * Regex fragment => value of the tens word. Both the correct spelling
+     * "forty" and the common misspelling "fourty" are accepted.
+     */
+    public $TEN_PREFIXES = array(
+        'twenty' => 20,
+        'thirty' => 30,
+        'forty' => 40,
+        'fourty' => 40,
+        'fifty' => 50,
+        'sixty' => 60,
+        'seventy' => 70,
+        'eighty' => 80,
+        'ninety' => 90,
+    );
+
+    /**
+     * Regex fragment => multiplier, processed from smallest to largest by
+     * _replaceBigPrefixes().
+     */
+    public $BIG_PREFIXES = array(
+        'hundred' => 100,
+        'thousand' => 1000,
+        'million' => 1000000,
+        'billion' => 1000000000,
+        'trillion' => 1000000000000,
+    );
+
+    /**
+     * Replaces the spelled-out numbers in $string with digits.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text with recognized number words replaced.
+     */
+    public function numerize($string)
+    {
+        // preprocess
+        $string = $this->_splitHyphenatedWords($string);
+        $string = $this->_hideAHalf($string);
+
+        $string = $this->_directReplacements($string);
+        $string = $this->_replaceTenPrefixes($string);
+        $string = $this->_replaceBigPrefixes($string);
+        $string = $this->_fractionalAddition($string);
+
+        return $string;
+    }
+
+    /**
+     * Collapses runs of spaces and turns a hyphen between two non-digit
+     * characters into a space ("thirty-one" => "thirty one").
+     *
+     * will mutilate hyphenated-words but shouldn't matter for date extraction
+     *
+     * A hyphen with a digit on either side is left alone, so dates
+     * ("2006-08-20") and signed offsets ("UTC-5") keep their hyphen.
+     */
+    protected function _splitHyphenatedWords($string)
+    {
+        return preg_replace('/ +|([^\d])-([^\d])/', '$1 $2', $string);
+    }
+
+    /**
+     * take the 'a' out so it doesn't turn into a 1, save the half for the end
+     *
+     * The "haAlf" marker is picked up again by _fractionalAddition().
+     */
+    protected function _hideAHalf($string)
+    {
+        return str_replace('a half', 'haAlf', $string);
+    }
+
+    /**
+     * easy/direct replacements
+     *
+     * Applies every DIRECT_NUMS rule, case-insensitively, in table order.
+     */
+    protected function _directReplacements($string)
+    {
+        foreach ($this->DIRECT_NUMS as $dn => $dn_replacement) {
+            $string = preg_replace("/$dn/i", $dn_replacement, $string);
+        }
+        return $string;
+    }
+
+    /**
+     * ten, twenty, etc.
+     *
+     * A tens word directly followed by a single digit (already produced by
+     * _directReplacements()) is replaced by their sum: "twenty 7" => "27".
+     */
+    protected function _replaceTenPrefixes($string)
+    {
+        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
+            $string = preg_replace_callback(
+                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
+                function ($m) use ($tp_replacement) {
+                    // $m[1] is absent when no digit follows the tens word.
+                    return (string)($tp_replacement + (isset($m[1]) ? (int)$m[1] : 0));
+                },
+                $string);
+        }
+        return $string;
+    }
+
+    /**
+     * hundreds, thousands, millions, etc.
+     *
+     * Multiplies each prefix by the digits in front of it, then lets
+     * _andition() merge the result with neighbouring numbers before the next
+     * larger prefix is handled.
+     */
+    protected function _replaceBigPrefixes($string)
+    {
+        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
+            $string = preg_replace_callback(
+                '/(\d*) *' . $bp . '/i',
+                function ($m) use ($bp_replacement) {
+                    // A prefix without a leading number ("hundred") counts
+                    // once instead of collapsing to 0.
+                    $factor = ($m[1] === '') ? 1 : (int)$m[1];
+                    return (string)($bp_replacement * $factor);
+                },
+                $string);
+            $string = $this->_andition($string);
+        }
+        return $string;
+    }
+
+    /**
+     * Adds up adjacent numbers: "400 and 73" => "473", "200 50" => "250".
+     *
+     * Only the first pair found in the string is examined on each pass. It is
+     * merged when the two numbers are joined by "and", or when the left one
+     * has more digits than the right one; otherwise processing stops.
+     */
+    protected function _andition($string)
+    {
+        while (true) {
+            if (preg_match('/(\d+)( | and )(\d+)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) {
+                if (preg_match('/and/', $sc[2][0]) || (strlen($sc[1][0]) > strlen($sc[3][0]))) {
+                    // Splice the sum in place of "<left><separator><right>".
+                    $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0]));
+                    continue;
+                }
+            }
+            break;
+        }
+        return $string;
+    }
+
+    /**
+     * Resolves the marker left by _hideAHalf(): a number followed by the
+     * marker (optionally separated by spaces, "and" or hyphens) gains 0.5,
+     * e.g. "2 and haAlf" => "2.5".
+     */
+    protected function _fractionalAddition($string)
+    {
+        return preg_replace_callback(
+            '/(\d+)(?: | and |-)*haAlf/i',
+            function ($m) {
+                return (string)((float)$m[1] + 0.5);
+            },
+            $string);
+    }
+
+}
diff --git a/framework/Support/lib/Horde/Support/Numerizer/Locale/De.php b/framework/Support/lib/Horde/Support/Numerizer/Locale/De.php
new file mode 100644 (file)
index 0000000..2e13fae
--- /dev/null
@@ -0,0 +1,110 @@
+<?php
+/**
+ * German numerizer: rewrites spelled-out German numbers inside a string as
+ * digits, using German word tables and a processing order adapted to German
+ * compound numbers.
+ */
+class Horde_Support_Numerizer_Locale_De extends Horde_Support_Numerizer_Locale_Base
+{
+    /**
+     * Regex fragment => replacement, applied in this order by
+     * _directReplacements(); longer words precede the words they contain.
+     */
+    public $DIRECT_NUMS = array(
+        'dreizehn' => 13,
+        'vierzehn' => 14,
+        'fünfzehn' => 15,
+        'sechzehn' => 16,
+        'siebzehn' => 17,
+        'achtzehn' => 18,
+        'neunzehn' => 19,
+        'eins' => 1,
+        'zwei' => 2,
+        'zwo' => 2,
+        'drei' => 3,
+        'vier' => 4,
+        'fünf' => 5,
+        'sechs' => 6,
+        'sieben' => 7,
+        'acht' => 8,
+        'neun' => 9,
+        'zehn' => 10,
+        'elf' => 11,
+        'zwölf' => 12,
+        'eine?' => 1,
+    );
+
+    /**
+     * Regex fragment => value of the tens word.
+     */
+    public $TEN_PREFIXES = array(
+        'zwanzig' => 20,
+        'dreißig' => 30,
+        'vierzig' => 40,
+        'fünfzig' => 50,
+        'sechzig' => 60,
+        'siebzig' => 70,
+        'achtzig' => 80,
+        'neunzig' => 90,
+    );
+
+    /**
+     * Regex fragment => multiplier, processed in this order by
+     * _replaceBigPrefixes().
+     */
+    public $BIG_PREFIXES = array(
+        'hundert' => 100,
+        'tausend' => 1000,
+        'million' => 1000000,
+        'milliarde' => 1000000000,
+        'billion' => 1000000000000,
+    );
+
+    /**
+     * Replaces the spelled-out German numbers in $string with digits.
+     *
+     * Rules:
+     *
+     * - there are irregular words for 11 and 12, like in English
+     * - numbers below one million are written together (1 M = "eine Million", 100 = "einhundert")
+     * - "a" is declinable (see above, "one" = "eins", "a" = "ein/eine")
+     * - numbers below 100 are flipped compared to English, and have an "and" = "und" (21 = "twenty-one" = "einundzwanzig")
+     *
+     * Unlike the base class, no preprocessing is done, the tens are replaced
+     * before the direct replacements, and _andition() runs once at the very
+     * end.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text with recognized number words replaced.
+     */
+    public function numerize($string)
+    {
+        // preprocess?
+
+        $string = $this->_replaceTenPrefixes($string);
+        $string = $this->_directReplacements($string);
+        $string = $this->_replaceBigPrefixes($string);
+        $string = $this->_fractionalAddition($string);
+        $string = $this->_andition($string);
+
+        return $string;
+    }
+
+    /**
+     * ten, twenty, etc.
+     *
+     * Replaces each tens word by its value, adding a single digit that
+     * directly follows it.
+     */
+    protected function _replaceTenPrefixes($string)
+    {
+        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
+            $string = preg_replace_callback(
+                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
+                function ($m) use ($tp_replacement) {
+                    // $m[1] is absent when no digit follows the tens word.
+                    return (string)($tp_replacement + (isset($m[1]) ? (int)$m[1] : 0));
+                },
+                $string);
+        }
+        return $string;
+    }
+
+    /**
+     * hundreds, thousands, millions, etc.
+     *
+     * Each prefix is multiplied by the digits in front of it (1 when there
+     * are none, or when they evaluate to 0) and an "und" is appended so that
+     * _andition() can add whatever follows.
+     */
+    protected function _replaceBigPrefixes($string)
+    {
+        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
+            $string = preg_replace_callback(
+                '/(\d*) *' . $bp . '/i',
+                function ($m) use ($bp_replacement) {
+                    $factor = (int)$m[1];
+                    if (!$factor) {
+                        $factor = 1;
+                    }
+                    return ($bp_replacement * $factor) . 'und';
+                },
+                $string);
+        }
+        return $string;
+    }
+
+    /**
+     * Adds up numbers chained with "und": while the string contains
+     * "<number>und[und...][<number>]" at the end of a word, that span is
+     * replaced by the sum (a missing right-hand number counts as 0).
+     */
+    protected function _andition($string)
+    {
+        while (preg_match('/(\d+)((?:und)+)(\d*)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) {
+            $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0]));
+        }
+        return $string;
+    }
+
+}
index a21039f..470adcc 100644 (file)
@@ -32,15 +32,23 @@ http://pear.php.net/dtd/package-2.0.xsd">
    * Initial Horde_Support_Stub object
    * Initial Horde_Support_Timer object
    * Initial Horde_Support_Uuid object
+   * Initial Horde_Support_Numerizer objects
  </notes>
  <contents>
   <dir name="/">
    <dir name="lib">
     <dir name="Horde">
      <dir name="Support">
+      <dir name="Numerizer">
+       <dir name="Locale">
+        <file name="Base.php" role="php" />
+        <file name="De.php" role="php" />
+       </dir> <!-- /lib/Horde/Support/Numerizer/Locale -->
+      </dir> <!-- /lib/Horde/Support/Numerizer -->
       <file name="Array.php" role="php" />
       <file name="ConsistentHash.php" role="php" />
       <file name="Inflector.php" role="php" />
+      <file name="Numerizer.php" role="php" />
       <file name="Stub.php" role="php" />
       <file name="Timer.php" role="php" />
       <file name="Uuid.php" role="php" />
@@ -64,6 +72,9 @@ http://pear.php.net/dtd/package-2.0.xsd">
    <install name="lib/Horde/Support/Array.php" as="Horde/Support/Array.php" />
    <install name="lib/Horde/Support/ConsistentHash.php" as="Horde/Support/ConsistentHash.php" />
    <install name="lib/Horde/Support/Inflector.php" as="Horde/Support/Inflector.php" />
+   <install name="lib/Horde/Support/Numerizer/Locale/Base.php" as="Horde/Support/Numerizer/Locale/Base.php" />
+   <install name="lib/Horde/Support/Numerizer/Locale/De.php" as="Horde/Support/Numerizer/Locale/De.php" />
+   <install name="lib/Horde/Support/Numerizer.php" as="Horde/Support/Numerizer.php" />
    <install name="lib/Horde/Support/Stub.php" as="Horde/Support/Stub.php" />
    <install name="lib/Horde/Support/Timer.php" as="Horde/Support/Timer.php" />
    <install name="lib/Horde/Support/Uuid.php" as="Horde/Support/Uuid.php" />
diff --git a/framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php b/framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php
new file mode 100644 (file)
index 0000000..a370511
--- /dev/null
@@ -0,0 +1,70 @@
+<?php
+/**
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+
+/**
+ * Tests for the base (English) numerizer.
+ *
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+class Horde_Support_Numerizer_Locale_BaseTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Checks that spelled-out numbers are converted to the expected value.
+     *
+     * The cases are (expected, input) pairs rather than an expected => input
+     * map: several inputs share the same expected value, and with a map the
+     * later entry silently replaces the earlier one.
+     */
+    public function testStraightParsing()
+    {
+        $numerizer = Horde_Support_Numerizer::factory();
+        $strings = array(
+            array(1, 'one'),
+            array(5, 'five'),
+            array(10, 'ten'),
+            array(11, 'eleven'),
+            array(12, 'twelve'),
+            array(13, 'thirteen'),
+            array(14, 'fourteen'),
+            array(15, 'fifteen'),
+            array(16, 'sixteen'),
+            array(17, 'seventeen'),
+            array(18, 'eighteen'),
+            array(19, 'nineteen'),
+            array(20, 'twenty'),
+            array(27, 'twenty seven'),
+            array(31, 'thirty-one'),
+            array(59, 'fifty nine'),
+            // array(100, 'a hundred'),  // was never run (shadowed by the duplicate 100 key); enable once a standalone "a" is numerized as 1
+            array(100, 'one hundred'),
+            array(150, 'one hundred and fifty'),
+            // array(150, 'one fifty'),
+            array(200, 'two-hundred'),
+            array(500, '5 hundred'),
+            array(999, 'nine hundred and ninety nine'),
+            array(1000, 'one thousand'),
+            array(1200, 'twelve hundred'),
+            array(1200, 'one thousand two hundred'),
+            array(17000, 'seventeen thousand'),
+            array(21473, 'twentyone-thousand-four-hundred-and-seventy-three'),
+            array(74002, 'seventy four thousand and two'),
+            array(99999, 'ninety nine thousand nine hundred ninety nine'),
+            array(100000, '100 thousand'),
+            array(250000, 'two hundred fifty thousand'),
+            array(1000000, 'one million'),
+            array(1250007, 'one million two hundred fifty thousand and seven'),
+            array(1000000000, 'one billion'),
+            array(1000000001, 'one billion and one'),
+        );
+
+        foreach ($strings as $pair) {
+            $this->assertEquals($pair[0], (int)$numerizer->numerize($pair[1]));
+        }
+    }
+
+    /**
+     * Checks that ISO-style date/time strings pass through unchanged.
+     */
+    public function testLeavesDatesAlone()
+    {
+        $numerizer = Horde_Support_Numerizer::factory();
+
+        $this->assertEquals('2006-08-20 03:00', $numerizer->numerize('2006-08-20 03:00'));
+        $this->assertEquals('2006-08-20 15:30:30', $numerizer->numerize('2006-08-20 15:30:30'));
+    }
+
+}
diff --git a/framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php b/framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php
new file mode 100644 (file)
index 0000000..806d9bb
--- /dev/null
@@ -0,0 +1,64 @@
+<?php
+/**
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+
+/**
+ * Tests for the German numerizer.
+ *
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+class Horde_Support_Numerizer_Locale_DeTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Checks that spelled-out German numbers are converted to the expected
+     * digit string. Each case is an (expected, input) pair.
+     */
+    public function testStraightParsing()
+    {
+        $cases = array(
+            array(1, 'eins'),
+            array(5, 'fünf'),
+            array(10, 'zehn'),
+            array(11, 'elf'),
+            array(12, 'zwölf'),
+            array(13, 'dreizehn'),
+            array(14, 'vierzehn'),
+            array(15, 'fünfzehn'),
+            array(16, 'sechzehn'),
+            array(17, 'siebzehn'),
+            array(18, 'achtzehn'),
+            array(19, 'neunzehn'),
+            array(20, 'zwanzig'),
+            array(27, 'siebenundzwanzig'),
+            array(31, 'einunddreißig'),
+            array(59, 'neunundfünfzig'),
+            array(100, 'einhundert'),
+            array(100, 'ein hundert'),
+            array(150, 'hundertundfünfzig'),
+            array(150, 'einhundertundfünfzig'),
+            array(200, 'zweihundert'),
+            array(500, 'fünfhundert'),
+            array(999, 'neunhundertneunundneunzig'),
+            array(1000, 'eintausend'),
+            array(1200, 'zwölfhundert'),
+            array(1200, 'eintausendzweihundert'),
+            array(17000, 'siebzehntausend'),
+            array(21473, 'einundzwanzigtausendvierhundertdreiundsiebzig'),
+            array(74002, 'vierundsiebzigtausendzwei'),
+            array(74002, 'vierundsiebzigtausendundzwei'),
+            array(99999, 'neunundneunzigtausendneunhundertneunundneunzig'),
+            array(100000, 'hunderttausend'),
+            array(100000, 'einhunderttausend'),
+            array(250000, 'zweihundertfünfzigtausend'),
+            array(1000000, 'eine million'),
+            array(1250007, 'eine million zweihundertfünfzigtausendundsieben'),
+            array(1000000000, 'eine milliarde'),
+            array(1000000001, 'eine milliarde und eins'),
+        );
+
+        $germanNumerizer = Horde_Support_Numerizer::factory(array('locale' => 'de'));
+        foreach ($cases as $case) {
+            list($expected, $input) = $case;
+            $actual = $germanNumerizer->numerize($input);
+            $this->assertEquals((string)$expected, $actual);
+        }
+    }
+
+}