From 55eb078b7f65ab6afc9c623057c75ccd29b76745 Mon Sep 17 00:00:00 2001 From: Chuck Hagenbuch Date: Wed, 27 May 2009 00:08:58 -0400 Subject: [PATCH] add Horde_Support_Numerizer to the main Horde_Support package --- framework/Support/lib/Horde/Support/Numerizer.php | 34 +++++ .../lib/Horde/Support/Numerizer/Locale/Base.php | 149 +++++++++++++++++++++ .../lib/Horde/Support/Numerizer/Locale/De.php | 110 +++++++++++++++ framework/Support/package.xml | 11 ++ .../Horde/Support/Numerizer/Locale/BaseTest.php | 70 ++++++++++ .../test/Horde/Support/Numerizer/Locale/DeTest.php | 64 +++++++++ 6 files changed, 438 insertions(+) create mode 100644 framework/Support/lib/Horde/Support/Numerizer.php create mode 100644 framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php create mode 100644 framework/Support/lib/Horde/Support/Numerizer/Locale/De.php create mode 100644 framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php create mode 100644 framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php diff --git a/framework/Support/lib/Horde/Support/Numerizer.php b/framework/Support/lib/Horde/Support/Numerizer.php new file mode 100644 index 000000000..9f7317af3 --- /dev/null +++ b/framework/Support/lib/Horde/Support/Numerizer.php @@ -0,0 +1,34 @@ +numerize($string); + } + + public static function factory($args = array()) + { + $locale = isset($args['locale']) ? $args['locale'] : null; + if ($locale && strtolower($locale) != 'base') { + $locale = str_replace(' ', '_', ucwords(str_replace('_', ' ', strtolower($locale)))); + $class = 'Horde_Support_Numerizer_Locale_' . $locale; + if (class_exists($class)) { + return new $class($args); + } + + $language = array_shift(explode('_', $locale)); + if ($language != $locale) { + $class = 'Horde_Support_Numerizer_Locale_' . $language; + if (class_exists($class)) { + return new $class($args); + } + } + } + + return new Horde_Support_Numerizer_Locale_Base($args); + } + +} diff --git a/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php b/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php new file mode 100644 index 000000000..0a1908ef4 --- /dev/null +++ b/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php @@ -0,0 +1,149 @@ + '11', + 'twelve' => '12', + 'thirteen' => '13', + 'fourteen' => '14', + 'fifteen' => '15', + 'sixteen' => '16', + 'seventeen' => '17', + 'eighteen' => '18', + 'nineteen' => '19', + 'ninteen' => '19', // Common mis-spelling + 'zero' => '0', + 'one' => '1', + 'two' => '2', + 'three' => '3', + 'four(\W|$)' => '4$1', // The weird regex is so that it matches four but not fourty + 'five' => '5', + 'six(\W|$)' => '6$1', + 'seven(\W|$)' => '7$1', + 'eight(\W|$)' => '8$1', + 'nine(\W|$)' => '9$1', + 'ten' => '10', + '\ba[\b^$]' => '1', // doesn't make sense for an 'a' at the end to be a 1 + ); + + public $TEN_PREFIXES = array( + 'twenty' => 20, + 'thirty' => 30, + 'fourty' => 40, + 'fifty' => 50, + 'sixty' => 60, + 'seventy' => 70, + 'eighty' => 80, + 'ninety' => 90, + ); + + public $BIG_PREFIXES = array( + 'hundred' => 100, + 'thousand' => 1000, + 'million' => 1000000, + 'billion' => 1000000000, + 'trillion' => 1000000000000, + ); + + public function numerize($string) + { + // preprocess + $string = $this->_splitHyphenatedWords($string); + $string = $this->_hideAHalf($string); + + $string = $this->_directReplacements($string); + $string = $this->_replaceTenPrefixes($string); + $string = $this->_replaceBigPrefixes($string); + $string = $this->_fractionalAddition($string); + + return $string; + } + + /** + * will mutilate hyphenated-words but shouldn't matter for date extraction + */ + protected function _splitHyphenatedWords($string) + { + return preg_replace('/ +|([^\d])-([^d])/', '$1 $2', $string); + } + + /** + * take the 'a' out so it doesn't turn into a 1, save the half for the end + */ + protected function _hideAHalf($string) + { + return str_replace('a half', 'haAlf', $string); + } + + /** + * easy/direct replacements + */ + protected function _directReplacements($string) + { + foreach ($this->DIRECT_NUMS as $dn => $dn_replacement) { + $string = preg_replace("/$dn/i", $dn_replacement, $string); + } + return $string; + } + + /** + * ten, twenty, etc. + */ + protected function _replaceTenPrefixes($string) + { + foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) { + $string = preg_replace_callback( + "/(?:$tp)( *\d(?=[^\d]|\$))*/i", + create_function( + '$m', + 'return ' . $tp_replacement . ' + (isset($m[1]) ? (int)$m[1] : 0);' + ), + $string); + } + return $string; + } + + /** + * hundreds, thousands, millions, etc. + */ + protected function _replaceBigPrefixes($string) + { + foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) { + $string = preg_replace_callback( + '/(\d*) *' . $bp . '/i', + create_function( + '$m', + 'return ' . $bp_replacement . ' * (int)$m[1];' + ), + $string); + $string = $this->_andition($string); + } + return $string; + } + + protected function _andition($string) + { + while (true) { + if (preg_match('/(\d+)( | and )(\d+)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) { + if (preg_match('/and/', $sc[2][0]) || (strlen($sc[1][0]) > strlen($sc[3][0]))) { + $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0])); + continue; + } + } + break; + } + return $string; + } + + protected function _fractionalAddition($string) + { + return preg_replace_callback( + '/(\d+)(?: | and |-)*haAlf/i', + create_function( + '$m', + 'return (string)((float)$m[1] + 0.5);' + ), + $string); + } + +} diff --git a/framework/Support/lib/Horde/Support/Numerizer/Locale/De.php b/framework/Support/lib/Horde/Support/Numerizer/Locale/De.php new file mode 100644 index 000000000..2e13fae30 --- /dev/null +++ b/framework/Support/lib/Horde/Support/Numerizer/Locale/De.php @@ -0,0 +1,110 @@ + 13, + 'vierzehn' => 14, + 'fünfzehn' => 15, + 'sechzehn' => 16, + 'siebzehn' => 17, + 'achtzehn' => 18, + 'neunzehn' => 19, + 'eins' => 1, + 'zwei' => 2, + 'zwo' => 2, + 'drei' => 3, + 'vier' => 4, + 'fünf' => 5, + 'sechs' => 6, + 'sieben' => 7, + 'acht' => 8, + 'neun' => 9, + 'zehn' => 10, + 'elf' => 11, + 'zwölf' => 12, + 'eine?' => 1, + ); + + public $TEN_PREFIXES = array( + 'zwanzig' => 20, + 'dreißig' => 30, + 'vierzig' => 40, + 'fünfzig' => 50, + 'sechzig' => 60, + 'siebzig' => 70, + 'achtzig' => 80, + 'neunzig' => 90, + ); + + public $BIG_PREFIXES = array( + 'hundert' => 100, + 'tausend' => 1000, + 'million' => 1000000, + 'milliarde' => 1000000000, + 'billion' => 1000000000000, + ); + + /** + * Rules: + * + * - there are irregular word for 11 and 12 like in English + * - numbers below one million are written together (1 M = "eine Million", 100 = "einhundert") + * - "a" is declinable (see above, "one" = "eins", "a" = "ein/eine") + * - numbers below 100 are flipped compared to english, and have an "and = "und" (21 = "twenty-one" = "einundzwanzig") + */ + public function numerize($string) + { + // preprocess? + + $string = $this->_replaceTenPrefixes($string); + $string = $this->_directReplacements($string); + $string = $this->_replaceBigPrefixes($string); + $string = $this->_fractionalAddition($string); + $string = $this->_andition($string); + + return $string; + } + + /** + * ten, twenty, etc. + */ + protected function _replaceTenPrefixes($string) + { + foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) { + $string = preg_replace_callback( + "/(?:$tp)( *\d(?=[^\d]|\$))*/i", + create_function( + '$m', + 'return ' . $tp_replacement . ' + (isset($m[1]) ? (int)$m[1] : 0);' + ), + $string); + } + return $string; + } + + /** + * hundreds, thousands, millions, etc. + */ + protected function _replaceBigPrefixes($string) + { + foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) { + $string = preg_replace_callback( + '/(\d*) *' . $bp . '/i', + create_function( + '$m', + '$factor = (int)$m[1]; if (!$factor) $factor = 1; return (' . $bp_replacement . ' * $factor) . "und";' + ), + $string); + } + return $string; + } + + protected function _andition($string) + { + while (preg_match('/(\d+)((?:und)+)(\d*)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) { + $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0])); + } + return $string; + } + +} diff --git a/framework/Support/package.xml b/framework/Support/package.xml index a21039f6e..470adccd3 100644 --- a/framework/Support/package.xml +++ b/framework/Support/package.xml @@ -32,15 +32,23 @@ http://pear.php.net/dtd/package-2.0.xsd"> * Initial Horde_Support_Stub object * Initial Horde_Support_Timer object * Initial Horde_Support_Uuid object + * Initial Horde_Support_Numerizer objects + + + + + + + @@ -64,6 +72,9 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + + diff --git a/framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php b/framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php new file mode 100644 index 000000000..a370511e2 --- /dev/null +++ b/framework/Support/test/Horde/Support/Numerizer/Locale/BaseTest.php @@ -0,0 +1,70 @@ + 'one', + 5 => 'five', + 10 => 'ten', + 11 => 'eleven', + 12 => 'twelve', + 13 => 'thirteen', + 14 => 'fourteen', + 15 => 'fifteen', + 16 => 'sixteen', + 17 => 'seventeen', + 18 => 'eighteen', + 19 => 'nineteen', + 20 => 'twenty', + 27 => 'twenty seven', + 31 => 'thirty-one', + 59 => 'fifty nine', + 100 => 'a hundred', + 100 => 'one hundred', + 150 => 'one hundred and fifty', + // 150 => 'one fifty', + 200 => 'two-hundred', + 500 => '5 hundred', + 999 => 'nine hundred and ninety nine', + 1000 => 'one thousand', + 1200 => 'twelve hundred', + 1200 => 'one thousand two hundred', + 17000 => 'seventeen thousand', + 21473 => 'twentyone-thousand-four-hundred-and-seventy-three', + 74002 => 'seventy four thousand and two', + 99999 => 'ninety nine thousand nine hundred ninety nine', + 100000 => '100 thousand', + 250000 => 'two hundred fifty thousand', + 1000000 => 'one million', + 1250007 => 'one million two hundred fifty thousand and seven', + 1000000000 => 'one billion', + 1000000001 => 'one billion and one', + ); + + foreach ($strings as $key => $string) { + $this->assertEquals($key, (int)$numerizer->numerize($string)); + } + } + + public function testLeavesDatesAlone() + { + $numerizer = Horde_Support_Numerizer::factory(); + + $this->assertEquals('2006-08-20 03:00', $numerizer->numerize('2006-08-20 03:00')); + $this->assertEquals('2006-08-20 15:30:30', $numerizer->numerize('2006-08-20 15:30:30')); + } + +} diff --git a/framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php b/framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php new file mode 100644 index 000000000..806d9bb04 --- /dev/null +++ b/framework/Support/test/Horde/Support/Numerizer/Locale/DeTest.php @@ -0,0 +1,64 @@ + 'de')); + $strings = array( + array(1, 'eins'), + array(5, 'fünf'), + array(10, 'zehn'), + array(11, 'elf'), + array(12, 'zwölf'), + array(13, 'dreizehn'), + array(14, 'vierzehn'), + array(15, 'fünfzehn'), + array(16, 'sechzehn'), + array(17, 'siebzehn'), + array(18, 'achtzehn'), + array(19, 'neunzehn'), + array(20, 'zwanzig'), + array(27, 'siebenundzwanzig'), + array(31, 'einunddreißig'), + array(59, 'neunundfünfzig'), + array(100, 'einhundert'), + array(100, 'ein hundert'), + array(150, 'hundertundfünfzig'), + array(150, 'einhundertundfünfzig'), + array(200, 'zweihundert'), + array(500, 'fünfhundert'), + array(999, 'neunhundertneunundneunzig'), + array(1000, 'eintausend'), + array(1200, 'zwölfhundert'), + array(1200, 'eintausendzweihundert'), + array(17000, 'siebzehntausend'), + array(21473, 'einundzwanzigtausendvierhundertdreiundsiebzig'), + array(74002, 'vierundsiebzigtausendzwei'), + array(74002, 'vierundsiebzigtausendundzwei'), + array(99999, 'neunundneunzigtausendneunhundertneunundneunzig'), + array(100000, 'hunderttausend'), + array(100000, 'einhunderttausend'), + array(250000, 'zweihundertfünfzigtausend'), + array(1000000, 'eine million'), + array(1250007, 'eine million zweihundertfünfzigtausendundsieben'), + array(1000000000, 'eine milliarde'), + array(1000000001, 'eine milliarde und eins'), + ); + + foreach ($strings as $pair) { + $this->assertEquals((string)$pair[0], $numerizer->numerize($pair[1])); + } + } + +} -- 2.11.0